82 files changed, 35047 insertions, 0 deletions
diff --git a/sys/fs/cd9660/TODO b/sys/fs/cd9660/TODO
new file mode 100644
index 0000000..cf3fadc
--- /dev/null
+++ b/sys/fs/cd9660/TODO
@@ -0,0 +1,43 @@
+#	$Id$
+
+ 2) should understand Rock Ridge
+
+   Yes, the following features are supported:
+
+       o Symbolic Link
+       o Real Name(long name)
+       o File Attribute 
+       o Time stamp
+       o uid, gid
+       o Devices
+       o Relocated directories
+
+   Except for the following:
+
+       o POSIX device number mapping
+
+         There is some preliminary stuff in there that (ab-)uses the mknod
+         system call, but this needs a writable filesystem
+         
+ 5) should have name translation enabled by mount flag
+
+   Yes. The Rock Ridge Extension can be disabled with the following option:
+
+      "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+    are slow)
+
+   Not yet.
+
+ 7) ECMA support.
+
+   Not yet. We need not only a technical spec but also an actual
+   ECMA-format CD-ROM!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+   Not yet. We would also have to make the other parts of the system
+   8-bit clean. As far as I know, if you export the CD-ROM over NFS, the
+   client can access it 8-bit clean (e.g. Solaris Japanese with EUC code).
+
diff --git a/sys/fs/cd9660/TODO.hibler b/sys/fs/cd9660/TODO.hibler
new file mode 100644
index 0000000..660b268
--- /dev/null
+++ b/sys/fs/cd9660/TODO.hibler
@@ -0,0 +1,14 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+   Since it was modelled after the inode code, we might be able to merge
+   them back.  It looks like a separate (but very similar) lookup routine
+   will be needed due to the associated file stuff.
+
+2. It would be nice to be able to use the vfs_cluster code.
+   Unfortunately, if the logical block size is smaller than the page size,
+   it won't work.  Also, if throughput is relatively constant for any
+   block size (as it is for the HP drive--150kbs) then clustering may not
+   buy much (or may even hurt when vfs_cluster comes up with a large sync
+   cluster).
+
+3. Seems like there should be a "notrans" or some such mount option to show
+   filenames as they really are without lower-casing.  Does this make sense?
diff --git a/sys/fs/cd9660/cd9660_bmap.c b/sys/fs/cd9660/cd9660_bmap.c
new file mode 100644
index 0000000..e787f83
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_bmap.c
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_bmap.c	8.3 (Berkeley) 1/23/94
+ * $Id: cd9660_bmap.c,v 1.6 1997/02/22 09:38:47 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk.  The conversion is done by adding the logical block
+ * number to the starting block of the file's data (extent).
+ */
+int
+cd9660_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct iso_node *node = VTOI(ap->a_vp);
+	daddr_t lbn = ap->a_bn;
+	int shift;
+
+	/*
+	 * Hand back the device vnode when asked for it.  A caller that
+	 * supplies no a_bnp gets nothing further.
+	 */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = node->i_devvp;
+	if (ap->a_bnp == NULL)
+		return (0);
+
+	/*
+	 * Starting block plus logical block number, shifted from the
+	 * mount's block size to DEV_BSHIFT-sized units.
+	 */
+	shift = node->i_mnt->im_bshift;
+	*ap->a_bnp = (node->iso_start + lbn) << (shift - DEV_BSHIFT);
+
+	/*
+	 * Read-ahead run: the blocks left in the file after this one,
+	 * kept below MAXBSIZE worth of logical blocks.
+	 */
+	if (ap->a_runp != NULL) {
+		int limit = MAXBSIZE >> shift;
+		int left = (node->i_size >> shift) - (lbn + 1);
+
+		if (left <= 0)
+			*ap->a_runp = 0;
+		else if (left < limit)
+			*ap->a_runp = left;
+		else
+			*ap->a_runp = limit - 1;
+	}
+
+	/* Always report a zero-length run behind the block. */
+	if (ap->a_runb != NULL)
+		*ap->a_runb = 0;
+	return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
new file mode 100644
index 0000000..3d0ff74
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -0,0 +1,422 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	from: @(#)ufs_lookup.c	7.33 (Berkeley) 5/19/91
+ *
+ *	@(#)cd9660_lookup.c	8.2 (Berkeley) 1/23/94
+ * $Id: cd9660_lookup.c,v 1.20 1997/11/07 08:52:50 phk Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".".  When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and an iput
+ * instead of two iputs.
+ *
+ * Overall outline of ufs_lookup:
+ *
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	if creating, return locked directory, leaving info on available slots
+ *	else return error
+ * found:
+ *	if at end of path and deleting, return information to allow delete
+ *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ *	  inode and return info to allow rewrite
+ *	if not at end, add name to cache; if at end and neither creating
+ *	  nor deleting, add name to cache
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ */
+int
+cd9660_lookup(ap)
+	struct vop_cachedlookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct iso_node *dp;	/* inode for directory being searched */
+	register struct iso_mnt *imp;	/* file system that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep = 0;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset = 0;		/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct vnode *pdp;		/* saved dp during symlink work */
+	struct vnode *tdp;		/* returned by cd9660_vget_internal */
+	u_long bmask;			/* block offset mask */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int error;
+	ino_t ino = 0;
+	int reclen;
+	u_short namelen;
+	int isoflags;
+	char altname[NAME_MAX];		/* filled in by cd9660_rrip_getname */
+	int res;			/* result of isofncmp */
+	int assoc, len;			/* assoc: looking for an associated file */
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+	struct proc *p = cnp->cn_proc;
+
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 */
+
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading `=' means we are looking for an associated file
+	 */
+	if ((assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)))
+	{
+		len--;
+		name++;
+	}
+
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	bmask = imp->im_bmask;
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		if ((entryoffsetinblock = dp->i_offset & bmask) &&
+		    (error = cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
+				return (error);
+		numdirpasses = 2;	/* second pass restarts at offset 0 */
+		nchstats.ncs_2passes++;
+	}
+	endsearch = dp->i_size;
+	
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if ((dp->i_offset & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error =
+			    cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			((char *)bp->b_data + entryoffsetinblock);
+		
+		reclen = isonum_711(ep->length);
+		if (reclen == 0) {
+			/* skip to next block, if any */
+			dp->i_offset =
+			    (dp->i_offset & ~bmask) + imp->logical_block_size;
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+		
+		namelen = isonum_711(ep->name_len);
+		isoflags = isonum_711(imp->iso_ftype == ISO_FTYPE_HIGH_SIERRA?
+				      &ep->date[6]: ep->flags);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+		
+		/*
+		 * Check for a name match.
+		 */
+		switch (imp->iso_ftype) {
+		default:
+			if (!(isoflags & 4) == !assoc) {	/* flag 4 must agree with assoc */
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+						/*
+						 * Save directory entry's inode number and
+						 * release directory buffer.
+						 */
+						dp->i_ino = isodirino(ep, imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)
+						goto notfound;
+				} else if (!(res = isofncmp(name,len,
+							    ep->name,namelen))) {
+					if (isoflags & 2)
+						ino = isodirino(ep, imp);
+					else
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;
+				} else if (ino)	/* mismatch after an earlier match */
+					goto foundino;
+#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				ino = isodirino(ep, imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;	/* no match: forget this entry */
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {
+			if (lblkno(imp, dp->i_offset) !=
+			    lblkno(imp, saveoffset)) {
+				if (bp != NULL)
+					brelse(bp);
+				if ((error = cd9660_blkatoff(vdp,
+				    (off_t)saveoffset, NULL, &bp)) != 0)
+					return (error);
+			}
+			entryoffsetinblock = saveoffset & bmask;
+			ep = (struct iso_directory_record *)
+				((char *)bp->b_data + entryoffsetinblock);
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EROFS);
+	return (ENOENT);
+
+found:
+	if (numdirpasses == 2)
+		nchstats.ncs_pass2++;
+	
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+
+	/*
+	 * Step through the translation in the name.  We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to insure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = vdp;
+	/*
+	 * If ino is different from dp->i_ino,
+	 * it's a relocated directory.
+	 */
+	if (flags & ISDOTDOT) {
+		VOP_UNLOCK(pdp, 0, p);	/* race to get the inode */
+		error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp,
+					     dp->i_ino != ino, ep);
+		brelse(bp);
+		if (error) {
+			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN) &&
+		    (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
+			vput(tdp);
+			return (error);
+		}
+		*vpp = tdp;
+	} else if (dp->i_number == dp->i_ino) {
+		brelse(bp);
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp,
+					     dp->i_ino != ino, ep);
+		brelse(bp);
+		if (error)
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			VOP_UNLOCK(pdp, 0, p);
+		*vpp = tdp;
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Return buffer with the contents of block "offset" from the beginning of
+ * directory "vp".  If "res" is non-zero, fill it in with a pointer to the
+ * data at "offset" within that buffer.
+ */
+int
+cd9660_blkatoff(vp, offset, res, bpp)
+	struct vnode *vp;
+	off_t offset;
+	char **res;
+	struct buf **bpp;
+{
+	struct iso_node *node = VTOI(vp);
+	struct iso_mnt *mnt = node->i_mnt;
+	daddr_t blk = lblkno(mnt, offset);
+	struct buf *blkbuf;
+	int rc;
+
+	/* Read the logical block that "offset" falls in. */
+	rc = bread(vp, blk, blksize(mnt, node, blk), NOCRED, &blkbuf);
+	if (rc != 0) {
+		/* Release what bread handed back and report no buffer. */
+		brelse(blkbuf);
+		*bpp = NULL;
+		return (rc);
+	}
+
+	/* On request, also point at "offset" inside the buffer data. */
+	if (res != NULL)
+		*res = (char *)blkbuf->b_data + blkoff(mnt, offset);
+	*bpp = blkbuf;
+	return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_mount.h b/sys/fs/cd9660/cd9660_mount.h
new file mode 100644
index 0000000..9d3f78e
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_mount.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_mount.h	8.1 (Berkeley) 5/24/95
+ */
+
+/*
+ * Arguments to mount ISO 9660 filesystems.
+ */
+struct iso_args {
+	char	*fspec;			/* block special device to mount */
+	struct	export_args export;	/* network export info */
+	int	flags;			/* ISOFSMNT_* mount flags, see below */
+	int	ssector;		/* starting sector, 0 for 1st session */
+};
+#define	ISOFSMNT_NORRIP	0x00000001	/* disable Rock Ridge Ext.*/
+#define	ISOFSMNT_GENS	0x00000002	/* enable generation numbers */
+#define	ISOFSMNT_EXTATT	0x00000004	/* enable extended attributes */
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
new file mode 100644
index 0000000..9640d6e
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -0,0 +1,428 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.c	8.2 (Berkeley) 1/23/94
+ * $Id: cd9660_node.c,v 1.26 1999/01/02 11:34:54 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_mount.h>
+
+/*
+ * Structures associated with iso_node caching.
+ */
+static struct iso_node **isohashtbl;	/* hash bucket heads, from hashinit() */
+static u_long isohash;			/* bucket index mask, from hashinit() */
+#define	INOHASH(device, inum)	(((device) + ((inum)>>12)) & isohash)
+#ifndef NULL_SIMPLELOCKS
+static struct simplelock cd9660_ihash_slock;	/* held while walking chains */
+#endif
+
+static void cd9660_ihashrem __P((struct iso_node *));
+static unsigned	cd9660_chars2ui __P((unsigned char *begin, int len));
+
+/*
+ * Create the inode hash table and initialize the lock taken around it.
+ */
+int
+cd9660_init(vfsp)
+	struct vfsconf *vfsp;
+{
+	/* vfsp is not used; the two steps below are independent. */
+	simple_lock_init(&cd9660_ihash_slock);
+	isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash);
+	return (0);
+}
+
+
+/*
+ * Look up the in-core inode for (dev, inum).  Returns its vnode, obtained
+ * through vget() with LK_EXCLUSIVE, or NULL if no such inode is hashed.
+ */
+struct vnode *
+cd9660_ihashget(dev, inum)
+	dev_t dev;
+	ino_t inum;
+{
+	struct proc *p = curproc;		/* XXX */
+	struct iso_node *np;
+	struct vnode *vp;
+
+	do {
+		simple_lock(&cd9660_ihash_slock);
+		for (np = isohashtbl[INOHASH(dev, inum)]; np; np = np->i_next)
+			if (inum == np->i_number && dev == np->i_dev)
+				break;
+		if (np == NULL) {
+			simple_unlock(&cd9660_ihash_slock);
+			return (NULL);
+		}
+		vp = ITOV(np);
+		simple_lock(&vp->v_interlock);	/* taken before the hash lock drops */
+		simple_unlock(&cd9660_ihash_slock);
+	} while (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) != 0); /* rescan on failure */
+	return (vp);
+}
+
+/*
+ * Link the inode at the head of its hash chain, then lock it exclusively.
+ */
+void
+cd9660_ihashins(ip)
+	struct iso_node *ip;
+{
+	struct proc *p = curproc;		/* XXX */
+	struct iso_node **bucket, *oldhead;
+
+	simple_lock(&cd9660_ihash_slock);
+	bucket = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)];
+	ip->i_next = oldhead = *bucket;	/* old chain hangs off ip */
+	ip->i_prev = bucket;		/* ip is referenced from the bucket slot */
+	if (oldhead != NULL)
+		oldhead->i_prev = &ip->i_next;
+	*bucket = ip;
+	simple_unlock(&cd9660_ihash_slock);
+	/* The node lock is taken only after the hash lock has been dropped. */
+	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p);
+}
+
+/*
+ * Unlink the inode from its hash chain.
+ */
+static void
+cd9660_ihashrem(ip)
+	register struct iso_node *ip;
+{
+	struct iso_node *succ;
+
+	simple_lock(&cd9660_ihash_slock);
+	*ip->i_prev = succ = ip->i_next;	/* whatever pointed at ip now skips it */
+	if (succ != NULL)
+		succ->i_prev = ip->i_prev;
+#ifdef DIAGNOSTIC
+	ip->i_prev = NULL;
+	ip->i_next = NULL;
+#endif
+	simple_unlock(&cd9660_ihash_slock);
+}
+
+/*
+ * Last reference to an inode is gone: clear its flags, unlock the vnode
+ * and recycle it at once if the node's mode is zero.
+ */
+int
+cd9660_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vnp = ap->a_vp;
+	struct proc *procp = ap->a_p;
+	struct iso_node *node = VTOI(vnp);
+
+	/* With prtactive set, report a vnode that still has users. */
+	if (prtactive && vnp->v_usecount != 0)
+		vprint("cd9660_inactive: pushing active", vnp);
+
+	node->i_flag = 0;
+	VOP_UNLOCK(vnp, 0, procp);
+	/*
+	 * A node with a zero mode is of no further use; recycle its
+	 * vnode now so that it can be reused immediately.
+	 */
+	if (node->inode.iso_mode == 0)
+		vrecycle(vnp, (struct simplelock *)0, procp);
+	return (0);
+}
+
+/*
+ * Reclaim an inode: unhash it, release its device vnode and free the node.
+ */
+int
+cd9660_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vnp = ap->a_vp;
+	struct iso_node *node = VTOI(vnp);
+
+	if (prtactive && vnp->v_usecount != 0)
+		vprint("cd9660_reclaim: pushing active", vnp);
+
+	/* Take the node out of the inode hash. */
+	cd9660_ihashrem(node);
+	/* Purge old data structures associated with the vnode. */
+	cache_purge(vnp);
+
+	/* Drop the reference on the device vnode, if one is recorded. */
+	if (node->i_devvp != NULL) {
+		vrele(node->i_devvp);
+		node->i_devvp = NULL;
+	}
+	/* Free the in-core node and detach it from the vnode. */
+	FREE(vnp->v_data, M_ISOFSNODE);
+	vnp->v_data = NULL;
+	return (0);
+}
+
+/*
+ * File attributes: set inop's mode, link count, uid and gid.
+ */
+void
+cd9660_defattr(isodir, inop, bp, ftype)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+	enum ISO_FTYPE ftype;
+{
+	struct buf *bp2 = NULL;		/* buffer read here, released on exit */
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *ap = NULL; /* NULL => use defaults */
+	int off;
+
+	/* high sierra does not have timezone data, flag is one byte ahead */
+	if (isonum_711(ftype == ISO_FTYPE_HIGH_SIERRA?
+		       &isodir->date[6]: isodir->flags)&2) {
+		inop->inode.iso_mode = S_IFDIR;
+		/*
+		 * If we return 2, fts() will assume there are no subdirectories
+		 * (just links for the path and .), so instead we return 1.
+		 */
+		inop->inode.iso_links = 1;
+	} else {
+		inop->inode.iso_mode = S_IFREG;
+		inop->inode.iso_links = 1;
+	}
+	if (!bp				/* caller supplied no buffer */
+	    && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT)
+	    && (off = isonum_711(isodir->ext_attr_length))) {
+		cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL,
+			     &bp2);	/* note the negative byte offset */
+		bp = bp2;
+	}
+	if (bp) {
+		ap = (struct iso_extended_attributes *)bp->b_data;
+		/* only version 1 records are used; a clear bit grants access */
+		if (isonum_711(ap->version) == 1) {
+			if (!(ap->perm[0]&0x40))
+				inop->inode.iso_mode |= VEXEC >> 6;
+			if (!(ap->perm[0]&0x10))
+				inop->inode.iso_mode |= VREAD >> 6;
+			if (!(ap->perm[0]&4))
+				inop->inode.iso_mode |= VEXEC >> 3;
+			if (!(ap->perm[0]&1))
+				inop->inode.iso_mode |= VREAD >> 3;
+			if (!(ap->perm[1]&0x40))
+				inop->inode.iso_mode |= VEXEC;
+			if (!(ap->perm[1]&0x10))
+				inop->inode.iso_mode |= VREAD;
+			inop->inode.iso_uid = isonum_723(ap->owner); /* what about 0? */
+			inop->inode.iso_gid = isonum_723(ap->group); /* what about 0? */
+		} else
+			ap = NULL;
+	}
+	if (!ap) {	/* no usable record: VREAD|VEXEC for all, uid/gid 0 */
+		inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+		inop->inode.iso_uid = (uid_t)0;
+		inop->inode.iso_gid = (gid_t)0;
+	}
+	if (bp2)
+		brelse(bp2);
+}
+
+/*
+ * Time stamps: set inop's atime, ctime and mtime.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp,ftype)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+	enum ISO_FTYPE ftype;
+{
+	struct buf *bp2 = NULL;		/* buffer read here, released on exit */
+	struct iso_mnt *imp;
+	struct iso_extended_attributes *ap = NULL; /* NULL => use isodir->date */
+	int off;
+
+	if (!bp				/* caller supplied no buffer */
+	    && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT)
+	    && (off = isonum_711(isodir->ext_attr_length))) {
+		cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL,
+			     &bp2);	/* note the negative byte offset */
+		bp = bp2;
+	}
+	if (bp) {
+		ap = (struct iso_extended_attributes *)bp->b_data;
+		/* a stamp that does not convert borrows a neighbouring one */
+		if (ftype != ISO_FTYPE_HIGH_SIERRA
+		    && isonum_711(ap->version) == 1) {
+			if (!cd9660_tstamp_conv17(ap->ftime,&inop->inode.iso_atime))
+				cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_atime);
+			if (!cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_ctime))
+				inop->inode.iso_ctime = inop->inode.iso_atime;
+			if (!cd9660_tstamp_conv17(ap->mtime,&inop->inode.iso_mtime))
+				inop->inode.iso_mtime = inop->inode.iso_ctime;
+		} else
+			ap = NULL;
+	}
+	if (!ap) {	/* one directory-record date serves all three stamps */
+		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime,ftype);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+	if (bp2)
+		brelse(bp2);
+}
+
+int
+cd9660_tstamp_conv7(pi,pu,ftype)
+	u_char *pi;		/* bytes: year-1900, mon, day, hr, min, sec, tz */
+	struct timespec *pu;	/* out: time relative to Jan. 1st, 1970 */
+	enum ISO_FTYPE ftype;	/* High Sierra records carry no tz byte */
+{
+	int crtime, days;
+	int y, m, d, hour, minute, second, tz;
+
+	/* Returns 1 if converted, 0 (with *pu zeroed) for years before 1970. */
+	y = pi[0] + 1900;
+	m = pi[1];
+	d = pi[2];
+	hour = pi[3];
+	minute = pi[4];
+	second = pi[5];
+	/* The GMT offset is a signed byte; read unsigned it is never negative. */
+	if (ftype != ISO_FTYPE_HIGH_SIERRA)
+		tz = ((signed char *)pi)[6];
+	else
+		tz = 0;
+
+	if (y < 1970) {
+		pu->tv_sec  = 0;
+		pu->tv_nsec = 0;
+		return 0;
+	}
+#ifdef	ORIGINAL
+	/* computes day number relative to Sept. 19th,1989 */
+	/* don't even *THINK* about changing formula. It works! */
+	days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+	/*
+	 * Changed :-) to make it relative to Jan. 1st, 1970
+	 * and to disambiguate negative division
+	 */
+	days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+	crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second;
+
+	/* timezone offset (15 minute units) is unreliable on some disks */
+	if (-48 <= tz && tz <= 52)
+		crtime -= tz * 15 * 60;
+	pu->tv_sec  = crtime;
+	pu->tv_nsec = 0;
+	return 1;
+}
+
+static u_int
+cd9660_chars2ui(begin,len)
+	u_char *begin;		/* decimal digit characters, no terminator needed */
+	int len;		/* how many characters to consume */
+{
+	u_int value = 0;
+	int i;
+
+	/* Accumulate base 10, most significant digit first. */
+	for (i = 0; i < len; i++)
+		value = value * 10 + (begin[i] - '0');
+	return (value);
+}
+
+int
+cd9660_tstamp_conv17(pi,pu)
+	u_char *pi;
+	struct timespec *pu;
+{
+	u_char buf[7];
+	u_int year;
+
+	/*
+	 * year:"0001"-"9999" -> -1900.  The 7-byte form holds the year in one
+	 * byte, so only 1900..2155 fit; anything else (e.g. an all-'0' stamp)
+	 * used to wrap into a bogus year.  Return 0 with *pu zeroed instead.
+	 */
+	year = cd9660_chars2ui(pi,4);
+	if (year < 1900 || year > 1900 + 255) {
+		pu->tv_sec  = 0;
+		pu->tv_nsec = 0;
+		return 0;
+	}
+
+	buf[0] = year - 1900;
+	buf[1] = cd9660_chars2ui(pi + 4,2);	/* month: " 1"-"12" -> 1 - 12 */
+	buf[2] = cd9660_chars2ui(pi + 6,2);	/* day:   " 1"-"31" -> 1 - 31 */
+	buf[3] = cd9660_chars2ui(pi + 8,2);	/* hour:  " 0"-"23" -> 0 - 23 */
+	buf[4] = cd9660_chars2ui(pi + 10,2);	/* minute:" 0"-"59" -> 0 - 59 */
+	buf[5] = cd9660_chars2ui(pi + 12,2);	/* second:" 0"-"59" -> 0 - 59 */
+	buf[6] = pi[16];			/* difference of GMT */
+
+	return cd9660_tstamp_conv7(buf, pu, ISO_FTYPE_DEFAULT);
+}
+
+ino_t
+isodirino(isodir, imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	/*
+	 * (extent + ext_attr_length) scaled by the mount's block shift.
+	 */
+	return ((isonum_733(isodir->extent) +
+	    isonum_711(isodir->ext_attr_length)) << imp->im_bshift);
+}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
new file mode 100644
index 0000000..33b208f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_node.h	8.6 (Berkeley) 5/14/95
+ * $Id: cd9660_node.h,v 1.16 1997/11/18 14:40:34 phk Exp $
+ */
+
+/*
+ * Theoretically, directories can be more than 2GB in length;
+ * in practice, however, this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t	long
+
+typedef	struct	{
+	struct timespec	iso_atime;	/* time of last access */
+	struct timespec	iso_mtime;	/* time of last modification */
+	struct timespec	iso_ctime;	/* time of last attribute change */
+	u_short		iso_mode;	/* file access mode and type */
+	uid_t		iso_uid;	/* owner user id */
+	gid_t		iso_gid;	/* owner group id */
+	short		iso_links;	/* link count of file */
+	dev_t		iso_rdev;	/* major/minor number for special files */
+} ISO_RRIP_INODE;
+
+
+struct iso_node {
+	struct	lock i_lock;	/* node lock -- keep this the first member */
+	struct	iso_node *i_next, **i_prev;	/* hash chain */
+	struct	vnode *i_vnode;	/* vnode associated with this inode */
+	struct	vnode *i_devvp;	/* vnode for block I/O */
+	u_long	i_flag;		/* IN_* flags, see below */
+	dev_t	i_dev;		/* device where inode resides */
+	ino_t	i_number;	/* the identity of the inode */
+				/* we use the actual starting block of the file */
+	struct	iso_mnt *i_mnt;	/* filesystem associated with this inode */
+	struct	lockf *i_lockf;	/* head of byte-level lock list */
+	doff_t	i_endoff;	/* end of useful stuff in directory */
+	doff_t	i_diroff;	/* offset in dir, where we found last entry */
+	doff_t	i_offset;	/* offset of free space in directory */
+	ino_t	i_ino;		/* inode number of found directory */
+
+	long iso_extent;	/* extent of file */
+	long i_size;		/* NOTE(review): presumably size in bytes; confirm */
+	long iso_start;		/* actual start of data of file (may be different */
+				/* from iso_extent, if file has extended attributes) */
+	ISO_RRIP_INODE	inode;	/* mode, ids, link count, times and rdev */
+};
+/* NOTE(review): struct iso_node declares no i_chain member; these look stale */
+#define	i_forw		i_chain[0]
+#define	i_back		i_chain[1]
+
+/* flags for iso_node.i_flag */
+#define	IN_ACCESS	0x0020		/* inode access time to be updated */
+/* convert between a vnode and the iso_node stored in its v_data */
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+#ifdef KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_ISOFSMNT);
+MALLOC_DECLARE(M_ISOFSNODE);
+#endif
+/* forward declarations; only pointers to these are used below */
+struct buf;
+struct vop_bmap_args;
+struct vop_cachedlookup_args;
+struct vop_inactive_args;
+struct vop_reclaim_args;
+
+/*
+ * Prototypes for ISOFS vnode operations
+ */
+int cd9660_lookup __P((struct vop_cachedlookup_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_blkatoff __P((struct vnode *vp, off_t offset, char **res, struct buf **bpp));
+/* Helpers that take directory records, iso_nodes or raw time stamps */
+void cd9660_defattr __P((struct iso_directory_record *,
+			struct iso_node *, struct buf *, enum ISO_FTYPE));
+void cd9660_deftstamp __P((struct iso_directory_record *,
+			struct iso_node *, struct buf *, enum ISO_FTYPE));
+struct vnode *cd9660_ihashget __P((dev_t, ino_t));
+void cd9660_ihashins __P((struct iso_node *));
+int cd9660_tstamp_conv7 __P((u_char *, struct timespec *, enum ISO_FTYPE));
+int cd9660_tstamp_conv17 __P((u_char *, struct timespec *));
+
+#endif /* KERNEL */
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
new file mode 100644
index 0000000..b34553f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -0,0 +1,723 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.c	8.6 (Berkeley) 12/5/94
+ * $Id: cd9660_rrip.c,v 1.12 1997/02/22 09:38:49 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+typedef int	rrt_func_t __P((void *, ISO_RRIP_ANALYZE *ana));
+/* One row of a parser table walked by cd9660_rrip_loop() */
+typedef struct {
+	char type[2];		/* two-letter field signature */
+	rrt_func_t *func;	/* called for a field whose type matches */
+	void (*func2) __P((struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana));
+	int result;		/* func2 runs if the scan result lacks these bits */
+} RRIP_TABLE;
+/* Field handlers and fallbacks defined further down in this file */
+static int	cd9660_rrip_altname __P((ISO_RRIP_ALTNAME *p,
+					 ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_attr __P((ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_cont __P((ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana));
+static void	cd9660_rrip_defattr __P((struct iso_directory_record *isodir,
+					 ISO_RRIP_ANALYZE *ana));
+static void	cd9660_rrip_defname __P((struct iso_directory_record *isodir,
+					 ISO_RRIP_ANALYZE *ana));
+static void	cd9660_rrip_deftstamp __P((struct iso_directory_record *isodir,
+					   ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_device __P((ISO_RRIP_DEVICE *p,
+					ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_extref __P((ISO_RRIP_EXTREF *p,
+					ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_idflag __P((ISO_RRIP_IDFLAG *p,
+					ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_loop __P((struct iso_directory_record *isodir,
+				      ISO_RRIP_ANALYZE *ana,
+				      RRIP_TABLE *table));
+static int	cd9660_rrip_pclink __P((ISO_RRIP_CLINK *p,
+					ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_reldir __P((ISO_RRIP_RELDIR *p,
+					ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_slink __P((ISO_RRIP_SLINK *p,
+				       ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_stop __P((ISO_SUSP_HEADER *p,
+				      ISO_RRIP_ANALYZE *ana));
+static int	cd9660_rrip_tstamp __P((ISO_RRIP_TSTAMP *p,
+					ISO_RRIP_ANALYZE *ana));
+
+/* POSIX file attributes ("PX"): copy mode, owner, group and link count */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *attrs = &ana->inop->inode;
+
+	attrs->iso_mode = isonum_733(p->mode);
+	attrs->iso_links = isonum_733(p->links);
+	attrs->iso_uid = isonum_733(p->uid);
+	attrs->iso_gid = isonum_733(p->gid);
+	ana->fields &= ~ISO_SUSP_ATTR;
+	return ISO_SUSP_ATTR;
+}
+
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;	/* record scanned without PX */
+	ISO_RRIP_ANALYZE *ana;			/* ana->inop goes to cd9660_defattr() */
+{
+	/* But this is a required field!  Complain, then fall back */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,ana->inop,NULL,ISO_FTYPE_RRIP);
+}
+
+/* Symbolic link ("SL"): append this field's components to ana->outbuf. */
+/* Returns ISO_SUSP_SLINK once a field with a zero flags byte has been */
+/* added, 0 while more is expected or after the target was rejected. */
+static int
+cd9660_rrip_slink(p,ana)
+	ISO_RRIP_SLINK	*p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register ISO_RRIP_SLINK_COMPONENT *pcomp;
+	register ISO_RRIP_SLINK_COMPONENT *pcompe;
+	int len, wlen, cont;
+	char *outbuf, *inbuf;
+
+	pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+	pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+	len = *ana->outlen;
+	outbuf = ana->outbuf;
+	cont = ana->cont;
+
+	/* Walk the component records from p->component to the end of */
+	/* the field, working on local copies of the caller's cursor */
+	/* (outbuf, len, cont) that are stored back only on success */
+	for (;
+	     pcomp < pcompe;
+	     pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+						  + isonum_711(pcomp->clen))) {
+
+		if (!cont) {	/* previous component was complete */
+			if (len < ana->maxlen) {
+				len++;
+				*outbuf++ = '/';
+			}
+		}
+		cont = 0;
+
+		inbuf = "..";	/* source for both "." and ".." */
+		wlen = 0;
+
+		switch (*pcomp->cflag) {
+
+		case ISO_SUSP_CFLAG_CURRENT:
+			/* Inserting Current */
+			wlen = 1;
+			break;
+
+		case ISO_SUSP_CFLAG_PARENT:
+			/* Inserting Parent */
+			wlen = 2;
+			break;
+
+		case ISO_SUSP_CFLAG_ROOT:
+			/* Absolute target: rewind to the start of the buffer. */
+			/* The '/' comes from the next component's separator */
+			outbuf -= len;
+			len = 0;
+			break;
+
+		case ISO_SUSP_CFLAG_VOLROOT:
+			/* Inserting a mount point i.e. "/cdrom" */
+			/* after rewinding to the start of the buffer */
+			outbuf -= len;
+			len = 0;
+			inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+			wlen = strlen(inbuf);
+			break;
+
+		case ISO_SUSP_CFLAG_HOST:
+			/* Inserting hostname i.e. "kurt.tools.de" */
+			inbuf = hostname;
+			wlen = strlen(hostname);
+			break;
+
+		case ISO_SUSP_CFLAG_CONTINUE:
+			cont = 1;
+			/* fall thru */
+		case 0:
+			/* Inserting component */
+			wlen = isonum_711(pcomp->clen);
+			inbuf = pcomp->name;
+			break;
+		default:
+			printf("RRIP with incorrect flags?\n");
+			wlen = ana->maxlen + 1;	/* forces the error path below */
+			break;
+		}
+
+		if (len + wlen > ana->maxlen) {
+			/* indicate error to caller: empty, rewound buffer */
+			ana->cont = 1;
+			ana->fields = 0;
+			ana->outbuf -= *ana->outlen;
+			*ana->outlen = 0;
+			return 0;
+		}
+
+		bcopy(inbuf,outbuf,wlen);
+		outbuf += wlen;
+		len += wlen;
+
+	}
+	ana->outbuf = outbuf;
+	*ana->outlen = len;
+	ana->cont = cont;
+	/* flags == 0: no further SL field is awaited, report completion */
+	if (!isonum_711(p->flags)) {
+		ana->fields &= ~ISO_SUSP_SLINK;
+		return ISO_SUSP_SLINK;
+	}
+	return 0;
+}
+
+/* Alternate name ("NM"): append this field's name bytes to ana->outbuf. */
+/* Returns ISO_SUSP_ALTNAME when the name is complete, 0 when the field */
+/* asks for continuation or the name was rejected (too long, malformed). */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+
+	inbuf = "..";
+	wlen = 0;
+	cont = 0;
+
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		wlen = 1;	/* Inserting Current: first byte of ".." */
+		break;
+
+	case ISO_SUSP_CFLAG_PARENT:
+		wlen = 2;	/* Inserting Parent: both bytes of ".." */
+		break;
+
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = strlen(hostname);
+		break;
+
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/* Inserting component: the bytes after the 5-byte header */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		if (wlen < 0)	/* field shorter than its own header: */
+			wlen = ana->maxlen + 1;	/* reject it below */
+		break;
+
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		wlen = ana->maxlen + 1;
+		break;
+	}
+
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field: rewind to where the name began */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen - wlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* preload ".." so names 0 and 1 only need their length set */
+	strcpy(ana->outbuf,"..");
+
+	if (*isodir->name == 0) {
+		*ana->outlen = 1;
+		return;
+	}
+	if (*isodir->name == 1) {
+		*ana->outlen = 2;
+		return;
+	}
+	isofntrans(isodir->name,isonum_711(isodir->name_len),
+		   ana->outbuf,ana->outlen,
+		   1,isonum_711(isodir->flags)&4);
+}
+
+/* Parent ("PL") or child ("CL") link: store the inode number it names */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK	*p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	if (*p->h.type == 'C')
+		return ISO_SUSP_CLINK;
+	return ISO_SUSP_PLINK;
+}
+
+/*
+ * Relocated directory ("RE"); also called by cd9660_rrip_idflag()
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR	 *p;		/* not examined */
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	*ana->outlen = 0;		/* report an empty name */
+	ana->fields = 0;		/* nothing more is wanted */
+	return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+}
+
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;		/* "TF" field */
+	ISO_RRIP_ANALYZE *ana;		/* times go to ana->inop->inode */
+{
+	u_char *ptime;			/* cursor over the packed stamps */
+	/* Stamps are read in the order creation, modify, access, attribute; */
+	ptime = p->time;
+	/* ptime steps past each stamp whose flag bit is set */
+	/* Check a format of time stamp (7bytes/17bytes) */
+	if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 7;	/* creation stamp is skipped, not stored */
+
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime,
+					    ISO_FTYPE_RRIP);
+			ptime += 7;
+		} else	/* no modify stamp: mtime becomes zero */
+			bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec));
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime,
+					    ISO_FTYPE_RRIP);
+			ptime += 7;
+		} else	/* no access stamp: reuse mtime */
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime,
+					    ISO_FTYPE_RRIP);
+		else	/* no attribute stamp: reuse mtime */
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+
+	} else {
+		if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+			ptime += 17;	/* creation stamp is skipped, not stored */
+
+		if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime);
+			ptime += 17;
+		} else	/* no modify stamp: mtime becomes zero */
+			bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec));
+		
+		if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime);
+			ptime += 17;
+		} else	/* no access stamp: reuse mtime */
+			ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+
+		if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+			cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime);
+		else	/* no attribute stamp: reuse mtime */
+			ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+
+	}
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return ISO_SUSP_TSTAMP;
+}
+
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record  *isodir;	/* record scanned without TF */
+	ISO_RRIP_ANALYZE *ana;			/* ana->inop goes to cd9660_deftstamp() */
+{
+	cd9660_deftstamp(isodir,ana->inop,NULL,ISO_FTYPE_RRIP);
+}
+
+/*
+ * POSIX device number ("PN"): rebuild the dev_t of a special file.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	dev_t *rdevp = &ana->inop->inode.iso_rdev;
+	u_int hi_word = isonum_733(p->dev_t_high);
+	u_int lo_word = isonum_733(p->dev_t_low);
+
+	/* a zero high word means the low word carries major and minor */
+	if (hi_word != 0)
+		*rdevp = makedev(hi_word, minor(lo_word));
+	else
+		*rdevp = makedev(major(lo_word), minor(lo_word));
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return ISO_SUSP_DEVICE;
+}
+
+/*
+ * Flag indicating ("RR"): stop wanting fields its flag byte does not list
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */
+	/* special handling of RE field: still wanted and listed by RR */
+	if (ana->fields&ISO_SUSP_RELDIR)
+		return cd9660_rrip_reldir(/* XXX */ (ISO_RRIP_RELDIR *)p,ana);
+
+	return ISO_SUSP_IDFLAG;
+}
+
+/*
+ * Continuation pointer ("CE"): note where the field list continues
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_blk = isonum_733(p->location);	/* block number */
+	ana->iso_ce_off = isonum_733(p->offset);	/* offset within it */
+	ana->iso_ce_len = isonum_733(p->length);	/* bytes of fields there */
+	return ISO_SUSP_CONT;
+}
+
+/*
+ * System Use end ("ST"): ISO_SUSP_STOP ends the scan of the current area
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;		/* unused */
+	ISO_RRIP_ANALYZE *ana;		/* unused */
+{
+	return ISO_SUSP_STOP;
+}
+
+/* Extension reference ("ER"): recognise the Rock Ridge identifier */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* id length 10, id bytes "RRIP_1991A", extension version 1 */
+	if (isonum_711(p->len_id) == 10
+	    && bcmp((char *)p + 8,"RRIP_1991A",10) == 0
+	    && isonum_711(p->version) == 1) {
+		ana->fields &= ~ISO_SUSP_EXTREF;
+		return ISO_SUSP_EXTREF;
+	}
+	return 0;
+}
+
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;	/* record to scan */
+	ISO_RRIP_ANALYZE *ana;			/* wanted fields, results */
+	RRIP_TABLE *table;			/* ends at a row with func == 0 */
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int result;
+
+	/*
+	 * Feed every SUSP field of "isodir" (and of any CE continuation)
+	 * to its handler in "table"; returns the OR of the handler results.
+	 * Note: an even name length is followed by 1 padding byte.
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/* Note: "pend" should be more than one SUSP header */
+		while (pend >= phead + 1) {
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			if (result&ISO_SUSP_STOP) {
+				result &= ~ISO_SUSP_STOP;
+				break;
+			}
+			/* plausibility check */
+			if (isonum_711(phead->length) < sizeof(*phead))
+				break;
+			/* move to next SUSP; hopefully works with newer versions, too */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length));
+		}
+		
+		if (ana->fields && ana->iso_ce_len) {
+			if (ana->iso_ce_blk >= ana->imp->volume_space_size
+			    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size)
+				break;
+			/* release the previous continuation block first */
+			if (bp) {
+				brelse(bp);
+				bp = NULL;
+			}
+			if (bread(ana->imp->im_devvp,
+				  ana->iso_ce_blk << (ana->imp->im_bshift - DEV_BSHIFT),
+				  ana->imp->logical_block_size, NOCRED, &bp))
+				break;	/* what to do now? */
+			phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off);
+			pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+		} else
+			break;
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * Run the fallback (func2) of each row whose result was not seen;
+	 * stops at the first row without one, so such rows lead the table
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+
+	return result;
+}
+
+/* Get Attributes: the table cd9660_rrip_analyze() scans with. */
+/* Columns: field name, handler, fallback (or 0), result bit(s). */
+/*
+ * XXX the casts are bogus but will do for now (the handlers take typed
+ * field pointers, rrt_func_t takes void *).
+ */
+#define	BC	(rrt_func_t *)
+static RRIP_TABLE rrip_table_analyze[] = {
+	{ "PX", BC cd9660_rrip_attr,	cd9660_rrip_defattr,	ISO_SUSP_ATTR },
+	{ "TF", BC cd9660_rrip_tstamp,	cd9660_rrip_deftstamp,	ISO_SUSP_TSTAMP },
+	{ "PN", BC cd9660_rrip_device,	0,			ISO_SUSP_DEVICE },
+	{ "RR", BC cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;	/* record to scan */
+	struct iso_node *inop;			/* node whose inode part is filled */
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	analyze.inop = inop;
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+	/* the result is whatever cd9660_rrip_loop() accumulated */
+	return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze);
+}
+
+/*
+ * Get Alternate Name.  "NM" stays first: getname skips row 0 for "."/".."
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+	{ "NM", BC cd9660_rrip_altname,	cd9660_rrip_defname,	ISO_SUSP_ALTNAME },
+	{ "CL", BC cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "PL", BC cd9660_rrip_pclink,	0,			ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+	{ "RE", BC cd9660_rrip_reldir,	0,			ISO_SUSP_RELDIR },
+	{ "RR", BC cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+	RRIP_TABLE *tab = rrip_table_getname;
+	int dotname = (*isodir->name == 0 || *isodir->name == 1);
+
+	/* start with an empty name and all four name-related fields wanted */
+	*outlen = 0;
+	analyze.imp = imp;
+	analyze.inump = inump;
+	analyze.maxlen = NAME_MAX;
+	analyze.outlen = outlen;
+	analyze.outbuf = outbuf;
+	analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+
+	if (dotname) {
+		/* names 0 and 1: use the default name, skip the "NM" row */
+		cd9660_rrip_defname(isodir,&analyze);
+		analyze.fields &= ~ISO_SUSP_ALTNAME;
+		tab = &rrip_table_getname[1];
+	}
+
+	return cd9660_rrip_loop(isodir,&analyze,tab);
+}
+
+/*
+ * Get Symbolic Link: the table cd9660_rrip_getsymname() scans with.
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+	{ "SL", BC cd9660_rrip_slink,	0,			ISO_SUSP_SLINK },
+	{ "RR", BC cd9660_rrip_idflag,	0,			ISO_SUSP_IDFLAG },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;	/* record to scan for "SL" */
+	char *outbuf;				/* receives the link target */
+	u_short *outlen;			/* receives its length */
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE analyze;
+
+	analyze.outbuf = outbuf;
+	analyze.outlen = outlen;
+	*outlen = 0;
+	analyze.maxlen = MAXPATHLEN;
+	analyze.cont = 1;		/* don't start with a slash */
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_SLINK;
+	/* nonzero (ISO_SUSP_SLINK) only if the SL handler reported completion */
+	return (cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname)&ISO_SUSP_SLINK);
+}
+
+static RRIP_TABLE rrip_table_extref[] = {	/* used by cd9660_rrip_offset() */
+	{ "ER", BC cd9660_rrip_extref,	0,			ISO_SUSP_EXTREF },
+	{ "CE", BC cd9660_rrip_cont,	0,			ISO_SUSP_CONT },
+	{ "ST", BC cd9660_rrip_stop,	0,			ISO_SUSP_STOP },
+	{ "",	0,			0,			0 }
+};
+
+/*
+ * Check for Rock Ridge Extension: returns the skip byte of the SP field,
+ * or -1 without SP or ER.  Sets imp->rr_skip0 to 0, or to 15 if SP is retried.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_OFFSET *p;
+	ISO_RRIP_ANALYZE analyze;
+	/* signature: "SP", length 7, version 1, check bytes 0xBE 0xEF */
+	imp->rr_skip0 = 0;
+	p = (ISO_RRIP_OFFSET *)(isodir->name + 1);
+	if (bcmp(p,"SP\7\1\276\357",6)) {
+		/* Maybe, it's a CDROM XA disc? */
+		imp->rr_skip0 = 15;
+		p = (ISO_RRIP_OFFSET *)((char *)p + 15);
+		if (bcmp(p,"SP\7\1\276\357",6))
+			return -1;
+	}
+
+	analyze.imp = imp;
+	analyze.fields = ISO_SUSP_EXTREF;
+	if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF))
+		return -1;
+
+	return isonum_711(p->skip);
+}
diff --git a/sys/fs/cd9660/cd9660_rrip.h b/sys/fs/cd9660/cd9660_rrip.h
new file mode 100644
index 0000000..cacee39
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.h
@@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_rrip.h	8.2 (Berkeley) 12/5/94
+ * $Id: cd9660_rrip.h,v 1.3.2000.1 1996/09/30 12:46:48 dfr Exp $
+ */
+/* Header that starts every system use field */
+typedef struct {
+	char   type			[ISODCL (  0,    1)];
+	u_char length			[ISODCL (  2,    2)]; /* 711 */
+	u_char version			[ISODCL (  3,    3)]; /* 711 */
+} ISO_SUSP_HEADER;
+/* "PX" field: POSIX file attributes */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char mode			[ISODCL (  4,   11)]; /* 733 */
+	char links			[ISODCL ( 12,   19)]; /* 733 */
+	char uid			[ISODCL ( 20,   27)]; /* 733 */
+	char gid			[ISODCL ( 28,   35)]; /* 733 */
+} ISO_RRIP_ATTR;
+/* "PN" field: device number of a special file */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dev_t_high			[ISODCL (  4,   11)]; /* 733 */
+	char dev_t_low			[ISODCL ( 12,   19)]; /* 733 */
+} ISO_RRIP_DEVICE;
+/* Flag values tested for "SL" components and for the "NM" flags byte */
+#define	ISO_SUSP_CFLAG_CONTINUE	0x01
+#define	ISO_SUSP_CFLAG_CURRENT	0x02
+#define	ISO_SUSP_CFLAG_PARENT	0x04
+#define	ISO_SUSP_CFLAG_ROOT	0x08
+#define	ISO_SUSP_CFLAG_VOLROOT	0x10
+#define	ISO_SUSP_CFLAG_HOST	0x20
+/* One component record inside an "SL" field */
+typedef struct {
+	u_char cflag			[ISODCL (  1,    1)];
+	u_char clen			[ISODCL (  2,    2)];
+	u_char name			[1];			/* XXX */
+} ISO_RRIP_SLINK_COMPONENT;
+#define	ISO_RRIP_SLSIZ	2	/* bytes of cflag + clen ahead of name */
+/* "SL" field: symbolic link */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,	 4)];
+	u_char component		[ISODCL (  5,	 5)];
+} ISO_RRIP_SLINK;
+/* "NM" field: alternate name; the name bytes follow flags */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char flags			[ISODCL (  4,	 4)];
+} ISO_RRIP_ALTNAME;
+/* "CL" field: child link */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,	 11)]; /* 733 */
+} ISO_RRIP_CLINK;
+/* "PL" field: parent link */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char dir_loc			[ISODCL (  4,	 11)]; /* 733 */
+} ISO_RRIP_PLINK;
+/* "RE" field: relocated directory, header only */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+} ISO_RRIP_RELDIR;
+/* Bits of the "TF" flags byte */
+#define	ISO_SUSP_TSTAMP_FORM17	0x80
+#define	ISO_SUSP_TSTAMP_FORM7	0x00
+#define	ISO_SUSP_TSTAMP_CREAT	0x01
+#define	ISO_SUSP_TSTAMP_MODIFY	0x02
+#define	ISO_SUSP_TSTAMP_ACCESS	0x04
+#define	ISO_SUSP_TSTAMP_ATTR	0x08
+#define	ISO_SUSP_TSTAMP_BACKUP	0x10
+#define	ISO_SUSP_TSTAMP_EXPIRE	0x20
+#define	ISO_SUSP_TSTAMP_EFFECT	0x40
+/* "TF" field: time stamps; time marks where the first stamp starts */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,    4)];
+	u_char time			[ISODCL (  5,    5)];
+} ISO_RRIP_TSTAMP;
+/* "RR" field: flags byte listing the recorded fields */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	u_char flags			[ISODCL (  4,    4)];
+} ISO_RRIP_IDFLAG;
+/* "ER" field: extension reference; the identifier follows version */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char len_id			[ISODCL (  4,	 4)];
+	char len_des			[ISODCL (  5,	 5)];
+	char len_src			[ISODCL (  6,	 6)];
+	char version			[ISODCL (  7,	 7)];
+} ISO_RRIP_EXTREF;
+/* "SP" field: check bytes plus the skip count for system use areas */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char check			[ISODCL (  4,	 5)];
+	char skip			[ISODCL (  6,	 6)];
+} ISO_RRIP_OFFSET;
+/* "CE" field: continuation area; all three values are read as 733 */
+typedef struct {
+	ISO_SUSP_HEADER			h;
+	char location			[ISODCL (  4,	11)];
+	char offset			[ISODCL ( 12,	19)];
+	char length			[ISODCL ( 20,	27)];
+} ISO_RRIP_CONT;
diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c
new file mode 100644
index 0000000..090f10d
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_util.c
@@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_util.c	8.3 (Berkeley) 12/5/94
+ * $Id: cd9660_util.c,v 1.9 1997/02/22 09:38:50 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#include <isofs/cd9660/iso.h>
+
+/*
+ * Compare fn (fnlen bytes) with on-disc name isofn (isolen bytes); 0 = match.
+ * Upper-case letters in isofn match either case in fn; fn may omit ";version".
+ */
+int
+isofncmp(fn, fnlen, isofn, isolen)
+	u_char *fn;
+	int fnlen;
+	u_char *isofn;
+	int isolen;
+{
+	int i, j;
+	unsigned char c;
+
+	while (--fnlen >= 0) {
+		if (--isolen < 0)
+			return *fn;	/* isofn ran out while fn still has bytes */
+		if ((c = *isofn++) == ';') {
+			switch (*fn++) {
+			default:
+				return *--fn;
+			case 0:
+				return 0;
+			case ';':
+				break;
+			}
+			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {	/* i: version in fn */
+				if (*fn < '0' || *fn > '9') {
+					return -1;
+				}
+			}
+			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0');	/* j: version in isofn */
+			return i - j;
+		}
+		if (c != *fn) {
+			if (c >= 'A' && c <= 'Z') {
+				if (c + ('a' - 'A') != *fn) {	/* not the lower-case twin either */
+					if (*fn >= 'a' && *fn <= 'z')
+						return *fn - ('a' - 'A') - c;
+					else
+						return *fn - c;
+				}
+			} else
+				return *fn - c;
+		}
+		fn++;
+	}
+	if (isolen > 0) {	/* fn is used up; only ";..." or ".;..." may remain in isofn */
+		switch (*isofn) {
+		default:
+			return -1;
+		case '.':
+			if (isofn[1] != ';')
+				return -1;
+		case ';':	/* also reached by falling through from '.' */
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Copy infn (infnlen bytes) to outfn, length to *outfnlen.  Unless `original',
+ * fold A-Z to lower case and stop at ";" or ".;".  `assoc' prepends ASSOCCHAR.
+ */
+void
+isofntrans(infn, infnlen, outfn, outfnlen, original, assoc)
+	u_char *infn;
+	int infnlen;
+	u_char *outfn;
+	u_short *outfnlen;
+	int original;
+	int assoc;
+{
+	int fnidx = 0;
+
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		fnidx++;
+		infnlen++;	/* keep the loop bound in step with fnidx */
+	}
+	for (; fnidx < infnlen; fnidx++) {
+		char c = *infn++;
+
+		if (!original && c >= 'A' && c <= 'Z')
+			*outfn++ = c + ('a' - 'A');
+		else if (!original && (c == ';' ||	/* ";version" starts here */
+		    (c == '.' && fnidx + 1 < infnlen && *infn == ';')))
+			break;	/* ".;version": peek at *infn only while in bounds */
+		else
+			*outfn++ = c;
+	}
+	*outfnlen = fnidx;
+}
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
new file mode 100644
index 0000000..ba4e385
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,894 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vfsops.c	8.18 (Berkeley) 5/22/95
+ * $Id: cd9660_vfsops.c,v 1.50 1999/01/30 12:26:22 phk Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/cdio.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_mount.h>
+
+MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure");
+MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part");
+
+static int cd9660_mount __P((struct mount *,
+	    char *, caddr_t, struct nameidata *, struct proc *));
+static int cd9660_start __P((struct mount *, int, struct proc *));
+static int cd9660_unmount __P((struct mount *, int, struct proc *));
+static int cd9660_root __P((struct mount *, struct vnode **));
+static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, 
+	    struct proc *));
+static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+static int cd9660_sync __P((struct mount *, int, struct ucred *, 
+	    struct proc *));
+static int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *,
+	    struct vnode **, int *, struct ucred **));
+static int cd9660_vptofh __P((struct vnode *, struct fid *));
+
+static struct vfsops cd9660_vfsops = {	/* positional: order must match struct vfsops */
+	cd9660_mount,
+	cd9660_start,
+	cd9660_unmount,
+	cd9660_root,
+	cd9660_quotactl,
+	cd9660_statfs,
+	cd9660_sync,
+	cd9660_vget,
+	cd9660_fhtovp,
+	cd9660_vptofh,
+	cd9660_init	/* not defined in this file */
+};
+VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY);
+
+
+/*
+ * Called by vfs_mountroot when iso is going to be mounted as root.
+ */
+
+static int iso_get_ssector __P((dev_t dev, struct proc *p));
+static int iso_mountfs __P((struct vnode *devvp, struct mount *mp,
+			    struct proc *p, struct iso_args *argp));
+
+/*
+ * Try to find the start of the last data track on this CD-ROM.  This
+ * is used to mount the last session of a multi-session CD.  Bail out
+ * and return 0 if we fail, this is always a safe bet.
+ */
+static int
+iso_get_ssector(dev, p)
+	dev_t dev;
+	struct proc *p;
+{
+	struct ioc_toc_header h;
+	struct ioc_read_toc_single_entry t;
+	int i;
+	struct cdevsw *bd;
+	d_ioctl_t *ioctlp;
+
+	bd = bdevsw[major(dev)];
+	ioctlp = bd->d_ioctl;
+	if (ioctlp == NULL)
+		return 0;	/* driver has no ioctl entry */
+
+	if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) != 0)
+		return 0;
+
+	for (i = h.ending_track; i >= 0; i--) {	/* walk the TOC from the last track down */
+		t.address_format = CD_LBA_FORMAT;
+		t.track = i;
+		if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) != 0)
+			return 0;	/* entry unreadable: give up */
+		if ((t.entry.control & 4) != 0)
+			/* found a data track */
+			break;
+	}
+
+	if (i < 0)
+		return 0;	/* no entry had the data bit set */
+
+	return ntohl(t.entry.addr.lba);	/* the entry was requested in CD_LBA_FORMAT */
+}
+
+static int iso_mountroot __P((struct mount *mp, struct proc *p));
+
+static int
+iso_mountroot(mp, p)
+	struct mount *mp;
+	struct proc *p;
+{
+	struct iso_args args;
+	int error;
+	/* Mount the CD on rootdev as the root file system; returns 0 or an errno. */
+	if ((error = bdevvp(rootdev, &rootvp))) {
+		printf("iso_mountroot: can't find rootvp\n");
+		return (error);
+	}
+	args.flags = ISOFSMNT_ROOT;
+	args.ssector = iso_get_ssector(rootdev, p);	/* 0 when no session start was found */
+	if (bootverbose)
+		printf("iso_mountroot(): using session at block %d\n",
+		       args.ssector);
+	if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0)
+		return (error);
+
+	(void)cd9660_statfs(mp, &mp->mnt_stat, p);	/* fill in the cached mnt_stat */
+	return (0);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call; non-root requests must be read-only (else EROFS).
+ */
+static int
+cd9660_mount(mp, path, data, ndp, p)
+	register struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;
+	struct iso_args args;
+	size_t size;
+	int error;
+	mode_t accessmode;
+	struct iso_mnt *imp = 0;
+
+	if ((mp->mnt_flag & MNT_ROOTFS) != 0) {	/* root mount: `data' is not read */
+		if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
+			mp->mnt_flag |= MNT_NOCLUSTERR;
+		return (iso_mountroot(mp, p));
+	}
+	if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args))))
+		return (error);
+
+	if ((mp->mnt_flag & MNT_RDONLY) == 0)
+		return (EROFS);
+
+	/*
+	 * If updating: writable requests were already refused above, so
+	 * if there is no device name, only the export list is updated.
+	 * Disallow clearing MNT_NOCLUSTERR flag, if block device requests.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		imp = VFSTOISOFS(mp);
+		if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags &
+		    D_NOCLUSTERR)
+			mp->mnt_flag |= MNT_NOCLUSTERR;
+		if (args.fspec == 0)
+			return (vfs_export(mp, &imp->im_export, &args.export));
+	}
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	if ((error = namei(ndp)))
+		return (error);
+	devvp = ndp->ni_vp;
+
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return ENOTBLK;
+	}
+	if (major(devvp->v_rdev) >= nblkdev ||
+	    bdevsw[major(devvp->v_rdev)] == NULL) {
+		vrele(devvp);
+		return ENXIO;
+	}
+
+	/*
+	 * Verify that user has necessary permissions on the device,
+	 * or has superuser abilities
+	 */
+	accessmode = VREAD;
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
+	if (error) 
+		error = suser(p->p_ucred, &p->p_acflag);
+	if (error) {
+		vput(devvp);
+		return (error);
+	}
+	VOP_UNLOCK(devvp, 0, p);
+
+	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
+		if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
+			mp->mnt_flag |= MNT_NOCLUSTERR;
+		error = iso_mountfs(devvp, mp, p, &args);
+	} else {
+		if (devvp != imp->im_devvp)
+			error = EINVAL;	/* needs translation */
+		else
+			vrele(devvp);	/* same device: presumably drops the namei() reference */
+	}
+	if (error) {
+		vrele(devvp);
+		return error;
+	}
+	imp = VFSTOISOFS(mp);	/* (value is not used below) */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);	/* zero the rest of the name */
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) cd9660_statfs(mp, &mp->mnt_stat, p);
+	return 0;
+}
+
+/*
+ * Common code for mount and mountroot: open devvp, locate the primary
+ * volume descriptor (ISO 9660 or High Sierra), fill in a new iso_mnt and
+ * attach it to mp.  Returns 0 or an errno; the `out' path undoes the open.
+ */
+static int
+iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int high_sierra = 0;
+	int iso_bsize;
+	int iso_blknum;
+	struct iso_volume_descriptor *vdp = 0;
+	struct iso_primary_descriptor *pri;
+	struct iso_sierra_primary_descriptor *pri_sierra;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+
+	if (!(mp->mnt_flag & MNT_RDONLY))
+		return EROFS;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if ((error = vfs_mountedon(devvp)))
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)))
+		return (error);
+
+	if ((error = VOP_OPEN(devvp, FREAD, FSCRED, p)))
+		return error;
+	needclose = 1;
+
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+
+	for (iso_blknum = 16 + argp->ssector;
+	     iso_blknum < 100 + argp->ssector;
+	     iso_blknum++) {
+		if ((error = bread(devvp, iso_blknum * btodb(iso_bsize),
+				  iso_bsize, NOCRED, &bp)) != 0)
+			goto out;
+		vdp = (struct iso_volume_descriptor *)bp->b_data;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			if (bcmp (vdp->id_sierra, ISO_SIERRA_ID,
+				  sizeof vdp->id) != 0) {
+				error = EINVAL;
+				goto out;
+			} else
+				high_sierra = 1;
+		}
+
+		if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY)
+			break;	/* leave with bp still held: vdp points into it */
+		brelse(bp);
+		bp = NULL;	/* released: `out' must not brelse() it again */
+	}
+
+	if (bp == NULL) {	/* every candidate block scanned, no primary descriptor */
+		error = EINVAL;
+		goto out;
+	}
+
+	pri = (struct iso_primary_descriptor *)vdp;
+	pri_sierra = (struct iso_sierra_primary_descriptor *)vdp;
+
+	logical_block_size =
+		isonum_723 (high_sierra?
+			    pri_sierra->logical_block_size:
+			    pri->logical_block_size);
+
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;	/* out of range or not a power of two */
+		goto out;
+	}
+
+	rootp = (struct iso_directory_record *)
+		(high_sierra?
+		 pri_sierra->root_directory_record:
+		 pri->root_directory_record);
+
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size =
+		isonum_733 (high_sierra?
+			    pri_sierra->volume_space_size:
+			    pri->volume_space_size);
+	/*
+	 * Since an ISO9660 multi-session CD can also access previous
+	 * sessions, we have to include them into the space consider-
+	 * ations.  This doesn't yield a very accurate number since
+	 * parts of the old sessions might be inaccessible now, but we
+	 * can't do much better.  This is also important for the NFS
+	 * filehandle validation.
+	 */
+	isomp->volume_space_size += argp->ssector;
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;	/* ends at log2(logical_block_size) */
+
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+
+	devvp->v_specmountpoint = mp;
+
+	/* Check the Rock Ridge Extension support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if ((error = bread(isomp->im_devvp,
+				  (isomp->root_extent + isonum_711(rootp->ext_attr_length)) <<
+				  (isomp->im_bshift - DEV_BSHIFT),
+				  isomp->logical_block_size, NOCRED, &bp)) != 0)
+		    goto out;
+		rootp = (struct iso_directory_record *)bp->b_data;
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+		    argp->flags	 |= ISOFSMNT_NORRIP;
+		} else {
+		    argp->flags	 &= ~ISOFSMNT_GENS;
+		}
+
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+
+	if(high_sierra)
+		/* this effectively ignores all the mount flags */
+		isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA;
+	else
+		switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+		  default:
+			  isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+			  break;
+		  case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+			  isomp->iso_ftype = ISO_FTYPE_9660;
+			  break;
+		  case 0:
+			  isomp->iso_ftype = ISO_FTYPE_RRIP;
+			  break;
+		}
+
+	return 0;
+out:
+	devvp->v_specmountpoint = NULL;
+	if (bp)
+		brelse(bp);
+	if (needclose)
+		(void)VOP_CLOSE(devvp, FREAD, NOCRED, p);
+	if (isomp) {
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Make a filesystem operational.
+ * Nothing to do at the moment.
+ */
+/* ARGSUSED */
+static int
+cd9660_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return 0;	/* no arguments are examined; always succeeds */
+}
+
+/*
+ * unmount system call: flush the mount's vnodes, then release the device
+ */
+static int
+cd9660_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct iso_mnt *isomp;
+	int error, flags = 0;
+	
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp))
+		return EBUSY;
+#endif
+	if ((error = vflush(mp, NULLVP, flags)))
+		return (error);
+
+	isomp = VFSTOISOFS(mp);
+
+	/* Undo iso_mountfs(): detach and close the device, free the mount data. */
+	isomp->im_devvp->v_specmountpoint = NULL;
+	error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p);
+	vrele(isomp->im_devvp);
+	free((caddr_t)isomp, M_ISOFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);	/* result of VOP_CLOSE(); teardown is done regardless */
+}
+
+/*
+ * Return root of a filesystem
+ */
+static int
+cd9660_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct iso_mnt *imp = VFSTOISOFS(mp);
+	struct iso_directory_record *dp =
+	    (struct iso_directory_record *)imp->root;	/* record copied in by iso_mountfs() */
+	ino_t ino = isodirino(dp, imp);
+	
+	/*
+	 * With RRIP we must use the `.' entry of the root directory.
+	 * Simply tell vget, that it's a relocated directory.
+	 */
+	return (cd9660_vget_internal(mp, ino, vpp,
+	    imp->iso_ftype == ISO_FTYPE_RRIP, dp));
+}
+
+/*
+ * Do operations associated with quotas, not supported
+ */
+/* ARGSUSED */
+static int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);	/* returned for every cmd; no argument is examined */
+}
+
+/*
+ * Get file system statistics.
+ */
+int
+cd9660_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct iso_mnt *isomp;
+
+	isomp = VFSTOISOFS(mp);
+
+	sbp->f_bsize = isomp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;	/* XXX */
+	sbp->f_blocks = isomp->volume_space_size;	/* includes ssector, see iso_mountfs() */
+	sbp->f_bfree = 0; /* total free blocks */
+	sbp->f_bavail = 0; /* blocks free for non superuser */
+	sbp->f_files =	0; /* total files */
+	sbp->f_ffree = 0; /* free file nodes */
+	if (sbp != &mp->mnt_stat) {	/* caller's own buffer: copy what mnt_stat caches */
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	return 0;
+}
+
+/* ARGSUSED */
+static int
+cd9660_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	return (0);	/* no-op: cd9660_mount() only accepts read-only mounts */
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - the inode number is range-checked inside cd9660_vget_internal()
+ * - call VFS_VGET() to get the vnode
+ * - check for an unallocated inode (iso_mode == 0)
+ * - XXX no generation number is kept, so none is compared
+ */
+
+struct ifid {	/* cd9660 view of a struct fid (pointers are cast between the two) */
+	ushort	ifid_len;	/* set to sizeof(struct ifid) by cd9660_vptofh() */
+	ushort	ifid_pad;
+	int	ifid_ino;	/* iso_node i_number */
+	long	ifid_start;	/* iso_node iso_start */
+};
+
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct sockaddr *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct ifid *ifhp = (struct ifid *)fhp;
+	register struct iso_node *ip;
+	register struct netcred *np;
+	register struct iso_mnt *imp = VFSTOISOFS(mp);
+	struct vnode *nvp;
+	int error;
+	/* Map file handle fhp to a vnode in *vpp; also returns export flags and creds. */
+#ifdef	ISOFS_DBG
+	printf("fhtovp: ino %d, start %ld\n",
+	       ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+	
+	/*
+	 * Get the export permission structure for this <mp, client> tuple.
+	 */
+	np = vfs_export_lookup(mp, &imp->im_export, nam);
+	if (np == NULL)
+		return (EACCES);
+
+	if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	ip = VTOI(nvp);
+	if (ip->inode.iso_mode == 0) {	/* mode 0 is treated as a stale handle */
+		vput(nvp);
+		*vpp = NULLVP;
+		return (ESTALE);
+	}
+	*vpp = nvp;	/* ifid_start is not compared against the node */
+	*exflagsp = np->netc_exflags;
+	*credanonp = &np->netc_anon;
+	return (0);
+}
+
+int
+cd9660_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* Look up ino with no directory record supplied (isodir == 0). */
+	/*
+	 * XXXX
+	 * It would be nice if we didn't always set the `relocated' flag
+	 * and force the extra read, but I don't want to think about fixing
+	 * that right now.
+	 */
+	return (cd9660_vget_internal(mp, ino, vpp,
+#if 0
+	    VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP,
+#else
+	    0,	/* the compiled branch: `relocated' is never set here */
+#endif
+	    (struct iso_directory_record *)0));
+}
+
+int
+cd9660_vget_internal(mp, ino, vpp, relocated, isodir)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+	int relocated;
+	struct iso_directory_record *isodir;
+{
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp;
+	struct vnode *vp, *nvp;
+	dev_t dev;
+	int error;
+	/* Return in *vpp the vnode for ino; build it from its directory record if new. */
+	imp = VFSTOISOFS(mp);
+	dev = imp->im_dev;
+	if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP)
+		return (0);	/* already known to the inode hash */
+
+	/* Allocate a new vnode/iso_node. */
+	if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE,
+	    M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct iso_node));
+	lockinit(&ip->i_lock, PINOD, "isonode", 0, 0);
+	vp->v_data = ip;
+	ip->i_vnode = vp;
+	ip->i_dev = dev;
+	ip->i_number = ino;
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	cd9660_ihashins(ip);
+
+	if (isodir == 0) {	/* no record supplied: read it from the device */
+		int lbn, off;
+
+		lbn = lblkno(imp, ino);	/* ino is split into a block ... */
+		if (lbn >= imp->volume_space_size) {
+			vput(vp);
+			printf("fhtovp: lbn exceed volume space %d\n", lbn);
+			return (ESTALE);
+		}
+	
+		off = blkoff(imp, ino);	/* ... and an offset inside that block */
+		if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+			vput(vp);
+			printf("fhtovp: crosses block boundary %d\n",
+			       off + ISO_DIRECTORY_RECORD_SIZE);
+			return (ESTALE);
+		}
+	
+		error = bread(imp->im_devvp,
+			      lbn << (imp->im_bshift - DEV_BSHIFT),
+			      imp->logical_block_size, NOCRED, &bp);
+		if (error) {
+			vput(vp);
+			brelse(bp);
+			printf("fhtovp: bread error %d\n",error);
+			return (error);
+		}
+		isodir = (struct iso_directory_record *)(bp->b_data + off);
+
+		if (off + isonum_711(isodir->length) >
+		    imp->logical_block_size) {
+			vput(vp);
+			if (bp != 0)
+				brelse(bp);
+			printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+			       off +isonum_711(isodir->length), off,
+			       isonum_711(isodir->length));
+			return (ESTALE);
+		}
+	
+#if 0
+		if (isonum_733(isodir->extent) +
+		    isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) {
+			if (bp != 0)
+				brelse(bp);
+			printf("fhtovp: file start miss %d vs %d\n",
+			       isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length),
+			       ifhp->ifid_start);
+			return (ESTALE);
+		}
+#endif
+	} else
+		bp = 0;	/* caller's record: no buffer to release later */
+
+	ip->i_mnt = imp;
+	ip->i_devvp = imp->im_devvp;
+	VREF(ip->i_devvp);
+
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if (bp != 0)
+			brelse(bp);
+		if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) {
+			vput(vp);
+			return (error);
+		}
+		isodir = (struct iso_directory_record *)bp->b_data;	/* first record of that block */
+	}
+
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+	
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+	    {
+		struct buf *bp2;
+		int off;
+		if ((imp->im_flags & ISOFSMNT_EXTATT)
+		    && (off = isonum_711(isodir->ext_attr_length)))
+			cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL,
+				     &bp2);	/* result not checked -- NOTE(review): confirm bp2 is set on failure */
+		else
+			bp2 = NULL;
+		cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660);
+		cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660);
+		if (bp2)
+			brelse(bp2);
+		break;
+	    }
+	case ISO_FTYPE_RRIP:
+		cd9660_rrip_analyze(isodir, ip, imp);
+		break;
+	}
+
+	if (bp != 0)
+		brelse(bp);	/* isodir must not be used past this point */
+
+	/*
+	 * Initialize the associated vnode
+	 */
+	switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) {
+	case VFIFO:
+		vp->v_op = cd9660_fifoop_p;
+		break;
+	case VCHR:
+	case VBLK:
+		/*
+		 * if device, look at device number table for translation
+		 */
+		vp->v_op = cd9660_specop_p;
+		if ((nvp = checkalias(vp, ip->inode.iso_rdev, mp)) != NULL) {
+			/*
+			 * Discard unneeded vnode, but save its iso_node.
+			 * Note that the lock is carried over in the iso_node
+			 * to the replacement vnode.
+			 */
+			nvp->v_data = vp->v_data;
+			vp->v_data = NULL;
+			vp->v_op = spec_vnodeop_p;
+			vrele(vp);
+			vgone(vp);
+			/*
+			 * Reinitialize aliased inode.
+			 */
+			vp = nvp;
+			ip->i_vnode = vp;
+		}
+		break;
+	default:
+		break;
+	}
+	
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_flag |= VROOT;	/* same extent as the root recorded at mount time */
+
+	/*
+	 * XXX need generation number?
+	 */
+	
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Vnode pointer to File handle: store i_number and iso_start of vp in *fhp
+ */
+/* ARGSUSED */
+int
+cd9660_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct iso_node *ip = VTOI(vp);
+	register struct ifid *ifhp;
+
+	ifhp = (struct ifid *)fhp;
+	ifhp->ifid_len = sizeof(struct ifid);
+
+	ifhp->ifid_ino = ip->i_number;	/* ifid_pad is left as the caller passed it */
+	ifhp->ifid_start = ip->iso_start;
+
+#ifdef	ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	       ifhp->ifid_ino,ifhp->ifid_start);
+#endif
+	return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
new file mode 100644
index 0000000..5ec970a
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -0,0 +1,920 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)cd9660_vnops.c	8.19 (Berkeley) 5/27/95
+ * $Id: cd9660_vnops.c,v 1.53 1998/07/04 20:45:30 julian Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/unistd.h>
+
+#include <vm/vm.h>
+#include <vm/vm_zone.h>
+#include <vm/vnode_pager.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+static int cd9660_setattr __P((struct vop_setattr_args *));	/* file-local operations */
+static int cd9660_access __P((struct vop_access_args *));
+static int cd9660_getattr __P((struct vop_getattr_args *));
+static int cd9660_pathconf __P((struct vop_pathconf_args *));
+static int cd9660_read __P((struct vop_read_args *));
+struct isoreaddir;	/* forward declaration for the two directory helpers below */
+static int iso_uiodir __P((struct isoreaddir *idp, struct dirent *dp,
+			   off_t off));
+static int iso_shipdir __P((struct isoreaddir *idp));
+static int cd9660_readdir __P((struct vop_readdir_args *));
+static int cd9660_readlink __P((struct vop_readlink_args *ap));
+static int cd9660_abortop __P((struct vop_abortop_args *));
+static int cd9660_strategy __P((struct vop_strategy_args *));
+static int cd9660_print __P((struct vop_print_args *));
+static int cd9660_getpages __P((struct vop_getpages_args *));
+static int cd9660_putpages __P((struct vop_putpages_args *));
+
+/*
+ * Setattr call.  Requests to change flags, owner, group, access or
+ * modification time, or mode are refused with EROFS.  A size change is
+ * refused for directories (EISDIR) and for regular files and symbolic
+ * links (EROFS); for every other vnode type it is accepted as a no-op.
+ */
+int
+cd9660_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vattr *wanted = ap->a_vap;
+	struct vnode *node = ap->a_vp;
+
+	if (wanted->va_flags != (u_long)VNOVAL)
+		return (EROFS);
+	if (wanted->va_uid != (uid_t)VNOVAL || wanted->va_gid != (gid_t)VNOVAL)
+		return (EROFS);
+	if (wanted->va_atime.tv_sec != VNOVAL ||
+	    wanted->va_mtime.tv_sec != VNOVAL)
+		return (EROFS);
+	if (wanted->va_mode != (mode_t)VNOVAL)
+		return (EROFS);
+
+	/* No size change requested: nothing left to object to. */
+	if (wanted->va_size == (u_quad_t)VNOVAL)
+		return (0);
+	if (node->v_type == VDIR)
+		return (EISDIR);
+	if (node->v_type == VLNK || node->v_type == VREG)
+		return (EROFS);
+	return (0);
+}
+
+/*
+ * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC.
+ * The mode is shifted to select the owner/group/other fields. The
+ * super user is granted all permissions.
+ */
+/* ARGSUSED */
+static int
+cd9660_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct ucred *cred = ap->a_cred;
+	struct iso_node *ip = VTOI(ap->a_vp);
+	mode_t wanted = ap->a_mode;
+	mode_t rbit, wbit, xbit, need;
+	int idx, member;
+
+	/*
+	 * Write access to directories, symbolic links and regular
+	 * files is refused outright with EROFS.  Every other vnode
+	 * type falls through to the permission check below.
+	 */
+	if ((wanted & VWRITE) != 0) {
+		switch (ap->a_vp->v_type) {
+		case VDIR:
+		case VLNK:
+		case VREG:
+			return (EROFS);
+		default:
+			break;
+		}
+	}
+
+	/* Credentials with uid 0 are granted everything else. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	/*
+	 * Pick the one permission class that applies to the caller:
+	 * the owner's bits if the uid matches, else the group's bits if
+	 * any of the caller's groups matches, else the bits for others.
+	 */
+	member = 0;
+	for (idx = 0; idx < cred->cr_ngroups && !member; idx++)
+		member = (cred->cr_groups[idx] == ip->inode.iso_gid);
+
+	if (cred->cr_uid == ip->inode.iso_uid) {
+		rbit = S_IRUSR;
+		wbit = S_IWUSR;
+		xbit = S_IXUSR;
+	} else if (member) {
+		rbit = S_IRGRP;
+		wbit = S_IWGRP;
+		xbit = S_IXGRP;
+	} else {
+		rbit = S_IROTH;
+		wbit = S_IWOTH;
+		xbit = S_IXOTH;
+	}
+
+	/* Collect the bits of that class matching the requested access. */
+	need = 0;
+	if (wanted & VEXEC)
+		need |= xbit;
+	if (wanted & VREAD)
+		need |= rbit;
+	if (wanted & VWRITE)
+		need |= wbit;
+
+	/*
+	 * Access is granted only when every needed bit is set in the mode.
+	 */
+	return ((ip->inode.iso_mode & need) == need ? 0 : EACCES);
+}
+
+static int
+cd9660_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+
+{
+	struct vattr *out = ap->a_vap;
+	struct iso_node *node = VTOI(ap->a_vp);
+
+	/* Identity, ownership, mode and times are copied from the node. */
+	out->va_fsid = node->i_dev;
+	out->va_fileid = node->i_number;
+	out->va_mode = node->inode.iso_mode;
+	out->va_nlink = node->inode.iso_links;
+	out->va_uid = node->inode.iso_uid;
+	out->va_gid = node->inode.iso_gid;
+	out->va_rdev = node->inode.iso_rdev;
+	out->va_atime = node->inode.iso_atime;
+	out->va_mtime = node->inode.iso_mtime;
+	out->va_ctime = node->inode.iso_ctime;
+	out->va_size = (u_quad_t) node->i_size;
+	if (node->i_size == 0 && (out->va_mode & S_IFMT) == S_IFLNK) {
+		/* Symbolic link of size zero: read it to learn its length. */
+		struct vop_readlink_args rlargs;
+		struct uio luio;
+		struct iovec liov;
+		char *scratch;
+
+		MALLOC(scratch, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
+		liov.iov_base = scratch;
+		liov.iov_len = MAXPATHLEN;
+		luio.uio_iov = &liov;
+		luio.uio_iovcnt = 1;
+		luio.uio_offset = 0;
+		luio.uio_resid = MAXPATHLEN;
+		luio.uio_rw = UIO_READ;
+		luio.uio_segflg = UIO_SYSSPACE;
+		luio.uio_procp = ap->a_p;
+		rlargs.a_vp = ap->a_vp;
+		rlargs.a_uio = &luio;
+		rlargs.a_cred = ap->a_cred;
+		if (cd9660_readlink(&rlargs) == 0)
+			out->va_size = MAXPATHLEN - luio.uio_resid;
+		FREE(scratch, M_TEMP);
+	}
+	/* The remaining fields are constants or come from the vnode and mount. */
+	out->va_type = ap->a_vp->v_type;
+	out->va_flags = 0;
+	out->va_gen = 1;
+	out->va_filerev = 0;
+	out->va_blocksize = node->i_mnt->logical_block_size;
+	out->va_bytes = (u_quad_t) node->i_size;
+	return (0);
+}
+
+/*
+ * Vnode op for reading: copies file data into the caller's uio, at most one logical block per pass.
+ */
+static int
+cd9660_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	register struct iso_node *ip = VTOI(vp);
+	register struct iso_mnt *imp;
+	struct buf *bp;
+	daddr_t lbn, rablock;	/* block being read and the block after it */
+	off_t diff;		/* bytes between the current offset and the file size */
+	int rasize, error = 0;
+	long size, n, on;	/* block size, bytes to copy this pass, offset in block */
+	/* An empty request succeeds at once; a negative offset is rejected. */
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	ip->i_flag |= IN_ACCESS;
+	imp = ip->i_mnt;
+	do {
+		lbn = lblkno(imp, uio->uio_offset);
+		on = blkoff(imp, uio->uio_offset);
+		n = min((u_int)(imp->logical_block_size - on),
+			uio->uio_resid);	/* rest of this block, capped by the request */
+		diff = (off_t)ip->i_size - uio->uio_offset;
+		if (diff <= 0)	/* at or past the file size: nothing more to read */
+			return (0);
+		if (diff < n)
+			n = diff;
+		size = blksize(imp, ip, lbn);
+		rablock = lbn + 1;
+		if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {	/* clustering not disabled */
+			if (lblktosize(imp, rablock) < ip->i_size)
+				error = cluster_read(vp, (off_t)ip->i_size,
+				         lbn, size, NOCRED, uio->uio_resid,
+					 (ap->a_ioflag >> 16), &bp);
+			else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		} else {
+			if (vp->v_lastr + 1 == lbn &&	/* follows the last block read: read one ahead */
+			    lblktosize(imp, rablock) < ip->i_size) {
+				rasize = blksize(imp, ip, rablock);
+				error = breadn(vp, lbn, size, &rablock,
+					       &rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		}
+		vp->v_lastr = lbn;
+		n = min(n, size - bp->b_resid);	/* NOTE(review): bp is used before error is tested; confirm the read routines always set it */
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		/* Copy the requested part of the block out, then release the buffer. */
+		error = uiomove(bp->b_data + on, (int)n, uio);
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+	return (error);
+}
+
+/*
+ * State shared by cd9660_readdir() and its helpers while a directory is read
+ */
+struct isoreaddir {
+	struct dirent saveent;	/* held-back regular entry (d_namlen 0 when empty) */
+	struct dirent assocent;	/* held-back associated entry (d_namlen 0 when empty) */
+	struct dirent current;	/* entry built from the record just read */
+	off_t saveoff;		/* offset recorded together with saveent */
+	off_t assocoff;		/* offset recorded together with assocent */
+	off_t curroff;		/* current position in the directory */
+	struct uio *uio;	/* caller's uio that entries are copied to */
+	off_t uio_off;		/* offset passed with the last entry copied out */
+	int eofflag;		/* cleared when output space or cookies run out */
+	u_long *cookies;	/* next free cookie slot, or NULL if none wanted */
+	int ncookies;		/* cookie slots still free */
+};
+
+int
+iso_uiodir(idp,dp,off)
+	struct isoreaddir *idp;
+	struct dirent *dp;
+	off_t off;
+{
+	int rc;
+
+	/* Terminate the name and size the record before it is measured. */
+	dp->d_name[dp->d_namlen] = 0;
+	dp->d_reclen = GENERIC_DIRSIZ(dp);
+
+	/*
+	 * Out of room, either in the caller's buffer or in the cookie
+	 * array: clear the EOF flag and stop the enumeration with -1.
+	 */
+	if (idp->uio->uio_resid < dp->d_reclen ||
+	    (idp->cookies && idp->ncookies <= 0)) {
+		idp->eofflag = 0;
+		return (-1);
+	}
+
+	if (idp->cookies) {
+		*idp->cookies++ = off;
+		--idp->ncookies;
+	}
+	rc = uiomove((caddr_t) dp,dp->d_reclen,idp->uio);
+	if (rc == 0)
+		idp->uio_off = off;
+	return (rc);
+}
+
+int
+iso_shipdir(idp)
+	struct isoreaddir *idp;
+{
+	char *newname, *heldname;
+	int newlen, heldlen, isassoc, rc;
+
+	/* Incoming name, with the associated-file marker stripped if present. */
+	newname = idp->current.d_name;
+	newlen = idp->current.d_namlen;
+	isassoc = (newlen > 1) && (*newname == ASSOCCHAR);
+	if (isassoc) {
+		newname++;
+		newlen--;
+	}
+	/* Held-back name: the regular entry if any, else the associated one minus its marker. */
+	heldname = idp->saveent.d_name;
+	heldlen = idp->saveent.d_namlen;
+	if (heldlen == 0) {
+		heldname = idp->assocent.d_name + 1;
+		heldlen = idp->assocent.d_namlen - 1;
+	}
+	/* A different name arrives: emit what is held, associated entry first. */
+	if (heldlen > 0 && (heldlen != newlen || bcmp(heldname,newname,heldlen))) {
+		if (idp->assocent.d_namlen) {
+			rc = iso_uiodir(idp,&idp->assocent,idp->assocoff);
+			if (rc != 0)
+				return (rc);
+			idp->assocent.d_namlen = 0;
+		}
+		if (idp->saveent.d_namlen) {
+			rc = iso_uiodir(idp,&idp->saveent,idp->saveoff);
+			if (rc != 0)
+				return (rc);
+			idp->saveent.d_namlen = 0;
+		}
+	}
+
+	/* Hold the incoming entry in the slot that matches its kind. */
+	idp->current.d_reclen = GENERIC_DIRSIZ(&idp->current);
+	if (isassoc) {
+		idp->assocoff = idp->curroff;
+		bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+	} else {
+		idp->saveoff = idp->curroff;
+		bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+	}
+	return (0);
+}
+
+/*
+ * Vnode op for readdir.
+ *
+ * Walks the directory records from uio_offset onwards and passes each
+ * entry to iso_uiodir(), either directly or through iso_shipdir().  On
+ * return uio_offset and *a_eofflag are updated and, when cookies were
+ * requested and no error occurred, *a_cookies / *a_ncookies are set.
+ */
+static int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		int *a_ncookies;
+		u_long *a_cookies;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	struct vnode *vdp = ap->a_vp;
+	struct iso_node *dp;
+	struct iso_mnt *imp;
+	struct buf *bp = NULL;
+	struct iso_directory_record *ep;
+	int entryoffsetinblock;
+	doff_t endsearch;
+	u_long bmask;
+	int error = 0;
+	int reclen;
+	u_short namelen;
+	int ncookies = 0;
+	u_long *cookies = NULL;
+
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	bmask = imp->im_bmask;
+
+	MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK);
+	idp->saveent.d_namlen = idp->assocent.d_namlen = 0;
+	/* XXX Is it worth trying to figure out the type? */
+	idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type =
+	    DT_UNKNOWN;
+	idp->uio = uio;
+	if (ap->a_ncookies == NULL) {
+		idp->cookies = NULL;
+	} else {
+		/*
+		 * Guess the number of cookies needed.  The array holds
+		 * u_long values, so it is sized by its element type.
+		 */
+		ncookies = uio->uio_resid / 16;
+		MALLOC(cookies, u_long *, ncookies * sizeof(*cookies), M_TEMP,
+		    M_WAITOK);
+		idp->cookies = cookies;
+		idp->ncookies = ncookies;
+	}
+	idp->eofflag = 1;
+	idp->curroff = uio->uio_offset;
+	/* Until an entry is copied out, the caller's offset must not move. */
+	idp->uio_off = uio->uio_offset;
+
+	if ((entryoffsetinblock = idp->curroff & bmask) &&
+	    (error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp))) {
+		/* Nothing is handed back on this path: release it all. */
+		if (cookies != NULL)
+			free(cookies, M_TEMP);
+		FREE(idp, M_TEMP);
+		return (error);
+	}
+	endsearch = dp->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * On a block boundary, release the previous directory
+		 * block (if any) and read the next one.
+		 */
+		if ((idp->curroff & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error =
+			    cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp)) != 0)
+				break;
+			entryoffsetinblock = 0;
+		}
+		/* Get pointer to next entry. */
+		ep = (struct iso_directory_record *)
+			((char *)bp->b_data + entryoffsetinblock);
+
+		reclen = isonum_711(ep->length);
+		if (reclen == 0) {
+			/* skip to next block, if any */
+			idp->curroff =
+			    (idp->curroff & ~bmask) + imp->logical_block_size;
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;		/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;		/* illegal directory, stop */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711(ep->name_len);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;		/* illegal entry, stop */
+			break;
+		}
+
+		if (isonum_711(ep->flags)&2)
+			idp->current.d_fileno = isodirino(ep, imp);
+		else
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				entryoffsetinblock;
+
+		idp->curroff += reclen;
+
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			cd9660_rrip_getname(ep,idp->current.d_name, &namelen,
+					   &idp->current.d_fileno,imp);
+			idp->current.d_namlen = (u_char)namelen;
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 || ISO_FTYPE_HIGH_SIERRA*/
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &namelen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)namelen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;	/* an empty name makes iso_shipdir() emit what it holds */
+		error = iso_shipdir(idp);
+	}
+	if (error < 0)	/* -1 from the helpers only means the output filled up */
+		error = 0;
+
+	if (ap->a_ncookies != NULL) {
+		if (error)
+			free(cookies, M_TEMP);
+		else {
+			/* Work out the number of cookies actually used. */
+			*ap->a_ncookies = ncookies - idp->ncookies;
+			*ap->a_cookies = cookies;
+		}
+	}
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+	*ap->a_eofflag = idp->eofflag;
+
+	FREE(idp, M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return the target name of a symbolic link.
+ * Shouldn't we get the parent vnode and read the data from there?
+ * That could eventually result in deadlocks in cd9660_lookup, but
+ * otherwise the block read here ends up in the buffer cache twice.
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node		    ISONODE;
+typedef struct iso_mnt		    ISOMNT;
+static int
+cd9660_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	ISONODE	*ip;
+	ISODIR	*dirp;
+	ISOMNT	*imp;
+	struct	buf *bp;
+	struct	uio *uio;
+	u_short	symlen;
+	int	error;
+	char	*symname;
+
+	ip  = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+	uio = ap->a_uio;
+	/* Anything but a Rock Ridge (RRIP) mount is answered with EINVAL. */
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return (EINVAL);
+
+	/*
+	 * Read the device block holding this inode's directory record (block index i_number >> im_bshift).
+	 */
+	error = bread(imp->im_devvp,
+		      (ip->i_number >> imp->im_bshift) <<
+		      (imp->im_bshift - DEV_BSHIFT),
+		      imp->logical_block_size, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	/*
+	 * Point at the directory record inside that block.
+	 */
+	dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask));
+
+	/*
+	 * Sanity check: the record, as sized by its own length
+	 * field, must not run past the end of the logical block.
+	 */
+	if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+	    > (unsigned)imp->logical_block_size) {
+		brelse(bp);
+		return (EINVAL);
+	}
+
+	/*
+	 * Choose where the name is assembled: in the caller's buffer for a
+	 * kernel-space uio, otherwise in a temporary buffer from namei_zone.
+	 */
+	if (uio->uio_segflg == UIO_SYSSPACE)
+		symname = uio->uio_iov->iov_base;
+	else
+		symname = zalloc(namei_zone);
+	
+	/*
+	 * Gather the link target; a zero return means failure here.
+	 */
+	if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) {
+		if (uio->uio_segflg != UIO_SYSSPACE)
+			zfree(namei_zone, symname);
+		brelse(bp);
+		return (EINVAL);
+	}
+	/*
+	 * The directory block is no longer needed.
+	 */
+	brelse(bp);
+
+	/*
+	 * Copy the name out, or (kernel-space uio) advance over the bytes written in place.
+	 */
+	if (uio->uio_segflg != UIO_SYSSPACE) {
+		error = uiomove(symname, symlen, uio);
+		zfree(namei_zone, symname);
+		return (error);
+	}
+	uio->uio_resid -= symlen;
+	uio->uio_iov->iov_base += symlen;
+	uio->uio_iov->iov_len -= symlen;
+	return (0);
+}
+
+/*
+ * Abort op, run after namei() when a CREATE/DELETE is not carried through;
+ * frees the saved pathname buffer if HASBUF is set and SAVESTART is not.
+ */
+static int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	if ((ap->a_cnp->cn_flags & HASBUF) && !(ap->a_cnp->cn_flags & SAVESTART))
+		zfree(namei_zone, ap->a_cnp->cn_pnbuf);
+	return (0);
+}
+
+/*
+ * Map the logical block if needed, then pass the buffer to the device vnode.
+ */
+static int
+cd9660_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *vp = bp->b_vp;
+	struct iso_node *ip = VTOI(vp);
+	int error;
+
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		panic("cd9660_strategy: spec");
+	/* Equal block numbers mean the buffer has not been mapped yet. */
+	if (bp->b_blkno == bp->b_lblkno) {
+		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
+		if (error != 0) {
+			bp->b_error = error;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			clrbuf(bp);
+	}
+	/* A block number of -1 completes the request without device I/O. */
+	if ((long)bp->b_blkno == -1) {
+		biodone(bp);
+		return (0);
+	}
+	vp = ip->i_devvp;
+	bp->b_dev = vp->v_rdev;
+	VOP_STRATEGY(vp, bp);
+	return (0);
+}
+
+/*
+ * Print a fixed line identifying the vnode as an isofs vnode.
+ */
+static int
+cd9660_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/* Nothing from the inode itself is printed; ap is not examined. */
+	printf("tag VT_ISOFS, isofs vnode\n");
+	return (0);
+}
+
+/*
+ * Return the pathconf value selected by a_name through *a_retval.
+ */
+static int
+cd9660_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		register_t *a_retval;
+	} */ *ap;
+{
+	int limit;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+	case _PC_CHOWN_RESTRICTED:
+	case _PC_NO_TRUNC:
+		limit = 1;
+		break;
+	case _PC_NAME_MAX:
+		/* Rock Ridge mounts report NAME_MAX, all others 37. */
+		if (VTOI(ap->a_vp)->i_mnt->iso_ftype == ISO_FTYPE_RRIP)
+			limit = NAME_MAX;
+		else
+			limit = 37;
+		break;
+	case _PC_PATH_MAX:
+		limit = PATH_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		limit = PIPE_BUF;
+		break;
+	default:
+		/* Unknown name: *a_retval is left untouched. */
+		return (EINVAL);
+	}
+	*ap->a_retval = limit;
+	return (0);
+}
+
+/*
+ * Get-pages vnode op: defers entirely to vnode_pager_generic_getpages().
+ *
+ * XXX By default, wimp out... note that a_offset is ignored (and always
+ * XXX has been).
+ */
+int
+cd9660_getpages(ap)
+	struct vop_getpages_args *ap;
+{
+	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+		ap->a_reqpage);
+}
+
+/*
+ * Put-pages vnode op: defers entirely to vnode_pager_generic_putpages().
+ *
+ * XXX By default, wimp out... note that a_offset is ignored (and always
+ * XXX has been).
+ */
+int
+cd9660_putpages(ap)
+	struct vop_putpages_args *ap;
+{
+	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
+		ap->a_sync, ap->a_rtvals);
+}
+
+/*
+ * Global vfs data structures for cd9660: the general vnode operations vector
+ */
+vop_t **cd9660_vnodeop_p;
+static struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },	/* default entry */
+	{ &vop_abortop_desc,		(vop_t *) cd9660_abortop },
+	{ &vop_access_desc,		(vop_t *) cd9660_access },
+	{ &vop_bmap_desc,		(vop_t *) cd9660_bmap },
+	{ &vop_cachedlookup_desc,	(vop_t *) cd9660_lookup },
+	{ &vop_getattr_desc,		(vop_t *) cd9660_getattr },
+	{ &vop_inactive_desc,		(vop_t *) cd9660_inactive },
+	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
+	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
+	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
+	{ &vop_pathconf_desc,		(vop_t *) cd9660_pathconf },
+	{ &vop_print_desc,		(vop_t *) cd9660_print },
+	{ &vop_read_desc,		(vop_t *) cd9660_read },
+	{ &vop_readdir_desc,		(vop_t *) cd9660_readdir },
+	{ &vop_readlink_desc,		(vop_t *) cd9660_readlink },
+	{ &vop_reclaim_desc,		(vop_t *) cd9660_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) cd9660_setattr },
+	{ &vop_strategy_desc,		(vop_t *) cd9660_strategy },
+	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
+	{ &vop_getpages_desc,		(vop_t *) cd9660_getpages },
+	{ &vop_putpages_desc,		(vop_t *) cd9660_putpages },
+	{ NULL, NULL }	/* terminator */
+};
+static struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+	{ &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+VNODEOP_SET(cd9660_vnodeop_opv_desc);
+
+/*
+ * Special device vnode ops; the default entry is spec_vnoperate
+ */
+vop_t **cd9660_specop_p;
+static struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) spec_vnoperate },
+	{ &vop_access_desc,		(vop_t *) cd9660_access },
+	{ &vop_getattr_desc,		(vop_t *) cd9660_getattr },
+	{ &vop_inactive_desc,		(vop_t *) cd9660_inactive },
+	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
+	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
+	{ &vop_print_desc,		(vop_t *) cd9660_print },
+	{ &vop_reclaim_desc,		(vop_t *) cd9660_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) cd9660_setattr },
+	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
+	{ NULL, NULL }	/* terminator */
+};
+static struct vnodeopv_desc cd9660_specop_opv_desc =
+	{ &cd9660_specop_p, cd9660_specop_entries };
+VNODEOP_SET(cd9660_specop_opv_desc);
+
+vop_t **cd9660_fifoop_p;	/* FIFO vnode ops; the default entry is fifo_vnoperate */
+static struct vnodeopv_entry_desc cd9660_fifoop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) fifo_vnoperate },
+	{ &vop_access_desc,		(vop_t *) cd9660_access },
+	{ &vop_getattr_desc,		(vop_t *) cd9660_getattr },
+	{ &vop_inactive_desc,		(vop_t *) cd9660_inactive },
+	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
+	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
+	{ &vop_print_desc,		(vop_t *) cd9660_print },
+	{ &vop_reclaim_desc,		(vop_t *) cd9660_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) cd9660_setattr },
+	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
+	{ NULL, NULL }	/* terminator */
+};
+static struct vnodeopv_desc cd9660_fifoop_opv_desc =
+	{ &cd9660_fifoop_p, cd9660_fifoop_entries };
+
+VNODEOP_SET(cd9660_fifoop_opv_desc);
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
new file mode 100644
index 0000000..7b50fb6
--- /dev/null
+++ b/sys/fs/cd9660/iso.h
@@ -0,0 +1,312 @@
+/*-
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso.h	8.6 (Berkeley) 5/10/95
+ * $Id: iso.h,v 1.15 1997/05/04 16:17:49 joerg Exp $
+ */
+
+#define ISODCL(from, to) ((to) - (from) + 1)	/* byte count of the inclusive range from..to */
+
+struct iso_volume_descriptor {	/* 2048 bytes in all; each field is sized from its ISODCL byte range */
+	char type[ISODCL(1,1)]; /* 711 */
+	char id[ISODCL(2,6)];
+	char version[ISODCL(7,7)];
+	char unused[ISODCL(8,8)];
+	char type_sierra[ISODCL(9,9)]; /* 711 */
+	char id_sierra[ISODCL(10,14)];
+	char version_sierra[ISODCL(15,15)];
+	char data[ISODCL(16,2048)];
+};
+
+/* volume descriptor types */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+/* Five-character identifier strings; presumably matched against the 5-byte id fields - verify in the mount code. */
+#define ISO_STANDARD_ID "CD001"
+#define ISO_ECMA_ID	"CDW01"
+
+#define ISO_SIERRA_ID	"CDROM"
+
+struct iso_primary_descriptor {	/* 2048 bytes in all; fields sized from their ISODCL byte ranges */
+	char type			[ISODCL (  1,	1)]; /* 711 */
+	char id				[ISODCL (  2,	6)];
+	char version			[ISODCL (  7,	7)]; /* 711 */
+	char unused1			[ISODCL (  8,	8)];
+	char system_id			[ISODCL (  9,  40)]; /* achars */
+	char volume_id			[ISODCL ( 41,  72)]; /* dchars */
+	char unused2			[ISODCL ( 73,  80)];
+	char volume_space_size		[ISODCL ( 81,  88)]; /* 733 */
+	char unused3			[ISODCL ( 89, 120)];
+	char volume_set_size		[ISODCL (121, 124)]; /* 723 */
+	char volume_sequence_number	[ISODCL (125, 128)]; /* 723 */
+	char logical_block_size		[ISODCL (129, 132)]; /* 723 */
+	char path_table_size		[ISODCL (133, 140)]; /* 733 */
+	char type_l_path_table		[ISODCL (141, 144)]; /* 731 */
+	char opt_type_l_path_table	[ISODCL (145, 148)]; /* 731 */
+	char type_m_path_table		[ISODCL (149, 152)]; /* 732 */
+	char opt_type_m_path_table	[ISODCL (153, 156)]; /* 732 */
+	char root_directory_record	[ISODCL (157, 190)]; /* 9.1 */
+	char volume_set_id		[ISODCL (191, 318)]; /* dchars */
+	char publisher_id		[ISODCL (319, 446)]; /* achars */
+	char preparer_id		[ISODCL (447, 574)]; /* achars */
+	char application_id		[ISODCL (575, 702)]; /* achars */
+	char copyright_file_id		[ISODCL (703, 739)]; /* 7.5 dchars */
+	char abstract_file_id		[ISODCL (740, 776)]; /* 7.5 dchars */
+	char bibliographic_file_id	[ISODCL (777, 813)]; /* 7.5 dchars */
+	char creation_date		[ISODCL (814, 830)]; /* 8.4.26.1 */
+	char modification_date		[ISODCL (831, 847)]; /* 8.4.26.1 */
+	char expiration_date		[ISODCL (848, 864)]; /* 8.4.26.1 */
+	char effective_date		[ISODCL (865, 881)]; /* 8.4.26.1 */
+	char file_structure_version	[ISODCL (882, 882)]; /* 711 */
+	char unused4			[ISODCL (883, 883)];
+	char application_data		[ISODCL (884, 1395)];
+	char unused5			[ISODCL (1396, 2048)];
+};
+#define ISO_DEFAULT_BLOCK_SIZE		2048	/* equals the descriptor length above */
+
+struct iso_sierra_primary_descriptor {	/* 2048 bytes in all; fields sized from their ISODCL byte ranges */
+	char unknown1			[ISODCL (  1,	8)]; /* 733 */
+	char type			[ISODCL (  9,	9)]; /* 711 */
+	char id				[ISODCL ( 10,  14)];
+	char version			[ISODCL ( 15,  15)]; /* 711 */
+	char unused1			[ISODCL ( 16,  16)];
+	char system_id			[ISODCL ( 17,  48)]; /* achars */
+	char volume_id			[ISODCL ( 49,  80)]; /* dchars */
+	char unused2			[ISODCL ( 81,  88)];
+	char volume_space_size		[ISODCL ( 89,  96)]; /* 733 */
+	char unused3			[ISODCL ( 97, 128)];
+	char volume_set_size		[ISODCL (129, 132)]; /* 723 */
+	char volume_sequence_number	[ISODCL (133, 136)]; /* 723 */
+	char logical_block_size		[ISODCL (137, 140)]; /* 723 */
+	char path_table_size		[ISODCL (141, 148)]; /* 733 */
+	char type_l_path_table		[ISODCL (149, 152)]; /* 731 */
+	char opt_type_l_path_table	[ISODCL (153, 156)]; /* 731 */
+	char unknown2			[ISODCL (157, 160)]; /* 731 */
+	char unknown3			[ISODCL (161, 164)]; /* 731 */
+	char type_m_path_table		[ISODCL (165, 168)]; /* 732 */
+	char opt_type_m_path_table	[ISODCL (169, 172)]; /* 732 */
+	char unknown4			[ISODCL (173, 176)]; /* 732 */
+	char unknown5			[ISODCL (177, 180)]; /* 732 */
+	char root_directory_record	[ISODCL (181, 214)]; /* 9.1 */
+	char volume_set_id		[ISODCL (215, 342)]; /* dchars */
+	char publisher_id		[ISODCL (343, 470)]; /* achars */
+	char preparer_id		[ISODCL (471, 598)]; /* achars */
+	char application_id		[ISODCL (599, 726)]; /* achars */
+	char copyright_id		[ISODCL (727, 790)]; /* achars */
+	char creation_date		[ISODCL (791, 806)]; /* ? */
+	char modification_date		[ISODCL (807, 822)]; /* ? */
+	char expiration_date		[ISODCL (823, 838)]; /* ? */
+	char effective_date		[ISODCL (839, 854)]; /* ? */
+	char file_structure_version	[ISODCL (855, 855)]; /* 711 */
+	char unused4			[ISODCL (856, 2048)];
+};
+
+struct iso_directory_record {	/* fixed part of a directory record, followed by the name */
+	char length			[ISODCL (1, 1)]; /* 711 */
+	char ext_attr_length		[ISODCL (2, 2)]; /* 711 */
+	u_char extent			[ISODCL (3, 10)]; /* 733 */
+	u_char size			[ISODCL (11, 18)]; /* 733 */
+	char date			[ISODCL (19, 25)]; /* 7 by 711 */
+	char flags			[ISODCL (26, 26)];
+	char file_unit_size		[ISODCL (27, 27)]; /* 711 */
+	char interleave			[ISODCL (28, 28)]; /* 711 */
+	char volume_sequence_number	[ISODCL (29, 32)]; /* 723 */
+	char name_len			[ISODCL (33, 33)]; /* 711 */
+	char name			[1];			/* XXX */
+};
+/* sizeof(struct iso_directory_record) cannot be used: the trailing name[1]
+   member makes it 34, while the fixed part before the name is 33 bytes */
+#define ISO_DIRECTORY_RECORD_SIZE	33
+
+struct iso_extended_attributes {	/* 250 bytes in all; fields sized from their ISODCL byte ranges */
+	u_char owner			[ISODCL (1, 4)]; /* 723 */
+	u_char group			[ISODCL (5, 8)]; /* 723 */
+	u_char perm			[ISODCL (9, 10)]; /* 9.5.3 */
+	char ctime			[ISODCL (11, 27)]; /* 8.4.26.1 */
+	char mtime			[ISODCL (28, 44)]; /* 8.4.26.1 */
+	char xtime			[ISODCL (45, 61)]; /* 8.4.26.1 */
+	char ftime			[ISODCL (62, 78)]; /* 8.4.26.1 */
+	char recfmt			[ISODCL (79, 79)]; /* 711 */
+	char recattr			[ISODCL (80, 80)]; /* 711 */
+	u_char reclen			[ISODCL (81, 84)]; /* 723 */
+	char system_id			[ISODCL (85, 116)]; /* achars */
+	char system_use			[ISODCL (117, 180)];
+	char version			[ISODCL (181, 181)]; /* 711 */
+	char len_esc			[ISODCL (182, 182)]; /* 711 */
+	char reserved			[ISODCL (183, 246)];
+	u_char len_au			[ISODCL (247, 250)]; /* 723 */
+};
+
+#ifdef KERNEL
+
+/* CD-ROM format type; struct iso_mnt has a member (iso_ftype) of this type */
+enum ISO_FTYPE	{ ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP,
+		  ISO_FTYPE_ECMA, ISO_FTYPE_HIGH_SIERRA };
+/* Fall back to 0 when ISOFSMNT_ROOT has not been defined already. */
+#ifndef	ISOFSMNT_ROOT
+#define	ISOFSMNT_ROOT	0
+#endif
+
+struct iso_mnt {	/* per-mount state, reached from a struct mount via VFSTOISOFS() */
+	int im_flags;
+
+	struct mount *im_mountp;
+	dev_t im_dev;
+	struct vnode *im_devvp;
+
+	int logical_block_size;	/* value returned by blksize() */
+	int im_bshift;		/* shift used by lblkno() and lblktosize() */
+	int im_bmask;		/* mask used by blkoff() */
+
+	int volume_space_size;
+	struct netexport im_export;
+
+	char root[ISODCL (157, 190)];	/* same byte range as root_directory_record in iso_primary_descriptor */
+	int root_extent;
+	int root_size;
+	enum ISO_FTYPE	iso_ftype;
+
+	int rr_skip;
+	int rr_skip0;
+};
+
+#define VFSTOISOFS(mp)	((struct iso_mnt *)((mp)->mnt_data))
+/* Byte offset <-> logical block arithmetic using the mount's shift and mask. */
+#define blkoff(imp, loc)	((loc) & (imp)->im_bmask)
+#define lblktosize(imp, blk)	((blk) << (imp)->im_bshift)
+#define lblkno(imp, loc)	((loc) >> (imp)->im_bshift)
+#define blksize(imp, ip, lbn)	((imp)->logical_block_size)	/* ip and lbn are unused */
+
+int cd9660_vget_internal __P((struct mount *, ino_t, struct vnode **, int,
+			      struct iso_directory_record *));
+int cd9660_init __P((struct vfsconf *));
+#define cd9660_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+                                    size_t, struct proc *)))eopnotsupp)
+/* The three operation vectors are defined in cd9660_vnops.c. */
+extern vop_t **cd9660_vnodeop_p;
+extern vop_t **cd9660_specop_p;
+extern vop_t **cd9660_fifoop_p;
+/* File-name comparison / translation and directory inode number helpers. */
+int isofncmp __P((u_char *, int, u_char *, int));
+void isofntrans __P((u_char *, int, u_char *, u_short *, int, int));
+ino_t isodirino __P((struct iso_directory_record *, struct iso_mnt *));
+
+#endif /* KERNEL */
+
+/*
+ * The isonum_xxx functions are inlined anyway, and could come in handy even
+ * outside the kernel.  Thus we don't hide them here.
+ */
+
+static __inline int isonum_711 __P((u_char *));
+static __inline int	/* value of the single unsigned byte at p */
+isonum_711(p)
+	u_char *p;
+{
+	return (p[0]);
+}
+
+static __inline int isonum_712 __P((char *));
+static __inline int	/* signed 8-bit value, whatever the signedness of plain char */
+isonum_712(p)
+	char *p;
+{
+	return ((signed char)*p);
+}
+
+#ifndef UNALIGNED_ACCESS
+
+static __inline int isonum_723 __P((u_char *));
+static __inline int	/* 16-bit value from the first two bytes, low byte first */
+isonum_723(p)
+	u_char *p;
+{
+	return ((p[1] << 8) | p[0]);
+}
+
+static __inline int isonum_733 __P((u_char *));
+static __inline int	/* 32-bit value from the first four bytes, low byte first */
+isonum_733(p)
+	u_char *p;
+{
+	return (int)(p[0] | (p[1] << 8) | (p[2] << 16) | ((unsigned)p[3] << 24));
+}
+
+#else /* UNALIGNED_ACCESS */
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+
+static __inline int	/* little-endian host: native 16-bit load of the first two bytes */
+isonum_723(p)
+	u_char *p;
+{
+	return *(u_int16_t *)p;
+}
+
+static __inline int	/* little-endian host: native 32-bit load of the first four bytes */
+isonum_733(p)
+	u_char *p;
+{
+	return *(u_int32_t *)p;
+}
+
+#endif
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+static __inline int	/* big-endian host: native 16-bit load of bytes 2..3 */
+isonum_723(p)
+	u_char *p;
+{
+	return *(u_int16_t *)(p + 2);
+}
+
+static __inline int	/* big-endian host: native 32-bit load of bytes 4..7 */
+isonum_733(p)
+	u_char *p;
+{
+	return *(u_int32_t *)(p + 4);
+}
+
+#endif
+
+#endif /* UNALIGNED_ACCESS */
+
+/*
+ * Associated files have a leading '='; iso_shipdir() tests names for it.
+ */
+#define	ASSOCCHAR	'='
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
new file mode 100644
index 0000000..2b256d5
--- /dev/null
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com).  The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)iso_rrip.h	8.2 (Berkeley) 1/23/94
+ * $Id: iso_rrip.h,v 1.4 1997/02/22 09:38:52 peter Exp $
+ */
+
+
+/*
+ *	Analyze function flag (similar to RR field bits)
+ */
+#define	ISO_SUSP_ATTR		0x0001
+#define	ISO_SUSP_DEVICE		0x0002
+#define	ISO_SUSP_SLINK		0x0004
+#define	ISO_SUSP_ALTNAME	0x0008
+#define	ISO_SUSP_CLINK		0x0010
+#define	ISO_SUSP_PLINK		0x0020
+#define	ISO_SUSP_RELDIR		0x0040
+#define	ISO_SUSP_TSTAMP		0x0080
+#define	ISO_SUSP_IDFLAG		0x0100
+#define	ISO_SUSP_EXTREF		0x0200
+#define	ISO_SUSP_CONT		0x0400
+#define	ISO_SUSP_OFFSET		0x0800
+#define	ISO_SUSP_STOP		0x1000
+#define	ISO_SUSP_UNKNOWN	0x8000
+
+typedef struct {
+	struct iso_node	*inop;
+	int		fields;		/* interesting fields in this analysis */
+	daddr_t		iso_ce_blk;	/* block of continuation area */
+	off_t		iso_ce_off;	/* offset of continuation area */
+	int		iso_ce_len;	/* length of continuation area */
+	struct iso_mnt	*imp;		/* mount structure */
+	ino_t		*inump;		/* inode number pointer */
+	char		*outbuf;	/* name/symbolic link output area */
+	u_short		*outlen;	/* length of above */
+	u_short		maxlen;		/* maximum length of above */
+	int		cont;		/* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+struct iso_directory_record;
+
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+			    struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+			    char *outbuf, u_short *outlen,
+			    ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+			       char *outbuf, u_short *outlen,
+			       struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+			   struct iso_mnt *imp));
diff --git a/sys/fs/coda/README b/sys/fs/coda/README
new file mode 100644
index 0000000..f9bf3c3
--- /dev/null
+++ b/sys/fs/coda/README
@@ -0,0 +1,60 @@
+                Announcing the Availability of the
+                        Coda Distributed
+                           Filesystem
+                              for
+                         BSD Unix Systems
+
+        Coda is a distributed file system like NFS and AFS.  It is
+freely available, like NFS.  But it functions much like AFS in being a
+"stateful" file system.  Coda and AFS cache files on your local
+machine to improve performance.  But Coda goes a step further than AFS
+by letting you access the cached files when there is no available
+network, viz. disconnected laptops and network outages.  In Coda, both
+the client and server are outside the kernel which makes them easier
+to experiment with.
+
+To get more information on Coda, I would like to refer people to
+        http://www.coda.cs.cmu.edu
+There is a wealth of documents, papers, and theses there.  There is
+also a good introduction to the Coda File System in
+        http://www.coda.cs.cmu.edu/ljpaper/lj.html
+
+Coda was originally developed as an academic prototype/testbed.  It is
+being polished and rewritten where necessary.  Coda is a work in
+progress and does have bugs.  It is, though, very usable.  Our
+interest is in making Coda available to as many people as possible and
+to have Coda evolve and flourish.
+
+The bulk of the Coda file system code supports the Coda client
+program, the Coda server program and the utilities needed by both.
+All these programs are unix programs and can run equally well on any
+Unix platform.  Our main development thrust is improving these
+programs.  There is a small part of Coda that deals with the kernel to
+file system interface.  This code is OS specific (but should not be
+platform specific).
+
+Coda is currently available for several OS's and platforms:
+        FreeBSD-2.2.5: i386
+        FreeBSD-2.2.6: i386
+	FreeBSD -current: i386
+        linux 2.0: i386 & sparc
+        linux 2.1: i386 & sparc
+        NetBSD 1.3: i386
+	NetBSD -current: i386
+The relevant sources, binaries, and docs can be found in
+        ftp://ftp.coda.cs.cmu.edu/pub/coda/
+
+We intend to come out with new Coda releases often, not daily.  We
+don't want to slight any OS/platform not mentioned above.  We are just
+limited in our resources as to what we can support internally.  We
+will be happy to integrate OpenBSD support as well as other OS
+support.  Also, adding platform support should be relatively easy and
+we can discuss this.  The only difficulty is that Coda has a lightweight
+process package.  It does some manipulations in assembler which would
+have to be redone for a different platform.
+
+There are several mailing lists @coda.cs.cmu.edu that discuss coda:
+coda-announce and linux-coda.  We are going to revise linux-coda to be
+OS neutral, since it is mainly Coda we want to discuss.  We appreciate
+comments, feedback, bug reports, bug fixes, enhancements, etc.
+
diff --git a/sys/fs/coda/TODO b/sys/fs/coda/TODO
new file mode 100644
index 0000000..eac5143
--- /dev/null
+++ b/sys/fs/coda/TODO
@@ -0,0 +1,17 @@
+OOPS:
+	FreeBSD does not fsync!!!
+
+Near term:
+	Fix bug in executing/mapping new files.
+	cfs_mount bug: interaction with cfs_inactive no cfs_unsave.
+	vref/vn_lock == vget except no VXWANT which may be on.
+	Review locks: vn_lock/VOP_UNLOCK/lockmgr ...
+
+Medium term:
+	Add missing VFS methods.
+	Do performance profile.
+	Tune hash algorithm used in cfs_namecache.
+	Tune hash algorithm used in cfs_subr.
+
+Eventually:
+	Use standard queue macros.
diff --git a/sys/fs/coda/cnode.h b/sys/fs/coda/cnode.h
new file mode 100644
index 0000000..bf6f632
--- /dev/null
+++ b/sys/fs/coda/cnode.h
@@ -0,0 +1,319 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/cnode.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: cnode.h,v 1.4 1998/09/13 13:57:59 rvb Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/* 
+ * HISTORY
+ * $Log: cnode.h,v $
+ * Revision 1.4  1998/09/13 13:57:59  rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.10  1998/08/28 18:12:25  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.9  1998/08/18 17:05:24  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.8  1998/08/18 16:31:49  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.7  98/02/24  22:22:53  rvb
+ * Fixes up mainly to flush iopen and friends
+ * 
+ * Revision 1.6  98/01/31  20:53:19  rvb
+ * First version that works on FreeBSD 2.2.5
+ * 
+ * Revision 1.5  98/01/23  11:53:51  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.4.2.5  98/01/23  11:21:14  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.4.2.4  98/01/22  13:03:38  rvb
+ * Had Breaken ls .
+ * 
+ * Revision 1.4.2.3  97/12/19  14:26:09  rvb
+ * session id
+ * 
+ * Revision 1.4.2.2  97/12/16  12:40:24  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.4.2.1  97/12/06  17:41:28  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.4  97/12/05  10:39:30  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.3.18.2  97/11/12  12:09:45  rvb
+ * reorg pass1
+ * 
+ * Revision 1.3.18.1  97/10/29  16:06:31  rvb
+ * Kill DYING
+ * 
+ * Revision 1.3  1996/12/12 22:11:03  bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases. 
+ *  There may be more.
+ *
+ * Revision 1.2  1996/01/02 16:57:26  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1  1995/12/20 01:57:53  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:08:23  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:08:23  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.2  1994/12/06  13:39:18  dcs
+ * Add a flag value to indicate a cnode was orphaned, e.g. the venus
+ * that created it has exited. This will allow one to restart venus
+ * even though some process may be cd'd into /coda.
+ *
+ * Revision 2.1  94/07/21  16:25:33  satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ * 
+ * Revision 1.2.7.1  94/06/16  11:26:02  raiff
+ * Branch for release beta-16Jun1994_39118
+ * 
+ * Revision 1.2  92/10/27  17:58:41  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 2.3  92/09/30  14:16:53  mja
+ * 	Picked up fixed #ifdef _KERNEL. Also...
+ * 
+ * 	Substituted rvb's history blurb so that we agree with Mach 2.5 sources.
+ * 	[91/02/09            jjk]
+ * 
+ * 	Added contributors blurb.
+ * 	[90/12/13            jjk]
+ * 
+ * Revision 2.2  90/07/05  11:27:24  mrt
+ * 	Created for the Coda File System.
+ * 	[90/05/23            dcs]
+ * 
+ * Revision 1.4  90/05/31  17:02:16  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * 
+ * 
+ */
+
+#ifndef	_CNODE_H_
+#define	_CNODE_H_
+
+#include <sys/vnode.h>
+#include <sys/lock.h>
+#include <machine/clock.h>
+
+MALLOC_DECLARE(M_CODA);
+
+/*
+ * tmp below since we need struct queue
+ */
+#include <coda/coda_kernel.h>
+
+/*
+ * Cnode lookup stuff.
+ * NOTE: CODA_CACHESIZE must be a power of 2 for cfshash to work!
+ */
+#define CODA_CACHESIZE 512
+
+#define CODA_ALLOC(ptr, cast, size)                                        \
+do {	/* M_WAITOK allocation from M_CODA; panics on a 0 return */       \
+    (ptr) = (cast)malloc((unsigned long)(size), M_CODA, M_WAITOK);        \
+    if ((ptr) == 0) {                                                     \
+	panic("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__);  \
+    }                                                                     \
+} while (0)
+
+#define CODA_FREE(ptr, size)  free((ptr), M_CODA)
+
+/*
+ * global cache state control
+ */
+extern int coda_nc_use;
+
+/*
+ * Used to select debugging statements throughout the cfs code.
+ */
+extern int codadebug;
+extern int coda_nc_debug;
+extern int coda_printf_delay;
+extern int coda_vnop_print_entry;
+extern int coda_psdev_print_entry;
+extern int coda_vfsop_print_entry;
+
+#define CODADBGMSK(N)            (1 << (N))	/* bit mask for debug class N */
+#define CODADEBUG(N, STMT)       { if (codadebug & CODADBGMSK(N)) { STMT } }
+#define myprintf(args)          \
+do {                            \
+    if (coda_printf_delay)       \
+	DELAY(coda_printf_delay);\
+    printf args ;               \
+} while (0)
+
+struct cnode {
+    struct vnode	*c_vnode;
+    u_short		 c_flags;	/* flags (see below) */
+    ViceFid		 c_fid;		/* file handle */
+    struct lock		 c_lock;	/* new lock protocol */
+    struct vnode	*c_ovp;		/* open vnode pointer */
+    u_short		 c_ocount;	/* count of openers */
+    u_short		 c_owrite;	/* count of open for write */
+    struct vattr	 c_vattr; 	/* attributes */
+    char		*c_symlink;	/* pointer to symbolic link */
+    u_short		 c_symlen;	/* length of symbolic link */
+    dev_t		 c_device;	/* associated vnode device */
+    ino_t		 c_inode;	/* associated vnode inode */
+    struct cnode	*c_next;	/* links if on NetBSD machine */
+};
+#define	VTOC(vp)	((struct cnode *)(vp)->v_data)
+#define	CTOV(cp)	((struct vnode *)((cp)->c_vnode))
+
+/* c_flags bits for struct cnode */
+#define C_VATTR		0x01	/* Validity of vattr in the cnode */
+#define C_SYMLINK	0x02	/* Validity of symlink pointer in the cnode */
+#define C_WANTED	0x08	/* Set if lock wanted */
+#define C_LOCKED	0x10	/* Set if lock held */
+#define C_UNMOUNTING	0X20	/* Set if unmounting */
+#define C_PURGING	0x40	/* Set if purging a fid */
+
+#define VALID_VATTR(cp)		((cp)->c_flags & C_VATTR)	/* nonzero if C_VATTR set */
+#define VALID_SYMLINK(cp)	((cp)->c_flags & C_SYMLINK)	/* nonzero if C_SYMLINK set */
+#define IS_UNMOUNTING(cp)	((cp)->c_flags & C_UNMOUNTING)	/* nonzero if C_UNMOUNTING set */
+
+struct vcomm {
+	u_long		vc_seq;
+	struct selinfo	vc_selproc;
+	struct queue	vc_requests;
+	struct queue	vc_replys;
+};
+
+#define	VC_OPEN(vcp)	    ((vcp)->vc_requests.forw != NULL)
+#define MARK_VC_CLOSED(vcp) (vcp)->vc_requests.forw = NULL;
+#define MARK_VC_OPEN(vcp)    /* MT */
+
+struct coda_clstat {
+	int	ncalls;			/* client requests */
+	int	nbadcalls;		/* upcall failures */
+	int	reqs[CODA_NCALLS];	/* count of each request */
+};
+extern struct coda_clstat coda_clstat;
+
+/*
+ * CODA structure to hold mount/file system information
+ */
+struct coda_mntinfo {
+    struct vnode	*mi_rootvp;
+    struct mount	*mi_vfsp;
+    struct vcomm	 mi_vcomm;
+};
+extern struct coda_mntinfo coda_mnttbl[]; /* indexed by minor device number */
+
+/*
+ * vfs pointer to mount info
+ */
+#define vftomi(vfsp)    ((struct coda_mntinfo *)((vfsp)->mnt_data))	/* mount -> coda_mntinfo */
+#define	CODA_MOUNTED(vfsp)   (vftomi((vfsp)) != (struct coda_mntinfo *)0)
+
+/*
+ * vnode pointer to mount info
+ */
+#define vtomi(vp)       ((struct coda_mntinfo *)((vp)->v_mount->mnt_data))	/* vnode -> coda_mntinfo */
+
+/*
+ * Used for identifying usage of "Control" object
+ */
+extern struct vnode *coda_ctlvp;
+#define	IS_CTL_VP(vp)		((vp) == coda_ctlvp)
+#define	IS_CTL_NAME(vp, name, l)(((l) == CODA_CONTROLLEN) \
+ 				 && ((vp) == vtomi((vp))->mi_rootvp)    \
+				 && strncmp((name), CODA_CONTROL, (l)) == 0)	/* control name looked up in the root vnode */
+
+/* 
+ * An enum to tell us whether something that will remove a reference
+ * to a cnode was a downcall or not
+ */
+enum dc_status {
+    IS_DOWNCALL = 6,
+    NOT_DOWNCALL = 7
+};
+
+/* cfs_psdev.h */
+extern int coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize, caddr_t buffer);
+extern int coda_kernel_version;
+
+/* cfs_subr.h */
+extern int  handleDownCall(int opcode, union outputArgs *out);
+extern void coda_unmounting(struct mount *whoIam);
+extern int  coda_vmflush(struct cnode *cp);
+
+/* cfs_vnodeops.h */
+extern struct cnode *make_coda_node(ViceFid *fid, struct mount *vfsp, short type);
+extern int coda_vnodeopstats_init(void);
+
+/* coda_vfsops.h */
+extern struct mount *devtomp(dev_t dev);
+
+/* sigh */
+#define CODA_RDWR ((u_long) 31)
+
+#endif	/* _CNODE_H_ */
+
diff --git a/sys/fs/coda/coda.h b/sys/fs/coda/coda.h
new file mode 100644
index 0000000..7b67ea9
--- /dev/null
+++ b/sys/fs/coda/coda.h
@@ -0,0 +1,761 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda.h,v 1.5 1998/10/28 19:33:49 rvb Exp $
+ * 
+ */
+
+
+/*
+ *
+ * Based on cfs.h from Mach, but revamped for increased simplicity.
+ * Linux modifications by Peter Braam, Aug 1996
+ */
+
+#ifndef _CODA_HEADER_
+#define _CODA_HEADER_
+
+
+
+/* Catch new _KERNEL defn for NetBSD */
+#ifdef __NetBSD__
+#include <sys/types.h>
+#endif 
+
+#ifndef CODA_MAXSYMLINKS
+#define CODA_MAXSYMLINKS 10
+#endif
+
+#if defined(DJGPP) || defined(__CYGWIN32__)
+#ifdef KERNEL
+typedef unsigned long u_long;
+typedef unsigned int u_int;
+typedef unsigned short u_short;
+typedef u_long ino_t;
+typedef u_long dev_t;
+typedef void * caddr_t;
+#ifdef DOS
+typedef unsigned __int64 u_quad_t;
+#else 
+typedef unsigned long long u_quad_t;
+#endif
+
+#define inline
+
+struct timespec {
+        long       ts_sec;
+        long       ts_nsec;
+};
+#else  /* DJGPP but not KERNEL */
+#include <sys/types.h>
+#include <sys/time.h>
+typedef unsigned long long u_quad_t;
+#endif /* !KERNEL */
+#endif /* DJGPP || __CYGWIN32__ */
+
+
+#if defined(__linux__)
+#define cdev_t u_quad_t
+#if !defined(_UQUAD_T_) && (!defined(__GLIBC__) || __GLIBC__ < 2)
+#define _UQUAD_T_ 1
+typedef unsigned long long u_quad_t;
+#endif
+#else
+#define cdev_t dev_t
+#endif
+
+#ifdef __CYGWIN32__
+typedef unsigned char u_int8_t;
+struct timespec {
+        time_t  tv_sec;         /* seconds */
+        long    tv_nsec;        /* nanoseconds */
+};
+#endif
+
+
+/*
+ * Cfs constants
+ */
+#define CODA_MAXNAMLEN   255
+#define CODA_MAXPATHLEN  1024
+#define CODA_MAXSYMLINK  10
+
+/* these are Coda's version of O_RDONLY etc combinations
+ * to deal with VFS open modes
+ */
+#define	C_O_READ	0x001
+#define	C_O_WRITE       0x002
+#define C_O_TRUNC       0x010
+#define C_O_EXCL	0x100
+#define C_O_CREAT	0x200
+
+/* these are to find mode bits in Venus */ 
+#define C_M_READ  00400
+#define C_M_WRITE 00200
+
+/* for access Venus will use */
+#define C_A_C_OK    8               /* Test for writing upon create.  */
+#define C_A_R_OK    4               /* Test for read permission.  */
+#define C_A_W_OK    2               /* Test for write permission.  */
+#define C_A_X_OK    1               /* Test for execute permission.  */
+#define C_A_F_OK    0               /* Test for existence.  */
+
+
+
+#ifndef _VENUS_DIRENT_T_
+#define _VENUS_DIRENT_T_ 1
+struct venus_dirent {
+        unsigned long	d_fileno;		/* file number of entry */
+        unsigned short	d_reclen;		/* length of this record */
+        char 		d_type;			/* file type, see below */
+        unsigned char	d_namlen;		/* length of string in d_name; unsigned, may exceed 127 */
+        char		d_name[CODA_MAXNAMLEN + 1];/* name must be no longer than this */
+};
+#undef DIRSIZ
+#define DIRSIZ(dp)      ((sizeof (struct venus_dirent) - (CODA_MAXNAMLEN+1)) + \
+                         (((dp)->d_namlen+1 + 3) &~ 3))
+
+/*
+ * File types
+ */
+#define	CDT_UNKNOWN	 0
+#define	CDT_FIFO	 1
+#define	CDT_CHR		 2
+#define	CDT_DIR		 4
+#define	CDT_BLK		 6
+#define	CDT_REG		 8
+#define	CDT_LNK		10
+#define	CDT_SOCK	12
+#define	CDT_WHT		14
+
+/*
+ * Convert between stat structure types and directory types.
+ */
+#define	IFTOCDT(mode)	(((mode) & 0170000) >> 12)
+#define	CDTTOIF(dirtype)	((dirtype) << 12)
+
+#endif
+
+#ifndef	_FID_T_
+#define _FID_T_	1
+typedef u_long VolumeId;
+typedef u_long VnodeId;
+typedef u_long Unique_t;
+typedef u_long FileVersion;
+#endif 
+
+#ifndef	_VICEFID_T_
+#define _VICEFID_T_	1
+typedef struct ViceFid {
+    VolumeId Volume;
+    VnodeId Vnode;
+    Unique_t Unique;
+} ViceFid;
+#endif	/* VICEFID */
+
+
+#ifdef __linux__
+static __inline__ ino_t  coda_f2i(struct ViceFid *fid)
+{
+	/* A null fid maps to inode number 0. */
+	if (fid == 0)
+		return 0;
+	/* Vnodes 0xfffffffe/0xffffffff are encoded from Volume and Unique only. */
+	if (fid->Vnode == 0xfffffffe || fid->Vnode == 0xffffffff)
+		return ((fid->Volume << 20) | (fid->Unique & 0xfffff));
+	return (fid->Unique + (fid->Vnode<<10) + (fid->Volume<<20));
+}
+	
+#else
+#define coda_f2i(fid)\
+	((fid) ? ((fid)->Unique + ((fid)->Vnode<<10) + ((fid)->Volume<<20)) : 0)
+#endif
+
+
+#ifndef __BIT_TYPES_DEFINED__
+#define u_int32_t unsigned int
+#endif
+
+
+#ifndef _VUID_T_
+#define _VUID_T_
+typedef u_int32_t vuid_t;
+typedef u_int32_t vgid_t;
+#endif /*_VUID_T_ */
+
+#ifndef _CODACRED_T_
+#define _CODACRED_T_
+struct coda_cred {
+    vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
+    vgid_t cr_groupid,     cr_egid, cr_sgid, cr_fsgid; /* same for groups */
+};
+#endif 
+
+#ifndef _VENUS_VATTR_T_
+#define _VENUS_VATTR_T_
+/*
+ * Vnode types.  VNON means no type.
+ */
+enum coda_vtype	{ C_VNON, C_VREG, C_VDIR, C_VBLK, C_VCHR, C_VLNK, C_VSOCK, C_VFIFO, C_VBAD };
+
+struct coda_vattr {
+	int     	va_type;	/* vnode type (for create) */
+	u_short		va_mode;	/* files access mode and type */
+	short		va_nlink;	/* number of references to file */
+	vuid_t		va_uid;		/* owner user id */
+	vgid_t		va_gid;		/* owner group id */
+	long		va_fileid;	/* file id */
+	u_quad_t	va_size;	/* file size in bytes */
+	long		va_blocksize;	/* blocksize preferred for i/o */
+	struct timespec	va_atime;	/* time of last access */
+	struct timespec	va_mtime;	/* time of last modification */
+	struct timespec	va_ctime;	/* time file changed */
+	u_long		va_gen;		/* generation number of file */
+	u_long		va_flags;	/* flags defined for file */
+	cdev_t	        va_rdev;	/* device special file represents */
+	u_quad_t	va_bytes;	/* bytes of disk space held by file */
+	u_quad_t	va_filerev;	/* file modification number */
+};
+
+#endif 
+
+/*
+ * Kernel <--> Venus communications.
+ */
+
+#define CODA_ROOT	2
+#define CODA_SYNC	3
+#define CODA_OPEN	4
+#define CODA_CLOSE	5
+#define CODA_IOCTL	6
+#define CODA_GETATTR	7
+#define CODA_SETATTR	8
+#define CODA_ACCESS	9
+#define CODA_LOOKUP	10
+#define CODA_CREATE	11
+#define CODA_REMOVE	12
+#define CODA_LINK	13
+#define CODA_RENAME	14
+#define CODA_MKDIR	15
+#define CODA_RMDIR	16
+#define CODA_READDIR	17
+#define CODA_SYMLINK	18
+#define CODA_READLINK	19
+#define CODA_FSYNC	20
+#define CODA_INACTIVE	21
+#define CODA_VGET	22
+#define CODA_SIGNAL	23
+#define CODA_REPLACE	24
+#define CODA_FLUSH       25
+#define CODA_PURGEUSER   26
+#define CODA_ZAPFILE     27
+#define CODA_ZAPDIR      28
+#define CODA_PURGEFID    30
+#define CODA_OPEN_BY_PATH 31
+#define CODA_RESOLVE     32
+#define CODA_REINTEGRATE 33
+#define CODA_NCALLS 34
+
+#define DOWNCALL(opcode) ((opcode) >= CODA_REPLACE && (opcode) <= CODA_PURGEFID)	/* opcode in 24..30 */
+
+#define VC_MAXDATASIZE	    8192
+#define VC_MAXMSGSIZE      (sizeof(union inputArgs)+sizeof(union outputArgs) +\
+                            VC_MAXDATASIZE)	/* both arg unions plus max payload */
+
+#define CIOC_KERNEL_VERSION _IOWR('c', 10, sizeof (int)) /* NOTE(review): _IOWR's 3rd arg is normally a type, not sizeof(type) -- confirm before changing (affects the ioctl number) */
+#if	0
+	/* don't care about kernel version number */
+#define CODA_KERNEL_VERSION 0
+	/* The old venus 4.6 compatible interface */
+#define CODA_KERNEL_VERSION 1
+#endif
+	/* venus_lookup gets an extra parameter to aid windows.*/
+#define CODA_KERNEL_VERSION 2
+
+/*
+ *        Venus <-> Coda  RPC arguments
+ */
+struct coda_in_hdr {
+    unsigned long opcode;
+    unsigned long unique;	    /* Keep multiple outstanding msgs distinct */
+    u_short pid;		    /* Common to all */
+    u_short pgid;		    /* Common to all */
+    u_short sid;                    /* Common to all */
+    struct coda_cred cred;	    /* Common to all */
+};
+
+/* Really important that opcode and unique are 1st two fields! */
+struct coda_out_hdr {
+    unsigned long opcode;
+    unsigned long unique;	
+    unsigned long result;
+};
+
+/* coda_root: NO_IN */
+struct coda_root_out {
+    struct coda_out_hdr oh;
+    ViceFid VFid;
+};
+
+struct coda_root_in {
+    struct coda_in_hdr in;
+};
+
+/* coda_sync: */
+/* Nothing needed for coda_sync */
+
+/* coda_open: */
+struct coda_open_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int	flags;
+};
+
+struct coda_open_out {
+    struct coda_out_hdr oh;
+    cdev_t	dev;
+    ino_t	inode;
+};
+
+
+/* coda_close: */
+struct coda_close_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int	flags;
+};
+
+struct coda_close_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_ioctl: */
+struct coda_ioctl_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+    int	cmd;
+    int	len;
+    int	rwflag;
+    char *data;			/* Place holder for data. */
+};
+
+struct coda_ioctl_out {
+    struct coda_out_hdr oh;
+    int	len;
+    caddr_t	data;		/* Place holder for data. */
+};
+
+
+/* coda_getattr: */
+struct coda_getattr_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+};
+
+struct coda_getattr_out {
+    struct coda_out_hdr oh;
+    struct coda_vattr attr;
+};
+
+
+/* coda_setattr: NO_OUT */
+struct coda_setattr_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+    struct coda_vattr attr;
+};
+
+struct coda_setattr_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_access: NO_OUT */
+struct coda_access_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int	flags;
+};
+
+struct coda_access_out {
+    struct coda_out_hdr out;
+};
+
+
+/* lookup flags */
+#define CLU_CASE_SENSITIVE     0x01
+#define CLU_CASE_INSENSITIVE   0x02
+
+/* coda_lookup: */
+struct  coda_lookup_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int         name;		/* Place holder for data. */
+    int         flags;	
+};
+
+struct coda_lookup_out {
+    struct coda_out_hdr oh;
+    ViceFid VFid;
+    int	vtype;
+};
+
+
+/* coda_create: */
+struct coda_create_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+    struct coda_vattr attr;
+    int excl;
+    int mode;
+    int 	name;		/* Place holder for data. */
+};
+
+struct coda_create_out {
+    struct coda_out_hdr oh;
+    ViceFid VFid;
+    struct coda_vattr attr;
+};
+
+
+/* coda_remove: NO_OUT */
+struct coda_remove_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int name;		/* Place holder for data. */
+};
+
+struct coda_remove_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_link: NO_OUT */
+struct coda_link_in {
+    struct coda_in_hdr ih;
+    ViceFid sourceFid;          /* cnode to link *to* */
+    ViceFid destFid;            /* Directory in which to place link */
+    int tname;		/* Place holder for data. */
+};
+
+struct coda_link_out {
+    struct coda_out_hdr out;
+};
+
+
+/* coda_rename: NO_OUT */
+struct coda_rename_in {
+    struct coda_in_hdr ih;
+    ViceFid	sourceFid;
+    int 	srcname;
+    ViceFid destFid;
+    int 	destname;
+};
+
+struct coda_rename_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_mkdir: */
+struct coda_mkdir_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    struct coda_vattr attr;
+    int	   name;		/* Place holder for data. */
+};
+
+struct coda_mkdir_out {
+    struct coda_out_hdr oh;
+    ViceFid VFid;
+    struct coda_vattr attr;
+};
+
+
+/* coda_rmdir: NO_OUT */
+struct coda_rmdir_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int name;		/* Place holder for data. */
+};
+
+struct coda_rmdir_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_readdir: */
+struct coda_readdir_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;
+    int	count;
+    int	offset;
+};
+
+struct coda_readdir_out {
+    struct coda_out_hdr oh;
+    int	size;
+    caddr_t	data;		/* Place holder for data. */
+};
+
+/* coda_symlink: NO_OUT */
+struct coda_symlink_in {
+    struct coda_in_hdr ih;
+    ViceFid	VFid;          /* Directory to put symlink in */
+    int srcname;
+    struct coda_vattr attr;
+    int tname;
+};
+
+struct coda_symlink_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_readlink: */
+struct coda_readlink_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+};
+
+struct coda_readlink_out {
+    struct coda_out_hdr oh;
+    int	count;
+    caddr_t	data;		/* Place holder for data. */
+};
+
+
+/* coda_fsync: NO_OUT */
+struct coda_fsync_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+};
+
+struct coda_fsync_out {
+    struct coda_out_hdr out;
+};
+
+/* coda_inactive: NO_OUT */
+struct coda_inactive_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+};
+
+/* coda_vget: */
+struct coda_vget_in {
+    struct coda_in_hdr ih;
+    ViceFid VFid;
+};
+
+struct coda_vget_out {
+    struct coda_out_hdr oh;
+    ViceFid VFid;
+    int	vtype;
+};
+
+
+/* CODA_SIGNAL is out-of-band, doesn't need data. */
+/* CODA_INVALIDATE is a venus->kernel call */
+/* CODA_FLUSH is a venus->kernel call */
+
+/* coda_purgeuser: */
+/* CODA_PURGEUSER is a venus->kernel call */
+struct coda_purgeuser_out {
+    struct coda_out_hdr oh;
+    struct coda_cred cred;
+};
+
+/* coda_zapfile: */
+/* CODA_ZAPFILE is a venus->kernel call */
+struct coda_zapfile_out {  /* in union outputArgs and union coda_downcalls */
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+    ViceFid CodaFid;
+};
+
+/* coda_zapdir: */
+/* CODA_ZAPDIR is a venus->kernel call */	
+struct coda_zapdir_out {	  /* in union outputArgs and union coda_downcalls */
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+    ViceFid CodaFid;
+};
+
+/* coda_zapvnode: */
+/* CODA_ZAPVNODE is a venus->kernel call */	
+struct coda_zapvnode_out { /* in union outputArgs and union coda_downcalls */
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+    struct coda_cred cred;
+    ViceFid VFid;
+};
+
+/* coda_purgefid: */
+/* CODA_PURGEFID is a venus->kernel call */	
+struct coda_purgefid_out { /* in union outputArgs and union coda_downcalls */
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+    ViceFid CodaFid;
+};
+
+/* coda_rdwr: members coda_rdwr of union inputArgs / outputArgs */
+struct coda_rdwr_in {
+    struct coda_in_hdr ih;	/* header every inputArgs member begins with */
+    ViceFid	VFid;
+    int	rwflag;		/* presumably selects read vs. write -- confirm */
+    int	count;
+    int	offset;
+    int	ioflag;
+    caddr_t	data;		/* Place holder for data. */	
+};
+
+struct coda_rdwr_out {
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+    int	rwflag;
+    int	count;
+    caddr_t	data;	/* Place holder for data. */
+};
+
+
+/* coda_replace: */
+/* CODA_REPLACE is a venus->kernel call */	
+struct coda_replace_out { /* in union outputArgs and union coda_downcalls */
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+    ViceFid NewFid;
+    ViceFid OldFid;
+};
+
+/* coda_open_by_path: members coda_open_by_path of union inputArgs / outputArgs */
+struct coda_open_by_path_in {
+    struct coda_in_hdr ih;	/* header every inputArgs member begins with */
+    ViceFid	VFid;
+    int	flags;
+};
+
+struct coda_open_by_path_out {
+    struct coda_out_hdr oh;	/* header every outputArgs member begins with */
+	int path;	/* NOTE(review): an int, so presumably a placeholder/offset rather than the path text -- confirm */
+};
+
+/* 
+ * Occasionally, we don't cache the fid returned by CODA_LOOKUP. 
+ * For instance, if the fid is inconsistent. 
+ * This case is handled by setting the top bit of the type result parameter.
+ */
+#define CODA_NOCACHE          0x80000000	/* bit 31 only */
+
+union inputArgs {	/* overlay of the *_in message structs; all share the leading ih */
+    struct coda_in_hdr ih;		/* NB: every struct below begins with an ih */
+    struct coda_open_in coda_open;
+    struct coda_close_in coda_close;
+    struct coda_ioctl_in coda_ioctl;
+    struct coda_getattr_in coda_getattr;
+    struct coda_setattr_in coda_setattr;
+    struct coda_access_in coda_access;
+    struct coda_lookup_in coda_lookup;
+    struct coda_create_in coda_create;
+    struct coda_remove_in coda_remove;
+    struct coda_link_in coda_link;
+    struct coda_rename_in coda_rename;
+    struct coda_mkdir_in coda_mkdir;
+    struct coda_rmdir_in coda_rmdir;
+    struct coda_readdir_in coda_readdir;
+    struct coda_symlink_in coda_symlink;
+    struct coda_readlink_in coda_readlink;
+    struct coda_fsync_in coda_fsync;
+    struct coda_inactive_in coda_inactive;
+    struct coda_vget_in coda_vget;
+    struct coda_rdwr_in coda_rdwr;
+	struct coda_open_by_path_in coda_open_by_path;
+};
+
+union outputArgs {	/* header-only replies (rmdir, symlink, fsync) are not listed here */
+    struct coda_out_hdr oh;		/* NB: every struct below begins with an oh */
+    struct coda_root_out coda_root;
+    struct coda_open_out coda_open;
+    struct coda_ioctl_out coda_ioctl;
+    struct coda_getattr_out coda_getattr;
+    struct coda_lookup_out coda_lookup;
+    struct coda_create_out coda_create;
+    struct coda_mkdir_out coda_mkdir;
+    struct coda_readdir_out coda_readdir;
+    struct coda_readlink_out coda_readlink;
+    struct coda_vget_out coda_vget;
+    struct coda_purgeuser_out coda_purgeuser;
+    struct coda_zapfile_out coda_zapfile;
+    struct coda_zapdir_out coda_zapdir;
+    struct coda_zapvnode_out coda_zapvnode;
+    struct coda_purgefid_out coda_purgefid;
+    struct coda_rdwr_out coda_rdwr;
+    struct coda_replace_out coda_replace;
+	struct coda_open_by_path_out coda_open_by_path;
+};    
+
+union coda_downcalls {	/* the venus->kernel messages that carry a payload struct */
+    /* CODA_INVALIDATE is a venus->kernel call */
+    /* CODA_FLUSH is a venus->kernel call */
+    struct coda_purgeuser_out purgeuser;
+    struct coda_zapfile_out zapfile;
+    struct coda_zapdir_out zapdir;
+    struct coda_zapvnode_out zapvnode;
+    struct coda_purgefid_out purgefid;
+    struct coda_replace_out replace;
+};
+
+
+/*
+ * Used for identifying usage of "Control" and pioctls
+ */
+
+#define PIOCPARM_MASK 0x0000ffff	/* selects the low 16 bits */
+struct ViceIoctl {
+        caddr_t in, out;        /* Data to be transferred in, or out */
+        short in_size;          /* Size of input buffer <= 2K */
+        short out_size;         /* Maximum size of output buffer, <= 2K */
+};
+
+#if defined(__CYGWIN32__) || defined(DJGPP)
+struct PioctlData {	/* Cygwin/DJGPP layout: has an extra leading cmd member */
+	unsigned long cmd;
+        const char *path;
+        int follow;
+        struct ViceIoctl vi;	/* in/out buffers and their sizes */
+};
+#else
+struct PioctlData {	/* layout on every other platform: no cmd member */
+        const char *path;
+        int follow;
+        struct ViceIoctl vi;	/* in/out buffers and their sizes */
+};
+#endif
+
+#define	CODA_CONTROL		".CONTROL"	/* name of the control object */
+#define CODA_CONTROLLEN           8	/* strlen(CODA_CONTROL) */
+#define	CTL_VOL			(-1)
+#define	CTL_VNO			(-1)
+#define	CTL_UNI			(-1)
+#define CTL_INO                 (-1)
+#define	CTL_FILE		"/coda/.CONTROL"
+
+/* True when all three fid components carry the reserved control values. */
+#define	IS_CTL_FID(fidp)	((fidp)->Volume == CTL_VOL &&\
+				 (fidp)->Vnode == CTL_VNO &&\
+				 (fidp)->Unique == CTL_UNI)
+#endif 
+
diff --git a/sys/fs/coda/coda_fbsd.c b/sys/fs/coda/coda_fbsd.c
new file mode 100644
index 0000000..703708c
--- /dev/null
+++ b/sys/fs/coda/coda_fbsd.c
@@ -0,0 +1,216 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_fbsd.cr,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_fbsd.c,v 1.12 1999/01/27 20:09:17 dillon Exp $
+ * 
+ */
+
+#include "vcoda.h"
+#include "opt_devfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+#include <sys/ucred.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+
+#include <vm/vm.h>
+#include <vm/vnode_pager.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vnops.h>
+#include <coda/coda_psdev.h>
+
+#ifdef DEVFS
+#include <sys/devfsext.h>
+
+static	void	*cfs_devfs_token[NVCODA];	/* devfs_add_devswf() results for "cfs%d" */
+static	void	*coda_devfs_token[NVCODA];	/* devfs_add_devswf() results for "coda%d" */
+#endif
+
+/* 
+   From: "Jordan K. Hubbard" <jkh@time.cdrom.com>
+   Subject: Re: New 3.0 SNAPshot CDROM about ready for production.. 
+   To: "Robert.V.Baron" <rvb@GLUCK.CODA.CS.CMU.EDU>
+   Date: Fri, 20 Feb 1998 15:57:01 -0800
+
+   > Also I need a character device major number. (and might want to reserve
+   > a block of 10 syscalls.)
+
+   Just one char device number?  No block devices?  Very well, cdev 93 is yours!
+*/
+
+#define VC_DEV_NO      93	/* character device major, per the mail quoted above */
+
+static struct cdevsw codadevsw =	/* positional initializer: order must follow struct cdevsw */
+{ 
+  vc_nb_open,      vc_nb_close,    vc_nb_read,        vc_nb_write,	/*93*/
+  vc_nb_ioctl,     nostop,         nullreset,         nodevtotty,
+  vc_nb_poll,      nommap,         NULL,              "Coda", NULL, -1 
+};
+
+int     vcdebug = 1;	/* non-zero enables VCDEBUG output; on by default */
+#define VCDEBUG if (vcdebug) printf	/* NOTE(review): bare `if' -- unsafe directly before an `else' */
+
+static int
+codadev_modevent(module_t mod, int type, void *data)
+{
+	dev_t dev = makedev(VC_DEV_NO, 0);	/* used by MOD_LOAD and MOD_UNLOAD */
+#ifdef DEVFS
+	int i;
+#endif
+	static struct cdevsw *oldcdevsw;	/* entry displaced at load, kept across calls */
+	/* Module event hook: (un)install the Coda cdevsw entry and its devfs nodes. */
+	switch (type) {
+	case MOD_LOAD:
+		/* Claim major VC_DEV_NO; the previous entry is handed back in oldcdevsw. */
+		cdevsw_add(&dev,&codadevsw, &oldcdevsw);
+#ifdef DEVFS
+		/* tmp: only unit 0 is set up; NVCODA is forced to 1 from here on */
+#undef	NVCODA
+#define	NVCODA 1
+		for (i = 0; i < NVCODA; i++) {
+			cfs_devfs_token[i] =
+				devfs_add_devswf(&codadevsw, i,
+					DV_CHR, UID_ROOT, GID_WHEEL, 0666,
+					"cfs%d", i);
+			coda_devfs_token[i] =
+				devfs_add_devswf(&codadevsw, i,
+					DV_CHR, UID_ROOT, GID_WHEEL, 0666,
+					"coda%d", i);
+		}
+#endif
+		break;
+	case MOD_UNLOAD:
+#ifdef DEVFS
+		for (i = 0; i < NVCODA; i++) {
+			devfs_remove_dev(cfs_devfs_token[i]);
+			devfs_remove_dev(coda_devfs_token[i]);
+		}
+#endif
+		cdevsw_add(&dev, oldcdevsw, NULL);	/* put the saved entry back */
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+static moduledata_t codadev_mod = {
+	"codadev",		/* module name */
+	codadev_modevent,	/* called with MOD_LOAD / MOD_UNLOAD */
+	NULL			/* handed to the handler as its `data' argument */
+};
+DECLARE_MODULE(codadev, codadev_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+VC_DEV_NO);
+
+int
+coda_fbsd_getpages(v)
+	void *v;
+{
+    struct vop_getpages_args *ap = v;	/* v is used as the getpages argument block */
+    int ret = 0;
+
+#if	1	/* live branch: page in through the generic vnode pager */
+	/* ??? a_offset: ap->a_offset is not passed on to the generic pager */
+	ret = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+		ap->a_reqpage);
+	return ret;
+#else	/* compiled out: forward the request to the container vnode cp->c_ovp */
+  {
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct vnode *cfvp = cp->c_ovp;
+    int opened_internally = 0;	/* set when this function had to VOP_OPEN vp itself */
+    struct ucred *cred = (struct ucred *) 0;
+    struct proc *p = curproc;
+    int error = 0;
+	
+    if (IS_CTL_VP(vp)) {
+	return(EINVAL);
+    }
+
+    /* Redirect the request to UFS. */
+
+    if (cfvp == NULL) {
+	opened_internally = 1;
+
+	error = VOP_OPEN(vp, FREAD,  cred, p);
+printf("coda_getp: Internally Opening %p\n", vp);
+
+	if (error) {
+	    printf("coda_getpage: VOP_OPEN on container failed %d\n", error);
+		return (error);
+	}
+	if (vp->v_type == VREG) {
+	    error = vfs_object_create(vp, p, cred);
+	    if (error != 0) {
+		printf("coda_getpage: vfs_object_create() returns %d\n", error);
+		vput(vp);	/* NOTE(review): returns without the VOP_CLOSE done at the end -- confirm before re-enabling */
+		return(error);
+	    }
+	}
+
+	cfvp = cp->c_ovp;	/* re-read c_ovp after the open */
+    } else {
+printf("coda_getp: has container %p\n", cfvp);
+    }
+
+printf("coda_fbsd_getpages: using container ");
+/*
+    error = vnode_pager_generic_getpages(cfvp, ap->a_m, ap->a_count,
+	ap->a_reqpage);
+*/
+    error = VOP_GETPAGES(cfvp, ap->a_m, ap->a_count,
+	ap->a_reqpage, ap->a_offset);
+printf("error = %d\n", error);
+
+    /* Do an internal close if necessary. */
+    if (opened_internally) {
+	(void)VOP_CLOSE(vp, FREAD, cred, p);
+    }
+
+    return(error);
+  }
+#endif
+}
+
+int
+coda_fbsd_putpages(v)
+	void *v;
+{
+	struct vop_putpages_args *args = v;
+	int rv;		/* note: args->a_offset is not forwarded (??? in the original) */
+	rv = vnode_pager_generic_putpages(args->a_vp, args->a_m,
+		args->a_count, args->a_sync, args->a_rtvals);
+	return rv;
+}
diff --git a/sys/fs/coda/coda_io.h b/sys/fs/coda/coda_io.h
new file mode 100644
index 0000000..dd12fa1
--- /dev/null
+++ b/sys/fs/coda/coda_io.h
@@ -0,0 +1,128 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_io.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_io.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/* 
+ * HISTORY
+ * $Log: coda_io.h,v $
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.5  1998/08/18 17:05:23  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.4  1998/08/18 16:31:47  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.3  98/01/23  11:53:49  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.2.38.1  97/12/16  12:40:22  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.2  96/01/02  16:57:15  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ * 
+ * Revision 1.1.2.1  1995/12/20 01:57:42  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:08:20  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:08:20  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.1  1994/07/21  16:25:25  satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.3  94/06/14  16:53:47  dcs
+ * Added support for ODY-like mounting in the kernel (SETS)
+ * 
+ * Revision 1.3  94/06/14  16:48:03  dcs
+ * Added support for ODY-like mounting in the kernel (SETS)
+ * 
+ * Revision 1.2  92/10/27  17:58:28  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 1.1  92/04/03  17:35:34  satya
+ * Initial revision
+ * 
+ * Revision 1.5  91/02/09  12:53:26  jjk
+ * Substituted rvb's history blurb so that we agree with Mach 2.5 sources.
+ * 
+ * Revision 2.2.1.1  91/01/06  22:08:22  rvb
+ * 	Created for the Coda File System.
+ * 	[90/05/23            dcs]
+ * 
+ * Revision 1.3  90/07/19  10:23:05  dcs
+ * Added ; to cfs_resize definition for port to 386.
+ * 
+ * Revision 1.2  90/05/31  17:02:09  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * 
+ * 
+ */
+
+#ifndef _CODAIO_H_
+#define _CODAIO_H_
+
+/* Define ioctl commands for vcioctl, /dev/cfs */
+
+#define CODARESIZE    _IOW('c', 1, struct coda_resize ) /* Resize CODA NameCache */
+#define CODASTATS      _IO('c', 2)                      /* Collect stats */
+#define CODAPRINT      _IO('c', 3)                      /* Print Cache */
+#define CODATEST       _IO('c', 4)                      /* Print Cache -- NOTE(review): same text as CODAPRINT, likely copy/paste; confirm */
+
+struct coda_resize { int hashsize, heapsize; };	/* argument of CODARESIZE */
+
+#endif /* _CODAIO_H_ */
diff --git a/sys/fs/coda/coda_kernel.h b/sys/fs/coda/coda_kernel.h
new file mode 100644
index 0000000..33b372f
--- /dev/null
+++ b/sys/fs/coda/coda_kernel.h
@@ -0,0 +1,66 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_kernel.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_kernel.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ * 
+ */
+
+/* Macros to manipulate the queue */
+#ifndef INIT_QUEUE
+struct queue {
+    struct queue *forw, *back;
+};
+/* Make head an empty circular list: both links refer to head itself. */
+#define INIT_QUEUE(head)                     \
+do {                                         \
+    (head).forw = (struct queue *)&(head);   \
+    (head).back = (struct queue *)&(head);   \
+} while (0)
+/* First element after head (head itself when the list is empty). */
+#define GETNEXT(head) ((head).forw)
+/* Non-zero when head links only to itself. */
+#define EMPTY(head) ((struct queue *)(head).forw == (struct queue *)&(head))
+/* Non-zero when el is the head, i.e. a traversal has wrapped around. */
+#define EOQ(el, head) ((struct queue *)(el) == (struct queue *)&(head))
+/* Link el in just before head, i.e. at the tail of the list. */
+#define INSQUE(el, head)                             \
+do {                                                 \
+	(el).forw = ((head).back)->forw;             \
+	(el).back = (head).back;                     \
+	((head).back)->forw = (struct queue *)&(el); \
+	(head).back = (struct queue *)&(el);         \
+} while (0)
+/* Unlink el from its neighbours; el's own links are left untouched. */
+#define REMQUE(el)                         \
+do {                                       \
+	((el).forw)->back = (el).back;     \
+	((el).back)->forw = (el).forw;     \
+} while (0)
+
+#endif
diff --git a/sys/fs/coda/coda_namecache.c b/sys/fs/coda/coda_namecache.c
new file mode 100644
index 0000000..2da7b09
--- /dev/null
+++ b/sys/fs/coda/coda_namecache.c
@@ -0,0 +1,915 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_namecache.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_namecache.c,v 1.7 1998/09/28 20:52:58 rvb Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_namecache.c,v $
+ * Revision 1.7  1998/09/28 20:52:58  rvb
+ * Cleanup and fix THE bug
+ *
+ * Revision 1.6  1998/09/25 17:38:31  rvb
+ * Put "stray" printouts under DIAGNOSTIC.  Make everything build
+ * with DEBUG on.  Add support for lkm.  (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.5  1998/09/13 13:57:59  rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.4  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.11  1998/08/28 18:12:16  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10  1998/08/18 17:05:14  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9  1998/08/18 16:31:39  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8  98/01/31  20:53:10  rvb
+ * First version that works on FreeBSD 2.2.5
+ * 
+ * Revision 1.7  98/01/23  11:53:39  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.6.2.4  98/01/23  11:21:02  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.6.2.3  97/12/16  12:40:03  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.6.2.2  97/12/09  16:07:10  rvb
+ * Sync with vfs/include/coda.h
+ * 
+ * Revision 1.6.2.1  97/12/06  17:41:18  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.6  97/12/05  10:39:13  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.5.4.7  97/11/25  08:08:43  rvb
+ * cfs_venus ... done; until cred/vattr change
+ * 
+ * Revision 1.5.4.6  97/11/24  15:44:43  rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ * 
+ * Revision 1.5.4.5  97/11/20  11:46:38  rvb
+ * Capture current cfs_venus
+ * 
+ * Revision 1.5.4.4  97/11/18  10:27:13  rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ * 
+ * Revision 1.5.4.3  97/11/13  22:02:57  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.5.4.2  97/11/12  12:09:35  rvb
+ * reorg pass1
+ * 
+ * Revision 1.5.4.1  97/10/28  23:10:12  rvb
+ * >64Meg; venus can be killed!
+ * 
+ * Revision 1.5  97/08/05  11:08:01  lily
+ * Removed cfsnc_replace, replaced it with a coda_find, unhash, and
+ * rehash.  This fixes a cnode leak and a bug in which the fid is
+ * not actually replaced.  (cfs_namecache.c, cfsnc.h, cfs_subr.c)
+ * 
+ * Revision 1.4  96/12/12  22:10:57  bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ * 
+ * Revision 1.3  1996/11/08 18:06:09  bnoble
+ * Minor changes in vnode operation signature, VOP_UPDATE signature, and
+ * some newly defined bits in the include files.
+ *
+ * Revision 1.2  1996/01/02 16:56:50  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1  1995/12/20 01:57:15  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:07:57  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:07:56  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.3  1994/10/14  09:57:54  dcs
+ * Made changes 'cause sun4s have braindead compilers
+ *
+ * Revision 2.2  94/08/28  19:37:35  luqi
+ * Add a new CODA_REPLACE call to allow venus to replace a ViceFid in the
+ * mini-cache. 
+ * 
+ * In "cfs.h":
+ * Add CODA_REPLACE decl.
+ * 
+ * In "cfs_namecache.c":
+ * Add routine cfsnc_replace.
+ * 
+ * In "cfs_subr.c":
+ * Add case-statement to process CODA_REPLACE.
+ * 
+ * In "cfsnc.h":
+ * Add decl for CODA_NC_REPLACE.
+ * 
+ * 
+ * Revision 2.1  94/07/21  16:25:15  satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.2  92/10/27  17:58:21  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 2.3  92/09/30  14:16:20  mja
+ * 	call coda_flush instead of calling inode_uncache_try directly 
+ * 	(from dcs). Also...
+ * 
+ * 	Substituted rvb's history blurb so that we agree with Mach 2.5 sources.
+ * 	[91/02/09            jjk]
+ * 
+ * 	Added contributors blurb.
+ * 	[90/12/13            jjk]
+ * 
+ * Revision 2.2  90/07/05  11:26:30  mrt
+ * 	Created for the Coda File System.
+ * 	[90/05/23            dcs]
+ * 
+ * Revision 1.3  90/05/31  17:01:24  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * 
+ */
+
+/*
+ * This module contains the routines to implement the CODA name cache. The
+ * purpose of this cache is to reduce the cost of translating pathnames 
+ * into Vice FIDs. Each entry in the cache contains the name of the file,
+ * the vnode (FID) of the parent directory, and the cred structure of the
+ * user accessing the file.
+ *
+ * The first time a file is accessed, it is looked up by the local Venus
+ * which first ensures that the user has access to the file. In addition
+ * we are guaranteed that Venus will invalidate any name cache entries in
+ * case the user no longer should be able to access the file. For these
+ * reasons we do not need to keep access list information as well as a
+ * cred structure for each entry.
+ *
+ * The table can be accessed through the routines cnc_init(), cnc_enter(),
+ * cnc_lookup(), cnc_rmfidcred(), cnc_rmfid(), cnc_rmcred(), and cnc_purge().
+ * There are several other routines which aid in the implementation of the
+ * hash table.
+ */
+
+/*
+ * NOTES: rvb@cs
+ * 1.	The name cache holds a reference to every vnode in it.  Hence files can not be
+ *	 closed or made inactive until they are released.
+ * 2.	coda_nc_name(cp) was added to get a name for a cnode pointer for debugging.
+ * 3.	coda_nc_find() has debug code to detect when entries are stored with different
+ *	 credentials.  We don't understand yet, if/how entries are NOT EQ but still
+ *	 EQUAL
+ * 4.	I wonder if this name cache could be replaced by the vnode name cache.
+ *	The latter has no zapping functions, so probably not.
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/ucred.h>
+#include <sys/select.h>
+
+#ifndef insque
+#include <sys/systm.h>
+#endif /* insque */
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_namecache.h>
+
+#ifdef	DEBUG
+#include <coda/coda_vnops.h>
+#endif
+
+/* 
+ * Declaration of the name cache data structure.
+ */
+
+int 	coda_nc_use = 1;			 /* 0 turns the cache off: enter/lookup return at once */
+int	coda_nc_size = CODA_NC_CACHESIZE;	 /* number of entries in coda_nc_heap */
+int	coda_nc_hashsize = CODA_NC_HASHSIZE; /* number of buckets in coda_nc_hash */
+
+struct 	coda_cache *coda_nc_heap;	/* pointer to the cache entries */
+struct	coda_hash  *coda_nc_hash;	/* hash table of coda_cache pointers */
+struct	coda_lru   coda_nc_lru;		/* head of lru chain */
+
+struct coda_nc_statistics coda_nc_stat;	/* Keep various stats */
+
+/* 
+ * for testing purposes (not referenced directly in the code shown here)
+ */
+int coda_nc_debug = 0;
+
+/*
+ * Forward declarations of the file-local helpers behind the entry points
+ */
+static struct coda_cache *coda_nc_find(struct cnode *dcp, const char *name, int namelen,
+	struct ucred *cred, int hash);
+static void coda_nc_remove(struct coda_cache *cncp, enum dc_status dcstat);
+
+/*  
+ * Initialize the cache, the LRU structure and the Hash structure(s)
+ */
+
+#define TOTAL_CACHE_SIZE 	(sizeof(struct coda_cache) * coda_nc_size)	/* bytes for coda_nc_heap */
+#define TOTAL_HASH_SIZE 	(sizeof(struct coda_hash)  * coda_nc_hashsize)	/* bytes for coda_nc_hash */
+
+int coda_nc_initialized = 0;      /* 0 until coda_nc_init() has run; incremented at its end */
+
+void
+coda_nc_init(void)
+{
+    int i;
+
+    /* Start every (re)initialisation with clean statistics. */
+    bzero(&coda_nc_stat, (sizeof(struct coda_nc_statistics)));
+
+#ifdef	CODA_VERBOSE
+    /* Report the sizes actually allocated below, not the compile-time defaults. */
+    printf("CODA NAME CACHE: CACHE %d, HASH TBL %d\n", coda_nc_size, coda_nc_hashsize);
+#endif
+    CODA_ALLOC(coda_nc_heap, struct coda_cache *, TOTAL_CACHE_SIZE);
+    CODA_ALLOC(coda_nc_hash, struct coda_hash *, TOTAL_HASH_SIZE);
+    /* Both LRU links start out referring to the list head (via LRU_PART). */
+    coda_nc_lru.lru_next = 
+	coda_nc_lru.lru_prev = (struct coda_cache *)LRU_PART(&coda_nc_lru);
+    
+    for (i=0; i < coda_nc_size; i++) {	/* initialize the heap */
+	CODA_NC_LRUINS(&coda_nc_heap[i], &coda_nc_lru);
+	CODA_NC_HSHNUL(&coda_nc_heap[i]);
+	coda_nc_heap[i].cp = coda_nc_heap[i].dcp = (struct cnode *)0;
+    }
+    
+    for (i=0; i < coda_nc_hashsize; i++) {	/* initialize the hashtable */
+	CODA_NC_HSHNUL((struct coda_cache *)&coda_nc_hash[i]);
+	coda_nc_hash[i].length = 0;	/* later only ever ++/-- by coda_nc_enter() */
+    }
+    
+    coda_nc_initialized++;
+}
+
+/*
+ * Auxiliary routines -- shouldn't be entry points
+ */
+
+static struct coda_cache *
+coda_nc_find(dcp, name, namelen, cred, hash)
+	struct cnode *dcp;
+	const char *name;
+	int namelen;
+	struct ucred *cred;
+	int hash;
+{
+	/*
+	 * Walk the chain of bucket `hash' for an entry matching (dcp, name,
+	 * namelen).  A zero cred matches any entry; otherwise the entry must
+	 * hold the very same ucred pointer.  Returns the entry or 0.
+	 */
+	struct coda_cache *cncp;
+	int count = 1;		/* entries examined so far; added to Search_len on a hit */
+
+	CODA_NC_DEBUG(CODA_NC_FIND, 
+		    myprintf(("coda_nc_find(dcp %p, name %s, len %d, cred %p, hash %d\n",
+			   dcp, name, namelen, cred, hash));)
+
+	for (cncp = coda_nc_hash[hash].hash_next; 
+	     cncp != (struct coda_cache *)&coda_nc_hash[hash];
+	     cncp = cncp->hash_next, count++) 
+	{
+	    if (!(CODA_NAMEMATCH(cncp, name, namelen, dcp)))
+		continue;
+	    if ((cred == 0) || (cncp->cred == cred)) 
+	    { 
+		/* XXX original note: compare cr_uid instead */
+		coda_nc_stat.Search_len += count;
+		return(cncp);
+	    }
+#ifdef	DEBUG
+	    /* Same name but a different cred pointer: dump both for comparison. */
+	    printf("coda_nc_find: name %s, new cred = %p, cred = %p\n",
+		    name, cred, cncp->cred);
+	    printf("nref %d, nuid %d, ngid %d // oref %d, ouid %d, ogid %d\n",
+		    cred->cr_ref, cred->cr_uid, cred->cr_gid,
+		    cncp->cred->cr_ref, cncp->cred->cr_uid, cncp->cred->cr_gid);
+	    print_cred(cred);
+	    print_cred(cncp->cred);
+#endif
+	}
+
+	return((struct coda_cache *)0);
+}
+
+/*
+ * Enter a new (dir cnode, name) pair into the cache, updating the
+ * LRU and Hash as needed.
+ */
+void
+coda_nc_enter(dcp, name, namelen, cred, cp)
+    struct cnode *dcp;		/* directory cnode of the (dir cnode, name) pair */
+    const char *name;		/* namelen bytes of it are copied into the entry */
+    int namelen;
+    struct ucred *cred;		/* stored in the entry after a crhold() */
+    struct cnode *cp;		/* cnode stored as the entry's cp */
+{
+    struct coda_cache *cncp;
+    int hash;
+    
+    if (coda_nc_use == 0)			/* Cache is off */
+	return;
+    
+    CODA_NC_DEBUG(CODA_NC_ENTER, 
+		myprintf(("Enter: dcp %p cp %p name %s cred %p \n",
+		       dcp, cp, name, cred)); )
+	
+    if (namelen > CODA_NC_NAMELEN) {	/* too long to store: count it and give up */
+	CODA_NC_DEBUG(CODA_NC_ENTER, 
+		    myprintf(("long name enter %s\n",name));)
+	    coda_nc_stat.long_name_enters++;	/* record stats */
+	return;
+    }
+    
+    hash = CODA_NC_HASH(name, namelen, dcp);
+    cncp = coda_nc_find(dcp, name, namelen, cred, hash);
+    if (cncp != (struct coda_cache *) 0) {	
+	coda_nc_stat.dbl_enters++;		/* duplicate entry */
+	return;
+    }
+    
+    coda_nc_stat.enters++;		/* record the enters statistic */
+    
+    /* Grab the next element in the lru chain */
+    cncp = CODA_NC_LRUGET(coda_nc_lru);
+    
+    CODA_NC_LRUREM(cncp);	/* remove it from the lists */
+    
+    if (CODA_NC_VALID(cncp)) {	/* the slot still holds a live entry: evict it first */
+	/* Seems really ugly, but we have to decrement the appropriate
+	   hash bucket length here, so we have to find the hash bucket
+	   */
+	coda_nc_hash[CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp)].length--;
+	
+	coda_nc_stat.lru_rm++;	/* zapped a valid entry */
+	CODA_NC_HSHREM(cncp);
+	vrele(CTOV(cncp->dcp)); 	/* drop what the evicted entry held, cf. vref/crhold below */
+	vrele(CTOV(cncp->cp));
+	crfree(cncp->cred);
+    }
+    
+    /*
+     * Put a hold on the current vnodes and fill in the cache entry.
+     */
+    vref(CTOV(cp));
+    vref(CTOV(dcp));
+    crhold(cred); 
+    cncp->dcp = dcp;
+    cncp->cp = cp;
+    cncp->namelen = namelen;
+    cncp->cred = cred;
+    
+    bcopy(name, cncp->name, (unsigned)namelen);	/* copies namelen bytes only; no terminator is added here */
+    
+    /* Insert into the lru and hash chains. */
+    
+    CODA_NC_LRUINS(cncp, &coda_nc_lru);
+    CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]);
+    coda_nc_hash[hash].length++;                      /* Used for tuning */
+    
+    CODA_NC_DEBUG(CODA_NC_PRINTCODA_NC, print_coda_nc(); )
+}
+
+/*
+ * Find the (dir cnode, name) pair in the cache; if its cred matches the
+ * input return the cached cnode (no extra reference taken), else return 0.
+ */
+struct cnode *
+coda_nc_lookup(dcp, name, namelen, cred)
+	struct cnode *dcp;
+	const char *name;
+	int namelen;
+	struct ucred *cred;
+{
+	int hash;
+	struct coda_cache *cncp;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return((struct cnode *) 0);
+
+	if (namelen > CODA_NC_NAMELEN) {	/* such names are never entered */
+	        CODA_NC_DEBUG(CODA_NC_LOOKUP, 
+			    myprintf(("long name lookup %s\n",name));)
+		coda_nc_stat.long_name_lookups++;		/* record stats */
+		return((struct cnode *) 0);
+	}
+
+	/* Use the hash function to locate the starting point,
+	   then the search routine to go down the list looking for
+	   the correct cred.
+ 	 */
+
+	hash = CODA_NC_HASH(name, namelen, dcp);
+	cncp = coda_nc_find(dcp, name, namelen, cred, hash);
+	if (cncp == (struct coda_cache *) 0) {
+		coda_nc_stat.misses++;			/* record miss */
+		return((struct cnode *) 0);
+	}
+
+	coda_nc_stat.hits++;
+
+	/* requeue right after the LRU head; victims are taken from lru_prev */
+	CODA_NC_LRUREM(cncp);
+	CODA_NC_LRUINS(cncp, &coda_nc_lru);
+
+	/* move it to the front of the hash chain */
+	/* don't need to change the hash bucket length */
+	CODA_NC_HSHREM(cncp);
+	CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]);
+
+	CODA_NC_DEBUG(CODA_NC_LOOKUP, 
+		printf("lookup: dcp %p, name %s, cred %p = cp %p\n",
+			dcp, name, cred, cncp->cp); )
+
+	return(cncp->cp);
+}
+
+static void
+coda_nc_remove(cncp, dcstat)
+	struct coda_cache *cncp;
+	enum dc_status dcstat;
+{
+	/* 
+	 * Remove an entry: unlink it from its hash chain, vrele() dcp and cp,
+	 * crfree() the cred, zero the data part and requeue it next to the
+	 * LRU entry.  The hash bucket length is left for the caller to adjust.
+	 */
+        CODA_NC_DEBUG(CODA_NC_REMOVE,
+		    myprintf(("coda_nc_remove %s from parent %lx.%lx.%lx\n",
+			   cncp->name, (cncp->dcp)->c_fid.Volume,
+			   (cncp->dcp)->c_fid.Vnode, (cncp->dcp)->c_fid.Unique));)
+
+  	CODA_NC_HSHREM(cncp);
+
+	CODA_NC_HSHNUL(cncp);		/* have it be a null chain */
+	/* On a downcall, flag a cnode whose vnode has only this one use left. */
+	if ((dcstat == IS_DOWNCALL) && (CTOV(cncp->dcp)->v_usecount == 1)) {
+		cncp->dcp->c_flags |= C_PURGING;
+	}
+	vrele(CTOV(cncp->dcp)); 
+
+	if ((dcstat == IS_DOWNCALL) && (CTOV(cncp->cp)->v_usecount == 1)) {
+		cncp->cp->c_flags |= C_PURGING;
+	}
+	vrele(CTOV(cncp->cp)); 
+
+	crfree(cncp->cred); 
+	bzero(DATA_PART(cncp),DATA_SIZE);	/* chain pointers are kept */
+
+	/* Put the null entry just after the least-recently-used entry */
+	/* LRU_TOP adjusts the pointer to point to the top of the structure. */
+	CODA_NC_LRUREM(cncp);
+	CODA_NC_LRUINS(cncp, LRU_TOP(coda_nc_lru.lru_prev));
+}
+
+/*
+ * Remove all entries whose parent (dcp) has the input fid.
+ */
+void
+coda_nc_zapParentfid(fid, dcstat)
+	ViceFid *fid;
+	enum dc_status dcstat;
+{
+	/* To get to a specific fid, we might either have another hashing
+	   function or do a sequential search through the cache for the
+	   appropriate entries. The latter may be acceptable since I don't
+	   think callbacks or whatever Case 1 covers are frequent occurrences.
+	 */
+	struct coda_cache *cncp, *ncncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPPFID, 
+		myprintf(("ZapParent: fid 0x%lx, 0x%lx, 0x%lx \n",
+			fid->Volume, fid->Vnode, fid->Unique)); )
+
+	coda_nc_stat.zapPfids++;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {	/* scan every bucket */
+
+		/*
+		 * Need to save the hash_next pointer in case we remove the
+		 * entry. remove causes hash_next to point to itself.
+		 */
+
+		for (cncp = coda_nc_hash[i].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[i];
+		     cncp = ncncp) {
+			ncncp = cncp->hash_next;
+			if ((cncp->dcp->c_fid.Volume == fid->Volume) &&
+			    (cncp->dcp->c_fid.Vnode == fid->Vnode)   &&
+			    (cncp->dcp->c_fid.Unique == fid->Unique)) {
+			        coda_nc_hash[i].length--;      /* Used for tuning */
+				coda_nc_remove(cncp, dcstat); 
+			}
+		}
+	}
+}
+
+
+/*
+ * Remove all entries whose cached cnode (cp) has the same fid as the input
+ */
+void
+coda_nc_zapfid(fid, dcstat)
+	ViceFid *fid;
+	enum dc_status dcstat;
+{
+	/* See comment for zapParentfid. This routine will be used
+	   if attributes are being cached. 
+	 */
+	struct coda_cache *cncp, *ncncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPFID, 
+		myprintf(("Zapfid: fid 0x%lx, 0x%lx, 0x%lx \n",
+			fid->Volume, fid->Vnode, fid->Unique)); )
+
+	coda_nc_stat.zapFids++;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {	/* scan every bucket */
+		for (cncp = coda_nc_hash[i].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[i];
+		     cncp = ncncp) {
+			ncncp = cncp->hash_next;	/* saved: remove relinks cncp */
+			if ((cncp->cp->c_fid.Volume == fid->Volume) &&
+			    (cncp->cp->c_fid.Vnode == fid->Vnode)   &&
+			    (cncp->cp->c_fid.Unique == fid->Unique)) {
+			        coda_nc_hash[i].length--;     /* Used for tuning */
+				coda_nc_remove(cncp, dcstat); 
+			}
+		}
+	}
+}
+
+/* 
+ * Meant to remove entries matching fid and cred; currently removes nothing.
+ */
+void
+coda_nc_zapvnode(fid, cred, dcstat)	
+	ViceFid *fid;
+	struct ucred *cred;
+	enum dc_status dcstat;
+{
+	/* See comment for zapfid. I don't think that one would ever
+	   want to zap a file with a specific cred from the kernel.
+	   We'll leave this one unimplemented.
+	 */
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPVNODE, 
+		myprintf(("Zapvnode: fid 0x%lx, 0x%lx, 0x%lx cred %p\n",
+			  fid->Volume, fid->Vnode, fid->Unique, cred)); )
+	/* dcstat is unused; only the debug trace above is emitted. */
+}
+
+/*
+ * Remove all entries which have the (dir cnode, name) pair
+ */
+void
+coda_nc_zapfile(dcp, name, namelen)
+	struct cnode *dcp;
+	const char *name;
+	int namelen;
+{
+	/* use the hash function to locate the file, then zap all
+ 	   entries of it regardless of the cred.
+	 */
+	struct coda_cache *cncp;
+	int hash;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_ZAPFILE, 
+		myprintf(("Zapfile: dcp %p name %s \n",
+			  dcp, name)); )
+
+	if (namelen > CODA_NC_NAMELEN) {	/* never entered, nothing to zap */
+		coda_nc_stat.long_remove++;		/* record stats */
+		return;
+	}
+
+	coda_nc_stat.zapFile++;
+
+	hash = CODA_NC_HASH(name, namelen, dcp);
+	cncp = coda_nc_find(dcp, name, namelen, 0, hash);	/* 0: presumably any cred -- confirm in coda_nc_find */
+
+	while (cncp) {		/* repeat until no entry for the pair is left */
+	  coda_nc_hash[hash].length--;                 /* Used for tuning */
+
+	  coda_nc_remove(cncp, NOT_DOWNCALL);
+	  cncp = coda_nc_find(dcp, name, namelen, 0, hash);
+	}
+}
+
+/* 
+ * Remove all the entries for a particular user. Used when tokens expire.
+ * An entry belongs to the user when its cred's cr_uid equals uid.
+ */
+void
+coda_nc_purge_user(uid, dcstat)
+	vuid_t	uid;
+	enum dc_status  dcstat;
+{
+	/* 
+	 * I think the best approach is to go through the entire cache
+	 * via HASH or whatever and zap all entries which match the
+	 * input cred. Or just flush the whole cache.  It might be
+	 * best to go through on basis of LRU since cache will almost
+	 * always be full and LRU is more straightforward.  
+	 */
+
+	struct coda_cache *cncp, *ncncp;
+	int hash;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	CODA_NC_DEBUG(CODA_NC_PURGEUSER, 
+		myprintf(("ZapDude: uid %x\n", uid)); )
+	coda_nc_stat.zapUsers++;
+
+	for (cncp = CODA_NC_LRUGET(coda_nc_lru);
+	     cncp != (struct coda_cache *)(&coda_nc_lru);
+	     cncp = ncncp) {
+		ncncp = CODA_NC_LRUGET(*cncp);	/* saved: remove requeues cncp */
+
+		if ((CODA_NC_VALID(cncp)) &&
+		   ((cncp->cred)->cr_uid == uid)) {
+		        /* The walk is over the LRU list, so the bucket is not
+			   known: recompute it from the entry's own key in
+			   order to decrement that bucket's length. */
+		        hash = CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp);
+			coda_nc_hash[hash].length--;     /* For performance tuning */
+
+			coda_nc_remove(cncp, dcstat); 
+		}
+	}
+}
+
+/*
+ * Flush the entire name cache. In response to a flush of the Venus cache.
+ */
+void
+coda_nc_flush(dcstat)
+	enum dc_status dcstat;
+{
+	/* One option is to deallocate the current name cache and
+	   call init to start again. Or just deallocate, then rebuild.
+	   Or again, we could just go through the array and zero the 
+	   appropriate fields. 
+	 */
+	
+	/* 
+	 * Go through the whole lru chain and kill everything as we go.
+	 * I don't use remove since that would rebuild the lru chain
+	 * as it went and that seemed unnecessary.
+	 */
+	struct coda_cache *cncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;		/* NB: nothing at all is released in this case */
+
+	coda_nc_stat.Flushes++;
+
+	for (cncp = CODA_NC_LRUGET(coda_nc_lru);
+	     cncp != (struct coda_cache *)&coda_nc_lru;
+	     cncp = CODA_NC_LRUGET(*cncp)) {
+		if (CODA_NC_VALID(cncp)) {
+
+			CODA_NC_HSHREM(cncp);	/* only zero valid nodes */
+			CODA_NC_HSHNUL(cncp);
+			if ((dcstat == IS_DOWNCALL) 
+			    && (CTOV(cncp->dcp)->v_usecount == 1))
+			{
+				cncp->dcp->c_flags |= C_PURGING;
+			}
+			vrele(CTOV(cncp->dcp)); 
+
+			if (CTOV(cncp->cp)->v_flag & VTEXT) {
+			    if (coda_vmflush(cncp->cp))
+				CODADEBUG(CODA_FLUSH, 
+					 myprintf(("coda_nc_flush: (%lx.%lx.%lx) busy\n", cncp->cp->c_fid.Volume, cncp->cp->c_fid.Vnode, cncp->cp->c_fid.Unique)); )
+			}
+
+			if ((dcstat == IS_DOWNCALL) 
+			    && (CTOV(cncp->cp)->v_usecount == 1))
+			{
+				cncp->cp->c_flags |= C_PURGING;
+			}
+			vrele(CTOV(cncp->cp));  
+
+			crfree(cncp->cred); 
+			bzero(DATA_PART(cncp),DATA_SIZE);	/* lru links stay intact */
+		}
+	}
+
+	for (i = 0; i < coda_nc_hashsize; i++)	/* every chain is empty now */
+	  coda_nc_hash[i].length = 0;
+}
+
+/*
+ * Debugging routines
+ */
+
+/* 
+ * Debug aid: dump every hash chain, bucket by bucket, via myprintf.
+ */
+void
+print_coda_nc(void)
+{
+	struct coda_cache *ent, *stop;
+	int bucket;
+
+	for (bucket = 0; bucket < coda_nc_hashsize; bucket++) {
+		/* The bucket header itself terminates its chain. */
+		stop = (struct coda_cache *)&coda_nc_hash[bucket];
+		myprintf(("\nhash %d\n",bucket));
+		ent = coda_nc_hash[bucket].hash_next;
+		while (ent != stop) {
+			myprintf(("cp %p dcp %p cred %p name %s\n",
+				  ent->cp, ent->dcp, ent->cred, ent->name));
+			ent = ent->hash_next;
+		}
+	}
+}
+
+/* Summarize the hash-bucket lengths into coda_nc_stat (tuning aid). */
+void
+coda_nc_gather_stats(void)
+{
+    int b, len, dev, used, mean;
+    int longest = 0, total = 0, empty = 0, sq_total = 0;
+
+    /* Pass 1: total chain length, longest chain, count of empty buckets. */
+    for (b = 0; b < coda_nc_hashsize; b++) {
+	len = coda_nc_hash[b].length;
+	if (len == 0)
+	    empty++;
+	else
+	    total += len;
+	if (longest < len)
+	    longest = len;
+    }
+
+    coda_nc_stat.Sum_bucket_len = total;
+    coda_nc_stat.Num_zero_len = empty;
+    coda_nc_stat.Max_bucket_len = longest;
+
+    /*
+     * The arithmetic mean is taken over the non-empty buckets only;
+     * integer division, and 0 when every bucket is empty.
+     */
+    used = coda_nc_hashsize - empty;
+    mean = (used > 0) ? total / used : 0;
+    /* Pass 2: sum of squared deviations from that mean, same buckets. */
+    for (b = 0; b < coda_nc_hashsize; b++) {
+	len = coda_nc_hash[b].length;
+	if (len == 0)
+	    continue;
+	dev = len - mean;
+	sq_total += dev * dev;
+    }
+    coda_nc_stat.Sum2_bucket_len = sq_total;
+}
+
+/*
+ * Resize the name cache: validate the new sizes, flush and free the old
+ * tables, then rebuild them through coda_nc_init().  Returns 0, or EINVAL
+ * for unusable sizes.  Only for controlled environments: apart from turning
+ * the cache off there is no locking against concurrent users of the cache.
+ */
+int
+coda_nc_resize(hashsize, heapsize, dcstat)
+     int hashsize, heapsize;
+     enum dc_status dcstat;
+{
+    if (hashsize <= 0 || heapsize <= 0 ||	/* empty or negative tables */
+	(hashsize % 2) || (heapsize % 2))	/* odd sizes are illegal too */
+	return(EINVAL);
+    
+    /* Flush while the cache is still on: coda_nc_flush() does nothing when off */
+    coda_nc_flush(dcstat);                 /* free any cnodes in the cache */
+    coda_nc_use = 0;                       /* Turn the cache off */
+    
+    /* WARNING: free must happen *before* size is reset */
+    CODA_FREE(coda_nc_heap,TOTAL_CACHE_SIZE);
+    CODA_FREE(coda_nc_hash,TOTAL_HASH_SIZE);
+    
+    coda_nc_hashsize = hashsize;
+    coda_nc_size = heapsize;
+    
+    coda_nc_init();                        /* Set up a cache with the new size */
+    
+    coda_nc_use = 1;                       /* Turn the cache back on */
+    return(0);
+}
+
+#ifdef	DEBUG
+char coda_nc_name_buf[CODA_MAXNAMLEN+1];	/* scratch for a terminated copy */
+/* Debug aid: print every cached name whose entry maps to cnode cp. */
+void
+coda_nc_name(struct cnode *cp)
+{
+	struct coda_cache *cncp, *ncncp;
+	int i;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	for (i = 0; i < coda_nc_hashsize; i++) {
+		for (cncp = coda_nc_hash[i].hash_next; 
+		     cncp != (struct coda_cache *)&coda_nc_hash[i];
+		     cncp = ncncp) {
+			ncncp = cncp->hash_next;
+			if (cncp->cp == cp) {
+				/* copy namelen bytes, then terminate explicitly */
+				bcopy(cncp->name, coda_nc_name_buf, cncp->namelen);
+				coda_nc_name_buf[cncp->namelen] = 0;
+				printf(" is %s (%p,%p)@%p",
+					coda_nc_name_buf, cncp->cp, cncp->dcp, cncp);
+			}
+
+		}
+	}
+}
+#endif
diff --git a/sys/fs/coda/coda_namecache.h b/sys/fs/coda/coda_namecache.h
new file mode 100644
index 0000000..f7b3194
--- /dev/null
+++ b/sys/fs/coda/coda_namecache.h
@@ -0,0 +1,285 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_namecache.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_namecache.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/* 
+ * HISTORY
+ * $Log: coda_namecache.h,v $
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.8  1998/08/28 18:12:25  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.7  1998/08/18 17:05:24  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.6  1998/08/18 16:31:49  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.5  98/01/23  11:53:51  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.4.2.1  97/12/16  12:40:23  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.4  97/12/05  10:39:29  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.3.4.3  97/11/24  15:44:51  rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ * 
+ * Revision 1.3.4.2  97/11/12  12:09:44  rvb
+ * reorg pass1
+ * 
+ * Revision 1.3.4.1  97/11/06  21:06:05  rvb
+ * don't include headers in headers
+ * 
+ * Revision 1.3  97/08/05  11:08:19  lily
+ * Removed cfsnc_replace, replaced it with a coda_find, unhash, and
+ * rehash.  This fixes a cnode leak and a bug in which the fid is
+ * not actually replaced.  (cfs_namecache.c, cfsnc.h, cfs_subr.c)
+ * 
+ * Revision 1.2  96/01/02  16:57:19  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ * 
+ * Revision 1.1.2.1  1995/12/20 01:57:45  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:08:22  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:08:21  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.2  1994/08/28  19:37:39  luqi
+ * Add a new CODA_REPLACE call to allow venus to replace a ViceFid in the
+ * mini-cache.
+ *
+ * In "cfs.h":
+ * Add CODA_REPLACE decl.
+ *
+ * In "cfs_namecache.c":
+ * Add routine cfsnc_replace.
+ *
+ * In "cfs_subr.c":
+ * Add case-statement to process CODA_REPLACE.
+ *
+ * In "cfsnc.h":
+ * Add decl for CODA_NC_REPLACE.
+ *
+ * Revision 2.1  94/07/21  16:25:27  satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.2  92/10/27  17:58:34  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 2.2  90/07/05  11:27:04  mrt
+ * 	Created for the Coda File System.
+ * 	[90/05/23            dcs]
+ * 
+ * Revision 1.4  90/05/31  17:02:12  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * 
+ */
+#ifndef _CODA_NC_HEADER_
+#define _CODA_NC_HEADER_
+
+/*
+ * Coda constants
+ */
+#define CODA_NC_NAMELEN	15		/* longest name stored in cache */
+#define CODA_NC_CACHESIZE 256		/* Default cache size */
+#define CODA_NC_HASHSIZE	64		/* Should be a power of 2: hash is masked */
+
+/*
+ * Hash function for the primary hash.
+ */
+
+/* 
+ * oldhash -- (first + last letters + length + (int)cp) masked by size-1
+ * default -- same, but last letter << 4 and (int)cp >> 8
+ */
+
+#ifdef	oldhash
+#define CODA_NC_HASH(name, namelen, cp) \
+	(((name)[0] + (name)[(namelen)-1] + (namelen) + (int)(cp)) & (coda_nc_hashsize-1))
+#else
+#define CODA_NC_HASH(name, namelen, cp) \
+	(((name)[0] + ((name)[(namelen)-1]<<4) + (namelen) + (((int)(cp))>>8)) & (coda_nc_hashsize-1))
+#endif
+
+#define CODA_NAMEMATCH(cp, name, namelen, dcp) \
+	(((namelen) == (cp)->namelen) && ((dcp) == (cp)->dcp) && \
+		 (bcmp((cp)->name,(name),(namelen)) == 0))
+
+/*
+ * Functions to modify the hash and lru chains.
+ * insque and remque assume that the pointers are the first thing
+ * in the list node, thus the trickery for lru.
+ */
+
+#define CODA_NC_HSHINS(elem, pred)	insque(elem,pred)
+#define CODA_NC_HSHREM(elem)		remque(elem)
+#define CODA_NC_HSHNUL(elem)		((elem)->hash_next = \
+					(elem)->hash_prev = (elem))
+
+#define CODA_NC_LRUINS(elem, pred)	insque(LRU_PART(elem), LRU_PART(pred))
+#define CODA_NC_LRUREM(elem)		remque(LRU_PART(elem));	/* NB: carries its own ';' */
+#define CODA_NC_LRUGET(lruhead)		LRU_TOP((lruhead).lru_prev)
+
+#define CODA_NC_VALID(cncp)	((cncp)->dcp != (struct cnode *)0)
+ 
+#define LRU_PART(cncp)			((struct coda_cache *) \
+				((char *)(cncp) + (2*sizeof(struct coda_cache *))))
+#define LRU_TOP(cncp)				((struct coda_cache *) \
+			((char *)(cncp) - (2*sizeof(struct coda_cache *))))
+#define DATA_PART(cncp)				((struct coda_cache *) \
+			((char *)(cncp) + (4*sizeof(struct coda_cache *))))
+#define DATA_SIZE	(sizeof(struct coda_cache)-(4*sizeof(struct coda_cache *)))
+
+/*
+ * Structure for an element in the CODA Name Cache.
+ * NOTE: the LRU_PART, LRU_TOP, DATA_PART and DATA_SIZE macros depend on
+ * the four chain pointers coming first, in exactly this order and size;
+ * do not reorder them.
+ */
+
+struct coda_cache {	
+	struct coda_cache	*hash_next,*hash_prev;	/* Hash list */
+	struct coda_cache	*lru_next, *lru_prev;	/* LRU list */
+	struct cnode	*cp;			/* cnode of the file */
+	struct cnode	*dcp;			/* parent's cnode */
+	struct ucred	*cred;			/* user credentials */
+	char		name[CODA_NC_NAMELEN];	/* segment name; no room for a NUL at full length */
+	int		namelen;		/* length of name */
+};
+
+struct	coda_lru {		/* Start of LRU chain */
+	char *dummy1, *dummy2;			/* stand in for the two hash pointers */
+	struct coda_cache *lru_next, *lru_prev;   /* position of pointers is important */
+};
+
+
+struct coda_hash {		/* Start of Hash chain */
+	struct coda_cache *hash_next, *hash_prev; /* NOTE: chain pointers must be first */
+        int length;                             /* used for tuning purposes */
+};
+
+
+/* 
+ * Debug hook: run STMT when bit N of coda_nc_debug (declared outside
+ * this header) is set.  Expands to a braced block; no ';' is needed.
+ */
+#define CODA_NC_DEBUG(N, STMT)     { if (coda_nc_debug & (1 <<N)) { STMT } }
+
+/* Prototypes of the name cache functions exported within coda */
+extern void coda_nc_init(void);
+extern void coda_nc_enter(struct cnode *, const char *, int, struct ucred *, struct cnode *);
+extern struct cnode *coda_nc_lookup(struct cnode *, const char *, int, struct ucred *);
+
+extern void coda_nc_zapParentfid(ViceFid *, enum dc_status);
+extern void coda_nc_zapfid(ViceFid *, enum dc_status);
+extern void coda_nc_zapvnode(ViceFid *, struct ucred *, enum dc_status);
+extern void coda_nc_zapfile(struct cnode *, const char *, int);
+extern void coda_nc_purge_user(vuid_t, enum dc_status);
+extern void coda_nc_flush(enum dc_status);
+
+extern void print_coda_nc(void);
+extern void coda_nc_gather_stats(void);
+extern int  coda_nc_resize(int, int, enum dc_status);
+extern void coda_nc_name(struct cnode *cp);	/* defined only #ifdef DEBUG */
+
+/*
+ * Structure to contain statistics on the cache usage
+ */
+
+struct coda_nc_statistics {
+	unsigned	hits;			/* lookups that found an entry */
+	unsigned	misses;			/* lookups that found none */
+	unsigned	enters;			/* entries actually added */
+	unsigned	dbl_enters;		/* enters skipped as duplicates */
+	unsigned	long_name_enters;	/* enters skipped: name too long */
+	unsigned	long_name_lookups;	/* lookups skipped: name too long */
+	unsigned	long_remove;		/* zapfile skipped: name too long */
+	unsigned	lru_rm;			/* valid entries recycled by enter */
+	unsigned	zapPfids;		/* calls to coda_nc_zapParentfid */
+	unsigned	zapFids;		/* calls to coda_nc_zapfid */
+	unsigned	zapFile;		/* calls to coda_nc_zapfile */
+	unsigned	zapUsers;		/* calls to coda_nc_purge_user */
+	unsigned	Flushes;		/* calls to coda_nc_flush */
+	unsigned        Sum_bucket_len;		/* set by coda_nc_gather_stats */
+	unsigned        Sum2_bucket_len;	/* sum of squared deviations */
+	unsigned        Max_bucket_len;		/* longest bucket */
+	unsigned        Num_zero_len;		/* number of empty buckets */
+	unsigned        Search_len;
+};
+
+#define CODA_NC_FIND		((u_long) 1)	/* bit indices for CODA_NC_DEBUG */
+#define CODA_NC_REMOVE		((u_long) 2)
+#define CODA_NC_INIT		((u_long) 3)
+#define CODA_NC_ENTER		((u_long) 4)
+#define CODA_NC_LOOKUP		((u_long) 5)
+#define CODA_NC_ZAPPFID		((u_long) 6)
+#define CODA_NC_ZAPFID		((u_long) 7)
+#define CODA_NC_ZAPVNODE		((u_long) 8)
+#define CODA_NC_ZAPFILE		((u_long) 9)
+#define CODA_NC_PURGEUSER		((u_long) 10)
+#define CODA_NC_FLUSH		((u_long) 11)
+#define CODA_NC_PRINTCODA_NC	((u_long) 12)	/* dump the cache after an enter */
+#define CODA_NC_PRINTSTATS	((u_long) 13)
+
+#endif
diff --git a/sys/fs/coda/coda_opstats.h b/sys/fs/coda/coda_opstats.h
new file mode 100644
index 0000000..e62c04d
--- /dev/null
+++ b/sys/fs/coda/coda_opstats.h
@@ -0,0 +1,127 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_opstats.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_opstats.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ * 
+ */
+
+/*
+ * operation stats: what the minicache can intercept that
+ * *isn't* seen by venus.  These stats are kept to augment
+ * the stats maintained by the Volume-Session mechanism.
+ */
+
+/* vfsops:
+ *          mount: not currently bounced to Venus
+ *          umount: nope
+ *          root: only first call, rest is cached.
+ *          statfs: none (bogus)
+ *          sync: none (bogus)
+ *          vget: all
+ */
+
+#define CODA_MOUNT_STATS  0
+#define CODA_UMOUNT_STATS 1
+#define CODA_ROOT_STATS   2
+#define CODA_STATFS_STATS 3
+#define CODA_SYNC_STATS   4
+#define CODA_VGET_STATS   5
+#define CODA_VFSOPS_SIZE  6	/* count of the vfsop indices above */
+
+/* vnodeops:
+ *            open: all to venus
+ *            close: all to venus
+ *            rdwr: bogus.  Maybe redirected to UFS.
+ *                          May call open/close for internal opens/closes
+ *                          (Does exec not call open?)
+ *            ioctl: causes a lookupname
+ *                   passes through
+ *            select: can't get there from here.
+ *            getattr: can be satisfied by cache
+ *            setattr: all go through
+ *            access: can be satisfied by cache
+ *            readlink: can be satisfied by cache
+ *            fsync: passes through
+ *            inactive: passes through
+ *            lookup: can be satisfied by cache
+ *            create: passes through
+ *            remove: passes through
+ *            link: passes through
+ *            rename: passes through
+ *            mkdir: passes through
+ *            rmdir: passes through
+ *            symlink: passes through
+ *            readdir: may be redirected to UFS
+ *                     may cause an "internal" open/close
+ */
+
+#define CODA_OPEN_STATS     0
+#define CODA_CLOSE_STATS    1
+#define CODA_RDWR_STATS     2
+#define CODA_IOCTL_STATS    3
+#define CODA_SELECT_STATS   4
+#define CODA_GETATTR_STATS  5
+#define CODA_SETATTR_STATS  6
+#define CODA_ACCESS_STATS   7
+#define CODA_READLINK_STATS 8
+#define CODA_FSYNC_STATS    9
+#define CODA_INACTIVE_STATS 10
+#define CODA_LOOKUP_STATS   11
+#define CODA_CREATE_STATS   12
+#define CODA_REMOVE_STATS   13
+#define CODA_LINK_STATS     14
+#define CODA_RENAME_STATS   15
+#define CODA_MKDIR_STATS    16
+#define CODA_RMDIR_STATS    17
+#define CODA_SYMLINK_STATS  18
+#define CODA_READDIR_STATS  19
+#define CODA_VNODEOPS_SIZE  20	/* count of the vnodeop indices above */
+
+/*
+ * I propose the following structures:
+ */
+
+struct coda_op_stats {
+    int opcode;       /* vfs opcode */
+    long entries;     /* number of times call attempted */
+    long sat_intrn;   /* number of times call satisfied by cache */
+    long unsat_intrn; /* number of times call failed in cache, but
+                         was not bounced to venus proper. */
+    long gen_intrn;   /* number of times call generated internally */
+                      /* (do we need that?) */
+};
+
+/*
+ * With each call to the minicache, we'll bump the counters whenever
+ * a call is satisfied internally (through the cache or through a
+ * redirect), and whenever an operation is caused internally.
+ * Then, we can add the total operations caught by the minicache
+ * to the world-wide totals, and leave a caveat for the specific
+ * graphs later.
+ */
diff --git a/sys/fs/coda/coda_pioctl.h b/sys/fs/coda/coda_pioctl.h
new file mode 100644
index 0000000..2aa55bb
--- /dev/null
+++ b/sys/fs/coda/coda_pioctl.h
@@ -0,0 +1,133 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_pioctl.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_pioctl.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_pioctl.h,v $
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.7  1998/08/28 18:12:26  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.6  1998/08/18 17:05:26  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.5  1998/08/18 16:31:51  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.4  98/01/23  11:53:54  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.3.2.1  97/12/06  17:41:29  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.3  97/12/05  10:39:31  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.2.34.2  97/11/13  22:03:06  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.2.34.1  97/11/12  12:38:11  rvb
+ * mach_vioctl.h -> pioctl.h
+ * 
+ * Revision 1.2  96/01/02  16:57:27  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ * 
+ * Revision 1.1.2.1  1995/12/20 01:57:54  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 2.4  90/08/30  11:51:12  bohman
+ * 	Ioctl changes for STDC.
+ * 	[90/08/28            bohman]
+ * 
+ * Revision 2.3  89/03/09  22:10:26  rpd
+ * 	More cleanup.
+ * 
+ * Revision 2.2  89/02/25  17:58:32  gm0w
+ * 	Changes for cleanup.
+ * 
+ *  7-Feb-87  Avadis Tevanian (avie) at Carnegie-Mellon University
+ *	No need for VICE conditional.
+ *
+ * 22-Oct-86  Jay Kistler (jjk) at Carnegie-Mellon University
+ *	Created from Andrew's vice.h and viceioctl.h.
+ *
+ */
+/*
+ * ITC Remote file system - vice ioctl interface module
+ */
+
+/*
+ *  TODO:  Find /usr/local/include/viceioctl.h.
+ */
+
+#ifndef	_SYS_PIOCTL_H_
+#define _SYS_PIOCTL_H_
+
+/* The 2K limits (not defined in this header) are a consequence of the
+   size of the kernel buffer used to buffer requests from the user to
+   venus--2*MAXPATHLEN.  The buffer pointers may be null, or the counts
+   may be 0 if there are no input or output parameters
+ */
+
+#define _VICEIOCTL(id)  ((unsigned int ) _IOW('V', id, struct ViceIoctl))
+
+/* Use _VICEIOCTL to define up to 256 vice ioctl's.  These ioctl's
+   all potentially have in/out parameters--this depends upon the
+   values in the ViceIoctl structure.  This structure is itself passed
+   into the kernel by the normal ioctl parameter passing mechanism.
+ */
+
+#define _VALIDVICEIOCTL(com) ((com) >= _VICEIOCTL(0) && (com) <= _VICEIOCTL(255))
+
+#endif
diff --git a/sys/fs/coda/coda_psdev.c b/sys/fs/coda/coda_psdev.c
new file mode 100644
index 0000000..0d29f6e
--- /dev/null
+++ b/sys/fs/coda/coda_psdev.c
@@ -0,0 +1,788 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_psdev.c,v 1.9 1998/11/11 20:32:20 rvb Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  */
+
+/* 
+ * These routines define the pseudo device for communication between
+ * Coda's Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c, 
+ * but I moved them to make it easier to port the Minicache without 
+ * porting coda. -- DCS 10/12/94
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_psdev.c,v $
+ * Revision 1.9  1998/11/11 20:32:20  rvb
+ * coda_lookup now passes up an extra flag.  But old veni will
+ * be ok; new veni will check /dev/cfs0 to make sure that a new
+ * kernel is running.
+ * Also, a bug in vc_nb_close iff CODA_SIGNAL's were seen has been
+ * fixed.
+ *
+ * Revision 1.8  1998/10/28 20:31:13  rvb
+ * Change the way unmounting happens to guarantee that the
+ * client programs are allowed to finish up (coda_call is
+ * forced to complete) and release their locks.  Thus there
+ * is a reasonable chance that the vflush implicit in the
+ * unmount will not get hung on held locks.
+ *
+ * Revision 1.7  1998/09/29 20:19:45  rvb
+ * Fixes for lkm:
+ * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL
+ * 2. don't pass -DCODA to lkm build
+ *
+ * Revision 1.6  1998/09/28 20:52:58  rvb
+ * Cleanup and fix THE bug
+ *
+ * Revision 1.5  1998/09/25 17:38:31  rvb
+ * Put "stray" printouts under DIAGNOSTIC.  Make everything build
+ * with DEBUG on.  Add support for lkm.  (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.4  1998/09/13 13:57:59  rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.9  1998/08/28 18:12:17  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.8  1998/08/18 17:05:15  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.7  1998/08/18 16:31:41  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8  1998/06/09 23:30:42  rvb
+ * Try to allow ^C -- take 1
+ *
+ * Revision 1.5.2.8  98/01/23  11:21:04  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.5.2.7  98/01/22  22:22:21  rvb
+ * sync 1.2 and 1.3
+ * 
+ * Revision 1.5.2.6  98/01/22  13:11:24  rvb
+ * Move make_coda_node ctlfid later so vfsp is known; work on ^c and ^z
+ * 
+ * Revision 1.5.2.5  97/12/16  22:01:27  rvb
+ * Oops add cfs_subr.h cfs_venus.h; sync with peter
+ * 
+ * Revision 1.5.2.4  97/12/16  12:40:05  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.5.2.3  97/12/10  14:08:24  rvb
+ * Fix O_ flags; check result in coda_call
+ * 
+ * Revision 1.5.2.2  97/12/10  11:40:24  rvb
+ * No more ody
+ * 
+ * Revision 1.5.2.1  97/12/06  17:41:20  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.5  97/12/05  10:39:16  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.4.18.9  97/12/05  08:58:07  rvb
+ * peter found this one
+ * 
+ * Revision 1.4.18.8  97/11/26  15:28:57  rvb
+ * Cant make downcall pbuf == union cfs_downcalls yet
+ * 
+ * Revision 1.4.18.7  97/11/25  09:40:49  rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ * 
+ * Revision 1.4.18.6  97/11/20  11:46:41  rvb
+ * Capture current cfs_venus
+ * 
+ * Revision 1.4.18.5  97/11/18  10:27:15  rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ * 
+ * Revision 1.4.18.4  97/11/13  22:02:59  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.4.18.3  97/11/12  12:09:38  rvb
+ * reorg pass1
+ * 
+ * Revision 1.4.18.2  97/10/29  16:06:09  rvb
+ * Kill DYING
+ * 
+ * Revision 1.4.18.1  1997/10/28 23:10:15  rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.4  1996/12/12 22:10:58  bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.3  1996/11/13 04:14:20  bnoble
+ * Merging BNOBLE_WORK_6_20_96 into main line
+ *
+ * Revision 1.2.8.1  1996/08/22 14:25:04  bnoble
+ * Added a return code from vc_nb_close
+ *
+ * Revision 1.2  1996/01/02 16:56:58  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1  1995/12/20 01:57:24  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 1.1  1995/03/14  20:52:15  bnoble
+ * Initial revision
+ *
+ */
+
+/* These routines are the device entry points for Venus. */
+
+extern int coda_nc_initialized;    /* Set if cache has been initialized */
+
+#include <vcoda.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/file.h>
+#include <sys/ioccom.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_namecache.h>
+#include <coda/coda_io.h>
+#include <coda/coda_psdev.h>
+
+#define CTL_C
+
+int coda_psdev_print_entry = 0;	/* nonzero: ENTRY prints the name of each function entered */
+static
+int outstanding_upcalls = 0;	/* sleepers woken by vc_nb_close that have not yet left coda_call */
+int coda_call_sleep = PZERO - 1;	/* tsleep priority used by coda_call and vc_nb_close */
+#ifdef	CTL_C
+int coda_pcatch = PCATCH;	/* OR'ed into coda_call's tsleep priority (see the CTL_C comment there) */
+#else
+#endif
+
+#define ENTRY if(coda_psdev_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
+
+void vcodaattach(int n);
+
+struct vmsg {			/* one message queued between coda_call and Venus */
+    struct queue vm_chain;	/* linkage on vc_requests or vc_replys */
+    caddr_t	 vm_data;	/* message buffer: read by Venus, reply copied back into it */
+    u_short	 vm_flags;	/* VM_* bits defined below */
+    u_short      vm_inSize;	/* Size is at most 5000 bytes */
+    u_short	 vm_outSize;	/* reply space offered; overwritten by vc_nb_write */
+    u_short	 vm_opcode; 	/* copied from data to save ptr lookup */
+    int		 vm_unique;	/* sequence number used to match a reply to its request */
+    caddr_t	 vm_sleep;	/* Not used by Mach. Only its address is used here, as the tsleep/wakeup channel. */
+};
+
+#define	VM_READ	    1	/* set by vc_nb_read once Venus has fetched the request */
+#define	VM_WRITE    2	/* set by vc_nb_write once the reply has been copied in */
+#define	VM_INTR	    4	/* not referenced anywhere in this file */
+
+/* vcodaattach: intentionally empty; the argument n is ignored */
+void
+vcodaattach(n)
+    int n;
+{
+}
+
+int
+vc_nb_open(dev, flag, mode, p)
+    dev_t        dev;
+    int          flag;
+    int          mode;
+    struct proc *p;             /* NetBSD only */
+{
+    /* Open the Venus device for minor(dev); flag, mode and p are unused. */
+    struct coda_mntinfo *mnt;
+    struct vcomm *comm;
+
+    ENTRY;
+    if (minor(dev) < 0 || minor(dev) >= NVCODA)
+	return (ENXIO);
+    /* Bring up the name cache if nobody has done so yet. */
+    if (!coda_nc_initialized)
+	coda_nc_init();
+    /* Exclusive open: refused while VC_OPEN() still reports an opener. */
+    mnt = &coda_mnttbl[minor(dev)];
+    comm = &mnt->mi_vcomm;
+    if (VC_OPEN(comm))
+	return (EBUSY);
+    /* Fresh channel: cleared selinfo, empty queues, nothing mounted. */
+    bzero(&(comm->vc_selproc), sizeof (struct selinfo));
+    INIT_QUEUE(comm->vc_requests);
+    INIT_QUEUE(comm->vc_replys);
+    MARK_VC_OPEN(comm);
+    mnt->mi_vfsp = NULL;
+    mnt->mi_rootvp = NULL;
+    return (0);
+}
+
+int 
+vc_nb_close (dev, flag, mode, p)    
+    dev_t        dev;      
+    int          flag;     	/* handed on to dounmount() */
+    int          mode;     	/* unused */
+    struct proc *p;		/* handed on to dounmount() */
+{
+    register struct vcomm *vcp;
+    register struct vmsg *vmp, *nvmp = NULL;
+    struct coda_mntinfo *mi;
+    int                 err;
+	/* Close the Venus device: wake every pending upcall, wait for them to leave coda_call, then unmount. */
+    ENTRY;
+
+    if (minor(dev) >= NVCODA || minor(dev) < 0)
+	return(ENXIO);
+
+    mi = &coda_mnttbl[minor(dev)];
+    vcp = &(mi->mi_vcomm);
+    
+    if (!VC_OPEN(vcp))
+	panic("vcclose: not open");
+    
+    /* prevent future operations on this vfs from succeeding by auto-
+     * unmounting any vfs mounted via this device. This frees user or
+     * sysadm from having to remember where all mount points are located.
+     * Put this before WAKEUPs to avoid queuing new messages between
+     * the WAKEUP and the unmount (which can happen if we're unlucky)
+     */
+    if (!mi->mi_rootvp) {
+	/* Plain open/close with nothing mounted: just mark the device closed. */
+	MARK_VC_CLOSED(vcp);
+	return 0;
+    }
+
+    /* Let unmount know this is for real */
+    VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
+    coda_unmounting(mi->mi_vfsp);
+
+    outstanding_upcalls = 0;	/* recounted below: one per sleeper we wake */
+    /* Wakeup clients so they can return. */
+    for (vmp = (struct vmsg *)GETNEXT(vcp->vc_requests);
+	 !EOQ(vmp, vcp->vc_requests);
+	 vmp = nvmp)
+    {
+    	nvmp = (struct vmsg *)GETNEXT(vmp->vm_chain);	/* fetch the successor first: vmp may be freed below */
+	/* Free signal request messages and don't wakeup cause
+	   no one is waiting. */
+	if (vmp->vm_opcode == CODA_SIGNAL) {
+	    CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
+	    CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
+	    continue;	/* freed without unlinking; vc_nb_open re-initializes the queue */
+	}
+	outstanding_upcalls++;	
+	wakeup(&vmp->vm_sleep);
+    }
+
+    for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys);	/* requests Venus had read but not yet answered */
+	 !EOQ(vmp, vcp->vc_replys);
+	 vmp = (struct vmsg *)GETNEXT(vmp->vm_chain))
+    {
+	outstanding_upcalls++;	
+	wakeup(&vmp->vm_sleep);
+    }
+
+    MARK_VC_CLOSED(vcp);	/* woken sleepers now fail VC_OPEN() in coda_call and return ENODEV */
+
+    if (outstanding_upcalls) {	/* coda_call issues the matching wakeup when the count reaches zero */
+#ifdef	CODA_VERBOSE
+	printf("presleep: outstanding_upcalls = %d\n", outstanding_upcalls);
+    	(void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
+	printf("postsleep: outstanding_upcalls = %d\n", outstanding_upcalls);
+#else
+    	(void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
+#endif
+    }
+
+    err = dounmount(mi->mi_vfsp, flag, p);
+    if (err)
+	myprintf(("Error %d unmounting vfs in vcclose(%d)\n", 
+	           err, minor(dev)));
+    return 0;	/* an unmount failure is only logged, never returned */
+}
+
+int 
+vc_nb_read(dev, uiop, flag)   
+    dev_t        dev;  
+    struct uio  *uiop; 
+    int          flag;	/* unused */
+{
+    register struct vcomm *	vcp;
+    register struct vmsg *vmp;
+    int error = 0;
+    /* Copy the request at the head of vc_requests out to Venus, then park it on vc_replys. */
+    ENTRY;
+
+    if (minor(dev) >= NVCODA || minor(dev) < 0)
+	return(ENXIO);
+    
+    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;
+    /* Get message at head of request queue. */
+    if (EMPTY(vcp->vc_requests))
+	return(0);	/* Nothing to read */
+    
+    vmp = (struct vmsg *)GETNEXT(vcp->vc_requests);
+    
+    /* Move the input args into userspace */
+    uiop->uio_rw = UIO_READ;
+    error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop);
+    if (error) {
+	myprintf(("vcread: error (%d) on uiomove\n", error));
+	error = EINVAL;	/* every uiomove failure is reported as EINVAL; the message is still dequeued below */
+    }
+
+#ifdef OLD_DIAGNOSTIC    
+    if (vmp->vm_chain.forw == 0 || vmp->vm_chain.back == 0)
+	panic("vc_nb_read: bad chain");
+#endif
+
+    REMQUE(vmp->vm_chain);
+    
+    /* If request was a signal, free up the message and don't
+       enqueue it in the reply queue. */
+    if (vmp->vm_opcode == CODA_SIGNAL) {
+	if (codadebug)
+	    myprintf(("vcread: signal msg (%d, %d)\n", 
+		      vmp->vm_opcode, vmp->vm_unique));
+	CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
+	CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
+	return(error);
+    }
+    
+    vmp->vm_flags |= VM_READ;	/* tells coda_call that Venus has seen this request */
+    INSQUE(vmp->vm_chain, vcp->vc_replys);
+    
+    return(error);
+}
+
+int
+vc_nb_write(dev, uiop, flag)   
+    dev_t        dev;  
+    struct uio  *uiop; 
+    int          flag;	/* unused */
+{
+    register struct vcomm *	vcp;
+    register struct vmsg *vmp;
+    struct coda_out_hdr *out;
+    u_long seq;
+    u_long opcode;
+    int buf[2];	/* first two words of the message: opcode, then unique (sequence) */
+    int error = 0;
+    /* Accept a message written by Venus: either a downcall or the reply to a pending upcall. */
+    ENTRY;
+
+    if (minor(dev) >= NVCODA || minor(dev) < 0)
+	return(ENXIO);
+    
+    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;
+    
+    /* Peek at the opcode, unique without transferring the data. */
+    uiop->uio_rw = UIO_WRITE;
+    error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);
+    if (error) {
+	myprintf(("vcwrite: error (%d) on uiomove\n", error));
+	return(EINVAL);
+    }
+    
+    opcode = buf[0];
+    seq = buf[1];
+	
+    if (codadebug)
+	myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
+    
+    if (DOWNCALL(opcode)) {	/* downcalls are handled here and never matched against vc_replys */
+	union outputArgs pbuf;
+	
+	/* get the rest of the data. */
+	uiop->uio_rw = UIO_WRITE;
+	error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result, sizeof(pbuf) - (sizeof(int)*2), uiop);
+	if (error) {
+	    myprintf(("vcwrite: error (%d) on uiomove (Op %ld seq %ld)\n", 
+		      error, opcode, seq));
+	    return(EINVAL);
+	    }
+	
+	return handleDownCall(opcode, &pbuf);
+    }
+    
+    /* Look for the message on the (waiting for) reply queue. */
+    for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys);
+	 !EOQ(vmp, vcp->vc_replys);
+	 vmp = (struct vmsg *)GETNEXT(vmp->vm_chain))
+    {
+	if (vmp->vm_unique == seq) break;
+    }
+    
+    if (EOQ(vmp, vcp->vc_replys)) {
+	if (codadebug)
+	    myprintf(("vcwrite: msg (%ld, %ld) not found\n", opcode, seq));
+	
+	return(ESRCH);
+	}
+    
+    /* Remove the message from the reply queue */
+    REMQUE(vmp->vm_chain);
+    
+    /* move data into response buffer. */
+    out = (struct coda_out_hdr *)vmp->vm_data;
+    /* Don't need to copy opcode and uniquifier. */
+    
+    /* get the rest of the data. */
+    if (vmp->vm_outSize < uiop->uio_resid) {
+	myprintf(("vcwrite: more data than asked for (%d < %d)\n",
+		  vmp->vm_outSize, uiop->uio_resid));
+	wakeup(&vmp->vm_sleep); 	/* Notify caller of the error. */
+	return(EINVAL);	/* NOTE(review): vmp is already off the queue and VM_WRITE is unset; coda_call will REMQUE it again -- confirm */
+    } 
+    
+    buf[0] = uiop->uio_resid; 	/* Save this value. */
+    uiop->uio_rw = UIO_WRITE;
+    error = uiomove((caddr_t) &out->result, vmp->vm_outSize - (sizeof(int) * 2), uiop);
+    if (error) {
+	myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n", 
+		  error, opcode, seq));
+	return(EINVAL);	/* NOTE(review): unlike the size check above, the sleeper is not woken here -- confirm intended */
+    }
+    
+    /* I don't think these are used, but just in case. */
+    /* XXX - aren't these two already correct? -bnoble */
+    out->opcode = opcode;
+    out->unique = seq;
+    vmp->vm_outSize	= buf[0];	/* Amount of data transferred? */
+    vmp->vm_flags |= VM_WRITE;	/* tells coda_call the reply is in place */
+    wakeup(&vmp->vm_sleep);
+    
+    return(0);
+}
+
+int
+vc_nb_ioctl(dev, cmd, addr, flag, p)
+    dev_t         dev;
+    u_long        cmd;
+    caddr_t       addr;
+    int           flag;
+    struct proc  *p;
+{
+    /*
+     * Control interface of the Venus device; dev, flag and p are unused.
+     *   CODARESIZE          coda_nc_resize() with the sizes in *addr
+     *   CODASTATS           coda_nc_gather_stats(), ENODEV if !coda_nc_use
+     *   CODAPRINT           print_coda_nc(), ENODEV if !coda_nc_use
+     *   CIOC_KERNEL_VERSION query (0) or verify (1, 2) coda_kernel_version
+     * Any other command yields EINVAL.
+     */
+    u_int *versionp;
+    struct coda_resize *rs;
+
+    ENTRY;
+
+    if (cmd == CODARESIZE) {
+	rs = (struct coda_resize *)addr;
+	return (coda_nc_resize(rs->hashsize, rs->heapsize, IS_DOWNCALL));
+    }
+    if (cmd == CODASTATS) {
+	/* Only meaningful while the name cache is in use. */
+	if (!coda_nc_use)
+	    return (ENODEV);
+	coda_nc_gather_stats();
+	return (0);
+    }
+    if (cmd == CODAPRINT) {
+	if (!coda_nc_use)
+	    return (ENODEV);
+	print_coda_nc();
+	return (0);
+    }
+
+    if (cmd != CIOC_KERNEL_VERSION)
+	return (EINVAL);
+
+    versionp = (u_int *)addr;
+    if (*versionp == 0) {
+	/* Query: hand the kernel's version number back through addr. */
+	*versionp = coda_kernel_version;
+	return (0);
+    }
+    /* Verify: only 1 and 2 are accepted, and only when they match. */
+    if (*versionp == 1 || *versionp == 2)
+	return (coda_kernel_version != *versionp ? ENOENT : 0);
+    return (ENOENT);
+}
+
+int
+vc_nb_poll(dev, events, p)
+    dev_t         dev;
+    int           events;
+    struct proc  *p;
+{
+    /* Report read readiness: ready once a request is queued for Venus. */
+    struct vcomm *comm;
+    int readable;
+
+    ENTRY;
+    /* NOTE(review): an errno comes back where an event mask is expected. */
+    if (minor(dev) < 0 || minor(dev) >= NVCODA)
+	return (ENXIO);
+
+    /* Only POLLIN/POLLRDNORM are answered; other events are never ready. */
+    readable = events & (POLLIN | POLLRDNORM);
+    if (readable == 0)
+	return (0);
+
+    comm = &coda_mnttbl[minor(dev)].mi_vcomm;
+    if (!EMPTY(comm->vc_requests))
+	return (readable);
+    /* Queue empty: record p so the selwakeup() in coda_call can rouse it. */
+    selrecord(p, &(comm->vc_selproc));
+    return (0);
+}
+
+/*
+ * Statistics: coda_call bumps ncalls and the per-opcode reqs[] counter.
+ */
+struct coda_clstat coda_clstat;
+
+/* 
+ * Key question: whether to sleep interruptibly or uninterruptibly when
+ * waiting for Venus.  The former seems better (cause you can ^C a
+ * job), but then GNU-EMACS completion breaks. Use tsleep with no
+ * timeout, and no longjmp happens. But, when sleeping
+ * "uninterruptibly", we don't get told if it returns abnormally
+ * (e.g. kill -9).  
+ */
+/* coda_call: queue one upcall for Venus and sleep until it is answered, interrupted, or Venus closes. */
+int
+coda_call(mntinfo, inSize, outSize, buffer) 
+     struct coda_mntinfo *mntinfo; int inSize; int *outSize; caddr_t buffer;
+{
+	struct vcomm *vcp;
+	struct vmsg *vmp;
+	int error;
+#ifdef	CTL_C
+	struct proc *p = curproc;
+	unsigned int psig_omask = p->p_sigmask;	/* restored after the sleep loop */
+	int i;
+#endif
+	if (mntinfo == NULL) {
+	    /* Unlikely, but could be a race condition with a dying warden */
+	    return ENODEV;
+	}
+
+	vcp = &(mntinfo->mi_vcomm);
+	
+	coda_clstat.ncalls++;
+	coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;	/* NOTE(review): opcode indexes reqs[] unchecked -- assumes callers pass a valid opcode */
+
+	if (!VC_OPEN(vcp))
+	    return(ENODEV);
+
+	CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));
+	/* Format the request message. */
+	vmp->vm_data = buffer;
+	vmp->vm_flags = 0;
+	vmp->vm_inSize = inSize;	/* stored in a u_short field */
+	vmp->vm_outSize 
+	    = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
+	vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
+	vmp->vm_unique = ++vcp->vc_seq;
+	if (codadebug)
+	    myprintf(("Doing a call for %d.%d\n", 
+		      vmp->vm_opcode, vmp->vm_unique));
+	
+	/* Fill in the common input args. */
+	((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;
+
+	/* Append msg to request queue and poke Venus. */
+	INSQUE(vmp->vm_chain, vcp->vc_requests);
+	selwakeup(&(vcp->vc_selproc));
+
+	/* We can be interrupted while we wait for Venus to process
+	 * our request.  If the interrupt occurs before Venus has read
+	 * the request, we dequeue and return. If it occurs after the
+	 * read but before the reply, we dequeue, send a signal
+	 * message, and return. If it occurs after the reply we ignore
+	 * it. In no case do we want to restart the syscall.  If it
+	 * was interrupted by a venus shutdown (vcclose), return
+	 * ENODEV.  */
+
+	/* Ignore return, We have to check anyway */
+#ifdef	CTL_C
+	/* This is work in progress.  Setting coda_pcatch lets tsleep reawaken
+	   on a ^c or ^z.  The problem is that emacs sets certain interrupts
+	   as SA_RESTART.  This means that we should exit sleep handle the
+	   "signal" and then go to sleep again.  Mostly this is done by letting
+	   the syscall complete and be restarted.  We are not idempotent and 
+	   can not do this.  A better solution is necessary.
+	 */
+	i = 0;
+	do {
+	    error = tsleep(&vmp->vm_sleep, (coda_call_sleep|coda_pcatch), "coda_call", hz*2);
+	    if (error == 0)	/* woken by vc_nb_write or vc_nb_close */
+	    	break;
+	    else if (error == EWOULDBLOCK) {	/* the hz*2 timeout expired: sleep again */
+#ifdef	CODA_VERBOSE
+		    printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
+#endif
+    	    } else if (p->p_siglist == sigmask(SIGIO)) {	/* only SIGIO pending: block it and sleep again */
+		    p->p_sigmask |= p->p_siglist;
+#ifdef	CODA_VERBOSE
+		    printf("coda_call: tsleep returns %d SIGIO, cnt %d\n", error, i);
+#endif
+    	    } else if (p->p_siglist == sigmask(SIGALRM)) {	/* only SIGALRM pending: block it and sleep again */
+		    p->p_sigmask |= p->p_siglist;
+#ifdef	CODA_VERBOSE
+		    printf("coda_call: tsleep returns %d SIGALRM, cnt %d\n", error, i);
+#endif
+	    } else {	/* any other wakeup reason: report it and stop waiting */
+		    printf("coda_call: tsleep returns %d, cnt %d\n", error, i);
+		    printf("coda_call: siglist = %x, sigmask = %x, mask %x\n",
+			    p->p_siglist, p->p_sigmask,
+			    p->p_siglist & ~p->p_sigmask);
+		    break;
+#ifdef	notyet
+		    p->p_sigmask |= p->p_siglist;
+		    printf("coda_call: new mask, siglist = %x, sigmask = %x, mask %x\n",
+			    p->p_siglist, p->p_sigmask,
+			    p->p_siglist & ~p->p_sigmask);
+#endif
+	    }
+	} while (error && i++ < 128 && VC_OPEN(vcp));	/* bounded retry: ends once i reaches 128 or Venus closes */
+	p->p_sigmask = psig_omask;	/* undo any signals blocked inside the loop */
+#else
+	(void) tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
+#endif
+	if (VC_OPEN(vcp)) {	/* Venus is still alive */
+ 	/* Op went through, interrupt or not... */
+	    if (vmp->vm_flags & VM_WRITE) {
+		error = 0;
+		*outSize = vmp->vm_outSize;
+	    }
+
+	    else if (!(vmp->vm_flags & VM_READ)) { 
+		/* Interrupted before venus read it. */
+#ifdef	CODA_VERBOSE
+		if (1)
+#else
+		if (codadebug)
+#endif
+		    myprintf(("interrupted before read: op = %d.%d, flags = %x\n",
+			   vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+		REMQUE(vmp->vm_chain);
+		error = EINTR;
+	    }
+	    
+	    else { 	
+		/* (!(vmp->vm_flags & VM_WRITE)) means interrupted after
+                   upcall started */
+		/* Interrupted after start of upcall, send venus a signal */
+		struct coda_in_hdr *dog;
+		struct vmsg *svmp;
+		
+#ifdef	CODA_VERBOSE
+		if (1)
+#else
+		if (codadebug)
+#endif
+		    myprintf(("Sending Venus a signal: op = %d.%d, flags = %x\n",
+			   vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+		
+		REMQUE(vmp->vm_chain);
+		error = EINTR;
+		
+		CODA_ALLOC(svmp, struct vmsg *, sizeof (struct vmsg));
+
+		CODA_ALLOC((svmp->vm_data), char *, sizeof (struct coda_in_hdr));
+		dog = (struct coda_in_hdr *)svmp->vm_data;
+		
+		svmp->vm_flags = 0;
+		dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
+		dog->unique = svmp->vm_unique = vmp->vm_unique;	/* same sequence number as the interrupted request */
+		svmp->vm_inSize = sizeof (struct coda_in_hdr);
+/*??? rvb */	svmp->vm_outSize = sizeof (struct coda_in_hdr);
+		
+		if (codadebug)
+		    myprintf(("coda_call: enqueing signal msg (%d, %d)\n",
+			   svmp->vm_opcode, svmp->vm_unique));
+		
+		/* insert at head of queue! */
+		INSQUE(svmp->vm_chain, vcp->vc_requests);
+		selwakeup(&(vcp->vc_selproc));	/* svmp is freed later by vc_nb_read or vc_nb_close */
+	    }
+	}
+
+	else {	/* If venus died (!VC_OPEN(vcp)) */
+	    if (codadebug)
+		myprintf(("vcclose woke op %d.%d flags %d\n",
+		       vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+	    
+		error = ENODEV;	/* unconditional despite the indentation: not part of the if (codadebug) above */
+	}
+
+	CODA_FREE(vmp, sizeof(struct vmsg));
+
+	if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))	/* last sleeper out releases vc_nb_close */
+		wakeup(&outstanding_upcalls);
+
+	if (!error)	/* on success, return the result code Venus stored in the reply header */
+		error = ((struct coda_out_hdr *)buffer)->result;
+	return(error);
+}
diff --git a/sys/fs/coda/coda_psdev.h b/sys/fs/coda/coda_psdev.h
new file mode 100644
index 0000000..11922ad
--- /dev/null
+++ b/sys/fs/coda/coda_psdev.h
@@ -0,0 +1,39 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_psdev.c,v 1.4 1998/09/13 13:57:59 rvb Exp $
+ * 
+ */
+
+int vc_nb_open(dev_t dev, int flag, int mode, struct proc *p);	/* open; EBUSY if already open */
+int vc_nb_close (dev_t dev, int flag, int mode, struct proc *p);	/* wake pending upcalls, then unmount */
+int vc_nb_read(dev_t dev, struct uio *uiop, int flag);	/* Venus fetches the next queued request */
+int vc_nb_write(dev_t dev, struct uio *uiop, int flag);	/* Venus delivers a reply or a downcall */
+int vc_nb_ioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p);	/* name cache and version controls */
+int vc_nb_poll(dev_t dev, int events, struct proc *p);	/* readable when a request is queued */
diff --git a/sys/fs/coda/coda_subr.c b/sys/fs/coda/coda_subr.c
new file mode 100644
index 0000000..40d2d0b
--- /dev/null
+++ b/sys/fs/coda/coda_subr.c
@@ -0,0 +1,747 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_subr.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_subr.c,v 1.8 1998/10/28 19:33:50 rvb Exp $
+ * 
+  */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  */
+
+/*
+ * HISTORY
+ * $Log: coda_subr.c,v $
+ * Revision 1.8  1998/10/28 19:33:50  rvb
+ * Venus must be passed O_CREAT flag on VOP_OPEN iff this is
+ * a creat so that we can will allow a mode 444 file to be
+ * written into.  Sync with the latest coda.h and deal with
+ * collateral damage.
+ *
+ * Revision 1.7  1998/09/29 20:19:45  rvb
+ * Fixes for lkm:
+ * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL
+ * 2. don't pass -DCODA to lkm build
+ *
+ * Revision 1.6  1998/09/25 17:38:31  rvb
+ * Put "stray" printouts under DIAGNOSTIC.  Make everything build
+ * with DEBUG on.  Add support for lkm.  (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.5  1998/09/13 13:57:59  rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.4  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.11  1998/08/28 18:12:18  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10  1998/08/18 17:05:16  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9  1998/08/18 16:31:41  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8  98/01/31  20:53:12  rvb
+ * First version that works on FreeBSD 2.2.5
+ * 
+ * Revision 1.7  98/01/23  11:53:42  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.6.2.3  98/01/23  11:21:05  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.6.2.2  97/12/16  12:40:06  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.6.2.1  97/12/06  17:41:21  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.6  97/12/05  10:39:17  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.5.4.8  97/11/26  15:28:58  rvb
+ * Cant make downcall pbuf == union cfs_downcalls yet
+ * 
+ * Revision 1.5.4.7  97/11/20  11:46:42  rvb
+ * Capture current cfs_venus
+ * 
+ * Revision 1.5.4.6  97/11/18  10:27:16  rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ * 
+ * Revision 1.5.4.5  97/11/13  22:03:00  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.5.4.4  97/11/12  12:09:39  rvb
+ * reorg pass1
+ * 
+ * Revision 1.5.4.3  97/11/06  21:02:38  rvb
+ * first pass at ^c ^z
+ * 
+ * Revision 1.5.4.2  97/10/29  16:06:27  rvb
+ * Kill DYING
+ * 
+ * Revision 1.5.4.1  97/10/28 23:10:16  rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.5  97/08/05  11:08:17  lily
+ * Removed cfsnc_replace, replaced it with a coda_find, unhash, and
+ * rehash.  This fixes a cnode leak and a bug in which the fid is
+ * not actually replaced.  (cfs_namecache.c, cfsnc.h, cfs_subr.c)
+ * 
+ * Revision 1.4  96/12/12  22:10:59  bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases. 
+ * There may be more
+ * 
+ * Revision 1.3  1996/12/05 16:20:15  bnoble
+ * Minor debugging aids
+ *
+ * Revision 1.2  1996/01/02 16:57:01  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1  1995/12/20 01:57:27  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:07:59  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:07:58  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.8  1995/03/03  17:00:04  dcs
+ * Fixed kernel bug involving sleep and upcalls. Basically if you killed
+ * a job waiting on venus, the venus upcall queues got trashed. Depending
+ * on luck, you could kill the kernel or not.
+ * (mods to cfs_subr.c and cfs_mach.d)
+ *
+ * Revision 2.7  95/03/02  22:45:21  dcs
+ * Sun4 compatibility
+ * 
+ * Revision 2.6  95/02/17  16:25:17  dcs
+ * These versions represent several changes:
+ * 1. Allow venus to restart even if outstanding references exist.
+ * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d
+ * 3. Allow ody_expand to return many members, not just one.
+ * 
+ * Revision 2.5  94/11/09  15:56:26  dcs
+ * Had the thread sleeping on the wrong thing!
+ * 
+ * Revision 2.4  94/10/14  09:57:57  dcs
+ * Made changes 'cause sun4s have braindead compilers
+ * 
+ * Revision 2.3  94/10/12  16:46:26  dcs
+ * Cleaned kernel/venus interface by removing XDR junk, plus
+ * so cleanup to allow this code to be more easily ported.
+ * 
+ * Revision 1.2  92/10/27  17:58:22  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 2.4  92/09/30  14:16:26  mja
+ * 	Incorporated Dave Steere's fix for the GNU-Emacs bug.
+ * 	Also, included his coda_flush routine in place of the former coda_nc_flush.
+ * 	[91/02/07            jjk]
+ * 
+ * 	Added contributors blurb.
+ * 	[90/12/13            jjk]
+ * 
+ * 	Hack to allow users to keep coda venus calls uninterruptible. THis
+ * 	basically prevents the Gnu-emacs bug from appearing, in which a call
+ * 	was being interrupted, and return EINTR, but gnu didn't check for the
+ * 	error and figured the file was buggered.
+ * 	[90/12/09            dcs]
+ * 
+ * Revision 2.3  90/08/10  10:23:20  mrt
+ * 	Removed include of vm/vm_page.h as it no longer exists.
+ * 	[90/08/10            mrt]
+ * 
+ * Revision 2.2  90/07/05  11:26:35  mrt
+ * 	Initialize name cache on first call to vcopen.
+ * 	[90/05/23            dcs]
+ * 
+ * 	Created for the Coda File System.
+ * 	[90/05/23            dcs]
+ * 
+ * Revision 1.5  90/05/31  17:01:35  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * Revision 1.2  90/03/19  15:56:25  dcs
+ * Initialize name cache on first call to vcopen.
+ * 
+ * Revision 1.1  90/03/15  10:43:26  jjk
+ * Initial revision
+ * 
+ */ 
+
+/* NOTES: rvb
+ * 1.	Added coda_unmounting to mark all cnodes as being UNMOUNTING.  This has to
+ *	 be done before dounmount is called.  Because some of the routines that
+ *	 dounmount calls before coda_unmounted might try to force flushes to venus.
+ *	 The vnode pager does this.
+ * 2.	coda_unmounting marks all cnodes scanning coda_cache.
+ * 3.	coda_checkunmounting (under DEBUG) checks all cnodes by chasing the vnodes
+ *	 under the /coda mount point.
+ * 4.	coda_cacheprint (under DEBUG) prints names with vnode/cnode address
+ */
+
+#include <vcoda.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/select.h>
+#include <sys/mount.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_namecache.h>
+
+int coda_active = 0;	/* bumped on every successful coda_find() */
+int coda_reuse = 0;	/* bumped when coda_alloc() recycles a free-list cnode */
+int coda_new = 0;	/* bumped when coda_alloc() obtains fresh storage */
+
+struct cnode *coda_freelist = NULL;	/* cnodes handed back through coda_free() */
+struct cnode *coda_cache[CODA_CACHESIZE];	/* hash buckets filled by coda_save() */
+
+#define coda_hash(fid) (((fid)->Volume + (fid)->Vnode) & (CODA_CACHESIZE-1))	/* bucket index; NOTE(review): the mask only acts as a modulo if CODA_CACHESIZE is a power of two -- confirm */
+#define	CNODE_NEXT(cp)	((cp)->c_next)	/* link shared by the hash chains and the free list */
+#define ODD(vnode)        ((vnode) & 0x1)	/* low bit of a Vnode number; comments in this file treat odd as "directory" */
+
+/*
+ * Hand out a zeroed cnode.  The head of coda_freelist is recycled when
+ * there is one (counted in coda_reuse); otherwise fresh storage comes
+ * from CODA_ALLOC (counted in coda_new).
+ */
+struct cnode *
+coda_alloc(void)
+{
+    struct cnode *node = coda_freelist;
+
+    if (node != NULL) {
+	coda_freelist = CNODE_NEXT(node);
+	coda_reuse++;
+    } else {
+	CODA_ALLOC(node, struct cnode *, sizeof(struct cnode));
+	/* This VM-info hook is defined empty, so the call is a no-op. */
+#define VNODE_VM_INFO_INIT(vp)         /* MT */
+	VNODE_VM_INFO_INIT(CTOV(node));
+	coda_new++;
+    }
+
+    /* Recycled or new, the caller always starts from a cleared cnode. */
+    bzero(node, sizeof (struct cnode));
+    return(node);
+}
+
+/*
+ * Give a cnode back to the allocator: it is pushed on the front of
+ * coda_freelist, linked through its own next pointer.
+ */
+void
+coda_free(struct cnode *cp)
+{
+
+    CNODE_NEXT(cp) = coda_freelist;
+    coda_freelist = cp;
+}
+
+/*
+ * Enter a cnode at the head of the coda_cache bucket its fid hashes to.
+ */
+void
+coda_save(struct cnode *cp)
+{
+	struct cnode **bucket = &coda_cache[coda_hash(&cp->c_fid)];
+	CNODE_NEXT(cp) = *bucket;
+	*bucket = cp;
+}
+
+/*
+ * Take a cnode off the coda_cache chain its fid hashes to.
+ *
+ * The chain is walked until the cnode itself (compared by address) is
+ * found; it is then spliced out and its next link cleared.  A cnode
+ * that is not on that chain is left untouched.
+ */
+void
+coda_unsave(struct cnode *cp)
+{
+    struct cnode **linkp;
+
+    /*
+     * linkp addresses whichever pointer refers to the entry under
+     * inspection: first the bucket slot, afterwards a predecessor's
+     * next field.  Unlinking is therefore the same store in both cases.
+     */
+    for (linkp = &coda_cache[coda_hash(&cp->c_fid)]; *linkp != NULL;
+	 linkp = &CNODE_NEXT(*linkp)) {
+	if (*linkp != cp)
+	    continue;
+
+	*linkp = CNODE_NEXT(cp);
+	CNODE_NEXT(cp) = (struct cnode *)NULL;
+	return;
+    }
+}
+
+/*
+ * Look a cnode up by fid.  An entry flagged as unmounting is treated as
+ * bogus and passed over.  Each successful lookup bumps coda_active.
+ * NOTE: this allows multiple cnodes with same fid -- dcs 1/25/95
+ */
+struct cnode *
+coda_find(ViceFid *fid)
+{
+    struct cnode *cp;
+
+    for (cp = coda_cache[coda_hash(fid)]; cp != NULL; cp = CNODE_NEXT(cp)) {
+	/* All three fid components have to agree. */
+	if (cp->c_fid.Vnode != fid->Vnode ||
+	    cp->c_fid.Volume != fid->Volume ||
+	    cp->c_fid.Unique != fid->Unique)
+	    continue;
+	if (IS_UNMOUNTING(cp))
+	    continue;
+
+	coda_active++;
+	return(cp);
+    }
+    return(NULL);
+}
+
+/*
+ * coda_kill is called as a side effect to vcopen, so that cnodes left
+ * around from an earlier run of a venus or warden are dealt with before
+ * the new instance gets going.  Since multiple venii/wardens can be
+ * running, only the cnodes of one particular mount are looked at.
+ * -- DCS 12/1/94
+ *
+ * As written here the routine flushes the name cache and then counts
+ * (and, with debugging enabled, reports) the cnodes of that mount which
+ * are still in the cnode cache.  It does not set any flag on them
+ * itself; marking cnodes is what coda_unmounting() does.
+ *
+ * Returns the number of cnodes found for whoIam.
+ */
+
+int
+coda_kill(struct mount *whoIam, enum dc_status dcstat)
+{
+	struct cnode *cp;
+	int bucket;
+	int live = 0;
+
+	/* This is slightly overkill, but should work. Eventually it'd be
+	 * nice to only flush those entries from the namecache that
+	 * reference a vnode in this vfs.  */
+	coda_nc_flush(dcstat);
+
+	for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+		for (cp = coda_cache[bucket]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			/* Cnodes of other mounts are none of our business. */
+			if (CTOV(cp)->v_mount != whoIam)
+				continue;
+
+#ifdef	DEBUG
+			printf("coda_kill: vp %p, cp %p\n", CTOV(cp), cp);
+#endif
+			live++;
+			CODADEBUG(CODA_FLUSH,
+				 myprintf(("Live cnode fid %lx.%lx.%lx flags %d count %d\n",
+					   (cp->c_fid).Volume,
+					   (cp->c_fid).Vnode,
+					   (cp->c_fid).Unique,
+					   cp->c_flags,
+					   CTOV(cp)->v_usecount)); );
+		}
+	}
+
+	return live;
+}
+
+/*
+ * There are two reasons why a cnode may be in use: it may be in the
+ * name cache or it may be executing.  Both are dealt with here: the
+ * name cache is flushed, then coda_vmflush() is applied to every cached
+ * cnode whose Vnode number is even, i.e. to files.
+ */
+void
+coda_flush(enum dc_status dcstat)
+{
+    struct cnode *cp;
+    int bucket;
+
+    /* Account for the request in the client statistics. */
+    coda_clstat.reqs[CODA_FLUSH]++;
+    coda_clstat.ncalls++;
+
+    coda_nc_flush(dcstat);	    /* flush files from the name cache */
+
+    for (bucket = 0; bucket < CODA_CACHESIZE; bucket++)
+	for (cp = coda_cache[bucket]; cp != NULL; cp = CNODE_NEXT(cp))
+	    if ((cp->c_fid.Vnode & 0x1) == 0) /* only files can be executed */
+		coda_vmflush(cp);
+}
+
+/*
+ * As a debugging measure, print out any cnodes that lived through a
+ * name cache flush: one line per entry still in the cnode cache, with
+ * its fid and the use count of its vnode.
+ */
+void
+coda_testflush(void)
+{
+    struct cnode *cp;
+    int bucket = 0;
+
+    while (bucket < CODA_CACHESIZE) {
+	for (cp = coda_cache[bucket]; cp != NULL; cp = CNODE_NEXT(cp)) {
+	    myprintf(("Live cnode fid %lx.%lx.%lx count %d\n",
+		      (cp->c_fid).Volume,(cp->c_fid).Vnode,
+		      (cp->c_fid).Unique, CTOV(cp)->v_usecount));
+	}
+	bucket++;
+    }
+}
+
+/*
+ * Step through all cnodes and mark those of the given mount as
+ * unmounting.  NetBSD kernels may try to fsync them now that venus
+ * is dead, which would be a bad thing.  A cnode found locked or
+ * wanted has both flags cleared and wakeup() called on it first.
+ */
+void
+coda_unmounting(struct mount *whoIam)
+{
+	struct cnode *cp;
+	int bucket;
+
+	for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+		for (cp = coda_cache[bucket]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			if (CTOV(cp)->v_mount != whoIam)
+				continue;
+
+			if ((cp->c_flags & (C_LOCKED|C_WANTED)) != 0) {
+				printf("coda_unmounting: Unlocking %p\n", cp);
+				cp->c_flags &= ~(C_LOCKED|C_WANTED);
+				wakeup((caddr_t) cp);
+			}
+			cp->c_flags |= C_UNMOUNTING;
+		}
+	}
+}
+
+#ifdef	DEBUG
+void
+coda_checkunmounting(mp)
+	struct mount *mp;
+{	/* Debug check: report, and fix up, any vnode on mp's list whose cnode lacks C_UNMOUNTING. */
+	register struct vnode *vp, *nvp;
+	struct cnode *cp;
+	int count = 0, bad = 0;	/* tallies only; never read back in this routine */
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+		if (vp->v_mount != mp)	/* vnode is not on this mount: rescan from the list head */
+			goto loop;
+		nvp = vp->v_mntvnodes.le_next;	/* fetch the successor before working on vp */
+		cp = VTOC(vp);
+		count++;
+		if (!(cp->c_flags & C_UNMOUNTING)) {
+			bad++;
+			printf("vp %p, cp %p missed\n", vp, cp);
+			cp->c_flags |= C_UNMOUNTING;	/* set the flag that was found missing */
+		}
+	}
+}
+
+/* Debug aid: print the control vnode, then every cached cnode of whoIam. */
+void
+coda_cacheprint(struct mount *whoIam)
+{
+	struct cnode *cp;
+	int bucket;
+	int shown = 0;
+
+	printf("coda_cacheprint: coda_ctlvp %p, cp %p", coda_ctlvp, VTOC(coda_ctlvp));
+	coda_nc_name(VTOC(coda_ctlvp));
+	printf("\n");
+
+	for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+		for (cp = coda_cache[bucket]; cp != NULL; cp = CNODE_NEXT(cp)) {
+			if (CTOV(cp)->v_mount != whoIam)
+				continue;
+			printf("coda_cacheprint: vp %p, cp %p", CTOV(cp), cp);
+			coda_nc_name(cp);
+			printf("\n");
+			shown++;
+		}
+	}
+	printf("coda_cacheprint: count %d\n", shown);
+}
+#endif
+
+/*
+ * There are 6 cases where invalidations occur. The semantics of each
+ * is listed here.
+ *
+ * CODA_FLUSH     -- flush all entries from the name cache and the cnode cache.
+ * CODA_PURGEUSER -- flush all entries from the name cache for a specific user
+ *                  This call is a result of token expiration.
+ *
+ * The next two are the result of callbacks on a file or directory.
+ * CODA_ZAPDIR    -- flush the attributes for the dir from its cnode.
+ *                  Zap all children of this directory from the namecache.
+ * CODA_ZAPFILE   -- flush the attributes for a file.
+ *
+ * The fifth is a result of Venus detecting an inconsistent file.
+ * CODA_PURGEFID  -- flush the attribute for the file
+ *                  If it is a dir (odd vnode), purge its 
+ *                  children from the namecache
+ *                  remove the file from the namecache.
+ *
+ * The sixth allows Venus to replace local fids with global ones
+ * during reintegration.
+ *
+ * CODA_REPLACE -- replace one ViceFid with another throughout the name cache 
+ */
+
+/*
+ * Carry out one invalidation request ("downcall") from venus.
+ *
+ * opcode selects the request (the CODA_* cases described above) and out
+ * points at its arguments.  Returns 0, the result of coda_vmflush()
+ * for CODA_ZAPFILE and CODA_PURGEFID, or EINVAL for an unknown opcode.
+ */
+int handleDownCall(opcode, out)
+     int opcode; union outputArgs *out;
+{
+    int error;
+
+    /* Handle invalidate requests. */
+    switch (opcode) {
+      case CODA_FLUSH : {
+	  coda_flush(IS_DOWNCALL);
+	  CODADEBUG(CODA_FLUSH,coda_testflush();)    /* print remaining cnodes */
+	      return(0);
+      }
+
+      case CODA_PURGEUSER : {
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_PURGEUSER]++;
+	  /* XXX - need to prevent fsync's */
+	  coda_nc_purge_user(out->coda_purgeuser.cred.cr_uid, IS_DOWNCALL);
+	  return(0);
+      }
+
+      case CODA_ZAPFILE : {
+	  struct cnode *cp;
+
+	  error = 0;
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_ZAPFILE]++;
+	  cp = coda_find(&out->coda_zapfile.CodaFid);
+	  if (cp != NULL) {
+	      vref(CTOV(cp));	/* hold the vnode while we work on it */
+	      cp->c_flags &= ~C_VATTR;
+	      if (CTOV(cp)->v_flag & VTEXT)
+		  error = coda_vmflush(cp);
+	      /* The format is split into adjacent literals; a literal may not span lines. */
+	      CODADEBUG(CODA_ZAPFILE, myprintf(("zapfile: fid = (%lx.%lx.%lx), "
+					      "refcnt = %d, error = %d\n",
+					      cp->c_fid.Volume,
+					      cp->c_fid.Vnode,
+					      cp->c_fid.Unique,
+					      CTOV(cp)->v_usecount - 1, error)););
+	      if (CTOV(cp)->v_usecount == 1) {	/* a count of 1 is just the vref above */
+		  cp->c_flags |= C_PURGING;
+	      }
+	      vrele(CTOV(cp));
+	  }
+	  return(error);
+      }
+
+      case CODA_ZAPDIR : {
+	  struct cnode *cp;
+
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_ZAPDIR]++;
+
+	  cp = coda_find(&out->coda_zapdir.CodaFid);
+	  if (cp != NULL) {
+	      vref(CTOV(cp));
+	      cp->c_flags &= ~C_VATTR;
+	      coda_nc_zapParentfid(&out->coda_zapdir.CodaFid, IS_DOWNCALL);
+
+	      CODADEBUG(CODA_ZAPDIR, myprintf(("zapdir: fid = (%lx.%lx.%lx), "
+					     "refcnt = %d\n",cp->c_fid.Volume,
+					     cp->c_fid.Vnode,
+					     cp->c_fid.Unique,
+					     CTOV(cp)->v_usecount - 1)););
+	      if (CTOV(cp)->v_usecount == 1) {
+		  cp->c_flags |= C_PURGING;
+	      }
+	      vrele(CTOV(cp));
+	  }
+
+	  return(0);
+      }
+
+      case CODA_PURGEFID : {
+	  struct cnode *cp;
+
+	  error = 0;
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_PURGEFID]++;
+
+	  cp = coda_find(&out->coda_purgefid.CodaFid);
+	  if (cp != NULL) {
+	      vref(CTOV(cp));
+	      if (ODD(out->coda_purgefid.CodaFid.Vnode)) { /* Vnode is a directory */
+		  coda_nc_zapParentfid(&out->coda_purgefid.CodaFid,
+				     IS_DOWNCALL);
+	      }
+	      cp->c_flags &= ~C_VATTR;
+	      coda_nc_zapfid(&out->coda_purgefid.CodaFid, IS_DOWNCALL);
+	      if (!(ODD(out->coda_purgefid.CodaFid.Vnode))
+		  && (CTOV(cp)->v_flag & VTEXT)) {
+		  error = coda_vmflush(cp);
+	      }
+	      CODADEBUG(CODA_PURGEFID, myprintf(("purgefid: fid = (%lx.%lx.%lx), refcnt = %d, error = %d\n",
+                                            cp->c_fid.Volume, cp->c_fid.Vnode,
+                                            cp->c_fid.Unique,
+					    CTOV(cp)->v_usecount - 1, error)););
+	      if (CTOV(cp)->v_usecount == 1) {
+		  cp->c_flags |= C_PURGING;
+	      }
+	      vrele(CTOV(cp));
+	  }
+	  return(error);
+      }
+
+      case CODA_REPLACE : {
+	  struct cnode *cp = NULL;
+
+	  coda_clstat.ncalls++;
+	  coda_clstat.reqs[CODA_REPLACE]++;
+
+	  cp = coda_find(&out->coda_replace.OldFid);
+	  if (cp != NULL) {
+	      /* remove the cnode from the hash table, replace the fid, and reinsert */
+	      vref(CTOV(cp));
+	      coda_unsave(cp);
+	      cp->c_fid = out->coda_replace.NewFid;
+	      coda_save(cp);
+
+	      CODADEBUG(CODA_REPLACE, myprintf(("replace: oldfid = (%lx.%lx.%lx), newfid = (%lx.%lx.%lx), cp = %p\n",
+					   out->coda_replace.OldFid.Volume,
+					   out->coda_replace.OldFid.Vnode,
+					   out->coda_replace.OldFid.Unique,
+					   cp->c_fid.Volume, cp->c_fid.Vnode,
+					   cp->c_fid.Unique, cp));)
+	      vrele(CTOV(cp));
+	  }
+	  return (0);
+      }
+      default:
+      	myprintf(("handleDownCall: unknown opcode %d\n", opcode));
+	return (EINVAL);
+    }
+}
+
+/* coda_grab_vnode: lives in either cfs_mach.c or cfs_nbsd.c */
+
+/* Placeholder: nothing is flushed here; success (0) is always reported. */
+int
+coda_vmflush(struct cnode *cp)
+{
+    return 0;
+}
+
+
+/* kernel-internal debugging switches */
+
+/* Turn them all on: codadebug and coda_nc_debug to -1, the entry traces to 1. */
+void coda_debugon(void)
+{
+    coda_vfsop_print_entry = 1;
+    coda_psdev_print_entry = 1;
+    coda_vnop_print_entry = 1;
+    coda_nc_debug = -1;
+    codadebug = -1;
+}
+
+void coda_debugoff(void)	/* inverse of coda_debugon(): zero all five switches */
+{
+    coda_vfsop_print_entry = 0;
+    coda_psdev_print_entry = 0;
+    coda_vnop_print_entry = 0;
+    coda_nc_debug = 0;
+    codadebug = 0;
+}
+
+/*
+ * Utilities used by both client and server
+ * Standard levels:
+ * 0) no debugging
+ * 1) hard failures
+ * 2) soft failures
+ * 3) current test software
+ * 4) main procedure entry points
+ * 5) main procedure exit points
+ * 6) utility procedure entry points
+ * 7) utility procedure exit points
+ * 8) obscure procedure entry points
+ * 9) obscure procedure exit points
+ * 10) random stuff
+ * 11) all <= 1
+ * 12) all <= 2
+ * 13) all <= 3
+ * ...
+ */
diff --git a/sys/fs/coda/coda_subr.h b/sys/fs/coda/coda_subr.h
new file mode 100644
index 0000000..fe27bfd
--- /dev/null
+++ b/sys/fs/coda/coda_subr.h
@@ -0,0 +1,45 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_subr.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_subr.h,v 1.5 1998/09/13 13:57:59 rvb Exp $
+ * 
+ */
+
+struct cnode *coda_alloc(void);	/* zeroed cnode, recycled from the free list or new */
+void  coda_free(struct cnode *cp);	/* push cp on the free list */
+struct cnode *coda_find(ViceFid *fid);	/* hash lookup by fid; NULL when nothing matches */
+void coda_flush(enum dc_status dcstat);	/* flush the name cache, then coda_vmflush() every file cnode */
+void coda_testflush(void);	/* print every cnode still in the cnode cache */
+void coda_checkunmounting(struct mount *mp);	/* defined only under #ifdef DEBUG */
+void coda_cacheprint(struct mount *whoIam);	/* defined only under #ifdef DEBUG */
+void coda_debugon(void);	/* set all kernel-internal debugging switches */
+void coda_debugoff(void);	/* clear all kernel-internal debugging switches */
+int  coda_kill(struct mount *whoIam, enum dc_status dcstat);	/* flushes the name cache; returns the count of whoIam's cnodes */
+void coda_save(struct cnode *cp);	/* enter cp in the fid hash table */
+void coda_unsave(struct cnode *cp);	/* take cp out of the fid hash table */
diff --git a/sys/fs/coda/coda_venus.c b/sys/fs/coda/coda_venus.c
new file mode 100644
index 0000000..96228f1
--- /dev/null
+++ b/sys/fs/coda/coda_venus.c
@@ -0,0 +1,660 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/cfs/coda_venus.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_venus.c,v 1.5 1998/10/28 19:33:50 rvb Exp $
+ * 
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/ioccom.h>
+#include <sys/fcntl.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_pioctl.h>
+
+#define DECL_NO_IN(name) 	/* locals for a call whose request is only the common coda_in_hdr */ \
+    struct coda_in_hdr *inp;				\
+    struct name ## _out *outp;				\
+    int name ## _size = sizeof (struct coda_in_hdr);	/* buffer size; ALLOC_NO_IN may raise it to Osize */ \
+    int Isize = sizeof (struct coda_in_hdr);		/* request length */ \
+    int Osize = sizeof (struct name ## _out);		/* reply length */ \
+    int error	/* no ';' here: the invoking line supplies it */
+
+#define DECL(name)		/* locals for a call with its own <name>_in and <name>_out structs */ \
+    struct name ## _in *inp;				\
+    struct name ## _out *outp;				\
+    int name ## _size = sizeof (struct name ## _in);	/* buffer size; ALLOC may raise it to Osize */ \
+    int Isize = sizeof (struct name ## _in);		/* request length */ \
+    int Osize = sizeof (struct name ## _out);		/* reply length */ \
+    int error	/* no ';' here: the invoking line supplies it */
+
+#define DECL_NO_OUT(name)	/* locals for a call whose reply is only the common coda_out_hdr */ \
+    struct name ## _in *inp;				\
+    struct coda_out_hdr *outp;				\
+    int name ## _size = sizeof (struct name ## _in);	/* buffer size; ALLOC_NO_OUT may raise it to Osize */ \
+    int Isize = sizeof (struct name ## _in);		/* request length */ \
+    int Osize = sizeof (struct coda_out_hdr);		/* reply length */ \
+    int error	/* no ';' here: the invoking line supplies it */
+
+#define ALLOC_NO_IN(name)	/* companion of DECL_NO_IN: allocate the message buffer */ \
+    if (Osize > name ## _size)				/* buffer must hold the larger of request and reply */ \
+    	name ## _size = Osize;				\
+    CODA_ALLOC(inp, struct coda_in_hdr *, name ## _size);\
+    outp = (struct name ## _out *) inp	/* the reply overlays the request buffer */
+
+#define ALLOC(name)		/* companion of DECL: allocate the message buffer */ \
+    if (Osize > name ## _size)				/* buffer must hold the larger of request and reply */ \
+    	name ## _size = Osize;				\
+    CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
+    outp = (struct name ## _out *) inp	/* the reply overlays the request buffer */
+
+#define ALLOC_NO_OUT(name)	/* companion of DECL_NO_OUT: allocate the message buffer */ \
+    if (Osize > name ## _size)				/* buffer must hold the larger of request and reply */ \
+    	name ## _size = Osize;				\
+    CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
+    outp = (struct coda_out_hdr *) inp	/* the reply overlays the request buffer */
+
+#define STRCPY(struc, name, len) /* copy len bytes of name into the message at byte offset inp->struc */ \
+    bcopy(name, (char *)inp + (int)inp->struc, len); \
+    ((char*)inp + (int)inp->struc)[len++] = 0;	/* NUL-terminate; NB: this increments the caller's len */ \
+    Isize += len	/* request grows by the string plus its terminator */
+
+#define INIT_IN(in, op, ident, p) /* fill the input header *in; several statements, so use only as a full statement inside braces */ \
+	  (in)->opcode = (op); \
+	  (in)->pid = (p) ? (p)->p_pid : -1; \
+          (in)->pgid = (p) ? (p)->p_pgid : -1; \
+          (in)->sid = ((p) && (p)->p_session && (p)->p_session->s_leader) ? ((p)->p_session->s_leader->p_pid) : -1; \
+          if ((ident) != NOCRED) {                              \
+	      (in)->cred.cr_uid = (ident)->cr_uid;              \
+	      (in)->cred.cr_groupid = (ident)->cr_gid;          \
+          } else {                                            \
+	      bzero(&((in)->cred),sizeof(struct coda_cred));  \
+	      (in)->cred.cr_uid = -1;                         \
+	      (in)->cred.cr_groupid = -1;                     \
+          }
+
+#define	CNV_OFLAG(to, from) 	/* translate open flags in from into C_O_* bits in to; both are evaluated repeatedly */ \
+    do { 						\
+	  (to) = 0;					\
+	  if ((from) & FREAD)   (to) |= C_O_READ; 	\
+	  if ((from) & FWRITE)  (to) |= C_O_WRITE; 	\
+	  if ((from) & O_TRUNC) (to) |= C_O_TRUNC; 	\
+	  if ((from) & O_EXCL)  (to) |= C_O_EXCL; 	\
+	  if ((from) & O_CREAT) (to) |= C_O_CREAT;	\
+    } while (0)
+
+#define CNV_VV2V_ATTR(top, fromp) /* field-by-field attribute copy; venus_getattr() uses it to fill the caller's struct vattr from the reply */ \
+	do { \
+		(top)->va_type = (fromp)->va_type; \
+		(top)->va_mode = (fromp)->va_mode; \
+		(top)->va_nlink = (fromp)->va_nlink; \
+		(top)->va_uid = (fromp)->va_uid; \
+		(top)->va_gid = (fromp)->va_gid; \
+		(top)->va_fsid = VNOVAL;	/* not taken from the source */ \
+		(top)->va_fileid = (fromp)->va_fileid; \
+		(top)->va_size = (fromp)->va_size; \
+		(top)->va_blocksize = (fromp)->va_blocksize; \
+		(top)->va_atime = (fromp)->va_atime; \
+		(top)->va_mtime = (fromp)->va_mtime; \
+		(top)->va_ctime = (fromp)->va_ctime; \
+		(top)->va_gen = (fromp)->va_gen; \
+		(top)->va_flags = (fromp)->va_flags; \
+		(top)->va_rdev = (fromp)->va_rdev; \
+		(top)->va_bytes = (fromp)->va_bytes; \
+		(top)->va_filerev = (fromp)->va_filerev; \
+		(top)->va_vaflags = VNOVAL;	/* not taken from the source */ \
+		(top)->va_spare = VNOVAL;	/* not taken from the source */ \
+	} while (0)
+
+#define CNV_V2VV_ATTR(top, fromp) /* reverse copy; venus_setattr() uses it to load the request from a struct vattr. va_fsid, va_vaflags and va_spare are not touched */ \
+	do { \
+		(top)->va_type = (fromp)->va_type; \
+		(top)->va_mode = (fromp)->va_mode; \
+		(top)->va_nlink = (fromp)->va_nlink; \
+		(top)->va_uid = (fromp)->va_uid; \
+		(top)->va_gid = (fromp)->va_gid; \
+		(top)->va_fileid = (fromp)->va_fileid; \
+		(top)->va_size = (fromp)->va_size; \
+		(top)->va_blocksize = (fromp)->va_blocksize; \
+		(top)->va_atime = (fromp)->va_atime; \
+		(top)->va_mtime = (fromp)->va_mtime; \
+		(top)->va_ctime = (fromp)->va_ctime; \
+		(top)->va_gen = (fromp)->va_gen; \
+		(top)->va_flags = (fromp)->va_flags; \
+		(top)->va_rdev = (fromp)->va_rdev; \
+		(top)->va_bytes = (fromp)->va_bytes; \
+		(top)->va_filerev = (fromp)->va_filerev; \
+	} while (0)
+
+
+int coda_kernel_version = CODA_KERNEL_VERSION;
+
+/* Issue a CODA_ROOT upcall; on success the reply's fid is stored in *VFid. */
+int
+venus_root(void *mdp, struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid)
+{
+    DECL_NO_IN(coda_root);		/* declares inp, outp, sizes, error */
+    ALLOC_NO_IN(coda_root);		/* one buffer serves request and reply */
+
+    /* The request is nothing but the common input header. */
+    INIT_IN(inp, CODA_ROOT, cred, p);
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    /* outp aliases inp, so read the answer before releasing the buffer. */
+    if (error == 0)
+	*VFid = outp->VFid;
+
+    CODA_FREE(inp, coda_root_size);
+    return error;
+}
+
+/* Issue CODA_OPEN for fid; on success *dev and *inode come from the reply. */
+int
+venus_open(void *mdp, ViceFid *fid, int flag,
+	struct ucred *cred, struct proc *p,
+/*out*/	dev_t *dev, ino_t *inode)
+{
+    int cflag;
+    DECL(coda_open);			/* declares inp, outp, sizes, error */
+    ALLOC(coda_open);			/* one buffer serves request and reply */
+
+    /* Translate the open flags into the C_O_* form first. */
+    CNV_OFLAG(cflag, flag);
+    INIT_IN(&inp->ih, CODA_OPEN, cred, p);
+    inp->flags = cflag;
+    inp->VFid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (error == 0) {
+	*dev =  outp->dev;
+	*inode = outp->inode;
+    }
+    CODA_FREE(inp, coda_open_size);
+    return error;
+}
+
+/* Issue CODA_CLOSE for fid, passing the translated open flags along. */
+int
+venus_close(void *mdp, ViceFid *fid, int flag,
+	struct ucred *cred, struct proc *p)
+{
+    int cflag;
+    DECL_NO_OUT(coda_close);		/* declares inp, outp, sizes, error */
+    ALLOC_NO_OUT(coda_close);		/* one buffer serves request and reply */
+
+    CNV_OFLAG(cflag, flag);
+    INIT_IN(&inp->ih, CODA_CLOSE, cred, p);
+    inp->flags = cflag;
+    inp->VFid = *fid;
+    /* Only the common reply header comes back, so there is nothing to unpack. */
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    CODA_FREE(inp, coda_close_size);
+    return error;
+}
+
+/* Placeholders only: these two calls will not exist, because the
+ * container file is read/written directly. */
+void
+venus_read(void)
+{
+    /* intentionally empty */
+}
+
+void
+venus_write(void)
+{
+    /* intentionally empty */
+}
+
+/*
+ * Forward a pioctl to venus.  The ioctl's are for the control file, not
+ * for normal files.  data is a struct PioctlData describing the user's
+ * in/out buffers.  Returns 0 or an errno; EINVAL when the input does not
+ * fit in one message or the reply exceeds the user's output buffer.
+ */
+int
+venus_ioctl(void *mdp, ViceFid *fid, int com, int flag, caddr_t data,
+	struct ucred *cred, struct proc *p)
+{
+    DECL(coda_ioctl);			/* sets Isize & Osize */
+    struct PioctlData *iap = (struct PioctlData *)data;
+    int in_size = iap->vi.in_size;
+    int tmp;
+
+    coda_ioctl_size = VC_MAXMSGSIZE;
+    /* The payload is copied in right behind the fixed header, so refuse
+     * any size that would run past the end of the message buffer. */
+    if (in_size < 0 || in_size > coda_ioctl_size - Isize)
+	return(EINVAL);
+
+    ALLOC(coda_ioctl);			/* sets inp & outp */
+    INIT_IN(&inp->ih, CODA_IOCTL, cred, p);
+    inp->VFid = *fid;
+
+    /* command was mutated by increasing its size field to reflect the
+     * path and follow args. we need to subtract that out before sending
+     * the command to venus. */
+    inp->cmd = (com & ~(IOCPARM_MASK << 16));
+    tmp = ((com >> 16) & IOCPARM_MASK) - sizeof (char *) - sizeof (int);
+    inp->cmd |= (tmp & IOCPARM_MASK) <<	16;
+
+    inp->rwflag = flag;
+    inp->len = in_size;
+    inp->data = (char *)(sizeof (struct coda_ioctl_in));	/* an offset, not an address */
+
+    error = copyin(iap->vi.in, (char*)inp + (int)inp->data, in_size);
+    if (error) {
+	CODA_FREE(inp, coda_ioctl_size);
+	return(error);
+    }
+
+    Osize = VC_MAXMSGSIZE;
+    error = coda_call(mdp, Isize + in_size, &Osize, (char *)inp);
+    /* Hand back only the outp->len bytes venus produced, never the
+     * whole out_size the caller offered. */
+    if (!error && outp->len > iap->vi.out_size)
+	error = EINVAL;
+    if (!error)
+	error = copyout((char *)outp + (int)outp->data,
+			iap->vi.out, outp->len);
+    CODA_FREE(inp, coda_ioctl_size);
+    return error;
+}
+
+int
+venus_getattr(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	struct vattr *vap)
+{
+    DECL(coda_getattr);			/* sets Isize & Osize */
+    ALLOC(coda_getattr);			/* sets inp & outp */
+
+    /* send the getattr for fid to venus. */
+    INIT_IN(&inp->ih, CODA_GETATTR, cred, p);
+    inp->VFid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* *vap is filled in only on success */
+	CNV_VV2V_ATTR(vap, &outp->attr);	/* presumably venus attr -> struct vattr; macro not shown here */
+    }
+
+    CODA_FREE(inp, coda_getattr_size);
+    return error;			/* coda_call()'s result, unchanged */
+}
+
+int
+venus_setattr(void *mdp, ViceFid *fid, struct vattr *vap,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_setattr);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_setattr);		/* sets inp & outp */
+
+    /* send the setattr for fid to venus. */
+    INIT_IN(&inp->ih, CODA_SETATTR, cred, p);
+    inp->VFid = *fid;
+    CNV_V2VV_ATTR(&inp->attr, vap);	/* presumably struct vattr -> venus attr; macro not shown here */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_setattr_size);
+    return error;			/* coda_call()'s result, unchanged */
+}
+
+int
+venus_access(void *mdp, ViceFid *fid, int mode,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_access);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_access);		/* sets inp & outp */
+
+    /* send the access check for fid to venus. */
+    INIT_IN(&inp->ih, CODA_ACCESS, cred, p);
+    inp->VFid = *fid;
+    /* NOTE:
+     * NetBSD and Venus internals use the "data" in the low 3 bits.
+     * Hence, the conversion: bits 6-8 of mode are shifted down to bits 0-2.
+     */
+    inp->flags = mode>>6;		/* bits above 8 are shifted too, not masked off */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* the return value is the only answer */
+
+    CODA_FREE(inp, coda_access_size);
+    return error;
+}
+
+int
+venus_readlink(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	char **str, int *len)
+{
+    DECL(coda_readlink);			/* sets Isize & Osize */
+    coda_readlink_size += CODA_MAXPATHLEN;	/* grow the buffer so the reply can carry a path */
+    ALLOC(coda_readlink);		/* sets inp & outp */
+
+    /* send the readlink for fid to venus. */
+    INIT_IN(&inp->ih, CODA_READLINK, cred, p);
+    inp->VFid = *fid;
+
+    Osize += CODA_MAXPATHLEN;		/* allow the same extra room in the reply size */
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* *str and *len are set only on success */
+	    CODA_ALLOC(*str, char *, outp->count);	/* new buffer; presumably the caller must free it - confirm */
+	    *len = outp->count;
+	    bcopy((char *)outp + (int)outp->data, *str, *len);	/* exactly count bytes; no NUL is appended here */
+    }
+
+    CODA_FREE(inp, coda_readlink_size);
+    return error;
+}
+
+int
+venus_fsync(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_fsync);		/* sets Isize & Osize */
+    ALLOC_NO_OUT(coda_fsync);		/* sets inp & outp */
+
+    /* send the fsync for fid to venus. */
+    INIT_IN(&inp->ih, CODA_FSYNC, cred, p);
+    inp->VFid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_fsync_size);
+    return error;			/* coda_call()'s result, unchanged */
+}
+
+int
+venus_lookup(void *mdp, ViceFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, int *vtype)
+{
+    DECL(coda_lookup);			/* sets Isize & Osize */
+    coda_lookup_size += len + 1;		/* extra room for the name appended by STRCPY below */
+    ALLOC(coda_lookup);			/* sets inp & outp */
+
+    /* send the lookup of nm in directory fid to venus. */
+    INIT_IN(&inp->ih, CODA_LOOKUP, cred, p);
+    inp->VFid = *fid;
+
+    /* NOTE:
+     * Between version 1 and version 2 we have added an extra flag field
+     * to this structure.  But because the string was at the end and because
+     * of the weird way we represent strings by having the slot point to
+     * where the string characters are in the "heap", we can just slip the
+     * flag parameter in after the string slot pointer and veni that don't
+     * know better won't see this new flag field ...
+     * Otherwise we'd need two different venus_lookup functions.
+     */
+    inp->name = Isize;			/* offset of the name; must be taken before STRCPY advances Isize */
+    inp->flags = CLU_CASE_SENSITIVE;	/* doesn't really matter for BSD */
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* *VFid and *vtype are written only on success */
+	*VFid = outp->VFid;
+	*vtype = outp->vtype;
+    }
+
+    CODA_FREE(inp, coda_lookup_size);
+    return error;
+}
+
+int
+venus_create(void *mdp, ViceFid *fid,
+    	const char *nm, int len, int exclusive, int mode, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, struct vattr *attr)
+{
+    DECL(coda_create);			/* sets Isize & Osize */
+    coda_create_size += len + 1;		/* extra room for the name appended by STRCPY below */
+    ALLOC(coda_create);			/* sets inp & outp */
+
+    /* send the create of nm in directory fid to venus. */
+    INIT_IN(&inp->ih, CODA_CREATE, cred, p);
+    inp->VFid = *fid;
+    inp->excl = exclusive ? C_O_EXCL : 0;	/* any non-zero exclusive becomes C_O_EXCL */
+    inp->mode = mode;
+    CNV_V2VV_ATTR(&inp->attr, va);	/* presumably struct vattr -> venus attr; macro not shown here */
+
+    inp->name = Isize;			/* offset of the name; must be taken before STRCPY advances Isize */
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* *VFid and *attr are written only on success */
+	*VFid = outp->VFid;
+	CNV_VV2V_ATTR(attr, &outp->attr);
+    }
+
+    CODA_FREE(inp, coda_create_size);
+    return error;
+}
+
+int
+venus_remove(void *mdp, ViceFid *fid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_remove);		/* sets Isize & Osize */
+    coda_remove_size += len + 1;		/* extra room for the name appended by STRCPY below */
+    ALLOC_NO_OUT(coda_remove);		/* sets inp & outp */
+
+    /* send the remove of nm from directory fid to venus. */
+    INIT_IN(&inp->ih, CODA_REMOVE, cred, p);
+    inp->VFid = *fid;
+
+    inp->name = Isize;			/* offset of the name; must be taken before STRCPY advances Isize */
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_remove_size);
+    return error;
+}
+
+int
+venus_link(void *mdp, ViceFid *fid, ViceFid *tfid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_link);		/* sets Isize & Osize */
+    coda_link_size += len + 1;		/* extra room for the name appended by STRCPY below */
+    ALLOC_NO_OUT(coda_link);		/* sets inp & outp */
+
+    /* send the link request to venus. */
+    INIT_IN(&inp->ih, CODA_LINK, cred, p);
+    inp->sourceFid = *fid;		/* fid is sent as the source */
+    inp->destFid = *tfid;		/* tfid is sent as the destination */
+
+    inp->tname = Isize;			/* offset of nm; must be taken before STRCPY advances Isize */
+    STRCPY(tname, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_link_size);
+    return error;
+}
+
+int
+venus_rename(void *mdp, ViceFid *fid, ViceFid *tfid,
+        const char *nm, int len, const char *tnm, int tlen,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_rename);		/* sets Isize & Osize */
+    coda_rename_size += len + 1 + tlen + 1;	/* extra room for both names appended below */
+    ALLOC_NO_OUT(coda_rename);		/* sets inp & outp */
+
+    /* send the rename request to venus. */
+    INIT_IN(&inp->ih, CODA_RENAME, cred, p);
+    inp->sourceFid = *fid;		/* fid is sent as the source */
+    inp->destFid = *tfid;		/* tfid is sent as the destination */
+
+    inp->srcname = Isize;		/* offset of nm within the message */
+    STRCPY(srcname, nm, len);		/* increments Isize */
+
+    inp->destname = Isize;		/* offset of tnm: Isize now points past nm */
+    STRCPY(destname, tnm, tlen);	/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_rename_size);
+    return error;
+}
+
+int
+venus_mkdir(void *mdp, ViceFid *fid,
+    	const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, struct vattr *ova)
+{
+    DECL(coda_mkdir);			/* sets Isize & Osize */
+    coda_mkdir_size += len + 1;		/* extra room for the name appended by STRCPY below */
+    ALLOC(coda_mkdir);			/* sets inp & outp */
+
+    /* send the mkdir of nm in directory fid to venus. */
+    INIT_IN(&inp->ih, CODA_MKDIR, cred, p);
+    inp->VFid = *fid;
+    CNV_V2VV_ATTR(&inp->attr, va);	/* presumably struct vattr -> venus attr; macro not shown here */
+
+    inp->name = Isize;			/* offset of the name; must be taken before STRCPY advances Isize */
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* *VFid and *ova are written only on success */
+	*VFid = outp->VFid;
+	CNV_VV2V_ATTR(ova, &outp->attr);
+    }
+
+    CODA_FREE(inp, coda_mkdir_size);
+    return error;
+}
+
+int
+venus_rmdir(void *mdp, ViceFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_rmdir);		/* sets Isize & Osize */
+    coda_rmdir_size += len + 1;		/* extra room for the name appended by STRCPY below */
+    ALLOC_NO_OUT(coda_rmdir);		/* sets inp & outp */
+
+    /* send the rmdir of nm from directory fid to venus. */
+    INIT_IN(&inp->ih, CODA_RMDIR, cred, p);
+    inp->VFid = *fid;
+
+    inp->name = Isize;			/* offset of the name; must be taken before STRCPY advances Isize */
+    STRCPY(name, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_rmdir_size);
+    return error;
+}
+
+int
+venus_symlink(void *mdp, ViceFid *fid,
+        const char *lnm, int llen, const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p)
+{
+    DECL_NO_OUT(coda_symlink);		/* sets Isize & Osize */
+    coda_symlink_size += llen + 1 + len + 1;	/* extra room for both strings appended below */
+    ALLOC_NO_OUT(coda_symlink);		/* sets inp & outp */
+
+    /* send the symlink request to venus. */
+    INIT_IN(&inp->ih, CODA_SYMLINK, cred, p);
+    inp->VFid = *fid;
+    CNV_V2VV_ATTR(&inp->attr, va);	/* presumably struct vattr -> venus attr; macro not shown here */
+
+    inp->srcname = Isize;		/* offset of lnm within the message */
+    STRCPY(srcname, lnm, llen);		/* increments Isize */
+
+    inp->tname = Isize;			/* offset of nm: Isize now points past lnm */
+    STRCPY(tname, nm, len);		/* increments Isize */
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);	/* nothing is read back from the reply */
+
+    CODA_FREE(inp, coda_symlink_size);
+    return error;
+}
+
+int
+venus_readdir(void *mdp, ViceFid *fid,
+    	int count, int offset,
+	struct ucred *cred, struct proc *p,
+/*out*/	char *buffer, int *len)
+{
+    DECL(coda_readdir);			/* sets Isize & Osize */
+    coda_readdir_size = VC_MAXMSGSIZE;	/* always allocate a maximum-size message */
+    ALLOC(coda_readdir);			/* sets inp & outp */
+
+    /* send the readdir for directory fid to venus. */
+    INIT_IN(&inp->ih, CODA_READDIR, cred, p);
+    inp->VFid = *fid;
+    inp->count = count;
+    inp->offset = offset;
+
+    Osize = VC_MAXMSGSIZE;		/* accept a reply as large as the buffer */
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* buffer and *len are written only on success */
+	bcopy((char *)outp + (int)outp->data, buffer, outp->size);	/* NOTE(review): size is not checked against count here */
+	*len = outp->size;
+    }
+
+    CODA_FREE(inp, coda_readdir_size);
+    return error;
+}
+
+int
+venus_fhtovp(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, int *vtype)
+{
+    DECL(coda_vget);			/* sets Isize & Osize */
+    ALLOC(coda_vget);			/* sets inp & outp */
+
+    /* Send the vget for fid to Venus (the request code is CODA_VGET). */
+    INIT_IN(&inp->ih, CODA_VGET, cred, p);
+    inp->VFid = *fid;
+
+    error = coda_call(mdp, Isize, &Osize, (char *)inp);
+    if (!error) {			/* *VFid and *vtype are written only on success */
+	*VFid = outp->VFid;
+	*vtype = outp->vtype;
+    }
+
+    CODA_FREE(inp, coda_vget_size);
+    return error;			/* coda_call()'s result, unchanged */
+}
diff --git a/sys/fs/coda/coda_venus.h b/sys/fs/coda/coda_venus.h
new file mode 100644
index 0000000..6fdd15c
--- /dev/null
+++ b/sys/fs/coda/coda_venus.h
@@ -0,0 +1,133 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_venus.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_venus.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ * 
+ */
+
+int				/* CODA_ROOT request; fills *VFid on success */
+venus_root(void *mdp,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid);
+
+int				/* CODA_OPEN request; fills *dev and *inode on success */
+venus_open(void *mdp, ViceFid *fid, int flag,
+	struct ucred *cred, struct proc *p,
+/*out*/	dev_t *dev, ino_t *inode);
+
+int				/* CODA_CLOSE request */
+venus_close(void *mdp, ViceFid *fid, int flag,
+	struct ucred *cred, struct proc *p);
+
+void				/* empty placeholder */
+venus_read(void);
+
+void				/* empty placeholder */
+venus_write(void);
+
+int				/* CODA_IOCTL request; data points at a struct PioctlData */
+venus_ioctl(void *mdp, ViceFid *fid,
+	int com, int flag, caddr_t data,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_GETATTR request; fills *vap on success */
+venus_getattr(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	struct vattr *vap);
+
+int				/* CODA_SETATTR request */
+venus_setattr(void *mdp, ViceFid *fid, struct vattr *vap,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_ACCESS request */
+venus_access(void *mdp, ViceFid *fid, int mode,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_READLINK request; allocates *str and sets *len on success */
+venus_readlink(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	char **str, int *len);
+
+int				/* CODA_FSYNC request */
+venus_fsync(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_LOOKUP request; fills *VFid and *vtype on success */
+venus_lookup(void *mdp, ViceFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, int *vtype);
+
+int				/* CODA_CREATE request; fills *VFid and *attr on success */
+venus_create(void *mdp, ViceFid *fid,
+    	const char *nm, int len, int exclusive, int mode, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, struct vattr *attr);
+
+int				/* CODA_REMOVE request */
+venus_remove(void *mdp, ViceFid *fid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_LINK request */
+venus_link(void *mdp, ViceFid *fid, ViceFid *tfid,
+        const char *nm, int len,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_RENAME request */
+venus_rename(void *mdp, ViceFid *fid, ViceFid *tfid,
+        const char *nm, int len, const char *tnm, int tlen,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_MKDIR request; fills *VFid and *ova on success */
+venus_mkdir(void *mdp, ViceFid *fid,
+    	const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, struct vattr *ova);
+
+int				/* CODA_RMDIR request */
+venus_rmdir(void *mdp, ViceFid *fid,
+    	const char *nm, int len,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_SYMLINK request */
+venus_symlink(void *mdp, ViceFid *fid,
+        const char *lnm, int llen, const char *nm, int len, struct vattr *va,
+	struct ucred *cred, struct proc *p);
+
+int				/* CODA_READDIR request; fills buffer and *len on success */
+venus_readdir(void *mdp, ViceFid *fid,
+    	int count, int offset,
+	struct ucred *cred, struct proc *p,
+/*out*/	char *buffer, int *len);
+
+int				/* CODA_VGET request; fills *VFid and *vtype on success */
+venus_fhtovp(void *mdp, ViceFid *fid,
+	struct ucred *cred, struct proc *p,
+/*out*/	ViceFid *VFid, int *vtype);
diff --git a/sys/fs/coda/coda_vfsops.c b/sys/fs/coda/coda_vfsops.c
new file mode 100644
index 0000000..8f6befe
--- /dev/null
+++ b/sys/fs/coda/coda_vfsops.c
@@ -0,0 +1,770 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ *  	@(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_vfsops.c,v 1.10 1998/12/04 22:54:43 archie Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_vfsops.c,v $
+ * Revision 1.10  1998/12/04 22:54:43  archie
+ * Examine all occurrences of sprintf(), strcat(), and str[n]cpy()
+ * for possible buffer overflow problems. Replaced most sprintf()'s
+ * with snprintf(); for others cases, added terminating NUL bytes where
+ * appropriate, replaced constants like "16" with sizeof(), etc.
+ *
+ * These changes include several bug fixes, but most changes are for
+ * maintainability's sake. Any instance where it wasn't "immediately
+ * obvious" that a buffer overflow could not occur was made safer.
+ *
+ * Reviewed by:	Bruce Evans <bde@zeta.org.au>
+ * Reviewed by:	Matthew Dillon <dillon@apollo.backplane.com>
+ * Reviewed by:	Mike Spengler <mks@networkcs.com>
+ *
+ * Revision 1.9  1998/11/16 19:48:26  rvb
+ * A few bug fixes for Robert Watson
+ *
+ * Revision 1.8  1998/11/03 08:55:06  peter
+ * Support KLD.  We register and unregister two modules. "coda" (the vfs)
+ * via VFS_SET(), and "codadev" for the cdevsw entry.  From kldstat -v:
+ *  3    1 0xf02c5000 115d8    coda.ko
+ *         Contains modules:
+ *                 Id Name
+ *                  2 codadev
+ *                  3 coda
+ *
+ * Revision 1.7  1998/09/29 20:19:45  rvb
+ * Fixes for lkm:
+ * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL
+ * 2. don't pass -DCODA to lkm build
+ *
+ * Revision 1.6  1998/09/25 17:38:32  rvb
+ * Put "stray" printouts under DIAGNOSTIC.  Make everything build
+ * with DEBUG on.  Add support for lkm.  (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.5  1998/09/13 13:57:59  rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.4  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.11  1998/08/28 18:12:22  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10  1998/08/18 17:05:19  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9  1998/08/18 16:31:44  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8  98/02/24  22:22:48  rvb
+ * Fixes up mainly to flush iopen and friends
+ * 
+ * Revision 1.7  98/01/23  11:53:45  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.6.2.6  98/01/23  11:21:07  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.6.2.5  98/01/22  13:05:33  rvb
+ * Move make_coda_node ctlfid later so vfsp is known
+ * 
+ * Revision 1.6.2.4  97/12/19  14:26:05  rvb
+ * session id
+ * 
+ * Revision 1.6.2.3  97/12/16  12:40:11  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.6.2.2  97/12/10  11:40:25  rvb
+ * No more ody
+ * 
+ * Revision 1.6.2.1  97/12/06  17:41:24  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.6  97/12/05  10:39:21  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.5.14.8  97/11/24  15:44:46  rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ * 
+ * Revision 1.5.14.7  97/11/21  13:22:03  rvb
+ * Catch a few coda_calls in coda_vfsops.c
+ * 
+ * Revision 1.5.14.6  97/11/20  11:46:48  rvb
+ * Capture current cfs_venus
+ * 
+ * Revision 1.5.14.5  97/11/18  10:27:17  rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ * 
+ * Revision 1.5.14.4  97/11/13  22:03:01  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.5.14.3  97/11/12  12:09:40  rvb
+ * reorg pass1
+ * 
+ * Revision 1.5.14.2  97/10/29  16:06:28  rvb
+ * Kill DYING
+ * 
+ * Revision 1.5.14.1  1997/10/28 23:10:17  rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.5  1997/01/13 17:11:07  bnoble
+ * Coda statfs needs to return something other than -1 for blocks avail. and
+ * files available for wabi (and other windowsish) programs to install
+ * there correctly.
+ *
+ * Revision 1.4  1996/12/12 22:11:00  bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.3  1996/11/08 18:06:12  bnoble
+ * Minor changes in vnode operation signature, VOP_UPDATE signature, and
+ * some newly defined bits in the include files.
+ *
+ * Revision 1.2  1996/01/02 16:57:04  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1  1995/12/20 01:57:32  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:08:02  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:08:01  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.4  1995/02/17  16:25:22  dcs
+ * These versions represent several changes:
+ * 1. Allow venus to restart even if outstanding references exist.
+ * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d
+ * 3. Allow ody_expand to return many members, not just one.
+ *
+ * Revision 2.3  94/10/14  09:58:21  dcs
+ * Made changes 'cause sun4s have braindead compilers
+ * 
+ * Revision 2.2  94/10/12  16:46:33  dcs
+ * Cleaned kernel/venus interface by removing XDR junk, plus
+ * so cleanup to allow this code to be more easily ported.
+ * 
+ * Revision 1.3  93/05/28  16:24:29  bnoble
+ * *** empty log message ***
+ * 
+ * Revision 1.2  92/10/27  17:58:24  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 2.3  92/09/30  14:16:32  mja
+ * 	Added call to coda_flush to coda_unmount.
+ * 	[90/12/15            dcs]
+ * 
+ * 	Added contributors blurb.
+ * 	[90/12/13            jjk]
+ * 
+ * Revision 2.2  90/07/05  11:26:40  mrt
+ * 	Created for the Coda File System.
+ * 	[90/05/23            dcs]
+ * 
+ * Revision 1.3  90/05/31  17:01:42  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * 
+ */ 
+
+#include <vcoda.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vfsops.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_opstats.h>
+
+#include <miscfs/specfs/specdev.h>
+
+MALLOC_DEFINE(M_CODA, "CODA storage", "Various Coda Structures");
+
+int codadebug = 0;
+int coda_vfsop_print_entry = 0;		/* when non-zero, ENTRY prints the name of each function entered */
+#define ENTRY    if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
+
+struct vnode *coda_ctlvp;		/* vnode of the control file; assigned in coda_mount() */
+struct coda_mntinfo coda_mnttbl[NVCODA]; /* indexed by minor device number */
+
+/* per-operation counters for the vfs ops, one slot per operation index; reset by coda_vfsopstats_init() */
+
+struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE];
+
+#define MARK_ENTRY(op) (coda_vfsopstats[op].entries++)
+#define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++)
+#define MRAK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++)	/* sic: spelled MRAK, not MARK */
+
+extern int coda_nc_initialized;     /* Set if cache has been initialized */
+extern int vc_nb_open __P((dev_t, int, int, struct proc *));	/* compared against d_open in coda_mount() */
+
+int
+coda_vfsopstats_init(void)
+{
+	struct coda_op_stats *slot = coda_vfsopstats;
+	int op;
+
+	/* Tag each slot with its own index and clear all of its counters. */
+	for (op = 0; op < CODA_VFSOPS_SIZE; op++, slot++) {
+		slot->opcode = op;
+		slot->entries = 0;
+		slot->sat_intrn = slot->unsat_intrn = slot->gen_intrn = 0;
+	}
+
+	return 0;
+}
+
+/*
+ * Coda mount vfsop: validate the device named by data, then bind its coda_mnttbl slot to vfsp.
+ * Set up mount info record and attach it to vfs struct.  Returns 0, EBUSY, ENXIO, ENODEV or namei()'s error.
+ */
+/*ARGSUSED*/
+int
+coda_mount(vfsp, path, data, ndp, p)
+    struct mount *vfsp;		/* Allocated and initialized by mount(2) */
+    char *path;			/* path covered: ignored by the fs-layer */
+    caddr_t data;		/* User-space pathname of the Coda device; resolved with namei() below */
+    struct nameidata *ndp;	/* Clobber this to lookup the device name */
+    struct proc *p;		/* The ever-famous proc pointer */
+{
+    struct vnode *dvp;
+    struct cnode *cp;
+    dev_t dev;
+    struct coda_mntinfo *mi;
+    struct vnode *rootvp;
+    ViceFid rootfid;
+    ViceFid ctlfid;
+    int error;
+
+    ENTRY;
+
+    coda_vfsopstats_init();		/* both counter tables are reset on every mount attempt */
+    coda_vnodeopstats_init();
+    
+    MARK_ENTRY(CODA_MOUNT_STATS);
+    if (CODA_MOUNTED(vfsp)) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(EBUSY);
+    }
+    
+    /* Validate mount device.  Similar to getmdev(). */
+
+    NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, data, p);
+    error = namei(ndp);
+    dvp = ndp->ni_vp;			/* only used after the error check below */
+
+    if (error) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return (error);
+    }
+    if (dvp->v_type != VCHR) {		/* must be a character device */
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	vrele(dvp);
+	return(ENXIO);
+    }
+    dev = dvp->v_specinfo->si_rdev;
+    vrele(dvp);				/* only the device number is needed from here on */
+    if (major(dev) >= nchrdev || major(dev) < 0) {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(ENXIO);
+    }
+
+    /*
+     * See if the device table matches our expectations.
+     */
+    if (cdevsw[major(dev)]->d_open != vc_nb_open)	/* i.e. the device's open routine is not vc_nb_open */
+    {
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(ENXIO);
+    }
+    
+    if (minor(dev) >= NVCODA || minor(dev) < 0) {	/* minor indexes coda_mnttbl[] */
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(ENXIO);
+    }
+    
+    /*
+     * Initialize the mount record and link it to the vfs struct
+     */
+    mi = &coda_mnttbl[minor(dev)];
+    
+    if (!VC_OPEN(&mi->mi_vcomm)) {	/* presumably: venus has not opened this device - confirm */
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+	return(ENODEV);
+    }
+    
+    /* No initialization (here) of mi_vcomm! */
+    vfsp->mnt_data = (qaddr_t)mi;
+    vfs_getnewfsid (vfsp);
+
+    mi->mi_vfsp = vfsp;
+    
+    /*
+     * Make a root vnode to placate the Vnode interface, but don't
+     * actually make the CODA_ROOT call to venus until the first call
+     * to coda_root in case a server is down while venus is starting.
+     */
+    rootfid.Volume = 0;			/* all-zero fid: coda_root() treats it as "root not yet fetched" */
+    rootfid.Vnode = 0;
+    rootfid.Unique = 0;
+    cp = make_coda_node(&rootfid, vfsp, VDIR);
+    rootvp = CTOV(cp);
+    rootvp->v_flag |= VROOT;
+	
+    ctlfid.Volume = CTL_VOL;
+    ctlfid.Vnode = CTL_VNO;
+    ctlfid.Unique = CTL_UNI;
+/*  cp = make_coda_node(&ctlfid, vfsp, VCHR);
+    The above code seems to cause a loop in the cnode links.
+    I don't totally understand when it happens, it is caught
+    when closing down the system.
+ */
+    cp = make_coda_node(&ctlfid, 0, VCHR);	/* control node is created with a 0 mount pointer, see above */
+
+    coda_ctlvp = CTOV(cp);
+
+    /* Add vfs and rootvp to chain of vfs hanging off mntinfo */
+    mi->mi_vfsp = vfsp;			/* redundant: same value was already stored above */
+    mi->mi_rootvp = rootvp;
+    
+    /* set filesystem block size */
+    vfsp->mnt_stat.f_bsize = 8192;	    /* XXX -JJK */
+
+    /* Set f_iosize.  XXX -- inamura@isl.ntt.co.jp. 
+       For vnode_pager_haspage() references. The value should be obtained 
+       from underlying UFS. */
+    /* Checked UFS. iosize is set as 8192 */
+    vfsp->mnt_stat.f_iosize = 8192;
+
+    /* error is currently guaranteed to be zero (it still holds namei()'s
+       successful result), but in case some code changes... */
+    CODADEBUG(1,
+	     myprintf(("coda_mount returned %d\n",error)););
+    if (error)
+	MARK_INT_FAIL(CODA_MOUNT_STATS);
+    else
+	MARK_INT_SAT(CODA_MOUNT_STATS);
+    
+    return(error);
+}
+
+/*
+ * VFS start hook: traces entry and reports success; the arguments are unused.
+ */
+int
+coda_start(struct mount *vfsp, int flags, struct proc *p)
+{
+    ENTRY;
+    return 0;
+}
+
+int
+coda_unmount(vfsp, mntflags, p)
+    struct mount *vfsp;
+    int mntflags;			/* not examined in this function */
+    struct proc *p;			/* not examined in this function */
+{	/* Tear down the Coda mount on vfsp: EINVAL if not a Coda mount, EBUSY while the root cnode is not marked unmounting, else 0. */
+    struct coda_mntinfo *mi = vftomi(vfsp);
+    int active, error = 0;
+    
+    ENTRY;
+    MARK_ENTRY(CODA_UMOUNT_STATS);
+    if (!CODA_MOUNTED(vfsp)) {
+	MARK_INT_FAIL(CODA_UMOUNT_STATS);
+	return(EINVAL);
+    }
+    
+    if (mi->mi_vfsp == vfsp) {	/* We found the victim */
+	if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp)))
+	    return (EBUSY); 	/* Venus is still running */
+
+#ifdef	DEBUG
+	printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp));
+#endif
+	vrele(mi->mi_rootvp);		/* drop a reference on the root vnode */
+
+	active = coda_kill(vfsp, NOT_DOWNCALL);	/* result is only printed below */
+	mi->mi_rootvp->v_flag &= ~VROOT;	/* NOTE(review): root vnode is touched after the vrele above - confirm another ref remains */
+	error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE);
+	printf("coda_unmount: active = %d, vflush active %d\n", active, error);	/* printed unconditionally */
+	error = 0;			/* vflush()'s result is discarded, so the unmount always succeeds from here */
+	/* I'm going to take this out to allow lookups to go through. I'm
+	 * not sure it's important anyway. -- DCS 2/2/94
+	 */
+	/* vfsp->VFS_DATA = NULL; */
+
+	/* No more vfsp's to hold onto */
+	mi->mi_vfsp = NULL;
+	mi->mi_rootvp = NULL;
+
+	if (error)			/* cannot be true: error was just zeroed */
+	    MARK_INT_FAIL(CODA_UMOUNT_STATS);
+	else
+	    MARK_INT_SAT(CODA_UMOUNT_STATS);
+
+	return(error);
+    }
+    return (EINVAL);			/* the mount record does not point back at vfsp */
+}
+
+/*
+ * find root of cfs: on success *vpp is the root vnode, referenced (vref) and locked LK_EXCLUSIVE.
+ */
+int
+coda_root(vfsp, vpp)
+	struct mount *vfsp;
+	struct vnode **vpp;
+{
+    struct coda_mntinfo *mi = vftomi(vfsp);
+    struct vnode **result;		/* set to NULL below and never read */
+    int error;
+    struct proc *p = curproc;    /* XXX - bnoble */
+    ViceFid VFid;
+
+    ENTRY;
+    MARK_ENTRY(CODA_ROOT_STATS);
+    result = NULL;
+    
+    if (vfsp == mi->mi_vfsp) {
+	if ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) ||
+	    (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) ||
+	    (VTOC(mi->mi_rootvp)->c_fid.Unique != 0))
+	    { /* Found valid root: the fid is no longer the all-zero placeholder. */
+		*vpp = mi->mi_rootvp;
+		/* On Mach, this is vref.  On NetBSD, VOP_LOCK */
+#if	1
+		vref(*vpp);
+		vn_lock(*vpp, LK_EXCLUSIVE, p);
+#else
+		vget(*vpp, LK_EXCLUSIVE, p);
+#endif
+		MARK_INT_SAT(CODA_ROOT_STATS);
+		return(0);
+	    }
+    }
+
+    error = venus_root(vftomi(vfsp), p->p_cred->pc_ucred, p, &VFid);	/* also reached when vfsp != mi->mi_vfsp */
+
+    if (!error) {
+	/*
+	 * Save the new rootfid in the cnode, and rehash the cnode into the
+	 * cnode hash with the new fid key.
+	 */
+	coda_unsave(VTOC(mi->mi_rootvp));
+	VTOC(mi->mi_rootvp)->c_fid = VFid;
+	coda_save(VTOC(mi->mi_rootvp));
+
+	*vpp = mi->mi_rootvp;
+#if	1
+	vref(*vpp);
+	vn_lock(*vpp, LK_EXCLUSIVE, p);
+#else
+	vget(*vpp, LK_EXCLUSIVE, p);
+#endif
+
+	MARK_INT_SAT(CODA_ROOT_STATS);
+	goto exit;
+    } else if (error == ENODEV || error == EINTR) {
+	/* Gross hack here! */
+	/*
+	 * If Venus fails to respond to the CODA_ROOT call, coda_call returns
+	 * ENODEV. Return the uninitialized root vnode to allow vfs
+	 * operations such as unmount to continue. Without this hack,
+	 * there is no way to do an unmount if Venus dies before a 
+	 * successful CODA_ROOT call is done. All vnode operations 
+	 * will fail.  (EINTR is treated the same way.)
+	 */
+	*vpp = mi->mi_rootvp;
+#if	1
+	vref(*vpp);
+	vn_lock(*vpp, LK_EXCLUSIVE, p);
+#else
+	vget(*vpp, LK_EXCLUSIVE, p);
+#endif
+
+	MARK_INT_FAIL(CODA_ROOT_STATS);	/* counted as a failure even though 0 is returned */
+	error = 0;
+	goto exit;
+    } else {
+	CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); );
+	MARK_INT_FAIL(CODA_ROOT_STATS);
+		
+	goto exit;			/* any other error: *vpp is left untouched */
+    }
+
+ exit:
+    return(error);
+}
+
+/*
+ * Quota control entry point.  Every argument is ignored: after the
+ * optional ENTRY trace the call is refused with EOPNOTSUPP.
+ */
+int
+coda_quotactl(struct mount *vfsp, int cmd, uid_t uid, caddr_t arg,
+	      struct proc *p)
+{
+    ENTRY;
+    return (EOPNOTSUPP);
+}
+     
+/*
+ * Get file system statistics.
+ *
+ * Returns EINVAL when CODA_MOUNTED() rejects vfsp.  Otherwise *sbp is
+ * zeroed and filled in: f_type and f_fsid are copied from the mount
+ * structure, everything else is a fixed placeholder value.
+ */
+int
+coda_nb_statfs(struct mount *vfsp, struct statfs *sbp, struct proc *p)
+{
+    ENTRY;
+/*  MARK_ENTRY(CODA_STATFS_STATS); */
+    if (!CODA_MOUNTED(vfsp)) {
+/*	MARK_INT_FAIL(CODA_STATFS_STATS);*/
+	return(EINVAL);
+    }
+
+    bzero(sbp, sizeof(struct statfs));
+    /* XXX - what to do about f_flags, others? --bnoble */
+
+    sbp->f_type = vfsp->mnt_vfc->vfc_typenum;
+
+    /* Note: Normal fs's have a bsize of 0x400 == 1024 */
+    sbp->f_bsize = 8192; /* XXX */
+    sbp->f_iosize = 8192; /* XXX */
+
+    /* Block and file counts; AFS uses 0x895440 for the same purpose. */
+#define NB_SFS_SIZ 0x8AB75D
+    sbp->f_blocks = sbp->f_bfree = sbp->f_bavail = NB_SFS_SIZ;
+    sbp->f_files = sbp->f_ffree = NB_SFS_SIZ;
+
+    sbp->f_fsid = vfsp->mnt_stat.f_fsid;
+    snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda");
+    snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA");
+/*  MARK_INT_SAT(CODA_STATFS_STATS); */
+    return(0);
+}
+
+/*
+ * Flush any pending I/O.
+ *
+ * Nothing is actually flushed here: the call is only recorded through
+ * the MARK_* statistics macros and success is reported.
+ */
+int
+coda_sync(struct mount *vfsp, int waitfor, struct ucred *cred,
+	  struct proc *p)
+{
+    ENTRY;
+    MARK_ENTRY(CODA_SYNC_STATS);
+    MARK_INT_SAT(CODA_SYNC_STATS);
+    return(0);
+}
+
+/*
+ * Lookup of a vnode by inode number: not supported, always EOPNOTSUPP.
+ */
+int
+coda_vget(struct mount *vfsp, ino_t ino, struct vnode **vpp)
+{
+    ENTRY;
+    return (EOPNOTSUPP);
+}
+
+/*
+ * fhtovp is now what vget used to be in 4.3-derived systems.  For
+ * some silly reason, vget is now keyed by a 32 bit ino_t, rather than
+ * a type-specific fid.
+ *
+ * The handle is read as a struct cfid.  The control fid yields coda_ctlvp
+ * with an extra reference; any other fid is resolved by Venus and turned
+ * into a cnode by make_coda_node().  On a Venus error *vpp is cleared and
+ * the error returned.  nam, exflagsp and creadanonp are not used.
+ */
+int
+coda_fhtovp(struct mount *vfsp, struct fid *fhp, struct mbuf *nam,
+	    struct vnode **vpp, int *exflagsp, struct ucred **creadanonp)
+{
+    struct cfid *handle = (struct cfid *)fhp;
+    struct proc *p = curproc; /* XXX -mach */
+    struct cnode *cp;
+    ViceFid VFid;
+    int vtype, error;
+
+    ENTRY;
+    MARK_ENTRY(CODA_VGET_STATS);
+
+    /* Check for vget of control object. */
+    if (IS_CTL_FID(&handle->cfid_fid)) {
+	*vpp = coda_ctlvp;
+	vref(coda_ctlvp);
+	MARK_INT_SAT(CODA_VGET_STATS);
+	return(0);
+    }
+
+    error = venus_fhtovp(vftomi(vfsp), &handle->cfid_fid,
+			 p->p_cred->pc_ucred, p, &VFid, &vtype);
+    if (error) {
+	CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));)
+	*vpp = (struct vnode *)0;
+	return(error);
+    }
+
+    CODADEBUG(CODA_VGET, 
+	     myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n",
+		    VFid.Volume, VFid.Vnode, VFid.Unique, vtype, error)); )
+
+    cp = make_coda_node(&VFid, vfsp, vtype);
+    *vpp = CTOV(cp);
+    return(error);
+}
+
+/* Vnode to file handle conversion: not supported.  Both arguments are
+ * ignored and EOPNOTSUPP is returned. */
+int
+coda_vptofh(struct vnode *vnp, struct fid *fidp)
+{
+    ENTRY;
+    return (EOPNOTSUPP);
+}
+
+int
+coda_init(struct vfsconf *vfsp)
+{
+    ENTRY;
+    return 0;		/* vfsp is unused; nothing is set up, always succeeds */
+}
+
+/*
+ * To allow for greater ease of use, some vnodes may be orphaned when
+ * Venus dies.  Certain operations should still be allowed to go
+ * through, but without propagating orphan-ness.  So this function will
+ * get a new vnode for the file from the current run of Venus.
+ *
+ * *vpp supplies the fid and mount and is overwritten by coda_fhtovp().
+ * ENODEV is returned when the mount info has no vfs pointer any more.
+ */
+int
+getNewVnode(struct vnode **vpp)
+{
+    struct coda_mntinfo *mi = vftomi((*vpp)->v_mount);
+    struct cfid handle;
+
+    ENTRY;
+
+    /* We're guessing that if set, the 1st element on the list is a
+     * valid vnode to use. If not, return ENODEV as venus is dead.
+     */
+    if (mi->mi_vfsp == NULL)
+	return ENODEV;
+
+    handle.cfid_len = (short)sizeof(ViceFid);
+    handle.cfid_fid = VTOC(*vpp)->c_fid;	/* Structure assignment. */
+    return coda_fhtovp(mi->mi_vfsp, (struct fid*)&handle, NULL, vpp,
+		      NULL, NULL);
+}
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+/*
+ * Get the mount structure of the UFS file system mounted from device dev;
+ * return NULL if there is none.  Mounts of any other type are skipped,
+ * because their mnt_data is not a struct ufsmount and VFSTOUFS() on it
+ * would read unrelated (possibly NULL) memory.
+ */
+struct mount *devtomp(dev)
+    dev_t dev;
+{
+    struct mount *mp;
+    for (mp = mountlist.cqh_first; mp != (void*)&mountlist;
+	 mp = mp->mnt_list.cqe_next) {
+	if (strcmp(mp->mnt_vfc->vfc_name, "ufs") == 0 &&
+	    VFSTOUFS(mp)->um_dev == dev)
+	    return(mp);		/* a UFS mount on the wanted device */
+    }
+    return(NULL);		/* mount structure wasn't found */
+}
+
+struct vfsops coda_vfsops = {	/* VFS-level operations of Coda */
+    coda_mount,
+    coda_start,
+    coda_unmount,
+    coda_root,
+    coda_quotactl,
+    coda_nb_statfs,
+    coda_sync,
+    coda_vget,
+    (int (*) (struct mount *, struct fid *, struct sockaddr *, struct vnode **,
+	      int *, struct ucred **))
+	eopnotsupp,		/* this slot is NOT wired to coda_fhtovp() */
+    (int (*) (struct vnode *, struct fid *)) eopnotsupp, /* nor coda_vptofh() */
+    coda_init,
+};
+
+VFS_SET(coda_vfsops, coda, VFCF_NETWORK);
diff --git a/sys/fs/coda/coda_vfsops.h b/sys/fs/coda/coda_vfsops.h
new file mode 100644
index 0000000..ef23c3f
--- /dev/null
+++ b/sys/fs/coda/coda_vfsops.h
@@ -0,0 +1,63 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/cfs/coda_vfsops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ 
+ *  $Id: coda_vfsops.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ * 
+ */
+
+/*
+ * cfid structure:
+ * This overlays the fid structure (see vfs.h); coda_fhtovp() casts the
+ * struct fid it is handed to this type.  Will probably go away.
+ */
+
+struct cfid {
+    u_short	cfid_len;	/* getNewVnode() stores sizeof(ViceFid) here */
+    u_short     padding;
+    ViceFid	cfid_fid;	/* the Coda fid carried by the handle */
+};
+
+struct mount;
+/* VFS-level entry points of the Coda module. */
+int coda_vfsopstats_init(void);
+int coda_mount(struct mount *, char *, caddr_t, struct nameidata *, 
+		       struct proc *);
+int coda_start(struct mount *, int, struct proc *);
+int coda_unmount(struct mount *, int, struct proc *);
+int coda_root(struct mount *, struct vnode **);
+int coda_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *);
+int coda_nb_statfs(struct mount *, struct statfs *, struct proc *);
+int coda_sync(struct mount *, int, struct ucred *, struct proc *);
+int coda_vget(struct mount *, ino_t, struct vnode **);
+int coda_fhtovp(struct mount *, struct fid *, struct mbuf *, struct vnode **,
+		       int *, struct ucred **);
+int coda_vptofh(struct vnode *, struct fid *);
+int coda_init(struct vfsconf *vfsp);
+/* Replace *vpp with a vnode from the current Venus; ENODEV if it is gone. */
+int getNewVnode(struct vnode **vpp);
diff --git a/sys/fs/coda/coda_vnops.c b/sys/fs/coda/coda_vnops.c
new file mode 100644
index 0000000..efa0dda
--- /dev/null
+++ b/sys/fs/coda/coda_vnops.c
@@ -0,0 +1,2233 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ *  	@(#) src/sys/coda/coda_vnops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_vnops.c,v 1.14 1999/01/27 20:09:17 dillon Exp $
+ * 
+ */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University.  Contributers include David Steere, James Kistler, and
+ * M. Satyanarayanan.  
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_vnops.c,v $
+ * Revision 1.14  1999/01/27 20:09:17  dillon
+ *     Fix warnings preparing for -Wall -Wcast-qual
+ *
+ *     Also disable one usb module in LINT due to fatal compilation errors,
+ *     temporary.
+ *
+ * Revision 1.13  1999/01/20 14:49:05  eivind
+ * Add 'options DEBUG_LOCKS', which stores extra information in struct
+ * lock, and add some macros and function parameters to make sure that
+ * the information get to the point where it can be put in the lock
+ * structure.
+ *
+ * While I'm here, add DEBUG_VFS_LOCKS to LINT.
+ *
+ * Revision 1.12  1999/01/07 16:14:12  bde
+ * Don't pass unused unused timestamp args to UFS_UPDATE() or waste
+ * time initializing them.  This almost finishes centralizing (in-core)
+ * timestamp updates in ufs_itimes().
+ *
+ * Revision 1.11  1999/01/05 18:49:51  eivind
+ * Remove the 'waslocked' parameter to vfs_object_create().
+ *
+ * Revision 1.10  1998/12/04 18:44:21  rvb
+ * Don't print diagnostic anymore
+ *
+ * Revision 1.9  1998/11/16 19:48:26  rvb
+ * A few bug fixes for Robert Watson
+ *
+ * Revision 1.8  1998/10/28 20:31:13  rvb
+ * Change the way unmounting happens to guarantee that the
+ * client programs are allowed to finish up (coda_call is
+ * forced to complete) and release their locks.  Thus there
+ * is a reasonable chance that the vflush implicit in the
+ * unmount will not get hung on held locks.
+ *
+ * Revision 1.7  1998/10/25 17:44:41  phk
+ * Nitpicking and dusting performed on a train.  Removes trivial warnings
+ * about unused variables, labels and other lint.
+ *
+ * Revision 1.6  1998/09/28 20:52:58  rvb
+ * Cleanup and fix THE bug
+ *
+ * Revision 1.5  1998/09/25 17:38:32  rvb
+ * Put "stray" printouts under DIAGNOSTIC.  Make everything build
+ * with DEBUG on.  Add support for lkm.  (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.4  1998/09/13 13:57:59  rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.12  1998/08/28 18:28:00  rvb
+ * NetBSD -current is stricter!
+ *
+ * Revision 1.11  1998/08/28 18:12:23  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10  1998/08/18 17:05:21  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9  1998/08/18 16:31:46  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8  98/02/24  22:22:50  rvb
+ * Fixes up mainly to flush iopen and friends
+ * 
+ * Revision 1.7  98/01/31  20:53:15  rvb
+ * First version that works on FreeBSD 2.2.5
+ * 
+ * Revision 1.6  98/01/23  11:53:47  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.5.2.8  98/01/23  11:21:11  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.5.2.7  97/12/19  14:26:08  rvb
+ * session id
+ * 
+ * Revision 1.5.2.6  97/12/16  22:01:34  rvb
+ * Oops add cfs_subr.h cfs_venus.h; sync with peter
+ * 
+ * Revision 1.5.2.5  97/12/16  12:40:14  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.5.2.4  97/12/10  14:08:31  rvb
+ * Fix O_ flags; check result in coda_call
+ * 
+ * Revision 1.5.2.3  97/12/10  11:40:27  rvb
+ * No more ody
+ * 
+ * Revision 1.5.2.2  97/12/09  16:07:15  rvb
+ * Sync with vfs/include/coda.h
+ * 
+ * Revision 1.5.2.1  97/12/06  17:41:25  rvb
+ * Sync with peters coda.h
+ * 
+ * Revision 1.5  97/12/05  10:39:23  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.4.14.10  97/11/25  08:08:48  rvb
+ * cfs_venus ... done; until cred/vattr change
+ * 
+ * Revision 1.4.14.9  97/11/24  15:44:48  rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ * 
+ * Revision 1.4.14.8  97/11/21  11:28:04  rvb
+ * cfs_venus.c is done: first pass
+ * 
+ * Revision 1.4.14.7  97/11/20  11:46:51  rvb
+ * Capture current cfs_venus
+ * 
+ * Revision 1.4.14.6  97/11/18  10:27:19  rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ * 
+ * Revision 1.4.14.5  97/11/13  22:03:03  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.4.14.4  97/11/12  12:09:42  rvb
+ * reorg pass1
+ * 
+ * Revision 1.4.14.3  97/11/06  21:03:28  rvb
+ * don't include headers in headers
+ * 
+ * Revision 1.4.14.2  97/10/29  16:06:30  rvb
+ * Kill DYING
+ * 
+ * Revision 1.4.14.1  1997/10/28 23:10:18  rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.4  1997/02/20 13:54:50  lily
+ * check for NULL return from coda_nc_lookup before CTOV
+ *
+ * Revision 1.3  1996/12/12 22:11:02  bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.2  1996/01/02 16:57:07  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1  1995/12/20 01:57:34  bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1  1995/03/04  19:08:06  bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1  1995/03/04  19:08:04  bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.6  1995/02/17  16:25:26  dcs
+ * These versions represent several changes:
+ * 1. Allow venus to restart even if outstanding references exist.
+ * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d
+ * 3. Allow ody_expand to return many members, not just one.
+ *
+ * Revision 2.5  94/11/09  20:29:27  dcs
+ * Small bug in remove dealing with hard links and link counts was fixed.
+ * 
+ * Revision 2.4  94/10/14  09:58:42  dcs
+ * Made changes 'cause sun4s have braindead compilers
+ * 
+ * Revision 2.3  94/10/12  16:46:37  dcs
+ * Cleaned kernel/venus interface by removing XDR junk, plus
+ * so cleanup to allow this code to be more easily ported.
+ * 
+ * Revision 2.2  94/09/20  14:12:41  dcs
+ * Fixed bug in rename when moving a directory.
+ * 
+ * Revision 2.1  94/07/21  16:25:22  satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ * 
+ * Revision 1.4  93/12/17  01:38:01  luqi
+ * Changes made for kernel to pass process info to Venus:
+ * 
+ * (1) in file cfs.h
+ * add process id and process group id in most of the cfs argument types.
+ * 
+ * (2) in file cfs_vnodeops.c
+ * add process info passing in most of the cfs vnode operations.
+ * 
+ * (3) in file cfs_xdr.c
+ * expand xdr routines according changes in (1). 
+ * add variable pass_process_info to allow venus for kernel version checking.
+ * 
+ * Revision 1.3  93/05/28  16:24:33  bnoble
+ * *** empty log message ***
+ * 
+ * Revision 1.2  92/10/27  17:58:25  lily
+ * merge kernel/latest and alpha/src/cfs
+ * 
+ * Revision 2.4  92/09/30  14:16:37  mja
+ * 	Redid buffer allocation so that it does kmem_{alloc,free} for all
+ * 	architectures.  Zone allocation, previously used on the 386, caused
+ * 	panics if it was invoked repeatedly.  Stack allocation, previously
+ * 	used on all other architectures, tickled some Mach bug that appeared
+ * 	with large stack frames.
+ * 	[91/02/09            jjk]
+ * 
+ * 	Added contributors blurb.
+ * 	[90/12/13            jjk]
+ * 
+ * Revision 2.3  90/07/26  15:50:09  mrt
+ * 	    Fixed fix to rename to remove .. from moved directories.
+ * 	[90/06/28            dcs]
+ * 
+ * Revision 1.7  90/06/28  16:24:25  dcs
+ * Fixed bug with moving directories, we weren't flushing .. for the moved directory.
+ * 
+ * Revision 1.6  90/05/31  17:01:47  dcs
+ * Prepare for merge with facilities kernel.
+ * 
+ * 
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/acct.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+#include <sys/namei.h>
+#include <sys/ioccom.h>
+#include <sys/select.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vnops.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_opstats.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_namecache.h>
+#include <coda/coda_pioctl.h>
+
+/*
+ * These flags select various performance enhancements.
+ */
+int coda_attr_cache  = 1;       /* Set to cache attributes in the kernel */
+int coda_symlink_cache = 1;     /* Set to cache symbolic link information */
+int coda_access_cache = 1;      /* Set to handle some access checks directly */
+
+/* Per-operation counters for the vnode operations, one slot per opcode */
+
+struct coda_op_stats coda_vnodeopstats[CODA_VNODEOPS_SIZE];
+
+#define MARK_ENTRY(op) (coda_vnodeopstats[op].entries++)
+#define MARK_INT_SAT(op) (coda_vnodeopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vnodeopstats[op].unsat_intrn++)
+#define MARK_INT_GEN(op) (coda_vnodeopstats[op].gen_intrn++)
+
+/* Delay tied to debug printing (presumably applied by myprintf -- confirm) */
+int coda_printf_delay = 0;  /* in microseconds */
+int coda_vnop_print_entry = 0;	/* nonzero: ENTRY prints "Entered <function>" */
+static int coda_lockdebug = 0;
+
+/* Definition of the vfs operation vector */
+
+/*
+ * Some NetBSD details:
+ *   coda_start is called at the end of the mount syscall.
+ *   coda_init is called at boot time.
+ */
+/* Optional entry trace; do/while(0) keeps "if (x) ENTRY; else ..." safe. */
+#define ENTRY  do { if (coda_vnop_print_entry) \
+	myprintf(("Entered %s\n",__FUNCTION__)); } while (0)
+
+/* Vnode operation vector; the vop_default_desc slot is coda_vop_error */
+
+struct vnodeopv_entry_desc coda_vnodeop_entries[] = {
+    { &vop_default_desc, coda_vop_error },
+    { &vop_lookup_desc, coda_lookup },		/* lookup */
+    { &vop_create_desc, coda_create },		/* create */
+    { &vop_mknod_desc, coda_vop_error },	/* mknod */
+    { &vop_open_desc, coda_open },		/* open */
+    { &vop_close_desc, coda_close },		/* close */
+    { &vop_access_desc, coda_access },		/* access */
+    { &vop_getattr_desc, coda_getattr },	/* getattr */
+    { &vop_setattr_desc, coda_setattr },	/* setattr */
+    { &vop_read_desc, coda_read },		/* read */
+    { &vop_write_desc, coda_write },		/* write */
+    { &vop_ioctl_desc, coda_ioctl },		/* ioctl */
+    { &vop_mmap_desc, coda_vop_error },		/* mmap */
+    { &vop_fsync_desc, coda_fsync },		/* fsync */
+    { &vop_remove_desc, coda_remove },		/* remove */
+    { &vop_link_desc, coda_link },		/* link */
+    { &vop_rename_desc, coda_rename },		/* rename */
+    { &vop_mkdir_desc, coda_mkdir },		/* mkdir */
+    { &vop_rmdir_desc, coda_rmdir },		/* rmdir */
+    { &vop_symlink_desc, coda_symlink },	/* symlink */
+    { &vop_readdir_desc, coda_readdir },	/* readdir */
+    { &vop_readlink_desc, coda_readlink },	/* readlink */
+    { &vop_abortop_desc, coda_abortop },	/* abortop */
+    { &vop_inactive_desc, coda_inactive },	/* inactive */
+    { &vop_reclaim_desc, coda_reclaim },	/* reclaim */
+    { &vop_lock_desc, coda_lock },		/* lock */
+    { &vop_unlock_desc, coda_unlock },		/* unlock */
+    { &vop_bmap_desc, coda_bmap },		/* bmap */
+    { &vop_strategy_desc, coda_strategy },	/* strategy */
+    { &vop_print_desc, coda_vop_error },	/* print */
+    { &vop_islocked_desc, coda_islocked },	/* islocked */
+    { &vop_pathconf_desc, coda_vop_error },	/* pathconf */
+    { &vop_advlock_desc, coda_vop_nop },	/* advlock */
+    { &vop_bwrite_desc, coda_vop_error },	/* bwrite */
+    { &vop_lease_desc, coda_vop_nop },		/* lease */
+    { &vop_poll_desc, (vop_t *) vop_stdpoll },
+    { &vop_getpages_desc, coda_fbsd_getpages },	/* pager intf.*/
+    { &vop_putpages_desc, coda_fbsd_putpages },	/* pager intf.*/
+
+#if	0
+
+    we need to define these someday
+#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
+#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
+#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
+#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
+#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
+
+    missing
+    { &vop_reallocblks_desc,	(vop_t *) ufs_missingop },
+    { &vop_cachedlookup_desc,	(vop_t *) ufs_lookup },
+    { &vop_whiteout_desc,	(vop_t *) ufs_whiteout },
+#endif
+    { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
+};
+/* Pairs the table with coda_vnodeop_p and hands it to VNODEOP_SET(). */
+static struct vnodeopv_desc coda_vnodeop_opv_desc =
+		{ &coda_vnodeop_p, coda_vnodeop_entries };
+
+VNODEOP_SET(coda_vnodeop_opv_desc);
+
+/*
+ * Catch-all for vnode operations Coda does not implement: the name of
+ * the operation is logged and EIO returned (no panic is raised).
+ */
+int
+coda_vop_error(void *anon) {
+    struct vnodeop_desc **descp = anon;
+
+    myprintf(("coda_vop_error: Vnode operation %s called, but not defined.\n",
+	      (*descp)->vdesc_name));
+    return EIO;
+}
+
+/* A generic do-nothing (lease_check, advlock); logs only under codadebug. */
+int
+coda_vop_nop(void *anon) {
+    struct vnodeop_desc **descp = anon;
+
+    if (codadebug) {
+	myprintf(("Vnode operation %s called, but unsupported\n",
+		  (*descp)->vdesc_name));
+    }
+    return (0);
+}
+
+/* Reset every vnode operation counter and stamp each slot with its opcode. */
+int
+coda_vnodeopstats_init(void)
+{
+	struct coda_op_stats *slot = coda_vnodeopstats;
+	int op;
+
+	for (op = 0; op < CODA_VNODEOPS_SIZE; op++, slot++) {
+		slot->opcode = op;
+		slot->entries = slot->sat_intrn = 0;
+		slot->unsat_intrn = slot->gen_intrn = 0;
+	}
+	return 0;
+}
+		
+/*
+ * coda_open calls Venus to return the device, inode pair of the cache
+ * file holding the data. Using iget, coda_open finds the vnode of the
+ * cache file, and then opens it.
+ *
+ * Opening the control vnode never reaches Venus: it succeeds unless
+ * the flags ask for write, truncate, create or exclusive access, in
+ * which case EACCES is returned.
+ *
+ * NOTE(review): after the cnode has been updated (c_ovp, c_ocount,
+ * c_owrite, extra reference on the Coda vnode) a failing VOP_OPEN or
+ * vfs_object_create() returns without undoing it, and vput() is then
+ * applied to a vnode that was unlocked above.  Confirm this is intended.
+ */
+int
+coda_open(void *v)
+{
+    struct vop_open_args *ap = v;
+    struct vnode *cvp = ap->a_vp;	/* the Coda vnode being opened */
+    struct cnode *cp = VTOC(cvp);
+    /*
+     * NetBSD can pass the O_EXCL flag in mode, even though the check
+     * has already happened.  Venus defensively assumes that if open
+     * is passed the EXCL, it must be a bug.  We strip the flag here.
+     */
+    int flag = ap->a_mode & (~O_EXCL);
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+    struct vnode *container;		/* vnode of the cache file */
+    dev_t dev;
+    ino_t inode;
+    int error;
+
+    MARK_ENTRY(CODA_OPEN_STATS);
+
+    /* Check for open of control file. */
+    if (IS_CTL_VP(cvp)) {
+	/* if (WRITEABLE(flag)) */ 
+	if (flag & (FWRITE | O_TRUNC | O_CREAT | O_EXCL)) {
+	    MARK_INT_FAIL(CODA_OPEN_STATS);
+	    return(EACCES);
+	}
+	MARK_INT_SAT(CODA_OPEN_STATS);
+	return(0);
+    }
+
+    error = venus_open(vtomi(cvp), &cp->c_fid, flag, cred, p, &dev, &inode);
+    if (error)
+	return (error);
+    CODADEBUG( CODA_OPEN,myprintf(("open: dev %d inode %d result %d\n",
+			      dev, inode, error)); )
+
+    /* Translate the <device, inode> pair for the cache file into
+       an inode pointer. */
+    error = coda_grab_vnode(dev, inode, &container);
+    if (error)
+	return (error);
+
+    /* We get the vnode back locked.  Needs unlocked */
+    VOP_UNLOCK(container, 0, p);
+    /* Keep a reference until the close comes in. */
+    vref(cvp);
+
+    /* Save the vnode pointer for the cache file. */
+    if (cp->c_ovp == NULL)
+	cp->c_ovp = container;
+    else if (cp->c_ovp != container)
+	panic("coda_open:  cp->c_ovp != ITOV(ip)");
+    cp->c_ocount++;
+
+    /* Flush the attribute cached if writing the file. */
+    if (flag & FWRITE) {
+	cp->c_owrite++;
+	cp->c_flags &= ~C_VATTR;
+    }
+
+    /* Save the <device, inode> pair for the cache file to speed
+       up subsequent page_read's. */
+    cp->c_device = dev;
+    cp->c_inode = inode;
+
+    /* Open the cache file. */
+    error = VOP_OPEN(container, flag, cred, p);
+    if (error) {
+    	printf("coda_open: VOP_OPEN on container failed %d\n", error);
+	return (error);
+    }
+    /* grab (above) does this when it calls newvnode unless it's in the cache */
+    if (container->v_type != VREG)
+	return(error);
+    error = vfs_object_create(container, p, cred);
+    if (error != 0) {
+	printf("coda_open: vfs_object_create() returns %d\n", error);
+	vput(container);
+    }
+    return(error);
+}
+
+/*
+ * Close the cache file used for I/O and notify Venus.
+ *
+ * Closing the control vnode does nothing.  During unmount only the
+ * container vnode (if any) is closed and released and ENODEV is returned;
+ * Venus is not called and the open counts stay as they are.
+ */
+int
+coda_close(void *v)
+{
+    struct vop_close_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    int flag = ap->a_fflag;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+    int error;
+
+    MARK_ENTRY(CODA_CLOSE_STATS);
+
+    /* Check for close of control file. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_SAT(CODA_CLOSE_STATS);
+	return(0);
+    }
+
+    if (IS_UNMOUNTING(cp)) {
+	if (cp->c_ovp == NULL) {
+#ifdef	CODA_VERBOSE
+	    printf("coda_close: NO container vp %p/cp %p\n", vp, cp);
+#endif
+	    return ENODEV;
+	}
+#ifdef	CODA_VERBOSE
+	printf("coda_close: destroying container ref %d, ufs vp %p of vp %p/cp %p\n",
+		vp->v_usecount, cp->c_ovp, vp, cp);
+#endif
+#ifdef	hmm
+	vgone(cp->c_ovp);
+#else
+	VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
+	vrele(cp->c_ovp);
+#endif
+	return ENODEV;
+    }
+
+    VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
+    vrele(cp->c_ovp);
+    cp->c_ocount--;
+    if (cp->c_ocount == 0)
+	cp->c_ovp = NULL;
+
+    if (flag & FWRITE)                    /* file was opened for write */
+	cp->c_owrite--;
+
+    error = venus_close(vtomi(vp), &cp->c_fid, flag, cred, p);
+    vrele(CTOV(cp));
+    CODADEBUG(CODA_CLOSE, myprintf(("close: result %d\n",error)); )
+    return(error);
+}
+
+/* Vnode read entry point: unpack the arguments and let coda_rdwr() do it. */
+int
+coda_read(void *v)
+{
+    struct vop_read_args *ap = v;
+    struct uio *uiop = ap->a_uio;
+    ENTRY;
+    return(coda_rdwr(ap->a_vp, uiop, UIO_READ,
+		    ap->a_ioflag, ap->a_cred, uiop->uio_procp));
+}
+
+/* Vnode write entry point: unpack the arguments and let coda_rdwr() do it. */
+int
+coda_write(void *v)
+{
+    struct vop_write_args *ap = v;
+    struct uio *uiop = ap->a_uio;
+    ENTRY;
+    return(coda_rdwr(ap->a_vp, uiop, UIO_WRITE,
+		    ap->a_ioflag, ap->a_cred, uiop->uio_procp));
+}
+
+/*
+ * Common code for coda_read() and coda_write(): the transfer itself is
+ * done on the container (cache) file that backs the Coda file.
+ *
+ * With no container open, one is obtained for this call only: through
+ * coda_grab_vnode() when a <device, inode> pair is saved and the process
+ * is not dumping core (that reference is released again before
+ * returning), otherwise by an internal VOP_OPEN/VOP_CLOSE of the Coda
+ * vnode.  The control vnode gets EINVAL.
+ */
+int
+coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, int ioflag,
+	  struct ucred *cred, struct proc *p)
+{
+    struct cnode *cp = VTOC(vp);
+    struct vnode *cfvp = cp->c_ovp;
+    int igot_internally = 0;
+    int opened_internally = 0;
+    int error = 0;
+
+    MARK_ENTRY(CODA_RDWR_STATS);
+
+    CODADEBUG(CODA_RDWR, myprintf(("coda_rdwr(%d, %p, %d, %qd, %d)\n", rw, 
+			      uiop->uio_iov->iov_base, uiop->uio_resid, 
+			      uiop->uio_offset, uiop->uio_segflg)); )
+
+    /* Check for rdwr of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_RDWR_STATS);
+	return(EINVAL);
+    }
+
+    /*
+     * If file is not already open this must be a page
+     * {read,write} request.  Iget the cache file's inode
+     * pointer if we still have its <device, inode> pair.
+     * Otherwise, we must do an internal open to derive the
+     * pair.
+     */
+    if (cfvp == NULL) {
+	/*
+	 * If we're dumping core, do the internal open. Otherwise
+	 * venus won't have the correct size of the core when
+	 * it's completely written.
+	 */
+	if (cp->c_inode != 0 && !(p && (p->p_acflag & ACORE))) {
+	    error = coda_grab_vnode(cp->c_device, cp->c_inode, &cfvp);
+	    if (error) {
+		MARK_INT_FAIL(CODA_RDWR_STATS);
+		return(error);
+	    }
+	    /* We now own a reference on cfvp; it is dropped below. */
+	    igot_internally = 1;
+	    /* We get the vnode back locked; it needs unlocking. */
+	    VOP_UNLOCK(cfvp, 0, p);
+	}
+	else {
+	    opened_internally = 1;
+	    MARK_INT_GEN(CODA_OPEN_STATS);
+	    error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE), 
+			     cred, p);
+printf("coda_rdwr: Internally Opening %p\n", vp);
+	    if (error) {
+		printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
+		return (error);
+	    }
+	    if (vp->v_type == VREG) {
+		error = vfs_object_create(vp, p, cred);
+		if (error != 0) {
+		    printf("coda_rdwr: vfs_object_create() returns %d\n", error);
+		    vput(vp);	/* NOTE(review): no VOP_CLOSE here -- confirm */
+		}
+	    }
+	    if (error) {
+		MARK_INT_FAIL(CODA_RDWR_STATS);
+		return(error);
+	    }
+	    cfvp = cp->c_ovp;
+	}
+    }
+
+    /* Have UFS handle the call. */
+    CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = (%lx.%lx.%lx), refcnt = %d\n",
+			      cp->c_fid.Volume, cp->c_fid.Vnode, 
+			      cp->c_fid.Unique, CTOV(cp)->v_usecount)); )
+
+    if (rw == UIO_READ) {
+	error = VOP_READ(cfvp, uiop, ioflag, cred);
+    } else {
+	struct vattr attr;
+
+	error = VOP_WRITE(cfvp, uiop, ioflag, cred);
+	/* ufs_write updates the vnode_pager_setsize for the vnode/object */
+	if (VOP_GETATTR(cfvp, &attr, cred, p) == 0)
+	    vnode_pager_setsize(vp, attr.va_size);
+    }
+
+    if (error)
+	MARK_INT_FAIL(CODA_RDWR_STATS);
+    else
+	MARK_INT_SAT(CODA_RDWR_STATS);
+
+    /* Drop the reference coda_grab_vnode() gave us for this call. */
+    if (igot_internally)
+	vrele(cfvp);
+
+    /* Do an internal close if necessary. */
+    if (opened_internally) {
+	MARK_INT_GEN(CODA_CLOSE_STATS);
+	(void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, p);
+    }
+
+    /* Invalidate cached attributes if writing. */
+    if (rw == UIO_WRITE)
+	cp->c_flags &= ~C_VATTR;
+    return(error);
+}
+
+/*
+ * ioctl on the Coda control vnode: hand a pioctl request to Venus.
+ *
+ * data is a struct PioctlData.  Its path is looked up with namei() as a
+ * user-space string, must resolve to a Coda vnode, and the request goes
+ * to Venus with that vnode's fid.  Because the path is a user address,
+ * the debug messages print the pointer rather than dereferencing it.
+ */
+int
+coda_ioctl(void *v)
+{
+    struct vop_ioctl_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    int com = ap->a_command;
+    caddr_t data = ap->a_data;
+    int flag = ap->a_fflag;
+    struct ucred *cred = ap->a_cred;
+    struct proc  *p = ap->a_p;
+    struct PioctlData *iap = (struct PioctlData *)data;
+    struct nameidata ndp;
+    struct vnode *tvp;
+    int error;
+
+    MARK_ENTRY(CODA_IOCTL_STATS);
+
+    CODADEBUG(CODA_IOCTL, myprintf(("in coda_ioctl on %p\n", (void *)iap->path));)
+
+    /* Must be control object to succeed; a dying object is not checked. */
+    if (!IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: vp != ctlvp"));)
+	return (EOPNOTSUPP);
+    }
+
+    /* Look up the pathname (the name cache is not consulted here). */
+    NDINIT(&ndp, LOOKUP, (iap->follow ? FOLLOW : NOFOLLOW), UIO_USERSPACE, iap->path, p);
+    error = namei(&ndp);
+    if (error) {
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: lookup returns %d\n",
+				   error));)
+	return(error);
+    }
+    tvp = ndp.ni_vp;
+
+    /*
+     * Make sure this is a coda style cnode, but it may be a
+     * different vfsp
+     */
+    /* XXX: this totally violates the comment about vtagtype in vnode.h */
+    if (tvp->v_tag != VT_CODA) {
+	vrele(tvp);
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	CODADEBUG(CODA_IOCTL, 
+		 myprintf(("coda_ioctl error: %p not a coda object\n", 
+			(void *)iap->path));)
+	return(EINVAL);
+    }
+
+    /* Input larger than VC_MAXDATASIZE is refused (and counted as a failure). */
+    if (iap->vi.in_size > VC_MAXDATASIZE) {
+	vrele(tvp);
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+	return(EINVAL);
+    }
+
+    error = venus_ioctl(vtomi(tvp), &((VTOC(tvp))->c_fid), com, flag, data, cred, p);
+    if (error)
+	MARK_INT_FAIL(CODA_IOCTL_STATS);
+    else
+	CODADEBUG(CODA_IOCTL, myprintf(("Ioctl returns %d \n", error)); )
+
+    vrele(tvp);
+    return(error);
+}
+
+/*
+ * getattr vnode op.  To reduce the cost of a user-level Venus, attributes
+ * are cached in the cnode: if c_vattr is valid it is copied to the caller,
+ * otherwise the attributes are fetched from Venus and, unless the file is
+ * open for write or coda_attr_cache is off, stored in the cnode.  Returns
+ * 0 or an errno (ENODEV while unmounting, ENOENT for the control vnode).
+ * There is some question if this caching is a security leak, but
+ * presumably the caller has already looked up and opened the file.
+ */
+int
+coda_getattr(v)
+    void *v;
+{
+/* true args */
+    struct vop_getattr_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct vattr *vap = ap->a_vap;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_GETATTR_STATS);
+
+    if (IS_UNMOUNTING(cp))
+	return ENODEV;
+
+    /* Check for getattr of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_GETATTR_STATS);
+	return(ENOENT);
+    }
+
+    /* Check to see if the attributes have already been cached */
+    if (VALID_VATTR(cp)) { 
+	CODADEBUG(CODA_GETATTR, { myprintf(("attr cache hit: (%lx.%lx.%lx)\n",
+				       cp->c_fid.Volume,
+				       cp->c_fid.Vnode,
+				       cp->c_fid.Unique));});
+	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
+		 print_vattr(&cp->c_vattr); );
+	
+	*vap = cp->c_vattr;
+	MARK_INT_SAT(CODA_GETATTR_STATS);
+	return(0);
+    }
+
+    error = venus_getattr(vtomi(vp), &cp->c_fid, cred, p, vap);
+
+    if (!error) {
+	CODADEBUG(CODA_GETATTR, myprintf(("getattr miss (%lx.%lx.%lx): result %d\n",
+				     cp->c_fid.Volume,
+				     cp->c_fid.Vnode,
+				     cp->c_fid.Unique,
+				     error)); )
+	    
+	CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
+		 print_vattr(vap);	);
+	
+	/* Keep the open container file's VM object in step with the size
+	   Venus reported.  va_size is passed straight through, as
+	   coda_rdwr does, so that it is not truncated to int. */
+	if (cp->c_ovp != NULL) {
+	    vnode_pager_setsize(cp->c_ovp, vap->va_size);
+	}
+	/* If not open for write, store attributes in cnode */   
+	if ((cp->c_owrite == 0) && (coda_attr_cache)) {  
+	    cp->c_vattr = *vap;
+	    cp->c_flags |= C_VATTR; 
+	}
+	
+    }
+    return(error);
+}
+
+/*
+ * setattr vnode op: send the attribute changes to Venus.  On success the
+ * cached attributes are dropped and, if a size was given, the container
+ * file's VM object is resized.  Returns 0 or an errno (ENOENT for ctlvp).
+ */
+int
+coda_setattr(v)
+    void *v;
+{
+/* true args */
+    struct vop_setattr_args *ap = v;
+    register struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    register struct vattr *vap = ap->a_vap;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_SETATTR_STATS);
+    /* Check for setattr of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_SETATTR_STATS);
+	return(ENOENT);
+    }
+    if (codadebug & CODADBGMSK(CODA_SETATTR)) {
+	print_vattr(vap);
+    }
+    error = venus_setattr(vtomi(vp), &cp->c_fid, vap, cred, p);
+    if (!error) {
+	cp->c_flags &= ~C_VATTR;	/* cached attributes are stale now */
+	/* Test and pass va_size itself; an int copy truncates large sizes. */
+	if (vap->va_size != VNOVAL && cp->c_ovp != NULL) {
+	    vnode_pager_setsize(cp->c_ovp, vap->va_size);
+	}
+    }
+    CODADEBUG(CODA_SETATTR,	myprintf(("setattr %d\n", error)); )
+    return(error);
+}
+
+/*
+ * access vnode op: may cred use vp in the way described by mode?
+ * The control vnode grants read-only access; a cached "." entry answers
+ * directory search checks; everything else is asked of Venus.
+ * Returns 0 if access is allowed, else an errno.
+ */
+int
+coda_access(v)
+    void *v;
+{
+/* true args */
+    struct vop_access_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    int mode = ap->a_mode;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+
+    MARK_ENTRY(CODA_ACCESS_STATS);
+
+    if (IS_CTL_VP(vp)) {
+	/* bogus hack - all will be marked as successes */
+	MARK_INT_SAT(CODA_ACCESS_STATS);
+	/* Only pure read access is allowed on the control object. */
+	if ((mode & VREAD) == 0 || (mode & (VWRITE | VEXEC)) != 0)
+	    return(EACCES);
+	return(0);
+    }
+
+    /*
+     * If the file is a directory, and we are checking exec (eg lookup)
+     * access, and "." is in the namecache, then the user must have
+     * lookup access to it; no need to ask Venus.
+     */
+    if (coda_access_cache && (vp->v_type == VDIR) && (mode & VEXEC) &&
+	coda_nc_lookup(cp, ".", 1, cred)) {
+	MARK_INT_SAT(CODA_ACCESS_STATS);
+	return(0);                     /* it was in the cache */
+    }
+
+    /* No shortcut applies: the answer comes from Venus. */
+    return(venus_access(vtomi(vp), &cp->c_fid, mode, cred, p));
+}
+
+/*
+ * CODA abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  A pathname buffer saved in anticipation of a coda_create or a
+ * coda_remove is released here, but only when HASBUF is set and SAVESTART
+ * is clear.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+coda_abortop(v)
+    void *v;
+{
+    /* args: struct vnode *a_dvp; struct componentname *a_cnp; */
+    struct vop_abortop_args *ap = v;
+    struct componentname *cnp = ap->a_cnp;
+    int has_buf = (cnp->cn_flags & HASBUF) != 0;
+    int save_start = (cnp->cn_flags & SAVESTART) != 0;
+
+    if (has_buf && !save_start) {
+	zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    return (0);
+}
+
+/*
+ * readlink vnode op: copy the target of symlink vp into the caller's uio,
+ * from the cnode's cached copy when coda_symlink_cache allows, else from
+ * Venus.  Returns 0 or an errno (ENOENT for the control vnode).
+ */
+int
+coda_readlink(v)
+    void *v;
+{
+    struct vop_readlink_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct uio *uiop = ap->a_uio;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_uio->uio_procp;
+    char *str;
+    int len, error;
+
+    MARK_ENTRY(CODA_READLINK_STATS);
+    /* readlink of the control object is refused. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_READLINK_STATS);
+	return(ENOENT);
+    }
+    /* Fast path: the link text is already cached in the cnode. */
+    if (coda_symlink_cache && (VALID_SYMLINK(cp))) {
+	uiop->uio_rw = UIO_READ;
+	error = uiomove(cp->c_symlink, (int)cp->c_symlen, uiop);
+	if (!error)
+	    MARK_INT_SAT(CODA_READLINK_STATS);
+	else
+	    MARK_INT_FAIL(CODA_READLINK_STATS);
+	return(error);
+    }
+
+    /* Slow path: ask Venus, then cache the returned string or free it. */
+    error = venus_readlink(vtomi(vp), &cp->c_fid, cred, p, &str, &len);
+    if (error == 0) {
+	uiop->uio_rw = UIO_READ;
+	error = uiomove(str, len, uiop);
+	if (!coda_symlink_cache) {
+	    CODA_FREE(str, len);
+	} else {
+	    cp->c_symlink = str;
+	    cp->c_symlen = len;
+	    cp->c_flags |= C_SYMLINK;
+	}
+    }
+    CODADEBUG(CODA_READLINK, myprintf(("in readlink result %d\n",error));)
+    return(error);
+}
+
+int
+coda_fsync(v)
+    void *v;
+{
+/* true args */
+    struct vop_fsync_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    struct vnode *convp = cp->c_ovp;
+    int error;
+   
+    MARK_ENTRY(CODA_FSYNC_STATS);
+
+    /* Check for fsync on an unmounting object */
+    /* The NetBSD kernel, in it's infinite wisdom, can try to fsync
+     * after an unmount has been initiated.  This is a Bad Thing,
+     * which we have to avoid.  Not a legitimate failure for stats.
+     */
+    if (IS_UNMOUNTING(cp)) {
+	return(ENODEV);
+    }
+
+    /* Check for fsync of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_SAT(CODA_FSYNC_STATS);
+	return(0);
+    }
+
+    if (convp)
+    	VOP_FSYNC(convp, cred, MNT_WAIT, p);
+
+    /*
+     * We see fsyncs with usecount == 1 then usecount == 0.
+     * For now we ignore them.
+     */
+    /*
+    if (!vp->v_usecount) {
+    	printf("coda_fsync on vnode %p with %d usecount.  c_flags = %x (%x)\n",
+		vp, vp->v_usecount, cp->c_flags, cp->c_flags&C_PURGING);
+    }
+    */
+
+    /*
+     * We can expect fsync on any vnode at all if venus is pruging it.
+     * Venus can't very well answer the fsync request, now can it?
+     * Hopefully, it won't have to, because hopefully, venus preserves
+     * the (possibly untrue) invariant that it never purges an open
+     * vnode.  Hopefully.
+     */
+    if (cp->c_flags & C_PURGING) {
+	return(0);
+    }
+
+    /* needs research */
+    return 0;
+    error = venus_fsync(vtomi(vp), &cp->c_fid, cred, p);
+
+    CODADEBUG(CODA_FSYNC, myprintf(("in fsync result %d\n",error)); );
+    return(error);
+}
+
+int
+coda_inactive(v)
+    void *v;
+{
+    /* XXX - at the moment, inactive doesn't look at cred, and doesn't
+       have a proc pointer.  Oops. */
+/* true args */
+    struct vop_inactive_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct ucred *cred __attribute__((unused)) = NULL;
+    struct proc *p __attribute__((unused)) = curproc;
+/* inactive vnode op: free any cached symlink text, take the cnode out of the */
+/* table, then unlock the vnode (and vgone() it unless unmounting).  Returns 0. */
+
+    /* We don't need to send inactive to venus - DCS */
+    MARK_ENTRY(CODA_INACTIVE_STATS);
+    /* Nothing is done for the control vnode. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_SAT(CODA_INACTIVE_STATS);
+	return 0;
+    }
+
+    CODADEBUG(CODA_INACTIVE, myprintf(("in inactive, %lx.%lx.%lx. vfsp %p\n",
+				  cp->c_fid.Volume, cp->c_fid.Vnode, 
+				  cp->c_fid.Unique, vp->v_mount));)
+
+    /* If an array has been allocated to hold the symlink, deallocate it */
+    if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) {
+	if (cp->c_symlink == NULL)
+	    panic("coda_inactive: null symlink pointer in cnode");
+	
+	CODA_FREE(cp->c_symlink, cp->c_symlen);
+	cp->c_flags &= ~C_SYMLINK;	/* NOTE(review): c_symlink is not reset; only the flag marks it invalid */
+	cp->c_symlen = 0;
+    }
+
+    /* Remove it from the table so it can't be found. */
+    coda_unsave(cp);
+    if ((struct coda_mntinfo *)(vp->v_mount->mnt_data) == NULL) {
+	myprintf(("Help! vfsp->vfs_data was NULL, but vnode %p wasn't dying\n", vp));
+	panic("badness in coda_inactive\n");
+    }
+    /* Unmounting: only release the cnode lock.  Otherwise unlock the vnode and vgone() it. */
+    if (IS_UNMOUNTING(cp)) {
+#ifdef	DEBUG
+	printf("coda_inactive: IS_UNMOUNTING use %d: vp %p, cp %p\n", vp->v_usecount, vp, cp);
+	if (cp->c_ovp != NULL)
+	    printf("coda_inactive: cp->ovp != NULL use %d: vp %p, cp %p\n",
+	    	   vp->v_usecount, vp, cp);
+#endif
+	lockmgr(&cp->c_lock, LK_RELEASE, &vp->v_interlock, p);
+    } else {
+#ifdef OLD_DIAGNOSTIC
+	if (CTOV(cp)->v_usecount) {
+	    panic("coda_inactive: nonzero reference count");
+	}
+	if (cp->c_ovp != NULL) {
+	    panic("coda_inactive:  cp->ovp != NULL");
+	}
+#endif
+	VOP_UNLOCK(vp, 0, p);
+	vgone(vp);
+    }
+
+    MARK_INT_SAT(CODA_INACTIVE_STATS);
+    return(0);
+}
+
+/*
+ * Remote file system operations having to do with directory manipulation.
+ */
+
+/*
+ * lookup vnode op: resolve the component in ap->a_cnp within directory dvp, using the Coda
+ * name cache first and Venus otherwise.  It appears lookup is supposed to return the vnode locked. */
+int
+coda_lookup(v)
+    void *v;
+{
+/* true args */
+    struct vop_lookup_args *ap = v;
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);
+    struct vnode **vpp = ap->a_vpp;
+    /* 
+     * It looks as though ap->a_cnp->ni_cnd->cn_nameptr holds the rest
+     * of the string to xlate, and that we must try to get at least
+     * ap->a_cnp->ni_cnd->cn_namelen of those characters to match.  I
+     * could be wrong. 
+     */
+    struct componentname  *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+/* locals */
+    struct cnode *cp;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    ViceFid VFid;
+    int	vtype;
+    int error = 0;
+    /* Returns 0, EJUSTRETURN (CREATE/RENAME of a missing last component) or an errno. */
+    MARK_ENTRY(CODA_LOOKUP_STATS);
+
+    CODADEBUG(CODA_LOOKUP, myprintf(("lookup: %s in %lx.%lx.%lx\n",
+				   nm, dcp->c_fid.Volume,
+				   dcp->c_fid.Vnode, dcp->c_fid.Unique)););
+
+    /* Check for lookup of control object. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	*vpp = coda_ctlvp;
+	vref(*vpp);
+	MARK_INT_SAT(CODA_LOOKUP_STATS);
+	goto exit;
+    }
+
+    if (len+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_LOOKUP_STATS);
+	CODADEBUG(CODA_LOOKUP, myprintf(("name too long: lookup, %lx.%lx.%lx(%s)\n",
+				    dcp->c_fid.Volume, dcp->c_fid.Vnode,
+				    dcp->c_fid.Unique, nm)););
+	*vpp = (struct vnode *)0;
+	error = EINVAL;
+	goto exit;
+    }
+    /* First try to look the file up in the cfs name cache */
+    /* lock the parent vnode? */
+    cp = coda_nc_lookup(dcp, nm, len, cred);
+    if (cp) {
+	*vpp = CTOV(cp);
+	vref(*vpp);
+	CODADEBUG(CODA_LOOKUP, 
+		 myprintf(("lookup result %d vpp %p\n",error,*vpp));)
+    } else {
+	
+	/* The name wasn't cached, so we need to contact Venus */
+	error = venus_lookup(vtomi(dvp), &dcp->c_fid, nm, len, cred, p, &VFid, &vtype);
+	
+	if (error) {
+	    MARK_INT_FAIL(CODA_LOOKUP_STATS);
+	    CODADEBUG(CODA_LOOKUP, myprintf(("lookup error on %lx.%lx.%lx(%s)%d\n",
+					dcp->c_fid.Volume, dcp->c_fid.Vnode, dcp->c_fid.Unique, nm, error));)
+	    *vpp = (struct vnode *)0;
+	} else {
+	    MARK_INT_SAT(CODA_LOOKUP_STATS);
+	    CODADEBUG(CODA_LOOKUP, 
+		     myprintf(("lookup: vol %lx vno %lx uni %lx type %o result %d\n",
+			    VFid.Volume, VFid.Vnode, VFid.Unique, vtype,
+			    error)); )
+		
+	    cp = make_coda_node(&VFid, dvp->v_mount, vtype);
+	    *vpp = CTOV(cp);
+	    
+	    /* enter the new vnode in the Name Cache only if the top bit isn't set */
+	    /* And don't enter a new vnode for an invalid one! */
+	    if (!(vtype & CODA_NOCACHE))
+		coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+	}
+    }
+
+ exit:
+    /* 
+     * If we are creating, and this was the last name to be looked up,
+     * and the error was ENOENT, then there really shouldn't be an
+     * error and we can make the leaf NULL and return success.  Since
+     * this is supposed to work under Mach as well as NetBSD, we're
+     * leaving this fn wrapped.  We also must tell lookup/namei that
+     * we need to save the last component of the name.  (Create will
+     * have to free the name buffer later...lucky us...)
+     */
+    if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME))
+	&& (cnp->cn_flags & ISLASTCN)
+	&& (error == ENOENT))
+    {
+	error = EJUSTRETURN;
+	cnp->cn_flags |= SAVENAME;
+	*ap->a_vpp = NULL;
+    }
+
+    /* 
+     * If we are removing, and we are at the last element, and we
+     * found it, then we need to keep the name around so that the
+     * removal will go ahead as planned.  Unfortunately, this will
+     * probably also lock the to-be-removed vnode, which may or may
+     * not be a good idea.  I'll have to look at the bits of
+     * coda_remove to make sure.  We'll only save the name if we did in
+     * fact find the name, otherwise coda_remove won't have a chance
+     * to free the pathname.  
+     */
+    if ((cnp->cn_nameiop == DELETE)
+	&& (cnp->cn_flags & ISLASTCN)
+	&& !error)
+    {
+	cnp->cn_flags |= SAVENAME;
+    }
+
+    /* 
+     * If the lookup went well, we need to (potentially?) unlock the
+     * parent, and lock the child.  We are only responsible for
+     * checking to see if the parent is supposed to be unlocked before
+     * we return.  We must always lock the child (provided there is
+     * one, and (the parent isn't locked or it isn't the same as the
+     * parent.)  Simple, huh?  We can never leave the parent locked unless
+     * we are ISLASTCN
+     */
+    if (!error || (error == EJUSTRETURN)) {
+	if (!(cnp->cn_flags & LOCKPARENT) || !(cnp->cn_flags & ISLASTCN)) {
+	    if ((error = VOP_UNLOCK(dvp, 0, p))) {
+		return error; 	/* NOTE(review): *vpp is neither released nor cleared on this path */
+	    }	    
+	    /* 
+	     * The parent is unlocked.  As long as there is a child,
+	     * lock it without bothering to check anything else. 
+	     */
+	    if (*ap->a_vpp) {
+		if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
+		    printf("coda_lookup: ");
+		    panic("unlocked parent but couldn't lock child");
+		}
+	    }
+	} else {
+	    /* The parent is locked, and may be the same as the child */
+	    if (*ap->a_vpp && (*ap->a_vpp != dvp)) {
+		/* Different, go ahead and lock it.  (The panic text below is the same as in the unlocked-parent case.) */
+		if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
+		    printf("coda_lookup: ");
+		    panic("unlocked parent but couldn't lock child");
+		}
+	    }
+	}
+    } else {
+	/* If the lookup failed, we need to ensure that the leaf is NULL */
+	/* Don't change any locking? */
+	*ap->a_vpp = NULL;
+    }
+    return(error);
+}
+
+/*ARGSUSED*/
+int
+coda_create(v)
+    void *v;
+{
+/* create vnode op: ask Venus to create the file named by cnp in dvp; the new vnode is returned in *vpp */
+    struct vop_create_args *ap = v;
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);
+    struct vattr *va = ap->a_vap;
+    int exclusive = 1;	/* always 1, see "All creates are exclusive" below */
+    int mode = ap->a_vap->va_mode;
+    struct vnode **vpp = ap->a_vpp;
+    struct componentname  *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+/* locals.  Returns 0 or an errno; *vpp is NULL on failure. */
+    int error;
+    struct cnode *cp;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    ViceFid VFid;
+    struct vattr attr;
+
+    MARK_ENTRY(CODA_CREATE_STATS);
+
+    /* All creates are exclusive XXX */
+    /* I'm assuming the 'mode' argument is the file mode bits XXX */
+
+    /* Check for create of control object. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	*vpp = (struct vnode *)0;
+	MARK_INT_FAIL(CODA_CREATE_STATS);
+	return(EACCES);	/* NOTE(review): skips the zfree() of cn_pnbuf done at the end - confirm */
+    }
+
+    error = venus_create(vtomi(dvp), &dcp->c_fid, nm, len, exclusive, mode, va, cred, p, &VFid, &attr);
+
+    if (!error) {
+	
+	/* If this is an exclusive create, panic if the file already exists. */
+	/* Venus should have detected the file and reported EEXIST. */
+
+	if ((exclusive == 1) &&
+	    (coda_find(&VFid) != NULL))
+	    panic("cnode existed for newly created file!");
+	
+	cp = make_coda_node(&VFid, dvp->v_mount, attr.va_type);
+	*vpp = CTOV(cp);
+	
+	/* Update va to reflect the new attributes. */
+	(*va) = attr;
+	
+	/* Update the attribute cache and mark it as valid */
+	if (coda_attr_cache) {
+	    VTOC(*vpp)->c_vattr = attr;
+	    VTOC(*vpp)->c_flags |= C_VATTR;       
+	}
+
+	/* Invalidate the parent's attr cache, the modification time has changed */
+	VTOC(dvp)->c_flags &= ~C_VATTR;
+	
+	/* enter the new vnode in the Name Cache */
+	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+	
+	CODADEBUG(CODA_CREATE, 
+		 myprintf(("create: (%lx.%lx.%lx), result %d\n",
+			VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
+    } else {
+	*vpp = (struct vnode *)0;
+	CODADEBUG(CODA_CREATE, myprintf(("create error %d\n", error));)
+    }
+
+    if (!error) {
+	if (cnp->cn_flags & LOCKLEAF) {
+	    if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
+		printf("coda_create: ");	/* NOTE(review): this function never unlocks the parent; the panic text matches coda_lookup's */
+		panic("unlocked parent but couldn't lock child");
+	    }
+	}
+#ifdef OLD_DIAGNOSTIC
+	else {
+	    printf("coda_create: LOCKLEAF not set!\n");
+	}
+#endif
+    }
+    /* Have to free the previously saved name */
+    /* 
+     * This condition is stolen from ufs_makeinode.  I have no idea
+     * why it's here, but what the hey...
+     */
+    if ((cnp->cn_flags & SAVESTART) == 0) {
+	zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    return(error);
+}
+
+int
+coda_remove(v)
+    void *v;
+{
+/* remove vnode op: purge the name from the Coda name cache, then ask Venus to remove it from dvp */
+    struct vop_remove_args *ap = v;
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *cp = VTOC(dvp);	/* cnode of the parent directory, not of the file */
+    struct componentname  *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+/* locals.  Returns 0 or an errno (ENOENT for the control object). */
+    int error;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    struct cnode *tp;
+
+    MARK_ENTRY(CODA_REMOVE_STATS);
+
+    CODADEBUG(CODA_REMOVE, myprintf(("remove: %s in %lx.%lx.%lx\n",
+				   nm, cp->c_fid.Volume, cp->c_fid.Vnode,
+				   cp->c_fid.Unique)););
+
+    /* Remove the file's entry from the CODA Name Cache */
+    /* We're being conservative here, it might be that this person
+     * doesn't really have sufficient access to delete the file
+     * but we feel zapping the entry won't really hurt anyone -- dcs
+     */
+    /* I'm gonna go out on a limb here. If a file and a hardlink to it
+     * exist, and one is removed, the link count on the other will be
+     * off by 1. We could either invalidate the attrs if cached, or
+     * fix them. I'll try to fix them. DCS 11/8/94
+     */
+    tp = coda_nc_lookup(VTOC(dvp), nm, len, cred);
+    if (tp) {
+	if (VALID_VATTR(tp)) {	/* If attrs are cached */
+	    if (tp->c_vattr.va_nlink > 1) {	/* If it's a hard link */
+		tp->c_vattr.va_nlink--;
+	    }
+	}
+	
+	coda_nc_zapfile(VTOC(dvp), nm, len); 
+	/* No need to flush it if it doesn't exist! */
+    }
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    VTOC(dvp)->c_flags &= ~C_VATTR;
+
+    /* Check for remove of control object.  NOTE(review): done after the cache purge above, and this return skips the zfree() below - confirm */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	MARK_INT_FAIL(CODA_REMOVE_STATS);
+	return(ENOENT);
+    }
+
+    error = venus_remove(vtomi(dvp), &cp->c_fid, nm, len, cred, p);
+
+    CODADEBUG(CODA_REMOVE, myprintf(("in remove result %d\n",error)); )
+
+    if ((cnp->cn_flags & SAVESTART) == 0) {
+	zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    return(error);
+}
+
+/*
+ * link vnode op: ask Venus to make the name in cnp, inside directory tdvp,
+ * a new hard link to vp.  The control object may be neither the source
+ * nor the new name.  The cached attributes of both vp and tdvp are
+ * invalidated whether or not Venus succeeds, and the pathname buffer is
+ * freed unless SAVESTART is set.  Returns 0 or an errno (EACCES when the
+ * control object is involved).
+ */
+int
+coda_link(v)
+    void *v;
+{
+/* true args */
+    struct vop_link_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct vnode *tdvp = ap->a_tdvp;
+    struct cnode *tdcp = VTOC(tdvp);
+    struct componentname *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+/* locals */
+    int error;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+
+    MARK_ENTRY(CODA_LINK_STATS);
+
+    /* Dump both fids, once, when link debugging is on. */
+    if (codadebug & CODADBGMSK(CODA_LINK)) {
+	myprintf(("link:   vp fid: (%lx.%lx.%lx)\n",
+		  cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
+	myprintf(("link: tdvp fid: (%lx.%lx.%lx)\n",
+		  tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
+    }
+
+    /* Check for link to/from control object. */
+    if (IS_CTL_NAME(tdvp, nm, len) || IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_LINK_STATS);
+	return(EACCES);
+    }
+
+    error = venus_link(vtomi(vp), &cp->c_fid, &tdcp->c_fid, nm, len, cred, p);
+
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    VTOC(tdvp)->c_flags &= ~C_VATTR;
+    VTOC(vp)->c_flags &= ~C_VATTR;
+
+    CODADEBUG(CODA_LINK,	myprintf(("in link result %d\n",error)); )
+
+    /* Drop the name buffer if we don't need to SAVESTART */
+    if ((cnp->cn_flags & SAVESTART) == 0) {
+	zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    return(error);
+}
+
+int
+coda_rename(v)
+    void *v;
+{
+/* rename vnode op: purge cache entries for both names, ask Venus to rename fdvp/fcnp to tdvp/tcnp, then release all four vnodes */
+    struct vop_rename_args *ap = v;
+    struct vnode *odvp = ap->a_fdvp;
+    struct cnode *odcp = VTOC(odvp);
+    struct componentname  *fcnp = ap->a_fcnp;
+    struct vnode *ndvp = ap->a_tdvp;
+    struct cnode *ndcp = VTOC(ndvp);
+    struct componentname  *tcnp = ap->a_tcnp;
+    struct ucred *cred = fcnp->cn_cred;
+    struct proc *p = fcnp->cn_proc;
+/* locals.  Returns 0 or an errno. */
+    int error;
+    const char *fnm = fcnp->cn_nameptr;
+    int flen = fcnp->cn_namelen;
+    const char *tnm = tcnp->cn_nameptr;
+    int tlen = tcnp->cn_namelen;
+
+    MARK_ENTRY(CODA_RENAME_STATS);
+
+    /* Hmmm.  The vnodes are already looked up.  Perhaps they are locked?
+       This could be Bad. XXX */
+#ifdef OLD_DIAGNOSTIC
+    if ((fcnp->cn_cred != tcnp->cn_cred)
+	|| (fcnp->cn_proc != tcnp->cn_proc))
+    {
+	panic("coda_rename: component names don't agree");
+    }
+#endif
+
+    /* Check for rename involving control object. */ 
+    if (IS_CTL_NAME(odvp, fnm, flen) || IS_CTL_NAME(ndvp, tnm, tlen)) {
+	MARK_INT_FAIL(CODA_RENAME_STATS);
+	return(EACCES);	/* NOTE(review): unlike the exit path, none of the four vnodes is released here - confirm */
+    }
+
+    /* Problem with moving directories -- need to flush entry for .. */
+    if (odvp != ndvp) {
+	struct cnode *ovcp = coda_nc_lookup(VTOC(odvp), fnm, flen, cred);
+	if (ovcp) {
+	    struct vnode *ovp = CTOV(ovcp);
+	    if ((ovp) &&
+		(ovp->v_type == VDIR)) /* If it's a directory */
+		coda_nc_zapfile(VTOC(ovp),"..", 2);
+	}
+    }
+
+    /* Remove the entries for both source and target files */
+    coda_nc_zapfile(VTOC(odvp), fnm, flen);
+    coda_nc_zapfile(VTOC(ndvp), tnm, tlen);
+
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    VTOC(odvp)->c_flags &= ~C_VATTR;
+    VTOC(ndvp)->c_flags &= ~C_VATTR;
+
+    if (flen+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_RENAME_STATS);
+	error = EINVAL;
+	goto exit;
+    }
+
+    if (tlen+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_RENAME_STATS);
+	error = EINVAL;
+	goto exit;
+    }
+
+    error = venus_rename(vtomi(odvp), &odcp->c_fid, &ndcp->c_fid, fnm, flen, tnm, tlen, cred, p);
+
+ exit:
+    CODADEBUG(CODA_RENAME, myprintf(("in rename result %d\n",error));)
+    /* XXX - do we need to call cache purge on the moved vnode? */
+    cache_purge(ap->a_fvp);
+
+    /* It seems to be incumbent on us to drop locks on all four vnodes */
+    /* From-vnodes are not locked, only ref'd.  To-vnodes are locked. */
+
+    vrele(ap->a_fvp);
+    vrele(odvp);
+
+    if (ap->a_tvp) {
+	if (ap->a_tvp == ndvp) {
+	    vrele(ap->a_tvp);
+	} else {
+	    vput(ap->a_tvp);
+	}
+    }
+
+    vput(ndvp);
+    return(error);
+}
+
+/*
+ * mkdir vnode op: ask Venus to create the directory named by cnp inside
+ * dvp and return its vnode in *vpp.  On success the new directory and its
+ * "." and ".." are entered in the name cache and dvp's cached attributes
+ * are invalidated.  Returns 0 or an errno; *vpp is NULL on failure.
+ */
+int
+coda_mkdir(v)
+    void *v;
+{
+/* true args */
+    struct vop_mkdir_args *ap = v;
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);	
+    struct componentname  *cnp = ap->a_cnp;
+    register struct vattr *va = ap->a_vap;
+    struct vnode **vpp = ap->a_vpp;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+/* locals */
+    int error;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    struct cnode *cp;
+    ViceFid VFid;
+    struct vattr ova;
+
+    MARK_ENTRY(CODA_MKDIR_STATS);
+    /* Check for mkdir of control object. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	*vpp = (struct vnode *)0;
+	MARK_INT_FAIL(CODA_MKDIR_STATS);
+	return(EACCES);
+    }
+
+    /* Name too long: EINVAL, as lookup, rename and symlink report it. */
+    if (len+1 > CODA_MAXNAMLEN) {
+	*vpp = (struct vnode *)0;
+	MARK_INT_FAIL(CODA_MKDIR_STATS);
+	return(EINVAL);
+    }
+
+    error = venus_mkdir(vtomi(dvp), &dcp->c_fid, nm, len, va, cred, p, &VFid, &ova);
+    if (!error) {
+	if (coda_find(&VFid) != NULL)
+	    panic("cnode existed for newly created directory!");
+	cp =  make_coda_node(&VFid, dvp->v_mount, va->va_type);
+	*vpp = CTOV(cp);
+	/* enter the new vnode in the Name Cache */
+	coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+
+	/* as a side effect, enter "." and ".." for the directory */
+	coda_nc_enter(VTOC(*vpp), ".", 1, cred, VTOC(*vpp));
+	coda_nc_enter(VTOC(*vpp), "..", 2, cred, VTOC(dvp));
+	if (coda_attr_cache) {
+	    VTOC(*vpp)->c_vattr = ova;		/* update the attr cache */
+	    VTOC(*vpp)->c_flags |= C_VATTR;	/* Valid attributes in cnode */
+	}
+
+	/* Invalidate the parent's attr cache, the modification time has changed */
+	VTOC(dvp)->c_flags &= ~C_VATTR;
+	CODADEBUG( CODA_MKDIR, myprintf(("mkdir: (%lx.%lx.%lx) result %d\n",
+				    VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
+    } else {
+	*vpp = (struct vnode *)0;
+	CODADEBUG(CODA_MKDIR, myprintf(("mkdir error %d\n",error));)
+    }
+
+    /* Have to free the previously saved name */
+    /* 
+     * ufs_mkdir doesn't check for SAVESTART before freeing the
+     * pathname buffer, but ufs_create does.  For the moment, I'll
+     * follow their lead, but this seems like it is probably
+     * incorrect.  
+     */
+    zfree(namei_zone, cnp->cn_pnbuf);
+    return(error);
+}
+
+/*
+ * rmdir vnode op: drop the name-cache entries tied to the directory named
+ * by cnp inside dvp, invalidate dvp's cached attributes, then ask Venus to
+ * remove it.  Returns 0 or an errno (ENOENT for the control object).
+ */
+int
+coda_rmdir(v)
+    void *v;
+{
+    struct vop_rmdir_args *ap = v;
+    struct vnode *dvp = ap->a_dvp;
+    struct cnode *dcp = VTOC(dvp);
+    struct componentname  *cnp = ap->a_cnp;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    struct cnode *victim;
+    int error;
+
+    MARK_ENTRY(CODA_RMDIR_STATS);
+    /* rmdir of the control object is refused. */
+    if (IS_CTL_NAME(dvp, nm, len)) {
+	MARK_INT_FAIL(CODA_RMDIR_STATS);
+	return(ENOENT);
+    }
+
+    /*
+     * Purge the name cache before asking Venus.  We're being
+     * conservative here: this person might not really have sufficient
+     * access to delete the directory, but zapping the entries won't
+     * really hurt anyone -- dcs.  Entries for children of the directory
+     * (especially "." and "..") go first, then its own entry in dvp.
+     */
+    victim = coda_nc_lookup(dcp, nm, len, cred);
+    if (victim != NULL) {
+	coda_nc_zapParentfid(&(victim->c_fid), NOT_DOWNCALL);
+    }
+    coda_nc_zapfile(dcp, nm, len);
+
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    dcp->c_flags &= ~C_VATTR;
+
+    error = venus_rmdir(vtomi(dvp), &dcp->c_fid, nm, len, cred, p);
+    CODADEBUG(CODA_RMDIR, myprintf(("in rmdir result %d\n", error)); )
+    /* Release the pathname buffer unless SAVESTART is set. */
+    if (!(cnp->cn_flags & SAVESTART)) {
+	zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    return(error);
+}
+
+/*
+ * symlink vnode op: ask Venus to create, in directory ap->a_dvp, a symbolic
+ * link named by cnp whose contents are ap->a_target.  The parent's cached
+ * attributes are invalidated after the upcall.  Every exit, including the
+ * argument checks, goes through the common tail so that the pathname
+ * buffer is released (unless SAVESTART is set) exactly as it is when the
+ * upcall itself fails.  Returns 0 or an errno; no vnode is returned.
+ */
+int
+coda_symlink(v)
+    void *v;
+{
+/* true args */
+    struct vop_symlink_args *ap = v;
+    struct vnode *tdvp = ap->a_dvp;
+    struct cnode *tdcp = VTOC(tdvp);	
+    struct componentname *cnp = ap->a_cnp;
+    struct vattr *tva = ap->a_vap;
+    char *path = ap->a_target;
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+/* locals */
+    int error;
+    /* 
+     * XXX I'm assuming the following things about coda_symlink's
+     * arguments: 
+     *       t(foo) is the new name/parent/etc being created.
+     *       lname is the contents of the new symlink. 
+     */
+    char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    int plen = strlen(path);
+
+    /* XXX What about the vpp argument?  Do we need it? */
+    /* 
+     * Here's the strategy for the moment: perform the symlink, then
+     * do a lookup to grab the resulting vnode.  I know this requires
+     * two communications with Venus for a new symbolic link, but
+     * that's the way the ball bounces.  I don't yet want to change
+     * the way the Mach symlink works.  When Mach support is
+     * deprecated, we should change symlink so that the common case
+     * returns the resultant vnode in a vpp argument.
+     */
+
+    MARK_ENTRY(CODA_SYMLINK_STATS);
+
+    /* Check for symlink of control object. */
+    if (IS_CTL_NAME(tdvp, nm, len)) {
+	MARK_INT_FAIL(CODA_SYMLINK_STATS);
+	error = EACCES;
+	goto exit;
+    }
+    if (plen+1 > CODA_MAXPATHLEN || len+1 > CODA_MAXNAMLEN) {
+	MARK_INT_FAIL(CODA_SYMLINK_STATS);
+	error = EINVAL;
+	goto exit;
+    }
+
+    error = venus_symlink(vtomi(tdvp), &tdcp->c_fid, path, plen, nm, len, tva, cred, p);
+
+    /* Invalidate the parent's attr cache, the modification time has changed */
+    tdcp->c_flags &= ~C_VATTR;
+
+ exit:
+    /* Free the name buffer on every way out, error or not. */
+    if ((cnp->cn_flags & SAVESTART) == 0) {
+	zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); )
+    return(error);
+}
+
+/*
+ * readdir vnode op: read directory entries by handing the request to the container vnode
+ * (c_ovp), opening it internally first if needed.  Returns 0 or an errno (ENOENT for ctlvp). */
+int
+coda_readdir(v)
+    void *v;
+{
+/* true args */
+    struct vop_readdir_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    register struct uio *uiop = ap->a_uio;
+    struct ucred *cred = ap->a_cred;
+    int *eofflag = ap->a_eofflag;
+    u_long **cookies = ap->a_cookies;
+    int *ncookies = ap->a_ncookies;
+    struct proc *p = ap->a_uio->uio_procp;
+/* upcall decl */
+/* locals */
+    int error = 0;
+
+    MARK_ENTRY(CODA_READDIR_STATS);
+
+    CODADEBUG(CODA_READDIR, myprintf(("coda_readdir(%p, %d, %qd, %d)\n", uiop->uio_iov->iov_base, uiop->uio_resid, uiop->uio_offset, uiop->uio_segflg)); )
+	
+    /* Check for readdir of control object. */
+    if (IS_CTL_VP(vp)) {
+	MARK_INT_FAIL(CODA_READDIR_STATS);
+	return(ENOENT);
+    }
+
+    {
+	/* If directory is not already open do an "internal open" on it. */
+	int opened_internally = 0;
+	if (cp->c_ovp == NULL) {
+	    opened_internally = 1;
+	    MARK_INT_GEN(CODA_OPEN_STATS);
+	    error = VOP_OPEN(vp, FREAD, cred, p);
+printf("coda_readdir: Internally Opening %p\n", vp);	/* NOTE(review): unconditional console output, printed before the error check */
+	    if (error) {
+		printf("coda_readdir: VOP_OPEN on container failed %d\n", error);
+		return (error);
+	    }
+	    if (vp->v_type == VREG) {	/* NOTE(review): same VREG test as in coda_rdwr; confirm it is meant for readdir */
+		error = vfs_object_create(vp, p, cred);
+		if (error != 0) {
+		    printf("coda_readdir: vfs_object_create() returns %d\n", error);
+		    vput(vp);	/* NOTE(review): vput() on the caller's vnode, and the return below skips the internal close - confirm */
+		}
+	    }
+	    if (error) return(error);
+	}
+	
+	/* Have UFS handle the call. */
+	CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = (%lx.%lx.%lx), refcnt = %d\n",cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique, vp->v_usecount)); )
+	error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies,
+			       cookies);
+	
+	if (error)
+	    MARK_INT_FAIL(CODA_READDIR_STATS);
+	else
+	    MARK_INT_SAT(CODA_READDIR_STATS);
+	
+	/* Do an "internal close" if necessary. */ 
+	if (opened_internally) {
+	    MARK_INT_GEN(CODA_CLOSE_STATS);
+	    (void)VOP_CLOSE(vp, FREAD, cred, p);
+	}
+    }
+
+    return(error);
+}
+
+/*
+ * Convert from file system blocks to device blocks.
+ *
+ * Coda neither maps blocks itself nor forwards the request to the
+ * container vnode.  A VOP_BMAP(cp->c_ovp, ...) call placed after an
+ * unconditional return could never run; it is left out here together
+ * with the locals only it referenced.  Callers see the same results.
+ *
+ * Argument: struct vop_bmap_args; only a_vp is read.  The a_vpp, a_bnp,
+ * a_runp and a_runb outputs are left untouched on every path.
+ *
+ * Returns:
+ *	EINVAL		the cnode has a container vnode (c_ovp is set)
+ *	EOPNOTSUPP	the cnode has no container vnode
+ */
+int
+coda_bmap(v)
+    void *v;
+{
+/* true args */
+    struct vop_bmap_args *ap = v;
+    struct vnode *vp = ap->a_vp;	/* file's vnode */
+/* locals */
+    struct cnode *cp;
+
+    cp = VTOC(vp);
+
+    /*
+     * A container exists, but mapping through it is not supported;
+     * the request is rejected as invalid.
+     */
+    if (cp->c_ovp != NULL)
+	return (EINVAL);
+
+    /* No container at all. */
+    return (EOPNOTSUPP);
+}
+
+/*
+ * I don't think the following two things are used anywhere, so I've
+ * commented them out 
+ * 
+ * struct buf *async_bufhead; 
+ * int async_daemon_count;
+ */
+int
+coda_strategy(v)
+    void *v;
+{
+    /*
+     * Strategy requests are not expected on Coda vnodes.  The argument
+     * block (a struct vop_strategy_args) is not examined: copies of
+     * a_bp and curproc would never be read, so none are declared.
+     * Log the call and fail it; returns EOPNOTSUPP unconditionally.
+     */
+
+    printf("coda_strategy: called ???\n");
+    return (EOPNOTSUPP);
+}
+
+/* Reclaim a Coda vnode: cache_purge() it and free its cnode; returns 0. */
+int
+coda_reclaim(v)
+    void *v;
+{
+/* true args */
+    struct vop_reclaim_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+
+/*
+ * Forced unmount/flush will let vnodes with non zero use be destroyed!
+ */
+    ENTRY;
+
+    if (IS_UNMOUNTING(cp)) {
+#ifdef	DEBUG
+	/* IS_UNMOUNTING(cp) already holds here; it is not tested again. */
+	if (cp->c_ovp)
+	    printf("coda_reclaim: c_ovp not void: vp %p, cp %p\n", vp, cp);
+#endif
+    } else {
+#ifdef OLD_DIAGNOSTIC
+	/* printf here: print() was presumably a typo and would not build. */
+	if (vp->v_usecount != 0)
+	    printf("coda_reclaim: pushing active %p\n", vp);
+	if (cp->c_ovp)
+	    panic("coda_reclaim: c_ovp not void");
+#endif
+    }
+    cache_purge(vp);
+    coda_free(cp);
+    /* make_coda_node() stores the cnode in v_data; clear it there rather
+     * than by assigning to the VTOC() expansion. */
+    vp->v_data = NULL;
+    return (0);
+}
+
+/*
+ * Lock a Coda vnode: run the lock manager on the cnode's c_lock with the
+ * caller's flags and the vnode interlock, and return its result.
+ */
+int
+coda_lock(v)
+    void *v;
+{
+    struct vop_lock_args *args = v;
+    struct vnode *node = args->a_vp;
+    struct cnode *cn = VTOC(node);
+
+    ENTRY;
+
+    if (coda_lockdebug) {
+	myprintf(("Attempting lock on %lx.%lx.%lx\n",
+		  cn->c_fid.Volume, cn->c_fid.Vnode, cn->c_fid.Unique));
+    }
+
+#ifdef	DEBUG_LOCKS
+    return (debuglockmgr(&cn->c_lock, args->a_flags, &node->v_interlock,
+			 args->a_p, "coda_lock", node->filename, node->line));
+#else
+    return (lockmgr(&cn->c_lock, args->a_flags, &node->v_interlock, args->a_p));
+#endif
+}
+
+/*
+ * Unlock a Coda vnode: call lockmgr() on the cnode's c_lock with
+ * LK_RELEASE or'ed into the caller's flags and return its result.
+ */
+int
+coda_unlock(v)
+    void *v;
+{
+    struct vop_unlock_args *args = v;
+    struct vnode *node = args->a_vp;
+    struct cnode *cn = VTOC(node);
+
+    ENTRY;
+    if (coda_lockdebug) {
+	myprintf(("Attempting unlock on %lx.%lx.%lx\n",
+		  cn->c_fid.Volume, cn->c_fid.Vnode, cn->c_fid.Unique));
+    }
+    return (lockmgr(&cn->c_lock, args->a_flags | LK_RELEASE,
+		    &node->v_interlock, args->a_p));
+}
+
+/* Report the state of the cnode's c_lock as given by lockstatus(). */
+int
+coda_islocked(v)
+    void *v;
+{
+    struct vop_islocked_args *args = v;
+    struct vnode *node = args->a_vp;
+    struct cnode *cn = VTOC(node);
+    ENTRY;
+    return (lockstatus(&cn->c_lock));
+}
+
+/* Look up the vnode for a device/inode pair: 0 on success (vnode in *vpp),
+ * ENXIO when devtomp() returns no mount, ENOENT when VFS_VGET() fails. */
+int
+coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp)
+{
+    struct mount *mnt = devtomp(dev);
+    int rc;
+
+    if (mnt == NULL) {
+	myprintf(("coda_grab_vnode: devtomp(%d) returns NULL\n", dev));
+	return (ENXIO);
+    }
+
+    rc = VFS_VGET(mnt, ino, vpp);
+    if (rc == 0)
+	return (0);
+
+    /* Every VFS_VGET failure is reported to the caller as ENOENT. */
+    myprintf(("coda_grab_vnode: iget/vget(%d, %d) returns %p, err %d\n",
+	      dev, ino, *vpp, rc));
+    return (ENOENT);
+}
+
+/*
+ * Dump the contents of a vattr through myprintf().
+ *
+ * attr: the attribute record to print; it is only read.
+ *
+ * The first line carries a four-character tag for va_type ("????" when
+ * the type is none of the ones listed) followed by mode, uid, gid, fsid
+ * and rdev.  Further lines show fileid, nlink and the size fields, then
+ * gen/flags/vaflags, then the seconds and nanoseconds of the atime,
+ * mtime and ctime fields.  All fields except gen, flags and vaflags
+ * are cast to int for printing.
+ */
+void
+print_vattr( attr )
+	struct vattr *attr;
+{
+    char *tag = "????";
+
+    switch (attr->va_type) {
+    case VNON:	tag = "VNON"; break;
+    case VREG:	tag = "VREG"; break;
+    case VDIR:	tag = "VDIR"; break;
+    case VBLK:	tag = "VBLK"; break;
+    case VCHR:	tag = "VCHR"; break;
+    case VLNK:	tag = "VLNK"; break;
+    case VSOCK:	tag = "VSCK"; break;
+    case VFIFO:	tag = "VFFO"; break;
+    case VBAD:	tag = "VBAD"; break;
+    default:
+	break;
+    }
+
+    /* Type tag, mode, owner ids and device numbers. */
+    myprintf(("attr: type %s mode %d uid %d gid %d fsid %d rdev %d\n",
+	      tag, (int)attr->va_mode, (int)attr->va_uid,
+	      (int)attr->va_gid, (int)attr->va_fsid, (int)attr->va_rdev));
+
+    /* File id, link count and sizes. */
+    myprintf(("      fileid %d nlink %d size %d blocksize %d bytes %d\n",
+	      (int)attr->va_fileid, (int)attr->va_nlink,
+	      (int)attr->va_size,
+	      (int)attr->va_blocksize,(int)attr->va_bytes));
+
+    /* Generation number and flag words, printed without casts. */
+    myprintf(("      gen %ld flags %ld vaflags %d\n",
+	      attr->va_gen, attr->va_flags, attr->va_vaflags));
+
+    /* Timestamps, one line each. */
+    myprintf(("      atime sec %d nsec %d\n",
+	      (int)attr->va_atime.tv_sec, (int)attr->va_atime.tv_nsec));
+
+    myprintf(("      mtime sec %d nsec %d\n",
+	      (int)attr->va_mtime.tv_sec, (int)attr->va_mtime.tv_nsec));
+
+    myprintf(("      ctime sec %d nsec %d\n",
+	      (int)attr->va_ctime.tv_sec, (int)attr->va_ctime.tv_nsec));
+}
+
+/* Print a ucred via myprintf(): ref count and uid, then one line per
+ * group, then an empty line. */
+void
+print_cred(cred)
+	struct ucred *cred;
+{
+	int grp = 0;
+
+	myprintf(("ref %d\tuid %d\n",cred->cr_ref,cred->cr_uid));
+	while (grp < cred->cr_ngroups) {
+		myprintf(("\tgroup %d: (%d)\n",grp,cred->cr_groups[grp]));
+		grp++;
+	}
+	myprintf(("\n"));
+}
+
+/*
+ * Return the cnode for the given fid.
+ * If coda_find() already has one, take a further reference on its vnode
+ * with vref() and return it.  Otherwise allocate a cnode, initialise its
+ * lock and fid, attach a new VT_CODA vnode of the requested type, enter
+ * the cnode with coda_save() and return it; getnewvnode() errors panic.
+ */
+struct cnode *
+make_coda_node(fid, vfsp, type)
+     ViceFid *fid; struct mount *vfsp; short type;
+{
+    struct cnode *node;
+    struct vnode *fresh;
+    int rc;
+
+    node = coda_find(fid);
+    if (node != NULL) {
+	/* Known fid: share the existing cnode. */
+	vref(CTOV(node));
+	return node;
+    }
+
+    node = coda_alloc();
+    lockinit(&node->c_lock, PINOD, "cnode", 0, 0);
+    node->c_fid = *fid;
+
+    rc = getnewvnode(VT_CODA, vfsp, coda_vnodeop_p, &fresh);
+    if (rc != 0)
+	panic("coda: getnewvnode returned error %d\n", rc);
+
+    /* Point vnode and cnode at each other, then record the cnode. */
+    fresh->v_data = node;
+    fresh->v_type = type;
+    node->c_vnode = fresh;
+    coda_save(node);
+    return node;
+}
diff --git a/sys/fs/coda/coda_vnops.h b/sys/fs/coda/coda_vnops.h
new file mode 100644
index 0000000..6c787d5
--- /dev/null
+++ b/sys/fs/coda/coda_vnops.h
@@ -0,0 +1,142 @@
+/*
+ * 
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ * 
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ * 
+ * Permission  to  use, copy, modify and distribute this software and its
+ * documentation is hereby granted,  provided  that  both  the  copyright
+ * notice  and  this  permission  notice  appear  in  all  copies  of the
+ * software, derivative works or  modified  versions,  and  any  portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University  in  all  documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ * 
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS  KNOWN  TO  HAVE  BUGS,
+ * SOME  OF  WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.   CARNEGIE  MELLON
+ * DISCLAIMS  ANY  LIABILITY  OF  ANY  KIND  FOR  ANY  DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE  OR  OF
+ * ANY DERIVATIVE WORK.
+ * 
+ * Carnegie  Mellon  encourages  users  of  this  software  to return any
+ * improvements or extensions that  they  make,  and  to  grant  Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ * 
+ * 	@(#) src/sys/coda/coda_vnops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_vnops.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ * 
+  */
+
+/* 
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.  
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_vnops.h,v $
+ * Revision 1.3  1998/09/11 18:50:17  rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda.  (Same for CFS.)
+ *
+ * Revision 1.2  1998/09/02 19:09:53  rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1  1998/08/29 21:14:52  rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.7  1998/08/28 18:12:24  rvb
+ * Now it also works on FreeBSD -current.  This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees.  It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.6  1998/08/18 17:05:22  rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.5  1998/08/18 16:31:47  rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.4  98/01/23  11:53:49  rvb
+ * Bring RVB_CODA1_1 to HEAD
+ * 
+ * Revision 1.3.2.3  98/01/23  11:21:13  rvb
+ * Sync with 2.2.5
+ * 
+ * Revision 1.3.2.2  97/12/16  12:40:20  rvb
+ * Sync with 1.3
+ * 
+ * Revision 1.3.2.1  97/12/10  14:08:34  rvb
+ * Fix O_ flags; check result in coda_call
+ * 
+ * Revision 1.3  97/12/05  10:39:25  rvb
+ * Read CHANGES
+ * 
+ * Revision 1.2.34.2  97/11/20  11:46:54  rvb
+ * Capture current cfs_venus
+ * 
+ * Revision 1.2.34.1  97/11/13  22:03:04  rvb
+ * pass2 cfs_NetBSD.h mt
+ * 
+ * Revision 1.2  96/01/02  16:57:14  bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ * 
+ * Revision 1.1.2.1  1995/12/20 01:57:40  bnoble
+ * Added CODA-specific files
+ *
+ */
+
+/* Vnode operation entry points (originally the NetBSD interfaces); each takes its vop_*_args block as a void *. */
+int coda_open      __P((void *));
+int coda_close     __P((void *));
+int coda_read      __P((void *));
+int coda_write     __P((void *));
+int coda_ioctl     __P((void *));
+/* 1.3 int cfs_select    __P((void *));*/
+int coda_getattr   __P((void *));
+int coda_setattr   __P((void *));
+int coda_access    __P((void *));
+int coda_abortop   __P((void *));
+int coda_readlink  __P((void *));
+int coda_fsync     __P((void *));
+int coda_inactive  __P((void *));
+int coda_lookup    __P((void *));
+int coda_create    __P((void *));
+int coda_remove    __P((void *));
+int coda_link      __P((void *));
+int coda_rename    __P((void *));
+int coda_mkdir     __P((void *));
+int coda_rmdir     __P((void *));
+int coda_symlink   __P((void *));
+int coda_readdir   __P((void *));
+int coda_bmap      __P((void *));
+int coda_strategy  __P((void *));
+int coda_reclaim   __P((void *));
+int coda_lock      __P((void *));
+int coda_unlock    __P((void *));
+int coda_islocked  __P((void *));
+int coda_vop_error   __P((void *));
+int coda_vop_nop     __P((void *));
+int coda_fbsd_getpages	__P((void *));
+int coda_fbsd_putpages	__P((void *));
+/* Vnode operation vector.  NOTE(review): no "extern", so every includer gets a tentative definition -- confirm intended. */
+int (**coda_vnodeop_p)(void *);
+/* Helpers that are not vnode operations themselves. */
+int coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw,
+    int ioflag, struct ucred *cred, struct proc *p);
+int coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp);
+void print_vattr(struct vattr *attr);
+void print_cred(struct ucred *cred);
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
new file mode 100644
index 0000000..4e3853c
--- /dev/null
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)dead_vnops.c	8.1 (Berkeley) 6/10/93
+ * $Id: dead_vnops.c,v 1.24 1998/08/23 11:43:29 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/poll.h>
+
+static int	chkvnlock __P((struct vnode *));
+/*
+ * Prototypes for dead operations on vnodes.
+ */
+static int	dead_badop __P((void));
+static int	dead_bmap __P((struct vop_bmap_args *));
+static int	dead_ioctl __P((struct vop_ioctl_args *));
+static int	dead_lock __P((struct vop_lock_args *));
+static int	dead_lookup __P((struct vop_lookup_args *));
+static int	dead_open __P((struct vop_open_args *));
+static int	dead_poll __P((struct vop_poll_args *));
+static int	dead_print __P((struct vop_print_args *));
+static int	dead_read __P((struct vop_read_args *));
+static int	dead_write __P((struct vop_write_args *));
+
+vop_t **dead_vnodeop_p;		/* operations vector described by the table below */
+static struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_access_desc,		(vop_t *) vop_ebadf },
+	{ &vop_advlock_desc,		(vop_t *) vop_ebadf },
+	{ &vop_bmap_desc,		(vop_t *) dead_bmap },
+	{ &vop_create_desc,		(vop_t *) dead_badop },
+	{ &vop_getattr_desc,		(vop_t *) vop_ebadf },
+	{ &vop_inactive_desc,		(vop_t *) vop_null },
+	{ &vop_ioctl_desc,		(vop_t *) dead_ioctl },
+	{ &vop_link_desc,		(vop_t *) dead_badop },
+	{ &vop_lock_desc,		(vop_t *) dead_lock },
+	{ &vop_lookup_desc,		(vop_t *) dead_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) dead_badop },
+	{ &vop_mknod_desc,		(vop_t *) dead_badop },
+	{ &vop_mmap_desc,		(vop_t *) dead_badop },
+	{ &vop_open_desc,		(vop_t *) dead_open },
+	{ &vop_pathconf_desc,		(vop_t *) vop_ebadf },	/* per pathconf(2) */
+	{ &vop_poll_desc,		(vop_t *) dead_poll },
+	{ &vop_print_desc,		(vop_t *) dead_print },
+	{ &vop_read_desc,		(vop_t *) dead_read },
+	{ &vop_readdir_desc,		(vop_t *) vop_ebadf },
+	{ &vop_readlink_desc,		(vop_t *) vop_ebadf },
+	{ &vop_reclaim_desc,		(vop_t *) vop_null },
+	{ &vop_remove_desc,		(vop_t *) dead_badop },
+	{ &vop_rename_desc,		(vop_t *) dead_badop },
+	{ &vop_rmdir_desc,		(vop_t *) dead_badop },
+	{ &vop_setattr_desc,		(vop_t *) vop_ebadf },
+	{ &vop_symlink_desc,		(vop_t *) dead_badop },
+	{ &vop_write_desc,		(vop_t *) dead_write },
+	{ NULL, NULL }			/* end-of-table marker */
+};
+static struct vnodeopv_desc dead_vnodeop_opv_desc =
+	{ &dead_vnodeop_p, dead_vnodeop_entries };
+/* NOTE(review): VNODEOP_SET presumably registers the descriptor with the VFS layer -- confirm. */
+VNODEOP_SET(dead_vnodeop_opv_desc);
+
+/*
+ * Lookup on a dead vnode: store NULL through a_vpp and fail with ENOTDIR.
+ */
+/* ARGSUSED */
+static int
+dead_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **result = ap->a_vpp;
+	*result = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open on a dead vnode always fails with ENXIO, as if the device did not exist.
+ */
+/* ARGSUSED */
+static int
+dead_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* None of the arguments is examined. */
+	return (ENXIO);
+}
+
+/*
+ * Read from a dead vnode: 0 (end-of-file) for tty vnodes, EIO for all
+ * others.  Panics if chkvnlock() reports that it had to wait.
+ */
+/* ARGSUSED */
+static int
+dead_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	if (chkvnlock(vp) != 0)
+		panic("dead_read: lock");
+	/* A tty reads as end-of-file; anything else is an I/O error. */
+	if (vp->v_flag & VISTTY)
+		return (0);
+	return (EIO);
+}
+
+/*
+ * Write to a dead vnode: always EIO; panics if chkvnlock() had to wait.
+ */
+/* ARGSUSED */
+static int
+dead_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int waited = chkvnlock(ap->a_vp);
+	if (waited)
+		panic("dead_write: lock");
+	return (EIO);
+}
+
+/*
+ * ioctl on a dead vnode: ENOTTY unless chkvnlock() slept, then retry via VCALL.
+ */
+/* ARGSUSED */
+static int
+dead_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* Only a vnode that was caught changing state gets a second try. */
+	if (chkvnlock(ap->a_vp))
+		return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap));
+	return (ENOTTY);
+}
+
+
+/*
+ * Lock a dead vnode.  Drops the interlock if the caller passed
+ * LK_INTERLOCK, then returns 0 unless chkvnlock() had to wait, in
+ * which case the request is dispatched again with VCALL.
+ */
+static int
+dead_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target = ap->a_vp;
+	int had_interlock = ap->a_flags & LK_INTERLOCK;
+
+	/* No lock manager is involved here, so release the interlock by hand. */
+	if (had_interlock != 0) {
+		simple_unlock(&target->v_interlock);
+		ap->a_flags &= ~LK_INTERLOCK;
+	}
+	if (chkvnlock(target))
+		return (VCALL(target, VOFFSET(vop_lock), ap));
+	return (0);
+}
+
+/*
+ * bmap on a dead vnode: EIO unless chkvnlock() slept, then reissue VOP_BMAP.
+ */
+static int
+dead_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	/* Only after waiting on the vnode is the mapping request retried. */
+	if (chkvnlock(ap->a_vp))
+		return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp, ap->a_runb));
+	return (EIO);
+}
+
+/*
+ * Print a fixed description line for a dead vnode; a_vp is not examined.  Returns 0.
+ */
+/* ARGSUSED */
+static int
+dead_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, dead vnode\n");
+	return (0);
+}
+
+/*
+ * Bad operation on a dead vnode: any call routed to this handler panics.
+ */
+static int
+dead_badop()
+{
+
+	panic("dead_badop called");
+	/* NOTREACHED */
+}
+
+/*
+ * Sleep while the vnode is in a state of change (VXLOCK set), flagging
+ * VXWANT first.  Returns 1 if it slept, else 0.  static, as prototyped.
+ */
+static int
+chkvnlock(vp)
+	register struct vnode *vp;
+{
+	int locked = 0;
+
+	while (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		(void) tsleep((caddr_t)vp, PINOD, "ckvnlk", 0);
+		locked = 1;
+	}
+	return (locked);
+}
+
+/*
+ * Trivial poll routine that always returns POLLHUP; ap is not examined.
+ * This is necessary so that a process which is polling a file
+ * gets notified when that file is revoke()d.
+ */
+static int
+dead_poll(ap)
+	struct vop_poll_args *ap;
+{
+	return (POLLHUP);
+}
diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h
new file mode 100644
index 0000000..bbba54d
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc.h	8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.5 1997/02/22 09:40:14 peter Exp $
+ */
+
+#ifdef KERNEL
+struct fdescmount {			/* per-mount data, reached through VFSTOFDESC() */
+	struct vnode	*f_root;	/* Root node */
+};
+/* Node index values; fdesc_mount() passes FD_ROOT to fdesc_allocvp().  NOTE(review): use of the others is not visible here. */
+#define FD_ROOT		2
+#define FD_DEVFD	3
+#define FD_STDIN	4
+#define FD_STDOUT	5
+#define FD_STDERR	6
+#define FD_CTTY		7
+#define FD_DESC		8
+#define FD_MAX		12
+/* Kind of node; the type of fdesc_allocvp()'s first argument and of fd_type below. */
+typedef enum {
+	Froot,
+	Fdevfd,
+	Fdesc,
+	Flink,
+	Fctty
+} fdntype;
+/* Per-vnode private data, reached through VTOFDESC(). */
+struct fdescnode {
+	LIST_ENTRY(fdescnode) fd_hash;	/* Hash list */
+	struct vnode	*fd_vnode;	/* Back ptr to vnode */
+	fdntype		fd_type;	/* Type of this node */
+	unsigned	fd_fd;		/* Fd to be dup'ed */
+	char		*fd_link;	/* Link to fd/n */
+	int		fd_ix;		/* filesystem index */
+};
+/* Casts of a mount's mnt_data and a vnode's v_data to the fdesc structures above. */
+#define VFSTOFDESC(mp)	((struct fdescmount *)((mp)->mnt_data))
+#define	VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)
+/* Objects and functions declared here but defined outside this header. */
+extern dev_t devctty;
+extern int fdesc_init __P((struct vfsconf *));
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));
+#endif /* KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
new file mode 100644
index 0000000..758f3b5
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc_vfsops.c	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.17 1999/01/12 11:49:30 eivind Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+static MALLOC_DEFINE(M_FDESCMNT, "FDESC mount", "FDESC mount structure");
+
+static int	fdesc_mount __P((struct mount *mp, char *path, caddr_t data,
+				 struct nameidata *ndp, struct proc *p));
+static int	fdesc_start __P((struct mount *mp, int flags, struct proc *p));
+static int	fdesc_unmount __P((struct mount *mp, int mntflags,
+				   struct proc *p));
+static int	fdesc_statfs __P((struct mount *mp, struct statfs *sbp,
+				  struct proc *p));
+static int	fdesc_sync __P((struct mount *mp, int waitfor,
+				struct ucred *cred, struct proc *p));
+  
+/*
+ * Mount the per-process file descriptors (/dev/fd); MNT_UPDATE is rejected.
+ */
+static int
+fdesc_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	u_int size;
+	struct fdescmount *fmp;
+	struct vnode *rvp;
+
+	/*
+	 * Updating an existing mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp);
+	if (error)
+		return (error);
+	/* Build the mount-private data around the new root vnode. */
+	MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
+				M_FDESCMNT, M_WAITOK);	/* XXX */
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;
+	fmp->f_root = rvp;
+	/* XXX -- don't mark as local to work around fts() problems */
+	/*mp->mnt_flag |= MNT_LOCAL;*/
+	mp->mnt_data = (qaddr_t) fmp;
+	vfs_getnewfsid(mp);
+	/* NOTE(review): copyinstr()'s result is discarded and size is used regardless -- confirm size is always set. */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc"));
+	(void)fdesc_statfs(mp, &mp->mnt_stat, p);
+	return (0);
+}
+
+static int
+fdesc_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);	/* no work is done; the arguments are not examined */
+}
+
+static int
+fdesc_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	int error;
+	int flags = 0;
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+	/* Unmount: returns EBUSY, a vflush() error, or 0 once the mount data is freed. */
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/*
+	 * Fail with EBUSY while the root vnode's use count exceeds one;
+	 * this check is made even when MNT_FORCE was given.  Then
+	 * vflush() the mount, passing rootvp as its second argument.
+	 */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	if ((error = vflush(mp, rootvp, flags)) != 0)
+		return (error);
+
+	/*
+	 * Release reference on underlying root vnode
+	 */
+	vrele(rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(rootvp);
+	/*
+	 * Finally, throw away the fdescmount structure
+	 */
+	free(mp->mnt_data, M_FDESCMNT);	/* XXX */
+	mp->mnt_data = 0;
+
+	return (0);
+}
+
+/*
+ * Hand the caller the mount's root vnode with a new reference taken
+ * (VREF) and locked LK_EXCLUSIVE | LK_RETRY.  Always returns 0.
+ */
+int
+fdesc_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct fdescmount *fmp = VFSTOFDESC(mp);
+	struct vnode *root = fmp->f_root;
+
+	VREF(root);
+	vn_lock(root, LK_EXCLUSIVE | LK_RETRY, curproc);	/* XXX curproc */
+	*vpp = root;
+	return (0);
+}
+
+/*
+ * Fill in *sbp for an fdesc mount.  f_files is the process's open-file
+ * limit plus one, f_ffree an estimate of its unused descriptors, and
+ * the block counts are fixed.  Always returns 0.
+ */
+static int
+fdesc_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct filedesc *fds = p->p_fd;
+	int limit = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+	int end = min(fds->fd_nfiles, limit);
+	int unused = 0;
+	int fd;
+
+	/*
+	 * Count the empty slots between fd_freefile and the smaller of the
+	 * table size and the limit.  The figure is unreliable if the limit
+	 * has been lowered below the number of files already open.
+	 */
+	for (fd = fds->fd_freefile; fd < end; fd++) {
+		if (fds->fd_ofiles[fd] == NULL)
+			unused++;
+	}
+	/* Slots the table has not grown to yet also count as free. */
+	if (limit > fds->fd_nfiles)
+		unused += limit - fds->fd_nfiles;
+
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = limit + 1;	/* Allow for "." */
+	sbp->f_ffree = unused;
+
+	/* Nothing to copy when handed the mount's own statfs buffer. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	sbp->f_type = mp->mnt_vfc->vfc_typenum;
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+static int
+fdesc_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/* No work is done; every argument is ignored and 0 is returned. */
+	return (0);
+}
+
+#define fdesc_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+	    struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp)
+#define fdesc_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+	    struct proc *)))eopnotsupp)
+#define fdesc_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+	    size_t, struct proc *)))eopnotsupp)
+#define fdesc_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+	    eopnotsupp)
+#define fdesc_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
+/* Operations table; the macros above cast eopnotsupp to each slot's signature (fdesc_sysctl is not referenced in it). */
+static struct vfsops fdesc_vfsops = {
+	fdesc_mount,
+	fdesc_start,
+	fdesc_unmount,
+	fdesc_root,
+	fdesc_quotactl,
+	fdesc_statfs,
+	fdesc_sync,
+	fdesc_vget,
+	fdesc_fhtovp,
+	fdesc_vptofh,
+	fdesc_init,
+};
+/* NOTE(review): VFS_SET presumably registers this table under the name "fdesc" -- confirm. */
+VFS_SET(fdesc_vfsops, fdesc, VFCF_SYNTHETIC);
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
new file mode 100644
index 0000000..6bdea5f
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fdesc_vnops.c	8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.40 1998/12/14 05:00:57 dillon Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>	/* boottime */
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/dirent.h>
+#include <sys/socketvar.h>
+#include <sys/conf.h>
+#include <miscfs/fdesc/fdesc.h>
+
+extern	struct cdevsw ctty_cdevsw;
+
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+#define FDL_WANT	0x01
+#define FDL_LOCKED	0x02
+static int fdcache_lock;
+
+static vop_t **fdesc_vnodeop_p;
+
+dev_t devctty;
+
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+#define	NFDCACHE 4
+#define FD_NHASH(ix) \
+	(&fdhashtbl[(ix) & fdhash])
+static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl;
+static u_long fdhash;
+
+static int	fdesc_attr __P((int fd, struct vattr *vap, struct ucred *cred,
+				struct proc *p));
+static int	fdesc_badop __P((void));
+static int	fdesc_getattr __P((struct vop_getattr_args *ap));
+static int	fdesc_inactive __P((struct vop_inactive_args *ap));
+static int	fdesc_ioctl __P((struct vop_ioctl_args *ap));
+static int	fdesc_lookup __P((struct vop_lookup_args *ap));
+static int	fdesc_open __P((struct vop_open_args *ap));
+static int	fdesc_print __P((struct vop_print_args *ap));
+static int	fdesc_read __P((struct vop_read_args *ap));
+static int	fdesc_readdir __P((struct vop_readdir_args *ap));
+static int	fdesc_readlink __P((struct vop_readlink_args *ap));
+static int	fdesc_reclaim __P((struct vop_reclaim_args *ap));
+static int	fdesc_poll __P((struct vop_poll_args *ap));
+static int	fdesc_setattr __P((struct vop_setattr_args *ap));
+static int	fdesc_write __P((struct vop_write_args *ap));
+
+/*
+ * One-time setup: choose the device number passed to the ctty driver
+ * and allocate the node hash table (FD_NHASH masks with fdhash).
+ */
+int
+fdesc_init(vfsp)
+	struct vfsconf *vfsp;		/* not used */
+{
+	devctty = makedev(nchrdev, 0);
+	fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
+	return (0);
+}
+
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+	fdntype ftype;
+	int ix;
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct proc *curp = curproc;	/* XXX */
+	struct fdhashhead *bucket = FD_NHASH(ix);
+	struct fdescnode *fdn;
+	int error;
+
+	/* Reuse a cached node with this index on this mount, if any. */
+rescan:
+	for (fdn = bucket->lh_first; fdn != 0; fdn = fdn->fd_hash.le_next) {
+		if (fdn->fd_ix != ix || fdn->fd_vnode->v_mount != mp)
+			continue;
+		if (vget(fdn->fd_vnode, 0, curp))
+			goto rescan;
+		*vpp = fdn->fd_vnode;
+		return (0);
+	}
+
+	/*
+	 * No cached node.  Serialise creation with the fdcache_lock flag
+	 * word, since getnewvnode can block; rescan after any sleep.
+	 */
+	if (fdcache_lock & FDL_LOCKED) {
+		fdcache_lock |= FDL_WANT;
+		(void) tsleep((caddr_t) &fdcache_lock, PINOD, "fdalvp", 0);
+		goto rescan;
+	}
+	fdcache_lock |= FDL_LOCKED;
+
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced
+	 * elsewhere if MALLOC should block.
+	 */
+	MALLOC(fdn, struct fdescnode *, sizeof(*fdn), M_TEMP, M_WAITOK);
+
+	error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+	if (error != 0) {
+		FREE(fdn, M_TEMP);
+		goto unlock;
+	}
+	fdn->fd_type = ftype;
+	fdn->fd_ix = ix;
+	fdn->fd_fd = -1;
+	fdn->fd_link = 0;
+	fdn->fd_vnode = *vpp;
+	(*vpp)->v_data = fdn;
+	LIST_INSERT_HEAD(bucket, fdn, fd_hash);
+
+unlock:;
+	fdcache_lock &= ~FDL_LOCKED;
+
+	if (fdcache_lock & FDL_WANT) {
+		fdcache_lock &= ~FDL_WANT;
+		wakeup((caddr_t) &fdcache_lock);
+	}
+
+	return (error);
+}
+
+/*
+ * Look up cnp's component in directory dvp.  On success *vpp is the
+ * vnode found; on failure *vpp is NULL and dvp is left locked.
+ */
+static int
+fdesc_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	char *pname = cnp->cn_nameptr;
+	struct proc *p = cnp->cn_proc;
+	int nfiles = p->p_fd->fd_nfiles;
+	unsigned fd = -1;
+	int error;
+	struct vnode *fvp;
+	char *ln;
+
+	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+		*vpp = NULL;	/* dvp not unlocked yet: must not use "bad" */
+		return (EROFS);
+	}
+
+	VOP_UNLOCK(dvp, 0, p);
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);
+		vn_lock(dvp, LK_SHARED | LK_RETRY, p);
+		return (0);
+	}
+
+	switch (VTOFDESC(dvp)->fd_type) {
+	default:
+	case Flink:
+	case Fdesc:
+	case Fctty:
+		error = ENOTDIR;	/* only Froot and Fdevfd have entries */
+		goto bad;
+
+	case Froot:
+		if (cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+			error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			*vpp = fvp;
+			fvp->v_type = VDIR;
+			vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+			return (0);
+		}
+
+		if (cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+			struct vnode *ttyvp = cttyvp(p);
+			if (ttyvp == NULL) {
+				error = ENXIO;	/* no controlling tty */
+				goto bad;
+			}
+			error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			*vpp = fvp;
+			fvp->v_type = VFIFO;
+			vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+			return (0);
+		}
+
+		ln = 0;
+		switch (cnp->cn_namelen) {
+		case 5:
+			if (bcmp(pname, "stdin", 5) == 0) {
+				ln = "fd/0";
+				fd = FD_STDIN;
+			}
+			break;
+		case 6:
+			if (bcmp(pname, "stdout", 6) == 0) {
+				ln = "fd/1";
+				fd = FD_STDOUT;
+			} else
+			if (bcmp(pname, "stderr", 6) == 0) {
+				ln = "fd/2";
+				fd = FD_STDERR;
+			}
+			break;
+		}
+
+		if (ln) {
+			error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
+			if (error)
+				goto bad;
+			VTOFDESC(fvp)->fd_link = ln;
+			*vpp = fvp;
+			fvp->v_type = VLNK;
+			vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+			return (0);
+		} else {
+			error = ENOENT;
+			goto bad;
+		}
+
+		/* NOTREACHED: both arms above leave the function */
+
+	case Fdevfd:
+		if (cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+			if ((error = fdesc_root(dvp->v_mount, vpp)) != 0)
+				goto bad;
+			return (0);
+		}
+
+		fd = 0;		/* parse a decimal descriptor number */
+		while (*pname >= '0' && *pname <= '9') {
+			fd = 10 * fd + *pname++ - '0';
+			if (fd >= nfiles)
+				break;	/* out of range; also bounds fd */
+		}
+
+		if (*pname != '\0') {
+			error = ENOENT;
+			goto bad;
+		}
+
+		if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+			error = EBADF;
+			goto bad;
+		}
+
+		error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+		if (error)
+			goto bad;
+		VTOFDESC(fvp)->fd_fd = fd;
+		vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+		*vpp = fvp;
+		return (0);
+	}
+
+bad:;
+	vn_lock(dvp, LK_SHARED | LK_RETRY, p);	/* undo the VOP_UNLOCK above */
+	*vpp = NULL;
+	return (error);
+}
+
+static int
+fdesc_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fdescnode *node = VTOFDESC(ap->a_vp);
+
+	/* The controlling-tty node is opened through the ctty driver. */
+	if (node->fd_type == Fctty)
+		return ((*ctty_cdevsw.d_open)(devctty, ap->a_mode, 0,
+		    ap->a_p));
+
+	/* Every remaining node type except Fdesc opens trivially. */
+	if (node->fd_type != Fdesc)
+		return (0);
+
+	/*
+	 * XXX Kludge: record in p->p_dupfd the descriptor that is
+	 * being sought for duplication and fail with ENODEV.  The
+	 * error return ensures that the vnode for this device will
+	 * be released by vn_open.  Open will detect this special
+	 * error and take the actions in dupfdopen.  Other callers
+	 * of vn_open or VOP_OPEN will simply report the error.
+	 */
+	ap->a_p->p_dupfd = node->fd_fd;	/* XXX */
+
+	return (ENODEV);
+}
+
+/*
+ * Fetch into *vap the attributes of whatever p's descriptor fd refers to.
+ */
+static int
+fdesc_attr(fd, vap, cred, p)
+	int fd;
+	struct vattr *vap;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct stat stb;
+	int error;
+
+	if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	switch (fp->f_type) {
+	case DTYPE_FIFO:
+	case DTYPE_VNODE:
+		error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+		if (error == 0 && vap->va_type == VDIR) {
+			/* Directories could loop the namespace: hide 'x'. */
+			vap->va_mode &= ~((VEXEC)|(VEXEC>>3)|(VEXEC>>6));
+		}
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &stb);
+		if (error == 0) {
+			vattr_null(vap);
+			vap->va_type = VSOCK;
+			vap->va_mode = stb.st_mode;
+			vap->va_nlink = stb.st_nlink;
+			vap->va_uid = stb.st_uid;
+			vap->va_gid = stb.st_gid;
+			vap->va_fsid = stb.st_dev;
+			vap->va_fileid = stb.st_ino;
+			vap->va_size = stb.st_size;
+			vap->va_blocksize = stb.st_blksize;
+			vap->va_atime = stb.st_atimespec;
+			vap->va_mtime = stb.st_mtimespec;
+			vap->va_ctime = stb.st_ctimespec;
+			vap->va_gen = stb.st_gen;
+			vap->va_flags = stb.st_flags;
+			vap->va_rdev = stb.st_rdev;
+			vap->va_bytes = stb.st_blocks * stb.st_blksize;
+		}
+		break;
+
+	default:
+		error = EBADF;	/* unsupported type: fail, do not panic */
+		break;
+	}
+
+	return (error);
+}
+
+static int
+fdesc_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct fdescnode *node = VTOFDESC(vp);
+	int error = 0;
+
+	switch (node->fd_type) {
+	case Froot:
+	case Fdevfd:
+	case Flink:
+	case Fctty:
+		bzero((caddr_t) vap, sizeof(*vap));
+		vattr_null(vap);
+		vap->va_fileid = node->fd_ix;
+
+		/*
+		 * Type, mode, link count and size depend on the node kind.
+		 */
+		if (node->fd_type == Flink) {
+			/* Symbolic link: size is the target's length. */
+			vap->va_type = VLNK;
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_nlink = 1;
+			vap->va_size = strlen(node->fd_link);
+		} else if (node->fd_type == Fctty) {
+			/* Controlling tty: reported as a FIFO. */
+			vap->va_type = VFIFO;
+			vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+		} else {
+			/* Froot and Fdevfd are directories. */
+			vap->va_type = VDIR;
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_nlink = 2;
+			vap->va_size = DEV_BSIZE;
+		}
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		vap->va_blocksize = DEV_BSIZE;
+		vap->va_atime.tv_sec = boottime.tv_sec;
+		vap->va_atime.tv_nsec = 0;
+		vap->va_mtime = vap->va_atime;
+		vap->va_ctime = vap->va_mtime;
+		vap->va_gen = 0;
+		vap->va_flags = 0;
+		vap->va_rdev = 0;
+		vap->va_bytes = 0;
+		break;
+
+	case Fdesc:
+		/* Report the attributes of the descriptor's own object. */
+		error = fdesc_attr(node->fd_fd, vap, ap->a_cred, ap->a_p);
+		break;
+
+	default:
+		panic("fdesc_getattr");
+		break;
+	}
+
+	if (error == 0)
+		vp->v_type = vap->va_type;
+
+	return (error);
+}
+
+/*
+ * Apply a setattr to the object behind an Fdesc node.  The ctty node only
+ * rejects flag changes; every other node type is refused with EACCES.
+ */
+static int
+fdesc_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct filedesc *fdp = ap->a_p->p_fd;
+	struct vattr *vap = ap->a_vap;
+	struct file *fp;
+	unsigned fd;
+	int error;
+
+	/* Can't mess with the root vnode (nor the directory or links). */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fdesc:
+		break;
+
+	case Fctty:
+		if (vap->va_flags != VNOVAL)
+			return (EOPNOTSUPP);
+		return (0);
+
+	default:
+		return (EACCES);
+	}
+
+	fd = VTOFDESC(ap->a_vp)->fd_fd;
+	if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	/* Can setattr the underlying vnode, but not sockets or pipes! */
+	switch (fp->f_type) {
+	case DTYPE_FIFO:
+	case DTYPE_VNODE:
+		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap, ap->a_cred, ap->a_p);
+		break;
+
+	case DTYPE_PIPE:
+		/* A pipe's f_data is not a vnode; treat it like a socket. */
+	case DTYPE_SOCKET:
+		if (vap->va_flags != VNOVAL)
+			error = EOPNOTSUPP;
+		else
+			error = 0;
+		break;
+
+	default:
+		error = EBADF;
+		break;
+	}
+
+	return (error);
+}
+
+#define UIO_MX 16
+
+static struct dirtmp {
+	u_long d_fileno;
+	u_short d_reclen;
+	u_short d_namlen;
+	char d_name[8];
+} rootent[] = {
+	{ FD_DEVFD, UIO_MX, 2, "fd" },
+	{ FD_STDIN, UIO_MX, 5, "stdin" },
+	{ FD_STDOUT, UIO_MX, 6, "stdout" },
+	{ FD_STDERR, UIO_MX, 6, "stderr" },
+	{ FD_CTTY, UIO_MX, 3, "tty" },
+};
+
+/*
+ * Emit fixed-size (UIO_MX) entries for the root or the per-descriptor dir.
+ */
+static int
+fdesc_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	int error, i, off;
+
+	/*
+	 * We don't allow exporting fdesc mounts, and currently local
+	 * requests do not need cookies.
+	 */
+	if (ap->a_ncookies)
+		panic("fdesc_readdir: not hungry");
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Froot &&
+	    VTOFDESC(ap->a_vp)->fd_type != Fdevfd)
+		panic("fdesc_readdir: not dir");
+
+	off = (int)uio->uio_offset;
+	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
+	    uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	i = (u_int)off / UIO_MX;	/* the offset encodes the entry index */
+	fdp = uio->uio_procp->p_fd;
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct dirtmp *dt;
+
+		error = 0;
+		/* i++ is in the for header so that "continue" advances too. */
+		for (; i < sizeof(rootent) / sizeof(rootent[0]) &&
+		    uio->uio_resid >= UIO_MX; i++) {
+			dt = &rootent[i];
+			switch (dt->d_fileno) {
+			case FD_CTTY:
+				if (cttyvp(uio->uio_procp) == NULL)
+					continue;	/* no controlling tty */
+				break;
+
+			case FD_STDIN:
+			case FD_STDOUT:
+			case FD_STDERR:
+				if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+					continue;
+				if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+					continue;	/* descriptor not open */
+				break;
+			}
+			bzero((caddr_t) dp, UIO_MX);
+			dp->d_fileno = dt->d_fileno;
+			dp->d_namlen = dt->d_namlen;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_reclen = dt->d_reclen;
+			bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;	/* i stays on the entry that failed */
+		}
+		uio->uio_offset = i * UIO_MX;
+		return (error);
+	}
+
+	error = 0;
+	while (i < fdp->fd_nfiles && uio->uio_resid >= UIO_MX) {
+		if (fdp->fd_ofiles[i] != NULL) {
+			struct dirent d;
+			struct dirent *dp = &d;
+
+			bzero((caddr_t) dp, UIO_MX);
+
+			dp->d_namlen = sprintf(dp->d_name, "%d", i);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_fileno = i + FD_STDIN;
+			/* And ship to userland */
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+static int
+fdesc_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	char *target;
+
+	if (vp->v_type != VLNK)
+		return (EPERM);
+
+	/* Only Flink nodes carry a link target string. */
+	if (VTOFDESC(vp)->fd_type != Flink)
+		return (EOPNOTSUPP);
+	/*
+	 * Hand the target string to the caller, without its trailing NUL.
+	 */
+	target = VTOFDESC(vp)->fd_link;
+	return (uiomove(target, strlen(target), ap->a_uio));
+}
+
+static int
+fdesc_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	/*
+	 * Only the controlling-tty node can be read through this layer;
+	 * the request is forwarded to the ctty driver entry point.
+	 */
+	if (VTOFDESC(ap->a_vp)->fd_type == Fctty)
+		return ((*ctty_cdevsw.d_read)(devctty, ap->a_uio,
+		    ap->a_ioflag));
+
+	/*
+	 * Every other node type rejects the operation.
+	 */
+	return (EOPNOTSUPP);
+}
+
+static int
+fdesc_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	/*
+	 * Only the controlling-tty node can be written through this
+	 * layer; the request is forwarded to the ctty driver entry point.
+	 */
+	if (VTOFDESC(ap->a_vp)->fd_type == Fctty)
+		return ((*ctty_cdevsw.d_write)(devctty, ap->a_uio,
+		    ap->a_ioflag));
+
+	/*
+	 * Every other node type rejects the operation.
+	 */
+	return (EOPNOTSUPP);
+}
+
+static int
+fdesc_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * Only the controlling-tty node accepts ioctls through this
+	 * layer; command, data, flags and process are forwarded
+	 * unchanged to the ctty driver entry point.
+	 */
+	if (VTOFDESC(ap->a_vp)->fd_type == Fctty)
+		return ((*ctty_cdevsw.d_ioctl)(devctty, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+
+	/*
+	 * Every other node type rejects the operation.
+	 */
+	return (EOPNOTSUPP);
+}
+
+static int
+fdesc_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int  a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/*
+	 * The controlling-tty node asks the ctty driver which of the
+	 * requested events are ready.
+	 */
+	if (VTOFDESC(ap->a_vp)->fd_type == Fctty)
+		return ((*ctty_cdevsw.d_poll)(devctty, ap->a_events, ap->a_p));
+
+	/*
+	 * Every other node type defers to seltrue() for the
+	 * requested events.
+	 */
+	return (seltrue(0, ap->a_events, ap->a_p));
+}
+
+static int
+fdesc_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/*
+	 * Drop the vnode lock, then clear out the v_type field to
+	 * avoid nasty things happening in vgone().
+	 */
+	VOP_UNLOCK(vp, 0, ap->a_p);
+	vp->v_type = VNON;
+	return (0);
+}
+
+static int
+fdesc_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct fdescnode *node = VTOFDESC(vp);
+
+	/* Take the node off its hash chain before releasing it. */
+	LIST_REMOVE(node, fd_hash);
+	FREE(node, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Print a fixed identification line; the vnode itself is not examined.
+ */
+/* ARGSUSED */
+static int
+fdesc_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, fdesc vnode\n");
+	return (0);
+}
+
+/*
+ * /dev/fd "should never get here" operation: panics unconditionally.
+ */
+static int
+fdesc_badop()
+{
+
+	panic("fdesc: bad op");
+	/* NOTREACHED */
+}
+
+static struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_access_desc,		(vop_t *) vop_null },
+	{ &vop_bmap_desc,		(vop_t *) fdesc_badop },
+	{ &vop_getattr_desc,		(vop_t *) fdesc_getattr },
+	{ &vop_inactive_desc,		(vop_t *) fdesc_inactive },
+	{ &vop_ioctl_desc,		(vop_t *) fdesc_ioctl },
+	{ &vop_lookup_desc,		(vop_t *) fdesc_lookup },
+	{ &vop_open_desc,		(vop_t *) fdesc_open },
+	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
+	{ &vop_poll_desc,		(vop_t *) fdesc_poll },
+	{ &vop_print_desc,		(vop_t *) fdesc_print },
+	{ &vop_read_desc,		(vop_t *) fdesc_read },
+	{ &vop_readdir_desc,		(vop_t *) fdesc_readdir },
+	{ &vop_readlink_desc,		(vop_t *) fdesc_readlink },
+	{ &vop_reclaim_desc,		(vop_t *) fdesc_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) fdesc_setattr },
+	{ &vop_write_desc,		(vop_t *) fdesc_write },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc fdesc_vnodeop_opv_desc =
+	{ &fdesc_vnodeop_p, fdesc_vnodeop_entries };
+
+VNODEOP_SET(fdesc_vnodeop_opv_desc);
diff --git a/sys/fs/fifofs/fifo.h b/sys/fs/fifofs/fifo.h
new file mode 100644
index 0000000..ec186d0d
--- /dev/null
+++ b/sys/fs/fifofs/fifo.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1991, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fifo.h	8.6 (Berkeley) 5/21/95
+ * $Id: fifo.h,v 1.14 1997/09/14 02:57:51 peter Exp $
+ */
+
+extern vop_t **fifo_vnodeop_p;
+
+/*
+ * Prototypes for fifo operations on vnodes.
+ */
+int	fifo_vnoperate __P((struct vop_generic_args *));
+int	fifo_printinfo __P((struct vnode *));
+
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
new file mode 100644
index 0000000..f7e47e1
--- /dev/null
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 1990, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)fifo_vnops.c	8.10 (Berkeley) 5/27/95
+ * $Id: fifo_vnops.c,v 1.42 1998/02/04 22:32:45 eivind Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/unistd.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/filio.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/poll.h>
+#include <sys/un.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * This structure is associated with the FIFO vnode and stores
+ * the state associated with the FIFO.
+ */
+struct fifoinfo {
+	struct socket	*fi_readsock;	/* socket fifo_read receives from */
+	struct socket	*fi_writesock;	/* socket fifo_write sends on */
+	long		fi_readers;	/* bumped by each FREAD open */
+	long		fi_writers;	/* bumped by each FWRITE open */
+};
+
+static int	fifo_badop __P((void));
+static int	fifo_print __P((struct vop_print_args *));
+static int	fifo_lookup __P((struct vop_lookup_args *));
+static int	fifo_open __P((struct vop_open_args *));
+static int	fifo_close __P((struct vop_close_args *));
+static int	fifo_read __P((struct vop_read_args *));
+static int	fifo_write __P((struct vop_write_args *));
+static int	fifo_ioctl __P((struct vop_ioctl_args *));
+static int	fifo_poll __P((struct vop_poll_args *));
+static int	fifo_inactive __P((struct  vop_inactive_args *));
+static int	fifo_bmap __P((struct vop_bmap_args *));
+static int	fifo_pathconf __P((struct vop_pathconf_args *));
+static int	fifo_advlock __P((struct vop_advlock_args *));
+
+
+vop_t **fifo_vnodeop_p;
+static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_abortop_desc,		(vop_t *) fifo_badop },
+	{ &vop_access_desc,		(vop_t *) vop_ebadf },
+	{ &vop_advlock_desc,		(vop_t *) fifo_advlock },
+	{ &vop_bmap_desc,		(vop_t *) fifo_bmap },
+	{ &vop_close_desc,		(vop_t *) fifo_close },
+	{ &vop_create_desc,		(vop_t *) fifo_badop },
+	{ &vop_getattr_desc,		(vop_t *) vop_ebadf },
+	{ &vop_inactive_desc,		(vop_t *) fifo_inactive },
+	{ &vop_ioctl_desc,		(vop_t *) fifo_ioctl },
+	{ &vop_lease_desc,		(vop_t *) vop_null },
+	{ &vop_link_desc,		(vop_t *) fifo_badop },
+	{ &vop_lookup_desc,		(vop_t *) fifo_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) fifo_badop },
+	{ &vop_mknod_desc,		(vop_t *) fifo_badop },
+	{ &vop_open_desc,		(vop_t *) fifo_open },
+	{ &vop_pathconf_desc,		(vop_t *) fifo_pathconf },
+	{ &vop_poll_desc,		(vop_t *) fifo_poll },
+	{ &vop_print_desc,		(vop_t *) fifo_print },
+	{ &vop_read_desc,		(vop_t *) fifo_read },
+	{ &vop_readdir_desc,		(vop_t *) fifo_badop },
+	{ &vop_readlink_desc,		(vop_t *) fifo_badop },
+	{ &vop_reallocblks_desc,	(vop_t *) fifo_badop },
+	{ &vop_reclaim_desc,		(vop_t *) vop_null },
+	{ &vop_remove_desc,		(vop_t *) fifo_badop },
+	{ &vop_rename_desc,		(vop_t *) fifo_badop },
+	{ &vop_rmdir_desc,		(vop_t *) fifo_badop },
+	{ &vop_setattr_desc,		(vop_t *) vop_ebadf },
+	{ &vop_symlink_desc,		(vop_t *) fifo_badop },
+	{ &vop_write_desc,		(vop_t *) fifo_write },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc fifo_vnodeop_opv_desc =
+	{ &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+VNODEOP_SET(fifo_vnodeop_opv_desc);
+
+int
+fifo_vnoperate(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	return (VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, ap));
+}
+
+/*
+ * Trivial lookup routine that always fails.
+ */
+/* ARGSUSED */
+static int
+fifo_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+
+	*ap->a_vpp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open called to set up a new instance of a fifo or
+ * to find an active instance of a fifo.
+ */
+/* ARGSUSED */
+static int
+fifo_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct fifoinfo *fip;
+	struct proc *p = ap->a_p;
+	struct socket *rso, *wso;
+	int error;
+
+	/* First open: build the connected socket pair behind the fifo. */
+	if ((fip = vp->v_fifoinfo) == NULL) {
+		MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+		vp->v_fifoinfo = fip;
+		error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, ap->a_p);
+		if (error)
+			goto nofifo;
+		fip->fi_readsock = rso;
+		error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, ap->a_p);
+		if (error)
+			goto norso;
+		fip->fi_writesock = wso;
+		error = unp_connect2(wso, rso);
+		if (error)
+			goto nowso;
+		fip->fi_readers = fip->fi_writers = 0;
+		wso->so_snd.sb_lowat = PIPE_BUF;
+		rso->so_state |= SS_CANTRCVMORE;
+	}
+	/* Account for this open and wake any opener waiting for a peer. */
+	if (ap->a_mode & FREAD) {
+		fip->fi_readers++;
+		if (fip->fi_readers == 1) {
+			fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+			if (fip->fi_writers > 0)
+				wakeup((caddr_t)&fip->fi_writers);
+		}
+	}
+	if (ap->a_mode & FWRITE) {
+		fip->fi_writers++;
+		if (fip->fi_writers == 1) {
+			fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+			if (fip->fi_readers > 0)
+				wakeup((caddr_t)&fip->fi_readers);
+		}
+	}
+	/* Blocking opens wait, with the vnode unlocked, for the other end. */
+	if ((ap->a_mode & FREAD) && (ap->a_mode & O_NONBLOCK) == 0) {
+		while (fip->fi_writers == 0) {
+			VOP_UNLOCK(vp, 0, p);
+			error = tsleep((caddr_t)&fip->fi_readers,
+			    PCATCH | PSOCK, "fifoor", 0);
+			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (error)
+				goto bad;
+		}
+	}
+	if (ap->a_mode & FWRITE) {
+		if (ap->a_mode & O_NONBLOCK) {
+			if (fip->fi_readers == 0) {
+				error = ENXIO;
+				goto bad;
+			}
+		} else {
+			while (fip->fi_readers == 0) {
+				VOP_UNLOCK(vp, 0, p);
+				error = tsleep((caddr_t)&fip->fi_writers,
+				    PCATCH | PSOCK, "fifoow", 0);
+				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+				if (error)
+					goto bad;
+			}
+		}
+	}
+	return (0);
+bad:
+	VOP_CLOSE(vp, ap->a_mode, ap->a_cred, p);
+	return (error);
+
+	/* Setup failures unwind whatever part of the pair already exists. */
+nowso:
+	(void)soclose(wso);
+norso:
+	(void)soclose(rso);
+nofifo:
+	free(fip, M_VNODE);
+	vp->v_fifoinfo = NULL;
+	return (error);
+}
+
+/*
+ * Vnode op for read: receive from the fifo's read-side socket into uio.
+ */
+/* ARGSUSED */
+static int
+fifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock;
+	struct proc *p = uio->uio_procp;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("fifo_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state |= SS_NBIO;
+	/* The vnode lock is not held across the socket receive. */
+	VOP_UNLOCK(ap->a_vp, 0, p);
+	error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0,
+	    (struct mbuf **)0, (int *)0);
+	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p);
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Vnode op for write: send the caller's uio on the write-side socket.
+ */
+/* ARGSUSED */
+static int
+fifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct socket *wsock = ap->a_vp->v_fifoinfo->fi_writesock;
+	struct proc *curp = ap->a_uio->uio_procp;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (ap->a_uio->uio_rw != UIO_WRITE)
+		panic("fifo_write mode");
+#endif
+	if ((ap->a_ioflag & IO_NDELAY) != 0)
+		wsock->so_state |= SS_NBIO;
+	VOP_UNLOCK(ap->a_vp, 0, curp);
+	error = sosend(wsock, (struct sockaddr *)0, ap->a_uio, 0,
+	    (struct mbuf *)0, 0, curp);
+	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, curp);
+	if ((ap->a_ioflag & IO_NDELAY) != 0)
+		wsock->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Fifo ioctl operation.
+ *
+ * FIONBIO is accepted and ignored (returns 0).  Every other command is
+ * forwarded through soo_ioctl() to the read socket when a_fflag has FREAD
+ * and then to the write socket when it has FWRITE; a temporary struct file
+ * with only f_data filled in is used as the carrier.  The first non-zero
+ * error stops processing and is returned.
+ */
+/* ARGSUSED */
+static int
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file ftmp;
+	int error = 0;
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+
+	if ((ap->a_fflag & FREAD) != 0) {
+		ftmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
+		error = soo_ioctl(&ftmp, ap->a_command, ap->a_data, ap->a_p);
+		if (error != 0)
+			return (error);
+	}
+	if ((ap->a_fflag & FWRITE) != 0) {
+		ftmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+		error = soo_ioctl(&ftmp, ap->a_command, ap->a_data, ap->a_p);
+	}
+	return (error);
+}
+
+/*
+ * Fifo poll operation.
+ *
+ * Read-type events are polled on the read socket and write-type events on
+ * the write socket, each through soo_poll() with a temporary struct file
+ * that only carries f_data.  A socket pointer that is NULL is skipped.
+ * Returns the OR of the results.
+ */
+/* ARGSUSED */
+static int
+fifo_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int  a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file ftmp;
+	int result;
+
+	result = 0;
+	if ((ap->a_events &
+	    (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) != 0) {
+		ftmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
+		if (ftmp.f_data != NULL) {
+			result |= soo_poll(&ftmp, ap->a_events, ap->a_cred,
+			    ap->a_p);
+		}
+	}
+	if ((ap->a_events & (POLLOUT | POLLWRNORM | POLLWRBAND)) != 0) {
+		ftmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+		if (ftmp.f_data != NULL) {
+			result |= soo_poll(&ftmp, ap->a_events, ap->a_cred,
+			    ap->a_p);
+		}
+	}
+	return (result);
+}
+
+/*
+ * Vnode op for inactive: nothing fifo-specific to do, just unlock the
+ * vnode on behalf of the calling process and report success.
+ */
+static int
+fifo_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+
+	VOP_UNLOCK(vp, 0, p);
+	return (0);
+}
+
+/*
+ * Identity block mapping: the vnode maps to itself and the block number
+ * maps to itself; both run lengths are reported as 0.  Each output pointer
+ * is optional and is only written when it is not NULL.
+ */
+static int
+fifo_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+	int *runp = ap->a_runp;
+	int *runb = ap->a_runb;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	if (runp != NULL)
+		*runp = 0;
+	if (runb != NULL)
+		*runb = 0;
+	return (0);
+}
+
+/*
+ * Fifo close routine.
+ *
+ * Drops the reader and/or writer count according to a_fflag.  When the last
+ * reader goes away the write socket is told it cannot send any more; when
+ * the last writer goes away the read socket is told it cannot receive any
+ * more.  While the vnode's use count is still above one nothing else is
+ * done.  Otherwise both sockets are closed, the fifoinfo is freed and
+ * v_fifoinfo is cleared.
+ *
+ * Returns the error from closing the read socket if there was one,
+ * otherwise the result of closing the write socket.
+ */
+/* ARGSUSED */
+static int
+fifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct fifoinfo *fip = vp->v_fifoinfo;
+	int rerr, werr;
+
+	if ((ap->a_fflag & FREAD) != 0 && --fip->fi_readers == 0)
+		socantsendmore(fip->fi_writesock);
+	if ((ap->a_fflag & FWRITE) != 0 && --fip->fi_writers == 0)
+		socantrcvmore(fip->fi_readsock);
+
+	/* Other references remain: keep the sockets alive. */
+	if (vp->v_usecount > 1)
+		return (0);
+
+	rerr = soclose(fip->fi_readsock);
+	werr = soclose(fip->fi_writesock);
+	FREE(fip, M_VNODE);
+	vp->v_fifoinfo = NULL;
+	return (rerr ? rerr : werr);
+}
+
+
+/*
+ * Print the reader and writer counts of a fifo vnode (no trailing
+ * newline).  Always returns 0.
+ */
+int
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	struct fifoinfo *info;
+
+	info = vp->v_fifoinfo;
+	printf(", fifo with %ld readers and %ld writers",
+	    info->fi_readers, info->fi_writers);
+	return (0);
+}
+
+/*
+ * Vnode op for print: emit a tag, the fifo counters (via fifo_printinfo())
+ * and a terminating newline.  Always returns 0.
+ */
+static int
+fifo_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	printf("tag VT_NON");
+	(void)fifo_printinfo(vp);
+	printf("\n");
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to fifo's.
+ *
+ * Handles _PC_LINK_MAX (LINK_MAX), _PC_PIPE_BUF (PIPE_BUF) and
+ * _PC_CHOWN_RESTRICTED (1), storing the value through a_retval and
+ * returning 0.  Any other name yields EINVAL and leaves *a_retval alone.
+ */
+int
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int error = 0;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		break;
+	default:
+		error = EINVAL;
+		break;
+	}
+	return (error);
+}
+
+/*
+ * Fifo advisory byte-level locks.
+ *
+ * Locking is not implemented for fifos: requests carrying F_FLOCK in
+ * a_flags fail with EOPNOTSUPP, all others with EINVAL.
+ */
+/* ARGSUSED */
+static int
+fifo_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	if (ap->a_flags & F_FLOCK)
+		return (EOPNOTSUPP);
+	return (EINVAL);
+}
+
+/*
+ * Fifo bad operation.
+ *
+ * Placeholder for vnode operations that must never be invoked on a fifo:
+ * it panics unconditionally, so no value is ever returned.
+ */
+static int
+fifo_badop()
+{
+
+	panic("fifo_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/fs/msdosfs/bootsect.h b/sys/fs/msdosfs/bootsect.h
new file mode 100644
index 0000000..11b93371a
--- /dev/null
+++ b/sys/fs/msdosfs/bootsect.h
@@ -0,0 +1,113 @@
+/*	$Id: bootsect.h,v 1.5 1997/02/22 09:40:43 peter Exp $ */
+/*	$NetBSD: bootsect.h,v 1.9 1997/11/17 15:36:17 ws Exp $	*/
+
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * Format of a boot sector.  This is the first sector on a DOS floppy disk
+ * or the first sector of a partition on a hard disk.  But, it is not the
+ * first sector of a partitioned hard disk.
+ *
+ * This variant carries a 19 byte BPB (the same total as the members of
+ * struct byte_bpb33 in bpb.h).  All members are single bytes or byte
+ * arrays, and their sizes add up to 512.
+ */
+struct bootsector33 {
+	u_int8_t	bsJump[3];		/* jump inst E9xxxx or EBxx90 */
+	int8_t		bsOemName[8];		/* OEM name and version */
+	int8_t		bsBPB[19];		/* BIOS parameter block */
+	int8_t		bsDriveNumber;		/* drive number (0x80) */
+	int8_t		bsBootCode[479];	/* pad so struct is 512b */
+	u_int8_t	bsBootSectSig0;		/* signature byte, see BOOTSIG0 */
+	u_int8_t	bsBootSectSig1;		/* signature byte, see BOOTSIG1 */
+#define	BOOTSIG0	0x55
+#define	BOOTSIG1	0xaa
+};
+
+/*
+ * Boot sector extension.  The members total 26 bytes, which matches the
+ * bsExt[26] arrays in struct bootsector50 and struct bootsector710.
+ */
+struct extboot {
+	int8_t		exDriveNumber;		/* drive number (0x80) */
+	int8_t		exReserved1;		/* reserved */
+	int8_t		exBootSignature;	/* ext. boot signature (0x29) */
+#define	EXBOOTSIG	0x29
+	int8_t		exVolumeID[4];		/* volume ID number */
+	int8_t		exVolumeLabel[11];	/* volume label */
+	int8_t		exFileSysType[8];	/* fs type (FAT12 or FAT16) */
+};
+
+/*
+ * Boot sector with a 25 byte BPB (the same total as the members of
+ * struct byte_bpb50 in bpb.h) followed by a 26 byte extension
+ * (see struct extboot).  Member sizes add up to 512.
+ */
+struct bootsector50 {
+	u_int8_t	bsJump[3];		/* jump inst E9xxxx or EBxx90 */
+	int8_t		bsOemName[8];		/* OEM name and version */
+	int8_t		bsBPB[25];		/* BIOS parameter block */
+	int8_t		bsExt[26];		/* Bootsector Extension */
+	int8_t		bsBootCode[448];	/* pad so structure is 512b */
+	u_int8_t	bsBootSectSig0;		/* signature byte, see BOOTSIG0 */
+	u_int8_t	bsBootSectSig1;		/* signature byte, see BOOTSIG1 */
+#define	BOOTSIG0	0x55
+#define	BOOTSIG1	0xaa
+};
+
+/*
+ * Boot sector with a 53 byte BPB area (the 41 bytes of struct byte_bpb710
+ * in bpb.h plus the 12 byte filler mentioned there) followed by a 26 byte
+ * extension.  Member sizes add up to 512.
+ *
+ * NOTE(review): the member names bsOEMName and bsPBP differ from the
+ * bsOemName / bsBPB spelling used by the other bootsector structures.
+ * They are left as they are because code elsewhere may refer to them.
+ */
+struct bootsector710 {
+	u_int8_t	bsJump[3];		/* jump inst E9xxxx or EBxx90 */
+	int8_t		bsOEMName[8];		/* OEM name and version */
+	int8_t		bsPBP[53];		/* BIOS parameter block */
+	int8_t		bsExt[26];		/* Bootsector Extension */
+	int8_t		bsBootCode[418];	/* pad so structure is 512b */
+	u_int8_t	bsBootSectSig2;		/* 2 & 3 are only defined for FAT32? */
+	u_int8_t	bsBootSectSig3;
+	u_int8_t	bsBootSectSig0;
+	u_int8_t	bsBootSectSig1;
+#define	BOOTSIG0	0x55
+#define	BOOTSIG1	0xaa
+#define	BOOTSIG2	0
+#define	BOOTSIG3	0
+};
+#ifdef	atari
+/*
+ * The boot sector on a gemdos fs is a little bit different from the msdos fs
+ * format. Currently there is no need to declare a separate structure, the
+ * bootsector33 struct will do.
+ *
+ * The definition below is therefore compiled out (#if 0) and kept only as
+ * a description of the layout; its member sizes add up to 512.
+ */
+#if 0
+struct bootsec_atari {
+	u_int8_t	bsBranch[2];		/* branch inst if auto-boot	*/
+	int8_t		bsFiller[6];		/* anything or nothing		*/
+	int8_t		bsSerial[3];		/* serial no. for mediachange	*/
+	int8_t		bsBPB[19];		/* BIOS parameter block		*/
+	int8_t		bsBootCode[482];	/* pad so struct is 512b	*/
+};
+#endif
+#endif /* atari */
+
+/*
+ * Overlay of the three boot sector layouts declared above, so the same
+ * sector can be viewed through whichever layout applies.
+ */
+union bootsector {
+	struct bootsector33 bs33;
+	struct bootsector50 bs50;
+	struct bootsector710 bs710;
+};
+
+#if 0
+/*
+ * Shorthand for fields in the bpb.
+ *
+ * Compiled out: in the bootsector structures above bsBPB is a plain byte
+ * array, so the member accesses these macros expand to would not compile.
+ * Several of the member names used here (e.g. bpbSectPerClust, bpbFATS)
+ * also do not match the spelling used in bpb.h (bpbSecPerClust, bpbFATs).
+ */
+#define	bsBytesPerSec	bsBPB.bpbBytesPerSec
+#define	bsSectPerClust	bsBPB.bpbSectPerClust
+#define	bsResSectors	bsBPB.bpbResSectors
+#define	bsFATS		bsBPB.bpbFATS
+#define	bsRootDirEnts	bsBPB.bpbRootDirEnts
+#define	bsSectors	bsBPB.bpbSectors
+#define	bsMedia		bsBPB.bpbMedia
+#define	bsFATsecs	bsBPB.bpbFATsecs
+#define	bsSectPerTrack	bsBPB.bpbSectPerTrack
+#define	bsHeads		bsBPB.bpbHeads
+#define	bsHiddenSecs	bsBPB.bpbHiddenSecs
+#define	bsHugeSectors	bsBPB.bpbHugeSectors
+#endif
diff --git a/sys/fs/msdosfs/bpb.h b/sys/fs/msdosfs/bpb.h
new file mode 100644
index 0000000..bc00a75
--- /dev/null
+++ b/sys/fs/msdosfs/bpb.h
@@ -0,0 +1,209 @@
+/*	$Id: bpb.h,v 1.5 1997/02/22 09:40:44 peter Exp $ */
+/*	$NetBSD: bpb.h,v 1.7 1997/11/17 15:36:24 ws Exp $	*/
+
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * BIOS Parameter Block (BPB) for DOS 3.3
+ *
+ * This is the natively typed form.  Because the compiler may align the
+ * 16-bit members, it is not a byte-exact image of the on-disk BPB; the
+ * on-disk layout is described by struct byte_bpb33 further down.
+ */
+struct bpb33 {
+	u_int16_t	bpbBytesPerSec;	/* bytes per sector */
+	u_int8_t	bpbSecPerClust;	/* sectors per cluster */
+	u_int16_t	bpbResSectors;	/* number of reserved sectors */
+	u_int8_t	bpbFATs;	/* number of FATs */
+	u_int16_t	bpbRootDirEnts;	/* number of root directory entries */
+	u_int16_t	bpbSectors;	/* total number of sectors */
+	u_int8_t	bpbMedia;	/* media descriptor */
+	u_int16_t	bpbFATsecs;	/* number of sectors per FAT */
+	u_int16_t	bpbSecPerTrack;	/* sectors per track */
+	u_int16_t	bpbHeads;	/* number of heads */
+	u_int16_t	bpbHiddenSecs;	/* number of hidden sectors */
+};
+
+/*
+ * BPB for DOS 5.0 The difference is bpbHiddenSecs is a short for DOS 3.3,
+ * and bpbHugeSectors is not in the 3.3 bpb.
+ *
+ * Natively typed form; the on-disk layout is described by
+ * struct byte_bpb50 further down.
+ */
+struct bpb50 {
+	u_int16_t	bpbBytesPerSec;	/* bytes per sector */
+	u_int8_t	bpbSecPerClust;	/* sectors per cluster */
+	u_int16_t	bpbResSectors;	/* number of reserved sectors */
+	u_int8_t	bpbFATs;	/* number of FATs */
+	u_int16_t	bpbRootDirEnts;	/* number of root directory entries */
+	u_int16_t	bpbSectors;	/* total number of sectors */
+	u_int8_t	bpbMedia;	/* media descriptor */
+	u_int16_t	bpbFATsecs;	/* number of sectors per FAT */
+	u_int16_t	bpbSecPerTrack;	/* sectors per track */
+	u_int16_t	bpbHeads;	/* number of heads */
+	u_int32_t	bpbHiddenSecs;	/* # of hidden sectors */
+	u_int32_t	bpbHugeSectors;	/* # of sectors if bpbSectors == 0 */
+};
+
+/*
+ * BPB for DOS 7.10 (FAT32).  This one has a few extensions to bpb50.
+ *
+ * Natively typed form; the on-disk layout is described by
+ * struct byte_bpb710 further down.  FATNUM and FATMIRROR are bit masks for
+ * bpbExtFlags, FSVERS is the only bpbFSVers value that is understood.
+ */
+struct bpb710 {
+	u_int16_t	bpbBytesPerSec;	/* bytes per sector */
+	u_int8_t	bpbSecPerClust;	/* sectors per cluster */
+	u_int16_t	bpbResSectors;	/* number of reserved sectors */
+	u_int8_t	bpbFATs;	/* number of FATs */
+	u_int16_t	bpbRootDirEnts;	/* number of root directory entries */
+	u_int16_t	bpbSectors;	/* total number of sectors */
+	u_int8_t	bpbMedia;	/* media descriptor */
+	u_int16_t	bpbFATsecs;	/* number of sectors per FAT */
+	u_int16_t	bpbSecPerTrack;	/* sectors per track */
+	u_int16_t	bpbHeads;	/* number of heads */
+	u_int32_t	bpbHiddenSecs;	/* # of hidden sectors */
+	u_int32_t	bpbHugeSectors;	/* # of sectors if bpbSectors == 0 */
+	u_int32_t	bpbBigFATsecs;	/* like bpbFATsecs for FAT32 */
+	u_int16_t	bpbExtFlags;	/* extended flags: */
+#define	FATNUM		0xf		/* mask for numbering active FAT */
+#define	FATMIRROR	0x80		/* FAT is mirrored (like it always was) */
+	u_int16_t	bpbFSVers;	/* filesystem version */
+#define	FSVERS		0		/* currently only 0 is understood */
+	u_int32_t	bpbRootClust;	/* start cluster for root directory */
+	u_int16_t	bpbFSInfo;	/* filesystem info structure sector */
+	u_int16_t	bpbBackup;	/* backup boot sector */
+	/* There is a 12 byte filler here, but we ignore it */
+};
+
+#ifdef	atari
+/*
+ * BPB for gemdos filesystems. Atari leaves the obsolete stuff undefined.
+ * Currently there is no need for a separate BPB structure.
+ *
+ * The definition below is compiled out (#if 0) and kept for reference; it
+ * has the same member types and order as struct bpb33, with different
+ * names for the members that gemdos does not use.
+ */
+#if 0
+struct bpb_a {
+	u_int16_t	bpbBytesPerSec;	/* bytes per sector		*/
+	u_int8_t	bpbSecPerClust;	/* sectors per cluster		*/
+	u_int16_t	bpbResSectors;	/* number of reserved sectors	*/
+	u_int8_t	bpbFATs;	/* number of FATs		*/
+	u_int16_t	bpbRootDirEnts;	/* number of root directory entries */
+	u_int16_t	bpbSectors;	/* total number of sectors	*/
+	u_int8_t	bpbUseless1;	/* meaningless on gemdos fs	*/
+	u_int16_t	bpbFATsecs;	/* number of sectors per FAT	*/
+	u_int16_t	bpbUseless2;	/* meaningless for harddisk fs	*/
+	u_int16_t	bpbUseless3;	/* meaningless for harddisk fs	*/
+	u_int16_t	bpbHiddenSecs;	/* the TOS-BIOS ignores this	*/
+};
+#endif
+#endif	/* atari */
+
+/*
+ * The following structures represent how the bpb's look on disk.  shorts
+ * and longs are just character arrays of the appropriate length.  This is
+ * because the compiler forces shorts and longs to align on word or
+ * halfword boundaries.
+ *
+ * XXX The little-endian code here assumes that the processor can access
+ * 16-bit and 32-bit quantities on byte boundaries.  If this is not true,
+ * use the macros for the big-endian case.
+ *
+ * getushort(x) / getulong(x) read a 16-bit / 32-bit little-endian value
+ * from the byte array x; putushort(p, v) / putulong(p, v) store v into the
+ * byte array p in little-endian order.
+ *
+ * The second set of macros assembles the value one byte at a time and so
+ * works for any host byte order and alignment.  Notes on that set:
+ * - getulong() widens every byte to u_int32_t before shifting.  Shifting
+ *   the int-promoted byte left by 24 would overflow a signed int whenever
+ *   the top bit of the byte is set, and the result now has the same type
+ *   as in the direct-access variant.
+ * - the arguments are evaluated more than once, so they must not have
+ *   side effects.
+ */
+#include <machine/endian.h>
+#if (BYTE_ORDER == LITTLE_ENDIAN) 			/* && defined(UNALIGNED_ACCESS) */
+#define	getushort(x)	(*((u_int16_t *)(x)))
+#define	getulong(x)	(*((u_int32_t *)(x)))
+#define	putushort(p, v)	(*((u_int16_t *)(p)) = (v))
+#define	putulong(p, v)	(*((u_int32_t *)(p)) = (v))
+#else
+#define getushort(x)	(((u_int8_t *)(x))[0] + (((u_int8_t *)(x))[1] << 8))
+#define getulong(x)	((u_int32_t)((u_int8_t *)(x))[0]		\
+			 + ((u_int32_t)((u_int8_t *)(x))[1] << 8)	\
+			 + ((u_int32_t)((u_int8_t *)(x))[2] << 16)	\
+			 + ((u_int32_t)((u_int8_t *)(x))[3] << 24))
+#define putushort(p, v)	(((u_int8_t *)(p))[0] = (v),	\
+			 ((u_int8_t *)(p))[1] = (v) >> 8)
+#define putulong(p, v)	(((u_int8_t *)(p))[0] = (v),	\
+			 ((u_int8_t *)(p))[1] = (v) >> 8, \
+			 ((u_int8_t *)(p))[2] = (v) >> 16,\
+			 ((u_int8_t *)(p))[3] = (v) >> 24)
+#endif
+
+/*
+ * BIOS Parameter Block (BPB) for DOS 3.3
+ *
+ * Byte-array form: same members as struct bpb33, with each multi-byte
+ * member declared as an array of bytes, to be accessed with getushort() /
+ * putushort().  The members total 19 bytes.
+ */
+struct byte_bpb33 {
+	int8_t bpbBytesPerSec[2];	/* bytes per sector */
+	int8_t bpbSecPerClust;		/* sectors per cluster */
+	int8_t bpbResSectors[2];	/* number of reserved sectors */
+	int8_t bpbFATs;			/* number of FATs */
+	int8_t bpbRootDirEnts[2];	/* number of root directory entries */
+	int8_t bpbSectors[2];		/* total number of sectors */
+	int8_t bpbMedia;		/* media descriptor */
+	int8_t bpbFATsecs[2];		/* number of sectors per FAT */
+	int8_t bpbSecPerTrack[2];	/* sectors per track */
+	int8_t bpbHeads[2];		/* number of heads */
+	int8_t bpbHiddenSecs[2];	/* number of hidden sectors */
+};
+
+/*
+ * BPB for DOS 5.0 The difference is bpbHiddenSecs is a short for DOS 3.3,
+ * and bpbHugeSectors is not in the 3.3 bpb.
+ *
+ * Byte-array form of struct bpb50; the members total 25 bytes.
+ */
+struct byte_bpb50 {
+	int8_t bpbBytesPerSec[2];	/* bytes per sector */
+	int8_t bpbSecPerClust;		/* sectors per cluster */
+	int8_t bpbResSectors[2];	/* number of reserved sectors */
+	int8_t bpbFATs;			/* number of FATs */
+	int8_t bpbRootDirEnts[2];	/* number of root directory entries */
+	int8_t bpbSectors[2];		/* total number of sectors */
+	int8_t bpbMedia;		/* media descriptor */
+	int8_t bpbFATsecs[2];		/* number of sectors per FAT */
+	int8_t bpbSecPerTrack[2];	/* sectors per track */
+	int8_t bpbHeads[2];		/* number of heads */
+	int8_t bpbHiddenSecs[4];	/* number of hidden sectors */
+	int8_t bpbHugeSectors[4];	/* # of sectors if bpbSectors == 0 */
+};
+
+/*
+ * BPB for DOS 7.10 (FAT32).  This one has a few extensions to bpb50.
+ *
+ * Byte-array form of struct bpb710.  The declared members total 41 bytes;
+ * the 12 byte filler that follows on disk is not declared.  Unlike
+ * byte_bpb33 and byte_bpb50 this structure uses unsigned bytes.
+ */
+struct byte_bpb710 {
+	u_int8_t bpbBytesPerSec[2];	/* bytes per sector */
+	u_int8_t bpbSecPerClust;	/* sectors per cluster */
+	u_int8_t bpbResSectors[2];	/* number of reserved sectors */
+	u_int8_t bpbFATs;		/* number of FATs */
+	u_int8_t bpbRootDirEnts[2];	/* number of root directory entries */
+	u_int8_t bpbSectors[2];		/* total number of sectors */
+	u_int8_t bpbMedia;		/* media descriptor */
+	u_int8_t bpbFATsecs[2];		/* number of sectors per FAT */
+	u_int8_t bpbSecPerTrack[2];	/* sectors per track */
+	u_int8_t bpbHeads[2];		/* number of heads */
+	u_int8_t bpbHiddenSecs[4];	/* # of hidden sectors */
+	u_int8_t bpbHugeSectors[4];	/* # of sectors if bpbSectors == 0 */
+	u_int8_t bpbBigFATsecs[4];	/* like bpbFATsecs for FAT32 */
+	u_int8_t bpbExtFlags[2];	/* extended flags: */
+	u_int8_t bpbFSVers[2];		/* filesystem version */
+	u_int8_t bpbRootClust[4];	/* start cluster for root directory */
+	u_int8_t bpbFSInfo[2];		/* filesystem info structure sector */
+	u_int8_t bpbBackup[2];		/* backup boot sector */
+	/* There is a 12 byte filler here, but we ignore it */
+};
+
+/*
+ * FAT32 FSInfo block.
+ *
+ * Byte-array layout; the members total 1024 bytes.  The 4-byte members
+ * are presumably meant to be accessed with getulong() / putulong().
+ */
+struct fsinfo {
+	u_int8_t fsisig1[4];		/* signature 1 */
+	u_int8_t fsifill1[480];		/* filler */
+	u_int8_t fsisig2[4];		/* signature 2 */
+	u_int8_t fsinfree[4];		/* presumably: number of free clusters */
+	u_int8_t fsinxtfree[4];		/* presumably: next free cluster hint */
+	u_int8_t fsifill2[12];		/* filler */
+	u_int8_t fsisig3[4];		/* signature 3 */
+	u_int8_t fsifill3[508];		/* filler */
+	u_int8_t fsisig4[4];		/* signature 4 */
+};
diff --git a/sys/fs/msdosfs/denode.h b/sys/fs/msdosfs/denode.h
new file mode 100644
index 0000000..ba2ef8c
--- /dev/null
+++ b/sys/fs/msdosfs/denode.h
@@ -0,0 +1,286 @@
+/*	$Id: denode.h,v 1.17 1998/11/21 00:20:24 dt Exp $ */
+/*	$NetBSD: denode.h,v 1.25 1997/11/17 15:36:28 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * This is the pc filesystem specific portion of the vnode structure.
+ *
+ * To describe a file uniquely the de_dirclust, de_diroffset, and
+ * de_StartCluster fields are used.
+ *
+ * de_dirclust contains the cluster number of the directory cluster
+ *	containing the entry for a file or directory.
+ * de_diroffset is the index into the cluster for the entry describing
+ *	a file or directory.
+ * de_StartCluster is the number of the first cluster of the file or directory.
+ *
+ * Now to describe the quirks of the pc filesystem.
+ * - Clusters 0 and 1 are reserved.
+ * - The first allocatable cluster is 2.
+ * - The root directory is of fixed size and all blocks that make it up
+ *   are contiguous.
+ * - Cluster 0 refers to the root directory when it is found in the
+ *   startcluster field of a directory entry that points to another directory.
+ * - Cluster 0 implies a 0 length file when found in the start cluster field
+ *   of a directory entry that points to a file.
+ * - You can't use the cluster number 0 to derive the address of the root
+ *   directory.
+ * - Multiple directory entries can point to a directory. The entry in the
+ *   parent directory points to a child directory.  Any directories in the
+ *   child directory contain a ".." entry that points back to the parent.
+ *   The child directory itself contains a "." entry that points to itself.
+ * - The root directory does not contain a "." or ".." entry.
+ * - Directory entries for directories are never changed once they are created
+ *   (except when removed).  The size stays 0, and the last modification time
+ *   is never changed.  This is because so many directory entries can point to
+ *   the physical clusters that make up a directory.  It would lead to an
+ *   update nightmare.
+ * - The length field in a directory entry pointing to a directory contains 0
+ *   (always).  The only way to find the end of a directory is to follow the
+ *   cluster chain until the "last cluster" marker is found.
+ *
+ * My extensions to make this house of cards work.  These apply only to the in
+ * memory copy of the directory entry.
+ * - A reference count for each denode will be kept since dos doesn't keep such
+ *   things.
+ */
+
+/*
+ * Internal pseudo-offset for (nonexistent) directory entry for the root
+ * dir in the root dir
+ */
+#define	MSDOSFSROOT_OFS	0x1fffffff
+
+/*
+ * The fat cache structure. fc_fsrcn is the filesystem relative cluster
+ * number that corresponds to the file relative cluster number in this
+ * structure (fc_frcn).
+ *
+ * Each denode carries FC_SIZE of these in its de_fc[] array.
+ */
+struct fatcache {
+	u_long fc_frcn;		/* file relative cluster number */
+	u_long fc_fsrcn;	/* filesystem relative cluster number */
+};
+
+/*
+ * The fat entry cache as it stands helps make extending files a "quick"
+ * operation by avoiding having to scan the fat to discover the last
+ * cluster of the file. The cache also helps sequential reads by
+ * remembering the last cluster read from the file.  This also prevents us
+ * from having to rescan the fat to find the next cluster to read.  This
+ * cache is probably pretty worthless if a file is opened by multiple
+ * processes.
+ */
+#define	FC_SIZE		2	/* number of entries in the cache */
+#define	FC_LASTMAP	0	/* entry the last call to pcbmap() resolved
+				 * to */
+#define	FC_LASTFC	1	/* entry for the last cluster in the file */
+
+#define	FCE_EMPTY	0xffffffff	/* doesn't represent an actual cluster # */
+
+/*
+ * Set a slot in the fat cache.
+ */
+#define	fc_setcache(dep, slot, frcn, fsrcn) \
+	(dep)->de_fc[slot].fc_frcn = frcn; \
+	(dep)->de_fc[slot].fc_fsrcn = fsrcn;
+
+/*
+ * This is the in memory variant of a dos directory entry.  It is usually
+ * contained within a vnode.
+ *
+ * The de_Name .. de_FileSize members mirror the on-disk directory entry
+ * and are filled in / written back by DE_INTERNALIZE / DE_EXTERNALIZE
+ * below; the remaining members are in-memory bookkeeping.
+ */
+struct denode {
+	struct lock de_lock;	/* denode lock >Keep this first< */
+	struct denode *de_next;	/* Hash chain forward */
+	struct denode **de_prev; /* Hash chain back */
+	struct vnode *de_vnode;	/* addr of vnode we are part of */
+	struct vnode *de_devvp;	/* vnode of blk dev we live on */
+	u_long de_flag;		/* flag bits */
+	dev_t de_dev;		/* device where direntry lives */
+	u_long de_dirclust;	/* cluster of the directory file containing this entry */
+	u_long de_diroffset;	/* offset of this entry in the directory cluster */
+	u_long de_fndoffset;	/* offset of found dir entry */
+	int de_fndcnt;		/* number of slots before de_fndoffset */
+	long de_refcnt;		/* reference count */
+	struct msdosfsmount *de_pmp;	/* addr of our mount struct */
+	u_char de_Name[12];	/* name, from DOS directory entry */
+	u_char de_Attributes;	/* attributes, from directory entry */
+	u_char de_LowerCase;	/* NT VFAT lower case flags */
+	u_char de_CHun;		/* Hundredth of second of CTime*/
+	u_short de_CTime;	/* creation time */
+	u_short de_CDate;	/* creation date */
+	u_short de_ADate;	/* access date */
+	u_short de_MTime;	/* modification time */
+	u_short de_MDate;	/* modification date */
+	u_long de_StartCluster; /* starting cluster of file */
+	u_long de_FileSize;	/* size of file in bytes */
+	struct fatcache de_fc[FC_SIZE];	/* fat cache */
+	u_quad_t de_modrev;	/* Revision level for lease. */
+};
+
+/*
+ * Values for the de_flag field of the denode.
+ */
+#define	DE_UPDATE	0x0004	/* Modification time update request */
+#define	DE_CREATE	0x0008	/* Creation time update */
+#define	DE_ACCESS	0x0010	/* Access time update */
+#define	DE_MODIFIED	0x0020	/* Denode has been modified */
+#define	DE_RENAME	0x0040	/* Denode is in the process of being renamed */
+
+
+/*
+ * Transfer directory entries between internal and external form.
+ * dep is a struct denode * (internal form),
+ * dp is a struct direntry * (external form).
+ */
+/*
+ * DE_INTERNALIZE32 ORs the high 16 bits of the start cluster (deHighClust)
+ * into de_StartCluster.  The 16-bit value is widened to u_long before the
+ * shift: shifting the int-promoted value would overflow a signed int
+ * whenever bit 15 is set, and the negative result would then sign-extend
+ * into the upper bits where u_long is wider than int.
+ */
+#define DE_INTERNALIZE32(dep, dp)			\
+	 ((dep)->de_StartCluster |=			\
+	     (u_long)(getushort((dp)->deHighClust)) << 16)
+/*
+ * DE_INTERNALIZE fills the denode dep from the on-disk directory entry dp:
+ * 11 bytes of name, the attribute / lower-case / hundredths bytes, the
+ * 16-bit time and date fields, the low 16 bits of the start cluster and
+ * the 32-bit file size.  When FAT32(dep->de_pmp) is true the high 16 bits
+ * of the start cluster are merged in through DE_INTERNALIZE32.
+ * Both arguments are evaluated many times.
+ */
+#define DE_INTERNALIZE(dep, dp)				\
+	(bcopy((dp)->deName, (dep)->de_Name, 11),	\
+	 (dep)->de_Attributes = (dp)->deAttributes,	\
+	 (dep)->de_LowerCase = (dp)->deLowerCase,	\
+	 (dep)->de_CHun = (dp)->deCHundredth,		\
+	 (dep)->de_CTime = getushort((dp)->deCTime),	\
+	 (dep)->de_CDate = getushort((dp)->deCDate),	\
+	 (dep)->de_ADate = getushort((dp)->deADate),	\
+	 (dep)->de_MTime = getushort((dp)->deMTime),	\
+	 (dep)->de_MDate = getushort((dp)->deMDate),	\
+	 (dep)->de_StartCluster = getushort((dp)->deStartCluster), \
+	 (dep)->de_FileSize = getulong((dp)->deFileSize), \
+	 (FAT32((dep)->de_pmp) ? DE_INTERNALIZE32((dep), (dp)) : 0))
+
+/*
+ * DE_EXTERNALIZE writes the denode dep out into the on-disk directory
+ * entry dp (note the argument order: destination first).  The file size
+ * is written as 0 when ATTR_DIRECTORY is set in de_Attributes.  The high
+ * 16 bits of the start cluster are stored into deHighClust
+ * unconditionally; there is no FAT32 check here.
+ * Both arguments are evaluated many times.
+ */
+#define DE_EXTERNALIZE(dp, dep)				\
+	(bcopy((dep)->de_Name, (dp)->deName, 11),	\
+	 (dp)->deAttributes = (dep)->de_Attributes,	\
+	 (dp)->deLowerCase = (dep)->de_LowerCase,	\
+	 (dp)->deCHundredth = (dep)->de_CHun,		\
+	 putushort((dp)->deCTime, (dep)->de_CTime),	\
+	 putushort((dp)->deCDate, (dep)->de_CDate),	\
+	 putushort((dp)->deADate, (dep)->de_ADate),	\
+	 putushort((dp)->deMTime, (dep)->de_MTime),	\
+	 putushort((dp)->deMDate, (dep)->de_MDate),	\
+	 putushort((dp)->deStartCluster, (dep)->de_StartCluster), \
+	 putulong((dp)->deFileSize,			\
+	     ((dep)->de_Attributes & ATTR_DIRECTORY) ? 0 : (dep)->de_FileSize), \
+	 putushort((dp)->deHighClust, (dep)->de_StartCluster >> 16))
+
+/*
+ * NOTE(review): these refer to a de_chain member, but struct denode above
+ * has no such member (it uses de_next / de_prev).  They look like
+ * leftovers and would not compile if used; confirm before removing.
+ */
+#define	de_forw		de_chain[0]
+#define	de_back		de_chain[1]
+
+#ifdef KERNEL
+
+/*
+ * Conversions between a vnode and its denode: VTODE() takes the vnode's
+ * v_data pointer as a denode, DETOV() follows the denode's de_vnode
+ * pointer back.
+ */
+#define	VTODE(vp)	((struct denode *)(vp)->v_data)
+#define	DETOV(de)	((de)->de_vnode)
+
+/*
+ * Apply the pending time stamp updates recorded in dep->de_flag.
+ *
+ * dep is a struct denode *; acc, mod and cre are the access, modification
+ * and creation times handed to unix2dostime().
+ *
+ * - DE_UPDATE: the modification date/time is converted from mod, the
+ *   archive attribute is set and the denode is marked DE_MODIFIED.
+ * - If the mount has MSDOSFSMNT_NOWIN95 set, the access and creation
+ *   stamps are not maintained: the request bits are cleared and the macro
+ *   finishes early (the break leaves the do/while).
+ * - DE_ACCESS: the access date is converted from acc; the denode is only
+ *   marked DE_MODIFIED when the date actually changed.
+ * - DE_CREATE: creation date, time and hundredths are converted from cre
+ *   and the denode is marked DE_MODIFIED.
+ *
+ * In all cases DE_UPDATE, DE_CREATE and DE_ACCESS are cleared at the end.
+ *
+ * The macro expands to a single statement without a trailing semicolon,
+ * so it can be used as the body of an unbraced if/else; the caller
+ * supplies the semicolon.  dep is evaluated many times.
+ */
+#define	DETIMES(dep, acc, mod, cre) do {				\
+	if ((dep)->de_flag & DE_UPDATE) { 				\
+		(dep)->de_flag |= DE_MODIFIED;				\
+		unix2dostime((mod), &(dep)->de_MDate, &(dep)->de_MTime,	\
+		    NULL);						\
+		(dep)->de_Attributes |= ATTR_ARCHIVE; 			\
+	}								\
+	if ((dep)->de_pmp->pm_flags & MSDOSFSMNT_NOWIN95) {		\
+		(dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS);	\
+		break;							\
+	}								\
+	if ((dep)->de_flag & DE_ACCESS) {				\
+	    	u_int16_t adate;					\
+									\
+		unix2dostime((acc), &adate, NULL, NULL);		\
+		if (adate != (dep)->de_ADate) {				\
+			(dep)->de_flag |= DE_MODIFIED;			\
+			(dep)->de_ADate = adate;			\
+		}							\
+	}								\
+	if ((dep)->de_flag & DE_CREATE) {				\
+		unix2dostime((cre), &(dep)->de_CDate, &(dep)->de_CTime,	\
+		    &(dep)->de_CHun);					\
+		(dep)->de_flag |= DE_MODIFIED;				\
+	}								\
+	(dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS);		\
+} while (0)
+
+/*
+ * This overlays the fid structure (see mount.h)
+ *
+ * A file is identified by the directory cluster and the offset of its
+ * entry within that cluster.  The generation number member is currently
+ * compiled out.
+ */
+struct defid {
+	u_short defid_len;	/* length of structure */
+	u_short defid_pad;	/* force long alignment */
+
+	u_long defid_dirclust;	/* cluster this dir entry came from */
+	u_long defid_dirofs;	/* offset of entry within the cluster */
+#if 0
+	u_long	defid_gen;	/* generation number */
+#endif
+};
+
+extern vop_t **msdosfs_vnodeop_p;
+
+int msdosfs_lookup __P((struct vop_cachedlookup_args *));
+int msdosfs_inactive __P((struct vop_inactive_args *));
+int msdosfs_reclaim __P((struct vop_reclaim_args *));
+
+/*
+ * Internal service routine prototypes.
+ */
+int deget __P((struct msdosfsmount *, u_long, u_long, struct denode **));
+int uniqdosname __P((struct denode *, struct componentname *, u_char *));
+int findwin95 __P((struct denode *));
+
+int readep __P((struct msdosfsmount *pmp, u_long dirclu, u_long dirofs,  struct buf **bpp, struct direntry **epp));
+int readde __P((struct denode *dep, struct buf **bpp, struct direntry **epp));
+int deextend __P((struct denode *dep, u_long length, struct ucred *cred));
+int fillinusemap __P((struct msdosfsmount *pmp));
+void reinsert __P((struct denode *dep));
+int dosdirempty __P((struct denode *dep));
+int createde __P((struct denode *dep, struct denode *ddep, struct denode **depp, struct componentname *cnp));
+int deupdat __P((struct denode *dep, int waitfor));
+int removede __P((struct denode *pdep, struct denode *dep));
+int detrunc __P((struct denode *dep, u_long length, int flags, struct ucred *cred, struct proc *p));
+int doscheckpath __P(( struct denode *source, struct denode *target));
+#endif	/* KERNEL */
diff --git a/sys/fs/msdosfs/direntry.h b/sys/fs/msdosfs/direntry.h
new file mode 100644
index 0000000..796fe78
--- /dev/null
+++ b/sys/fs/msdosfs/direntry.h
@@ -0,0 +1,143 @@
+/*	$Id: direntry.h,v 1.12 1998/02/26 06:45:42 msmith Exp $ */
+/*	$NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * Structure of a dos directory entry.
+ */
+struct direntry {
+	u_int8_t	deName[8];	/* filename, blank filled; [0] may be SLOT_* */
+#define	SLOT_EMPTY	0x00		/* slot has never been used */
+#define	SLOT_E5		0x05		/* stored in place of a leading 0xe5 */
+#define	SLOT_DELETED	0xe5		/* file in this slot deleted */
+	u_int8_t	deExtension[3];	/* extension, blank filled */
+	u_int8_t	deAttributes;	/* file attributes */
+#define	ATTR_NORMAL	0x00		/* normal file */
+#define	ATTR_READONLY	0x01		/* file is readonly */
+#define	ATTR_HIDDEN	0x02		/* file is hidden */
+#define	ATTR_SYSTEM	0x04		/* file is a system file */
+#define	ATTR_VOLUME	0x08		/* entry is a volume label */
+#define	ATTR_DIRECTORY	0x10		/* entry is a directory name */
+#define	ATTR_ARCHIVE	0x20		/* file is new or modified */
+	u_int8_t	deLowerCase;	/* NT VFAT lower case flags */
+#define	LCASE_BASE	0x08		/* filename base in lower case */
+#define	LCASE_EXT	0x10		/* filename extension in lower case */
+	u_int8_t	deCHundredth;	/* hundredth of seconds in CTime */
+	u_int8_t	deCTime[2];	/* create time */
+	u_int8_t	deCDate[2];	/* create date */
+	u_int8_t	deADate[2];	/* access date */
+	u_int8_t	deHighClust[2];	/* high bytes of cluster number */
+	u_int8_t	deMTime[2];	/* last update time */
+	u_int8_t	deMDate[2];	/* last update date */
+	u_int8_t	deStartCluster[2]; /* starting cluster of file */
+	u_int8_t	deFileSize[4];	/* size of file in bytes */
+};
+
+/*
+ * Structure of a Win95 long name directory entry
+ */
+struct winentry {
+	u_int8_t	weCnt;		/* slot number as set by unix2winfn() */
+#define	WIN_LAST	0x40		/* presumably flags the last slot - confirm */
+#define	WIN_CNT		0x3f		/* mask for the slot number bits */
+	u_int8_t	wePart1[10];	/* 5 name chars, 2 bytes each, low first */
+	u_int8_t	weAttributes;	/* unix2winfn() stores ATTR_WIN95 here */
+#define	ATTR_WIN95	0x0f
+	u_int8_t	weReserved1;	/* unix2winfn() stores 0 */
+	u_int8_t	weChksum;	/* checksum handed to unix2winfn() */
+	u_int8_t	wePart2[12];	/* room for the next 6 name chars */
+	u_int16_t	weReserved2;	/* unix2winfn() stores 0 */
+	u_int8_t	wePart3[4];	/* room for the last 2 name chars */
+};
+#define	WIN_CHARS	13	/* Number of chars per winentry */
+
+/*
+ * Maximum filename length in Win95
+ * Note: Must be < sizeof(dirent.d_name)
+ */
+#define	WIN_MAXLEN	255
+
+/*
+ * This is the format of the contents of the time fields (deCTime, deMTime)
+ * in the direntry structure.
+ * We don't use bitfields because we don't know how compilers for
+ * arbitrary machines will lay them out.
+ */
+#define DT_2SECONDS_MASK	0x1F	/* seconds divided by 2 */
+#define DT_2SECONDS_SHIFT	0
+#define DT_MINUTES_MASK		0x7E0	/* minutes */
+#define DT_MINUTES_SHIFT	5
+#define DT_HOURS_MASK		0xF800	/* hours */
+#define DT_HOURS_SHIFT		11
+
+/*
+ * This is the format of the contents of the date fields (deCDate, deADate,
+ * deMDate) in the direntry structure.
+ */
+#define DD_DAY_MASK		0x1F	/* day of month */
+#define DD_DAY_SHIFT		0
+#define DD_MONTH_MASK		0x1E0	/* month */
+#define DD_MONTH_SHIFT		5
+#define DD_YEAR_MASK		0xFE00	/* year - 1980 */
+#define DD_YEAR_SHIFT		9
+
+#ifdef KERNEL
+struct dirent;
+void unix2dostime __P((struct timespec *tsp, u_int16_t *ddp, 
+	     u_int16_t *dtp, u_int8_t *dhp));
+void dos2unixtime __P((u_int dd, u_int dt, u_int dh, struct timespec *tsp));
+int dos2unixfn __P((u_char dn[11], u_char *un, int lower, int d2u_loaded, u_int8_t *d2u, int ul_loaded, u_int8_t *ul));
+int unix2dosfn __P((const u_char *un, u_char dn[12], int unlen, u_int gen, int u2d_loaded, u_int8_t *u2d, int lu_loaded, u_int8_t *lu));
+int unix2winfn __P((const u_char *un, int unlen, struct winentry *wep, int cnt, int chksum, int table_loaded, u_int16_t *u2w));
+int winChkName __P((const u_char *un, int unlen, struct winentry *wep, int chksum, int u2w_loaded, u_int16_t *u2w, int ul_loaded, u_int8_t *ul));
+int win2unixfn __P((struct winentry *wep, struct dirent *dp, int chksum, int table_loaded, u_int16_t *u2w));
+u_int8_t winChksum __P((u_int8_t *name));
+int winSlotCnt __P((const u_char *un, int unlen));
+int winLenFixup __P((const u_char *un, int unlen));
+#endif	/* KERNEL */
diff --git a/sys/fs/msdosfs/fat.h b/sys/fs/msdosfs/fat.h
new file mode 100644
index 0000000..74b05e2
--- /dev/null
+++ b/sys/fs/msdosfs/fat.h
@@ -0,0 +1,108 @@
+/*	$Id: fat.h,v 1.6 1997/02/22 09:40:45 peter Exp $ */
+/*	$NetBSD: fat.h,v 1.12 1997/11/17 15:36:36 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * Some useful cluster numbers.
+ */
+#define	MSDOSFSROOT	0		/* cluster 0 means the root dir */
+#define	CLUST_FREE	0		/* cluster 0 also means a free cluster */
+#define	MSDOSFSFREE	CLUST_FREE
+#define	CLUST_FIRST	2		/* first legal cluster number */
+#define	CLUST_RSRVD	0xfffffff6	/* reserved cluster range */
+#define	CLUST_BAD	0xfffffff7	/* a cluster with a defect */
+#define	CLUST_EOFS	0xfffffff8	/* start of eof cluster range */
+#define	CLUST_EOFE	0xffffffff	/* end of eof cluster range */
+
+#define	FAT12_MASK	0x00000fff	/* mask for 12 bit cluster numbers */
+#define	FAT16_MASK	0x0000ffff	/* mask for 16 bit cluster numbers */
+#define	FAT32_MASK	0x0fffffff	/* mask for FAT32 cluster numbers */
+
+/*
+ * MSDOSFS:
+ * Return true if filesystem uses 12 bit fats. Microsoft Programmer's
+ * Reference says if the maximum cluster number in a filesystem is greater
+ * than 4078 ((CLUST_RSRVS - CLUST_FIRST) & FAT12_MASK) then we've got a
+ * 16 bit fat filesystem. While mounting, the result of this test is stored
+ * in pm_fatentrysize.
+ * GEMDOS-flavour (atari):
+ * If the filesystem is on floppy we've got a 12 bit fat filesystem, otherwise
+ * 16 bit. We check the d_type field in the disklabel struct while mounting
+ * and store the result in the pm_fatentrysize. Note that this kind of
+ * detection gets flakey when mounting a vnd-device.
+ */
+#define	FAT12(pmp)	((pmp)->pm_fatmask == FAT12_MASK)	/* 12 bit fat? */
+#define	FAT16(pmp)	((pmp)->pm_fatmask == FAT16_MASK)	/* 16 bit fat? */
+#define	FAT32(pmp)	((pmp)->pm_fatmask == FAT32_MASK)	/* FAT32? */
+
+#define	MSDOSFSEOF(pmp, cn)	((((cn) | ~(pmp)->pm_fatmask) & CLUST_EOFS) == CLUST_EOFS)
+
+#ifdef KERNEL
+/*
+ * These are the values for the function argument to the function
+ * fatentry().
+ */
+#define	FAT_GET		0x0001	/* get a fat entry */
+#define	FAT_SET		0x0002	/* set a fat entry */
+#define	FAT_GET_AND_SET	(FAT_GET | FAT_SET)
+
+/*
+ * Flags to extendfile:
+ */
+#define	DE_CLEAR	1	/* Zero out the blocks allocated */
+
+int pcbmap __P((struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int* sp));
+int clusterfree __P((struct msdosfsmount *pmp, u_long cn, u_long *oldcnp));
+int clusteralloc __P((struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got));
+int fatentry __P((int function, struct msdosfsmount *pmp, u_long cluster, u_long *oldcontents, u_long newcontents));
+int freeclusterchain __P((struct msdosfsmount *pmp, u_long startchain));
+int extendfile __P((struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags));
+void fc_purge __P((struct denode *dep, u_int frcn));
+
+#endif	/* KERNEL */
diff --git a/sys/fs/msdosfs/msdosfs_conv.c b/sys/fs/msdosfs/msdosfs_conv.c
new file mode 100644
index 0000000..2c792eb
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_conv.c
@@ -0,0 +1,1041 @@
+/*	$Id: msdosfs_conv.c,v 1.27 1998/05/17 21:18:08 dt Exp $ */
+/*	$NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * System include files.
+ */
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>		/* defines tz */
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <sys/dirent.h>
+
+/*
+ * MSDOSFS include files.
+ */
+#include <msdosfs/direntry.h>
+
+/*
+ * Cumulative days elapsed by the end of each month in a regular year.
+ */
+static u_short regyear[] = {
+	31, 59, 90, 120, 151, 181,
+	212, 243, 273, 304, 334, 365
+};
+
+/*
+ * Cumulative days elapsed by the end of each month in a leap year.
+ */
+static u_short leapyear[] = {
+	31, 60, 91, 121, 152, 182,
+	213, 244, 274, 305, 335, 366
+};
+
+/*
+ * Variables used to remember parts of the last time conversion.  Maybe we
+ * can avoid a full conversion.
+ */
+static u_long  lasttime;
+static u_long  lastday;
+static u_short lastddate;
+static u_short lastdtime;
+
+static __inline u_int8_t find_lcode __P((u_int16_t code, u_int16_t *u2w));
+
+/*
+ * Translate a unix timestamp (taken to be GMT) into the date, time and
+ * hundredths values dos keeps in a directory entry.  The date goes to
+ * *ddp; the time and hundredths go to *dtp and *dhp unless those are null.
+ */
+void
+unix2dostime(tsp, ddp, dtp, dhp)
+	struct timespec *tsp;
+	u_int16_t *ddp;
+	u_int16_t *dtp;
+	u_int8_t *dhp;
+{
+	u_long secs;		/* local time, rounded down to 2 seconds */
+	u_long daynum;		/* days since 1970, later day of month - 1 */
+	u_long yr;
+	u_long yrlen;
+	u_long mon;
+	u_short *cumdays;
+
+	secs = tsp->tv_sec - (tz.tz_minuteswest * 60)
+	    - (wall_cmos_clock ? adjkerntz : 0);
+	    /* - daylight savings time correction */
+	secs &= ~1;
+
+	/*
+	 * The previous result is cached: redo the time of day only when
+	 * the (even) second changed, and the date only when the day did.
+	 */
+	if (secs != lasttime) {
+		lasttime = secs;
+		lastdtime = (((secs / 2) % 30) << DT_2SECONDS_SHIFT)
+		    | (((secs / 60) % 60) << DT_MINUTES_SHIFT)
+		    | (((secs / 3600) % 24) << DT_HOURS_SHIFT);
+
+		daynum = secs / (24 * 60 * 60);
+		if (daynum != lastday) {
+			lastday = daynum;
+
+			/* Peel off whole years; every 4th one has 366 days. */
+			yr = 1970;
+			while (daynum >= (yrlen = (yr % 4) ? 365 : 366)) {
+				daynum -= yrlen;
+				yr++;
+			}
+
+			/* daynum is now the day within yr; find its month. */
+			cumdays = (yr % 4) ? regyear : leapyear;
+			mon = 0;
+			while (cumdays[mon] <= daynum)
+				mon++;
+			if (mon != 0)
+				daynum -= cumdays[mon - 1];
+
+			/*
+			 * dos counts years from 1980.  Anything earlier is
+			 * stored with a zero year field.
+			 */
+			lastddate = ((daynum + 1) << DD_DAY_SHIFT)
+			    | ((mon + 1) << DD_MONTH_SHIFT);
+			if (yr > 1980)
+				lastddate |= (yr - 1980) << DD_YEAR_SHIFT;
+		}
+	}
+
+	if (dtp != 0)
+		*dtp = lastdtime;
+	if (dhp != 0)
+		*dhp = (tsp->tv_sec & 1) * 100 + tsp->tv_nsec / 10000000;
+	*ddp = lastddate;
+}
+
+/*
+ * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
+ * interval there were 8 regular years and 2 leap years.
+ */
+#define	SECONDSTO1980	(((8 * 365) + (2 * 366)) * (24 * 60 * 60))
+
+static u_short lastdosdate;
+static u_long  lastseconds;
+
+/*
+ * Convert a dos date (dd), time (dt) and hundredths of a second (dh)
+ * into a unix timespec; the inverse of unix2dostime().  A date word
+ * of 0 is taken as "never set" and yields the epoch.
+ */
+void
+dos2unixtime(dd, dt, dh, tsp)
+	u_int dd;
+	u_int dt;
+	u_int dh;
+	struct timespec *tsp;
+{
+	u_long seconds;
+	u_long month;
+	u_long year;
+	u_long days;
+	u_short *months;
+
+	if (dd == 0) {
+		/*
+		 * Uninitialized field, return the epoch.
+		 */
+		tsp->tv_sec = 0;
+		tsp->tv_nsec = 0;
+		return;
+	}
+	seconds = (((dt & DT_2SECONDS_MASK) >> DT_2SECONDS_SHIFT) << 1)
+	    + ((dt & DT_MINUTES_MASK) >> DT_MINUTES_SHIFT) * 60
+	    + ((dt & DT_HOURS_MASK) >> DT_HOURS_SHIFT) * 3600
+	    + dh / 100;
+	/*
+	 * The seconds up to midnight of the given date are cached; redo
+	 * the calendar arithmetic only when the date word changes.
+	 */
+	if (lastdosdate != dd) {
+		lastdosdate = dd;
+		year = (dd & DD_YEAR_MASK) >> DD_YEAR_SHIFT;
+		days = year * 365;
+		days += year / 4 + 1;	/* add in leap days */
+		if ((year & 0x03) == 0)
+			days--;		/* not this year's own leap day */
+		months = year & 0x03 ? regyear : leapyear;
+		month = (dd & DD_MONTH_MASK) >> DD_MONTH_SHIFT;
+		if (month < 1 || month > 12) {
+			printf("dos2unixtime(): month value out of range (%lu)\n",
+			    month);
+			month = 1;
+		}
+		if (month > 1)
+			days += months[month - 2];
+		days += ((dd & DD_DAY_MASK) >> DD_DAY_SHIFT) - 1;
+		lastseconds = (days * 24 * 60 * 60) + SECONDSTO1980;
+	}
+	/* Undo exactly the local time offset that unix2dostime() applies. */
+	tsp->tv_sec = seconds + lastseconds + (tz.tz_minuteswest * 60)
+	     + (wall_cmos_clock ? adjkerntz : 0);
+	     /* + daylight savings time correction */
+	tsp->tv_nsec = (dh % 100) * 10000000;
+}
+
+/*
+ * 0 - character disallowed in long file name.
+ * 1 - character should be replaced by '_' in DOS file name,
+ *     and generation number inserted.
+ * 2 - character ('.' and ' ') should be skipped in DOS file name,
+ *     and generation number inserted.
+ */
+static u_char
+unix2dos[256] = {	/* indexed by unix char; other values are DOS codes */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 00-07 */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 08-0f */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 10-17 */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 18-1f */
+	2,    0x21, 0,    0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
+	0x28, 0x29, 0,    1,    1,    0x2d, 2,    0,	/* 28-2f */
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
+	0x38, 0x39, 0,    1,    0,    1,    0,    0,	/* 38-3f */
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,	/* 40-47 */
+	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,	/* 48-4f */
+	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,	/* 50-57 */
+	0x58, 0x59, 0x5a, 1,    0,    1,    0x5e, 0x5f,	/* 58-5f */
+	0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,	/* 60-67 */
+	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,	/* 68-6f */
+	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,	/* 70-77 */
+	0x58, 0x59, 0x5a, 0x7b, 0,    0x7d, 0x7e, 0,	/* 78-7f */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 80-87 */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 88-8f */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 90-97 */
+	0,    0,    0,    0,    0,    0,    0,    0,	/* 98-9f */
+	0,    0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5,	/* a0-a7 */
+	0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee,	/* a8-af */
+	0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa,	/* b0-b7 */
+	0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8,	/* b8-bf */
+	0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80,	/* c0-c7 */
+	0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8,	/* c8-cf */
+	0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e,	/* d0-d7 */
+	0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1,	/* d8-df */
+	0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80,	/* e0-e7 */
+	0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8,	/* e8-ef */
+	0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0xf6,	/* f0-f7 */
+	0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0x98,	/* f8-ff */
+};
+
+static u_char
+dos2unix[256] = {	/* indexed by DOS char; 0x3f is '?', presumably "no match" */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 00-07 */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 08-0f */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 10-17 */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 18-1f */
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
+	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	/* 28-2f */
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
+	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	/* 38-3f */
+	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,	/* 40-47 */
+	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,	/* 48-4f */
+	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,	/* 50-57 */
+	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,	/* 58-5f */
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 60-67 */
+	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 68-6f */
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 70-77 */
+	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,	/* 78-7f */
+	0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,	/* 80-87 */
+	0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,	/* 88-8f */
+	0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9,	/* 90-97 */
+	0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x3f,	/* 98-9f */
+	0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba,	/* a0-a7 */
+	0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,	/* a8-af */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xc1, 0xc2, 0xc0,	/* b0-b7 */
+	0xa9, 0x3f, 0x3f, 0x3f, 0x3f, 0xa2, 0xa5, 0x3f,	/* b8-bf */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xe3, 0xc3,	/* c0-c7 */
+	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xa4,	/* c8-cf */
+	0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x3f, 0xcd, 0xce,	/* d0-d7 */
+	0xcf, 0x3f, 0x3f, 0x3f, 0x3f, 0xa6, 0xcc, 0x3f,	/* d8-df */
+	0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe,	/* e0-e7 */
+	0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0x3f,	/* e8-ef */
+	0xad, 0xb1, 0x3f, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8,	/* f0-f7 */
+	0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x3f, 0x3f,	/* f8-ff */
+};
+
+static u_char
+u2l[256] = {	/* upper to lower case, indexed by unix char */
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
+	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
+	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */
+	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 40-47 */
+	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 48-4f */
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 50-57 */
+	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */
+	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */
+	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */
+	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */
+	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */
+	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */
+	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */
+	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */
+	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */
+	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* c0-c7 */
+	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* c8-cf */
+	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* d0-d7 */
+	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* d8-df */
+	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */
+	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */
+	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
+	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
+};
+
+static u_char
+l2u[256] = {	/* NOTE(review): same bytes as u2l ('A' -> 'a'); confirm intent */
+	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */
+	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */
+	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */
+	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */
+	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
+	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */
+	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
+	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */
+	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 40-47 */
+	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 48-4f */
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 50-57 */
+	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */
+	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */
+	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */
+	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */
+	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */
+	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */
+	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */
+	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */
+	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */
+	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */
+	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */
+	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */
+	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */
+	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* c0-c7 */
+	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* c8-cf */
+	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* d0-d7 */
+	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* d8-df */
+	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */
+	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */
+	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
+	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
+};
+
+/*
+ * DOS filenames are made of 2 parts, the name part and the extension part.
+ * The name part is 8 characters long and the extension part is 3
+ * characters long.  They may contain trailing blanks if the name or
+ * extension are not long enough to fill their respective fields.
+ */
+
+/*
+ * Convert a DOS filename to a unix filename. And, return the number of
+ * characters in the resulting unix filename excluding the terminating
+ * null.
+ */
+int
+dos2unixfn(dn, un, lower, d2u_loaded, d2u, ul_loaded, ul)
+	u_char dn[11];
+	u_char *un;
+	int lower;
+	int d2u_loaded;
+	u_int8_t *d2u;
+	int ul_loaded;
+	u_int8_t *ul;
+{
+	int i;
+	int thislong = 1;
+	u_char c;
+
+	/*
+	 * If first char of the filename is SLOT_E5 (0x05), then the real
+	 * first char of the filename should be 0xe5. But, they couldn't
+	 * just have a 0xe5 mean 0xe5 because that is used to mean a freed
+	 * directory slot. Another dos quirk.
+	 */
+	if (*dn == SLOT_E5)
+		c = d2u_loaded ? d2u[0xe5 & 0x7f] : dos2unix[0xe5];
+	else
+		c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] :
+		    dos2unix[*dn];
+	*un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ?
+			 ul[c & 0x7f] : u2l[c]) : c;
+	dn++;
+
+	/*
+	 * Copy the name portion into the unix filename string.
+	 */
+	for (i = 1; i < 8 && *dn != ' '; i++) {
+		c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] :
+		    dos2unix[*dn];
+		dn++;
+		*un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ?
+				 ul[c & 0x7f] : u2l[c]) : c;
+		thislong++;
+	}
+	dn += 8 - i;
+
+	/*
+	 * Now, if there is an extension then put in a period and copy in
+	 * the extension.
+	 */
+	if (*dn != ' ') {
+		*un++ = '.';
+		thislong++;
+		for (i = 0; i < 3 && *dn != ' '; i++) {
+			c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] :
+			    dos2unix[*dn];
+			dn++;
+			*un++ = (lower & LCASE_EXT) ? (ul_loaded && (c & 0x80) ?
+					 ul[c & 0x7f] : u2l[c]) : c;
+			thislong++;
+		}
+	}
+	*un++ = 0;
+
+	return (thislong);
+}
+
+/*
+ * Convert a unix filename to a DOS filename according to Win95 rules.
+ * If applicable and gen is not 0, it is inserted into the converted
+ * filename as a generation number.
+ * Returns
+ *	0 if name couldn't be converted
+ *	1 if the converted name is the same as the original
+ *	  (no long filename entry necessary for Win95)
+ *	2 if conversion was successful
+ *	3 if chars were dropped or replaced; gen is inserted unless it is 0
+ */
+int
+unix2dosfn(un, dn, unlen, gen, u2d_loaded, u2d, lu_loaded, lu)
+	const u_char *un;
+	u_char dn[12];
+	int unlen;
+	u_int gen;
+	int u2d_loaded;
+	u_int8_t *u2d;
+	int lu_loaded;
+	u_int8_t *lu;
+{
+	int i, j, l;
+	int conv = 1;		/* return value so far (1, 2 or 3, see above) */
+	const u_char *cp, *dp, *dp1;
+	u_char gentext[6], *wcp;	/* digits of gen, built right to left */
+	u_int8_t c;
+#define U2D(c) (u2d_loaded && ((c) & 0x80) ? u2d[(c) & 0x7f] : unix2dos[c])
+
+	/*
+	 * Fill the dos filename string with blanks. These are DOS's pad
+	 * characters.
+	 */
+	for (i = 0; i < 11; i++)
+		dn[i] = ' ';
+	dn[11] = 0;
+
+	/*
+	 * The filenames "." and ".." are handled specially, since they
+	 * don't follow dos filename rules.
+	 */
+	if (un[0] == '.' && unlen == 1) {
+		dn[0] = '.';
+		return gen <= 1;	/* 1 for gen 0 or 1, otherwise 0 */
+	}
+	if (un[0] == '.' && un[1] == '.' && unlen == 2) {
+		dn[0] = '.';
+		dn[1] = '.';
+		return gen <= 1;	/* 1 for gen 0 or 1, otherwise 0 */
+	}
+
+	/*
+	 * Filenames with only blanks and dots are not allowed!
+	 */
+	for (cp = un, i = unlen; --i >= 0; cp++)
+		if (*cp != ' ' && *cp != '.')
+			break;
+	if (i < 0)
+		return 0;
+
+
+	/*
+	 * Filenames with some characters are not allowed!
+	 */
+	for (cp = un, i = unlen; --i >= 0; cp++)
+		if (U2D(*cp) == 0)
+			return 0;
+
+	/*
+	 * Now find the extension
+	 * Note: dot as first char doesn't start extension
+	 *	 and trailing dots and blanks are ignored
+	 */
+	dp = dp1 = 0;	/* dp: start of extension; dp1: candidate after a dot */
+	for (cp = un + 1, i = unlen - 1; --i >= 0;) {
+		switch (*cp++) {
+		case '.':
+			if (!dp1)
+				dp1 = cp;	/* char following the dot */
+			break;
+		case ' ':
+			break;
+		default:
+			if (dp1)
+				dp = dp1;	/* dot was followed by a real char */
+			dp1 = 0;
+			break;
+		}
+	}
+
+	/*
+	 * Now convert it
+	 */
+	if (dp) {
+		if (dp1)
+			l = dp1 - dp;	/* stop at the trailing dot run */
+		else
+			l = unlen - (dp - un);
+		for (i = 0, j = 8; i < l && j < 11; i++, j++) {
+			c = dp[i];
+			c = lu_loaded && (c & 0x80) ?
+			    lu[c & 0x7f] : l2u[c];
+			c = U2D(c);
+			if (dp[i] != (dn[j] = c)
+			    && conv != 3)
+				conv = 2;
+			if (dn[j] == 1) {
+				conv = 3;
+				dn[j] = '_';
+			}
+			if (dn[j] == 2) {
+				conv = 3;
+				dn[j--] = ' ';	/* skip: the loop's j++ reuses the slot */
+			}
+		}
+		if (i < l)
+			conv = 3;	/* extension did not fit in 3 chars */
+		dp--;			/* back onto the dot: the base name ends here */
+	} else {
+		for (dp = cp; *--dp == ' ' || *dp == '.';);	/* skip trailing junk */
+		dp++;
+	}
+
+	/*
+	 * Now convert the rest of the name
+	 */
+	for (i = j = 0; un < dp && j < 8; i++, j++, un++) {
+		c = lu_loaded && (*un & 0x80) ?
+		    lu[*un & 0x7f] : l2u[*un];
+		c = U2D(c);
+		if (*un != (dn[j] = c)
+		    && conv != 3)
+			conv = 2;
+		if (dn[j] == 1) {
+			conv = 3;
+			dn[j] = '_';
+		}
+		if (dn[j] == 2) {
+			conv = 3;
+			dn[j--] = ' ';	/* skip: the loop's j++ reuses the slot */
+		}
+	}
+	if (un < dp)
+		conv = 3;		/* base name did not fit in 8 chars */
+	/*
+	 * If we didn't have any chars in filename,
+	 * generate a default
+	 */
+	if (!j)
+		dn[0] = '_';
+
+	/*
+	 * The first character cannot be E5,
+	 * because that means a deleted entry
+	 */
+	if (dn[0] == 0xe5)
+		dn[0] = SLOT_E5;
+
+	/*
+	 * If there wasn't any char dropped,
+	 * there is no place for generation numbers
+	 */
+	if (conv != 3) {
+		if (gen > 1)
+			return 0;
+		return conv;
+	}
+
+	/*
+	 * Now insert the generation number into the filename part
+	 */
+	if (gen == 0)
+		return conv;
+	for (wcp = gentext + sizeof(gentext); wcp > gentext && gen; gen /= 10)
+		*--wcp = gen % 10 + '0';
+	if (gen)
+		return 0;		/* more digits than gentext can hold */
+	for (i = 8; dn[--i] == ' ';);	/* i: last non-blank of the base name */
+	i++;
+	if (gentext + sizeof(gentext) - wcp + 1 > 8 - i)	/* '~' + digits */
+		i = 8 - (gentext + sizeof(gentext) - wcp + 1);	/* overwrite tail */
+	dn[i++] = '~';
+	while (wcp < gentext + sizeof(gentext))
+		dn[i++] = *wcp++;
+	return 3;
+#undef U2D
+}
+
+/*
+ * Fill in Win95 long-name slot number cnt for the unix name un.
+ * Note: the name must be valid, i.e. it must not consist solely of
+ *	 blanks and dots.  Returns the number of characters left over.
+ */
+int
+unix2winfn(un, unlen, wep, cnt, chksum, table_loaded, u2w)
+	const u_char *un;
+	int unlen;
+	struct winentry *wep;
+	int cnt;
+	int chksum;
+	int table_loaded;
+	u_int16_t *u2w;
+{
+	const u_int8_t *tail;
+	u_int8_t *out;
+	int n;
+	u_int16_t wc;
+
+	/* Trailing blanks and dots are not stored */
+	tail = un + unlen;
+	while (*--tail == ' ' || *tail == '.')
+		unlen--;
+
+	/* Step over the characters that belong to the lower-numbered slots */
+	n = (cnt - 1) * WIN_CHARS;
+	un += n;
+	unlen -= n;
+
+	/* Preset every byte of the slot to 0xff, then fill in the header */
+	out = (u_int8_t *)wep;
+	for (n = 0; n < (int)sizeof(*wep); n++)
+		out[n] = 0xff;
+	wep->weCnt = cnt;
+	wep->weAttributes = ATTR_WIN95;
+	wep->weReserved1 = 0;
+	wep->weChksum = chksum;
+	wep->weReserved2 = 0;
+
+	/* Store the characters as 16-bit values, low byte first */
+	for (out = wep->wePart1, n = sizeof(wep->wePart1) / 2; n > 0; n--) {
+		if (unlen-- <= 0)
+			goto done;
+		if (!table_loaded || !(*un & 0x80)) {
+			*out++ = *un++;
+			*out++ = 0;
+		} else {
+			wc = u2w[*un++ & 0x7f];
+			*out++ = wc;
+			*out++ = wc >> 8;
+		}
+	}
+	for (out = wep->wePart2, n = sizeof(wep->wePart2) / 2; n > 0; n--) {
+		if (unlen-- <= 0)
+			goto done;
+		if (!table_loaded || !(*un & 0x80)) {
+			*out++ = *un++;
+			*out++ = 0;
+		} else {
+			wc = u2w[*un++ & 0x7f];
+			*out++ = wc;
+			*out++ = wc >> 8;
+		}
+	}
+	for (out = wep->wePart3, n = sizeof(wep->wePart3) / 2; n > 0; n--) {
+		if (unlen-- <= 0)
+			goto done;
+		if (!table_loaded || !(*un & 0x80)) {
+			*out++ = *un++;
+			*out++ = 0;
+		} else {
+			wc = u2w[*un++ & 0x7f];
+			*out++ = wc;
+			*out++ = wc >> 8;
+		}
+	}
+	if (unlen == 0)
+		wep->weCnt |= WIN_LAST;
+	return unlen;
+
+done:
+	out[0] = 0;
+	out[1] = 0;
+	wep->weCnt |= WIN_LAST;
+	return 0;
+}
+
+/* Index of code in the 128-entry u2w table with bit 7 set, or '?' if absent */
+static __inline u_int8_t
+find_lcode(code, u2w)
+	u_int16_t code;
+	u_int16_t *u2w;
+{
+	int slot = 0;
+
+	while (slot < 128 && u2w[slot] != code)
+		slot++;
+	return (slot < 128) ? (slot | 0x80) : '?';
+}
+
+/*
+ * Check the part of un covered by long-name slot wep against the slot,
+ * mapping both sides through ul/u2l.  Returns the checksum or -1 if no match
+ */
+int
+winChkName(un, unlen, wep, chksum, u2w_loaded, u2w, ul_loaded, ul)
+	const u_char *un;
+	int unlen;
+	struct winentry *wep;
+	int chksum;
+	int u2w_loaded;
+	u_int16_t *u2w;
+	int ul_loaded;
+	u_int8_t *ul;
+{
+	u_int8_t *wp;
+	int n;
+	u_int16_t wc;
+	u_int8_t have, want;
+
+	/*
+	 * The last slot supplies the checksum; the others must agree with it
+	 */
+	if (wep->weCnt & WIN_LAST)
+		chksum = wep->weChksum;
+	else if (wep->weChksum != chksum)
+		return -1;
+	if (chksum == -1)
+		return -1;
+
+	/*
+	 * Move to the part of the name this slot describes; the last
+	 * slot cannot have more than WIN_CHARS characters left over
+	 */
+	n = ((wep->weCnt & WIN_CNT) - 1) * WIN_CHARS;
+	un += n;
+	unlen -= n;
+	if (unlen <= 0 || (unlen > WIN_CHARS && (wep->weCnt & WIN_LAST)))
+		return -1;
+
+	/*
+	 * Compare the name parts
+	 */
+	wp = wep->wePart1;
+	for (n = sizeof(wep->wePart1) / 2; n > 0; n--, wp += 2, un++) {
+		if (unlen-- <= 0)	/* name used up: slot must end too */
+			return (wp[0] == 0 && wp[1] == 0) ? chksum : -1;
+		wc = wp[0] | (wp[1] << 8);
+		if (wc & 0xff80) {
+			if (u2w_loaded)
+				wc = find_lcode(wc, u2w);
+			else if (wc & 0xff00)
+				wc = '?';
+		}
+		if (ul_loaded && (wc & 0x80))
+			have = ul[wc & 0x7f];
+		else
+			have = u2l[wc];
+		if (ul_loaded && (*un & 0x80))
+			want = ul[*un & 0x7f];
+		else
+			want = u2l[*un];
+		if (have != want)
+			return -1;
+	}
+	wp = wep->wePart2;
+	for (n = sizeof(wep->wePart2) / 2; n > 0; n--, wp += 2, un++) {
+		if (unlen-- <= 0)	/* name used up: slot must end too */
+			return (wp[0] == 0 && wp[1] == 0) ? chksum : -1;
+		wc = wp[0] | (wp[1] << 8);
+		if (wc & 0xff80) {
+			if (u2w_loaded)
+				wc = find_lcode(wc, u2w);
+			else if (wc & 0xff00)
+				wc = '?';
+		}
+		if (ul_loaded && (wc & 0x80))
+			have = ul[wc & 0x7f];
+		else
+			have = u2l[wc];
+		if (ul_loaded && (*un & 0x80))
+			want = ul[*un & 0x7f];
+		else
+			want = u2l[*un];
+		if (have != want)
+			return -1;
+	}
+	wp = wep->wePart3;
+	for (n = sizeof(wep->wePart3) / 2; n > 0; n--, wp += 2, un++) {
+		if (unlen-- <= 0)	/* name used up: slot must end too */
+			return (wp[0] == 0 && wp[1] == 0) ? chksum : -1;
+		wc = wp[0] | (wp[1] << 8);
+		if (wc & 0xff80) {
+			if (u2w_loaded)
+				wc = find_lcode(wc, u2w);
+			else if (wc & 0xff00)
+				wc = '?';
+		}
+		if (ul_loaded && (wc & 0x80))
+			have = ul[wc & 0x7f];
+		else
+			have = u2l[wc];
+		if (ul_loaded && (*un & 0x80))
+			want = ul[*un & 0x7f];
+		else
+			want = u2l[*un];
+		if (have != want)
+			return -1;
+	}
+	return chksum;
+}
+
+/*
+ * Convert Win95 filename to dirbuf: copy the characters of slot wep to
+ * their place in dp->d_name.  Returns the checksum or -1 if impossible
+ */
+int
+win2unixfn(wep, dp, chksum, table_loaded, u2w)
+	struct winentry *wep;
+	struct dirent *dp;
+	int chksum;
+	int table_loaded;
+	u_int16_t *u2w;
+{
+	u_int8_t *cp;
+	u_int8_t *np, *ep = (u_int8_t *)dp->d_name + WIN_MAXLEN;
+	u_int16_t code;
+	int i;
+	/* Reject slot numbers that are zero or beyond the longest name */
+	if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
+	    || !(wep->weCnt&WIN_CNT))
+		return -1;
+
+	/*
+	 * The last slot supplies the checksum; the others must agree with it
+	 */
+	if (wep->weCnt&WIN_LAST) {
+		chksum = wep->weChksum;
+		/*
+		 * This works even though d_namlen is one byte!
+		 */
+		dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS;
+	} else if (chksum != wep->weChksum)
+		chksum = -1;
+	if (chksum == -1)
+		return -1;
+
+	/*
+	 * Offset of this entry
+	 */
+	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
+	np = (u_int8_t *)dp->d_name + i;
+
+	/*
+	 * Convert the name parts
+	 */
+	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
+		code = (cp[1] << 8) | cp[0];	/* stored low byte first */
+		switch (code) {
+		case 0:	/* name ends here: take the unused characters off d_namlen */
+			*np = '\0';
+			dp->d_namlen -= sizeof(wep->wePart2)/2
+			    + sizeof(wep->wePart3)/2 + i + 1;
+			return chksum;
+		case '/':	/* names containing '/' are rejected */
+			*np = '\0';
+			return -1;
+		default:
+			if (code & 0xff80) {	/* not a plain 7-bit character */
+				if (table_loaded)
+					code = find_lcode(code, u2w);
+				else if (code & 0xff00)
+					code = '?';
+			}
+			*np++ = code;
+			break;
+		}
+		/*
+		 * The size comparison should result in the compiler
+		 * optimizing the whole if away
+		 */
+		if (WIN_MAXLEN % WIN_CHARS < sizeof(wep->wePart1) / 2
+		    && np > ep) {
+			np[-1] = 0;
+			return -1;
+		}
+		cp += 2;
+	}
+	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
+		code = (cp[1] << 8) | cp[0];	/* stored low byte first */
+		switch (code) {
+		case 0:	/* name ends here: take the unused characters off d_namlen */
+			*np = '\0';
+			dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1;
+			return chksum;
+		case '/':	/* names containing '/' are rejected */
+			*np = '\0';
+			return -1;
+		default:
+			if (code & 0xff80) {	/* not a plain 7-bit character */
+				if (table_loaded)
+					code = find_lcode(code, u2w);
+				else if (code & 0xff00)
+					code = '?';
+			}
+			*np++ = code;
+			break;
+		}
+		/*
+		 * The size comparisons should be optimized away
+		 */
+		if (WIN_MAXLEN % WIN_CHARS >= sizeof(wep->wePart1) / 2
+		    && WIN_MAXLEN % WIN_CHARS < (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
+		    && np > ep) {
+			np[-1] = 0;
+			return -1;
+		}
+		cp += 2;
+	}
+	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
+		code = (cp[1] << 8) | cp[0];	/* stored low byte first */
+		switch (code) {
+		case 0:	/* name ends here: take the unused characters off d_namlen */
+			*np = '\0';
+			dp->d_namlen -= i + 1;
+			return chksum;
+		case '/':	/* names containing '/' are rejected */
+			*np = '\0';
+			return -1;
+		default:
+			if (code & 0xff80) {	/* not a plain 7-bit character */
+				if (table_loaded)
+					code = find_lcode(code, u2w);
+				else if (code & 0xff00)
+					code = '?';
+			}
+			*np++ = code;
+			break;
+		}
+		/*
+		 * See above
+		 */
+		if (WIN_MAXLEN % WIN_CHARS >= (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
+		    && np > ep) {
+			np[-1] = 0;
+			return -1;
+		}
+		cp += 2;
+	}
+	return chksum;	/* slot was completely filled with characters */
+}
+
+/*
+ * Win95 checksum of an 11-byte DOS name: rotate right one bit, add a byte
+ */
+u_int8_t
+winChksum(name)
+	u_int8_t *name;
+{
+	const u_int8_t *end = name + 11;
+	u_int8_t sum = 0;
+
+	while (name < end)
+		sum = (u_int8_t)((sum >> 1) | (sum << 7)) + *name++;
+	return sum;
+}
+
+/*
+ * Number of Win95 long-name slots needed for the name, ignoring
+ * trailing blanks and dots; 0 if the name exceeds WIN_MAXLEN
+ */
+int
+winSlotCnt(un, unlen)
+	const u_char *un;
+	int unlen;
+{
+	int len = winLenFixup(un, unlen);
+
+	return (len > WIN_MAXLEN) ? 0 : howmany(len, WIN_CHARS);
+}
+
+/*
+ * Length of the name once trailing blanks and dots are dropped
+ */
+int
+winLenFixup(un, unlen)
+	const u_char *un;
+	int unlen;
+{
+	while (unlen > 0
+	    && (un[unlen - 1] == ' ' || un[unlen - 1] == '.'))
+		unlen--;
+	return unlen;
+}
diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c
new file mode 100644
index 0000000..74be5c4
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_denode.c
@@ -0,0 +1,712 @@
+/*	$Id: msdosfs_denode.c,v 1.43 1998/12/07 21:58:34 archie Exp $ */
+/*	$NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/fat.h>
+
+static MALLOC_DEFINE(M_MSDOSFSNODE, "MSDOSFS node", "MSDOSFS vnode private part");
+
+static struct denode **dehashtbl;	/* denode hash chains, set up in msdosfs_init() */
+static u_long dehash;			/* size of hash table - 1 */
+#define	DEHASH(dev, dcl, doff)	(dehashtbl[((dev) + (dcl) + (doff) / 	\
+				sizeof(struct direntry)) & dehash])
+#ifndef NULL_SIMPLELOCKS
+static struct simplelock dehash_slock;	/* held while walking or editing the chains */
+#endif
+
+union _qcvt {	/* lets SETHIGH/SETLOW reach the two long halves of a quad_t */
+	quad_t qcvt;
+	long val[2];
+};
+#define SETHIGH(q, h) { /* store h in the _QUAD_HIGHWORD half of q */ \
+	union _qcvt tmp; \
+	tmp.qcvt = (q); \
+	tmp.val[_QUAD_HIGHWORD] = (h); \
+	(q) = tmp.qcvt; \
+}
+#define SETLOW(q, l) { /* store l in the _QUAD_LOWWORD half of q */ \
+	union _qcvt tmp; \
+	tmp.qcvt = (q); \
+	tmp.val[_QUAD_LOWWORD] = (l); \
+	(q) = tmp.qcvt; \
+}
+
+static struct denode *
+		msdosfs_hashget __P((dev_t dev, u_long dirclust,
+				     u_long diroff));
+static void	msdosfs_hashins __P((struct denode *dep));
+static void	msdosfs_hashrem __P((struct denode *dep));
+
+/*ARGSUSED*/
+int
+msdosfs_init(vfsp)
+	struct vfsconf *vfsp;
+{
+	simple_lock_init(&dehash_slock);	/* protects the dehashtbl chains */
+	dehashtbl = hashinit(desiredvnodes / 2, M_MSDOSFSMNT, &dehash);
+	return 0;
+}
+
+static struct denode *
+msdosfs_hashget(dev, dirclust, diroff)	/* find a cached denode and vget() it */
+	dev_t dev;
+	u_long dirclust;
+	u_long diroff;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct denode *dep;
+	struct vnode *vp;
+
+loop:
+	simple_lock(&dehash_slock);
+	for (dep = DEHASH(dev, dirclust, diroff); dep; dep = dep->de_next) {
+		if (dirclust == dep->de_dirclust
+		    && diroff == dep->de_diroffset
+		    && dev == dep->de_dev
+		    && dep->de_refcnt != 0) {	/* skip unlinked files */
+			vp = DETOV(dep);
+			simple_lock(&vp->v_interlock);	/* taken before the hash lock is dropped */
+			simple_unlock(&dehash_slock);
+			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
+				goto loop;	/* vget failed: rescan the chain */
+			return (dep);
+		}
+	}
+	simple_unlock(&dehash_slock);
+	return (NULL);	/* not cached */
+}
+
+static void
+msdosfs_hashins(dep)
+	struct denode *dep;
+{
+	struct denode **bucket, *first;
+
+	simple_lock(&dehash_slock);	/* push dep on the front of its chain */
+	bucket = &DEHASH(dep->de_dev, dep->de_dirclust, dep->de_diroffset);
+	first = *bucket;
+	if (first != NULL)
+		first->de_prev = &dep->de_next;
+	dep->de_next = first;
+	dep->de_prev = bucket;
+	*bucket = dep;
+	simple_unlock(&dehash_slock);
+}
+
+static void
+msdosfs_hashrem(dep)
+	struct denode *dep;
+{
+	struct denode *succ;
+
+	simple_lock(&dehash_slock);	/* unlink dep from its hash chain */
+	succ = dep->de_next;
+	if (succ != NULL)
+		succ->de_prev = dep->de_prev;
+	*dep->de_prev = succ;
+#ifdef DIAGNOSTIC
+	dep->de_prev = NULL;		/* clear the now-stale links */
+	dep->de_next = NULL;
+#endif
+	simple_unlock(&dehash_slock);
+}
+
+/*
+ * If deget() succeeds it returns with the gotten denode locked.
+ *
+ * pmp	     - address of msdosfsmount structure of the filesystem containing
+ *	       the denode of interest.  The pm_dev field and the address of
+ *	       the msdosfsmount structure are used.
+ * dirclust  - cluster holding the directory entry; if dirclust is 0 (root
+ *	       directory) diroffset is relative to the beginning of the root
+ *	       directory, otherwise it is cluster relative.
+ * diroffset - offset past begin of cluster of denode we want
+ * depp	     - returns the address of the gotten denode, NULL on error.
+ */
+int
+deget(pmp, dirclust, diroffset, depp)
+	struct msdosfsmount *pmp;	/* so we know the maj/min number */
+	u_long dirclust;		/* cluster this dir entry came from */
+	u_long diroffset;		/* index of entry within the cluster */
+	struct denode **depp;		/* returns the addr of the gotten denode */
+{
+	int error;
+	dev_t dev = pmp->pm_dev;
+	struct mount *mntp = pmp->pm_mountp;
+	struct direntry *direntptr;
+	struct denode *ldep;
+	struct vnode *nvp;
+	struct buf *bp;
+	struct proc *p = curproc;	/* XXX */
+	struct timeval tv;
+
+#ifdef MSDOSFS_DEBUG
+	printf("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n",
+	    pmp, dirclust, diroffset, depp);
+#endif
+
+	/*
+	 * On FAT32 filesystems, root is a (more or less) normal
+	 * directory
+	 */
+	if (FAT32(pmp) && dirclust == MSDOSFSROOT)
+		dirclust = pmp->pm_rootdirblk;
+
+	/*
+	 * See if the denode is in the denode cache. Use the location of
+	 * the directory entry to compute the hash value. For subdir use
+	 * address of "." entry. For root dir (if not FAT32) use cluster
+	 * MSDOSFSROOT, offset MSDOSFSROOT_OFS
+	 *
+	 * NOTE: msdosfs_hashget() skips denodes whose de_refcnt is 0, which
+	 * ensures the denode returned does not represent an unlinked but
+	 * still open file.  These files are not to be accessible even when
+	 * the directory entry that represented the file happens to be
+	 * reused while the deleted file is still open.
+	 */
+	ldep = msdosfs_hashget(dev, dirclust, diroffset);
+	if (ldep) {
+		*depp = ldep;
+		return (0);
+	}
+
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced
+	 * elsewhere if MALLOC should block.
+	 */
+	MALLOC(ldep, struct denode *, sizeof(struct denode), M_MSDOSFSNODE, M_WAITOK);
+
+	/*
+	 * Directory entry was not in cache, have to create a vnode and
+	 * fill it in from disk via readep() (or by hand for the root).
+	 */
+	/* getnewvnode() does a VREF() on the vnode */
+	error = getnewvnode(VT_MSDOSFS, mntp, msdosfs_vnodeop_p, &nvp);
+	if (error) {
+		*depp = NULL;
+		FREE(ldep, M_MSDOSFSNODE);
+		return error;
+	}
+	bzero((caddr_t)ldep, sizeof *ldep);
+	lockinit(&ldep->de_lock, PINOD, "denode", 0, 0);
+	nvp->v_data = ldep;
+	ldep->de_vnode = nvp;
+	ldep->de_flag = 0;
+	ldep->de_devvp = 0;
+	ldep->de_dev = dev;
+	ldep->de_dirclust = dirclust;
+	ldep->de_diroffset = diroffset;
+	fc_purge(ldep, 0);	/* init the fat cache for this denode */
+
+	/*
+	 * Lock the denode so that it can't be accessed until we've read
+	 * it in and have done what we need to it.  Do this here instead
+	 * of at the start of msdosfs_hashins() so that reinsert() can
+	 * call msdosfs_hashins() with a locked denode.
+	 */
+	if (lockmgr(&ldep->de_lock, LK_EXCLUSIVE, (struct simplelock *)0, p))
+		panic("deget: unexpected lock failure");
+
+	/*
+	 * Insert the denode into the hash queue.
+	 */
+	msdosfs_hashins(ldep);
+
+	ldep->de_pmp = pmp;
+	ldep->de_refcnt = 1;
+	/*
+	 * Copy the directory entry into the denode area of the vnode.
+	 */
+	if ((dirclust == MSDOSFSROOT
+	     || (FAT32(pmp) && dirclust == pmp->pm_rootdirblk))
+	    && diroffset == MSDOSFSROOT_OFS) {
+		/*
+		 * Directory entry for the root directory. There isn't one,
+		 * so we manufacture one. We should probably rummage
+		 * through the root directory and find a label entry (if it
+		 * exists), and then use the time and date from that entry
+		 * as the time and date for the root denode.
+		 */
+		nvp->v_flag |= VROOT; /* should be further down		XXX */
+
+		ldep->de_Attributes = ATTR_DIRECTORY;
+		ldep->de_LowerCase = 0;
+		if (FAT32(pmp))
+			ldep->de_StartCluster = pmp->pm_rootdirblk;
+			/* de_FileSize will be filled in further down */
+		else {
+			ldep->de_StartCluster = MSDOSFSROOT;
+			ldep->de_FileSize = pmp->pm_rootdirsize * pmp->pm_BytesPerSec;
+		}
+		/*
+		 * fill in time and date so that dos2unixtime() doesn't
+		 * spit up when called from msdosfs_getattr() with root
+		 * denode
+		 */
+		ldep->de_CHun = 0;
+		ldep->de_CTime = 0x0000;	/* 00:00:00	 */
+		ldep->de_CDate = (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT)
+		    | (1 << DD_DAY_SHIFT);
+		/* Jan 1, 1980	 */
+		ldep->de_ADate = ldep->de_CDate;
+		ldep->de_MTime = ldep->de_CTime;
+		ldep->de_MDate = ldep->de_CDate;
+		/* the other fields stay zero from the bzero() above */
+	} else {
+		error = readep(pmp, dirclust, diroffset, &bp, &direntptr);
+		if (error) {
+			/*
+			 * The denode does not contain anything useful, so
+			 * it would be wrong to leave it on its hash chain.
+			 * Mark it SLOT_DELETED so msdosfs_inactive() recycles it.
+			 */
+			ldep->de_Name[0] = SLOT_DELETED;
+
+			vput(nvp);
+			*depp = NULL;
+			return (error);
+		}
+		DE_INTERNALIZE(ldep, direntptr);
+		brelse(bp);
+	}
+
+	/*
+	 * Fill in a few fields of the vnode and finish filling in the
+	 * denode.  Then return the address of the found denode.
+	 */
+	if (ldep->de_Attributes & ATTR_DIRECTORY) {
+		/*
+		 * Since DOS directory entries that describe directories
+		 * have 0 in the filesize field, we take this opportunity
+		 * to find out the length of the directory and plug it into
+		 * the denode structure (E2BIG from pcbmap() is the expected result).
+		 */
+		u_long size;
+
+		nvp->v_type = VDIR;
+		if (ldep->de_StartCluster != MSDOSFSROOT) {
+			error = pcbmap(ldep, 0xffff, 0, &size, 0);
+			if (error == E2BIG) {
+				ldep->de_FileSize = de_cn2off(pmp, size);
+				error = 0;
+			} else
+				printf("deget(): pcbmap returned %d\n", error);	/* NOTE(review): also reached when error == 0; error is not returned */
+		}
+	} else
+		nvp->v_type = VREG;
+	getmicrouptime(&tv);
+	SETHIGH(ldep->de_modrev, tv.tv_sec);
+	SETLOW(ldep->de_modrev, tv.tv_usec * 4294);	/* 4294 ~= 2^32 / 10^6; presumably scales usec to a 32-bit fraction */
+	ldep->de_devvp = pmp->pm_devvp;
+	VREF(ldep->de_devvp);	/* released again in msdosfs_reclaim() */
+	*depp = ldep;
+	return (0);
+}
+
+/* Write the denode's directory entry back to disk if it was modified. */
+int
+deupdat(dep, waitfor)
+	struct denode *dep;
+	int waitfor;
+{
+	struct timespec now;
+	struct direntry *ondisk;
+	struct buf *bp;
+	int error;
+
+	/* Nothing is ever written back on a read-only mount */
+	if (DETOV(dep)->v_mount->mnt_flag & MNT_RDONLY)
+		return (0);
+	getnanotime(&now);
+	DETIMES(dep, &now, &now, &now);
+	if (!(dep->de_flag & DE_MODIFIED))
+		return (0);
+	dep->de_flag &= ~DE_MODIFIED;
+	/* Directories and denodes with no references are not written out */
+	if ((dep->de_Attributes & ATTR_DIRECTORY) || dep->de_refcnt <= 0)
+		return (0);
+	error = readde(dep, &bp, &ondisk);
+	if (error != 0)
+		return (error);
+	DE_EXTERNALIZE(ondisk, dep);
+	if (!waitfor) {
+		bdwrite(bp);
+		return (0);
+	}
+	return (bwrite(bp));
+}
+
+/*
+ * Cut the file described by dep down to length; a shorter file is grown via deextend().
+ */
+int
+detrunc(dep, length, flags, cred, p)
+	struct denode *dep;
+	u_long length;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+	int allerror;
+	u_long eofentry;
+	u_long chaintofree;
+	daddr_t bn;
+	int boff;
+	int isadir = dep->de_Attributes & ATTR_DIRECTORY;
+	struct buf *bp;
+	struct msdosfsmount *pmp = dep->de_pmp;
+
+#ifdef MSDOSFS_DEBUG
+	printf("detrunc(): file %s, length %lu, flags %x\n", dep->de_Name, length, flags);
+#endif
+
+	/*
+	 * Disallow attempts to truncate the root directory since it is of
+	 * fixed size.  That's just the way dos filesystems are.  We use
+	 * the VROOT bit in the vnode because checking for the directory
+	 * bit and a startcluster of 0 in the denode is not adequate to
+	 * recognize the root directory at this point in a file or
+	 * directory's life.
+	 */
+	if ((DETOV(dep)->v_flag & VROOT) && !FAT32(pmp)) {
+		printf("detrunc(): can't truncate root directory, clust %ld, offset %ld\n",
+		    dep->de_dirclust, dep->de_diroffset);
+		return (EINVAL);
+	}
+
+	/* A length beyond the current size is an extension, not a truncation */
+	if (dep->de_FileSize < length) {
+		vnode_pager_setsize(DETOV(dep), length);
+		return deextend(dep, length, cred);
+	}
+
+	/*
+	 * If the desired length is 0 then remember the starting cluster of
+	 * the file and set the StartCluster field in the directory entry
+	 * to 0.  If the desired length is not zero, then get the number of
+	 * the last cluster in the shortened file.  Then get the number of
+	 * the first cluster in the part of the file that is to be freed.
+	 * Then set the next cluster pointer in the last cluster of the
+	 * file to CLUST_EOFE.
+	 */
+	if (length == 0) {
+		chaintofree = dep->de_StartCluster;
+		dep->de_StartCluster = 0;
+		eofentry = ~0;	/* marker: no FAT entry to rewrite below */
+	} else {
+		error = pcbmap(dep, de_clcount(pmp, length) - 1, 0, 
+			       &eofentry, 0);
+		if (error) {
+#ifdef MSDOSFS_DEBUG
+			printf("detrunc(): pcbmap fails %d\n", error);
+#endif
+			return (error);
+		}
+	}
+
+	fc_purge(dep, de_clcount(pmp, length));
+
+	/*
+	 * If the new length is not a multiple of the cluster size then we
+	 * must zero the tail end of the new last cluster in case it
+	 * becomes part of the file again because of a seek.
+	 */
+	if ((boff = length & pmp->pm_crbomask) != 0) {
+		if (isadir) {
+			bn = cntobn(pmp, eofentry);
+			error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
+			    NOCRED, &bp);
+		} else {
+			bn = de_blk(pmp, length);
+			error = bread(DETOV(dep), bn, pmp->pm_bpcluster,
+			    NOCRED, &bp);
+		}
+		if (error) {
+			brelse(bp);
+#ifdef MSDOSFS_DEBUG
+			printf("detrunc(): bread fails %d\n", error);
+#endif
+			return (error);
+		}
+		/*
+		 * Zero the cluster from the new EOF on (is this the right place for it?)
+		 */
+		bzero(bp->b_data + boff, pmp->pm_bpcluster - boff);
+		if (flags & IO_SYNC)
+			bwrite(bp);	/* NOTE(review): result is ignored */
+		else
+			bdwrite(bp);
+	}
+
+	/*
+	 * Write out the updated directory entry.  Even if the update fails
+	 * we free the trailing clusters.
+	 */
+	dep->de_FileSize = length;
+	if (!isadir)
+		dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+	allerror = vtruncbuf(DETOV(dep), cred, p, length, pmp->pm_bpcluster);
+#ifdef MSDOSFS_DEBUG
+	if (allerror)
+		printf("detrunc(): vtruncbuf error %d\n", allerror);
+#endif
+	error = deupdat(dep, 1);
+	if (error && (allerror == 0))
+		allerror = error;	/* keep the first failure for the return value */
+#ifdef MSDOSFS_DEBUG
+	printf("detrunc(): allerror %d, eofentry %lu\n",
+	       allerror, eofentry);
+#endif
+
+	/*
+	 * If we need to break the cluster chain for the file then do it
+	 * now.
+	 */
+	if (eofentry != ~0) {
+		error = fatentry(FAT_GET_AND_SET, pmp, eofentry,
+				 &chaintofree, CLUST_EOFE);
+		if (error) {
+#ifdef MSDOSFS_DEBUG
+			printf("detrunc(): fatentry errors %d\n", error);
+#endif
+			return (error);
+		}
+		fc_setcache(dep, FC_LASTFC, de_cluster(pmp, length - 1),
+			    eofentry);
+	}
+
+	/*
+	 * Now free the clusters removed from the file because of the
+	 * truncation.
+	 */
+	if (chaintofree != 0 && !MSDOSFSEOF(pmp, chaintofree))
+		freeclusterchain(pmp, chaintofree);
+
+	return (allerror);
+}
+
+/*
+ * Grow the file behind dep to length bytes, allocating clusters as
+ * needed, and write the directory entry back synchronously.
+ */
+int
+deextend(dep, length, cred)
+	struct denode *dep;
+	u_long length;
+	struct ucred *cred;
+{
+	struct msdosfsmount *pmp = dep->de_pmp;
+	u_long grow;
+	int error;
+
+	/* The fixed-size root directory of a non-FAT32 volume cannot grow */
+	if ((DETOV(dep)->v_flag & VROOT) && !FAT32(pmp))
+		return (EINVAL);
+
+	/* Directories are not extended through this routine */
+	if (dep->de_Attributes & ATTR_DIRECTORY)
+		return (EISDIR);
+
+	/* Callers may only ask for a strictly larger size */
+	if (dep->de_FileSize >= length)
+		panic("deextend: file too large");
+
+	/*
+	 * Allocate however many extra clusters the new length calls for
+	 */
+	grow = de_clcount(pmp, length) - de_clcount(pmp, dep->de_FileSize);
+	if (grow != 0) {
+		if (grow > pmp->pm_freeclustercount)
+			return (ENOSPC);
+		error = extendfile(dep, grow, NULL, NULL, DE_CLEAR);
+		if (error != 0) {
+			/* give back whatever part of the chain was added */
+			(void) detrunc(dep, dep->de_FileSize, 0, cred, NULL);
+			return (error);
+		}
+	}
+
+	/* Record the new size and push the directory entry out right away */
+	dep->de_flag |= DE_UPDATE | DE_MODIFIED;
+	dep->de_FileSize = length;
+	return (deupdat(dep, 1));
+}
+
+/*
+ * Rehash a denode whose directory entry has moved, so that lookups by
+ * the entry's new location find it.
+ */
+void
+reinsert(dep)
+	struct denode *dep;
+{
+	/*
+	 * A directory is hashed by its starting cluster, which a move
+	 * does not change, so it can stay where it is.  A file is hashed
+	 * by the location of its directory entry and therefore has to
+	 * be taken off its old chain and put on the new one.  Presumably
+	 * the caller has already updated de_dirclust and de_diroffset;
+	 * confirm at the call sites.
+	 */
+	if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) {
+		msdosfs_hashrem(dep);
+		msdosfs_hashins(dep);
+	}
+}
+
+int
+msdosfs_reclaim(ap)	/* detach and free the denode behind a vnode */
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(vp);
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_reclaim(): dep %p, file %s, refcnt %ld\n",
+	    dep, dep->de_Name, dep->de_refcnt);
+#endif
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("msdosfs_reclaim(): pushing active", vp);
+	/*
+	 * Remove the denode from its hash chain.
+	 */
+	msdosfs_hashrem(dep);
+	/*
+	 * Purge old data structures associated with the denode.
+	 */
+	cache_purge(vp);
+	if (dep->de_devvp) {
+		vrele(dep->de_devvp);	/* drops the reference taken in deget() */
+		dep->de_devvp = 0;
+	}
+#if 0 /* XXX */
+	dep->de_flag = 0;
+#endif
+	FREE(dep, M_MSDOSFSNODE);
+	vp->v_data = NULL;	/* the vnode no longer has a denode */
+
+	return (0);
+}
+
+int
+msdosfs_inactive(ap)	/* flush the denode; truncate and recycle it if deleted */
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(vp);
+	struct proc *p = ap->a_p;
+	int error = 0;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_inactive(): dep %p, de_Name[0] %x\n", dep, dep->de_Name[0]);
+#endif
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("msdosfs_inactive(): pushing active", vp);
+
+	/*
+	 * Ignore denodes related to stale file handles.
+	 */
+	if (dep->de_Name[0] == SLOT_DELETED)
+		goto out;
+
+	/*
+	 * If the file has been deleted and it is on a read/write
+	 * filesystem, then truncate the file, and mark the directory slot
+	 * as empty.  (This may not be necessary for the dos filesystem.)
+	 */
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_inactive(): dep %p, refcnt %ld, mntflag %x, MNT_RDONLY %x\n",
+	       dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY);
+#endif
+	if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		error = detrunc(dep, (u_long) 0, 0, NOCRED, p);
+		dep->de_flag |= DE_UPDATE;
+		dep->de_Name[0] = SLOT_DELETED;
+	}
+	deupdat(dep, 0);	/* delayed write; its result is ignored */
+
+out:
+	VOP_UNLOCK(vp, 0, p);
+	/*
+	 * If we are done with the denode, reclaim it
+	 * so that it can be reused immediately.
+	 */
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_inactive(): v_usecount %d, de_Name[0] %x\n", vp->v_usecount,
+	       dep->de_Name[0]);
+#endif
+	if (dep->de_Name[0] == SLOT_DELETED)
+		vrecycle(vp, (struct simplelock *)0, p);
+	return (error);	/* result of detrunc(), or 0 */
+}
diff --git a/sys/fs/msdosfs/msdosfs_fat.c b/sys/fs/msdosfs/msdosfs_fat.c
new file mode 100644
index 0000000..1ec29db
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_fat.c
@@ -0,0 +1,1100 @@
+/*	$Id: msdosfs_fat.c,v 1.20 1998/04/06 11:39:04 phk Exp $ */
+/*	$NetBSD: msdosfs_fat.c,v 1.28 1997/11/17 15:36:49 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * kernel include files.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/mount.h>		/* to define statfs structure */
+#include <sys/vnode.h>		/* to define vattr structure */
+
+/*
+ * msdosfs include files.
+ */
+#include <msdosfs/bpb.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/fat.h>
+
+/*
+ * Fat cache stats.  These are only ever incremented in this file.
+ */
+static int fc_fileextends;	/* # of file extends			 */
+static int fc_lfcempty;		/* # of times the last file cluster cache
+				 * entry was empty in extendfile() */
+static int fc_bmapcalls;		/* # of times pcbmap was called		 */
+
+#define	LMMAX	20
+static int fc_lmdistance[LMMAX];/* counters for how far off the last
+				 * cluster mapped entry was. */
+static int fc_largedistance;	/* off by LMMAX or more			 */
+
+static int	chainalloc __P((struct msdosfsmount *pmp, u_long start,
+				u_long count, u_long fillwith,
+				u_long *retcluster, u_long *got));
+static int	chainlength __P((struct msdosfsmount *pmp, u_long start,
+				 u_long count));
+static void	fatblock __P((struct msdosfsmount *pmp, u_long ofs,
+			      u_long *bnp, u_long *sizep, u_long *bop));
+static int	fatchain __P((struct msdosfsmount *pmp, u_long start,
+			      u_long count, u_long fillwith));
+static void	fc_lookup __P((struct denode *dep, u_long findcn,
+			       u_long *frcnp, u_long *fsrcnp));
+static void	updatefats __P((struct msdosfsmount *pmp, struct buf *bp,
+				u_long fatbn));
+static __inline void
+		usemap_alloc __P((struct msdosfsmount *pmp, u_long cn));
+static __inline void
+		usemap_free __P((struct msdosfsmount *pmp, u_long cn));
+
+static void
+fatblock(pmp, ofs, bnp, sizep, bop)
+	struct msdosfsmount *pmp;
+	u_long ofs;
+	u_long *bnp;
+	u_long *sizep;
+	u_long *bop;
+{
+	/* Sector index, within one fat copy, of the block holding ofs. */
+	u_long relsec = ofs / pmp->pm_fatblocksize * pmp->pm_fatblocksec;
+
+	/* Block length in bytes, clamped to the sectors left in the fat. */
+	if (sizep != NULL)
+		*sizep = min(pmp->pm_fatblocksec, pmp->pm_FATsecs - relsec)
+		    * pmp->pm_BytesPerSec;
+	/* Rebase the sector index onto the fat copy selected by pm_curfat. */
+	if (bnp != NULL)
+		*bnp = relsec + (pmp->pm_fatblk + pmp->pm_curfat * pmp->pm_FATsecs);
+	/* Byte offset of ofs inside that block. */
+	if (bop != NULL)
+		*bop = ofs % pmp->pm_fatblocksize;
+}
+
+/*
+ * Map the logical cluster number of a file into a physical disk sector
+ * that is filesystem relative.
+ *
+ * dep	  - address of denode representing the file of interest
+ * findcn - file relative cluster whose filesystem relative cluster number
+ *	    and/or block number are/is to be found
+ * bnp	  - address of where to place the file system relative block number.
+ *	    If this pointer is null then don't return this quantity.
+ * cnp	  - address of where to place the file system relative cluster number.
+ *	    If this pointer is null then don't return this quantity.
+ * sp	  - address of where to place the I/O block size in bytes; may be null.
+ *
+ * If bnp, cnp and sp are all null, nothing is done and 0 is returned.
+ * If the requested file relative cluster is beyond the end of file, E2BIG
+ * is returned and, when cnp is non-null, the number of clusters in the
+ * file is stored in *cnp.  This is useful for determining directory length.
+ */
+int
+pcbmap(dep, findcn, bnp, cnp, sp)
+	struct denode *dep;
+	u_long findcn;		/* file relative cluster to get		 */
+	daddr_t *bnp;		/* returned filesys relative blk number	 */
+	u_long *cnp;		/* returned cluster number		 */
+	int *sp;		/* returned block size			 */
+{
+	int error;
+	u_long i;
+	u_long cn;
+	u_long prevcn = 0; /* XXX: prevcn could be used uninitialized */
+	u_long byteoffset;
+	u_long bn;
+	u_long bo;
+	struct buf *bp = NULL;
+	u_long bp_bn = -1;
+	struct msdosfsmount *pmp = dep->de_pmp;
+	u_long bsize;
+
+	fc_bmapcalls++;
+
+	/*
+	 * If they don't give us someplace to return a value then don't
+	 * bother doing anything.
+	 */
+	if (bnp == NULL && cnp == NULL && sp == NULL)
+		return (0);
+
+	cn = dep->de_StartCluster;
+	/*
+	 * The "file" that makes up the root directory is contiguous,
+	 * permanently allocated, of fixed size, and is not made up of
+	 * clusters.  If the cluster number is beyond the end of the root
+	 * directory, then return the number of clusters in the file.
+	 */
+	if (cn == MSDOSFSROOT) {
+		if (dep->de_Attributes & ATTR_DIRECTORY) {
+			if (de_cn2off(pmp, findcn) >= dep->de_FileSize) {
+				if (cnp)
+					*cnp = de_bn2cn(pmp, pmp->pm_rootdirsize);
+				return (E2BIG);
+			}
+			if (bnp)
+				*bnp = pmp->pm_rootdirblk + de_cn2bn(pmp, findcn);
+			if (cnp)
+				*cnp = MSDOSFSROOT;
+			if (sp)
+				*sp = min(pmp->pm_bpcluster,
+				    dep->de_FileSize - de_cn2off(pmp, findcn));
+			return (0);
+		} else {		/* just an empty file */
+			if (cnp)
+				*cnp = 0;
+			return (E2BIG);
+		}
+	}
+
+	/*
+	 * All other files do I/O in cluster sized blocks
+	 */
+	if (sp)
+		*sp = pmp->pm_bpcluster;
+
+	/*
+	 * Rummage around in the fat cache, maybe we can avoid tromping
+	 * thru every fat entry for the file. And, keep track of how far
+	 * off the cache was from where we wanted to be.
+	 */
+	i = 0;
+	fc_lookup(dep, findcn, &i, &cn);
+	if ((bn = findcn - i) >= LMMAX)
+		fc_largedistance++;
+	else
+		fc_lmdistance[bn]++;
+
+	/*
+	 * Handle all other files or directories the normal way.
+	 */
+	for (; i < findcn; i++) {
+		/*
+		 * Stop with all reserved clusters, not just with EOF.
+		 */
+		if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+			goto hiteof;
+		byteoffset = FATOFS(pmp, cn);
+		fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+		if (bn != bp_bn) {
+			if (bp)
+				brelse(bp);
+			error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			bp_bn = bn;
+		}
+		prevcn = cn;
+		if (FAT32(pmp))
+			cn = getulong(&bp->b_data[bo]);
+		else
+			cn = getushort(&bp->b_data[bo]);
+		if (FAT12(pmp) && (prevcn & 1))
+			cn >>= 4;
+		cn &= pmp->pm_fatmask;
+
+		/*
+		 * Force the special cluster numbers
+		 * to be the same for all cluster sizes
+		 * to let the rest of msdosfs handle
+		 * all cases the same.
+		 */
+		if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+			cn |= ~pmp->pm_fatmask;
+	}
+
+	if (!MSDOSFSEOF(pmp, cn)) {
+		if (bp)
+			brelse(bp);
+		if (bnp)
+			*bnp = cntobn(pmp, cn);
+		if (cnp)
+			*cnp = cn;
+		fc_setcache(dep, FC_LASTMAP, i, cn);
+		return (0);
+	}
+
+hiteof:;
+	if (cnp)
+		*cnp = i;
+	if (bp)
+		brelse(bp);
+	/* update last file cluster entry in the fat cache (XXX: i may be 0) */
+	fc_setcache(dep, FC_LASTFC, i - 1, prevcn);
+	return (E2BIG);
+}
+
+/*
+ * Pick the fat cache entry with the largest file relative cluster number
+ * that is still <= findcn.  Outputs are left untouched if none qualifies.
+ */
+static void
+fc_lookup(dep, findcn, frcnp, fsrcnp)
+	struct denode *dep;
+	u_long findcn;
+	u_long *frcnp;
+	u_long *fsrcnp;
+{
+	struct fatcache *fcp, *best = NULL;
+	struct fatcache *end = &dep->de_fc[FC_SIZE];
+
+	/* Scan every slot; empty slots and slots past findcn don't count. */
+	for (fcp = dep->de_fc; fcp < end; fcp++) {
+		if (fcp->fc_frcn == FCE_EMPTY || fcp->fc_frcn > findcn)
+			continue;
+		if (best == NULL || fcp->fc_frcn > best->fc_frcn)
+			best = fcp;
+	}
+	if (best == NULL)
+		return;
+
+	*frcnp = best->fc_frcn;
+	*fsrcnp = best->fc_fsrcn;
+}
+
+/*
+ * Invalidate every fat cache entry of denode dep whose file relative
+ * cluster number is frcn or greater; entries below frcn are kept.
+ * Invalidation means storing FCE_EMPTY in the entry's fc_frcn field.
+ */
+void
+fc_purge(dep, frcn)
+	struct denode *dep;
+	u_int frcn;
+{
+	int slot;
+
+	/* Each slot is judged on its own, so an index walk is enough. */
+	for (slot = 0; slot < FC_SIZE; slot++) {
+		if (dep->de_fc[slot].fc_frcn >= frcn)
+			dep->de_fc[slot].fc_frcn = FCE_EMPTY;
+	}
+}
+
+/*
+ * Update the fat, and the FSInfo block when the mount has one.
+ * If mirroring the fat, update all copies, with the first copy as last.
+ * Else update only the current fat (ignoring the others).
+ *
+ * pmp	 - msdosfsmount structure for filesystem to update
+ * bp	 - addr of modified fat block; handed to bwrite()/bdwrite() here
+ * fatbn - block number relative to begin of filesystem of the modified fat block.
+ */
+static void
+updatefats(pmp, bp, fatbn)
+	struct msdosfsmount *pmp;
+	struct buf *bp;
+	u_long fatbn;
+{
+	int i;
+	struct buf *bpn;
+
+#ifdef MSDOSFS_DEBUG
+	printf("updatefats(pmp %p, bp %p, fatbn %lu)\n", pmp, bp, fatbn);
+#endif
+
+	/*
+	 * If we have an FSInfo block, update it.
+	 */
+	if (pmp->pm_fsinfo) {
+		u_long cn = pmp->pm_nxtfree;
+
+		if (pmp->pm_freeclustercount
+		    && (pmp->pm_inusemap[cn / N_INUSEBITS]
+			& (1 << (cn % N_INUSEBITS)))) {
+			/*
+			 * The cluster indicated in FSInfo isn't free
+			 * any longer.  Go get a new free one.
+			 */
+			for (cn = 0; cn < pmp->pm_maxcluster; cn += N_INUSEBITS)
+				if (pmp->pm_inusemap[cn / N_INUSEBITS] != (u_int)-1)
+					break;
+			pmp->pm_nxtfree = cn
+				+ ffs(pmp->pm_inusemap[cn / N_INUSEBITS]
+				      ^ (u_int)-1) - 1;
+		}
+		if (bread(pmp->pm_devvp, pmp->pm_fsinfo, 1024, NOCRED, &bpn) != 0) {
+			/*
+			 * Ignore the error, but turn off FSInfo update for the future.
+			 */
+			pmp->pm_fsinfo = 0;
+			brelse(bpn);
+		} else {
+			struct fsinfo *fp = (struct fsinfo *)bpn->b_data;
+
+			putulong(fp->fsinfree, pmp->pm_freeclustercount);
+			putulong(fp->fsinxtfree, pmp->pm_nxtfree);
+			if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT)
+				bwrite(bpn);
+			else
+				bdwrite(bpn);
+		}
+	}
+
+	if (pmp->pm_flags & MSDOSFS_FATMIRROR) {
+		/*
+		 * Now copy the block(s) of the modified fat to the other copies of
+		 * the fat and write them out.  This is faster than reading in the
+		 * other fats and then writing them back out.  This could tie up
+		 * the fat for quite a while. Preventing others from accessing it.
+		 * To prevent us from going after the fat quite so much we use
+		 * delayed writes, unless they specified "synchronous" when the
+		 * filesystem was mounted.  If synch is asked for then use
+		 * bwrite()'s and really slow things down.
+		 */
+		for (i = 1; i < pmp->pm_FATs; i++) {
+			fatbn += pmp->pm_FATsecs;
+			/* getblk() never fails */
+			bpn = getblk(pmp->pm_devvp, fatbn, bp->b_bcount, 0, 0);
+			bcopy(bp->b_data, bpn->b_data, bp->b_bcount);
+			if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT)
+				bwrite(bpn);
+			else
+				bdwrite(bpn);
+		}
+	}
+
+	/*
+	 * Write out the first (or current) fat last.
+	 */
+	if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT)
+		bwrite(bp);
+	else
+		bdwrite(bp);
+	/*
+	 * Maybe update fsinfo sector here?
+	 */
+}
+
+/*
+ * Updating entries in 12 bit fats is a pain in the butt.
+ *
+ * The following picture shows where nibbles go when moving from a 12 bit
+ * cluster number into the appropriate bytes in the FAT.
+ *
+ *	byte m        byte m+1      byte m+2
+ *	+----+----+   +----+----+   +----+----+
+ *	|  0    1 |   |  2    3 |   |  4    5 |   FAT bytes
+ *	+----+----+   +----+----+   +----+----+
+ *
+ *	+----+----+----+   +----+----+----+
+ *	|  3    0    1 |   |  4    5    2 |
+ *	+----+----+----+   +----+----+----+
+ *	cluster n  	   cluster n+1
+ *
+ * Where n is even. m = n + (n >> 1)
+ *
+ */
+static __inline void
+usemap_alloc(pmp, cn)
+	struct msdosfsmount *pmp;
+	u_long cn;
+{
+	/* Count one free cluster fewer and set cn's bit in the in-use map. */
+	pmp->pm_freeclustercount--;
+	pmp->pm_inusemap[cn / N_INUSEBITS] |= 1 << (cn % N_INUSEBITS);
+}
+
+static __inline void
+usemap_free(pmp, cn)
+	struct msdosfsmount *pmp;
+	u_long cn;
+{
+	/* Clear cn's bit in the in-use map and count one more free cluster. */
+	pmp->pm_inusemap[cn / N_INUSEBITS] &= ~(1 << (cn % N_INUSEBITS));
+	pmp->pm_freeclustercount++;
+}
+
+int
+clusterfree(pmp, cluster, oldcnp)
+	struct msdosfsmount *pmp;
+	u_long cluster;
+	u_long *oldcnp;
+{
+	int error;
+	u_long oldcn;
+
+	/* Free one cluster; its old fat entry goes to *oldcnp if non-null.
+	 * Range-check first so a bad cluster never indexes the in-use map
+	 * (EINVAL, as fatentry() would have returned anyway). */
+	if (cluster < CLUST_FIRST || cluster > pmp->pm_maxcluster)
+		return (EINVAL);
+	usemap_free(pmp, cluster);	/* before fatentry(): it runs updatefats() */
+	error = fatentry(FAT_GET_AND_SET, pmp, cluster, &oldcn, MSDOSFSFREE);
+	if (error) {
+		usemap_alloc(pmp, cluster);	/* undo the map update */
+		return (error);
+	}
+	if (oldcnp)
+		*oldcnp = oldcn;
+	return (0);
+}
+
+/*
+ * Get or Set or 'Get and Set' the cluster'th entry in the fat.
+ *
+ * function	- whether to get or set a fat entry
+ * pmp		- address of the msdosfsmount structure for the filesystem
+ *		  whose fat is to be manipulated.
+ * cn		- which cluster is of interest
+ * oldcontents	- address of a word that is to receive the contents of the
+ *		  cluster'th entry if this is a get function
+ * newcontents	- the new value to be written into the cluster'th element of
+ *		  the fat if this is a set function.
+ *
+ * This function can also be used to free a cluster by setting the fat entry
+ * for a cluster to 0.  Returns 0 on success, EINVAL if cn lies outside
+ * CLUST_FIRST..pm_maxcluster, otherwise the error from bread().
+ * A set goes through updatefats(), which also writes the other fat copies
+ * when mirroring is on.  NOTE: marking a cluster free here does not update
+ * the inusemap in the msdosfsmount structure. This is left to the caller.
+ */
+int
+fatentry(function, pmp, cn, oldcontents, newcontents)
+	int function;
+	struct msdosfsmount *pmp;
+	u_long cn;
+	u_long *oldcontents;
+	u_long newcontents;
+{
+	int error;
+	u_long readcn;
+	u_long bn, bo, bsize, byteoffset;
+	struct buf *bp;
+
+#ifdef	MSDOSFS_DEBUG
+	printf("fatentry(func %d, pmp %p, clust %lu, oldcon %p, newcon %lx)\n",
+	     function, pmp, cn, oldcontents, newcontents);
+#endif
+
+#ifdef DIAGNOSTIC
+	/*
+	 * Be sure they asked us to do something.
+	 */
+	if ((function & (FAT_SET | FAT_GET)) == 0) {
+		printf("fatentry(): function code doesn't specify get or set\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * If they asked us to return a cluster number but didn't tell us
+	 * where to put it, give them an error.
+	 */
+	if ((function & FAT_GET) && oldcontents == NULL) {
+		printf("fatentry(): get function with no place to put result\n");
+		return (EINVAL);
+	}
+#endif
+
+	/*
+	 * Be sure the requested cluster is in the filesystem.
+	 */
+	if (cn < CLUST_FIRST || cn > pmp->pm_maxcluster)
+		return (EINVAL);
+
+	byteoffset = FATOFS(pmp, cn);
+	fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+	error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	if (function & FAT_GET) {
+		if (FAT32(pmp))
+			readcn = getulong(&bp->b_data[bo]);
+		else
+			readcn = getushort(&bp->b_data[bo]);
+		if (FAT12(pmp) && (cn & 1))	/* logical test, as in pcbmap() */
+			readcn >>= 4;		/* odd entry: upper 12 bits */
+		readcn &= pmp->pm_fatmask;
+		/* map reserved fat entries to same values for all fats */
+		if ((readcn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+			readcn |= ~pmp->pm_fatmask;
+		*oldcontents = readcn;
+	}
+	if (function & FAT_SET) {
+		switch (pmp->pm_fatmask) {
+		case FAT12_MASK:
+			readcn = getushort(&bp->b_data[bo]);
+			if (cn & 1) {
+				readcn &= 0x000f;
+				readcn |= newcontents << 4;
+			} else {
+				readcn &= 0xf000;
+				readcn |= newcontents & 0xfff;
+			}
+			putushort(&bp->b_data[bo], readcn);
+			break;
+		case FAT16_MASK:
+			putushort(&bp->b_data[bo], newcontents);
+			break;
+		case FAT32_MASK:
+			/*
+			 * According to spec we have to retain the
+			 * high order bits of the fat entry.
+			 */
+			readcn = getulong(&bp->b_data[bo]);
+			readcn &= ~FAT32_MASK;
+			readcn |= newcontents & FAT32_MASK;
+			putulong(&bp->b_data[bo], readcn);
+			break;
+		}
+		updatefats(pmp, bp, bn);
+		bp = NULL;
+		pmp->pm_fmod = 1;
+	}
+	if (bp)
+		brelse(bp);
+	return (0);
+}
+
+/*
+ * Update a contiguous cluster chain: every fat entry is pointed at the
+ * next cluster.  Returns 0, EINVAL (range check) or the bread() error.
+ * pmp	    - mount point
+ * start    - first cluster of chain
+ * count    - number of clusters in chain
+ * fillwith - what to write into fat entry of last cluster
+ */
+static int
+fatchain(pmp, start, count, fillwith)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+	u_long fillwith;
+{
+	int error;
+	u_long bn, bo, bsize, byteoffset, readcn, newc;
+	struct buf *bp;
+
+#ifdef MSDOSFS_DEBUG
+	printf("fatchain(pmp %p, start %lu, count %lu, fillwith %lx)\n",
+	    pmp, start, count, fillwith);
+#endif
+	/*
+	 * Be sure the clusters are in the filesystem.
+	 */
+	if (start < CLUST_FIRST || start + count - 1 > pmp->pm_maxcluster)
+		return (EINVAL);
+
+	while (count > 0) {
+		byteoffset = FATOFS(pmp, start);
+		fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+		error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		while (count > 0) {
+			start++;	/* start now names the next cluster */
+			newc = --count > 0 ? start : fillwith;
+			switch (pmp->pm_fatmask) {
+			case FAT12_MASK:
+				readcn = getushort(&bp->b_data[bo]);
+				if (start & 1) {	/* entry written is even */
+					readcn &= 0xf000;
+					readcn |= newc & 0xfff;
+				} else {
+					readcn &= 0x000f;
+					readcn |= newc << 4;
+				}
+				putushort(&bp->b_data[bo], readcn);
+				bo++;	/* even entry steps 1 byte, odd entry 2 */
+				if (!(start & 1))
+					bo++;
+				break;
+			case FAT16_MASK:
+				putushort(&bp->b_data[bo], newc);
+				bo += 2;
+				break;
+			case FAT32_MASK:
+				readcn = getulong(&bp->b_data[bo]);
+				readcn &= ~pmp->pm_fatmask;
+				readcn |= newc & pmp->pm_fatmask;
+				putulong(&bp->b_data[bo], readcn);
+				bo += 4;
+				break;
+			}
+			if (bo >= bsize)	/* past this fat block */
+				break;
+		}
+		updatefats(pmp, bp, bn);
+	}
+	pmp->pm_fmod = 1;
+	return (0);
+}
+
+/*
+ * Check the length of a free cluster chain starting at start.
+ * Returns the number of free clusters found, capped at count.
+ * pmp	 - mount point
+ * start - start of chain
+ * count - maximum interesting length
+ */
+static int
+chainlength(pmp, start, count)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+{
+	u_long idx, max_idx;
+	u_int map;
+	u_long len;
+
+	max_idx = pmp->pm_maxcluster / N_INUSEBITS;
+	idx = start / N_INUSEBITS;
+	start %= N_INUSEBITS;		/* from here on: bit position in word */
+	map = pmp->pm_inusemap[idx];
+	map &= ~((1 << start) - 1);	/* ignore the bits below start */
+	if (map) {
+		len = ffs(map) - 1 - start;	/* up to first in-use bit */
+		return (len > count ? count : len);
+	}
+	len = N_INUSEBITS - start;	/* rest of the first word is free */
+	if (len >= count)
+		return (count);
+	while (++idx <= max_idx) {
+		if (len >= count)
+			break;
+		map = pmp->pm_inusemap[idx];
+		if (map) {
+			len +=  ffs(map) - 1;
+			break;
+		}
+		len += N_INUSEBITS;	/* whole word free, keep going */
+	}
+	return (len > count ? count : len);
+}
+
+/*
+ * Allocate contiguous free clusters; if fatchain() fails, undo the map.
+ * pmp/start/count - mount point, first cluster and length of the chain.
+ * fillwith   - value for the fat entry of the last allocated cluster.
+ * retcluster - put the first allocated cluster's number here (may be 0).
+ * got	      - how many clusters were actually allocated (may be 0).
+ * Returns 0, or the error from fatchain().
+ */
+static int
+chainalloc(pmp, start, count, fillwith, retcluster, got)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+	u_long fillwith;
+	u_long *retcluster;
+	u_long *got;
+{
+	int error;
+	u_long cl, n;
+
+	for (cl = start, n = count; n-- > 0;)
+		usemap_alloc(pmp, cl++);
+
+	error = fatchain(pmp, start, count, fillwith);
+	if (error != 0) {
+		for (cl = start, n = count; n-- > 0;)	/* give them back */
+			usemap_free(pmp, cl++);
+		return (error);
+	}
+#ifdef MSDOSFS_DEBUG
+	printf("clusteralloc(): allocated cluster chain at %lu (%lu clusters)\n",
+	    start, count);
+#endif
+	if (retcluster)
+		*retcluster = start;
+	if (got)
+		*got = count;
+	return (0);
+}
+
+/*
+ * Allocate contiguous free clusters.  May hand back fewer than count
+ * (see got); returns ENOSPC when no free run was found at all.
+ * pmp	      - mount point.
+ * start      - preferred start of cluster chain.
+ * count      - number of clusters requested.
+ * fillwith   - put this value into the fat entry for the
+ *		last allocated cluster.
+ * retcluster - put the first allocated cluster's number here.
+ * got	      - how many clusters were actually allocated.
+ */
+int
+clusteralloc(pmp, start, count, fillwith, retcluster, got)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+	u_long fillwith;
+	u_long *retcluster;
+	u_long *got;
+{
+	u_long idx;
+	u_long len, newst, foundl, cn, l;
+	u_long foundcn = 0; /* XXX: foundcn could be used uninitialized */
+	u_int map;
+
+#ifdef MSDOSFS_DEBUG
+	printf("clusteralloc(): find %lu clusters\n",count);
+#endif
+	if (start) {
+		if ((len = chainlength(pmp, start, count)) >= count)
+			return (chainalloc(pmp, start, count, fillwith, retcluster, got));
+	} else 
+		len = 0;
+
+	/*
+	 * Start at a (pseudo) random place to maximize cluster runs
+	 * under multiple writers.
+	 */
+	newst = random() % (pmp->pm_maxcluster + 1);
+	foundl = 0;		/* longest free run seen so far */
+
+	for (cn = newst; cn <= pmp->pm_maxcluster;) {
+		idx = cn / N_INUSEBITS;
+		map = pmp->pm_inusemap[idx];
+		map |= (1 << (cn % N_INUSEBITS)) - 1;	/* hide bits below cn */
+		if (map != (u_int)-1) {
+			cn = idx * N_INUSEBITS + ffs(map^(u_int)-1) - 1;
+			if ((l = chainlength(pmp, cn, count)) >= count)
+				return (chainalloc(pmp, cn, count, fillwith, retcluster, got));
+			if (l > foundl) {
+				foundcn = cn;
+				foundl = l;
+			}
+			cn += l + 1;
+			continue;
+		}
+		cn += N_INUSEBITS - cn % N_INUSEBITS;	/* next map word */
+	}
+	for (cn = 0; cn < newst;) {	/* wrap around: scan below newst */
+		idx = cn / N_INUSEBITS;
+		map = pmp->pm_inusemap[idx];
+		map |= (1 << (cn % N_INUSEBITS)) - 1;
+		if (map != (u_int)-1) {
+			cn = idx * N_INUSEBITS + ffs(map^(u_int)-1) - 1;
+			if ((l = chainlength(pmp, cn, count)) >= count)
+				return (chainalloc(pmp, cn, count, fillwith, retcluster, got));
+			if (l > foundl) {
+				foundcn = cn;
+				foundl = l;
+			}
+			cn += l + 1;
+			continue;
+		}
+		cn += N_INUSEBITS - cn % N_INUSEBITS;
+	}
+
+	if (!foundl)
+		return (ENOSPC);
+
+	if (len)	/* short run at the preferred start wins */
+		return (chainalloc(pmp, start, len, fillwith, retcluster, got));
+	else
+		return (chainalloc(pmp, foundcn, foundl, fillwith, retcluster, got));
+}
+
+
+/*
+ * Free a chain of clusters.  Returns 0, or the error from bread().
+ *
+ * pmp		- address of the msdosfs mount structure for the filesystem
+ *		  containing the cluster chain to be freed.
+ * cluster	- number of the 1st cluster in the chain of clusters to be
+ *		  freed.
+ */
+int
+freeclusterchain(pmp, cluster)
+	struct msdosfsmount *pmp;
+	u_long cluster;
+{
+	int error;
+	struct buf *bp = NULL;
+	u_long bn, bo, bsize, byteoffset;
+	u_long readcn, lbn = -1;
+
+	while (cluster >= CLUST_FIRST && cluster <= pmp->pm_maxcluster) {
+		byteoffset = FATOFS(pmp, cluster);
+		fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+		if (lbn != bn) {	/* chain moved on to another fat block */
+			if (bp)
+				updatefats(pmp, bp, lbn);
+			error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			lbn = bn;
+		}
+		usemap_free(pmp, cluster);
+		switch (pmp->pm_fatmask) {
+		case FAT12_MASK:
+			readcn = getushort(&bp->b_data[bo]);
+			if (cluster & 1) {
+				cluster = readcn >> 4;
+				readcn &= 0x000f;
+				readcn |= MSDOSFSFREE << 4;
+			} else {
+				cluster = readcn;
+				readcn &= 0xf000;
+				readcn |= MSDOSFSFREE & 0xfff;
+			}
+			putushort(&bp->b_data[bo], readcn);
+			break;
+		case FAT16_MASK:
+			cluster = getushort(&bp->b_data[bo]);
+			putushort(&bp->b_data[bo], MSDOSFSFREE);
+			break;
+		case FAT32_MASK:
+			cluster = getulong(&bp->b_data[bo]);
+			putulong(&bp->b_data[bo],
+				 (MSDOSFSFREE & FAT32_MASK) | (cluster & ~FAT32_MASK));
+			break;
+		}
+		cluster &= pmp->pm_fatmask;	/* next cluster in the chain */
+		if ((cluster | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+			cluster |= ~pmp->pm_fatmask;	/* widen, as pcbmap() does */
+	}
+	if (bp)
+		updatefats(pmp, bp, bn);
+	return (0);
+}
+
+/*
+ * Read in fat blocks looking for free clusters. For every free cluster
+ * found turn off its corresponding bit in the pm_inusemap.
+ * Also recomputes pm_freeclustercount.  Returns 0 or the bread() error.
+ */
+int
+fillinusemap(pmp)
+	struct msdosfsmount *pmp;
+{
+	struct buf *bp = NULL;
+	u_long cn, readcn;
+	int error;
+	u_long bn, bo, bsize, byteoffset;
+
+	/*
+	 * Mark all clusters in use, we mark the free ones in the fat scan
+	 * loop further down.
+	 */
+	for (cn = 0; cn < (pmp->pm_maxcluster + N_INUSEBITS) / N_INUSEBITS; cn++)
+		pmp->pm_inusemap[cn] = (u_int)-1;
+
+	/*
+	 * Figure how many free clusters are in the filesystem by ripping
+	 * through the fat counting the number of entries whose content is
+	 * zero.  These represent free clusters.
+	 */
+	pmp->pm_freeclustercount = 0;
+	for (cn = CLUST_FIRST; cn <= pmp->pm_maxcluster; cn++) {
+		byteoffset = FATOFS(pmp, cn);
+		bo = byteoffset % pmp->pm_fatblocksize;
+		if (!bo || !bp) {
+			/* Read new FAT block */
+			if (bp)
+				brelse(bp);
+			fatblock(pmp, byteoffset, &bn, &bsize, NULL);
+			error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+		}
+		if (FAT32(pmp))
+			readcn = getulong(&bp->b_data[bo]);
+		else
+			readcn = getushort(&bp->b_data[bo]);
+		if (FAT12(pmp) && (cn & 1))
+			readcn >>= 4;
+		readcn &= pmp->pm_fatmask;
+
+		if (readcn == 0)
+			usemap_free(pmp, cn);	/* also bumps the free count */
+	}
+	brelse(bp);	/* NOTE(review): bp is NULL if the loop never ran */
+	return (0);
+}
+
+/*
+ * Allocate a new cluster and chain it onto the end of the file.
+ *
+ * dep	 - the file to extend
+ * count - number of clusters to allocate
+ * bpp	 - where to return the address of the buf header for the first new
+ *	   file block
+ * ncp	 - where to put cluster number of the first newly allocated cluster
+ *	   If this pointer is 0, do not return the cluster number.
+ * flags - see fat.h; with DE_CLEAR the new clusters are zeroed via clrbuf()
+ *
+ * NOTE: This function is not responsible for turning on the DE_UPDATE bit of
+ * the de_flag field of the denode and it does not change the de_FileSize
+ * field.  This is left for the caller to do.
+ */
+int
+extendfile(dep, count, bpp, ncp, flags)
+	struct denode *dep;
+	u_long count;
+	struct buf **bpp;
+	u_long *ncp;
+	int flags;
+{
+	int error;
+	u_long frcn;
+	u_long cn, got;
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct buf *bp;
+
+	/*
+	 * Don't try to extend the root directory
+	 */
+	if (dep->de_StartCluster == MSDOSFSROOT
+	    && (dep->de_Attributes & ATTR_DIRECTORY)) {
+		printf("extendfile(): attempt to extend root directory\n");
+		return (ENOSPC);
+	}
+
+	/*
+	 * If the "file's last cluster" cache entry is empty, and the file
+	 * is not empty, then fill the cache entry by calling pcbmap().
+	 */
+	fc_fileextends++;
+	if (dep->de_fc[FC_LASTFC].fc_frcn == FCE_EMPTY &&
+	    dep->de_StartCluster != 0) {
+		fc_lfcempty++;
+		error = pcbmap(dep, 0xffff, 0, &cn, 0);
+		/* we expect E2BIG; NOTE(review): >0xffff clusters would give 0 */
+		if (error != E2BIG)
+			return (error);
+	}
+
+	while (count > 0) {
+		/*
+		 * Allocate a new cluster chain and cat onto the end of the
+		 * file.  If the file is empty we make de_StartCluster point
+		 * to the new block.  Note that de_StartCluster being 0 is
+		 * sufficient to be sure the file is empty since we exclude
+		 * attempts to extend the root directory above, and the root
+		 * dir is the only file with a startcluster of 0 that has
+		 * blocks allocated (sort of).
+		 */
+		if (dep->de_StartCluster == 0)
+			cn = 0;
+		else
+			cn = dep->de_fc[FC_LASTFC].fc_fsrcn + 1;
+		error = clusteralloc(pmp, cn, count, CLUST_EOFE, &cn, &got);
+		if (error)
+			return (error);
+
+		count -= got;
+
+		/*
+		 * Give them the filesystem relative cluster number if they want
+		 * it.
+		 */
+		if (ncp) {
+			*ncp = cn;
+			ncp = NULL;
+		}
+
+		if (dep->de_StartCluster == 0) {
+			dep->de_StartCluster = cn;
+			frcn = 0;
+		} else {
+			error = fatentry(FAT_SET, pmp,
+					 dep->de_fc[FC_LASTFC].fc_fsrcn,
+					 0, cn);
+			if (error) {
+				clusterfree(pmp, cn, NULL);	/* NOTE(review): cn only, not all 'got' */
+				return (error);
+			}
+			frcn = dep->de_fc[FC_LASTFC].fc_frcn + 1;
+		}
+
+		/*
+		 * Update the "last cluster of the file" entry in the denode's fat
+		 * cache.
+		 */
+		fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1);
+
+		if (flags & DE_CLEAR) {
+			while (got-- > 0) {
+				/*
+				 * Get the buf header for the new block of the file.
+				 */
+				if (dep->de_Attributes & ATTR_DIRECTORY)
+					bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++),
+						    pmp->pm_bpcluster, 0, 0);
+				else {
+					bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++),
+					    pmp->pm_bpcluster, 0, 0);
+					/*
+					 * Do the bmap now, as in msdosfs_write
+					 */
+					if (pcbmap(dep,
+					    de_bn2cn(pmp, bp->b_lblkno),
+					    &bp->b_blkno, 0, 0))
+						bp->b_blkno = -1;
+					if (bp->b_blkno == -1)
+						panic("extendfile: pcbmap");
+				}
+				clrbuf(bp);
+				if (bpp) {	/* first buffer goes to the caller */
+					*bpp = bp;
+					bpp = NULL;
+				} else
+					bdwrite(bp);
+			}
+		}
+	}
+
+	return (0);
+}
diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c
new file mode 100644
index 0000000..87de1f2
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_lookup.c
@@ -0,0 +1,1085 @@
+/*	$Id: msdosfs_lookup.c,v 1.27 1998/12/07 21:58:35 archie Exp $ */
+/*	$NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/systm.h>
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/fat.h>
+
+/*
+ * When we search a directory the blocks containing directory entries are
+ * read and examined.  The directory entries contain information that would
+ * normally be in the inode of a unix filesystem.  This means that some of
+ * a directory's contents may also be in memory resident denodes (sort of
+ * an inode).  This can cause problems if we are searching while some other
+ * process is modifying a directory.  To prevent one process from accessing
+ * incompletely modified directory information we depend upon being the
+ * sole owner of a directory block.  bread/brelse provide this service.
+ * This being the case, when a process modifies a directory it must first
+ * acquire the disk block that contains the directory entry to be modified.
+ * Then update the disk block and the denode, and then write the disk block
+ * out to disk.  This way disk blocks containing directory entries and
+ * in-memory denodes will be in sync.
+ */
+int
+msdosfs_lookup(ap)
+	struct vop_cachedlookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *vdp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	daddr_t bn;
+	int error;
+	int lockparent;
+	int wantparent;
+	int slotcount;
+	int slotoffset = 0;
+	int frcn;
+	u_long cluster;
+	int blkoff;
+	int diroff;
+	int blsize;
+	int isadir;		/* ~0 if found direntry is a directory	 */
+	u_long scn;		/* starting cluster number		 */
+	struct vnode *pdp;
+	struct denode *dp;
+	struct denode *tdp;
+	struct msdosfsmount *pmp;
+	struct buf *bp = 0;
+	struct direntry *dep = NULL;
+	u_char dosfilename[12];
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+	struct proc *p = cnp->cn_proc;
+	int unlen;
+
+	int wincnt = 1;
+	int chksum = -1;
+	int olddos = 1;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_lookup(): looking for %s\n", cnp->cn_nameptr);
+#endif
+	dp = VTODE(vdp);
+	pmp = dp->de_pmp;
+	*vpp = NULL;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT | WANTPARENT);
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_lookup(): vdp %p, dp %p, Attr %02x\n",
+	    vdp, dp, dp->de_Attributes);
+#endif
+
+	/*
+	 * If they are going after the . or .. entry in the root directory,
+	 * they won't find it.  DOS filesystems don't have them in the root
+	 * directory.  So, we fake it. deget() is in on this scam too.
+	 */
+	if ((vdp->v_flag & VROOT) && cnp->cn_nameptr[0] == '.' &&
+	    (cnp->cn_namelen == 1 ||
+		(cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.'))) {
+		isadir = ATTR_DIRECTORY;
+		scn = MSDOSFSROOT;
+#ifdef MSDOSFS_DEBUG
+		printf("msdosfs_lookup(): looking for . or .. in root directory\n");
+#endif
+		cluster = MSDOSFSROOT;
+		blkoff = MSDOSFSROOT_OFS;
+		goto foundroot;
+	}
+
+	switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename,
+	    cnp->cn_namelen, 0,
+	    pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d,
+	    pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu)) {
+	case 0:
+		return (EINVAL);
+	case 1:
+		break;
+	case 2:
+		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
+		    cnp->cn_namelen) + 1;
+		break;
+	case 3:
+		olddos = 0;
+		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
+		    cnp->cn_namelen) + 1;
+		break;
+	}
+	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) {
+		wincnt = 1;
+		olddos = 1;
+	}
+	unlen = winLenFixup(cnp->cn_nameptr, cnp->cn_namelen);
+
+	/*
+	 * Suppress search for slots unless creating
+	 * file and at end of pathname, in which case
+	 * we watch for a place to put the new file in
+	 * case it doesn't already exist.
+	 */
+	slotcount = wincnt;
+	if ((nameiop == CREATE || nameiop == RENAME) &&
+	    (flags & ISLASTCN))
+		slotcount = 0;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_lookup(): dos version of filename %s, length %ld\n",
+	    dosfilename, cnp->cn_namelen);
+#endif
+	/*
+	 * Search the directory pointed at by vdp for the name pointed at
+	 * by cnp->cn_nameptr.
+	 */
+	tdp = NULL;
+	/*
+	 * The outer loop ranges over the clusters that make up the
+	 * directory.  Note that the root directory is different from all
+	 * other directories.  It has a fixed number of blocks that are not
+	 * part of the pool of allocatable clusters.  So, we treat it a
+	 * little differently. The root directory starts at "cluster" 0.
+	 */
+	diroff = 0;
+	for (frcn = 0;; frcn++) {
+		error = pcbmap(dp, frcn, &bn, &cluster, &blsize);
+		if (error) {
+			if (error == E2BIG)
+				break;
+			return (error);
+		}
+		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		for (blkoff = 0; blkoff < blsize;
+		     blkoff += sizeof(struct direntry),
+		     diroff += sizeof(struct direntry)) {
+			dep = (struct direntry *)(bp->b_data + blkoff);
+			/*
+			 * If the slot is empty and we are still looking
+			 * for an empty slot then remember this one.  If the
+			 * slot is not empty then check to see if it
+			 * matches what we are looking for.  If the slot
+			 * has never been filled with anything, then the
+			 * remainder of the directory has never been used,
+			 * so there is no point in searching it.
+			 */
+			if (dep->deName[0] == SLOT_EMPTY ||
+			    dep->deName[0] == SLOT_DELETED) {
+				/*
+				 * Drop memory of previous long matches
+				 */
+				chksum = -1;
+
+				if (slotcount < wincnt) {
+					slotcount++;
+					slotoffset = diroff;
+				}
+				if (dep->deName[0] == SLOT_EMPTY) {
+					brelse(bp);
+					goto notfound;
+				}
+			} else {
+				/*
+				 * If there wasn't enough space for our winentries,
+				 * forget about the empty space
+				 */
+				if (slotcount < wincnt)
+					slotcount = 0;
+
+				/*
+				 * Check for Win95 long filename entry
+				 */
+				if (dep->deAttributes == ATTR_WIN95) {
+					if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
+						continue;
+
+					chksum = winChkName((const u_char *)cnp->cn_nameptr,
+							    unlen,
+							    (struct winentry *)dep,
+							    chksum,
+							    pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+							    pmp->pm_u2w,
+							    pmp->pm_flags & MSDOSFSMNT_ULTABLE,
+							    pmp->pm_ul);
+					continue;
+				}
+
+				/*
+				 * Ignore volume labels (anywhere, not just
+				 * the root directory).
+				 */
+				if (dep->deAttributes & ATTR_VOLUME) {
+					chksum = -1;
+					continue;
+				}
+
+				/*
+				 * Check for a checksum or name match
+				 */
+				if (chksum != winChksum(dep->deName)
+				    && (!olddos || bcmp(dosfilename, dep->deName, 11))) {
+					chksum = -1;
+					continue;
+				}
+#ifdef MSDOSFS_DEBUG
+				printf("msdosfs_lookup(): match blkoff %d, diroff %d\n",
+				    blkoff, diroff);
+#endif
+				/*
+				 * Remember where this directory
+				 * entry came from for whoever did
+				 * this lookup.
+				 */
+				dp->de_fndoffset = diroff;
+				dp->de_fndcnt = wincnt - 1;
+
+				goto found;
+			}
+		}	/* for (blkoff = 0; .... */
+		/*
+		 * Release the buffer holding the directory cluster just
+		 * searched.
+		 */
+		brelse(bp);
+	}	/* for (frcn = 0; ; frcn++) */
+
+notfound:
+	/*
+	 * We hold no disk buffers at this point.
+	 */
+
+	/*
+	 * Fixup the slot description to point to the place where
+	 * we might put the new DOS direntry (putting the Win95
+	 * long name entries before that)
+	 */
+	if (!slotcount) {
+		slotcount = 1;
+		slotoffset = diroff;
+	}
+	if (wincnt > slotcount)
+		slotoffset += sizeof(struct direntry) * (wincnt - slotcount);
+
+	/*
+	 * If we get here we didn't find the entry we were looking for. But
+	 * that's ok if we are creating or renaming and are at the end of
+	 * the pathname and the directory hasn't been removed.
+	 */
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_lookup(): op %d, refcnt %ld\n",
+	    nameiop, dp->de_refcnt);
+	printf("               slotcount %d, slotoffset %d\n",
+	       slotcount, slotoffset);
+#endif
+	if ((nameiop == CREATE || nameiop == RENAME) &&
+	    (flags & ISLASTCN) && dp->de_refcnt != 0) {
+		/*
+		 * Access for write is interpreted as allowing
+		 * creation of files in the directory.
+		 */
+		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc);
+		if (error)
+			return (error);
+		/*
+		 * Return an indication of where the new directory
+		 * entry should be put.
+		 */
+		dp->de_fndoffset = slotoffset;
+		dp->de_fndcnt = wincnt - 1;
+
+		/*
+		 * We return with the directory locked, so that
+		 * the parameters we set up above will still be
+		 * valid if we actually decide to do a direnter().
+		 * We return ni_vp == NULL to indicate that the entry
+		 * does not currently exist; we leave a pointer to
+		 * the (locked) directory inode in ndp->ni_dvp.
+		 * The pathname buffer is saved so that the name
+		 * can be obtained later.
+		 *
+		 * NB - if the directory is unlocked, then this
+		 * information cannot be used.
+		 */
+		cnp->cn_flags |= SAVENAME;
+		if (!lockparent)
+			VOP_UNLOCK(vdp, 0, p);
+		return (EJUSTRETURN);
+	}
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+		cache_enter(vdp, *vpp, cnp);
+	return (ENOENT);
+
+found:
+	/*
+	 * NOTE:  We still have the buffer with matched directory entry at
+	 * this point.
+	 */
+	isadir = dep->deAttributes & ATTR_DIRECTORY;
+	scn = getushort(dep->deStartCluster);
+	if (FAT32(pmp)) {
+		scn |= getushort(dep->deHighClust) << 16;
+		if (scn == pmp->pm_rootdirblk) {
+			/*
+			 * There should actually be 0 here.
+			 * Just ignore the error.
+			 */
+			scn = MSDOSFSROOT;
+		}
+	}
+
+	if (isadir) {
+		cluster = scn;
+		if (cluster == MSDOSFSROOT)
+			blkoff = MSDOSFSROOT_OFS;
+		else
+			blkoff = 0;
+	} else if (cluster == MSDOSFSROOT)
+		blkoff = diroff;
+
+	/*
+	 * Now release buf to allow deget to read the entry again.
+	 * Reserving it here and giving it to deget could result
+	 * in a deadlock.
+	 */
+	brelse(bp);
+	bp = 0;
+	
+foundroot:
+	/*
+	 * If we entered at foundroot, then we are looking for the . or ..
+	 * entry of the filesystem's root directory.  isadir and scn were
+	 * set up before jumping here, and bp is already null.
+	 */
+	if (FAT32(pmp) && scn == MSDOSFSROOT)
+		scn = pmp->pm_rootdirblk;
+
+	/*
+	 * If deleting, and at end of pathname, return
+	 * parameters which can be used to remove file.
+	 * If the wantparent flag isn't set, we return only
+	 * the directory (in ndp->ni_dvp), otherwise we go
+	 * on and lock the inode, being careful with ".".
+	 */
+	if (nameiop == DELETE && (flags & ISLASTCN)) {
+		/*
+		 * Don't allow deleting the root.
+		 */
+		if (blkoff == MSDOSFSROOT_OFS)
+			return EROFS;				/* really? XXX */
+
+		/*
+		 * Write access to directory required to delete files.
+		 */
+		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc);
+		if (error)
+			return (error);
+
+		/*
+		 * The entry's offset was saved in dp->de_fndoffset above;
+		 * removede() reads it back from the directory denode.
+		 */
+		if (dp->de_StartCluster == scn && isadir) {	/* "." */
+			VREF(vdp);
+			*vpp = vdp;
+			return (0);
+		}
+		error = deget(pmp, cluster, blkoff, &tdp);
+		if (error)
+			return (error);
+		*vpp = DETOV(tdp);
+		if (!lockparent)
+			VOP_UNLOCK(vdp, 0, p);
+		return (0);
+	}
+
+	/*
+	 * If rewriting (RENAME), return the inode and the
+	 * information required to rewrite the present directory.
+	 * Must get inode of directory entry to verify it's a
+	 * regular file, or empty directory.
+	 */
+	if (nameiop == RENAME && wantparent &&
+	    (flags & ISLASTCN)) {
+		if (blkoff == MSDOSFSROOT_OFS)
+			return EROFS;				/* really? XXX */
+
+		error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc);
+		if (error)
+			return (error);
+
+		/*
+		 * Careful about locking second inode.
+		 * This can only occur if the target is ".".
+		 */
+		if (dp->de_StartCluster == scn && isadir)
+			return (EISDIR);
+
+		if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0)
+			return (error);
+		*vpp = DETOV(tdp);
+		cnp->cn_flags |= SAVENAME;
+		if (!lockparent)
+			VOP_UNLOCK(vdp, 0, p);
+		return (0);
+	}
+
+	/*
+	 * Step through the translation in the name.  We do not `vput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory.  Instead we save it
+	 * unlocked as "pdp".  We must get the target inode before unlocking
+	 * the directory to ensure that the inode will not be removed
+	 * before we get it.  We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the VFS_VGET for the
+	 * inode associated with ".." returns.  We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = vdp;
+	if (flags & ISDOTDOT) {
+		VOP_UNLOCK(pdp, 0, p);
+		error = deget(pmp, cluster, blkoff,  &tdp);
+		if (error) {
+			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); 
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN) &&
+		    (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
+			vput(DETOV(tdp));
+			return (error);
+		}
+		*vpp = DETOV(tdp);
+	} else if (dp->de_StartCluster == scn && isadir) {
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0)
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			VOP_UNLOCK(pdp, 0, p);
+		*vpp = DETOV(tdp);
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * dep  - directory entry to copy into the directory
+ * ddep - directory to add to
+ * depp - return the address of the denode for the created directory entry
+ *	  if depp != 0
+ * cnp  - componentname needed for Win95 long filenames
+ */
+int
+createde(dep, ddep, depp, cnp)
+	struct denode *dep;
+	struct denode *ddep;
+	struct denode **depp;
+	struct componentname *cnp;
+{
+	int error;
+	u_long dirclust, diroffset;
+	struct direntry *ndep;
+	struct msdosfsmount *pmp = ddep->de_pmp;
+	struct buf *bp;
+	daddr_t bn;
+	int blsize;
+
+#ifdef MSDOSFS_DEBUG
+	printf("createde(dep %p, ddep %p, depp %p, cnp %p)\n",
+	    dep, ddep, depp, cnp);
+#endif
+
+	/*
+	 * If no space left in the directory then allocate another cluster
+	 * and chain it onto the end of the file.  There is one exception
+	 * to this.  That is, if the root directory has no more space it
+	 * can NOT be expanded.  extendfile() checks for and fails attempts
+	 * to extend the root directory.  We just return an error in that
+	 * case.
+	 */
+	if (ddep->de_fndoffset >= ddep->de_FileSize) {
+		diroffset = ddep->de_fndoffset + sizeof(struct direntry)
+		    - ddep->de_FileSize;
+		dirclust = de_clcount(pmp, diroffset);
+		error = extendfile(ddep, dirclust, 0, 0, DE_CLEAR);
+		if (error) {
+			(void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED, NULL);
+			return error;
+		}
+
+		/*
+		 * Update the size of the directory
+		 */
+		ddep->de_FileSize += de_cn2off(pmp, dirclust);
+	}
+
+	/*
+	 * We just read in the cluster with space.  Copy the new directory
+	 * entry in.  Then write it to disk. NOTE:  DOS directories
+	 * do not get smaller as clusters are emptied.
+	 */
+	error = pcbmap(ddep, de_cluster(pmp, ddep->de_fndoffset),
+		       &bn, &dirclust, &blsize);
+	if (error)
+		return error;
+	diroffset = ddep->de_fndoffset;
+	if (dirclust != MSDOSFSROOT)
+		diroffset &= pmp->pm_crbomask;
+	if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) != 0) {
+		brelse(bp);
+		return error;
+	}
+	ndep = bptoep(pmp, bp, ddep->de_fndoffset);
+
+	DE_EXTERNALIZE(ndep, dep);
+
+	/*
+	 * Now write the Win95 long name
+	 */
+	if (ddep->de_fndcnt > 0) {
+		u_int8_t chksum = winChksum(ndep->deName);
+		const u_char *un = (const u_char *)cnp->cn_nameptr;
+		int unlen = cnp->cn_namelen;
+		int cnt = 1;
+
+		while (--ddep->de_fndcnt >= 0) {
+			if (!(ddep->de_fndoffset & pmp->pm_crbomask)) {
+				if ((error = bwrite(bp)) != 0)
+					return error;
+
+				ddep->de_fndoffset -= sizeof(struct direntry);
+				error = pcbmap(ddep,
+					       de_cluster(pmp,
+							  ddep->de_fndoffset),
+					       &bn, 0, &blsize);
+				if (error)
+					return error;
+
+				error = bread(pmp->pm_devvp, bn, blsize,
+					      NOCRED, &bp);
+				if (error) {
+					brelse(bp);
+					return error;
+				}
+				ndep = bptoep(pmp, bp, ddep->de_fndoffset);
+			} else {
+				ndep--;
+				ddep->de_fndoffset -= sizeof(struct direntry);
+			}
+			if (!unix2winfn(un, unlen, (struct winentry *)ndep,
+					cnt++, chksum,
+					pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+					pmp->pm_u2w))
+				break;
+		}
+	}
+
+	if ((error = bwrite(bp)) != 0)
+		return error;
+
+	/*
+	 * If they want us to return with the denode gotten.
+	 */
+	if (depp) {
+		if (dep->de_Attributes & ATTR_DIRECTORY) {
+			dirclust = dep->de_StartCluster;
+			if (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)
+				dirclust = MSDOSFSROOT;
+			if (dirclust == MSDOSFSROOT)
+				diroffset = MSDOSFSROOT_OFS;
+			else
+				diroffset = 0;
+		}
+		return deget(pmp, dirclust, diroffset, depp);
+	}
+
+	return 0;
+}
+
+/*
+ * Be sure a directory is empty except for "." and "..". Return 1 if empty,
+ * return 0 if not empty or error.
+ */
+int
+dosdirempty(dep)
+	struct denode *dep;
+{
+	int blsize;
+	int error;
+	u_long cn;
+	daddr_t bn;
+	struct buf *bp;
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct direntry *dentp;
+
+	/*
+	 * Since the filesize field in directory entries for a directory is
+	 * zero, we just have to feel our way through the directory until
+	 * we hit end of file.
+	 */
+	for (cn = 0;; cn++) {
+		if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
+			if (error == E2BIG)
+				return (1);	/* it's empty */
+			return (0);
+		}
+		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (0);
+		}
+		for (dentp = (struct direntry *)bp->b_data;
+		     (char *)dentp < bp->b_data + blsize;
+		     dentp++) {
+			if (dentp->deName[0] != SLOT_DELETED &&
+			    (dentp->deAttributes & ATTR_VOLUME) == 0) {
+				/*
+				 * In dos directories an entry whose name
+				 * starts with SLOT_EMPTY (0) marks the
+				 * beginning of the unused part of the
+				 * directory, so we can just return that it
+				 * is empty.
+				 */
+				if (dentp->deName[0] == SLOT_EMPTY) {
+					brelse(bp);
+					return (1);
+				}
+				/*
+				 * Any names other than "." and ".." in a
+				 * directory mean it is not empty.
+				 */
+				if (bcmp(dentp->deName, ".          ", 11) &&
+				    bcmp(dentp->deName, "..         ", 11)) {
+					brelse(bp);
+#ifdef MSDOSFS_DEBUG
+					printf("dosdirempty(): entry found %02x, %02x\n",
+					    dentp->deName[0], dentp->deName[1]);
+#endif
+					return (0);	/* not empty */
+				}
+			}
+		}
+		brelse(bp);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Check to see if the directory described by target is in some
+ * subdirectory of source.  This prevents something like the following from
+ * succeeding and leaving a bunch of files and directories orphaned:
+ * "mv /a/b/c /a/b/c/d/e/f", where c and f are directories.
+ *
+ * source - the inode for /a/b/c
+ * target - the inode for /a/b/c/d/e/f
+ *
+ * Returns 0 if target is NOT a subdirectory of source.
+ * Otherwise returns a non-zero error number.
+ * The target inode is always unlocked on return.
+ */
+int
+doscheckpath(source, target)
+	struct denode *source;
+	struct denode *target;
+{
+	daddr_t scn;
+	struct msdosfsmount *pmp;
+	struct direntry *ep;
+	struct denode *dep;
+	struct buf *bp = NULL;
+	int error = 0;
+
+	dep = target;
+	if ((target->de_Attributes & ATTR_DIRECTORY) == 0 ||
+	    (source->de_Attributes & ATTR_DIRECTORY) == 0) {
+		error = ENOTDIR;
+		goto out;
+	}
+	if (dep->de_StartCluster == source->de_StartCluster) {
+		error = EEXIST;
+		goto out;
+	}
+	if (dep->de_StartCluster == MSDOSFSROOT)
+		goto out;
+	pmp = dep->de_pmp;
+#ifdef	DIAGNOSTIC
+	if (pmp != source->de_pmp)
+		panic("doscheckpath: source and target on different filesystems");
+#endif
+	if (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)
+		goto out;
+
+	for (;;) {
+		if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) {
+			error = ENOTDIR;
+			break;
+		}
+		scn = dep->de_StartCluster;
+		error = bread(pmp->pm_devvp, cntobn(pmp, scn),
+			      pmp->pm_bpcluster, NOCRED, &bp);
+		if (error)
+			break;
+
+		ep = (struct direntry *) bp->b_data + 1;
+		if ((ep->deAttributes & ATTR_DIRECTORY) == 0 ||
+		    bcmp(ep->deName, "..         ", 11) != 0) {
+			error = ENOTDIR;
+			break;
+		}
+		scn = getushort(ep->deStartCluster);
+		if (FAT32(pmp))
+			scn |= getushort(ep->deHighClust) << 16;
+
+		if (scn == source->de_StartCluster) {
+			error = EINVAL;
+			break;
+		}
+		if (scn == MSDOSFSROOT)
+			break;
+		if (FAT32(pmp) && scn == pmp->pm_rootdirblk) {
+			/*
+			 * scn should be 0 in this case,
+			 * but we silently ignore the error.
+			 */
+			break;
+		}
+
+		vput(DETOV(dep));
+		brelse(bp);
+		bp = NULL;
+		/* NOTE: deget() clears dep on error */
+		if ((error = deget(pmp, scn, 0, &dep)) != 0)
+			break;
+	}
+out:;
+	if (bp)
+		brelse(bp);
+	if (error == ENOTDIR)
+		printf("doscheckpath(): .. not a directory?\n");
+	if (dep != NULL)
+		vput(DETOV(dep));
+	return (error);
+}
+
+/*
+ * Read in the disk block containing the directory entry (dirclust,
+ * diroffset) and return the address of the buf header, and the address of
+ * the directory entry within the block.
+ */
+int
+readep(pmp, dirclust, diroffset, bpp, epp)
+	struct msdosfsmount *pmp;
+	u_long dirclust, diroffset;
+	struct buf **bpp;
+	struct direntry **epp;
+{
+	int error;
+	daddr_t bn;
+	int blsize;
+
+	blsize = pmp->pm_bpcluster;
+	if (dirclust == MSDOSFSROOT
+	    && de_blk(pmp, diroffset + blsize) > pmp->pm_rootdirsize)
+		blsize = de_bn2off(pmp, pmp->pm_rootdirsize) & pmp->pm_crbomask;
+	bn = detobn(pmp, dirclust, diroffset);
+	if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, bpp)) != 0) {
+		brelse(*bpp);
+		*bpp = NULL;
+		return (error);
+	}
+	if (epp)
+		*epp = bptoep(pmp, *bpp, diroffset);
+	return (0);
+}
+
+/*
+ * Read in the disk block containing the directory entry dep came from and
+ * return the address of the buf header, and the address of the directory
+ * entry within the block.
+ */
+int
+readde(dep, bpp, epp)
+	struct denode *dep;
+	struct buf **bpp;
+	struct direntry **epp;
+{
+
+	return (readep(dep->de_pmp, dep->de_dirclust, dep->de_diroffset,
+	    bpp, epp));
+}
+
+/*
+ * Remove a directory entry. At this point the file represented by the
+ * directory entry to be removed is still full length until no one has it
+ * open.  When the file is no longer in use, msdosfs_inactive() is called
+ * and will truncate the file to 0 length.  When the vnode containing the
+ * denode is needed for some other purpose by VFS it will call
+ * msdosfs_reclaim() which will remove the denode from the denode cache.
+ */
+int
+removede(pdep, dep)
+	struct denode *pdep;	/* directory where the entry is removed */
+	struct denode *dep;	/* file to be removed */
+{
+	int error;
+	struct direntry *ep;
+	struct buf *bp;
+	daddr_t bn;
+	int blsize;
+	struct msdosfsmount *pmp = pdep->de_pmp;
+	u_long offset = pdep->de_fndoffset;
+
+#ifdef MSDOSFS_DEBUG
+	printf("removede(): filename %s, dep %p, offset %08lx\n",
+	    dep->de_Name, dep, offset);
+#endif
+
+	dep->de_refcnt--;
+	offset += sizeof(struct direntry);
+	do {
+		offset -= sizeof(struct direntry);
+		error = pcbmap(pdep, de_cluster(pmp, offset), &bn, 0, &blsize);
+		if (error)
+			return error;
+		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return error;
+		}
+		ep = bptoep(pmp, bp, offset);
+		/*
+		 * Check whether, if we came here the second time, i.e.
+		 * when underflowing into the previous block, the last
+		 * entry in this block is a longfilename entry, too.
+		 */
+		if (ep->deAttributes != ATTR_WIN95
+		    && offset != pdep->de_fndoffset) {
+			brelse(bp);
+			break;
+		}
+		offset += sizeof(struct direntry);
+		while (1) {
+			/*
+			 * We are a bit aggressive here in that we delete any Win95
+			 * entries preceding this entry, not just the ones we "own".
+			 * Since these presumably aren't valid anyway,
+			 * there should be no harm.
+			 */
+			offset -= sizeof(struct direntry);
+			ep--->deName[0] = SLOT_DELETED;
+			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+			    || !(offset & pmp->pm_crbomask)
+			    || ep->deAttributes != ATTR_WIN95)
+				break;
+		}
+		if ((error = bwrite(bp)) != 0)
+			return error;
+	} while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+	    && !(offset & pmp->pm_crbomask)
+	    && offset);
+	return 0;
+}
+
+/*
+ * Create a unique DOS name in the directory dep
+ */
+int
+uniqdosname(dep, cnp, cp)
+	struct denode *dep;
+	struct componentname *cnp;
+	u_char *cp;
+{
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct direntry *dentp;
+	int gen;
+	int blsize;
+	u_long cn;
+	daddr_t bn;
+	struct buf *bp;
+	int error;
+	
+	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
+		return (unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
+		    cnp->cn_namelen, 0,
+		    pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d,
+		    pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu) ?
+		    0 : EINVAL);
+
+	for (gen = 1;; gen++) {
+		/*
+		 * Generate DOS name with generation number
+		 */
+		if (!unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
+		    cnp->cn_namelen, gen,
+		    pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d,
+		    pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu))
+			return gen == 1 ? EINVAL : EEXIST;
+
+		/*
+		 * Now look for a dir entry with this exact name
+		 */
+		for (cn = error = 0; !error; cn++) {
+			if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
+				if (error == E2BIG)	/* EOF reached and not found */
+					return 0;
+				return error;
+			}
+			error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return error;
+			}
+			for (dentp = (struct direntry *)bp->b_data;
+			     (char *)dentp < bp->b_data + blsize;
+			     dentp++) {
+				if (dentp->deName[0] == SLOT_EMPTY) {
+					/*
+					 * Last used entry and not found
+					 */
+					brelse(bp);
+					return 0;
+				}
+				/*
+				 * Ignore volume labels and Win95 entries
+				 */
+				if (dentp->deAttributes & ATTR_VOLUME)
+					continue;
+				if (!bcmp(dentp->deName, cp, 11)) {
+					error = EEXIST;
+					break;
+				}
+			}
+			brelse(bp);
+		}
+	}
+}
+
+/*
+ * Find any Win'95 long filename entry in directory dep
+ */
+int
+findwin95(dep)
+	struct denode *dep;
+{
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct direntry *dentp;
+	int blsize, win95;
+	u_long cn;
+	daddr_t bn;
+	struct buf *bp;
+
+	win95 = 1;
+	/*
+	 * Read through the directory looking for Win'95 entries
+	 * Note: Error currently handled just as EOF			XXX
+	 */
+	for (cn = 0;; cn++) {
+		if (pcbmap(dep, cn, &bn, 0, &blsize))
+			return (win95);
+		if (bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) {
+			brelse(bp);
+			return (win95);
+		}
+		for (dentp = (struct direntry *)bp->b_data;
+		     (char *)dentp < bp->b_data + blsize;
+		     dentp++) {
+			if (dentp->deName[0] == SLOT_EMPTY) {
+				/*
+				 * Last used entry and not found
+				 */
+				brelse(bp);
+				return (win95);
+			}
+			if (dentp->deName[0] == SLOT_DELETED) {
+				/*
+				 * Ignore deleted files
+				 * Note: might be an indication of Win'95 anyway	XXX
+				 */
+				continue;
+			}
+			if (dentp->deAttributes == ATTR_WIN95) {
+				brelse(bp);
+				return 1;
+			}
+			win95 = 0;
+		}
+		brelse(bp);
+	}
+}
diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
new file mode 100644
index 0000000..bca552c
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -0,0 +1,1019 @@
+/*	$Id: msdosfs_vfsops.c,v 1.39 1998/12/07 21:58:35 archie Exp $ */
+/*	$NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */	/* defines v_rdev */
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/stat.h> 				/* defines ALLPERMS */
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/bootsect.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/fat.h>
+
+MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure");
+static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table");
+/* Forward declarations for the functions defined later in this file. */
+static int	update_mp __P((struct mount *mp, struct msdosfs_args *argp));
+static int	mountmsdosfs __P((struct vnode *devvp, struct mount *mp,
+				  struct proc *p, struct msdosfs_args *argp));
+static int	msdosfs_fhtovp __P((struct mount *, struct fid *,
+				    struct sockaddr *, struct vnode **, int *,
+				    struct ucred **));
+static int	msdosfs_mount __P((struct mount *, char *, caddr_t,
+				   struct nameidata *, struct proc *));
+static int	msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t,
+				      struct proc *));
+static int	msdosfs_root __P((struct mount *, struct vnode **));
+static int	msdosfs_start __P((struct mount *, int, struct proc *));
+static int	msdosfs_statfs __P((struct mount *, struct statfs *,
+				    struct proc *));
+static int	msdosfs_sync __P((struct mount *, int, struct ucred *,
+				  struct proc *));
+static int	msdosfs_unmount __P((struct mount *, int, struct proc *));
+static int	msdosfs_vget __P((struct mount *mp, ino_t ino,
+				  struct vnode **vpp));
+static int	msdosfs_vptofh __P((struct vnode *, struct fid *));
+
+/*
+ * Apply the mount arguments to pmp and pick short/long filename handling.
+ */
+static int
+update_mp(mp, argp)
+	struct mount *mp;
+	struct msdosfs_args *argp;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct vnode *rootvp;
+	int error;
+
+	pmp->pm_uid = argp->uid;
+	pmp->pm_gid = argp->gid;
+	pmp->pm_mask = argp->mask & ALLPERMS;
+	pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT;
+	if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) {
+		bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w));
+		bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u));
+		bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d));
+	}
+	if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) {
+		bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul));
+		bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu));
+	}
+#ifndef __FreeBSD__
+	/* GEMDOS knows nothing (yet) about win95 */
+	if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS)
+		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;
+#endif
+
+	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) {
+		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+		return 0;
+	}
+	if (pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))
+		return 0;	/* already decided */
+	/* Try to divine whether to support Win'95 long filenames */
+	if (FAT32(pmp)) {
+		pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
+		return 0;
+	}
+	error = msdosfs_root(mp, &rootvp);
+	if (error != 0)
+		return error;
+	if (findwin95(VTODE(rootvp)))
+		pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
+	else
+		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+	vput(rootvp);
+	return 0;
+}
+
+#ifndef __FreeBSD__	/* msdosfs_mountroot(): mount rootdev as "/" */
+int
+msdosfs_mountroot()
+{
+	register struct mount *mp;
+	struct proc *p = curproc;	/* XXX */
+	size_t size;
+	int error;
+	struct msdosfs_args args;
+
+	if (root_device->dv_class != DV_DISK)
+		return (ENODEV);	/* root device is not a disk */
+
+	/*
+	 * Get a vnode for rootdev; bdevvp() stores it in rootvp.
+	 */
+	if (bdevvp(rootdev, &rootvp))
+		panic("msdosfs_mountroot: can't setup rootvp");
+
+	mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &msdosfs_vfsops;
+	mp->mnt_flag = 0;
+	LIST_INIT(&mp->mnt_vnodelist);
+
+	args.flags = 0;		/* no msdosfs mount options */
+	args.uid = 0;
+	args.gid = 0;
+	args.mask = 0777;	/* update_mp() masks this with ALLPERMS */
+
+	if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) {
+		free(mp, M_MOUNT);
+		return (error);
+	}
+
+	if ((error = update_mp(mp, &args)) != 0) {
+		(void)msdosfs_unmount(mp, 0, p);	/* undo mountmsdosfs() */
+		free(mp, M_MOUNT);
+		return (error);
+	}
+
+	if ((error = vfs_lock(mp)) != 0) {
+		(void)msdosfs_unmount(mp, 0, p);	/* undo mountmsdosfs() */
+		free(mp, M_MOUNT);
+		return (error);
+	}
+
+	CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_vnodecovered = NULLVP;
+	(void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* zero tail */
+	(void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); /* zero tail */
+	(void)msdosfs_statfs(mp, &mp->mnt_stat, p);
+	vfs_unlock(mp);
+	return (0);
+}
+#endif
+
+/*
+ * Mount an msdosfs filesystem on mp, or update an existing mount.
+ * path - addr in user space of mount point (ie /usr or whatever)
+ * data - addr in user space of mount params including the name of the block
+ * special file to treat as a filesystem.  Returns 0 or an errno value.
+ */
+static int
+msdosfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *devvp;	  /* vnode for blk device to mount */
+	struct msdosfs_args args; /* will hold data from mount request */
+	struct msdosfsmount *pmp = NULL; /* msdosfs mount control block */
+	size_t size;
+	int error, flags;
+	mode_t accessmode;
+
+	error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args));
+	if (error)
+		return (error);
+	if (args.magic != MSDOSFS_ARGSMAGIC)
+		args.flags = 0;
+	/*
+	 * If updating, check whether changing from read-only to
+	 * read/write; if there is no device name, that's all we do.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		pmp = VFSTOMSDOSFS(mp);
+		error = 0;
+		if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) {
+			flags = WRITECLOSE;
+			if (mp->mnt_flag & MNT_FORCE)
+				flags |= FORCECLOSE;
+			error = vflush(mp, NULLVP, flags);
+		}
+		if (!error && (mp->mnt_flag & MNT_RELOAD))
+			/* not yet implemented */
+			error = EOPNOTSUPP;
+		if (error)
+			return (error);
+		if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
+			/*
+			 * If upgrade to read-write by non-root, then verify
+			 * that user has necessary permissions on the device.
+			 */
+			if (p->p_ucred->cr_uid != 0) {
+				devvp = pmp->pm_devvp;
+				vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+				error = VOP_ACCESS(devvp, VREAD | VWRITE,
+						   p->p_ucred, p);
+				if (error) {
+					VOP_UNLOCK(devvp, 0, p);
+					return (error);
+				}
+				VOP_UNLOCK(devvp, 0, p);
+			}
+			pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
+		}
+		if (args.fspec == 0) {
+#ifdef	__notyet__		/* doesn't work correctly with current mountd	XXX */
+			if (args.flags & MSDOSFSMNT_MNTOPT) {
+				pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT;
+				pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT;
+				if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+					pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+			}
+#endif
+			/* Process export requests. */
+			return (vfs_export(mp, &pmp->pm_export, &args.export));
+		}
+	}
+	/*
+	 * Not an update, or updating the name: look up the name
+	 * and verify that it refers to a sensible block device.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+	devvp = ndp->ni_vp;
+
+	if (devvp->v_type != VBLK) {
+		vrele(devvp);
+		return (ENOTBLK);
+	}
+	if (major(devvp->v_rdev) >= nblkdev ||
+	    bdevsw[major(devvp->v_rdev)] == NULL) {
+		vrele(devvp);
+		return (ENXIO);
+	}
+	/*
+	 * If mount by non-root, then verify that user has necessary
+	 * permissions on the device.
+	 */
+	if (p->p_ucred->cr_uid != 0) {
+		accessmode = VREAD;
+		if ((mp->mnt_flag & MNT_RDONLY) == 0)
+			accessmode |= VWRITE;
+		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
+		if (error) {
+			vput(devvp);
+			return (error);
+		}
+		VOP_UNLOCK(devvp, 0, p);
+	}
+	if ((mp->mnt_flag & MNT_UPDATE) == 0) {
+		error = mountmsdosfs(devvp, mp, p, &args);
+#ifdef MSDOSFS_DEBUG		/* only needed for the printf below */
+		pmp = VFSTOMSDOSFS(mp);
+#endif
+	} else {
+		if (devvp != pmp->pm_devvp)
+			error = EINVAL;	/* XXX needs translation */
+		else
+			vrele(devvp);
+	}
+	if (error) {
+		vrele(devvp);
+		return (error);
+	}
+
+	error = update_mp(mp, &args);
+	if (error) {
+		/* Tear down only a mount made above, never an updated one. */
+		if ((mp->mnt_flag & MNT_UPDATE) == 0)
+			(void) msdosfs_unmount(mp, MNT_FORCE, p);
+		return error;
+	}
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void) msdosfs_statfs(mp, &mp->mnt_stat, p);
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap);
+#endif
+	return (0);
+}
+
+/*
+ * Open devvp, read and validate its boot sector, and build the msdosfs
+ * mount structure for mp (stored in mp->mnt_data on success).
+ * devvp - block device vnode to mount; p - calling process;
+ * argp - mount arguments (only consulted when not built for FreeBSD).
+ * Returns 0 or an errno value; error_exit closes devvp and frees pmp.
+ */
+static int
+mountmsdosfs(devvp, mp, p, argp)
+	struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct msdosfs_args *argp;
+{
+	struct msdosfsmount *pmp;
+	struct buf *bp;
+	dev_t dev = devvp->v_rdev;
+#ifndef __FreeBSD__
+	struct partinfo dpart;
+	int bsize = 0, dtype = 0, tmp;
+#endif
+	union bootsector *bsp;
+	struct byte_bpb33 *b33;
+	struct byte_bpb50 *b50;
+	struct byte_bpb710 *b710;
+	u_int8_t SecPerClust;
+	int	ronly, error;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	error = vfs_mountedon(devvp);
+	if (error)
+		return (error);
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return (EBUSY);
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0);
+	VOP_UNLOCK(devvp, 0, p);
+	if (error)
+		return (error);
+
+	ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
+	if (error)
+		return (error);
+
+	bp  = NULL; /* both used in error_exit */
+	pmp = NULL;
+
+#ifndef __FreeBSD__
+	if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
+		/*
+		 * We need the disklabel to calculate the size of a FAT entry
+		 * later on. Also make sure the partition contains a filesystem
+		 * of type FS_MSDOS. This doesn't work for floppies, so we have
+		 * to check for them too.
+		 *
+		 * At least some parts of the msdos fs driver seem to assume
+		 * that the size of a disk block will always be 512 bytes.
+		 * Let's check it...
+		 */
+		error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart,
+				  FREAD, NOCRED, p);
+		if (error)
+			goto error_exit;
+		tmp   = dpart.part->p_fstype;
+		dtype = dpart.disklab->d_type;
+		bsize = dpart.disklab->d_secsize;
+		if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) {
+			error = EINVAL;
+			goto error_exit;
+		}
+	}
+#endif
+
+	/*
+	 * Read the boot sector of the filesystem, and then check the
+	 * boot signature.  If not a dos boot sector then error out.
+	 */
+#ifdef	PC98
+	error = bread(devvp, 0, 1024, NOCRED, &bp);
+#else
+	error = bread(devvp, 0, 512, NOCRED, &bp);
+#endif
+	if (error)
+		goto error_exit;
+	bp->b_flags |= B_AGE;
+	bsp = (union bootsector *)bp->b_data;
+	b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB;
+	b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB;
+	b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP;
+
+#ifndef __FreeBSD__
+	if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) {
+#endif
+#ifdef PC98
+		if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0
+		    || bsp->bs50.bsBootSectSig1 != BOOTSIG1)
+		    && (bsp->bs50.bsBootSectSig0 != 0       /* PC98 DOS 3.3x */
+		    || bsp->bs50.bsBootSectSig1 != 0)
+		    && (bsp->bs50.bsBootSectSig0 != 0x90    /* PC98 DOS 5.0  */
+		    || bsp->bs50.bsBootSectSig1 != 0x3d)
+		    && (bsp->bs50.bsBootSectSig0 != 0x46    /* PC98 DOS 3.3B */
+		    || bsp->bs50.bsBootSectSig1 != 0xfa)) {
+#else
+		if (bsp->bs50.bsBootSectSig0 != BOOTSIG0
+		    || bsp->bs50.bsBootSectSig1 != BOOTSIG1) {
+#endif
+			error = EINVAL;
+			printf("mountmsdosfs(): bad signature\n");
+			goto error_exit;
+		}
+#ifndef __FreeBSD__
+	}
+#endif
+
+	pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK);
+	bzero((caddr_t)pmp, sizeof *pmp);
+	pmp->pm_mountp = mp;
+
+	/*
+	 * Compute several useful quantities from the bpb in the
+	 * bootsector.  Copy in the dos 5 variant of the bpb then fix up
+	 * the fields that are different between dos 5 and dos 3.3.
+	 */
+	SecPerClust = b50->bpbSecPerClust;
+	pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec);
+	pmp->pm_ResSectors = getushort(b50->bpbResSectors);
+	pmp->pm_FATs = b50->bpbFATs;
+	pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts);
+	pmp->pm_Sectors = getushort(b50->bpbSectors);
+	pmp->pm_FATsecs = getushort(b50->bpbFATsecs);
+	pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack);
+	pmp->pm_Heads = getushort(b50->bpbHeads);
+	pmp->pm_Media = b50->bpbMedia;
+
+#ifndef __FreeBSD__
+	if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) {
+#endif
+		/* XXX - We should probably check more values here */
+		if (!pmp->pm_BytesPerSec || !SecPerClust
+			|| !pmp->pm_Heads || pmp->pm_Heads > 255
+#ifdef PC98
+			|| !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) {
+#else
+			|| !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) {
+#endif
+			error = EINVAL;
+			printf("mountmsdosfs(): bad bpb\n");
+			goto error_exit;
+		}
+#ifndef __FreeBSD__
+	}
+#endif
+
+	if (pmp->pm_Sectors == 0) {
+		pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs);
+		pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors);
+	} else {
+		pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs);
+		pmp->pm_HugeSectors = pmp->pm_Sectors;
+	}
+	/* A sector shorter than one directory entry zeroes the divisor below. */
+	if (pmp->pm_BytesPerSec < sizeof(struct direntry)) {
+		error = EINVAL;
+		printf("mountmsdosfs(): bad bpb\n");
+		goto error_exit;
+	}
+	if (pmp->pm_HugeSectors > 0xffffffff /
+	    (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) {
+		/*
+		 * We cannot deal currently with this size of disk
+		 * due to fileid limitations (see msdosfs_getattr and
+		 * msdosfs_readdir)
+		 */
+		error = EINVAL;
+		printf("mountmsdosfs(): disk too big, sorry\n");
+		goto error_exit;
+	}
+
+	if (pmp->pm_RootDirEnts == 0) {
+		if (bsp->bs710.bsBootSectSig2 != BOOTSIG2
+		    || bsp->bs710.bsBootSectSig3 != BOOTSIG3
+		    || pmp->pm_Sectors
+		    || pmp->pm_FATsecs
+		    || getushort(b710->bpbFSVers)) {
+			error = EINVAL;
+			printf("mountmsdosfs(): bad FAT32 filesystem\n");
+			goto error_exit;
+		}
+		pmp->pm_fatmask = FAT32_MASK;
+		pmp->pm_fatmult = 4;
+		pmp->pm_fatdiv = 1;
+		pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs);
+		if (getushort(b710->bpbExtFlags) & FATMIRROR)
+			pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM;
+		else
+			pmp->pm_flags |= MSDOSFS_FATMIRROR;
+	} else
+		pmp->pm_flags |= MSDOSFS_FATMIRROR;
+
+#ifndef __FreeBSD__
+	if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
+		if (FAT32(pmp)) {
+			/*
+			 * GEMDOS doesn't know fat32.
+			 */
+			error = EINVAL;
+			goto error_exit;
+		}
+
+		/*
+		 * Check a few values (could do some more):
+		 * - logical sector size: power of 2, >= block size
+		 * - sectors per cluster: power of 2, >= 1
+		 * - number of sectors:   >= 1, <= size of partition
+		 */
+		if ( (SecPerClust == 0)
+		  || (SecPerClust & (SecPerClust - 1))
+		  || (pmp->pm_BytesPerSec < bsize)
+		  || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1))
+		  || (pmp->pm_HugeSectors == 0)
+		  || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize)
+							> dpart.part->p_size)
+		   ) {
+			error = EINVAL;
+			goto error_exit;
+		}
+		/*
+		 * XXX - Many parts of the msdos fs driver seem to assume that
+		 * the number of bytes per logical sector (BytesPerSec) will
+		 * always be the same as the number of bytes per disk block
+		 * Let's pretend it is.
+		 */
+		tmp = pmp->pm_BytesPerSec / bsize;
+		pmp->pm_BytesPerSec  = bsize;
+		pmp->pm_HugeSectors *= tmp;
+		pmp->pm_HiddenSects *= tmp;
+		pmp->pm_ResSectors  *= tmp;
+		pmp->pm_Sectors     *= tmp;
+		pmp->pm_FATsecs     *= tmp;
+		SecPerClust         *= tmp;
+	}
+#endif
+	pmp->pm_fatblk = pmp->pm_ResSectors;
+	if (FAT32(pmp)) {
+		pmp->pm_rootdirblk = getulong(b710->bpbRootClust);
+		pmp->pm_firstcluster = pmp->pm_fatblk
+			+ (pmp->pm_FATs * pmp->pm_FATsecs);
+		pmp->pm_fsinfo = getushort(b710->bpbFSInfo);
+	} else {
+		pmp->pm_rootdirblk = pmp->pm_fatblk +
+			(pmp->pm_FATs * pmp->pm_FATsecs);
+		pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry)
+				       + pmp->pm_BytesPerSec - 1)
+			/ pmp->pm_BytesPerSec;/* in sectors */
+		pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize;
+	}
+
+	pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) /
+	    SecPerClust;
+	pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1;
+	pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec;
+
+#ifndef __FreeBSD__
+	if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
+		if ((pmp->pm_nmbrofclusters <= (0xff0 - 2))
+		      && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE)
+		      && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2))))
+		    ) {
+			pmp->pm_fatmask = FAT12_MASK;
+			pmp->pm_fatmult = 3;
+			pmp->pm_fatdiv = 2;
+		} else {
+			pmp->pm_fatmask = FAT16_MASK;
+			pmp->pm_fatmult = 2;
+			pmp->pm_fatdiv = 1;
+		}
+	} else
+#endif
+	if (pmp->pm_fatmask == 0) {
+		if (pmp->pm_maxcluster
+		    <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) {
+			/*
+			 * This will usually be a floppy disk. This size makes
+			 * sure that one fat entry will not be split across
+			 * multiple blocks.
+			 */
+			pmp->pm_fatmask = FAT12_MASK;
+			pmp->pm_fatmult = 3;
+			pmp->pm_fatdiv = 2;
+		} else {
+			pmp->pm_fatmask = FAT16_MASK;
+			pmp->pm_fatmult = 2;
+			pmp->pm_fatdiv = 1;
+		}
+	}
+	if (FAT12(pmp))
+		pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec;
+	else
+		pmp->pm_fatblocksize = DFLTBSIZE;
+
+	pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec;
+	pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1;
+
+	/*
+	 * Compute mask and shift value for isolating cluster relative byte
+	 * offsets and cluster numbers from a file offset.
+	 */
+	pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec;
+	pmp->pm_crbomask = pmp->pm_bpcluster - 1;
+	pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1;
+
+	/* Check for valid cluster size: must be a power of 2 */
+	if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) {
+		error = EINVAL;
+		goto error_exit;
+	}
+
+	/* Release the bootsector buffer. */
+	brelse(bp);
+	bp = NULL;
+
+	/* Check FSInfo. */
+	if (pmp->pm_fsinfo) {
+		struct fsinfo *fp;
+
+		if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0)
+			goto error_exit;
+		fp = (struct fsinfo *)bp->b_data;
+		if (!bcmp(fp->fsisig1, "RRaA", 4)
+		    && !bcmp(fp->fsisig2, "rrAa", 4)
+		    && !bcmp(fp->fsisig3, "\0\0\125\252", 4)
+		    && !bcmp(fp->fsisig4, "\0\0\125\252", 4))
+			pmp->pm_nxtfree = getulong(fp->fsinxtfree);
+		else
+			pmp->pm_fsinfo = 0;
+		brelse(bp);
+		bp = NULL;
+	}
+
+	/*
+	 * Check and validate (or perhaps invalidate?) the fsinfo structure?		XXX
+	 */
+
+	/*
+	 * Allocate memory for the bitmap of allocated clusters, and then
+	 * fill it in.
+	 */
+	pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1)
+				   / N_INUSEBITS)
+				  * sizeof(*pmp->pm_inusemap),
+				  M_MSDOSFSFAT, M_WAITOK);
+
+	/* fillinusemap() needs pm_devvp. */
+	pmp->pm_dev = dev;
+	pmp->pm_devvp = devvp;
+
+	/* Have the inuse map filled in. */
+	if ((error = fillinusemap(pmp)) != 0)
+		goto error_exit;
+
+	/*
+	 * If they want fat updates to be synchronous then let them suffer
+	 * the performance degradation in exchange for the on disk copy of
+	 * the fat being correct just about all the time.  I suppose this
+	 * would be a good thing to turn on if the kernel is still flakey.
+	 */
+	if (mp->mnt_flag & MNT_SYNCHRONOUS)
+		pmp->pm_flags |= MSDOSFSMNT_WAITONFAT;
+
+	/* Finish up. */
+	if (ronly)
+		pmp->pm_flags |= MSDOSFSMNT_RONLY;
+	else
+		pmp->pm_fmod = 1;
+	mp->mnt_data = (qaddr_t) pmp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+	mp->mnt_flag |= MNT_LOCAL;
+	devvp->v_specmountpoint = mp;
+
+	return 0;
+
+error_exit:
+	if (bp)
+		brelse(bp);
+	(void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p);
+	if (pmp) {
+		if (pmp->pm_inusemap)
+			free(pmp->pm_inusemap, M_MSDOSFSFAT);
+		free(pmp, M_MSDOSFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return (error);
+}
+
+static int
+msdosfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	/* Nothing to do here; every argument is ignored. */
+	return 0;
+}
+
+/*
+ * Unmount the filesystem described by mp: flush its vnodes, close the
+ * device vnode and free the msdosfs mount structure.
+ */
+static int
+msdosfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct msdosfsmount *pmp;
+	struct vnode *devvp;
+	int error, openmode;
+
+	error = vflush(mp, NULLVP, (mntflags & MNT_FORCE) ? FORCECLOSE : 0);
+	if (error != 0)
+		return error;
+
+	pmp = VFSTOMSDOSFS(mp);
+	devvp = pmp->pm_devvp;
+	devvp->v_specmountpoint = NULL;
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_umount(): just before calling VOP_CLOSE()\n");
+	printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n",
+	    devvp->v_flag, devvp->v_usecount, devvp->v_writecount,
+	    devvp->v_holdcnt);
+	printf("lastr %d, id %lu, mount %p, op %p\n",
+	    devvp->v_lastr, devvp->v_id, devvp->v_mount, devvp->v_op);
+	printf("freef %p, freeb %p, mount %p\n",
+	    devvp->v_freelist.tqe_next, devvp->v_freelist.tqe_prev,
+	    devvp->v_mount);
+	printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
+	    TAILQ_FIRST(&devvp->v_cleanblkhd),
+	    TAILQ_FIRST(&devvp->v_dirtyblkhd),
+	    devvp->v_numoutput, devvp->v_type);
+	printf("union %p, tag %d, data[0] %08x, data[1] %08x\n",
+	    devvp->v_socket, devvp->v_tag,
+	    ((u_int *)devvp->v_data)[0],
+	    ((u_int *)devvp->v_data)[1]);
+#endif
+
+	/* The close mode follows the MSDOSFSMNT_RONLY flag. */
+	openmode = (pmp->pm_flags & MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE;
+	error = VOP_CLOSE(devvp, openmode, NOCRED, p);
+	vrele(devvp);
+	free(pmp->pm_inusemap, M_MSDOSFSFAT);
+	free(pmp, M_MSDOSFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	/* The close status is returned even though teardown has completed. */
+	return (error);
+}
+
+static int
+msdosfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct denode *rootdep;
+	int error;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp);
+#endif
+	/* On success hand back the vnode of the root directory's denode. */
+	error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &rootdep);
+	if (error == 0)
+		*vpp = DETOV(rootdep);
+	return (error);
+}
+
+static int
+msdosfs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return (EOPNOTSUPP);	/* quota control is not implemented */
+}
+
+/* Fill sbp with statistics for mp; block counts are in clusters. */
+static int
+msdosfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct statfs *cached = &mp->mnt_stat;
+	sbp->f_bsize = pmp->pm_bpcluster;
+	sbp->f_iosize = pmp->pm_bpcluster;
+	sbp->f_blocks = pmp->pm_nmbrofclusters;
+	sbp->f_bavail = pmp->pm_freeclustercount;
+	sbp->f_bfree = pmp->pm_freeclustercount;
+	sbp->f_ffree = 0;	/* what to put in here? */
+	sbp->f_files = pmp->pm_RootDirEnts;	/* XXX */
+	if (sbp != cached) {	/* caller passed its own buffer */
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy(cached->f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(cached->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
+	return 0;
+}
+
+static int
+msdosfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct vnode *vp, *nvp;
+	struct denode *dep;
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	int error, allerror = 0;	/* allerror: last error seen */
+	/* Write back modified denodes, then the device vnode unless lazy. */
+	/*
+	 * If we ever switch to not updating all of the fats all the time,
+	 * this would be the place to update them from the first one.
+	 */
+	if (pmp->pm_fmod != 0)
+		if (pmp->pm_flags & MSDOSFSMNT_RONLY)
+			panic("msdosfs_sync: rofs mod");
+		else {
+			/* update fats here */
+		}
+	/*
+	 * Write back each (modified) denode.
+	 */
+	simple_lock(&mntvnode_slock);
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+
+		simple_lock(&vp->v_interlock);
+		nvp = vp->v_mntvnodes.le_next;	/* read while mntvnode_slock is held */
+		dep = VTODE(vp);
+		if (vp->v_type == VNON ||
+		    ((dep->de_flag &
+		    (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 &&
+		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) {
+			simple_unlock(&vp->v_interlock);
+			continue;	/* nothing to write for this vnode */
+		}
+		simple_unlock(&mntvnode_slock);
+		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+		if (error) {
+			simple_lock(&mntvnode_slock);
+			if (error == ENOENT)
+				goto loop;	/* rescan from the list head */
+			continue;
+		}
+		error = VOP_FSYNC(vp, cred, waitfor, p);
+		if (error)
+			allerror = error;	/* remember it, keep syncing */
+		VOP_UNLOCK(vp, 0, p);
+		vrele(vp);
+		simple_lock(&mntvnode_slock);
+	}
+	simple_unlock(&mntvnode_slock);
+
+	/*
+	 * Flush filesystem control info.
+	 */
+	if (waitfor != MNT_LAZY) {
+		vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p);
+		if (error)
+			allerror = error;
+		VOP_UNLOCK(pmp->pm_devvp, 0, p);
+	}
+	return (allerror);
+}
+
+/* Translate file handle fhp into a vnode, after the export lookup. */
+static int
+msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct sockaddr *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct defid *dfh = (struct defid *)fhp;
+	struct netcred *cred;
+	struct denode *dep;
+	int error;
+	cred = vfs_export_lookup(mp, &pmp->pm_export, nam);
+	if (cred == NULL)
+		return (EACCES);	/* lookup found nothing for nam */
+	error = deget(pmp, dfh->defid_dirclust, dfh->defid_dirofs, &dep);
+	if (error != 0) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	*exflagsp = cred->netc_exflags;
+	*credanonp = &cred->netc_anon;
+	*vpp = DETOV(dep);
+	return (0);
+}
+
+/*
+ * Fill file handle fhp from vp's denode (de_dirclust, de_diroffset).
+ */
+static int
+msdosfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct denode *dep = VTODE(vp);
+	struct defid *out = (struct defid *)fhp;
+	out->defid_dirofs = dep->de_diroffset;
+	out->defid_dirclust = dep->de_dirclust;
+	out->defid_len = sizeof(*out);
+	/* out->defid_gen = dep->de_gen; */
+	return 0;
+}
+
+static int
+msdosfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return (EOPNOTSUPP);	/* lookup by inode number is not implemented */
+}
+
+static struct vfsops msdosfs_vfsops = {	/* positional: order matters */
+	msdosfs_mount,
+	msdosfs_start,
+	msdosfs_unmount,
+	msdosfs_root,
+	msdosfs_quotactl,
+	msdosfs_statfs,
+	msdosfs_sync,
+	msdosfs_vget,
+	msdosfs_fhtovp,
+	msdosfs_vptofh,
+	msdosfs_init		/* not defined in this file */
+};
+/* NOTE(review): VFS_SET presumably registers the table as "msdos" - confirm. */
+VFS_SET(msdosfs_vfsops, msdos, 0);
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
new file mode 100644
index 0000000..36aa91d
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -0,0 +1,1983 @@
+/*	$Id: msdosfs_vnops.c,v 1.80 1998/12/07 21:58:35 archie Exp $ */
+/*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>	/* defines plimit structure in proc struct */
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */	/* defines v_rdev */
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/signalvar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+#include <vm/vnode_pager.h>
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/fat.h>
+
+/*
+ * Prototypes for the MSDOSFS vnode operations.
+ *
+ * All of them are local to this file (static), return an int, and take a
+ * single pointer to the vop_*_args structure of the operation they
+ * implement.
+ */
+static int msdosfs_create __P((struct vop_create_args *));
+static int msdosfs_mknod __P((struct vop_mknod_args *));
+static int msdosfs_close __P((struct vop_close_args *));
+static int msdosfs_access __P((struct vop_access_args *));
+static int msdosfs_getattr __P((struct vop_getattr_args *));
+static int msdosfs_setattr __P((struct vop_setattr_args *));
+static int msdosfs_read __P((struct vop_read_args *));
+static int msdosfs_write __P((struct vop_write_args *));
+static int msdosfs_fsync __P((struct vop_fsync_args *));
+static int msdosfs_remove __P((struct vop_remove_args *));
+static int msdosfs_link __P((struct vop_link_args *));
+static int msdosfs_rename __P((struct vop_rename_args *));
+static int msdosfs_mkdir __P((struct vop_mkdir_args *));
+static int msdosfs_rmdir __P((struct vop_rmdir_args *));
+static int msdosfs_symlink __P((struct vop_symlink_args *));
+static int msdosfs_readdir __P((struct vop_readdir_args *));
+static int msdosfs_abortop __P((struct vop_abortop_args *));
+static int msdosfs_bmap __P((struct vop_bmap_args *));
+static int msdosfs_strategy __P((struct vop_strategy_args *));
+static int msdosfs_print __P((struct vop_print_args *));
+static int msdosfs_pathconf __P((struct vop_pathconf_args *ap));
+static int msdosfs_getpages __P((struct vop_getpages_args *));
+static int msdosfs_putpages __P((struct vop_putpages_args *));
+
+/*
+ * Some general notes:
+ *
+ * In the ufs filesystem the inodes, superblocks, and indirect blocks are
+ * read/written using the vnode for the filesystem. Blocks that represent
+ * the contents of a file are read/written using the vnode for the file
+ * (including directories when they are read/written as files). This
+ * presents problems for the dos filesystem because data that should be in
+ * an inode (if dos had them) resides in the directory itself.  Since we
+ * must update directory entries without the benefit of having the vnode
+ * for the directory we must use the vnode for the filesystem.  This means
+ * that when a directory is actually read/written (via read, write, or
+ * readdir, or seek) we must use the vnode for the filesystem instead of
+ * the vnode for the directory as would happen in ufs. This is to ensure we
+ * retrieve the correct block from the buffer cache since the hash value is
+ * based upon the vnode address and the desired block number.
+ */
+
+/*
+ * Create a regular file in the directory a_dvp.
+ *
+ * A scratch denode is given a unique DOS name, attributes derived from the
+ * requested mode and the current time, and is then handed to createde() to
+ * be installed in the parent directory.  On success the new vnode is
+ * returned through a_vpp.
+ *
+ * The pathname buffer cnp->cn_pnbuf is freed on every error path, and on
+ * success unless the SAVESTART bit is set in cn_flags.
+ */
+static int
+msdosfs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct denode *parent = VTODE(ap->a_dvp);
+	struct denode *created;
+	struct denode scratch;
+	struct timespec now;
+	int error;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
+#endif
+
+	/*
+	 * The root directory cannot change size, so when it has no room
+	 * left there is nothing we can do.
+	 */
+	if (pdep_is_full_root:
+	    0) {
+	}
+	return (0);
+}
+
+/*
+ * Create a filesystem node.
+ *
+ * VDIR requests are forwarded to msdosfs_mkdir() and VREG requests to
+ * msdosfs_create(), reusing the argument structure through a cast.  Any
+ * other type frees the pathname buffer and fails with EINVAL.
+ */
+static int
+msdosfs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+
+	if (ap->a_vap->va_type == VDIR)
+		return (msdosfs_mkdir((struct vop_mkdir_args *)ap));
+
+	if (ap->a_vap->va_type == VREG)
+		return (msdosfs_create((struct vop_create_args *)ap));
+
+	/* Nothing else can be created here. */
+	zfree(namei_zone, ap->a_cnp->cn_pnbuf);
+	return (EINVAL);
+}
+
+/*
+ * Close a file.
+ *
+ * If the vnode has more than one user at the time of the close, DETIMES()
+ * is applied to the denode with the current time.  The use count is
+ * examined while holding the vnode interlock.  Always returns 0.
+ */
+static int
+msdosfs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *node = VTODE(vp);
+	struct timespec now;
+
+	simple_lock(&vp->v_interlock);
+	if (vp->v_usecount > 1) {
+		getnanotime(&now);
+		DETIMES(node, &now, &now, &now);
+	}
+	simple_unlock(&vp->v_interlock);
+
+	return (0);
+}
+
+/*
+ * Check whether the credentials in a_cred allow the access in a_mode.
+ *
+ * Every file is treated as readable and executable, and as writable unless
+ * ATTR_READONLY is set; the result is then restricted by the mount's
+ * pm_mask.  Ownership comes from the mount (pm_uid, pm_gid), not from the
+ * file.
+ *
+ * Returns EROFS for a write request on a directory, symlink or regular
+ * file of a read-only mount, 0 for uid 0 or when all requested bits are
+ * granted to the caller's class (owner, group or other), EACCES otherwise.
+ */
+static int
+msdosfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct ucred *cred = ap->a_cred;
+	mode_t wanted = ap->a_mode;
+	mode_t granted, needed;
+	mode_t xbit, rbit, wbit;
+	register gid_t *gp;
+	int i;
+
+	/* Permissions this file offers, before looking at the caller. */
+	granted = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH);
+	if ((dep->de_Attributes & ATTR_READONLY) == 0)
+		granted |= (S_IWUSR|S_IWGRP|S_IWOTH);
+	granted &= pmp->pm_mask;
+
+	/*
+	 * Writes to directories, symlinks and regular files are refused
+	 * outright on a read-only mount; other vnode types are let through.
+	 */
+	if ((wanted & VWRITE) &&
+	    (vp->v_type == VDIR || vp->v_type == VLNK || vp->v_type == VREG) &&
+	    (vp->v_mount->mnt_flag & MNT_RDONLY))
+		return (EROFS);
+
+	/* User id 0 always gets access. */
+	if (cred->cr_uid == 0)
+		return 0;
+
+	/*
+	 * Pick the permission class that applies to the caller: owner
+	 * first, then the first matching group, then everyone else.
+	 */
+	if (cred->cr_uid == pmp->pm_uid) {
+		xbit = S_IXUSR;
+		rbit = S_IRUSR;
+		wbit = S_IWUSR;
+	} else {
+		xbit = S_IXOTH;
+		rbit = S_IROTH;
+		wbit = S_IWOTH;
+		for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups;
+		    i++, gp++) {
+			if (pmp->pm_gid == *gp) {
+				xbit = S_IXGRP;
+				rbit = S_IRGRP;
+				wbit = S_IWGRP;
+				break;
+			}
+		}
+	}
+
+	/* Translate the request into bits of that class and compare. */
+	needed = 0;
+	if (wanted & VEXEC)
+		needed |= xbit;
+	if (wanted & VREAD)
+		needed |= rbit;
+	if (wanted & VWRITE)
+		needed |= wbit;
+
+	if ((granted & needed) == needed)
+		return 0;
+	return EACCES;
+}
+
+/*
+ * Fill in *a_vap with the attributes of a file or directory.
+ *
+ * The denode's time stamps are first passed through DETIMES() with the
+ * current time.  Owner and group come from the mount, the link count is
+ * always 1, and the mode is derived from ATTR_READONLY and the mount's
+ * pm_mask.  Always returns 0.
+ */
+static int
+msdosfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct vattr *vap = ap->a_vap;
+	struct timespec now;
+	mode_t perms;
+	u_long fileid;
+	u_long dirents_per_blk;
+
+	dirents_per_blk = pmp->pm_BytesPerSec / sizeof(struct direntry);
+
+	getnanotime(&now);
+	DETIMES(dep, &now, &now, &now);
+	vap->va_fsid = dep->de_dev;
+
+	/*
+	 * msdosfs_readdir() must compute d_fileno exactly the way the
+	 * fileid is computed here, otherwise pwd does not work.
+	 */
+	if (dep->de_Attributes & ATTR_DIRECTORY) {
+		fileid = cntobn(pmp, dep->de_StartCluster) * dirents_per_blk;
+		if (dep->de_StartCluster == MSDOSFSROOT)
+			fileid = 1;
+	} else {
+		fileid = cntobn(pmp, dep->de_dirclust) * dirents_per_blk;
+		if (dep->de_dirclust == MSDOSFSROOT)
+			fileid = roottobn(pmp, 0) * dirents_per_blk;
+		fileid += dep->de_diroffset / sizeof(struct direntry);
+	}
+	vap->va_fileid = fileid;
+
+	/* Read-only files lose all write bits; pm_mask applies on top. */
+	if (dep->de_Attributes & ATTR_READONLY)
+		perms = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+	else
+		perms = S_IRWXU|S_IRWXG|S_IRWXO;
+	vap->va_mode = perms & pmp->pm_mask;
+
+	vap->va_uid = pmp->pm_uid;
+	vap->va_gid = pmp->pm_gid;
+	vap->va_nlink = 1;
+	vap->va_rdev = 0;
+	vap->va_size = dep->de_FileSize;
+
+	/*
+	 * Access and creation times are only converted on long-name
+	 * mounts; otherwise both mirror the modification time.
+	 */
+	dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime);
+	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
+		dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime);
+		dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun,
+		    &vap->va_ctime);
+	} else {
+		vap->va_atime = vap->va_mtime;
+		vap->va_ctime = vap->va_mtime;
+	}
+
+	/* A clear DOS archive bit is reported as SF_ARCHIVED. */
+	vap->va_flags = 0;
+	if ((dep->de_Attributes & ATTR_ARCHIVE) == 0)
+		vap->va_flags |= SF_ARCHIVED;
+
+	vap->va_gen = 0;
+	vap->va_blocksize = pmp->pm_bpcluster;
+	/* Space used is the file size rounded up with pm_crbomask. */
+	vap->va_bytes =
+	    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_filerev = dep->de_modrev;
+	return (0);
+}
+
+/*
+ * Change the attributes of a file or directory.
+ *
+ * The requested changes in *a_vap are handled in this order: a check for
+ * attributes that can never be set (EINVAL), then flags, owner/group,
+ * size, access/modification times and finally the mode.  Each group is
+ * skipped when its field is VNOVAL.  Every group that is processed first
+ * refuses to work on a read-only mount (EROFS); the size group does so
+ * only for symlinks and regular files.
+ *
+ * Changes to the denode are made as each group is processed, so an error
+ * in a later group returns without undoing earlier groups and without
+ * reaching the final deupdat() call.
+ *
+ * Returns 0 or the deupdat() result on success, otherwise the first error
+ * encountered.
+ */
+static int
+msdosfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct vattr *vap = ap->a_vap;
+	struct ucred *cred = ap->a_cred;
+	int error = 0;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n",
+	    ap->a_vp, vap, cred, ap->a_p);
+#endif
+
+	/*
+	 * Check for unsettable attributes.
+	 */
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+#ifdef MSDOSFS_DEBUG
+		printf("msdosfs_setattr(): returning EINVAL\n");
+		printf("    va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n",
+		    vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid);
+		printf("    va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n",
+		    vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen);
+		printf("    va_uid %x, va_gid %x\n",
+		    vap->va_uid, vap->va_gid);
+#endif
+		return (EINVAL);
+	}
+	/* File flags: only SF_ARCHIVED is supported. */
+	if (vap->va_flags != VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		/* Only the mount owner or the superuser may change flags. */
+		if (cred->cr_uid != pmp->pm_uid &&
+		    (error = suser(cred, &ap->a_p->p_acflag)))
+			return (error);
+		/*
+		 * We are very inconsistent about handling unsupported
+		 * attributes.  We ignored the access time and the
+		 * read and execute bits.  We were strict for the other
+		 * attributes.
+		 *
+		 * Here we are strict, stricter than ufs in not allowing
+		 * users to attempt to set SF_SETTABLE bits or anyone to
+		 * set unsupported bits.  However, we ignore attempts to
+		 * set ATTR_ARCHIVE for directories, because `cp -pr' from
+		 * a more sensible file system attempts it a lot.
+		 */
+		if (cred->cr_uid != 0) {
+			if (vap->va_flags & SF_SETTABLE)
+				return EPERM;
+		}
+		if (vap->va_flags & ~SF_ARCHIVED)
+			return EOPNOTSUPP;
+		/* SF_ARCHIVED is the inverse of the DOS archive bit. */
+		if (vap->va_flags & SF_ARCHIVED)
+			dep->de_Attributes &= ~ATTR_ARCHIVE;
+		else if (!(dep->de_Attributes & ATTR_DIRECTORY))
+			dep->de_Attributes |= ATTR_ARCHIVE;
+		dep->de_flag |= DE_MODIFIED;
+	}
+
+	/*
+	 * Owner and group are properties of the mount, not of the file, so
+	 * the only request that can succeed is one that names the mount's
+	 * own uid and gid; anything else ends in EINVAL (or in the suser()
+	 * error for an unprivileged caller).
+	 */
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		uid_t uid;
+		gid_t gid;
+		
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		/* A field left at VNOVAL stands for the current value. */
+		uid = vap->va_uid;
+		if (uid == (uid_t)VNOVAL)
+			uid = pmp->pm_uid;
+		gid = vap->va_gid;
+		if (gid == (gid_t)VNOVAL)
+			gid = pmp->pm_gid;
+		if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
+		    (gid != pmp->pm_gid && !groupmember(gid, cred))) &&
+		    (error = suser(cred, &ap->a_p->p_acflag)))
+			return error;
+		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
+			return EINVAL;
+	}
+
+	/* Size change: truncate or extend through detrunc(). */
+	if (vap->va_size != VNOVAL) {
+		/*
+		 * Disallow write attempts on read-only file systems;
+		 * unless the file is a socket, fifo, or a block or
+		 * character device resident on the file system.
+		 */
+		switch (vp->v_type) {
+		case VDIR:
+			return (EISDIR);
+			/* NOT REACHED */
+		case VLNK:
+		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			break;
+		default:
+			break;
+		}
+		error = detrunc(dep, vap->va_size, 0, cred, ap->a_p);
+		if (error)
+			return error;
+	}
+	/* Access and modification times. */
+	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		/*
+		 * Allowed for the mount owner and the superuser.  Anyone
+		 * else is allowed only when VA_UTIMES_NULL is set and
+		 * VOP_ACCESS() grants write access.
+		 */
+		if (cred->cr_uid != pmp->pm_uid &&
+		    (error = suser(cred, &ap->a_p->p_acflag)) &&
+		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
+		    (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_p))))
+			return (error);
+		/* Time changes on directories are silently ignored. */
+		if (vp->v_type != VDIR) {
+			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
+			    vap->va_atime.tv_sec != VNOVAL)
+				unix2dostime(&vap->va_atime, &dep->de_ADate, NULL, NULL);
+			if (vap->va_mtime.tv_sec != VNOVAL)
+				unix2dostime(&vap->va_mtime, &dep->de_MDate, &dep->de_MTime, NULL);
+			dep->de_Attributes |= ATTR_ARCHIVE;
+			dep->de_flag |= DE_MODIFIED;
+		}
+	}
+	/*
+	 * DOS files only have the ability to have their writability
+	 * attribute set, so we use the owner write bit to set the readonly
+	 * attribute.
+	 */
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		if (cred->cr_uid != pmp->pm_uid &&
+		    (error = suser(cred, &ap->a_p->p_acflag)))
+			return (error);
+		/* Mode changes on directories are silently ignored. */
+		if (vp->v_type != VDIR) {
+			/* We ignore the read and execute bits. */
+			if (vap->va_mode & VWRITE)
+				dep->de_Attributes &= ~ATTR_READONLY;
+			else
+				dep->de_Attributes |= ATTR_READONLY;
+			dep->de_flag |= DE_MODIFIED;
+		}
+	}
+	/* Hand the accumulated changes to deupdat(). */
+	return (deupdat(dep, 1));
+}
+
+/*
+ * Read from a file or directory.
+ *
+ * Data is copied to the caller one buffer at a time until the request is
+ * satisfied, end of file is reached, or an error occurs.  Directories are
+ * read through the device vnode (pm_devvp) after mapping the cluster with
+ * pcbmap(); regular files are read through their own vnode, with one
+ * cluster of read-ahead when the previous read ended on the preceding
+ * cluster and the next cluster still lies inside the file.
+ *
+ * Unless the vnode is a directory or the mount has MNT_NOATIME set, the
+ * denode is marked DE_ACCESS whenever the loop is left normally, including
+ * when it stops at end of file.
+ *
+ * Returns 0 on success or at end of file, EINVAL for a negative offset,
+ * or the error from pcbmap(), bread()/breadn() or uiomove().
+ */
+static int
+msdosfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error = 0;
+	off_t diff;
+	int blsize;
+	int isadir;
+	long n;
+	long on;
+	daddr_t lbn;
+	daddr_t rablock;
+	int rasize;
+	struct buf *bp;
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct uio *uio = ap->a_uio;
+
+	/*
+	 * If they didn't ask for any data, then we are done.
+	 */
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	isadir = dep->de_Attributes & ATTR_DIRECTORY;
+	do {
+		lbn = de_cluster(pmp, uio->uio_offset);
+		on = uio->uio_offset & pmp->pm_crbomask;
+		n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid);
+		/*
+		 * Bytes left before end of file.  This is kept in an off_t:
+		 * squeezing it into an int made files larger than 2GB look
+		 * like they were at end of file, and could make offsets far
+		 * beyond the end look as if they were inside the file.
+		 */
+		diff = (off_t)dep->de_FileSize - uio->uio_offset;
+		if (diff <= 0) {
+			/*
+			 * End of file.  Leave through the common exit so
+			 * that a read which runs into the end of the file
+			 * is still recorded as an access.  error is 0 here.
+			 */
+			break;
+		}
+		if (diff < n)
+			n = diff;
+		/* convert cluster # to block # if a directory */
+		if (isadir) {
+			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
+			if (error)
+				return (error);
+		}
+		/*
+		 * If we are operating on a directory file then be sure to
+		 * do i/o with the vnode for the filesystem instead of the
+		 * vnode for the directory.
+		 */
+		if (isadir) {
+			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+		} else {
+			rablock = lbn + 1;
+			if (vp->v_lastr + 1 == lbn &&
+			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
+				rasize = pmp->pm_bpcluster;
+				error = breadn(vp, lbn, pmp->pm_bpcluster,
+				    &rablock, &rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, lbn, pmp->pm_bpcluster,
+				    NOCRED, &bp);
+			vp->v_lastr = lbn;
+		}
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		/* Never copy more than the buffer actually holds. */
+		n = min(n, pmp->pm_bpcluster - bp->b_resid);
+		error = uiomove(bp->b_data + on, (int) n, uio);
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+	if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME))
+		dep->de_flag |= DE_ACCESS;
+	return (error);
+}
+
+/*
+ * Write data to a regular file.
+ *
+ * Directories are rejected with EISDIR and any other vnode type panics.
+ * With IO_APPEND the write starts at the current end of file.  A write
+ * that starts beyond the end of file first has the gap filled through
+ * deextend(), and the clusters needed for the whole request are allocated
+ * up front with extendfile().
+ *
+ * On error with IO_UNIT the file is truncated back to its original size
+ * and the uio is rewound; without IO_UNIT a partial write is reported as
+ * success.
+ *
+ * Returns 0 on success, EINVAL for a negative offset, EFBIG (after posting
+ * SIGXFSZ) when the file size limit would be exceeded, ENOSPC when no
+ * cluster is available for the data, or an error from the helpers called.
+ */
+static int
+msdosfs_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int n;
+	int croffset;
+	int resid;
+	u_long osize;
+	int error = 0;
+	u_long count;
+	daddr_t bn, lastcn;
+	struct buf *bp;
+	int ioflag = ap->a_ioflag;
+	struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct vnode *vp = ap->a_vp;
+	struct vnode *thisvp;
+	struct denode *dep = VTODE(vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct ucred *cred = ap->a_cred;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
+	    vp, uio, ioflag, cred);
+	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
+	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
+#endif
+
+	switch (vp->v_type) {
+	case VREG:
+		if (ioflag & IO_APPEND)
+			uio->uio_offset = dep->de_FileSize;
+		thisvp = vp;
+		break;
+	case VDIR:
+		return EISDIR;
+	default:
+		panic("msdosfs_write(): bad file type");
+	}
+
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	if (uio->uio_resid == 0)
+		return (0);
+
+	/*
+	 * If they've exceeded their filesize limit, tell them about it.
+	 */
+	if (p &&
+	    ((uio->uio_offset + uio->uio_resid) >
+	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
+		psignal(p, SIGXFSZ);
+		return (EFBIG);
+	}
+
+	/*
+	 * If the offset we are starting the write at is beyond the end of
+	 * the file, then they've done a seek.  Unix filesystems allow
+	 * files with holes in them, DOS doesn't so we must fill the hole
+	 * with zeroed blocks.
+	 */
+	if (uio->uio_offset > dep->de_FileSize) {
+		error = deextend(dep, uio->uio_offset, cred);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Remember some values in case the write fails.
+	 */
+	resid = uio->uio_resid;
+	osize = dep->de_FileSize;
+
+	/*
+	 * If we write beyond the end of the file, extend it to its ultimate
+	 * size ahead of the time to hopefully get a contiguous area.
+	 *
+	 * An ENOSPC from extendfile() is tolerated unless IO_UNIT is set:
+	 * lastcn then records the last cluster that does exist, and the
+	 * loop below stops with ENOSPC once the write would go past it.
+	 */
+	if (uio->uio_offset + resid > osize) {
+		count = de_clcount(pmp, uio->uio_offset + resid) -
+			de_clcount(pmp, osize);
+		error = extendfile(dep, count, NULL, NULL, 0);
+		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
+			goto errexit;
+		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
+	} else
+		lastcn = de_clcount(pmp, osize) - 1;
+
+	/* Transfer the data one cluster buffer per iteration. */
+	do {
+		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+			error = ENOSPC;
+			break;
+		}
+
+		/* croffset: offset within the cluster; n: bytes to copy. */
+		croffset = uio->uio_offset & pmp->pm_crbomask;
+		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
+		if (uio->uio_offset + n > dep->de_FileSize) {
+			dep->de_FileSize = uio->uio_offset + n;
+			/* The object size needs to be set before buffer is allocated */
+			vnode_pager_setsize(vp, dep->de_FileSize);
+		}
+
+		bn = de_cluster(pmp, uio->uio_offset);
+		if ((uio->uio_offset & pmp->pm_crbomask) == 0
+		    && (de_cluster(pmp, uio->uio_offset + uio->uio_resid) 
+		        > de_cluster(pmp, uio->uio_offset)
+			|| uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
+			/*
+			 * If either the whole cluster gets written,
+			 * or we write the cluster from its start beyond EOF,
+			 * then no need to read data from disk.
+			 */
+			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
+			clrbuf(bp);
+			/*
+			 * Do the bmap now, since pcbmap needs buffers
+			 * for the fat table. (see msdosfs_strategy)
+			 */
+			if (bp->b_blkno == bp->b_lblkno) {
+				error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 
+				     0, 0);
+				if (error)
+					bp->b_blkno = -1;
+			}
+			/* A block number of -1 means the mapping failed. */
+			if (bp->b_blkno == -1) {
+				brelse(bp);
+				if (!error)
+					error = EIO;		/* XXX */
+				break;
+			}
+		} else {
+			/*
+			 * The block we need to write into exists, so read it in.
+			 */
+			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
+			if (error) {
+				brelse(bp);
+				break;
+			}
+		}
+
+		/*
+		 * Should these vnode_pager_* functions be done on dir
+		 * files?
+		 */
+
+		/*
+		 * Copy the data from user space into the buf header.
+		 */
+		error = uiomove(bp->b_data + croffset, n, uio);
+
+		/*
+		 * If they want this synchronous then write it and wait for
+		 * it.  Otherwise, if on a cluster boundary write it
+		 * asynchronously so we can move on to the next block
+		 * without delay.  Otherwise do a delayed write because we
+		 * may want to write some more into the block later.
+		 *
+		 * NOTE(review): the result of the synchronous bwrite() is
+		 * discarded, so an I/O error there is not reported.
+		 */
+		if (ioflag & IO_SYNC)
+			(void) bwrite(bp);
+		else if (n + croffset == pmp->pm_bpcluster)
+			bawrite(bp);
+		else
+			bdwrite(bp);
+		dep->de_flag |= DE_UPDATE;
+	} while (error == 0 && uio->uio_resid > 0);
+
+	/*
+	 * If the write failed and they want us to, truncate the file back
+	 * to the size it was before the write was attempted.
+	 */
+errexit:
+	if (error) {
+		if (ioflag & IO_UNIT) {
+			/* All or nothing: undo the size and uio changes. */
+			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL);
+			uio->uio_offset -= resid - uio->uio_resid;
+			uio->uio_resid = resid;
+		} else {
+			/*
+			 * Keep what was written; if anything was
+			 * transferred the error is dropped.
+			 */
+			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL);
+			if (uio->uio_resid != resid)
+				error = 0;
+		}
+	} else if (ioflag & IO_SYNC)
+		error = deupdat(dep, 1);
+	return (error);
+}
+
+/*
+ * Flush the blocks of a file to disk.
+ *
+ * Every buffer on the vnode's dirty list that is not busy is written with
+ * bwrite(); the scan restarts from the head of the list after each write.
+ * The function then sleeps until v_numoutput drops to zero and finally
+ * calls deupdat(), waiting for it only when a_waitfor is MNT_WAIT.
+ *
+ * The dirty list is walked at splbio(); the level saved in s is restored
+ * before each bwrite() and before returning.
+ *
+ * This function is worthless for vnodes that represent directories. Maybe we
+ * could just do a sync if they try an fsync on a directory file.
+ *
+ * Returns the result of deupdat().
+ */
+static int
+msdosfs_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	int s;
+	struct buf *bp, *nbp;
+
+	/*
+	 * Flush all dirty buffers associated with a vnode.
+	 */
+loop:
+	s = splbio();
+	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+		nbp = TAILQ_NEXT(bp, b_vnbufs);
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("msdosfs_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		(void) bwrite(bp);
+		/* The list may have changed while writing; start over. */
+		goto loop;
+	}
+	while (vp->v_numoutput) {
+		vp->v_flag |= VBWAIT;
+		(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0);
+	}
+#ifdef DIAGNOSTIC
+	if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
+		vprint("msdosfs_fsync: dirty", vp);
+		/*
+		 * Restore the saved level before starting over.  The code
+		 * at "loop" calls splbio() again; jumping there while still
+		 * at splbio would overwrite s with the raised level and the
+		 * final splx() would never lower the priority again.
+		 */
+		splx(s);
+		goto loop;
+	}
+#endif
+	splx(s);
+	return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT));
+}
+
+/*
+ * Remove the directory entry of a file.
+ *
+ * Directories are refused with EPERM.  For any other vnode the entry is
+ * deleted from the parent through removede() and its result is returned.
+ */
+static int
+msdosfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *victim = VTODE(vp);
+	struct denode *parent = VTODE(ap->a_dvp);
+	int error;
+
+	if (vp->v_type != VDIR)
+		error = removede(parent, victim);
+	else
+		error = EPERM;
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_remove(), dep %p, v_usecount %d\n", victim, vp->v_usecount);
+#endif
+	return (error);
+}
+
+/*
+ * Hard links do not exist on DOS filesystems, so this operation always
+ * fails with EOPNOTSUPP.  The lookup that preceded the call was done for
+ * a create with the parent locked, so the pending name operation on the
+ * target directory is cancelled through VOP_ABORTOP() before returning.
+ */
+static int
+msdosfs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_tdvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *tdvp = ap->a_tdvp;
+	struct componentname *cnp = ap->a_cnp;
+
+	VOP_ABORTOP(tdvp, cnp);
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Renames on files require moving the denode to a new hash queue since the
+ * denode's location is used to compute which hash queue to put the file
+ * in. Unless it is a rename in place.  For example "mv a b".
+ *
+ * What follows is the basic algorithm:
+ *
+ * if (file move) {
+ *	if (dest file exists) {
+ *		remove dest file
+ *	}
+ *	if (dest and src in same directory) {
+ *		rewrite name in existing directory slot
+ *	} else {
+ *		write new entry in dest directory
+ *		update offset and dirclust in denode
+ *		move denode to new hash chain
+ *		clear old directory entry
+ *	}
+ * } else {
+ *	directory move
+ *	if (dest directory exists) {
+ *		if (dest is not empty) {
+ *			return ENOTEMPTY
+ *		}
+ *		remove dest directory
+ *	}
+ *	if (dest and src in same directory) {
+ *		rewrite name in existing entry
+ *	} else {
+ *		be sure dest is not a child of src directory
+ *		write entry in dest directory
+ *		update "." and ".." in moved directory
+ *		clear old directory entry for moved directory
+ *	}
+ * }
+ *
+ * On entry:
+ *	source's parent directory is unlocked
+ *	source file or directory is unlocked
+ *	destination's parent directory is locked
+ *	destination file or directory is locked if it exists
+ *
+ * On exit:
+ *	all denodes should be released
+ *
+ * Notes:
+ * I'm not sure how the memory containing the pathnames pointed at by the
+ * componentname structures is freed, there may be some memory bleeding
+ * for each rename done.
+ */
+static int
+msdosfs_rename(ap)
+	struct vop_rename_args /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *fvp = ap->a_fvp;
+	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *tvp = ap->a_tvp;
+	struct componentname *tcnp = ap->a_tcnp;
+	struct componentname *fcnp = ap->a_fcnp;
+	struct proc *p = fcnp->cn_proc;
+	struct denode *ip, *xp, *dp, *zp;
+	u_char toname[11], oldname[11];
+	u_long from_diroffset, to_diroffset;
+	u_char to_count;
+	int doingdirectory = 0, newparent = 0;
+	int error;
+	u_long cn;
+	daddr_t bn;
+	struct denode *fddep;	/* from file's parent directory	 */
+	struct denode *fdep;	/* from file or directory	 */
+	struct denode *tddep;	/* to file's parent directory	 */
+	struct denode *tdep;	/* to file or directory		 */
+	struct msdosfsmount *pmp;
+	struct direntry *dotdotp;
+	struct buf *bp;
+
+	fddep = VTODE(ap->a_fdvp);
+	fdep = VTODE(ap->a_fvp);
+	tddep = VTODE(ap->a_tdvp);
+	tdep = tvp ? VTODE(tvp) : NULL;
+	pmp = fddep->de_pmp;
+
+	pmp = VFSTOMSDOSFS(fdvp->v_mount);
+
+#ifdef DIAGNOSTIC
+	if ((tcnp->cn_flags & HASBUF) == 0 ||
+	    (fcnp->cn_flags & HASBUF) == 0)
+		panic("msdosfs_rename: no name");
+#endif
+	/*
+	 * Check for cross-device rename.
+	 */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+abortit:
+		VOP_ABORTOP(tdvp, tcnp);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		VOP_ABORTOP(fdvp, fcnp);
+		vrele(fdvp);
+		vrele(fvp);
+		return (error);
+	}
+
+	/*
+	 * If source and dest are the same, do nothing.
+	 */
+	if (tvp == fvp) {
+		error = 0;
+		goto abortit;
+	}
+
+	error = vn_lock(fvp, LK_EXCLUSIVE, p);
+	if (error)
+		goto abortit;
+	dp = VTODE(fdvp);
+	ip = VTODE(fvp);
+
+	/*
+	 * Be sure we are not renaming ".", "..", or an alias of ".". This
+	 * leads to a crippled directory tree.  It's pretty tough to do a
+	 * "ls" or "pwd" with the "." directory entry missing, and "cd .."
+	 * doesn't work if the ".." entry is missing.
+	 */
+	if (ip->de_Attributes & ATTR_DIRECTORY) {
+		/*
+		 * Avoid ".", "..", and aliases of "." for obvious reasons.
+		 */
+		if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+		    dp == ip ||
+		    (fcnp->cn_flags & ISDOTDOT) ||
+		    (tcnp->cn_flags & ISDOTDOT) ||
+		    (ip->de_flag & DE_RENAME)) {
+			VOP_UNLOCK(fvp, 0, p);
+			error = EINVAL;
+			goto abortit;
+		}
+		ip->de_flag |= DE_RENAME;
+		doingdirectory++;
+	}
+
+	/*
+	 * When the target exists, both the directory
+	 * and target vnodes are returned locked.
+	 */
+	dp = VTODE(tdvp);
+	xp = tvp ? VTODE(tvp) : NULL;
+	/*
+	 * Remember direntry place to use for destination
+	 */
+	to_diroffset = dp->de_fndoffset;
+	to_count = dp->de_fndcnt;
+
+	/*
+	 * If ".." must be changed (ie the directory gets a new
+	 * parent) then the source directory must not be in the
+	 * directory heirarchy above the target, as this would
+	 * orphan everything below the source directory. Also
+	 * the user must have write permission in the source so
+	 * as to be able to change "..". We must repeat the call
+	 * to namei, as the parent directory is unlocked by the
+	 * call to doscheckpath().
+	 */
+	error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
+	VOP_UNLOCK(fvp, 0, p);
+	if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster)
+		newparent = 1;
+	vrele(fdvp);
+	if (doingdirectory && newparent) {
+		if (error)	/* write access check above */
+			goto bad;
+		if (xp != NULL)
+			vput(tvp);
+		/*
+		 * doscheckpath() vput()'s dp,
+		 * so we have to do a relookup afterwards
+		 */
+		error = doscheckpath(ip, dp);
+		if (error)
+			goto out;
+		if ((tcnp->cn_flags & SAVESTART) == 0)
+			panic("msdosfs_rename: lost to startdir");
+		error = relookup(tdvp, &tvp, tcnp);
+		if (error)
+			goto out;
+		dp = VTODE(tdvp);
+		xp = tvp ? VTODE(tvp) : NULL;
+	}
+
+	if (xp != NULL) {
+		/*
+		 * Target must be empty if a directory and have no links
+		 * to it. Also, ensure source and target are compatible
+		 * (both directories, or both not directories).
+		 */
+		if (xp->de_Attributes & ATTR_DIRECTORY) {
+			if (!dosdirempty(xp)) {
+				error = ENOTEMPTY;
+				goto bad;
+			}
+			if (!doingdirectory) {
+				error = ENOTDIR;
+				goto bad;
+			}
+			cache_purge(tdvp);
+		} else if (doingdirectory) {
+			error = EISDIR;
+			goto bad;
+		}
+		error = removede(dp, xp);
+		if (error)
+			goto bad;
+		vput(tvp);
+		xp = NULL;
+	}
+
+	/*
+	 * Convert the filename in tcnp into a dos filename. We copy this
+	 * into the denode and directory entry for the destination
+	 * file/directory.
+	 */
+	error = uniqdosname(VTODE(tdvp), tcnp, toname);
+	if (error)
+		goto abortit;
+
+	/*
+	 * Since from wasn't locked at various places above,
+	 * have to do a relookup here.
+	 */
+	fcnp->cn_flags &= ~MODMASK;
+	fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+	if ((fcnp->cn_flags & SAVESTART) == 0)
+		panic("msdosfs_rename: lost from startdir");
+	if (!newparent)
+		VOP_UNLOCK(tdvp, 0, p);
+	(void) relookup(fdvp, &fvp, fcnp);
+	if (fvp == NULL) {
+		/*
+		 * From name has disappeared.
+		 */
+		if (doingdirectory)
+			panic("rename: lost dir entry");
+		vrele(ap->a_fvp);
+		if (newparent)
+			VOP_UNLOCK(tdvp, 0, p);
+		vrele(tdvp);
+		return 0;
+	}
+	xp = VTODE(fvp);
+	zp = VTODE(fdvp);
+	from_diroffset = zp->de_fndoffset;
+
+	/*
+	 * Ensure that the directory entry still exists and has not
+	 * changed till now. If the source is a file the entry may
+	 * have been unlinked or renamed. In either case there is
+	 * no further work to be done. If the source is a directory
+	 * then it cannot have been rmdir'ed or renamed; this is
+	 * prohibited by the DE_RENAME flag.
+	 */
+	if (xp != ip) {
+		if (doingdirectory)
+			panic("rename: lost dir entry");
+		vrele(ap->a_fvp);
+		VOP_UNLOCK(fvp, 0, p);
+		if (newparent)
+			VOP_UNLOCK(fdvp, 0, p);
+		xp = NULL;
+	} else {
+		vrele(fvp);
+		xp = NULL;
+
+		/*
+		 * First write a new entry in the destination
+		 * directory and mark the entry in the source directory
+		 * as deleted.  Then move the denode to the correct hash
+		 * chain for its new location in the filesystem.  And, if
+		 * we moved a directory, then update its .. entry to point
+		 * to the new parent directory.
+		 */
+		bcopy(ip->de_Name, oldname, 11);
+		bcopy(toname, ip->de_Name, 11);	/* update denode */
+		dp->de_fndoffset = to_diroffset;
+		dp->de_fndcnt = to_count;
+		error = createde(ip, dp, (struct denode **)0, tcnp);
+		if (error) {
+			bcopy(oldname, ip->de_Name, 11);
+			if (newparent)
+				VOP_UNLOCK(fdvp, 0, p);
+			VOP_UNLOCK(fvp, 0, p);
+			goto bad;
+		}
+		ip->de_refcnt++;
+		zp->de_fndoffset = from_diroffset;
+		error = removede(zp, ip);
+		if (error) {
+			/* XXX should really panic here, fs is corrupt */
+			if (newparent)
+				VOP_UNLOCK(fdvp, 0, p);
+			VOP_UNLOCK(fvp, 0, p);
+			goto bad;
+		}
+		if (!doingdirectory) {
+			error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0,
+				       &ip->de_dirclust, 0);
+			if (error) {
+				/* XXX should really panic here, fs is corrupt */
+				if (newparent)
+					VOP_UNLOCK(fdvp, 0, p);
+				VOP_UNLOCK(fvp, 0, p);
+				goto bad;
+			}
+			if (ip->de_dirclust == MSDOSFSROOT)
+				ip->de_diroffset = to_diroffset;
+			else
+				ip->de_diroffset = to_diroffset & pmp->pm_crbomask;
+		}
+		reinsert(ip);
+		if (newparent)
+			VOP_UNLOCK(fdvp, 0, p);
+	}
+
+	/*
+	 * If we moved a directory to a new parent directory, then we must
+	 * fixup the ".." entry in the moved directory.
+	 */
+	if (doingdirectory && newparent) {
+		cn = ip->de_StartCluster;
+		if (cn == MSDOSFSROOT) {
+			/* this should never happen */
+			panic("msdosfs_rename(): updating .. in root directory?");
+		} else
+			bn = cntobn(pmp, cn);
+		error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
+			      NOCRED, &bp);
+		if (error) {
+			/* XXX should really panic here, fs is corrupt */
+			brelse(bp);
+			VOP_UNLOCK(fvp, 0, p);
+			goto bad;
+		}
+		dotdotp = (struct direntry *)bp->b_data + 1;
+		putushort(dotdotp->deStartCluster, dp->de_StartCluster);
+		if (FAT32(pmp))
+			putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16);
+		error = bwrite(bp);
+		if (error) {
+			/* XXX should really panic here, fs is corrupt */
+			VOP_UNLOCK(fvp, 0, p);
+			goto bad;
+		}
+	}
+
+	VOP_UNLOCK(fvp, 0, p);
+bad:
+	if (xp)
+		vput(tvp);
+	vput(tdvp);
+out:
+	ip->de_flag &= ~DE_RENAME;
+	vrele(fdvp);
+	vrele(fvp);
+	return (error);
+
+}
+
+static struct {			/* image of "." and ".." that msdosfs_mkdir copies into a new directory */
+	struct direntry dot;
+	struct direntry dotdot;
+} dosdirtemplate = {
+	{	".       ", "   ",			/* the . entry */
+		ATTR_DIRECTORY,				/* file attribute */
+		0,	 				/* reserved */
+		0, { 0, 0 }, { 0, 0 },			/* create time & date (overwritten by msdosfs_mkdir) */
+		{ 0, 0 },				/* access date (overwritten by msdosfs_mkdir) */
+		{ 0, 0 },				/* high bits of start cluster (set by msdosfs_mkdir on FAT32) */
+		{ 210, 4 }, { 210, 4 },			/* modify time & date (overwritten by msdosfs_mkdir) */
+		{ 0, 0 },				/* startcluster (set by msdosfs_mkdir) */
+		{ 0, 0, 0, 0 } 				/* filesize */
+	},
+	{	"..      ", "   ",			/* the .. entry */
+		ATTR_DIRECTORY,				/* file attribute */
+		0,	 				/* reserved */
+		0, { 0, 0 }, { 0, 0 },			/* create time & date (overwritten by msdosfs_mkdir) */
+		{ 0, 0 },				/* access date (overwritten by msdosfs_mkdir) */
+		{ 0, 0 },				/* high bits of start cluster (set by msdosfs_mkdir on FAT32) */
+		{ 210, 4 }, { 210, 4 },			/* modify time & date (overwritten by msdosfs_mkdir) */
+		{ 0, 0 },				/* startcluster (set by msdosfs_mkdir) */
+		{ 0, 0, 0, 0 }				/* filesize */
+	}
+};
+
+static int
+msdosfs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;		parent directory
+		struct vnode **a_vpp;		out: vnode of the new directory
+		struct componentname *a_cnp;	name of the directory to create
+		struct vattr *a_vap;		not referenced by this routine
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct denode *dep;
+	struct denode *pdep = VTODE(ap->a_dvp);
+	struct direntry *denp;
+	struct msdosfsmount *pmp = pdep->de_pmp;
+	struct buf *bp;
+	u_long newcluster, pcl;
+	int bn;
+	int error;
+	struct denode ndirent;
+	struct timespec ts;
+
+	/*
+	 * If this is the root directory and there is no space left we
+	 * can't do anything.  This is because the root directory can not
+	 * change size.
+	 */
+	if (pdep->de_StartCluster == MSDOSFSROOT
+	    && pdep->de_fndoffset >= pdep->de_FileSize) {
+		error = ENOSPC;
+		goto bad2;
+	}
+
+	/*
+	 * Allocate a cluster to hold the about to be created directory.
+	 */
+	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
+	if (error)
+		goto bad2;
+
+	bzero(&ndirent, sizeof(ndirent));	/* scratch denode describing the new directory */
+	ndirent.de_pmp = pmp;
+	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
+	getnanotime(&ts);
+	DETIMES(&ndirent, &ts, &ts, &ts);
+
+	/*
+	 * Now fill the cluster with the "." and ".." entries. And write
+	 * the cluster to disk.  This way it is there for the parent
+	 * directory to be pointing at if there were a crash.
+	 */
+	bn = cntobn(pmp, newcluster);
+	/* always succeeds */
+	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0);
+	bzero(bp->b_data, pmp->pm_bpcluster);
+	bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate);
+	denp = (struct direntry *)bp->b_data;	/* denp[0] is ".", denp[1] is ".." */
+	putushort(denp[0].deStartCluster, newcluster);
+	putushort(denp[0].deCDate, ndirent.de_CDate);
+	putushort(denp[0].deCTime, ndirent.de_CTime);
+	denp[0].deCHundredth = ndirent.de_CHun;
+	putushort(denp[0].deADate, ndirent.de_ADate);
+	putushort(denp[0].deMDate, ndirent.de_MDate);
+	putushort(denp[0].deMTime, ndirent.de_MTime);
+	pcl = pdep->de_StartCluster;
+	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)	/* parent is the FAT32 root */
+		pcl = 0;				/* ".." then records cluster 0 */
+	putushort(denp[1].deStartCluster, pcl);
+	putushort(denp[1].deCDate, ndirent.de_CDate);
+	putushort(denp[1].deCTime, ndirent.de_CTime);
+	denp[1].deCHundredth = ndirent.de_CHun;
+	putushort(denp[1].deADate, ndirent.de_ADate);
+	putushort(denp[1].deMDate, ndirent.de_MDate);
+	putushort(denp[1].deMTime, ndirent.de_MTime);
+	if (FAT32(pmp)) {	/* high word of ".." comes from pcl so it agrees with the low word */
+		putushort(denp[0].deHighClust, newcluster >> 16);
+		putushort(denp[1].deHighClust, pcl >> 16);
+	}
+
+	error = bwrite(bp);
+	if (error)
+		goto bad;
+
+	/*
+	 * Now build up a directory entry pointing to the newly allocated
+	 * cluster.  This will be written to an empty slot in the parent
+	 * directory.
+	 */
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("msdosfs_mkdir: no name");
+#endif
+	error = uniqdosname(pdep, cnp, ndirent.de_Name);
+	if (error)
+		goto bad;
+
+	ndirent.de_Attributes = ATTR_DIRECTORY;
+	ndirent.de_LowerCase = 0;
+	ndirent.de_StartCluster = newcluster;
+	ndirent.de_FileSize = 0;
+	ndirent.de_dev = pdep->de_dev;
+	ndirent.de_devvp = pdep->de_devvp;
+	error = createde(&ndirent, pdep, &dep, cnp);
+	if (error)
+		goto bad;
+	if ((cnp->cn_flags & SAVESTART) == 0)
+		zfree(namei_zone, cnp->cn_pnbuf);
+	*ap->a_vpp = DETOV(dep);
+	return (0);
+
+bad:		/* the cluster was allocated: give it back */
+	clusterfree(pmp, newcluster, NULL);
+bad2:		/* nothing (left) allocated; release the pathname buffer and fail */
+	zfree(namei_zone, cnp->cn_pnbuf);
+	return (error);
+}
+
+static int
+msdosfs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;		parent directory
+		struct vnode *a_vp;		directory being removed
+		struct componentname *a_cnp;	supplies the credentials and process used below
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct denode *ip, *dp;
+	struct proc *p = cnp->cn_proc;
+	int error;
+	
+	ip = VTODE(vp);
+	dp = VTODE(dvp);
+
+	/*
+	 * Verify the directory is empty (and valid).
+	 * (Rmdir ".." won't be valid since
+	 *  ".." will contain a reference to
+	 *  the current directory and thus be
+	 *  non-empty.)
+	 */
+	error = 0;
+	if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) {	/* also refuse while DE_RENAME is set */
+		error = ENOTEMPTY;
+		goto out;
+	}
+	/*
+	 * Delete the entry from the directory.  For dos filesystems this
+	 * gets rid of the directory entry on disk, the in memory copy
+	 * still exists but the de_refcnt is <= 0.  This prevents it from
+	 * being found by deget().  When the vput() on dep is done we give
+	 * up access and eventually msdosfs_reclaim() will be called which
+	 * will remove it from the denode cache.
+	 */
+	error = removede(dp, ip);
+	if (error)
+		goto out;
+	/*
+	 * This is where we decrement the link count in the parent
+	 * directory.  Since dos filesystems don't do this we just purge
+	 * the name cache.
+	 */
+	cache_purge(dvp);
+	VOP_UNLOCK(dvp, 0, p);		/* parent is unlocked while the directory is truncated */
+	/*
+	 * Truncate the directory that is being deleted.
+	 */
+	error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, p);	/* its status is what we return */
+	cache_purge(vp);
+
+	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);	/* re-lock the parent before returning */
+out:
+	return (error);
+}
+
+/*
+ * DOS filesystems don't know what symlinks are: always fails with EOPNOTSUPP.
+ */
+static int
+msdosfs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	zfree(namei_zone, ap->a_cnp->cn_pnbuf);	/* the name buffer is released unconditionally */
+	/* VOP_ABORTOP(ap->a_dvp, ap->a_cnp); ??? */
+	return (EOPNOTSUPP);
+}
+
+static int
+msdosfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;		directory to read
+		struct uio *a_uio;		destination; uio_offset is the directory position
+		struct ucred *a_cred;		only referenced by the debug printf
+		int *a_eofflag;			if not NULL, set to 1 at end of directory, else 0
+		int *a_ncookies;		if not NULL, receives the number of cookies stored
+		u_long **a_cookies;		receives the M_TEMP cookie array when a_ncookies is set
+	} */ *ap;
+{
+	int error = 0;
+	int diff;
+	long n;
+	int blsize;
+	long on;
+	u_long cn;
+	u_long fileno;
+	u_long dirsperblk;
+	long bias = 0;
+	daddr_t bn, lbn;
+	struct buf *bp;
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct direntry *dentp;
+	struct dirent dirbuf;
+	struct uio *uio = ap->a_uio;
+	u_long *cookies = NULL;
+	int ncookies = 0;
+	off_t offset, off;
+	int chksum = -1;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n",
+	    ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
+#endif
+
+	/*
+	 * msdosfs_readdir() won't operate properly on regular files since
+	 * it does i/o only with the filesystem vnode, and hence can
+	 * retrieve the wrong block from the buffer cache for a plain file.
+	 * So, fail attempts to readdir() on a plain file.
+	 */
+	if ((dep->de_Attributes & ATTR_DIRECTORY) == 0)
+		return (ENOTDIR);
+
+	/*
+	 * To be safe, initialize dirbuf
+	 */
+	bzero(dirbuf.d_name, sizeof(dirbuf.d_name));
+
+	/*
+	 * If the user buffer is smaller than the size of one dos directory
+	 * entry or the file offset is not a multiple of the size of a
+	 * directory entry, then we fail the read.
+	 */
+	off = offset = uio->uio_offset;
+	if (uio->uio_resid < sizeof(struct direntry) ||
+	    (offset & (sizeof(struct direntry) - 1)))
+		return (EINVAL);
+
+	if (ap->a_ncookies) {
+		ncookies = uio->uio_resid / 16;	/* NOTE(review): 16 looks like a minimum dirent size - confirm */
+		MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
+		       M_WAITOK);
+		*ap->a_cookies = cookies;
+		*ap->a_ncookies = ncookies;
+	}
+
+	dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
+
+	/*
+	 * If they are reading from the root directory then, we simulate
+	 * the . and .. entries since these don't exist in the root
+	 * directory.  We also set the offset bias to make up for having to
+	 * simulate these entries. By this I mean that at file offset 64 we
+	 * read the first entry in the root directory that lives on disk.
+	 */
+	if (dep->de_StartCluster == MSDOSFSROOT
+	    || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) {
+#if 0
+		printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n",
+		    offset);
+#endif
+		bias = 2 * sizeof(struct direntry);
+		if (offset < bias) {
+			for (n = (int)offset / sizeof(struct direntry);
+			     n < 2; n++) {
+				if (FAT32(pmp))
+					dirbuf.d_fileno = cntobn(pmp,
+								 pmp->pm_rootdirblk)
+							  * dirsperblk;
+				else
+					dirbuf.d_fileno = 1;
+				dirbuf.d_type = DT_DIR;
+				switch (n) {
+				case 0:
+					dirbuf.d_namlen = 1;
+					strcpy(dirbuf.d_name, ".");
+					break;
+				case 1:
+					dirbuf.d_namlen = 2;
+					strcpy(dirbuf.d_name, "..");
+					break;
+				}
+				dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
+				if (uio->uio_resid < dirbuf.d_reclen)
+					goto out;
+				error = uiomove((caddr_t) &dirbuf,
+						dirbuf.d_reclen, uio);
+				if (error)
+					goto out;
+				offset += sizeof(struct direntry);
+				off = offset;
+				if (cookies) {
+					*cookies++ = offset;
+					if (--ncookies <= 0)
+						goto out;
+				}
+			}
+		}
+	}
+
+	off = offset;
+	while (uio->uio_resid > 0) {	/* one pass per directory cluster */
+		lbn = de_cluster(pmp, offset - bias);
+		on = (offset - bias) & pmp->pm_crbomask;
+		n = min(pmp->pm_bpcluster - on, uio->uio_resid);
+		diff = dep->de_FileSize - (offset - bias);
+		if (diff <= 0)
+			break;
+		n = min(n, diff);
+		error = pcbmap(dep, lbn, &bn, &cn, &blsize);
+		if (error)
+			break;
+		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);	/* returns directly: the cookie count and uio_offset fixups at out: are skipped */
+		}
+		n = min(n, blsize - bp->b_resid);
+
+		/*
+		 * Convert from dos directory entries to fs-independent
+		 * directory entries.
+		 */
+		for (dentp = (struct direntry *)(bp->b_data + on);
+		     (char *)dentp < bp->b_data + on + n;
+		     dentp++, offset += sizeof(struct direntry)) {
+#if 0
+			printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n",
+			    dentp, prev, crnt, dentp->deName[0], dentp->deAttributes);
+#endif
+			/*
+			 * If this is an unused entry, we can stop.
+			 */
+			if (dentp->deName[0] == SLOT_EMPTY) {
+				brelse(bp);
+				goto out;
+			}
+			/*
+			 * Skip deleted entries.
+			 */
+			if (dentp->deName[0] == SLOT_DELETED) {
+				chksum = -1;
+				continue;
+			}
+
+			/*
+			 * Handle Win95 long directory entries
+			 */
+			if (dentp->deAttributes == ATTR_WIN95) {
+				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
+					continue;
+				chksum = win2unixfn((struct winentry *)dentp,
+					&dirbuf, chksum,
+					pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+					pmp->pm_u2w);
+				continue;
+			}
+
+			/*
+			 * Skip volume labels
+			 */
+			if (dentp->deAttributes & ATTR_VOLUME) {
+				chksum = -1;
+				continue;
+			}
+			/*
+			 * This computation of d_fileno must match
+			 * the computation of va_fileid in
+			 * msdosfs_getattr.
+			 */
+			if (dentp->deAttributes & ATTR_DIRECTORY) {
+				fileno = getushort(dentp->deStartCluster);
+				if (FAT32(pmp))
+					fileno |= getushort(dentp->deHighClust) << 16;
+				/* if this is the root directory */
+				if (fileno == MSDOSFSROOT)
+					if (FAT32(pmp))
+						fileno = cntobn(pmp,
+								pmp->pm_rootdirblk)
+							 * dirsperblk;
+					else	/* pairs with the inner if (FAT32(pmp)) */
+						fileno = 1;
+				else	/* pairs with if (fileno == MSDOSFSROOT) */
+					fileno = cntobn(pmp, fileno) * dirsperblk;
+				dirbuf.d_fileno = fileno;
+				dirbuf.d_type = DT_DIR;
+			} else {
+				dirbuf.d_fileno = offset / sizeof(struct direntry);
+				dirbuf.d_type = DT_REG;
+			}
+			if (chksum != winChksum(dentp->deName))	/* no long name matching this entry: use the short name */
+				dirbuf.d_namlen = dos2unixfn(dentp->deName,
+				    (u_char *)dirbuf.d_name,
+				    dentp->deLowerCase |
+					((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
+					(LCASE_BASE | LCASE_EXT) : 0),
+				    pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+				    pmp->pm_d2u,
+				    pmp->pm_flags & MSDOSFSMNT_ULTABLE,
+				    pmp->pm_ul);
+			else	/* keep the name already in dirbuf; just terminate it */
+				dirbuf.d_name[dirbuf.d_namlen] = 0;
+			chksum = -1;
+			dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
+			if (uio->uio_resid < dirbuf.d_reclen) {
+				brelse(bp);
+				goto out;
+			}
+			error = uiomove((caddr_t) &dirbuf,
+					dirbuf.d_reclen, uio);
+			if (error) {
+				brelse(bp);
+				goto out;
+			}
+			if (cookies) {
+				*cookies++ = offset + sizeof(struct direntry);
+				if (--ncookies <= 0) {
+					brelse(bp);
+					goto out;
+				}
+			}
+			off = offset + sizeof(struct direntry);	/* resume point: just past the entry copied out */
+		}
+		brelse(bp);
+	}
+out:
+	/* Subtract unused cookies */
+	if (ap->a_ncookies)
+		*ap->a_ncookies -= ncookies;
+
+	uio->uio_offset = off;
+
+	/*
+	 * Set the eofflag (NFS uses it)
+	 */
+	if (ap->a_eofflag)
+		if (dep->de_FileSize - (offset - bias) <= 0)
+			*ap->a_eofflag = 1;
+		else	/* pairs with the inner if */
+			*ap->a_eofflag = 0;
+
+	return (error);
+}
+
+static int
+msdosfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;		not referenced by this routine
+		struct componentname *a_cnp;	name whose buffer may be released
+	} */ *ap;
+{
+	if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)	/* HASBUF set, SAVESTART clear */
+		zfree(namei_zone, ap->a_cnp->cn_pnbuf);
+	return (0);
+}
+
+/*
+ * a_vp  - vnode of the file of interest
+ * a_bn  - which cluster we are interested in mapping to a filesystem block number.
+ * a_vpp - if not NULL, returns the vnode for the block special file holding the
+ *	   filesystem containing the file of interest (the denode's de_devvp)
+ * a_bnp - where to return the filesystem relative block number; if NULL, nothing is mapped
+ */
+static int
+msdosfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct denode *dep = VTODE(ap->a_vp);
+
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = dep->de_devvp;
+	if (ap->a_bnp == NULL)
+		return (0);
+	if (ap->a_runp) {
+		/*
+		 * Sequential clusters should be counted here.
+		 */
+		*ap->a_runp = 0;
+	}
+	if (ap->a_runb) {
+		*ap->a_runb = 0;	/* likewise always reported as 0 */
+	}
+	return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0));
+}
+
+static int
+msdosfs_strategy(ap)
+	struct vop_strategy_args /* {
+		struct vnode *a_vp;		not referenced; bp->b_vp is used instead
+		struct buf *a_bp;		buffer to read or write
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct denode *dep = VTODE(bp->b_vp);
+	struct vnode *vp;
+	int error = 0;
+
+	if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR)
+		panic("msdosfs_strategy: spec");
+	/*
+	 * If we don't already know the filesystem relative block number
+	 * then get it using pcbmap().  If pcbmap() returns the block
+	 * number as -1 then we've got a hole in the file.  DOS filesystems
+	 * don't allow files with holes, so we shouldn't ever see this.
+	 */
+	if (bp->b_blkno == bp->b_lblkno) {
+		error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
+		if (error) {
+			bp->b_error = error;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);	/* complete the buffer with the error recorded */
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			vfs_bio_clrbuf(bp);
+	}
+	if (bp->b_blkno == -1) {	/* hole: no device i/o, just complete the buffer */
+		biodone(bp);
+		return (0);
+	}
+	/*
+	 * Read/write the block from/to the disk that contains the desired
+	 * file block.
+	 */
+	vp = dep->de_devvp;
+	bp->b_dev = vp->v_rdev;
+	VOP_STRATEGY(vp, bp);	/* hand the buffer to the device vnode; its result is not checked */
+	return (0);
+}
+
+static int
+msdosfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct denode *dep = VTODE(ap->a_vp);
+
+	printf(
+	    "tag VT_MSDOSFS, startcluster %lu, dircluster %lu, diroffset %lu ",
+	       dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
+	printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev));
+	lockmgr_printinfo(&dep->de_lock);	/* appends the denode lock's state to the line */
+	printf("\n");
+	return (0);
+}
+
+static int
+msdosfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;			which _PC_* limit is being queried
+		int *a_retval;			out: value of that limit
+	} */ *ap;
+{
+	struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_NAME_MAX:	/* NOTE(review): 12 presumably is an 8.3 name plus the dot - confirm */
+		*ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? WIN_MAXLEN : 12;
+		return (0);
+	case _PC_PATH_MAX:
+		*ap->a_retval = PATH_MAX;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_NO_TRUNC:
+		*ap->a_retval = 0;
+		return (0);
+	default:	/* any other name is rejected */
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * get page routine: hands the request to vnode_pager_generic_getpages()
+ *
+ * XXX By default, wimp out... note that a_offset is ignored (and always
+ * XXX has been).
+ */
+int
+msdosfs_getpages(ap)
+	struct vop_getpages_args *ap;
+{
+	return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+		ap->a_reqpage);
+}
+
+/*
+ * put page routine: hands the request to vnode_pager_generic_putpages()
+ *
+ * XXX By default, wimp out... note that a_offset is ignored (and always
+ * XXX has been).
+ */
+int
+msdosfs_putpages(ap)
+	struct vop_putpages_args *ap;
+{
+	return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
+		ap->a_sync, ap->a_rtvals);
+}
+
+/* Global vfs data structures for msdosfs: the vnode operation table and its descriptor */
+vop_t **msdosfs_vnodeop_p;
+static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_abortop_desc,		(vop_t *) msdosfs_abortop },
+	{ &vop_access_desc,		(vop_t *) msdosfs_access },
+	{ &vop_bmap_desc,		(vop_t *) msdosfs_bmap },
+	{ &vop_cachedlookup_desc,	(vop_t *) msdosfs_lookup },
+	{ &vop_close_desc,		(vop_t *) msdosfs_close },
+	{ &vop_create_desc,		(vop_t *) msdosfs_create },
+	{ &vop_fsync_desc,		(vop_t *) msdosfs_fsync },
+	{ &vop_getattr_desc,		(vop_t *) msdosfs_getattr },
+	{ &vop_inactive_desc,		(vop_t *) msdosfs_inactive },
+	{ &vop_islocked_desc,		(vop_t *) vop_stdislocked },
+	{ &vop_link_desc,		(vop_t *) msdosfs_link },
+	{ &vop_lock_desc,		(vop_t *) vop_stdlock },
+	{ &vop_lookup_desc,		(vop_t *) vfs_cache_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) msdosfs_mkdir },
+	{ &vop_mknod_desc,		(vop_t *) msdosfs_mknod },
+	{ &vop_pathconf_desc,		(vop_t *) msdosfs_pathconf },
+	{ &vop_print_desc,		(vop_t *) msdosfs_print },
+	{ &vop_read_desc,		(vop_t *) msdosfs_read },
+	{ &vop_readdir_desc,		(vop_t *) msdosfs_readdir },
+	{ &vop_reclaim_desc,		(vop_t *) msdosfs_reclaim },
+	{ &vop_remove_desc,		(vop_t *) msdosfs_remove },
+	{ &vop_rename_desc,		(vop_t *) msdosfs_rename },
+	{ &vop_rmdir_desc,		(vop_t *) msdosfs_rmdir },
+	{ &vop_setattr_desc,		(vop_t *) msdosfs_setattr },
+	{ &vop_strategy_desc,		(vop_t *) msdosfs_strategy },
+	{ &vop_symlink_desc,		(vop_t *) msdosfs_symlink },
+	{ &vop_unlock_desc,		(vop_t *) vop_stdunlock },
+	{ &vop_write_desc,		(vop_t *) msdosfs_write },
+	{ &vop_getpages_desc,		(vop_t *) msdosfs_getpages },
+	{ &vop_putpages_desc,		(vop_t *) msdosfs_putpages },
+	{ NULL, NULL }	/* end of table */
+};
+static struct vnodeopv_desc msdosfs_vnodeop_opv_desc =
+	{ &msdosfs_vnodeop_p, msdosfs_vnodeop_entries };
+
+VNODEOP_SET(msdosfs_vnodeop_opv_desc);
diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h
new file mode 100644
index 0000000..31a2a67
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfsmount.h
@@ -0,0 +1,239 @@
+/*	$Id: msdosfsmount.h,v 1.15 1998/02/23 09:39:29 ache Exp $ */
+/*	$NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $	*/
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ *    derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#ifndef _MSDOSFS_MSDOSFSMOUNT_H_
+#define	_MSDOSFS_MSDOSFSMOUNT_H_
+
+#ifdef KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_MSDOSFSMNT);
+#endif
+
+/*
+ * Layout of the mount control block for a msdos file system.
+ */
+struct msdosfsmount {
+	struct mount *pm_mountp;/* vfs mount struct for this fs */
+	dev_t pm_dev;		/* block special device mounted */
+	uid_t pm_uid;		/* uid to set as owner of the files */
+	gid_t pm_gid;		/* gid to set as owner of the files */
+	mode_t pm_mask;		/* mask to and with file protection bits */
+	struct vnode *pm_devvp;	/* vnode for block device mntd */
+	struct bpb50 pm_bpb;	/* BIOS parameter blk for this fs */
+	u_long pm_FATsecs;	/* actual number of fat sectors */
+	u_long pm_fatblk;	/* block # of first FAT */
+	u_long pm_rootdirblk;	/* block # (cluster # for FAT32) of the root directory */
+	u_long pm_rootdirsize;	/* size in blocks (not clusters) */
+	u_long pm_firstcluster;	/* block number of first cluster */
+	u_long pm_nmbrofclusters;	/* # of clusters in filesystem */
+	u_long pm_maxcluster;	/* maximum cluster number */
+	u_long pm_freeclustercount;	/* number of free clusters */
+	u_long pm_cnshift;	/* shift file offset right this amount to get a cluster number */
+	u_long pm_crbomask;	/* and a file offset with this mask to get cluster rel offset */
+	u_long pm_bnshift;	/* shift file offset right this amount to get a block number */
+	u_long pm_bpcluster;	/* bytes per cluster */
+	u_long pm_fmod;		/* ~0 if fs is modified, this can rollover to 0	*/
+	u_long pm_fatblocksize;	/* size of fat blocks in bytes */
+	u_long pm_fatblocksec;	/* size of fat blocks in sectors */
+	u_long pm_fatsize;	/* size of fat in bytes */
+	u_long pm_fatmask;	/* mask to use for fat numbers */
+	u_long pm_fsinfo;	/* fsinfo block number */
+	u_long pm_nxtfree;	/* next free cluster in fsinfo block */
+	u_int pm_fatmult;	/* these 2 values are used in fat */
+	u_int pm_fatdiv;	/*	offset computation (see FATOFS) */
+	u_int pm_curfat;	/* current fat for FAT32 (0 otherwise) */
+	u_int *pm_inusemap;	/* ptr to bitmap of in-use clusters */
+	u_int pm_flags;		/* see below */
+	struct netexport pm_export;	/* export information */
+	u_int16_t pm_u2w[128];  /* Local->Unicode table */
+	u_int8_t  pm_ul[128];   /* Local upper->lower table */
+	u_int8_t  pm_lu[128];   /* Local lower->upper table */
+	u_int8_t  pm_d2u[128];  /* DOS->local table */
+	u_int8_t  pm_u2d[128];  /* Local->DOS table */
+};
+/* Byte offset in FAT on filesystem pmp, cluster cn */
+#define	FATOFS(pmp, cn)	((cn) * (pmp)->pm_fatmult / (pmp)->pm_fatdiv)
+
+
+#define	VFSTOMSDOSFS(mp)	((struct msdosfsmount *)(mp)->mnt_data)	/* mount -> msdosfs mount block; (mp) parenthesized so expression arguments expand correctly */
+
+/* Number of bits in one pm_inusemap item: */
+#define	N_INUSEBITS	(8 * sizeof(u_int))
+
+/*
+ * Shorthand for fields in the bpb contained in the msdosfsmount structure.
+ */
+#define	pm_BytesPerSec	pm_bpb.bpbBytesPerSec
+#define	pm_ResSectors	pm_bpb.bpbResSectors
+#define	pm_FATs		pm_bpb.bpbFATs
+#define	pm_RootDirEnts	pm_bpb.bpbRootDirEnts
+#define	pm_Sectors	pm_bpb.bpbSectors
+#define	pm_Media	pm_bpb.bpbMedia
+#define	pm_SecPerTrack	pm_bpb.bpbSecPerTrack
+#define	pm_Heads	pm_bpb.bpbHeads
+#define	pm_HiddenSects	pm_bpb.bpbHiddenSecs
+#define	pm_HugeSectors	pm_bpb.bpbHugeSectors
+
+/*
+ * Pointer to the direntry at cluster-relative offset (dirofs & pm_crbomask) in bp's data
+ */
+#define	bptoep(pmp, bp, dirofs) \
+	((struct direntry *)(((bp)->b_data)	\
+	 + ((dirofs) & (pmp)->pm_crbomask)))
+
+/*
+ * Convert block number to cluster number (shift right by pm_cnshift - pm_bnshift)
+ */
+#define	de_bn2cn(pmp, bn) \
+	((bn) >> ((pmp)->pm_cnshift - (pmp)->pm_bnshift))
+
+/*
+ * Convert cluster number to block number (shift left by pm_cnshift - pm_bnshift)
+ */
+#define	de_cn2bn(pmp, cn) \
+	((cn) << ((pmp)->pm_cnshift - (pmp)->pm_bnshift))
+
+/*
+ * Convert file offset to cluster number
+ */
+#define de_cluster(pmp, off) \
+	((off) >> (pmp)->pm_cnshift)
+
+/*
+ * Clusters required to hold size bytes (rounded up to whole clusters)
+ */
+#define	de_clcount(pmp, size) \
+	(((size) + (pmp)->pm_bpcluster - 1) >> (pmp)->pm_cnshift)
+
+/*
+ * Convert file offset to the block number of the cluster containing it
+ */
+#define de_blk(pmp, off) \
+	(de_cn2bn(pmp, de_cluster((pmp), (off))))
+
+/*
+ * Convert cluster number to file offset
+ */
+#define	de_cn2off(pmp, cn) \
+	((cn) << (pmp)->pm_cnshift)
+
+/*
+ * Convert block number to file offset
+ */
+#define	de_bn2off(pmp, bn) \
+	((bn) << (pmp)->pm_bnshift)
+/*
+ * Map a cluster number into a filesystem relative block number (CLUST_FIRST maps to pm_firstcluster).
+ */
+#define	cntobn(pmp, cn) \
+	(de_cn2bn((pmp), (cn)-CLUST_FIRST) + (pmp)->pm_firstcluster)
+
+/*
+ * Calculate block number for directory entry in root dir, offset dirofs
+ */
+#define	roottobn(pmp, dirofs) \
+	(de_blk((pmp), (dirofs)) + (pmp)->pm_rootdirblk)
+
+/*
+ * Calculate block number for directory entry at cluster dirclu, offset
+ * dirofs (dirofs is only used when dirclu is MSDOSFSROOT)
+ */
+#define	detobn(pmp, dirclu, dirofs) \
+	((dirclu) == MSDOSFSROOT \
+	 ? roottobn((pmp), (dirofs)) \
+	 : cntobn((pmp), (dirclu)))
+
+int msdosfs_init __P((struct vfsconf *vfsp));
+int msdosfs_mountroot __P((void));
+
+#endif /* KERNEL */
+
+/*
+ *  Arguments to mount MSDOS filesystems.
+ */
+struct msdosfs_args {
+	char	*fspec;		/* block special device holding the fs to mount */
+	struct	export_args export;	/* network export information */
+	uid_t	uid;		/* uid that owns msdosfs files */
+	gid_t	gid;		/* gid that owns msdosfs files */
+	mode_t	mask;		/* mask to be applied for msdosfs perms */
+	int	flags;		/* see below */
+	int magic;		/* version number (see MSDOSFS_ARGSMAGIC) */
+	u_int16_t u2w[128];     /* Local->Unicode table */
+	u_int8_t  ul[128];      /* Local upper->lower table */
+	u_int8_t  lu[128];      /* Local lower->upper table */
+	u_int8_t  d2u[128];     /* DOS->local table */
+	u_int8_t  u2d[128];     /* Local->DOS table */
+};
+
+/*
+ * Msdosfs mount options:
+ */
+#define	MSDOSFSMNT_SHORTNAME	1	/* Force old DOS short names only */
+#define	MSDOSFSMNT_LONGNAME	2	/* Force Win'95 long names */
+#define	MSDOSFSMNT_NOWIN95	4	/* Completely ignore Win95 entries */
+#ifndef __FreeBSD__
+#define	MSDOSFSMNT_GEMDOSFS	8	/* This is a gemdos-flavour */
+#endif
+#define MSDOSFSMNT_U2WTABLE     0x10    /* Local->Unicode and local<->DOS   */
+					/* tables loaded                    */
+#define MSDOSFSMNT_ULTABLE      0x20    /* Local upper<->lower table loaded */
+/* All flags above: */
+#define	MSDOSFSMNT_MNTOPT \
+	(MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \
+	 /*|MSDOSFSMNT_GEMDOSFS*/|MSDOSFSMNT_U2WTABLE|MSDOSFSMNT_ULTABLE)
+#define	MSDOSFSMNT_RONLY	0x80000000	/* mounted read-only	*/
+#define	MSDOSFSMNT_WAITONFAT	0x40000000	/* mounted synchronous	*/
+#define	MSDOSFS_FATMIRROR	0x20000000	/* FAT is mirrored */
+
+#define MSDOSFS_ARGSMAGIC	0xe4eff300
+
+#endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
new file mode 100644
index 0000000..7d46a11
--- /dev/null
+++ b/sys/fs/nullfs/null.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null.h	8.3 (Berkeley) 8/20/94
+ *
+ * $Id: null.h,v 1.7 1997/05/25 04:50:02 peter Exp $
+ */
+
+struct null_args {
+	char		*target;	/* User-space path of the loopback target */
+};
+
+struct null_mount {
+	struct mount	*nullm_vfs;	/* Mount of the lower filesystem */
+	struct vnode	*nullm_rootvp;	/* Referenced root alias vnode */
+};
+
+#ifdef KERNEL
+/*
+ * One entry in the cache of vnode references: lower vnode <-> alias vnode.
+ */
+struct null_node {
+	LIST_ENTRY(null_node)	null_hash;	/* Hash list */
+	struct vnode	        *null_lowervp;	/* Lower vnode, VREFed once */
+	struct vnode		*null_vnode;	/* Back pointer to alias vnode */
+};
+
+extern int nullfs_init __P((struct vfsconf *vfsp));
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+/* Accessors between mount/vnode structures and the null layer's private data. */
+#define	MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
+#define	VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define	NULLTOV(xp) ((xp)->null_vnode)
+#ifdef NULLFS_DIAGNOSTIC	/* checked variant goes through null_checkvp() */
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define	NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
+#else /* !NULLFS_DIAGNOSTIC */
+#define	NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
+#endif /* NULLFS_DIAGNOSTIC */
+
+extern int	null_bypass __P((struct vop_generic_args *ap));
+
+extern vop_t **null_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
new file mode 100644
index 0000000..603f418
--- /dev/null
+++ b/sys/fs/nullfs/null_subr.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_subr.c	8.7 (Berkeley) 5/14/95
+ *
+ * $Id: null_subr.c,v 1.18 1998/07/15 02:32:18 bde Exp $
+ */
+
+#include "opt_debug_nullfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+#define	NNULLNODECACHE 16		/* element count handed to hashinit() */
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode.  When an
+ * entry is added the lower vnode is VREF'd.  When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+/* Map a lower vnode's address to its hash chain head (argument parenthesized). */
+#define	NULL_NHASH(vp) \
+	(&null_node_hashtbl[(((uintptr_t)(vp))>>LOG2_SIZEVNODE) & null_node_hash])
+static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static u_long null_node_hash;		/* index mask, set by hashinit() */
+
+static int	null_node_alloc __P((struct mount *mp, struct vnode *lowervp,
+				     struct vnode **vpp));
+static struct vnode *
+		null_node_find __P((struct mount *mp, struct vnode *lowervp));
+
+/*
+ * Initialise cache headers: build the alias hash table and its mask.
+ */
+int
+nullfs_init(vfsp)
+	struct vfsconf *vfsp;
+{
+	/* vfsp is not used here; the function always returns 0. */
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_init\n");		/* printed during system boot */
+#endif /* NULLFS_DIAGNOSTIC */
+	null_node_hashtbl = hashinit(NNULLNODECACHE, M_CACHE, &null_node_hash);
+	return (0);
+}
+
+/*
+ * Return mp's alias for lowervp, referenced via vget(), or NULLVP if none.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+	struct mount *mp;
+	struct vnode *lowervp;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct null_node_hashhead *hd;
+	struct null_node *a;
+	struct vnode *vp;
+
+	/*
+	 * Find hash base, and then search the (two-way) linked
+	 * list looking for a null_node structure which is referencing
+	 * the lower vnode on this mount.  If found, then vget() the
+	 * alias vnode (but do NOT touch the lower vnode's VREF counter).
+	 */
+	hd = NULL_NHASH(lowervp);
+loop:
+	for (a = hd->lh_first; a != 0; a = a->null_hash.le_next) {
+		if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
+			vp = NULLTOV(a);
+			/*
+			 * We need vget for the VXLOCK
+			 * stuff, but we don't want to lock
+			 * the lower node.  If vget fails,
+			 * rescan the chain from its head.
+			 */
+			if (vget(vp, 0, p)) {
+				printf ("null_node_find: vget failed.\n");
+				goto loop;
+			};
+			return (vp);
+		}
+	}
+
+	return NULLVP;
+}
+
+
+/*
+ * Make a new null_node and alias vnode for lowervp; the alias is returned
+ * in *vpp.  If an alias was cached in the meantime, return that one instead.
+ * A newly inserted node takes an extra reference on lowervp.
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct null_node_hashhead *hd;
+	struct null_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced
+	 * elsewhere if MALLOC should block.
+	 */
+	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+
+	error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp);
+	if (error) {
+		FREE(xp, M_TEMP);
+		return (error);
+	}
+	vp = *vpp;
+
+	vp->v_type = lowervp->v_type;
+	xp->null_vnode = vp;
+	vp->v_data = xp;
+	xp->null_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 */
+	othervp = null_node_find(mp, lowervp);
+	if (othervp) {
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX NOTE(review): v_data still points at freed xp */
+		*vpp = othervp;
+		return 0;
+	};
+	VREF(lowervp);   /* Extra VREF will be vrele'd in null_node_create */
+	hd = NULL_NHASH(lowervp);
+	LIST_INSERT_HEAD(hd, xp, null_hash);
+	return 0;
+}
+
+
+/*
+ * Find the existing alias vnode referring to lowervp on mp,
+ * otherwise make a new one.  On success the caller's reference on
+ * lowervp is released and the alias is returned in *newvpp.
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	aliasvp = null_node_find(mp, lowervp);
+	if (aliasvp) {
+		/*
+		 * null_node_find has taken another reference
+		 * to the alias vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		vprint("null_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); --- done in null_node_find */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		printf("null_node_create: create new alias vnode\n");
+#endif
+
+		/*
+		 * Make new vnode reference the null_node.
+		 */
+		error = null_node_alloc(mp, lowervp, &aliasvp);
+		if (error)
+			return error;
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+	/* Drop the caller's reference on lowervp; see VREF in null_node_alloc. */
+	vrele(lowervp);
+
+#ifdef DIAGNOSTIC
+	if (lowervp->v_usecount < 1) {
+		/* Should never happen... */
+		vprint ("null_node_create: alias ", aliasvp);
+		vprint ("null_node_create: lower ", lowervp);
+		panic ("null_node_create: lower has 0 usecount.");
+	};
+#endif /* DIAGNOSTIC */
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("null_node_create: alias", aliasvp);
+	vprint("null_node_create: lower", lowervp);
+#endif /* NULLFS_DIAGNOSTIC */
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef NULLFS_DIAGNOSTIC
+#include "opt_ddb.h"
+
+#ifdef DDB
+#define	null_checkvp_barrier	1
+#else
+#define	null_checkvp_barrier	0
+#endif /* DDB */
+/* Validate vp's null_node and return its lower vnode; panics when it is bad. */
+struct vnode *
+null_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct null_node *a = VTONULL(vp);
+#ifdef notyet
+	/*
+	 * Can't do this check because vop_reclaim runs
+	 * with a funny vop vector.
+	 */
+	if (vp->v_op != null_vnodeop_p) {
+		printf ("null_checkvp: on non-null-node\n");
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	};
+#endif
+	if (a->null_lowervp == NULLVP) {
+		/* Should never happen. NOTE(review): dumps 8 u_longs, maybe past *a */
+		int i; u_long *p;
+		printf("vp = %p, ZERO ptr\n", (void *)vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)
+			printf(" %lx", p[i]);
+		printf("\n");
+		/* wait for debugger (spins only when built with DDB) */
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic("null_checkvp");
+	}
+	if (a->null_lowervp->v_usecount < 1) {
+		int i; u_long *p;
+		printf("vp = %p, unref'ed lowervp\n", (void *)vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)
+			printf(" %lx", p[i]);
+		printf("\n");
+		/* wait for debugger (spins only when built with DDB) */
+		while (null_checkvp_barrier) /*WAIT*/ ;
+		panic ("null with unref'ed lowervp");
+	};
+#ifdef notyet
+	printf("null %x/%d -> %x/%d [%s, %d]\n",
+	        NULLTOV(a), NULLTOV(a)->v_usecount,
+		a->null_lowervp, a->null_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return a->null_lowervp;
+}
+#endif /* NULLFS_DIAGNOSTIC */
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
new file mode 100644
index 0000000..4ead5bd
--- /dev/null
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_vfsops.c	8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c	1.2 (Berkeley) 6/18/92
+ * $Id: null_vfsops.c,v 1.27 1998/07/30 17:40:45 bde Exp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include "opt_debug_nullfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <miscfs/nullfs/null.h>
+
+static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure");
+/* Forward declarations of the entry points placed in null_vfsops below. */
+static int	nullfs_fhtovp __P((struct mount *mp, struct fid *fidp,
+				   struct sockaddr *nam, struct vnode **vpp,
+				   int *exflagsp, struct ucred **credanonp));
+static int	nullfs_mount __P((struct mount *mp, char *path, caddr_t data,
+				  struct nameidata *ndp, struct proc *p));
+static int	nullfs_quotactl __P((struct mount *mp, int cmd, uid_t uid,
+				     caddr_t arg, struct proc *p));
+static int	nullfs_root __P((struct mount *mp, struct vnode **vpp));
+static int	nullfs_start __P((struct mount *mp, int flags, struct proc *p));
+static int	nullfs_statfs __P((struct mount *mp, struct statfs *sbp,
+				   struct proc *p));
+static int	nullfs_sync __P((struct mount *mp, int waitfor,
+				 struct ucred *cred, struct proc *p));
+static int	nullfs_unmount __P((struct mount *mp, int mntflags,
+				    struct proc *p));
+static int	nullfs_vget __P((struct mount *mp, ino_t ino,
+				 struct vnode **vpp));
+static int	nullfs_vptofh __P((struct vnode *vp, struct fid *fhp));
+
+/*
+ * Mount null layer.
+ *
+ * data points at a user-space struct null_args; args.target names the
+ * lower root.  The lower root is wrapped in an alias vnode and both are
+ * recorded in a new null_mount hung off mp->mnt_data.  path is copied
+ * into the mount's f_mntonname.
+ * Returns 0 on success or an errno.  On failures after the lookup, the
+ * lock and reference namei() left on the lower root are released.
+ */
+static int
+nullfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct null_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *nullm_rootvp;
+	struct null_mount *xmp;
+	u_int size;
+	int isvnunlocked = 0;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount(mp = %p)\n", (void *)mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		return (EOPNOTSUPP);
+	}
+
+	/* Get argument */
+	error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
+	if (error)
+		return (error);
+
+	/*
+	 * Unlock lower node to avoid deadlock.
+	 * (XXX) VOP_ISLOCKED is needed?
+	 */
+	if ((mp->mnt_vnodecovered->v_op == null_vnodeop_p) &&
+		VOP_ISLOCKED(mp->mnt_vnodecovered)) {
+		VOP_UNLOCK(mp->mnt_vnodecovered, 0, p);
+		isvnunlocked = 1;
+	}
+	/* Find lower node; LOCKLEAF returns it locked and referenced */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	error = namei(ndp);
+	/* Re-lock vnode. */
+	if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered))
+		vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, p);
+
+	if (error)
+		return (error);
+
+	/* Keep the lower root; the parent from WANTPARENT is not needed */
+	lowerrootvp = ndp->ni_vp;
+
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULLVP;
+
+	/*
+	 * Check multi null mount to avoid `lock against myself' panic.
+	 * NOTE(review): VTONULL() is applied here even when the covered
+	 * vnode is not a null vnode; confirm that this is intended.
+	 */
+	if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
+#ifdef DIAGNOSTIC
+		printf("nullfs_mount: multi null mount?\n");
+#endif
+		vput(lowerrootvp);	/* don't leak namei()'s lock + ref */
+		return (EDEADLK);
+	}
+
+	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+				M_NULLFSMNT, M_WAITOK);	/* XXX */
+
+	/* Save reference to underlying FS */
+	xmp->nullm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = null_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  On failure vp was never set, so
+	 * it must not be touched; release lowerrootvp (still locked) instead.
+	 */
+	if (error) {
+		vput(lowerrootvp);
+		free(xmp, M_NULLFSMNT);	/* XXX */
+		return (error);
+	}
+	/*
+	 * Unlock the node (either the lower or the alias)
+	 */
+	VOP_UNLOCK(vp, 0, p);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in nullfs_unmount.
+	 */
+	nullm_rootvp = vp;
+	nullm_rootvp->v_flag |= VROOT;
+	xmp->nullm_rootvp = nullm_rootvp;
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) xmp;
+	vfs_getnewfsid(mp);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void)nullfs_statfs(mp, &mp->mnt_stat, p);
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.  Always returns 0.
+ */
+static int
+nullfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	return (0);
+	/* Not forwarded: VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, p); */
+}
+
+/*
+ * Unmount: flush the mount's vnodes, drop the alias root, free null_mount.
+ */
+static int
+nullfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	int error;
+	int flags = 0;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_unmount(mp = %p)\n", (void *)mp);
+#endif
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...  (The code is compiled out.)
+	 */
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (nullm_rootvp->v_usecount > 1)	/* tested before vflush, MNT_FORCE or not */
+		return (EBUSY);
+	error = vflush(mp, nullm_rootvp, flags);
+	if (error)
+		return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("alias root of lower", nullm_rootvp);
+#endif
+	/*
+	 * Release the mount's reference on the alias root vnode
+	 */
+	vrele(nullm_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(nullm_rootvp);
+	/*
+	 * Finally, throw away the null_mount structure
+	 */
+	free(mp->mnt_data, M_NULLFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return 0;
+}
+
+/*
+ * Hand back the mount's alias root in *vpp, referenced and locked.
+ * Fails with EDEADLK when the root is already locked.
+ */
+static int
+nullfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+	struct proc *curp = curproc;	/* XXX */
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
+	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+#endif
+
+	rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	VREF(rootvp);
+	if (VOP_ISLOCKED(rootvp)) {
+		/* XXX Should we check type of node? */
+#ifdef DIAGNOSTIC
+		printf("nullfs_root: multi null mount?\n");
+#endif
+		vrele(rootvp);
+		return (EDEADLK);
+	}
+
+	/* Root was not locked: take the exclusive lock for the caller. */
+	vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY, curp);
+	*vpp = rootvp;
+	return (0);
+}
+
+static int
+nullfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;	/* null mount; the call goes to its lower fs */
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, p);
+}
+
+/* Fill *sbp from the lower fs's statfs; fsid and mount names come from mp. */
+static int
+nullfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int rc;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
+	    (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+	    (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+#endif
+
+	bzero(&lower, sizeof(lower));
+	rc = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &lower, p);
+	if (rc != 0)
+		return (rc);
+
+	/* type, flags, sizes and counts are taken from the lower layer */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+	if (sbp == &mp->mnt_stat)
+		return (0);	/* identity fields are already in place */
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+static int
+nullfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * XXX - Assumes no data cached at null layer; does nothing.
+	 */
+	return (0);
+}
+
+static int
+nullfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* Passed straight to the lower fs; *vpp is not wrapped in an alias here. */
+	return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+}
+
+static int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct sockaddr *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	/* Passed straight to the lower fs; *vpp is not wrapped in an alias here. */
+	return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, nam, 
+			  vpp, exflagsp, credanonp);
+}
+
+static int
+nullfs_vptofh(vp, fhp)
+	struct vnode *vp;	/* null alias; its lower vnode is what gets used */
+	struct fid *fhp;
+{
+	return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp);
+}
+
+static struct vfsops null_vfsops = {	/* positional: order follows struct vfsops */
+	nullfs_mount,
+	nullfs_start,
+	nullfs_unmount,
+	nullfs_root,
+	nullfs_quotactl,
+	nullfs_statfs,
+	nullfs_sync,
+	nullfs_vget,
+	nullfs_fhtovp,
+	nullfs_vptofh,
+	nullfs_init,
+};
+/* Register the ops table under the name "null"; presumably a loopback-type fs. */
+VFS_SET(null_vfsops, null, VFCF_LOOPBACK);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
new file mode 100644
index 0000000..db5c341
--- /dev/null
+++ b/sys/fs/nullfs/null_vnops.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)null_vnops.c	8.6 (Berkeley) 5/27/95
+ *
+ * Ancestors:
+ *	@(#)lofs_vnops.c	1.2 (Berkeley) 6/18/92
+ *	$Id: null_vnops.c,v 1.31 1999/01/27 22:42:06 dillon Exp $
+ *	...and...
+ *	@(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ *
+ * $Id: null_vnops.c,v 1.31 1999/01/27 22:42:06 dillon Exp $
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name.  In this respect, it is
+ * similar to the loopback file system.  It differs from
+ * the loopback fs in two respects:  it is implemented using
+ * a stackable layers technique, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes.  First, it serves as a demonstration
+ * of layering by providing a layer which does nothing.  (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.)  Second, the null layer can serve as a prototype
+ * layer.  Since it provides all necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn).  After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there.  The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer.  It begins by examining vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents.  It then invokes the operation
+ * on the lower layer.  Finally, it replaces the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations, vop_getattr, vop_lock,
+ * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
+ * bypassed. Vop_getattr must change the fsid being returned.
+ * Vop_lock and vop_unlock must handle any locking for the
+ * current vnode as well as pass the lock request down.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data. Vop_print
+ * is not bypassed to avoid excessive debugging information.
+ * Also, certain vnode operations change the locking state within
+ * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
+ * and symlink). Ideally these operations should not change the
+ * lock state, but should be changed to let the caller of the
+ * function unlock them. Otherwise all intermediate vnode layers
+ * (such as union, umapfs, etc) must catch these functions to do
+ * the necessary locking at their layer.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * in effect stacking two VFSes.  Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer.  All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys".  A vop_lookup would be
+ * done on the root null-node.  This operation would bypass through
+ * to the lower layer which would return a vnode representing
+ * the UFS "sys".  Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy.  Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer
+ * when the operation cannot be completely bypassed.  Each method
+ * is appropriate in different situations.  In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer.  It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer.  The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include "opt_debug_nullfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Debug switch, exported read/write as the sysctl integer
+ * "nullfs_bug_bypass" under the _debug node.  When non-zero,
+ * null_bypass() printf's the name of every operation it handles.
+ */
+static int null_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
+SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, 
+	&null_bug_bypass, 0, "");
+
+/*
+ * Vnode operations this layer implements itself instead of leaving
+ * them to the default null_bypass() entry.
+ */
+static int	null_access __P((struct vop_access_args *ap));
+static int	null_bwrite __P((struct vop_bwrite_args *ap));
+static int	null_getattr __P((struct vop_getattr_args *ap));
+static int	null_inactive __P((struct vop_inactive_args *ap));
+static int	null_lock __P((struct vop_lock_args *ap));
+static int	null_lookup __P((struct vop_lookup_args *ap));
+static int	null_print __P((struct vop_print_args *ap));
+static int	null_reclaim __P((struct vop_reclaim_args *ap));
+static int	null_setattr __P((struct vop_setattr_args *ap));
+static int	null_strategy __P((struct vop_strategy_args *ap));
+static int	null_unlock __P((struct vop_unlock_args *ap));
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ *    This version has been optimized for speed, throwing away some
+ * safety checks.  It should still always work, but it's not as
+ * robust to programmer errors.
+ *    Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments.  With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here.  This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ *   to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ *   problems on rmdir'ing mount points and renaming?)
+ *
+ * Returns the error code of the lower-layer operation or, when that
+ * operation succeeded and a returned vnode had to be wrapped, the
+ * result of null_node_create().
+ */
+int
+null_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	register struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];	/* saved upper vnodes; NULLVP = slot not mapped */
+	struct vnode **vps_p[VDESC_MAX_VPS];	/* addresses of the vnode slots inside *ap */
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+
+	if (null_bug_bypass)
+		printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("null_bypass: no vp's in map.");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 *
+	 * reles is shifted right once per iteration, so its low bit is
+	 * always the vdesc_flags bit belonging to vnode argument i.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (We must always map first vp or vclean fails.)
+		 */
+		if (i && (*this_vp_p == NULLVP ||
+		    (*this_vp_p)->v_op != null_vnodeop_p)) {
+			old_vps[i] = NULLVP;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+			/*
+			 * XXX - Several operations have the side effect
+			 * of vrele'ing their vp's.  We must account for
+			 * that.  (This should go away in the future.)
+			 *
+			 * The slot was just overwritten, so the VREF
+			 * below lands on the lower vnode.
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 *
+	 * The vrele happens after the slot is restored, so it drops a
+	 * reference on the upper (null) vnode.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		/*
+		 * XXX - even though some ops have vpp returned vp's,
+		 * several ops actually vrele this before returning.
+		 * We must avoid these ops.
+		 * (This should go away when these ops are regularized.)
+		 */
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset,ap);
+		/*
+		 * Wrap the lower vnode found at **vppp on the mount of
+		 * the first (restored) argument vnode; *vppp is handed
+		 * to null_node_create() as the place for its result.
+		 */
+		if (*vppp)
+			error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	return (error);
+}
+
+/*
+ * We have to carry on the locking protocol on the null layer vnodes
+ * as we progress through the tree. We also have to enforce read-only
+ * if this layer is mounted read-only.
+ *
+ * Returns EROFS for a last-component DELETE or RENAME on a read-only
+ * mount without consulting the lower layer, and converts EJUSTRETURN
+ * from the lower layer into EROFS for a last-component CREATE or
+ * RENAME on a read-only mount.  Otherwise the result of null_bypass()
+ * is returned.
+ */
+static int
+null_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	int flags = cnp->cn_flags;
+	struct vop_lock_args lockargs;
+	struct vop_unlock_args unlockargs;
+	struct vnode *dvp, *vp;
+	int error;
+
+	if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+		return (EROFS);
+	error = null_bypass((struct vop_generic_args *)ap);
+	if (error == EJUSTRETURN && (flags & ISLASTCN) &&
+	    (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+	    (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
+		error = EROFS;
+	/*
+	 * We must do the same locking and unlocking at this layer as
+	 * is done in the layers below us. We could figure this out
+	 * based on the error return and the LASTCN, LOCKPARENT, and
+	 * LOCKLEAF flags. However, it is more expedient to just find
+	 * out the state of the lower level vnodes and set ours to the
+	 * same state.
+	 *
+	 * VOP_ISLOCKED is not overridden in this layer's operations
+	 * table, so the queries below go through the default (bypass)
+	 * entry.  The state changes call vop_nounlock()/vop_nolock()
+	 * directly instead of going through VOP_UNLOCK/VOP_LOCK.
+	 *
+	 * NOTE(review): vp is read from *ap->a_vpp even when error is
+	 * non-zero -- confirm callers/lower layers leave it well defined
+	 * in that case.
+	 */
+	dvp = ap->a_dvp;
+	vp = *ap->a_vpp;
+	if (dvp == vp)
+		return (error);
+	if (!VOP_ISLOCKED(dvp)) {
+		unlockargs.a_vp = dvp;
+		unlockargs.a_flags = 0;
+		unlockargs.a_p = p;
+		vop_nounlock(&unlockargs);
+	}
+	if (vp != NULLVP && VOP_ISLOCKED(vp)) {
+		lockargs.a_vp = vp;
+		lockargs.a_flags = LK_SHARED;
+		lockargs.a_p = p;
+		vop_nolock(&lockargs);
+	}
+	return (error);
+}
+
+/*
+ * Setattr call. Disallow write attempts if the layer is mounted read-only.
+ *
+ * Returns EROFS when any of flags, uid, gid, atime, mtime or mode is
+ * being set on a read-only mount.  For a size change: EISDIR on a
+ * directory; on device, socket and fifo vnodes the request is not
+ * forwarded (EOPNOTSUPP if flags are also being set, else 0); on every
+ * other type EROFS if the mount is read-only.  All remaining requests
+ * are passed to the lower layer through null_bypass().
+ *
+ * Defined static to agree with its prototype earlier in this file.
+ */
+static int
+null_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+	    vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+	    vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
+	    (vp->v_mount->mnt_flag & MNT_RDONLY))
+		return (EROFS);
+	if (vap->va_size != VNOVAL) {
+		switch (vp->v_type) {
+		case VDIR:
+			return (EISDIR);
+		case VCHR:
+		case VBLK:
+		case VSOCK:
+		case VFIFO:
+			/* Size changes on these types never reach the lower layer. */
+			if (vap->va_flags != VNOVAL)
+				return (EOPNOTSUPP);
+			return (0);
+		case VREG:
+		case VLNK:
+		default:
+			/*
+			 * Disallow write attempts if the filesystem is
+			 * mounted read-only.
+			 */
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			break;
+		}
+	}
+	return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * Getattr is intercepted for one reason only: after the lower layer
+ * has filled in the attributes, va_fsid is overwritten with the first
+ * word of this mount's fsid.  A lower-layer error is returned as is
+ * and leaves the attributes untouched by this layer.
+ */
+static int
+null_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+
+	error = null_bypass((struct vop_generic_args *)ap);
+	if (error == 0) {
+		/* Depends on the bypass having put our vnode back in ap->a_vp. */
+		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	}
+	return (error);
+}
+
+/*
+ * Access check.  Write access to a directory, symbolic link or regular
+ * file is refused with EROFS when this layer is mounted read-only;
+ * other vnode types (sockets, fifos, devices) are exempt from that
+ * test.  Everything else is decided by the lower layer.
+ */
+static int
+null_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	mode_t mode = ap->a_mode;
+	int isfile;
+
+	isfile = (vp->v_type == VDIR || vp->v_type == VLNK ||
+	    vp->v_type == VREG);
+	if ((mode & VWRITE) && isfile &&
+	    (vp->v_mount->mnt_flag & MNT_RDONLY))
+		return (EROFS);
+	return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * Lock a null vnode.  Our own vnode is processed first through
+ * vop_nolock(); the request is then forwarded to the lower layer with
+ * LK_INTERLOCK cleared, because the interlock belongs to our vnode and
+ * not to the ones below it.  A drain request (LK_DRAIN) stops at this
+ * layer and reports success without being forwarded.
+ */
+static int
+null_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	vop_nolock(ap);
+	if ((ap->a_flags & LK_TYPE_MASK) != LK_DRAIN) {
+		ap->a_flags &= ~LK_INTERLOCK;
+		return (null_bypass((struct vop_generic_args *)ap));
+	}
+	return (0);
+}
+
+/*
+ * Unlock a null vnode.  Our own vnode is processed first through
+ * vop_nounlock(); the request is then forwarded to the lower layer
+ * with LK_INTERLOCK cleared, since the interlock applies to our vnode
+ * only.  The lower layer's result is returned.
+ */
+static int
+null_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+
+	vop_nounlock(ap);
+	ap->a_flags &= ~LK_INTERLOCK;
+	error = null_bypass((struct vop_generic_args *)ap);
+	return (error);
+}
+
+/*
+ * The null vnode has gone inactive.  This is deliberately not a bypass:
+ * the reference on the lower vnode is kept until reclaim so that the
+ * null_node stays cached and reusable.  The lower vnode is still told
+ * that it is inactive, and our own vnode is unlocked before returning.
+ * Always returns 0.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
+ * trying to reactivate it with capabilities (v_id) like they do in
+ * the name lookup cache code.  That's too much work for now.
+ */
+static int
+null_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	struct vnode *lowervp = VTONULL(vp)->null_lowervp;
+
+	VOP_INACTIVE(lowervp, p);
+	VOP_UNLOCK(vp, 0, p);
+	return (0);
+}
+
+/*
+ * Tear down the null-layer state of a vnode that is being reclaimed:
+ * detach the null_node from the lower vnode and from its hash chain,
+ * free it, and finally drop the reference that was held on the lower
+ * vnode.  Always returns 0.
+ *
+ * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, so we can't call
+ * VOPs on ourself.
+ */
+static int
+null_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct null_node *nn = VTONULL(vp);
+	struct vnode *lower = nn->null_lowervp;	/* saved before nn is freed */
+
+	/* Once cleared, this node will not be re-used. */
+	nn->null_lowervp = NULLVP;
+	LIST_REMOVE(nn, null_hash);
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = NULL;
+
+	vrele(lower);
+	return (0);
+}
+
+/*
+ * Print a one-line description of a null vnode: its own address and
+ * the address of the lower vnode it is stacked on.  Always returns 0.
+ */
+static int
+null_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* %p expects a void pointer, so both arguments are cast explicitly. */
+	printf("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", (void *)vp,
+	    (void *)NULLVPTOLOWERVP(vp));
+	return (0);
+}
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's vnode is swapped for the lower vnode for the duration
+ * of the lower layer's strategy call and put back afterwards.  The
+ * lower layer's result is returned.
+ */
+static int
+null_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_STRATEGY(bp->b_vp, bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+/*
+ * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's vnode is swapped for the lower vnode while VOP_BWRITE
+ * runs and put back afterwards.  The result of VOP_BWRITE is returned.
+ */
+static int
+null_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *uppervp = bp->b_vp;
+	int error;
+
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * Operation table for null vnodes.  Any operation without an entry of
+ * its own falls to the vop_default_desc entry, i.e. null_bypass().
+ * The list is terminated by the { NULL, NULL } pair.
+ */
+vop_t **null_vnodeop_p;
+static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) null_bypass },
+	{ &vop_access_desc,		(vop_t *) null_access },
+	{ &vop_bwrite_desc,		(vop_t *) null_bwrite },
+	{ &vop_getattr_desc,		(vop_t *) null_getattr },
+	{ &vop_inactive_desc,		(vop_t *) null_inactive },
+	{ &vop_lock_desc,		(vop_t *) null_lock },
+	{ &vop_lookup_desc,		(vop_t *) null_lookup },
+	{ &vop_print_desc,		(vop_t *) null_print },
+	{ &vop_reclaim_desc,		(vop_t *) null_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) null_setattr },
+	{ &vop_strategy_desc,		(vop_t *) null_strategy },
+	{ &vop_unlock_desc,		(vop_t *) null_unlock },
+	{ NULL, NULL }
+};
+/* Ties the entry list to null_vnodeop_p, the vector pointer null_bypass() tests vnodes against. */
+static struct vnodeopv_desc null_vnodeop_opv_desc =
+	{ &null_vnodeop_p, null_vnodeop_entries };
+
+/* Presumably hands the descriptor to the kernel's vnode-op setup -- VNODEOP_SET is defined elsewhere. */
+VNODEOP_SET(null_vnodeop_opv_desc);
diff --git a/sys/fs/portalfs/portal.h b/sys/fs/portalfs/portal.h
new file mode 100644
index 0000000..d60826e
--- /dev/null
+++ b/sys/fs/portalfs/portal.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal.h	8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.4 1997/02/22 09:40:24 peter Exp $
+ */
+
+/*
+ * Mount arguments for the portal file system.  portal_mount() copies
+ * this structure in from the mount request's data pointer.
+ */
+struct portal_args {
+	char		*pa_config;	/* Config file */
+	int		pa_socket;	/* Socket to server */
+};
+
+/*
+ * Open mode plus a copy of the caller's credential fields.
+ * NOTE(review): presumably the record sent to the portal server with
+ * each open request -- its use is not visible in this header; confirm
+ * against portal_vnops.c.
+ */
+struct portal_cred {
+	int		pcr_flag;		/* File open mode */
+	uid_t		pcr_uid;		/* From ucred */
+	short		pcr_ngroups;		/* From ucred */
+	gid_t		pcr_groups[NGROUPS];	/* From ucred */
+};
+
+#ifdef KERNEL
+/* Per-mount state; reached from a struct mount through VFSTOPORTAL(). */
+struct portalmount {
+	struct vnode	*pm_root;	/* Root node */
+	struct file	*pm_server;	/* Held reference to server socket */
+};
+
+/* Per-vnode state; reached from a struct vnode through VTOPORTAL(). */
+struct portalnode {
+	int		pt_size;	/* Length of Arg */
+	char		*pt_arg;	/* Arg to send to server */
+	int		pt_fileid;	/* cookie */
+};
+
+/* Casts of the generic private-data pointers to the portal types. */
+#define VFSTOPORTAL(mp)	((struct portalmount *)((mp)->mnt_data))
+#define	VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data)
+
+/* pt_fileid value given to the root vnode of a portal mount. */
+#define PORTAL_ROOTFILEID	2
+
+/* Vnode operations vector used for portal vnodes. */
+extern vop_t **portal_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c
new file mode 100644
index 0000000..633bf77
--- /dev/null
+++ b/sys/fs/portalfs/portal_vfsops.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal_vfsops.c	8.11 (Berkeley) 5/14/95
+ *
+ * $Id: portal_vfsops.c,v 1.21 1998/05/06 05:29:35 msmith Exp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <miscfs/portal/portal.h>
+
+/* Malloc type used for the per-mount struct portalmount. */
+static MALLOC_DEFINE(M_PORTALFSMNT, "PORTAL mount", "PORTAL mount structure");
+
+/*
+ * VFS operations implemented in this file; the remaining slots of
+ * the operations vector are filled by stub macros further down.
+ */
+static int	portal_init __P((struct vfsconf *));
+static int	portal_mount __P((struct mount *mp, char *path, caddr_t data,
+				  struct nameidata *ndp, struct proc *p));
+static int	portal_start __P((struct mount *mp, int flags, struct proc *p));
+static int	portal_unmount __P((struct mount *mp, int mntflags,
+				    struct proc *p));
+static int	portal_root __P((struct mount *mp, struct vnode **vpp));
+static int	portal_statfs __P((struct mount *mp, struct statfs *sbp,
+				   struct proc *p));
+
+/*
+ * File system initialization hook.  The portal file system has no
+ * global state to set up, so this only reports success.
+ */
+static int
+portal_init(vfsp)
+	struct vfsconf *vfsp;
+{
+
+	/* Nothing to initialize. */
+	return (0);
+}
+
+/*
+ * Mount a portal file system.
+ *
+ * "data" points to a struct portal_args in user space naming the
+ * server socket descriptor and the configuration file.  On success a
+ * root vnode and a struct portalmount are created and attached to mp,
+ * and a reference is taken on the server's file structure.
+ *
+ * Returns 0 on success; EOPNOTSUPP for an update of an existing mount;
+ * ESOCKTNOSUPPORT if the socket is not in the AF_UNIX domain; otherwise
+ * the error from copyin(), getsock() or getnewvnode().
+ */
+static int
+portal_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct file *fp;
+	struct portal_args args;
+	struct portalmount *fmp;
+	struct socket *so;
+	struct vnode *rvp;
+	struct portalnode *pn;
+	u_int size;
+	int error;
+
+	/*
+	 * Updating an existing portal mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	error = copyin(data, (caddr_t) &args, sizeof(struct portal_args));
+	if (error)
+		return (error);
+
+	/* Resolve the descriptor to its file and insist on an AF_UNIX socket. */
+	error = getsock(p->p_fd, args.pa_socket, &fp);
+	if (error)
+		return (error);
+	so = (struct socket *) fp->f_data;
+	if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+		return (ESOCKTNOSUPPORT);
+
+	/* Private data for the root vnode. */
+	MALLOC(pn, struct portalnode *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	/* Per-mount data. */
+	MALLOC(fmp, struct portalmount *, sizeof(struct portalmount),
+		M_PORTALFSMNT, M_WAITOK);	/* XXX */
+
+	error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+	if (error) {
+		/* Undo both allocations before failing. */
+		FREE(fmp, M_PORTALFSMNT);
+		FREE(pn, M_TEMP);
+		return (error);
+	}
+
+	/* Set up the root: a directory with no argument string. */
+	rvp->v_data = pn;
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;
+	VTOPORTAL(rvp)->pt_arg = 0;
+	VTOPORTAL(rvp)->pt_size = 0;
+	VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID;
+	fmp->pm_root = rvp;
+	/* The mount keeps its own reference on the server's file. */
+	fmp->pm_server = fp; fp->f_count++;
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) fmp;
+	vfs_getnewfsid(mp);
+
+	/*
+	 * Record the mount point and the config file name, zero-filling
+	 * the rest of each buffer.  copyinstr() failures are ignored.
+	 */
+	(void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void)copyinstr(args.pa_config,
+	    mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+	/* Fill in the remaining statfs fields of the mount itself. */
+	(void)portal_statfs(mp, &mp->mnt_stat, p);
+	return (0);
+}
+
+/*
+ * Start hook of the VFS interface.  Nothing needs to be started for a
+ * portal mount, so this only reports success.
+ */
+static int
+portal_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	/* Nothing to do. */
+	return (0);
+}
+
+/*
+ * Unmount a portal file system.
+ *
+ * Returns EBUSY if the root vnode has more than one user (this check
+ * is made even when MNT_FORCE is given), the error from vflush() if
+ * flushing the other vnodes fails, and 0 once the root vnode, the
+ * server socket reference and the portalmount structure have been
+ * released.
+ */
+static int
+portal_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+	int error, flags = 0;
+
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	/* Flush every vnode of the mount except the root, which is passed as the one to skip. */
+	error = vflush(mp, rootvp, flags);
+	if (error)
+		return (error);
+
+	/*
+	 * Release reference on underlying root vnode
+	 */
+	vrele(rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(rootvp);
+	/*
+	 * Shutdown the socket.  This will cause the select in the
+	 * daemon to wake up, and then the accept will get ECONNABORTED
+	 * which it interprets as a request to go and bury itself.
+	 */
+	soshutdown((struct socket *) VFSTOPORTAL(mp)->pm_server->f_data, 2);
+	/*
+	 * Discard reference to underlying file.  Must call closef because
+	 * this may be the last reference.
+	 */
+	closef(VFSTOPORTAL(mp)->pm_server, (struct proc *) 0);
+	/*
+	 * Finally, throw away the portalmount structure
+	 */
+	free(mp->mnt_data, M_PORTALFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the root vnode of a portal mount through *vpp, with a new
+ * reference taken and the vnode locked exclusively on behalf of the
+ * current process.  Always returns 0.
+ */
+static int
+portal_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+
+	VREF(rootvp);
+	vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY, curproc);	/* XXX curproc */
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Report file system statistics.  All size and count figures are fixed
+ * values.  When the caller's buffer is not the mount's own mnt_stat,
+ * the type, fsid and mount names are copied into it from the mount.
+ * Always returns 0.
+ */
+static int
+portal_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+
+	sbp->f_flags = 0;
+
+	/* Block figures: two DEV_BSIZE blocks, none of them free. */
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+
+	/* File figures: a single file, no free slots. */
+	sbp->f_files = 1;		/* Allow for "." */
+	sbp->f_ffree = 0;
+
+	/* The mount's own buffer already holds the identity fields. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	sbp->f_type = mp->mnt_vfc->vfc_typenum;
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * VFS operations the portal file system does not implement.  Each name
+ * expands to a generic kernel stub cast to the signature of the slot:
+ * eopnotsupp for unsupported operations and nullop for sync.
+ * portal_sysctl is defined here but does not appear in the
+ * portal_vfsops initializer in this file.
+ */
+#define portal_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+	    struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp)
+#define portal_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+	    struct proc *)))eopnotsupp)
+#define portal_sync ((int (*) __P((struct mount *, int, struct ucred *, \
+	    struct proc *)))nullop)
+#define portal_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+	    size_t, struct proc *)))eopnotsupp)
+#define portal_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+	    eopnotsupp)
+#define portal_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
+
+/*
+ * VFS operations vector for the portal file system.  The initializer
+ * is positional, so the entries must stay in the member order of
+ * struct vfsops (declared outside this file).  The quotactl, sync,
+ * vget, fhtovp and vptofh slots are filled by stub macros.
+ */
+static struct vfsops portal_vfsops = {
+	portal_mount,
+	portal_start,
+	portal_unmount,
+	portal_root,
+	portal_quotactl,
+	portal_statfs,
+	portal_sync,
+	portal_vget,
+	portal_fhtovp,
+	portal_vptofh,
+	portal_init,
+};
+
+/*
+ * Presumably registers the vector as file system type "portal", flagged
+ * VFCF_SYNTHETIC -- VFS_SET is defined elsewhere; confirm there.
+ */
+VFS_SET(portal_vfsops, portal, VFCF_SYNTHETIC);
diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c
new file mode 100644
index 0000000..819d636
--- /dev/null
+++ b/sys/fs/portalfs/portal_vnops.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)portal_vnops.c	8.14 (Berkeley) 5/21/95
+ *
+ * $Id: portal_vnops.c,v 1.34 1998/12/07 21:58:32 archie Exp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/kernel.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+/* Next file id to hand out; portal_lookup() post-increments it per new node. */
+static int portal_fileid = PORTAL_ROOTFILEID+1;
+
+/* Forward declarations for the static functions defined in this file. */
+static int	portal_badop __P((void));
+static void	portal_closefd __P((struct proc *p, int fd));
+static int	portal_connect __P((struct socket *so, struct socket *so2));
+static int	portal_getattr __P((struct vop_getattr_args *ap));
+static int	portal_inactive __P((struct vop_inactive_args *ap));
+static int	portal_lookup __P((struct vop_lookup_args *ap));
+static int	portal_open __P((struct vop_open_args *ap));
+static int	portal_print __P((struct vop_print_args *ap));
+static int	portal_readdir __P((struct vop_readdir_args *ap));
+static int	portal_reclaim __P((struct vop_reclaim_args *ap));
+static int	portal_setattr __P((struct vop_setattr_args *ap));
+
+/*
+ * Close descriptor fd in process p by calling the close() system call
+ * handler directly.  A failure is reported on the console and otherwise
+ * ignored.
+ */
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct close_args cargs;
+	int rv;
+
+	cargs.fd = fd;
+	rv = close(p, &cargs);
+	if (rv == 0)
+		return;
+
+	/*
+	 * This is not expected to fail and there is no way to
+	 * recover if it does, so just say so.
+	 */
+	printf("portal_closefd: error = %d\n", rv);
+}
+
+/*
+ * Look up a name in a portal directory.
+ *
+ * a_dvp is the current namei directory and a_cnp is the name to locate
+ * in it.  "." yields a new reference to the directory itself.  Any
+ * other name produces a fresh VREG vnode whose portalnode records the
+ * whole of the remaining pathname (not only this component); cn_consume
+ * is set so that namei advances to the end of the string.  DELETE and
+ * RENAME lookups are refused with EROFS.
+ */
+static int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	char *name = cnp->cn_nameptr;
+	struct portalnode *node;
+	struct vnode *nvp = 0;
+	int len;
+	int error;
+
+	*vpp = NULLVP;
+
+	if (cnp->cn_nameiop == RENAME || cnp->cn_nameiop == DELETE)
+		return (EROFS);
+
+	/* "." is the directory itself. */
+	if (cnp->cn_namelen == 1 && name[0] == '.') {
+		VREF(dvp);
+		*vpp = dvp;
+		/*VOP_LOCK(dvp);*/
+		return (0);
+	}
+
+	/*
+	 * Allocate the portalnode before calling getnewvnode: if MALLOC
+	 * were to block afterwards, a bogus v_data pointer could get
+	 * dereferenced elsewhere in the meantime.
+	 */
+	MALLOC(node, struct portalnode *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	error = getnewvnode(VT_PORTAL, dvp->v_mount, portal_vnodeop_p, &nvp);
+	if (error) {
+		FREE(node, M_TEMP);
+		if (nvp)
+			vrele(nvp);
+		return (error);
+	}
+	nvp->v_type = VREG;
+	nvp->v_data = node;
+
+	/*
+	 * Measure everything that is left of the pathname and tell
+	 * namei to skip past whatever lies beyond this component.
+	 */
+	for (len = 0; name[len] != '\0'; len++)
+		continue;
+	cnp->cn_consume = len - cnp->cn_namelen;
+
+	/* Keep a private, NUL-terminated copy of that pathname. */
+	node->pt_size = len + 1;
+	node->pt_arg = malloc(len + 1, M_TEMP, M_WAITOK);
+	bcopy(name, node->pt_arg, node->pt_size);
+	node->pt_fileid = portal_fileid++;
+
+	*vpp = nvp;
+	/*VOP_LOCK(nvp);*/
+	return (0);
+}
+
+/*
+ * Connect socket so to the listening socket so2 without going through
+ * a pathname lookup (taken from unp_connect, minus the namei part).
+ *
+ * A new socket is spawned from so2 with sonewconn(), receives a copy of
+ * so2's address if it has one, and is then paired with so through
+ * unp_connect2(), whose result is returned.  Returns ECONNREFUSED if
+ * so2 is null, is not accepting connections, or sonewconn() fails, and
+ * EPROTOTYPE if the two socket types differ.
+ */
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	struct socket *peer;
+	struct unpcb *lunp;	/* pcb of the listening socket */
+	struct unpcb *punp;	/* pcb of the newly spawned socket */
+
+	if (so2 == 0)
+		return (ECONNREFUSED);
+	if (so2->so_type != so->so_type)
+		return (EPROTOTYPE);
+	if (!(so2->so_options & SO_ACCEPTCONN))
+		return (ECONNREFUSED);
+
+	peer = sonewconn(so2, 0);
+	if (peer == 0)
+		return (ECONNREFUSED);
+
+	lunp = sotounpcb(so2);
+	punp = sotounpcb(peer);
+	if (lunp->unp_addr) {
+		punp->unp_addr = (struct sockaddr_un *)
+			dup_sockaddr((struct sockaddr *)lunp->unp_addr, 0);
+	}
+
+	return (unp_connect2(so, peer));
+}
+
+/*
+ * Open a portal node.
+ *
+ * Opening the root is a no-op.  For any other node a new AF_UNIX stream
+ * socket is connected to the mount's server socket, the caller's
+ * credentials (struct portal_cred) followed by the saved pathname are
+ * sent over it, and the reply is read back: an int error code and/or a
+ * control message carrying file descriptors.  The first descriptor
+ * received is stored in p->p_dupfd and ENXIO is returned (see the
+ * comment just before that assignment); extra descriptors are closed.
+ * The temporary socket is shut down and closed on every path through
+ * the "bad" label.
+ *
+ * Returns 0 for the root, ENODEV if p_dupfd is already >= 0, EACCES if
+ * the received descriptor's flags do not cover the requested mode,
+ * ENXIO on success, or an error from the socket calls or the server.
+ */
+static int
+portal_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct socket *so = 0;
+	struct portalnode *pt;
+	struct proc *p = ap->a_p;
+	struct vnode *vp = ap->a_vp;
+	int s;
+	struct uio auio;
+	struct iovec aiov[2];
+	int res;
+	struct mbuf *cm = 0;
+	struct cmsghdr *cmsg;
+	int newfds;
+	int *ip;
+	int fd;
+	int error;
+	int len;
+	struct portalmount *fmp;
+	struct file *fp;
+	struct portal_cred pcred;
+
+	/*
+	 * Nothing to do when opening the root node.
+	 */
+	if (vp->v_flag & VROOT)
+		return (0);
+
+	/*
+	 * Can't be opened unless the caller is set up
+	 * to deal with the side effects.  Check for this
+	 * by testing whether the p_dupfd has been set.
+	 */
+	if (p->p_dupfd >= 0)
+		return (ENODEV);
+
+	pt = VTOPORTAL(vp);
+	fmp = VFSTOPORTAL(vp->v_mount);
+
+	/*
+	 * Create a new socket.
+	 */
+	error = socreate(AF_UNIX, &so, SOCK_STREAM, 0, ap->a_p);
+	if (error)
+		goto bad;
+
+	/*
+	 * Reserve some buffer space
+	 */
+	res = pt->pt_size + sizeof(pcred) + 512;	/* XXX */
+	error = soreserve(so, res, res);
+	if (error)
+		goto bad;
+
+	/*
+	 * Kick off connection
+	 */
+	error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+	if (error)
+		goto bad;
+
+	/*
+	 * Wait for connection to complete
+	 */
+	/*
+	 * XXX: Since the mount point is holding a reference on the
+	 * underlying server socket, it is not easy to find out whether
+	 * the server process is still running.  To handle this problem
+	 * we loop waiting for the new socket to be connected (something
+	 * which will only happen if the server is still running) or for
+	 * the reference count on the server socket to drop to 1, which
+	 * will happen if the server dies.  Sleep for 5 second intervals
+	 * and keep polling the reference count.   XXX.
+	 */
+	s = splnet();
+	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+		if (fmp->pm_server->f_count == 1) {
+			error = ECONNREFUSED;
+			splx(s);
+			goto bad;
+		}
+		(void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+	}
+	splx(s);
+
+	if (so->so_error) {
+		error = so->so_error;
+		goto bad;
+	}
+
+	/*
+	 * Set miscellaneous flags
+	 */
+	so->so_rcv.sb_timeo = 0;
+	so->so_snd.sb_timeo = 0;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+
+	/*
+	 * Build the request: the caller's credentials and open mode,
+	 * followed by the pathname saved at lookup time.
+	 */
+	pcred.pcr_flag = ap->a_mode;
+	pcred.pcr_uid = ap->a_cred->cr_uid;
+	pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+	bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+	aiov[0].iov_base = (caddr_t) &pcred;
+	aiov[0].iov_len = sizeof(pcred);
+	aiov[1].iov_base = pt->pt_arg;
+	aiov[1].iov_len = pt->pt_size;
+	auio.uio_iov = aiov;
+	auio.uio_iovcnt = 2;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_SYSSPACE;
+	auio.uio_procp = p;
+	auio.uio_offset = 0;
+	auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+	error = sosend(so, (struct sockaddr *) 0, &auio,
+			(struct mbuf *) 0, (struct mbuf *) 0, 0, p);
+	if (error)
+		goto bad;
+
+	/*
+	 * Read the reply: up to sizeof(int) bytes of data (an error
+	 * code) and/or a control message, which ends up in cm.
+	 */
+	len = auio.uio_resid = sizeof(int);
+	do {
+		struct mbuf *m = 0;
+		int flags = MSG_WAITALL;
+		error = soreceive(so, (struct sockaddr **) 0, &auio,
+					&m, &cm, &flags);
+		if (error)
+			goto bad;
+
+		/*
+		 * Grab an error code from the mbuf.
+		 */
+		if (m) {
+			m = m_pullup(m, sizeof(int));	/* Needed? */
+			if (m) {
+				error = *(mtod(m, int *));
+				m_freem(m);
+			} else {
+				error = EINVAL;
+			}
+		} else {
+			if (cm == 0) {
+				error = ECONNRESET;	 /* XXX */
+#ifdef notdef
+				break;
+#endif
+			}
+		}
+	} while (cm == 0 && auio.uio_resid == len && !error);
+
+	/* No control message means no descriptor; report whatever we got. */
+	if (cm == 0)
+		goto bad;
+
+	if (auio.uio_resid) {
+		error = 0;
+#ifdef notdef
+		error = EMSGSIZE;
+		goto bad;
+#endif
+	}
+
+	/*
+	 * XXX: Break apart the control message, and retrieve the
+	 * received file descriptor.  Note that more than one descriptor
+	 * may have been received, or that the rights chain may have more
+	 * than a single mbuf in it.  What to do?
+	 */
+	cmsg = mtod(cm, struct cmsghdr *);
+	newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+	if (newfds == 0) {
+		error = ECONNREFUSED;
+		goto bad;
+	}
+	/*
+	 * At this point the rights message consists of a control message
+	 * header, followed by a data region containing a vector of
+	 * integer file descriptors.  The fds were allocated by the action
+	 * of receiving the control message.
+	 */
+	ip = (int *) (cmsg + 1);
+	fd = *ip++;
+	if (newfds > 1) {
+		/*
+		 * Close extra fds.
+		 */
+		int i;
+		printf("portal_open: %d extra fds\n", newfds - 1);
+		for (i = 1; i < newfds; i++) {
+			portal_closefd(p, *ip);
+			ip++;
+		}
+	}
+
+	/*
+	 * Check that the mode the file is being opened for is a subset
+	 * of the mode of the existing descriptor.
+	 */
+ 	fp = p->p_fd->fd_ofiles[fd];
+	if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+		portal_closefd(p, fd);
+		error = EACCES;
+		goto bad;
+	}
+
+	/*
+	 * Save the dup fd in the proc structure then return the
+	 * special error code (ENXIO) which causes magic things to
+	 * happen in vn_open.  The whole concept is, well, hmmm.
+	 */
+	p->p_dupfd = fd;
+	error = ENXIO;
+
+bad:;
+	/*
+	 * And discard the control message.
+	 */
+	if (cm) {
+		m_freem(cm);
+	}
+
+	/* The socket was only needed for this one exchange. */
+	if (so) {
+		soshutdown(so, 2);
+		soclose(so);
+	}
+	return (error);
+}
+
+/*
+ * Return attributes for a portal vnode.
+ *
+ * Everything is synthesized: owner and group are 0, the size and block
+ * size are DEV_BSIZE, and the access, modification and change times
+ * are all the current time.  The root is reported as a directory with
+ * rwx for user, group and other, 2 links and file id 2; any other node
+ * as a regular file with rw for user, group and other, 1 link and the
+ * file id it was given at lookup time.  Always returns 0.
+ */
+static int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+	nanotime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	/*
+	 * Use the same timestamp for ctime.  This used to assign
+	 * va_ctime to itself, which left it at whatever vattr_null()
+	 * had put there instead of a real time.
+	 */
+	vap->va_ctime = vap->va_atime;
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+	/* vap->va_qsize = 0; */
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+				S_IRGRP|S_IWGRP|S_IXGRP|
+				S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+	} else {
+		vap->va_type = VREG;
+		vap->va_mode = S_IRUSR|S_IWUSR|
+				S_IRGRP|S_IWGRP|
+				S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+/*
+ * Set attributes on a portal vnode.
+ *
+ * Nothing is actually stored.  The root vnode may not be touched
+ * (EACCES), and a request that tries to change the file flags is
+ * rejected with EOPNOTSUPP; anything else is accepted and ignored.
+ */
+static int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	/* The root vnode is off limits. */
+	if ((vp->v_flag & VROOT) != 0)
+		return (EACCES);
+
+	if (vap->va_flags == VNOVAL)
+		return (0);
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Fake readdir: the directory always reads as empty.
+ * Producing '.' and '..' is awkward here, so they are left out too.
+ * Returns 0 without touching the uio; panics if cookies are requested.
+ */
+static int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+
+	/*
+	 * Portal mounts cannot be exported and local callers do not
+	 * currently ask for cookies, so the normal case is to return
+	 * straight away.
+	 */
+	if (!ap->a_ncookies)
+		return (0);
+
+	panic("portal_readdir: not hungry");
+
+	return (0);
+}
+
+/*
+ * Inactive hook for portal vnodes.  There is no state to release at
+ * this point; the vnode is simply unlocked and 0 is returned.
+ */
+static int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+
+	VOP_UNLOCK(vp, 0, p);
+	return (0);
+}
+
+/*
+ * Reclaim a portal vnode: free the saved pathname (if any) and the
+ * portalnode hanging off v_data, and clear v_data.  Always returns 0.
+ */
+static int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct portalnode *node = VTOPORTAL(vp);
+
+	/* The pathname copy goes first, then the node that points at it. */
+	if (node->pt_arg != 0) {
+		free((caddr_t) node->pt_arg, M_TEMP);
+		node->pt_arg = 0;
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+
+/*
+ * Describe a portal vnode on the console.  The output is a fixed tag
+ * line; nothing from the vnode itself is printed.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("%s", "tag VT_PORTAL, portal vnode\n");
+
+	return (0);
+}
+
+
+/*
+ * Handler for operations that must never reach a portal vnode.
+ * Panics unconditionally and therefore never returns a value.
+ */
+static int
+portal_badop()
+{
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Vnode operations vector for portal vnodes.  portal_vnodeop_p is the
+ * vector pointer passed to getnewvnode() in portal_lookup().  The
+ * vop_default_desc entry (vop_defaultop) is presumably what any
+ * operation not listed here ends up in -- confirm.  bmap is wired to
+ * portal_badop, which panics.
+ */
+vop_t **portal_vnodeop_p;
+static struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_access_desc,		(vop_t *) vop_null },
+	{ &vop_bmap_desc,		(vop_t *) portal_badop },
+	{ &vop_getattr_desc,		(vop_t *) portal_getattr },
+	{ &vop_inactive_desc,		(vop_t *) portal_inactive },
+	{ &vop_lookup_desc,		(vop_t *) portal_lookup },
+	{ &vop_open_desc,		(vop_t *) portal_open },
+	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
+	{ &vop_print_desc,		(vop_t *) portal_print },
+	{ &vop_readdir_desc,		(vop_t *) portal_readdir },
+	{ &vop_reclaim_desc,		(vop_t *) portal_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) portal_setattr },
+	{ NULL, NULL }	/* end of table */
+};
+/* Ties the entry table to the vector pointer it is meant to fill in. */
+static struct vnodeopv_desc portal_vnodeop_opv_desc =
+	{ &portal_vnodeop_p, portal_vnodeop_entries };
+
+VNODEOP_SET(portal_vnodeop_opv_desc);
diff --git a/sys/fs/procfs/README b/sys/fs/procfs/README
new file mode 100644
index 0000000..5f1b6cc
--- /dev/null
+++ b/sys/fs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory.  the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files.  these files
+are used to control and interrogate processes.  the files implemented
+are:
+
+	file	- xxx.  the exec'ed file.
+
+	status  - r/o.  returns process status.
+
+	ctl	- w/o.  sends a control message to the process.
+			for example:
+				echo hup > /proc/curproc/ctl
+			will send a SIGHUP to the shell.
+			whereas
+				echo attach > /proc/1293/ctl
+			would set up process 1293 for debugging.
+			see below for more details.
+
+	mem	- r/w.  virtual memory image of the process.
+			parts of the address space are readable
+			only if they exist in the target process.
+			a more reasonable alternative might be
+			to return zero pages instead of an error.
+			comments?
+
+	note	- w/o.  writing a string here sends the
+			equivalent note to the process.
+			[ not implemented. ]
+
+	notepg	- w/o.  the same as note, but sends to all
+			members of the process group.
+			[ not implemented. ]
+
+	regs	- r/w.	process register set.  this can be read
+			or written any time even if the process
+			is not stopped.  since the bsd kernel
+			is single-processor, this implementation
+			will get the "right" register values.
+			a multi-proc kernel would need to do some
+			synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+   9 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 0
+  17 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 1
+  89 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 10
+  25 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 2
+2065 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 257
+2481 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 309
+ 265 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 32
+3129 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 390
+3209 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 400
+3217 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 401
+3273 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 408
+ 393 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 48
+ 409 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 50
+ 465 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 57
+ 481 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 59
+ 537 dr-xr-xr-x  2 root  kmem   0 Sep 21 15:06 66
+ 545 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 67
+ 657 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 81
+ 665 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 82
+ 673 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 83
+ 681 dr-xr-xr-x  2 root  wheel  0 Sep 21 15:06 84
+3273 dr-xr-xr-x  2 jsp   staff  0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w-------  1 jsp  staff       0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x  1 bin  bin     90112 Mar 29 04:52 file
+3339 -rw-------  1 jsp  staff  118784 Sep 21 15:06 mem
+3343 --w-------  1 jsp  staff       0 Sep 21 15:06 note
+3344 --w-------  1 jsp  staff       0 Sep 21 15:06 notepg
+3340 -rw-------  1 jsp  staff       0 Sep 21 15:06 regs
+3342 -r--r--r--  1 jsp  staff       0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem  512-blocks    Used   Avail Capacity  Mounted on
+proc                 2       2       0   100%    /proc
+/dev/wd0a        16186   13548    1018    93%    /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+	attach		- this stops the target process and
+			  arranges for the sending process
+			  to become the debug control process
+	wait		- wait for the target process to come to
+			  a steady state ready for debugging.
+	step		- single step, with no signal delivery.
+	run		- continue running, with no signal delivery,
+			  until next trap or breakpoint.
+	<signame>	- deliver signal <signame> and continue running.
+	detach		- continue execution of the target process
+			  and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP).  the parent should do a "wait" then an
+"attach".  as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id$
diff --git a/sys/fs/procfs/procfs.h b/sys/fs/procfs/procfs.h
new file mode 100644
index 0000000..619e1b2
--- /dev/null
+++ b/sys/fs/procfs/procfs.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs.h	8.9 (Berkeley) 5/14/95
+ *
+ * From:
+ *	$Id: procfs.h,v 1.20 1998/07/07 04:08:44 bde Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem.
+ *
+ * The order matters: PROCFS_FILENO() below ignores the pid for every
+ * type that sorts before Pproc.
+ */
+typedef enum {
+	Proot,		/* the filesystem root */
+	Pcurproc,	/* symbolic link for curproc */
+	Pproc,		/* a process-specific sub-directory */
+	Pfile,		/* the executable file */
+	Pmem,		/* the process's memory image */
+	Pregs,		/* the process's register set */
+	Pfpregs,	/* the process's FP register set */
+	Pctl,		/* process control */
+	Pstatus,	/* process status */
+	Pnote,		/* process notifier */
+	Pnotepg,	/* process group notifier */
+	Pmap,		/* memory map */
+	Ptype,		/* executable type */
+	Pcmdline	/* command line */
+} pfstype;
+
+/*
+ * Control data for the proc file system, one per procfs vnode.
+ * VTOPFS() gets from a vnode to its pfsnode (through v_data) and
+ * PFSTOV() gets back (through pfs_vnode).
+ */
+struct pfsnode {
+	struct pfsnode	*pfs_next;	/* next on list */
+	struct vnode	*pfs_vnode;	/* vnode associated with this pfsnode */
+	pfstype		pfs_type;	/* type of procfs node */
+	pid_t		pfs_pid;	/* associated process */
+	u_short		pfs_mode;	/* mode bits for stat() */
+	u_long		pfs_flags;	/* open flags */
+	u_long		pfs_fileno;	/* unique file id */
+	pid_t		pfs_lockowner;	/* pfs lock owner */
+};
+
+#define PROCFS_NOTELEN	64	/* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 	8	/* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+/*
+ * True when the component name in cnp is exactly len bytes long and
+ * those bytes equal the first len bytes of s.
+ */
+#define CNEQ(cnp, s, len) \
+	 ((cnp)->cn_namelen == (len) && \
+	  (bcmp((s), (cnp)->cn_nameptr, (len)) == 0))
+
+/* NOTE(review): presumably the gid of group kmem -- confirm. */
+#define KMEM_GROUP 2
+
+/*
+ * Check to see whether access to target process is allowed
+ * Evaluates to 1 if access is allowed.
+ *
+ * p1 is the process asking and p2 the target.  Access is allowed when
+ * p1's effective, real and saved uids all equal p2's real uid and p2
+ * does not have P_SUGID set, or failing that when suser() returns 0
+ * for p1's credentials.  Both arguments are evaluated more than once,
+ * so they must not have side effects.
+ */
+#define CHECKIO(p1, p2) \
+     ((((p1)->p_cred->pc_ucred->cr_uid == (p2)->p_cred->p_ruid) && \
+       ((p1)->p_cred->p_ruid == (p2)->p_cred->p_ruid) && \
+       ((p1)->p_cred->p_svuid == (p2)->p_cred->p_ruid) && \
+       ((p2)->p_flag & P_SUGID) == 0) || \
+      (suser((p1)->p_cred->pc_ucred, &(p1)->p_acflag) == 0))
+      
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>)
+ */
+#define PROCFS_NAMELEN 8	/* size of the d_name buffer below */
+struct pfsdent {
+	u_int32_t d_fileno;
+	u_int16_t d_reclen;
+	u_int8_t  d_type;
+	u_int8_t  d_namlen;
+	char	d_name[PROCFS_NAMELEN];
+};
+/* Size of one struct pfsdent record. */
+#define UIO_MX sizeof(struct pfsdent)
+/*
+ * File number for a node of the given type belonging to pid.  Types
+ * that sort before Pproc get (type + 2) regardless of pid; all others
+ * get ((pid + 1) << 4) + type.
+ */
+#define PROCFS_FILENO(pid, type) \
+	(((type) < Pproc) ? \
+			((type) + 2) : \
+			((((pid)+1) << 4) + ((int) (type))))
+
+/*
+ * Convert between a pfsnode and its vnode
+ */
+#define VTOPFS(vp)	((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs)	((pfs)->pfs_vnode)
+
+/*
+ * One entry of a name-to-value table: a string and the integer it
+ * stands for.
+ */
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+	const char *nm_name;
+	int nm_val;
+};
+
+/*
+ * NOTE(review): judging by their use in procfs_doctl(), vfs_getuserstr()
+ * fetches a string from a uio into a buffer (the int is in/out: buffer
+ * size in, length out) and vfs_findname() returns the table entry
+ * matching a name, or null -- confirm against the definitions.
+ */
+int vfs_getuserstr __P((struct uio *, char *, int *));
+vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int));
+
+/* <machine/reg.h> */
+struct reg;
+struct fpreg;
+
+/* Process lookup that maps pid 0 to proc0 instead of calling pfind(). */
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0)
+
+void procfs_exit __P((struct proc *));
+int procfs_freevp __P((struct vnode *));
+int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+struct vnode *procfs_findtextvp __P((struct proc *));
+int procfs_sstep __P((struct proc *));
+void procfs_fix_sstep __P((struct proc *));
+int procfs_read_regs __P((struct proc *, struct reg *));
+int procfs_write_regs __P((struct proc *, struct reg *));
+int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+/*
+ * Per-file I/O handlers.  procfs_doctl() names its arguments (curp, p,
+ * pfs, uio): calling process, target process, node and transfer;
+ * presumably the others follow the same convention -- confirm.
+ */
+int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_domap __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_dotype __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_docmdline __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+/* Return 1 if process has special kernel digging privileges */
+int procfs_kmemaccess __P((struct proc *));
+
+/* functions to check whether or not files should be displayed */
+int procfs_validfile __P((struct proc *));
+int procfs_validfpregs __P((struct proc *));
+int procfs_validregs __P((struct proc *));
+int procfs_validmap __P((struct proc *));
+int procfs_validtype __P((struct proc *));
+
+/* NOTE(review): flag bits, presumably for the pfsnode lock -- confirm where they are stored. */
+#define PROCFS_LOCKED	0x01
+#define PROCFS_WANT	0x02
+
+extern vop_t **procfs_vnodeop_p;
+
+int	procfs_root __P((struct mount *, struct vnode **));
+int	procfs_rw __P((struct vop_read_args *));
+#endif /* KERNEL */
diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
new file mode 100644
index 0000000..21724e5
--- /dev/null
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_ctl.c	8.4 (Berkeley) 6/15/94
+ *
+ * From:
+ *	$Id: procfs_ctl.c,v 1.16 1997/04/27 21:32:21 alex Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ptrace.h>
+#include <sys/signalvar.h>
+#include <miscfs/procfs/procfs.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Provide an empty FIX_SSTEP() when none has been defined yet, so the
+ * calls below compile to nothing.  Note that this makes the later
+ * "#ifdef FIX_SSTEP" tests in this file always true.
+ */
+#ifndef FIX_SSTEP
+#define FIX_SSTEP(p)
+#endif
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp)
+ * i.e. p is stopped, its parent is curp, and it has P_TRACED set.
+ */
+#define TRACE_WAIT_P(curp, p) \
+	((p)->p_stat == SSTOP && \
+	 (p)->p_pptr == (curp) && \
+	 ((p)->p_flag & P_TRACED))
+
+/* Operation codes understood by procfs_control(). */
+#define PROCFS_CTL_ATTACH	1
+#define PROCFS_CTL_DETACH	2
+#define PROCFS_CTL_STEP		3
+#define PROCFS_CTL_RUN		4
+#define PROCFS_CTL_WAIT		5
+
+/*
+ * Command words accepted on the ctl file, each mapped to the
+ * PROCFS_CTL_* code passed on to procfs_control().  An all-zero entry
+ * ends the table.
+ */
+static vfs_namemap_t ctlnames[] = {
+	{ "attach",	PROCFS_CTL_ATTACH },
+	{ "detach",	PROCFS_CTL_DETACH },
+	{ "step",	PROCFS_CTL_STEP },
+	{ "run",	PROCFS_CTL_RUN },
+	{ "wait",	PROCFS_CTL_WAIT },
+	{ 0, 0 },
+};
+
+/*
+ * Signal names accepted on the ctl file, each mapped to its signal
+ * number.  An all-zero entry ends the table.
+ */
+static vfs_namemap_t signames[] = {
+	{ "hup",	SIGHUP },
+	{ "int",	SIGINT },
+	{ "quit",	SIGQUIT },
+	{ "ill",	SIGILL },
+	{ "trap",	SIGTRAP },
+	{ "abrt",	SIGABRT },
+	{ "iot",	SIGIOT },
+	{ "emt",	SIGEMT },
+	{ "fpe",	SIGFPE },
+	{ "kill",	SIGKILL },
+	{ "bus",	SIGBUS },
+	{ "segv",	SIGSEGV },
+	{ "sys",	SIGSYS },
+	{ "pipe",	SIGPIPE },
+	{ "alrm",	SIGALRM },
+	{ "term",	SIGTERM },
+	{ "urg",	SIGURG },
+	{ "stop",	SIGSTOP },
+	{ "tstp",	SIGTSTP },
+	{ "cont",	SIGCONT },
+	{ "chld",	SIGCHLD },
+	{ "ttin",	SIGTTIN },
+	{ "ttou",	SIGTTOU },
+	{ "io",		SIGIO },
+	{ "xcpu",	SIGXCPU },
+	{ "xfsz",	SIGXFSZ },
+	{ "vtalrm",	SIGVTALRM },
+	{ "prof",	SIGPROF },
+	{ "winch",	SIGWINCH },
+	{ "info",	SIGINFO },
+	{ "usr1",	SIGUSR1 },
+	{ "usr2",	SIGUSR2 },
+	{ 0, 0 },
+};
+
+static int	procfs_control __P((struct proc *curp, struct proc *p, int op));
+
+/*
+ * Carry out debugger control operation op (one of PROCFS_CTL_*) issued
+ * by curp against target process p.
+ *
+ * ATTACH marks p as traced, reparents it to curp and sends it SIGSTOP.
+ * DETACH clears the tracing state, gives p back to its original parent
+ * if that can still be found, and sets p running if it was stopped.
+ * STEP and RUN resume a target that is in trace-wait state relative to
+ * curp (STEP calls procfs_sstep() first).  WAIT sleeps until p stops.
+ *
+ * Returns 0 on success; EBUSY if p is already traced (ATTACH) or is not
+ * in trace-wait state relative to curp (STEP, RUN, and WAIT on a traced
+ * process); EINVAL on an attempt to attach to oneself; EPERM on an
+ * attempt to attach to pid 1 with securelevel > 0; or an error from
+ * procfs_sstep() or tsleep().  An unknown op gets EBUSY unless p is in
+ * trace-wait state, in which case it panics.
+ */
+static int
+procfs_control(curp, p, op)
+	struct proc *curp;
+	struct proc *p;
+	int op;
+{
+	int error;
+
+	/*
+	 * Attach - attaches the target process for debugging
+	 * by the calling process.
+	 */
+	if (op == PROCFS_CTL_ATTACH) {
+		/* check whether already being traced */
+		if (p->p_flag & P_TRACED)
+			return (EBUSY);
+
+		/* can't trace yourself! */
+		if (p->p_pid == curp->p_pid)
+			return (EINVAL);
+
+		/* can't trace init when securelevel > 0 */
+		if (securelevel > 0 && p->p_pid == 1)
+			return (EPERM);
+
+		/*
+		 * Go ahead and set the trace flag.
+		 * Save the old parent (it's reset in
+		 *   _DETACH, and also in kern_exit.c:wait4())
+		 * Reparent the process so that the tracing
+		 *   proc gets to see all the action.
+		 * Stop the target.
+		 */
+		p->p_flag |= P_TRACED;
+		faultin(p);
+		p->p_xstat = 0;		/* XXX ? */
+		if (p->p_pptr != curp) {
+			p->p_oppid = p->p_pptr->p_pid;
+			proc_reparent(p, curp);
+		}
+		psignal(p, SIGSTOP);
+		return (0);
+	}
+
+	/*
+	 * Target process must be stopped, owned by (curp) and
+	 * be set up for tracing (P_TRACED flag set).
+	 * Allow DETACH to take place at any time for sanity.
+	 * Allow WAIT any time, of course.
+	 */
+	switch (op) {
+	case PROCFS_CTL_DETACH:
+	case PROCFS_CTL_WAIT:
+		break;
+
+	default:
+		if (!TRACE_WAIT_P(curp, p))
+			return (EBUSY);
+	}
+
+
+#ifdef FIX_SSTEP
+	/*
+	 * do single-step fixup if needed
+	 */
+	FIX_SSTEP(p);
+#endif
+
+	/*
+	 * Don't deliver any signal by default.
+	 * To continue with a signal, just send
+	 * the signal name to the ctl file
+	 */
+	p->p_xstat = 0;
+
+	switch (op) {
+	/*
+	 * Detach.  Cleans up the target process, reparent it if possible
+	 * and set it running once more.
+	 */
+	case PROCFS_CTL_DETACH:
+		/* if not being traced, then this is a painless no-op */
+		if ((p->p_flag & P_TRACED) == 0)
+			return (0);
+
+		/* not being traced any more */
+		p->p_flag &= ~P_TRACED;
+
+		/* remove pending SIGTRAP, else the process will die */
+		p->p_siglist &= ~sigmask (SIGTRAP);
+
+		/* give process back to original parent */
+		if (p->p_oppid != p->p_pptr->p_pid) {
+			struct proc *pp;
+
+			pp = pfind(p->p_oppid);
+			if (pp)
+				proc_reparent(p, pp);
+		}
+
+		p->p_oppid = 0;
+		p->p_flag &= ~P_WAITED;	/* XXX ? */
+		wakeup((caddr_t) curp);	/* XXX for CTL_WAIT below ? */
+
+		break;
+
+	/*
+	 * Step.  Let the target process execute a single instruction.
+	 */
+	case PROCFS_CTL_STEP:
+		PHOLD(p);
+		error = procfs_sstep(p);
+		PRELE(p);
+		if (error)
+			return (error);
+		break;
+
+	/*
+	 * Run.  Let the target process continue running until a breakpoint
+	 * or some other trap.
+	 */
+	case PROCFS_CTL_RUN:
+		break;
+
+	/*
+	 * Wait for the target process to stop.
+	 * If the target is not being traced then just wait
+	 * for it to enter the stopped state.
+	 */
+	case PROCFS_CTL_WAIT:
+		error = 0;
+		if (p->p_flag & P_TRACED) {
+			while (error == 0 &&
+					(p->p_stat != SSTOP) &&
+					(p->p_flag & P_TRACED) &&
+					(p->p_pptr == curp)) {
+				error = tsleep((caddr_t) p,
+						PWAIT|PCATCH, "procfsx", 0);
+			}
+			if (error == 0 && !TRACE_WAIT_P(curp, p))
+				error = EBUSY;
+		} else {
+			while (error == 0 && p->p_stat != SSTOP) {
+				error = tsleep((caddr_t) p,
+						PWAIT|PCATCH, "procfs", 0);
+			}
+		}
+		return (error);
+
+	default:
+		panic("procfs_control");
+	}
+
+	/* DETACH, STEP and RUN all finish by restarting a stopped target. */
+	if (p->p_stat == SSTOP)
+		setrunnable(p);
+	return (0);
+}
+
+/*
+ * Handle a write to a process's ctl file.
+ *
+ * The written string is looked up first as a control command, which is
+ * handed to procfs_control(), and then as a signal name.  A signal sent
+ * to a target that is in trace-wait state relative to curp is recorded
+ * in p_xstat and the target is set running again; otherwise it is
+ * posted with psignal().  There is no way to single-step a signal
+ * delivery.
+ *
+ * Returns EOPNOTSUPP for reads and for unknown commands or signals, an
+ * error from vfs_getuserstr() or procfs_control(), or 0.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char buf[PROCFS_CTLLEN+1];
+	vfs_namemap_t *ent;
+	int buflen = PROCFS_CTLLEN;
+	int error;
+
+	/* The ctl file is write-only. */
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	error = vfs_getuserstr(uio, buf, &buflen);
+	if (error)
+		return (error);
+
+	/* Debug control commands take precedence over signal names. */
+	ent = vfs_findname(ctlnames, buf, buflen);
+	if (ent)
+		return (procfs_control(curp, p, ent->nm_val));
+
+	ent = vfs_findname(signames, buf, buflen);
+	if (ent == 0)
+		return (EOPNOTSUPP);
+
+	if (TRACE_WAIT_P(curp, p)) {
+		/* Resume the stopped target with this signal pending. */
+		p->p_xstat = ent->nm_val;
+#ifdef FIX_SSTEP
+		FIX_SSTEP(p);
+#endif
+		setrunnable(p);
+	} else {
+		psignal(p, ent->nm_val);
+	}
+
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c
new file mode 100644
index 0000000..14c3fd3
--- /dev/null
+++ b/sys/fs/procfs/procfs_fpregs.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_fpregs.c	8.2 (Berkeley) 6/15/94
+ *
+ * From:
+ *	$Id: procfs_fpregs.c,v 1.7 1997/08/02 14:32:11 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	if (!CHECKIO(curp, p))
+		return EPERM;
+	/*
+	 * Validate the offset while it is still an off_t, so a negative
+	 * or oversized value never reaches the pointer arithmetic below.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+	kv = (char *)&r + (int)uio->uio_offset;
+	kl = (int)sizeof(r) - (int)uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+	/* Fetch p's current registers, then copy the window in or out. */
+	PHOLD(p);
+	error = procfs_read_fpregs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		if (p->p_stat != SSTOP)	/* writes need a stopped target */
+			error = EBUSY;
+		else
+			error = procfs_write_fpregs(p, &r);
+	}
+	PRELE(p);
+	uio->uio_offset = 0;
+	return (error);
+}
+
+int
+procfs_validfpregs(p)
+	struct proc *p;
+{
+	return (!(p->p_flag & P_SYSTEM));	/* nonzero unless P_SYSTEM is set */
+}
diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c
new file mode 100644
index 0000000..c6b8966
--- /dev/null
+++ b/sys/fs/procfs/procfs_map.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_status.c	8.3 (Berkeley) 2/17/94
+ *
+ *	$Id: procfs_map.c,v 1.18 1998/12/04 22:54:51 archie Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+#include <vm/vm.h>
+#include <vm/vm_prot.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+
+
+#define MEBUFFERSIZE 256
+
+/*
+ * The map entries can *almost* be read with programs like cat.  However,
+ * large maps need special programs to read.  It is not easy to implement
+ * a program that can sense the required size of the buffer, and then
+ * subsequently do a read with the appropriate size.  This operation cannot
+ * be atomic.  The best that we can do is to allow the program to do a read
+ * with an arbitrarily large buffer, and return as much as we can.  We can
+ * return an error code if the buffer is too small (EFBIG), then the program
+ * can try a bigger buffer.
+ */
+int
+procfs_domap(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int len;
+	int error;
+	vm_map_t map = &p->p_vmspace->vm_map;
+	pmap_t pmap = &p->p_vmspace->vm_pmap;
+	vm_map_entry_t entry;
+	char mebuffer[MEBUFFERSIZE];
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	if (uio->uio_offset != 0)	/* data only at offset 0 */
+		return (0);
+
+	error = 0;
+	if (map != &curproc->p_vmspace->vm_map)
+		vm_map_lock_read(map);
+	for (entry = map->header.next;
+		((uio->uio_resid > 0) && (entry != &map->header));
+		entry = entry->next) {
+		vm_object_t obj, tobj, lobj;
+		int ref_count, shadow_count, flags;
+		vm_offset_t addr;
+		int resident, privateresident;
+		char *type;
+
+		if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
+			continue;
+
+		obj = entry->object.vm_object;
+		if (obj && (obj->shadow_count == 1))
+			privateresident = obj->resident_page_count;
+		else
+			privateresident = 0;
+
+		resident = 0;
+		addr = entry->start;
+		while (addr < entry->end) {
+			if (pmap_extract( pmap, addr))
+				resident++;
+			addr += PAGE_SIZE;
+		}
+
+		/* The last object in the backing chain supplies the type. */
+		for( lobj = tobj = obj; tobj; tobj = tobj->backing_object)
+			lobj = tobj;
+
+		if (lobj) {
+			switch(lobj->type) {
+			default:
+			case OBJT_DEFAULT:
+				type = "default";
+				break;
+			case OBJT_VNODE:
+				type = "vnode";
+				break;
+			case OBJT_SWAP:
+				type = "swap";
+				break;
+			case OBJT_DEVICE:
+				type = "device";
+				break;
+			}
+			/* Counts and flags describe the entry's own object. */
+			flags = obj->flags;
+			ref_count = obj->ref_count;
+			shadow_count = obj->shadow_count;
+		} else {
+			type = "none";
+			flags = 0;
+			ref_count = 0;
+			shadow_count = 0;
+		}
+
+		/*
+		 * format (addresses are printed as u_long):
+		 *  start, end, resident, private resident, object, access,
+		 *  ref_count, shadow_count, flags, COW/NCOW, NC/NNC, type.
+		 */
+		snprintf(mebuffer, sizeof(mebuffer),
+		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s\n",
+			(u_long)entry->start, (u_long)entry->end,
+			resident, privateresident, (void *)obj,
+			(entry->protection & VM_PROT_READ)?"r":"-",
+			(entry->protection & VM_PROT_WRITE)?"w":"-",
+			(entry->protection & VM_PROT_EXECUTE)?"x":"-",
+			ref_count, shadow_count, flags,
+			(entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW",
+			(entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
+			type);
+
+		len = strlen(mebuffer);
+		if (len > uio->uio_resid) {
+			error = EFBIG;
+			break;
+		}
+		error = uiomove(mebuffer, len, uio);
+		if (error)
+			break;
+	}
+	if (map != &curproc->p_vmspace->vm_map)
+		vm_map_unlock_read(map);
+	return error;
+}
+
+int
+procfs_validmap(p)
+	struct proc *p;
+{
+	return (!(p->p_flag & P_SYSTEM));	/* nonzero unless P_SYSTEM is set */
+}
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
new file mode 100644
index 0000000..22d8f74
--- /dev/null
+++ b/sys/fs/procfs/procfs_mem.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_mem.c	8.5 (Berkeley) 6/15/94
+ *
+ *	$Id: procfs_mem.c,v 1.34 1998/07/15 02:32:19 bde Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_prot.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <sys/user.h>
+#include <sys/ptrace.h>
+
+static int	procfs_rwmem __P((struct proc *curp,
+				  struct proc *p, struct uio *uio));
+
+static int
+procfs_rwmem(curp, p, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct uio *uio;
+{
+	int error;
+	int writing;
+	struct vmspace *vm;
+	vm_map_t map;
+	vm_object_t object = NULL;
+	vm_offset_t pageno = 0;		/* page-aligned address */
+	vm_prot_t reqprot;
+	vm_offset_t kva;
+
+	/*
+	 * Copy uio data to or from p's address space, a page at a time.
+	 * Don't try to grab anything if the vmspace is being deallocated
+	 * or the process is exiting: its page table usage can be messed up.
+	 */
+	vm = p->p_vmspace;
+	if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
+		return EFAULT;
+	++vm->vm_refcnt;	/* paired with vmspace_free() at the end */
+	/*
+	 * The map we want...
+	 */
+	map = &vm->vm_map;
+
+	writing = uio->uio_rw == UIO_WRITE;
+	reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;
+
+	kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
+
+	/*
+	 * Only map in one page at a time.  We don't have to, but it
+	 * makes things easier.  This way is trivial - right?
+	 */
+	do {
+		vm_map_t tmap;
+		vm_offset_t uva;
+		int page_offset;		/* offset into page */
+		vm_map_entry_t out_entry;
+		vm_prot_t out_prot;
+		boolean_t wired;
+		vm_pindex_t pindex;
+		u_int len;
+		vm_page_t m;
+
+		object = NULL;
+
+		uva = (vm_offset_t) uio->uio_offset;
+
+		/*
+		 * Get the page-aligned address of this segment.
+		 */
+		pageno = trunc_page(uva);
+		page_offset = uva - pageno;
+
+		/*
+		 * How many bytes to copy
+		 */
+		len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+		if (uva >= VM_MAXUSER_ADDRESS) {
+			vm_offset_t tkva;
+
+			if (writing || 
+			    uva >= VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE ||
+			    (ptrace_read_u_check(p,
+						 uva - (vm_offset_t) VM_MAXUSER_ADDRESS,
+						 (size_t) len) &&
+			     !procfs_kmemaccess(curp))) {
+				error = 0;	/* refused: stop, no error */
+				break;
+			}
+
+			/* we are reading the "U area", force it into core */
+			PHOLD(p);
+
+			/* sanity check */
+			if (!(p->p_flag & P_INMEM)) {
+				/* aiee! */
+				PRELE(p);
+				error = EFAULT;
+				break;
+			}
+
+			/* populate the ptrace/procfs area */
+			p->p_addr->u_kproc.kp_proc = *p;
+			fill_eproc (p, &p->p_addr->u_kproc.kp_eproc);
+
+			/* locate the in-core address */
+			tkva = (uintptr_t)p->p_addr + uva - VM_MAXUSER_ADDRESS;
+
+			/* transfer it */
+			error = uiomove((caddr_t)tkva, len, uio);
+
+			/* let the pages go */
+			PRELE(p);
+
+			continue;	/* re-tests the loop condition */
+		}
+
+		/*
+		 * Fault the page on behalf of the process
+		 */
+		error = vm_fault(map, pageno, reqprot, FALSE);
+		if (error) {
+			error = EFAULT;
+			break;
+		}
+
+		/*
+		 * Now we need to get the page.  out_prot and wired aren't
+		 * used, and out_entry only feeds vm_map_lookup_done().  One
+		 * would think the vm code would be a *bit* nicer...  We use
+		 * tmap because vm_map_lookup() can change the map argument.
+		 */
+		tmap = map;
+		error = vm_map_lookup(&tmap, pageno, reqprot,
+			      &out_entry, &object, &pindex, &out_prot,
+			      &wired);
+
+		if (error) {
+			error = EFAULT;
+
+			/*
+			 * Make sure that there is no residue in 'object' from
+			 * an error return on vm_map_lookup.
+			 */
+			object = NULL;
+
+			break;
+		}
+
+		m = vm_page_lookup(object, pindex);
+
+		/* Allow fallback to backing objects if we are reading */
+
+		while (m == NULL && !writing && object->backing_object) {
+
+		  pindex += OFF_TO_IDX(object->backing_object_offset);
+		  object = object->backing_object;
+
+		  m = vm_page_lookup(object, pindex);
+		}
+
+		if (m == NULL) {
+			error = EFAULT;
+
+			/*
+			 * No reference has been taken on 'object' yet, so
+			 * clear it: the cleanup after the loop must not
+			 * deallocate it.
+			 */
+			object = NULL;
+
+			vm_map_lookup_done(tmap, out_entry);
+
+			break;
+		}
+
+		/*
+		 * Wire the page into memory
+		 */
+		vm_page_wire(m);
+
+		/*
+		 * We're done with tmap now.
+		 * But reference the object first, so that we won't lose
+		 * it.
+		 */
+		vm_object_reference(object);
+		vm_map_lookup_done(tmap, out_entry);
+
+		pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
+
+		/*
+		 * Now do the i/o move.
+		 */
+		error = uiomove((caddr_t)(kva + page_offset), len, uio);
+
+		pmap_kremove(kva);
+
+		/*
+		 * release the page and the object
+		 */
+		vm_page_unwire(m, 1);
+		vm_object_deallocate(object);
+
+		object = NULL;
+
+	} while (error == 0 && uio->uio_resid > 0);
+
+	if (object)
+		vm_object_deallocate(object);
+
+	kmem_free(kernel_map, kva, PAGE_SIZE);
+	vmspace_free(vm);
+	return (error);
+}
+
+/*
+ * Copy data in and out of the target process.
+ * We do this by mapping the process's page into
+ * the kernel and then doing a uiomove direct
+ * from the kernel address space.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	/* A zero-length transfer trivially succeeds. */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	/*
+	 * XXX
+	 * Access is normally decided by CHECKIO().  In addition, a
+	 * read is let through for any caller that passes
+	 * procfs_kmemaccess(): ps is sgid kmem, and not allowing it
+	 * here causes ps to not work properly.  Arguably, this is a
+	 * bug with what ps does.  This is still not good, as it may
+	 * be possible to grab illicit data if a process somehow gets
+	 * to be KMEM_GROUP.  Note that this also means that KMEM_GROUP
+	 * can't change without editing procfs.h!  All in all, quite
+	 * yucky.
+	 */
+	if (!CHECKIO(curp, p) &&
+	    (uio->uio_rw != UIO_READ || !procfs_kmemaccess(curp)))
+		return EPERM;
+
+	/* Permission granted: hand the transfer to procfs_rwmem(). */
+	return (procfs_rwmem(curp, p, uio));
+}
+
+/*
+ * Given process (p), find the vnode from which
+ * its text segment is being executed.
+ *
+ * It would be nice to grab this information from
+ * the VM system, however, there is no sure-fire
+ * way of doing that.  Instead, fork(), exec() and
+ * wait() all maintain the p_textvp field in the
+ * process proc structure which contains a held
+ * reference to the exec'ed vnode.
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	struct vnode *textvp = p->p_textvp;	/* text vnode kept in proc */
+	return (textvp);
+}
+
+int procfs_kmemaccess(curp)
+	struct proc *curp;
+{
+	int i;
+	struct ucred *cred;
+
+	/* suser() returns 0 for the superuser and an errno otherwise. */
+	cred = curp->p_cred->pc_ucred;
+	if (suser(cred, &curp->p_acflag) == 0)
+		return 1;
+	/* Otherwise access requires membership of KMEM_GROUP. */
+	for (i = 0; i < cred->cr_ngroups; i++)
+		if (cred->cr_groups[i] == KMEM_GROUP)
+			return 1;
+	return 0;
+}
diff --git a/sys/fs/procfs/procfs_note.c b/sys/fs/procfs/procfs_note.c
new file mode 100644
index 0000000..8bfde33
--- /dev/null
+++ b/sys/fs/procfs/procfs_note.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_note.c	8.2 (Berkeley) 1/21/94
+ *
+ *	$Id: procfs_note.c,v 1.4 1997/02/22 09:40:28 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char text[PROCFS_NOTELEN+1];
+	int textlen = PROCFS_NOTELEN;
+	int rv;
+
+	/* Notes can only be written. */
+	if (uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	/* Copy the note in; nothing is done with it yet. */
+	rv = vfs_getuserstr(uio, text, &textlen);
+	if (rv != 0)
+		return (rv);
+	/* send to process's notify function */
+	return (EOPNOTSUPP);
+}
diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c
new file mode 100644
index 0000000..d215d44
--- /dev/null
+++ b/sys/fs/procfs/procfs_regs.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_regs.c	8.4 (Berkeley) 6/15/94
+ *
+ * From:
+ *	$Id: procfs_regs.c,v 1.7 1997/08/02 14:32:16 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv;
+	int kl;
+
+	if (!CHECKIO(curp, p))
+		return EPERM;
+	/*
+	 * Validate the offset while it is still an off_t, so a negative
+	 * or oversized value never reaches the pointer arithmetic below.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+	kv = (char *)&r + (int)uio->uio_offset;
+	kl = (int)sizeof(r) - (int)uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+	/* Fetch p's current registers, then copy the window in or out. */
+	PHOLD(p);
+	error = procfs_read_regs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		if (p->p_stat != SSTOP)	/* writes need a stopped target */
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+	PRELE(p);
+	uio->uio_offset = 0;
+	return (error);
+}
+
+int
+procfs_validregs(p)
+	struct proc *p;
+{
+	return (!(p->p_flag & P_SYSTEM));	/* nonzero unless P_SYSTEM is set */
+}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
new file mode 100644
index 0000000..3176a64
--- /dev/null
+++ b/sys/fs/procfs/procfs_status.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_status.c	8.4 (Berkeley) 6/15/94
+ *
+ * From:
+ *	$Id: procfs_status.c,v 1.11 1998/07/11 07:45:45 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/tty.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+int
+procfs_dostatus(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	struct session *sess;
+	struct tty *tp;
+	struct ucred *cr;
+	char *ps;
+	char *sep;
+	int pid, ppid, pgid, sid;
+	int i;
+	int xlen;
+	int error;
+	char psbuf[256];		/* XXX - conservative */
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	pid = p->p_pid;
+	ppid = p->p_pptr ? p->p_pptr->p_pid : 0;
+	pgid = p->p_pgrp->pg_id;
+	sess = p->p_pgrp->pg_session;
+	sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+	/*
+	 * Output: comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg
+	 *         euid ruid rgid,egid,groups[1 .. NGROUPS]
+	 */
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+	if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+		ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+	else
+		ps += sprintf(ps, "%d,%d ", -1, -1);
+
+	sep = "";
+	if (sess->s_ttyvp) {
+		ps += sprintf(ps, "%sctty", sep);
+		sep = ",";
+	}
+	if (SESS_LEADER(p)) {
+		ps += sprintf(ps, "%ssldr", sep);
+		sep = ",";
+	}
+	if (*sep != ',')
+		ps += sprintf(ps, "noflags");
+
+	if (p->p_flag & P_INMEM)
+		ps += sprintf(ps, " %ld,%ld",
+			p->p_stats->p_start.tv_sec,
+			p->p_stats->p_start.tv_usec);
+	else
+		ps += sprintf(ps, " -1,-1");
+
+	{
+		struct timeval ut, st;
+
+		calcru(p, &ut, &st, (void *) 0);
+		ps += sprintf(ps, " %ld,%ld %ld,%ld",
+			ut.tv_sec,
+			ut.tv_usec,
+			st.tv_sec,
+			st.tv_usec);
+	}
+
+	ps += sprintf(ps, " %s",
+		(p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+	cr = p->p_ucred;
+	ps += sprintf(ps, " %lu %lu %lu", 
+		(u_long)cr->cr_uid,
+		(u_long)p->p_cred->p_ruid,
+		(u_long)p->p_cred->p_rgid);
+
+	/* egid (p->p_cred->p_svgid) is equal to cr_groups[0]
+	   see also getegid(2) in /sys/kern/kern_prot.c */
+
+	for (i = 0; i < cr->cr_ngroups; i++)
+		ps += sprintf(ps, ",%lu", (u_long)cr->cr_groups[i]);
+	ps += sprintf(ps, "\n");
+
+	/* Offsets outside the generated text read as end-of-file. */
+	xlen = ps - psbuf;
+	if (uio->uio_offset < 0 || uio->uio_offset >= xlen)
+		return (0);
+	ps = psbuf + (int)uio->uio_offset;
+	xlen = imin(xlen - (int)uio->uio_offset, uio->uio_resid);
+	if (xlen <= 0)
+		return (0);
+	error = uiomove(ps, xlen, uio);
+	return (error);
+}
+
+int
+procfs_docmdline(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char *ps;
+	int xlen;
+	int error;
+	char psbuf[256];
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	/*
+	 * For now, this is a hack.  To implement this fully would require
+	 * groping around in the process address space to follow argv etc.
+	 */
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, "\n");
+	/*
+	 * Clamp with signed arithmetic: the unsigned min() would turn a
+	 * negative remainder into a huge length and copy out stack data.
+	 */
+	xlen = ps - psbuf;
+	if (uio->uio_offset < 0 || uio->uio_offset >= xlen)
+		return (0);
+	ps = psbuf + (int)uio->uio_offset;
+	xlen = imin(xlen - (int)uio->uio_offset, uio->uio_resid);
+	error = (xlen <= 0) ? 0 : uiomove(ps, xlen, uio);
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_subr.c b/sys/fs/procfs/procfs_subr.c
new file mode 100644
index 0000000..98e3687
--- /dev/null
+++ b/sys/fs/procfs/procfs_subr.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_subr.c	8.6 (Berkeley) 5/14/95
+ *
+ *	$Id: procfs_subr.c,v 1.22 1999/01/05 03:53:06 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+/* head of the singly-linked list of all pfsnodes (linked by pfs_next) */
+static struct pfsnode *pfshead;
+/* PROCFS_LOCKED / PROCFS_WANT bits serializing node allocation */
+static int pfsvplock;
+
+/*
+ * allocate a pfsnode/vnode pair.  the vnode is
+ * referenced, but not locked.
+ *
+ * the pid, pfs_type, and mount point uniquely
+ * identify a pfsnode.  the mount point is needed
+ * because someone might mount this filesystem
+ * twice.
+ *
+ * all pfsnodes are maintained on a singly-linked
+ * list.  new nodes are only allocated when they cannot
+ * be found on this list.  entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list.  this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.  this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+/*
+ * mp:       mount the node belongs to
+ * vpp:      out; receives the vnode on success
+ * pid:      process id the node describes
+ * pfs_type: which kind of procfs node is wanted
+ *
+ * returns 0 on success, or the error from getnewvnode().
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+	struct mount *mp;
+	struct vnode **vpp;
+	long pid;
+	pfstype pfs_type;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct pfsnode *pfs;
+	struct vnode *vp;
+	struct pfsnode **pp;
+	int error;
+
+loop:
+	/* first try to find an existing node for (pid, type, mount) */
+	for (pfs = pfshead; pfs != 0; pfs = pfs->pfs_next) {
+		vp = PFSTOV(pfs);
+		if (pfs->pfs_pid == pid &&
+		    pfs->pfs_type == pfs_type &&
+		    vp->v_mount == mp) {
+			/* vget failed: rescan the list from the start */
+			if (vget(vp, 0, p))
+				goto loop;
+			*vpp = vp;
+			return (0);
+		}
+	}
+
+	/*
+	 * otherwise lock the vp list while we call getnewvnode
+	 * since that can block.
+	 */
+	if (pfsvplock & PROCFS_LOCKED) {
+		/* someone else is allocating; wait, then search again */
+		pfsvplock |= PROCFS_WANT;
+		(void) tsleep((caddr_t) &pfsvplock, PINOD, "pfsavp", 0);
+		goto loop;
+	}
+	pfsvplock |= PROCFS_LOCKED;
+
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced
+	 * elsewhere if MALLOC should block.
+	 */
+	MALLOC(pfs, struct pfsnode *, sizeof(struct pfsnode), M_TEMP, M_WAITOK);
+
+	if ((error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp)) != 0) {
+		FREE(pfs, M_TEMP);
+		goto out;
+	}
+	/* from here on error is 0, which is what the success path returns */
+	vp = *vpp;
+
+	vp->v_data = pfs;
+
+	pfs->pfs_next = 0;
+	pfs->pfs_pid = (pid_t) pid;
+	pfs->pfs_type = pfs_type;
+	pfs->pfs_vnode = vp;
+	pfs->pfs_flags = 0;
+	pfs->pfs_lockowner = 0;
+	pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+	/* pick the permission bits and vnode type for this kind of node */
+	switch (pfs_type) {
+	case Proot:	/* /proc = dr-xr-xr-x */
+		pfs->pfs_mode = (VREAD|VEXEC) |
+				(VREAD|VEXEC) >> 3 |
+				(VREAD|VEXEC) >> 6;
+		vp->v_type = VDIR;
+		vp->v_flag = VROOT;
+		break;
+
+	case Pcurproc:	/* /proc/curproc = lr--r--r-- */
+		pfs->pfs_mode = (VREAD) |
+				(VREAD >> 3) |
+				(VREAD >> 6);
+		vp->v_type = VLNK;
+		break;
+
+	case Pproc:	/* per-process directory = dr-xr-xr-x */
+		pfs->pfs_mode = (VREAD|VEXEC) |
+				(VREAD|VEXEC) >> 3 |
+				(VREAD|VEXEC) >> 6;
+		vp->v_type = VDIR;
+		break;
+
+	case Pfile:
+	case Pmem:	/* -rw-r----- */
+		pfs->pfs_mode = (VREAD|VWRITE) |
+				(VREAD) >> 3;;
+		vp->v_type = VREG;
+		break;
+
+	case Pregs:
+	case Pfpregs:	/* -rw------- */
+		pfs->pfs_mode = (VREAD|VWRITE);
+		vp->v_type = VREG;
+		break;
+
+	case Pctl:
+	case Pnote:
+	case Pnotepg:	/* --w------- */
+		pfs->pfs_mode = (VWRITE);
+		vp->v_type = VREG;
+		break;
+
+	case Ptype:
+	case Pmap:
+	case Pstatus:
+	case Pcmdline:	/* -r--r--r-- */
+		pfs->pfs_mode = (VREAD) |
+				(VREAD >> 3) |
+				(VREAD >> 6);
+		vp->v_type = VREG;
+		break;
+
+	default:
+		panic("procfs_allocvp");
+	}
+
+	/* add to procfs vnode list */
+	for (pp = &pfshead; *pp; pp = &(*pp)->pfs_next)
+		continue;
+	*pp = pfs;
+
+out:
+	/* drop the list lock and wake anyone sleeping on it above */
+	pfsvplock &= ~PROCFS_LOCKED;
+
+	if (pfsvplock & PROCFS_WANT) {
+		pfsvplock &= ~PROCFS_WANT;
+		wakeup((caddr_t) &pfsvplock);
+	}
+
+	return (error);
+}
+
+/*
+ * Detach the pfsnode belonging to (vp) from the pfshead list, if it
+ * is on it, then release the node's storage and clear vp->v_data.
+ * Always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *victim = VTOPFS(vp);
+	struct pfsnode **link = &pfshead;
+
+	/* advance until a link points at the victim or the list ends */
+	while (*link != 0 && *link != victim)
+		link = &(*link)->pfs_next;
+
+	/* found it: splice the victim out of the chain */
+	if (*link != 0)
+		*link = victim->pfs_next;
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Common read/write entry for procfs nodes.
+ *
+ * Looks up the process the node refers to, takes the per-node
+ * pfs_lockowner lock (sleeping while someone else holds it), and
+ * dispatches on pfs_type to the matching procfs_do*() handler.
+ *
+ * Returns the handler's result; EINVAL if the process cannot be
+ * found; EACCES for a write aimed at pid 1 while securelevel > 0;
+ * EOPNOTSUPP for node types with no handler.
+ */
+int
+procfs_rw(ap)
+	struct vop_read_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct proc *curp = uio->uio_procp;
+	struct pfsnode *pfs = VTOPFS(vp);
+	struct proc *p;
+	int rtval;
+
+	p = PFIND(pfs->pfs_pid);
+	if (p == 0)
+		return (EINVAL);
+	if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE)
+		return (EACCES);
+
+	/*
+	 * NOTE(review): (p) was looked up before this sleep; whether it
+	 * can go away while we wait is not visible here -- confirm.
+	 */
+	while (pfs->pfs_lockowner) {
+		tsleep(&pfs->pfs_lockowner, PRIBIO, "pfslck", 0);
+	}
+	pfs->pfs_lockowner = curproc->p_pid;
+
+	switch (pfs->pfs_type) {
+	case Pnote:
+	case Pnotepg:
+		rtval = procfs_donote(curp, p, pfs, uio);
+		break;
+
+	case Pregs:
+		rtval = procfs_doregs(curp, p, pfs, uio);
+		break;
+
+	case Pfpregs:
+		rtval = procfs_dofpregs(curp, p, pfs, uio);
+		break;
+
+	case Pctl:
+		rtval = procfs_doctl(curp, p, pfs, uio);
+		break;
+
+	case Pstatus:
+		rtval = procfs_dostatus(curp, p, pfs, uio);
+		break;
+
+	case Pmap:
+		rtval = procfs_domap(curp, p, pfs, uio);
+		break;
+
+	case Pmem:
+		rtval = procfs_domem(curp, p, pfs, uio);
+		break;
+
+	case Ptype:
+		rtval = procfs_dotype(curp, p, pfs, uio);
+		break;
+
+	case Pcmdline:
+		rtval = procfs_docmdline(curp, p, pfs, uio);
+		break;
+
+	default:
+		rtval = EOPNOTSUPP;
+		break;
+	}
+	/* release the node and let the next waiter in */
+	pfs->pfs_lockowner = 0;
+	wakeup(&pfs->pfs_lockowner);
+	return rtval;
+}
+
+/*
+ * Get a string from userland into (buf).  Strip a trailing
+ * nl character (to allow easy access from the shell).
+ * The buffer should be *buflenp + 1 chars long.  vfs_getuserstr
+ * will automatically add a nul char at the end.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL:    file offset is non-zero.
+ * EMSGSIZE:  message is longer than kernel buffer
+ * EFAULT:    user i/o buffer is not addressable
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int nbytes;
+	int error;
+
+	/* only a write at the very start of the file is accepted */
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* the caller's buffer must hold the whole request at once */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	nbytes = uio->uio_resid;
+
+	error = uiomove(buf, nbytes, uio);
+	if (error != 0)
+		return (error);
+
+	/* rewind so the next write needs no seek */
+	uio->uio_offset = 0;
+
+	/* terminate, then drop one trailing newline if there is one */
+	buf[nbytes] = '\0';
+	nbytes = strlen(buf);
+	if (nbytes > 0 && buf[nbytes - 1] == '\n') {
+		nbytes--;
+		buf[nbytes] = '\0';
+	}
+	*buflenp = nbytes;
+
+	return (0);
+}
+
+/*
+ * Look up (buf), a string of (buflen) characters followed by a nul,
+ * in the name map (nm).  The map ends at the first entry whose
+ * nm_name is null.  Returns the matching entry, or 0 if there is none.
+ *
+ * Each candidate's length is checked before the bytes are compared,
+ * so the comparison never reads past the end of a table name that
+ * is shorter than (buf).
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+
+	for (; nm->nm_name; nm++)
+		if (strlen(nm->nm_name) == (size_t)buflen &&
+		    bcmp(buf, nm->nm_name, buflen + 1) == 0)
+			return (nm);
+
+	return (0);
+}
+
+void
+procfs_exit(struct proc *p)
+{
+	const pid_t dead = p->p_pid;
+	struct pfsnode *node = pfshead;
+
+	/*
+	 * Get rid of every procfs node that refers to the exiting
+	 * process.
+	 *
+	 * vgone() eventually reaches procfs_freevp(), which unlinks
+	 * the node from the pfshead list.  Once that has happened our
+	 * cursor no longer points into the list, so following its
+	 * pfs_next could skip the node that took its place (or walk
+	 * off a stale pointer).  After each vgone() the scan therefore
+	 * restarts at pfshead.  Stepping to pfs_next only happens for
+	 * nodes we leave alone, so a restart never skips the head.
+	 */
+	while (node != 0) {
+		if (node->pfs_pid != dead) {
+			node = node->pfs_next;
+			continue;
+		}
+		vgone(PFSTOV(node));
+		node = pfshead;
+	}
+}
diff --git a/sys/fs/procfs/procfs_type.c b/sys/fs/procfs/procfs_type.c
new file mode 100644
index 0000000..8f85c54
--- /dev/null
+++ b/sys/fs/procfs/procfs_type.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	$Id: procfs_type.c,v 1.4 1997/03/24 11:24:42 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/sysent.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read handler for the per-process emulation-type file.
+ *
+ * curp: process doing the i/o (unused here)
+ * p:    process being described
+ * pfs:  procfs node being read (unused here)
+ * uio:  describes the transfer; only UIO_READ is accepted
+ *
+ * Copies out p->p_sysent->sv_name followed by a newline, or
+ * "Not Available" when no name can be found.  A read at a non-zero
+ * offset returns nothing.  Returns EOPNOTSUPP for writes, otherwise
+ * 0 or the error from uiomove().
+ */
+int
+procfs_dotype(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int len;
+	int error;
+	/*
+	 * buffer for emulation type
+	 */
+	char mebuffer[256];
+	char *none = "Not Available";
+	char *name;
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	if (uio->uio_offset != 0)
+		return (0);
+
+	if (p && p->p_sysent && p->p_sysent->sv_name)
+		name = p->p_sysent->sv_name;
+	else
+		name = none;
+
+	/* never copy more than fits, leaving room for the newline */
+	len = strlen(name);
+	if (len > (int)sizeof(mebuffer) - 1)
+		len = (int)sizeof(mebuffer) - 1;
+	bcopy(name, mebuffer, len);
+	mebuffer[len++] = '\n';
+	error = uiomove(mebuffer, len, uio);
+	return error;
+}
+
+/*
+ * Report whether the emulation-type file applies to (p):
+ * returns 0 when P_SYSTEM is set in p_flag, 1 otherwise.
+ */
+int
+procfs_validtype(p)
+	struct proc *p;
+{
+	if (p->p_flag & P_SYSTEM)
+		return (0);
+	return (1);
+}
diff --git a/sys/fs/procfs/procfs_vfsops.c b/sys/fs/procfs/procfs_vfsops.c
new file mode 100644
index 0000000..ac1ab53
--- /dev/null
+++ b/sys/fs/procfs/procfs_vfsops.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_vfsops.c	8.7 (Berkeley) 5/10/95
+ *
+ *	$Id: procfs_vfsops.c,v 1.25 1998/07/27 22:47:17 alex Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+/* VFS entry points private to this file; installed via procfs_vfsops */
+static int	procfs_init __P((struct vfsconf *vfsp));
+static int	procfs_mount __P((struct mount *mp, char *path, caddr_t data,
+				  struct nameidata *ndp, struct proc *p));
+static int	procfs_start __P((struct mount *mp, int flags, struct proc *p));
+static int	procfs_statfs __P((struct mount *mp, struct statfs *sbp,
+				   struct proc *p));
+static int	procfs_unmount __P((struct mount *mp, int mntflags,
+				    struct proc *p));
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ */
+/* ARGSUSED */
+static int
+procfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct statfs *st = &mp->mnt_stat;
+	size_t len;
+	int error;
+
+	/* UIO_MX has to be a power of two */
+	if ((UIO_MX & (UIO_MX - 1)) != 0) {
+		log(LOG_ERR, "procfs: invalid directory entry size\n");
+		return (EINVAL);
+	}
+
+	/* there is nothing that could be updated on a live mount */
+	if ((mp->mnt_flag & MNT_UPDATE) != 0)
+		return (EOPNOTSUPP);
+
+	/* register the exit hook when the reference count is exactly one */
+	if (mp->mnt_vfc->vfc_refcount == 1) {
+		error = at_exit(procfs_exit);
+		if (error != 0) {
+			printf("procfs:  cannot register procfs_exit with at_exit\n");
+			return (error);
+		}
+	}
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = 0;
+	vfs_getnewfsid(mp);
+
+	/* mounted-on name comes from the caller, zero padded */
+	(void) copyinstr(path, (caddr_t)st->f_mntonname, MNAMELEN, &len);
+	bzero(st->f_mntonname + len, MNAMELEN - len);
+
+	/* mounted-from name is the fixed string "procfs", zero padded */
+	len = sizeof("procfs") - 1;
+	bcopy("procfs", st->f_mntfromname, len);
+	bzero(st->f_mntfromname + len, MNAMELEN - len);
+
+	(void)procfs_statfs(mp, st, p);
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ */
+static int
+procfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	/* a forced unmount closes vnodes that are still active */
+	int flushflags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
+	int error;
+
+	error = vflush(mp, 0, flushflags);
+	if (error != 0)
+		return (error);
+
+	/* drop the exit hook when the reference count is exactly one */
+	if (mp->mnt_vfc->vfc_refcount == 1)
+		rm_at_exit(procfs_exit);
+
+	return (0);
+}
+
+/*
+ * Return the root vnode of mount (mp) in (*vpp): the Proot node
+ * with pid 0, obtained from procfs_allocvp().  Returns whatever
+ * procfs_allocvp() returns.
+ */
+int
+procfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+
+	return (procfs_allocvp(mp, vpp, 0, Proot));
+}
+
+/* ARGSUSED */
+/* nothing to do for procfs; always returns 0 */
+static int
+procfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Get file system statistics.
+ */
+static int
+procfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *cached = &mp->mnt_stat;
+
+	/* invented numbers: page-sized blocks, nothing free */
+	sbp->f_bsize = PAGE_SIZE;
+	sbp->f_iosize = PAGE_SIZE;
+	sbp->f_blocks = 1;	/* avoid divide by zero in some df's */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = maxproc;			/* approx */
+	sbp->f_ffree = maxproc - nprocs;	/* approx */
+
+	/* filling in the mount's own copy: the identity is already there */
+	if (sbp == cached)
+		return (0);
+
+	sbp->f_type = mp->mnt_vfc->vfc_typenum;
+	bcopy(&cached->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(cached->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(cached->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+	return (0);
+}
+
+/* filesystem initialization hook: nothing to set up; always returns 0 */
+static int
+procfs_init(vfsp)
+	struct vfsconf *vfsp;
+{
+
+	return (0);
+}
+
+/*
+ * Operations procfs does not implement itself.  Each name expands to
+ * one of the generic routines einval, eopnotsupp or nullop, cast to
+ * the function-pointer type of the slot it fills in procfs_vfsops.
+ */
+#define procfs_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+	    struct sockaddr *, struct vnode **, int *, struct ucred **)))einval)
+#define procfs_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+	    struct proc *)))eopnotsupp)
+#define procfs_sync ((int (*) __P((struct mount *, int, struct ucred *, \
+	    struct proc *)))nullop)
+#define procfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+	    size_t, struct proc *)))eopnotsupp)
+#define procfs_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+	    eopnotsupp)
+#define procfs_vptofh ((int (*) __P((struct vnode *, struct fid *)))einval)
+
+/*
+ * VFS operations vector for procfs.  Entries are positional, so the
+ * order below must match the member order of struct vfsops.
+ */
+static struct vfsops procfs_vfsops = {
+	procfs_mount,
+	procfs_start,
+	procfs_unmount,
+	procfs_root,
+	procfs_quotactl,
+	procfs_statfs,
+	procfs_sync,
+	procfs_vget,
+	procfs_fhtovp,
+	procfs_vptofh,
+	procfs_init,
+};
+
+/* register the vector under the name "procfs", flagged VFCF_SYNTHETIC */
+VFS_SET(procfs_vfsops, procfs, VFCF_SYNTHETIC);
diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c
new file mode 100644
index 0000000..1aa5453
--- /dev/null
+++ b/sys/fs/procfs/procfs_vnops.c
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (c) 1993, 1995 Jan-Simon Pendry
+ * Copyright (c) 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)procfs_vnops.c	8.18 (Berkeley) 5/21/95
+ *
+ *	$Id: procfs_vnops.c,v 1.63 1999/01/05 03:53:06 peter Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/fcntl.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/dirent.h>
+#include <machine/reg.h>
+#include <vm/vm_zone.h>
+#include <miscfs/procfs/procfs.h>
+#include <sys/pioctl.h>
+
+/* vnode operations private to this file */
+static int	procfs_abortop __P((struct vop_abortop_args *));
+static int	procfs_access __P((struct vop_access_args *));
+static int	procfs_badop __P((void));
+static int	procfs_bmap __P((struct vop_bmap_args *));
+static int	procfs_close __P((struct vop_close_args *));
+static int	procfs_getattr __P((struct vop_getattr_args *));
+static int	procfs_inactive __P((struct vop_inactive_args *));
+static int	procfs_ioctl __P((struct vop_ioctl_args *));
+static int	procfs_lookup __P((struct vop_lookup_args *));
+static int	procfs_open __P((struct vop_open_args *));
+static int	procfs_print __P((struct vop_print_args *));
+static int	procfs_readdir __P((struct vop_readdir_args *));
+static int	procfs_readlink __P((struct vop_readlink_args *));
+static int	procfs_reclaim __P((struct vop_reclaim_args *));
+static int	procfs_setattr __P((struct vop_setattr_args *));
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories.  It is
+ * used in procfs_lookup and procfs_readdir
+ */
+static struct proc_target {
+	u_char	pt_type;	/* directory entry type (DT_DIR or DT_REG) */
+	u_char	pt_namlen;	/* length of pt_name, not counting the nul */
+	char	*pt_name;	/* entry name */
+	pfstype	pt_pfstype;	/* procfs node type behind the name */
+	/*
+	 * optional per-process predicate, NULL if none; presumably
+	 * decides whether the entry exists for a process -- confirm
+	 * against procfs_lookup/procfs_readdir
+	 */
+	int	(*pt_valid) __P((struct proc *p));
+} proc_targets[] = {
+/* N(s) expands to two initializers: the length of literal s, then s */
+#define N(s) sizeof(s)-1, s
+	/*	  name		type		validp */
+	{ DT_DIR, N("."),	Pproc,		NULL },
+	{ DT_DIR, N(".."),	Proot,		NULL },
+	{ DT_REG, N("file"),	Pfile,		procfs_validfile },
+	{ DT_REG, N("mem"),	Pmem,		NULL },
+	{ DT_REG, N("regs"),	Pregs,		procfs_validregs },
+	{ DT_REG, N("fpregs"),	Pfpregs,	procfs_validfpregs },
+	{ DT_REG, N("ctl"),	Pctl,		NULL },
+	{ DT_REG, N("status"),	Pstatus,	NULL },
+	{ DT_REG, N("note"),	Pnote,		NULL },
+	{ DT_REG, N("notepg"),	Pnotepg,	NULL },
+	{ DT_REG, N("map"), 	Pmap,		procfs_validmap },
+	{ DT_REG, N("etype"),	Ptype,		procfs_validtype },
+	{ DT_REG, N("cmdline"),	Pcmdline,	NULL },
+#undef N
+};
+/* number of entries in proc_targets[] */
+static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
+
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * set things up for doing i/o on
+ * the pfsnode (vp).  (vp) is locked
+ * on entry, and should be left locked
+ * on exit.
+ *
+ * for procfs we don't need to do anything
+ * in particular for i/o.  all that is done
+ * is to support exclusive open on process
+ * memory images.
+ */
+static int
+procfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *opener, *target;
+
+	/* the process behind the node must still exist */
+	target = PFIND(pfs->pfs_pid);
+	if (target == NULL)
+		return (ENOENT);
+
+	/* only the memory image has any open-time rules */
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	/* an exclusive open and a writer cannot coexist, either way round */
+	if ((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL))
+		return (EBUSY);
+	if ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))
+		return (EBUSY);
+
+	/* refuse unless CHECKIO() or procfs_kmemaccess() lets the opener in */
+	opener = ap->a_p;
+	if (!CHECKIO(opener, target) && !procfs_kmemaccess(opener))
+		return (EPERM);
+
+	/* remember a writer's mode for later opens to check against */
+	if (ap->a_mode & FWRITE)
+		pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * close the pfsnode (vp) after doing i/o.
+ * (vp) is not locked on entry or exit.
+ *
+ * nothing to do for procfs other than undo
+ * any exclusive open flag (see _open above).
+ */
+static int
+procfs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *target;
+
+	/* only memory-image nodes carry any open state */
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	/* a writer closing an exclusively held node gives it back */
+	if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
+		pfs->pfs_flags &= ~(FWRITE|O_EXCL);
+
+	/*
+	 * A use count below two is taken to mean this is the last
+	 * close.  In that case, unless the target process asked for
+	 * its stop settings to linger (PF_LINGER in p_pfsflags), clear
+	 * its stop events and step state and wake it up.  This keeps a
+	 * process from staying stopped on an event after the process
+	 * that requested the stop has gone away or lost interest.
+	 */
+	if (ap->a_vp->v_usecount >= 2)
+		return (0);
+
+	target = pfind(pfs->pfs_pid);
+	if (target == NULL || (target->p_pfsflags & PF_LINGER))
+		return (0);
+
+	target->p_stops = 0;
+	target->p_step = 0;
+	wakeup(&target->p_step);
+
+	return (0);
+}
+
+/*
+ * do an ioctl operation on a pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ */
+/*
+ * Commands handled (all act on the process the node refers to):
+ *   PIOCBIS / PIOCBIC  set / clear bits in p_stops
+ *   PIOCSFL / PIOCGFL  set / get p_pfsflags
+ *   PIOCSTATUS         fill in a struct procfs_status
+ *   PIOCWAIT           if p_step is clear, sleep on p_stype first;
+ *                      then report the stop
+ *   PIOCCONT           resume a stopped process, optionally posting
+ *                      a signal to it first
+ *
+ * Returns 0 on success; ENOTTY for an unknown command or when the
+ * process cannot be found; EPERM when CHECKIO() refuses the caller;
+ * EINVAL for a bad PIOCCONT request; or the error from suser() or
+ * tsleep().
+ */
+static int
+procfs_ioctl(ap)
+	struct vop_ioctl_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *procp, *p;
+	int error;
+	int signo;
+	struct procfs_status *psp;
+	unsigned char flags;
+
+	p = ap->a_p;
+	procp = pfind(pfs->pfs_pid);
+	if (procp == NULL) {
+		return ENOTTY;
+	}
+
+	if (!CHECKIO(p, procp))
+		return EPERM;
+
+	switch (ap->a_command) {
+	case PIOCBIS:
+	  procp->p_stops |= *(unsigned int*)ap->a_data;
+	  break;
+	case PIOCBIC:
+	  procp->p_stops &= ~*(unsigned int*)ap->a_data;
+	  break;
+	case PIOCSFL:
+	  /*
+	   * NFLAGS is "non-suser flags" -- currently, only
+	   * PF_ISUGID ("ignore set u/g id");
+	   */
+#define NFLAGS	(PF_ISUGID)
+	  flags = (unsigned char)*(unsigned int*)ap->a_data;
+	  if (flags & NFLAGS && (error = suser(p->p_ucred, &p->p_acflag)))
+	    return error;
+	  procp->p_pfsflags = flags;
+	  break;
+	case PIOCGFL:
+	  *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
+	  /*
+	   * Must not fall into PIOCSTATUS: that would overwrite the
+	   * flags word just stored with a struct procfs_status.
+	   */
+	  break;
+	case PIOCSTATUS:
+	  psp = (struct procfs_status *)ap->a_data;
+	  psp->state = (procp->p_step == 0);
+	  psp->flags = procp->p_pfsflags;
+	  psp->events = procp->p_stops;
+	  if (procp->p_step) {
+	    psp->why = procp->p_stype;
+	    psp->val = procp->p_xstat;
+	  } else {
+	    psp->why = psp->val = 0;	/* Not defined values */
+	  }
+	  break;
+	case PIOCWAIT:
+	  psp = (struct procfs_status *)ap->a_data;
+	  if (procp->p_step == 0) {
+	    error = tsleep(&procp->p_stype, PWAIT | PCATCH, "piocwait", 0);
+	    if (error)
+	      return error;
+	  }
+	  psp->state = 1;	/* It stopped */
+	  psp->flags = procp->p_pfsflags;
+	  psp->events = procp->p_stops;
+	  psp->why = procp->p_stype;	/* why it stopped */
+	  psp->val = procp->p_xstat;	/* any extra info */
+	  break;
+	case PIOCCONT:	/* Restart a proc */
+	  if (procp->p_step == 0)
+	    return EINVAL;	/* Can only start a stopped process */
+	  /* a zero argument means "continue without a signal" */
+	  if ((signo = *(int*)ap->a_data) != 0) {
+	    if (signo >= NSIG || signo <= 0)
+	      return EINVAL;
+	    psignal(procp, signo);
+	  }
+	  procp->p_step = 0;
+	  wakeup(&procp->p_step);
+	  break;
+	default:
+	  return (ENOTTY);
+	}
+	return 0;
+}
+
+/*
+ * do block mapping for pfsnode (vp).
+ * since we don't use the buffer cache
+ * for procfs this function should never
+ * be called.  in any case, it's not clear
+ * what part of the kernel ever makes use
+ * of this function.  for sanity, this is the
+ * usual no-op bmap, although returning
+ * (EIO) would be a reasonable alternative.
+ */
+static int
+procfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+
+	/* identity mapping: hand back the same vnode ... */
+	if (ap->a_vpp) {
+		*ap->a_vpp = ap->a_vp;
+	}
+	/* ... the same block number ... */
+	if (ap->a_bnp) {
+		*ap->a_bnp = ap->a_bn;
+	}
+	/* ... and a run length of zero */
+	if (ap->a_runp) {
+		*ap->a_runp = 0;
+	}
+	return (0);
+}
+
+/*
+ * procfs_inactive is called when the pfsnode
+ * is vrele'd and the reference count goes
+ * to zero.  (vp) will be on the vnode free
+ * list, so to get it back vget() must be
+ * used.
+ *
+ * (vp) is locked on entry, but must be unlocked on exit.
+ */
+static int
+procfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	/* nothing to tear down; just drop the lock we were entered with */
+	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+	return (0);
+}
+
+/*
+ * _reclaim is called when getnewvnode()
+ * wants to make use of an entry on the vnode
+ * free list.  at this time the filesystem needs
+ * to free any private data and remove the node
+ * from any private lists.
+ */
+static int
+procfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/* all of the work is done by procfs_freevp(); pass its result on */
+	return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp).
+ */
+static int
+procfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	/* one line: node type, pid, mode bits and flags; always returns 0 */
+	printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
+	    pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
+	return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail.  this entry is responsible
+ * for undoing any side-effects caused by the lookup.
+ * this will always include freeing the pathname buffer.
+ */
+static int
+procfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	/* leave the buffer alone unless HASBUF is set and SAVESTART is not */
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) != HASBUF)
+		return (0);
+
+	/* give the pathname buffer back to the namei zone */
+	zfree(namei_zone, cnp->cn_pnbuf);
+	return (0);
+}
+
+/*
+ * generic entry point for unsupported operations
+ */
+static int
+procfs_badop()
+{
+
+	/* every unsupported operation fails the same way */
+	return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ * Directory lengths are returned as zero since
+ * any real length would require the genuine size
+ * to be computed, and nothing cares anyway.
+ *
+ * this is relatively minimal for procfs.
+ */
+static int
+procfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct vattr *vap = ap->a_vap;
+	struct proc *procp;
+	int error;
+
+	/*
+	 * First make sure that the process and its credentials 
+	 * still exist.
+	 */
+	switch (pfs->pfs_type) {
+	case Proot:
+	case Pcurproc:
+		procp = 0;
+		break;
+
+	default:
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == 0 || procp->p_cred == NULL ||
+		    procp->p_ucred == NULL)
+			return (ENOENT);
+	}
+
+	error = 0;
+
+	/* start by zeroing out the attributes */
+	VATTR_NULL(vap);
+
+	/* next do all the common fields */
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_mode = pfs->pfs_mode;
+	vap->va_fileid = pfs->pfs_fileno;
+	vap->va_flags = 0;
+	vap->va_blocksize = PAGE_SIZE;
+	vap->va_bytes = vap->va_size = 0;
+
+	/*
+	 * Make all times be current TOD.
+	 * It would be possible to get the process start
+	 * time from the p_stat structure, but there's
+	 * no "file creation" time stamp anyway, and the
+	 * p_stat structure is not addressible if u. gets
+	 * swapped out for that process.
+	 */
+	nanotime(&vap->va_ctime);
+	vap->va_atime = vap->va_mtime = vap->va_ctime;
+
+	/*
+	 * If the process has exercised some setuid or setgid
+	 * privilege, then rip away read/write permission so
+	 * that only root can gain access.
+	 */
+	switch (pfs->pfs_type) {
+	case Pctl:
+	case Pregs:
+	case Pfpregs:
+		if (procp->p_flag & P_SUGID)
+			vap->va_mode &= ~((VREAD|VWRITE)|
+					  ((VREAD|VWRITE)>>3)|
+					  ((VREAD|VWRITE)>>6));
+		break;
+	case Pmem:
+		/* Retain group kmem readablity. */
+		if (procp->p_flag & P_SUGID)
+			vap->va_mode &= ~(VREAD|VWRITE);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * now do the object specific fields
+	 *
+	 * The size could be set from struct reg, but it's hardly
+	 * worth the trouble, and it puts some (potentially) machine
+	 * dependent data into this machine-independent code.  If it
+	 * becomes important then this function should break out into
+	 * a per-file stat function in the corresponding .c file.
+	 */
+
+	switch (pfs->pfs_type) {
+	case Proot:
+		/*
+		 * Set nlink to 1 to tell fts(3) we don't actually know.
+		 */
+		vap->va_nlink = 1;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_size = vap->va_bytes = DEV_BSIZE;
+		break;
+
+	case Pcurproc: {
+		char buf[16];		/* should be enough */
+		vap->va_nlink = 1;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_size = vap->va_bytes =
+		    snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
+		break;
+	}
+
+	case Pproc:
+		vap->va_nlink = nproc_targets;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		vap->va_size = vap->va_bytes = DEV_BSIZE;
+		break;
+
+	case Pfile:
+		error = EOPNOTSUPP;
+		break;
+
+	case Pmem:
+		vap->va_nlink = 1;
+		/*
+		 * If we denied owner access earlier, then we have to
+		 * change the owner to root - otherwise 'ps' and friends
+		 * will break even though they are setgid kmem. *SIGH*
+		 */
+		if (procp->p_flag & P_SUGID)
+			vap->va_uid = 0;
+		else
+			vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = KMEM_GROUP;
+		break;
+
+	case Ptype:
+	case Pmap:
+	case Pregs:
+		vap->va_bytes = vap->va_size = sizeof(struct reg);
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	case Pfpregs:
+		vap->va_bytes = vap->va_size = sizeof(struct fpreg);
+
+	case Pctl:
+	case Pstatus:
+	case Pnote:
+	case Pnotepg:
+	case Pcmdline:
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	default:
+		panic("procfs_getattr");
+	}
+
+	return (error);
+}
+
+/*
+ * procfs_setattr: VOP_SETATTR entry point.
+ *
+ * Attribute changes are silently accepted and ignored so that
+ * creat() (which truncates to length 0) and things like
+ * "echo $note > /proc/$pid/note" keep working.  The single
+ * exception is an attempt to set file flags, which is rejected
+ * with EOPNOTSUPP.
+ */
+static int
+procfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vattr *vap = ap->a_vap;
+
+	/* va_flags == VNOVAL means "flags not being changed" */
+	return (vap->va_flags != VNOVAL ? EOPNOTSUPP : 0);
+}
+
+/*
+ * implement access checking.
+ *
+ * something very similar to this code is duplicated
+ * throughout the 4bsd kernel and should be moved
+ * into kern/vfs_subr.c sometime.
+ *
+ * actually, the check for super-user is slightly
+ * broken since it will allow read access to write-only
+ * objects.  this doesn't cause any particular trouble
+ * but does mean that the i/o entry points need to check
+ * that the operation really does make sense.
+ *
+ * Returns 0 when access is granted, EACCES when it is denied,
+ * or the error from VOP_GETATTR() if the attributes cannot be
+ * obtained.  The requested mode in (ap) is left untouched: the
+ * owner/group/other shifting is done on a local copy so that the
+ * caller's argument structure is not modified behind its back.
+ */
+static int
+procfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct ucred *cred = ap->a_cred;
+	struct vattr vattr;
+	gid_t *gp;
+	int error, i, mode;
+
+	/*
+	 * If you're the super-user,
+	 * you always get access.
+	 */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	error = VOP_GETATTR(ap->a_vp, &vattr, cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * Access check is based on only one of owner, group, public.
+	 * If not owner, then check group. If not a member of the
+	 * group, then check public access.
+	 */
+	mode = ap->a_mode;
+	if (cred->cr_uid != vattr.va_uid) {
+		/* not the owner: move the request down to the group bits */
+		mode >>= 3;
+		gp = cred->cr_groups;
+		for (i = 0; i < cred->cr_ngroups; i++, gp++)
+			if (vattr.va_gid == *gp)
+				break;
+		/* no group matched: move down again to the "other" bits */
+		if (i >= cred->cr_ngroups)
+			mode >>= 3;
+	}
+
+	if ((vattr.va_mode & mode) == mode)
+		return (0);
+
+	return (EACCES);
+}
+
+/*
+ * lookup.  this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own.  otherwise
+ * read and inwardly digest ufs_lookup().
+ *
+ * On success the resulting vnode is stored in *a_vpp and 0 is
+ * returned.  Failures: EROFS for DELETE/RENAME lookups and for
+ * names that are not found during any operation other than LOOKUP,
+ * ENOENT for names not found during LOOKUP, EIO for ".." in the
+ * root, ENOTDIR when (dvp) is neither the root nor a process
+ * directory.
+ */
+static int
+procfs_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname = cnp->cn_nameptr;
+	struct proc *curp = cnp->cn_proc;
+	struct proc_target *pt;
+	struct vnode *fvp;
+	pid_t pid;
+	struct pfsnode *pfs;
+	struct proc *p;
+	int i;
+
+	*vpp = NULL;
+
+	/* nothing in procfs can be removed or renamed */
+	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
+		return (EROFS);
+
+	/* "." resolves to the directory itself, with an extra reference */
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);
+		/* vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curp); */
+		return (0);
+	}
+
+	pfs = VTOPFS(dvp);
+	switch (pfs->pfs_type) {
+	case Proot:
+		if (cnp->cn_flags & ISDOTDOT)
+			return (EIO);
+
+		if (CNEQ(cnp, "curproc", 7))
+			return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc));
+
+		/* anything else must be the decimal pid of a live process */
+		pid = atopid(pname, cnp->cn_namelen);
+		if (pid == NO_PID)
+			break;
+
+		p = PFIND(pid);
+		if (p == 0)
+			break;
+
+		return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc));
+
+	case Pproc:
+		if (cnp->cn_flags & ISDOTDOT)
+			return (procfs_root(dvp->v_mount, vpp));
+
+		p = PFIND(pfs->pfs_pid);
+		if (p == 0)
+			break;
+
+		/*
+		 * Search the per-process target table for an entry with
+		 * this name; entries with a pt_valid hook only match when
+		 * the hook accepts the process.
+		 */
+		for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
+			if (cnp->cn_namelen == pt->pt_namlen &&
+			    bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
+			    (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
+				goto found;
+		}
+		break;
+
+	found:
+		/*
+		 * Pfile does not get a procfs vnode of its own: the vnode
+		 * returned by procfs_findtextvp() is handed back directly,
+		 * referenced and locked.
+		 */
+		if (pt->pt_pfstype == Pfile) {
+			fvp = procfs_findtextvp(p);
+			/* We already checked that it exists. */
+			VREF(fvp);
+			vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, curp);
+			*vpp = fvp;
+			return (0);
+		}
+
+		return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
+		    pt->pt_pfstype));
+
+	default:
+		return (ENOTDIR);
+	}
+
+	/* reached via "break": the name was not found */
+	return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
+}
+
+/*
+ * procfs_validfile: report whether process (p) has a text file,
+ * i.e. whether procfs_findtextvp() yields a vnode for it.
+ * Returns non-zero if it does, zero otherwise.
+ */
+int
+procfs_validfile(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = procfs_findtextvp(p);
+	return (textvp != NULLVP);
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove.  a more efficient
+ * though more complex implementation, would try to minimize
+ * the number of calls to uiomove().  for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * this should just be done through read()
+ *
+ * Every entry occupies exactly UIO_MX bytes, so the uio offset
+ * divided by UIO_MX is the index of the next entry to produce.
+ * Returns EINVAL for an offset that is negative, not a multiple
+ * of UIO_MX or too large for an int, or for a buffer smaller than
+ * one entry; ENOTDIR when (vp) is not a procfs directory;
+ * otherwise 0 or the error reported by uiomove().
+ */
+static int
+procfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		int *a_ncookies;
+		u_long **a_cookies;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsdent d;
+	struct pfsdent *dp = &d;
+	struct pfsnode *pfs;
+	int error, i, off;
+
+	pfs = VTOPFS(ap->a_vp);
+
+	off = (int)uio->uio_offset;
+	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
+	    uio->uio_resid < UIO_MX)
+		return (EINVAL);
+
+	error = 0;
+	i = (u_int)off / UIO_MX;
+
+	switch (pfs->pfs_type) {
+	/*
+	 * this is for the process-specific sub-directories.
+	 * all that is needed is to copy out all the entries
+	 * from the proc_targets[] table.
+	 */
+	case Pproc: {
+		struct proc *p;
+		struct proc_target *pt;
+
+		p = PFIND(pfs->pfs_pid);
+		if (p == NULL)
+			break;
+
+		for (pt = &proc_targets[i];
+		     uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
+			if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
+				continue;
+
+			/*
+			 * Clear the whole record first: all UIO_MX bytes
+			 * are copied out, and whatever the name does not
+			 * fill must not expose stale kernel stack contents.
+			 */
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+			dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
+			dp->d_namlen = pt->pt_namlen;
+			bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1);
+			dp->d_type = pt->pt_type;
+
+			if ((error = uiomove((caddr_t)dp, UIO_MX, uio)) != 0)
+				break;
+		}
+
+		break;
+	    }
+
+	/*
+	 * this is for the root of the procfs filesystem
+	 * what is needed is a special entry for "curproc"
+	 * followed by an entry for each process on allproc
+	 * (and, when PROCFS_ZOMBIE is defined, on zombproc).
+	 */
+
+	case Proot: {
+#ifdef PROCFS_ZOMBIE
+		int doingzomb = 0;
+#endif
+		/*
+		 * pcnt is the entry index that (p) currently stands for.
+		 * Entries 0-2 are ".", ".." and "curproc", so the head of
+		 * the process list is entry 3.  When a read resumes at
+		 * i >= 3 pcnt must therefore start at 3; starting at 0
+		 * would make the skip loop below walk too far and drop
+		 * processes from the listing.
+		 */
+		int pcnt = (i < 3) ? i : 3;
+		volatile struct proc *p = allproc.lh_first;
+
+#ifdef PROCFS_ZOMBIE
+	again:
+#endif
+		for (; p && uio->uio_resid >= UIO_MX; i++, pcnt++) {
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+
+			switch (i) {
+			case 0:		/* `.' */
+			case 1:		/* `..' */
+				dp->d_fileno = PROCFS_FILENO(0, Proot);
+				dp->d_namlen = i + 1;
+				bcopy("..", dp->d_name, dp->d_namlen);
+				dp->d_name[i + 1] = '\0';
+				dp->d_type = DT_DIR;
+				break;
+
+			case 2:
+				dp->d_fileno = PROCFS_FILENO(0, Pcurproc);
+				dp->d_namlen = 7;
+				bcopy("curproc", dp->d_name, 8);
+				dp->d_type = DT_LNK;
+				break;
+
+			default:
+				/* advance to the process for entry i */
+				while (pcnt < i) {
+					pcnt++;
+					p = p->p_list.le_next;
+					if (!p)
+						goto done;
+				}
+				dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+				dp->d_namlen = sprintf(dp->d_name, "%ld",
+				    (long)p->p_pid);
+				/* a per-process node is itself a directory */
+				dp->d_type = DT_DIR;
+				p = p->p_list.le_next;
+				break;
+			}
+
+			if ((error = uiomove((caddr_t)dp, UIO_MX, uio)) != 0)
+				break;
+		}
+	done:
+
+#ifdef PROCFS_ZOMBIE
+		if (p == 0 && doingzomb == 0) {
+			doingzomb = 1;
+			p = zombproc.lh_first;
+			goto again;
+		}
+#endif
+
+		break;
+
+	    }
+
+	default:
+		error = ENOTDIR;
+		break;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * procfs_readlink: VOP_READLINK entry point.
+ *
+ * Only the `curproc' node is a link; its target is the pid of
+ * the calling process printed in decimal.  Any other node yields
+ * EINVAL.  Otherwise returns the result of uiomove().
+ */
+static int
+procfs_readlink(ap)
+	struct vop_readlink_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	char pidstr[16];		/* should be enough */
+	int n;
+
+	if (pfs->pfs_fileno != PROCFS_FILENO(0, Pcurproc))
+		return (EINVAL);
+
+	n = snprintf(pidstr, sizeof(pidstr), "%ld", (long)curproc->p_pid);
+
+	return (uiomove((caddr_t)pidstr, n, ap->a_uio));
+}
+
+/*
+ * atopid: convert (len) bytes of decimal ascii at (b) to a pid_t.
+ *
+ * Returns NO_PID as soon as a non-digit is seen or the running
+ * value exceeds PID_MAX.  A zero-length name converts to 0.
+ */
+static pid_t
+atopid(b, len)
+	const char *b;
+	u_int len;
+{
+	const char *cp;
+	pid_t pid;
+	u_int left;
+
+	pid = 0;
+	for (cp = b, left = len; left != 0; cp++, left--) {
+		char digit = *cp;
+
+		if (digit < '0' || digit > '9')
+			return (NO_PID);
+		pid = 10 * pid + (digit - '0');
+		if (pid > PID_MAX)
+			return (NO_PID);
+	}
+
+	return (pid);
+}
+
+/*
+ * procfs vnode operations.
+ *
+ * Maps each vnode operation descriptor to its procfs handler.
+ * Operations that would modify the filesystem (create, link,
+ * mkdir, mknod, remove, rename, rmdir, symlink) and advlock are
+ * routed to procfs_badop; read and write share procfs_rw; anything
+ * not listed goes to vop_defaultop.  The table is terminated by a
+ * { NULL, NULL } entry and registered through VNODEOP_SET().
+ */
+vop_t **procfs_vnodeop_p;
+static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_abortop_desc,		(vop_t *) procfs_abortop },
+	{ &vop_access_desc,		(vop_t *) procfs_access },
+	{ &vop_advlock_desc,		(vop_t *) procfs_badop },
+	{ &vop_bmap_desc,		(vop_t *) procfs_bmap },
+	{ &vop_close_desc,		(vop_t *) procfs_close },
+	{ &vop_create_desc,		(vop_t *) procfs_badop },
+	{ &vop_getattr_desc,		(vop_t *) procfs_getattr },
+	{ &vop_inactive_desc,		(vop_t *) procfs_inactive },
+	{ &vop_link_desc,		(vop_t *) procfs_badop },
+	{ &vop_lookup_desc,		(vop_t *) procfs_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) procfs_badop },
+	{ &vop_mknod_desc,		(vop_t *) procfs_badop },
+	{ &vop_open_desc,		(vop_t *) procfs_open },
+	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
+	{ &vop_print_desc,		(vop_t *) procfs_print },
+	{ &vop_read_desc,		(vop_t *) procfs_rw },
+	{ &vop_readdir_desc,		(vop_t *) procfs_readdir },
+	{ &vop_readlink_desc,		(vop_t *) procfs_readlink },
+	{ &vop_reclaim_desc,		(vop_t *) procfs_reclaim },
+	{ &vop_remove_desc,		(vop_t *) procfs_badop },
+	{ &vop_rename_desc,		(vop_t *) procfs_badop },
+	{ &vop_rmdir_desc,		(vop_t *) procfs_badop },
+	{ &vop_setattr_desc,		(vop_t *) procfs_setattr },
+	{ &vop_symlink_desc,		(vop_t *) procfs_badop },
+	{ &vop_write_desc,		(vop_t *) procfs_rw },
+	{ &vop_ioctl_desc,		(vop_t *) procfs_ioctl },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc procfs_vnodeop_opv_desc =
+	{ &procfs_vnodeop_p, procfs_vnodeop_entries };
+
+VNODEOP_SET(procfs_vnodeop_opv_desc);
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
new file mode 100644
index 0000000..88290e4
--- /dev/null
+++ b/sys/fs/specfs/spec_vnops.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)spec_vnops.c	8.14 (Berkeley) 5/21/95
+ * $Id: spec_vnops.c,v 1.79 1999/01/21 08:29:07 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/disklabel.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vnode_pager.h>
+#include <vm/vm_extern.h>
+
+#include <miscfs/specfs/specdev.h>
+
+static int	spec_advlock __P((struct vop_advlock_args *));  
+static int	spec_badop __P((void));
+static int	spec_bmap __P((struct vop_bmap_args *));
+static int	spec_close __P((struct vop_close_args *));
+static int	spec_freeblks __P((struct vop_freeblks_args *));
+static int	spec_fsync __P((struct  vop_fsync_args *));
+static int	spec_getattr __P((struct  vop_getattr_args *));
+static int	spec_getpages __P((struct vop_getpages_args *));
+static int	spec_inactive __P((struct  vop_inactive_args *));
+static int	spec_ioctl __P((struct vop_ioctl_args *));
+static int	spec_lookup __P((struct vop_lookup_args *));
+static int	spec_open __P((struct vop_open_args *));
+static int	spec_poll __P((struct vop_poll_args *));
+static int	spec_print __P((struct vop_print_args *));
+static int	spec_read __P((struct vop_read_args *));  
+static int	spec_strategy __P((struct vop_strategy_args *));
+static int	spec_write __P((struct vop_write_args *));
+
+/*
+ * Special-device vnode operations.
+ *
+ * Maps each vnode operation descriptor to its specfs handler.
+ * Namespace operations that make no sense on a device node
+ * (create, link, mkdir, mknod, readdir, readlink, remove, rename,
+ * rmdir, symlink) and reallocblks go to spec_badop, which panics;
+ * access and setattr go to vop_ebadf; lease and reclaim are
+ * no-ops (vop_null); anything not listed goes to vop_defaultop.
+ * The table is terminated by a { NULL, NULL } entry and registered
+ * through VNODEOP_SET().
+ */
+struct vnode *speclisth[SPECHSZ];
+vop_t **spec_vnodeop_p;
+static struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_access_desc,		(vop_t *) vop_ebadf },
+	{ &vop_advlock_desc,		(vop_t *) spec_advlock },
+	{ &vop_bmap_desc,		(vop_t *) spec_bmap },
+	{ &vop_close_desc,		(vop_t *) spec_close },
+	{ &vop_create_desc,		(vop_t *) spec_badop },
+	{ &vop_freeblks_desc,		(vop_t *) spec_freeblks },
+	{ &vop_fsync_desc,		(vop_t *) spec_fsync },
+	{ &vop_getattr_desc,		(vop_t *) spec_getattr },
+	{ &vop_getpages_desc,		(vop_t *) spec_getpages },
+	{ &vop_inactive_desc,		(vop_t *) spec_inactive },
+	{ &vop_ioctl_desc,		(vop_t *) spec_ioctl },
+	{ &vop_lease_desc,		(vop_t *) vop_null },
+	{ &vop_link_desc,		(vop_t *) spec_badop },
+	{ &vop_lookup_desc,		(vop_t *) spec_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) spec_badop },
+	{ &vop_mknod_desc,		(vop_t *) spec_badop },
+	{ &vop_open_desc,		(vop_t *) spec_open },
+	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
+	{ &vop_poll_desc,		(vop_t *) spec_poll },
+	{ &vop_print_desc,		(vop_t *) spec_print },
+	{ &vop_read_desc,		(vop_t *) spec_read },
+	{ &vop_readdir_desc,		(vop_t *) spec_badop },
+	{ &vop_readlink_desc,		(vop_t *) spec_badop },
+	{ &vop_reallocblks_desc,	(vop_t *) spec_badop },
+	{ &vop_reclaim_desc,		(vop_t *) vop_null },
+	{ &vop_remove_desc,		(vop_t *) spec_badop },
+	{ &vop_rename_desc,		(vop_t *) spec_badop },
+	{ &vop_rmdir_desc,		(vop_t *) spec_badop },
+	{ &vop_setattr_desc,		(vop_t *) vop_ebadf },
+	{ &vop_strategy_desc,		(vop_t *) spec_strategy },
+	{ &vop_symlink_desc,		(vop_t *) spec_badop },
+	{ &vop_write_desc,		(vop_t *) spec_write },
+	{ NULL, NULL }
+};
+static struct vnodeopv_desc spec_vnodeop_opv_desc =
+	{ &spec_vnodeop_p, spec_vnodeop_entries };
+
+VNODEOP_SET(spec_vnodeop_opv_desc);
+
+
+/*
+ * spec_vnoperate: dispatch a generic vnode operation through the
+ * specfs operations vector, using the offset recorded in the
+ * operation's descriptor.  Returns whatever the selected handler
+ * returns.
+ */
+int
+spec_vnoperate(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	int offset = ap->a_desc->vdesc_offset;
+
+	return (VOCALL(spec_vnodeop_p, offset, ap));
+}
+
+static void spec_getpages_iodone __P((struct buf *bp));
+
+/*
+ * spec_lookup: a device node is never a directory, so every
+ * lookup fails.  The result vnode pointer is cleared and ENOTDIR
+ * is returned.
+ */
+static int
+spec_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+
+	*vpp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open a special file.
+ *
+ * Validates the major number against the character or block
+ * device switch, applies the securelevel and "mounted on" checks
+ * below, then calls the driver's d_open routine and returns its
+ * result.  Returns ENXIO for -nodev mounts, out-of-range majors
+ * and drivers without an open routine; EPERM when securelevel
+ * forbids the open.  Vnodes that are neither VCHR nor VBLK succeed
+ * with 0 without calling any driver.
+ */
+/* ARGSUSED */
+static int
+spec_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct proc *p = ap->a_p;
+	struct vnode *bvp, *vp = ap->a_vp;
+	dev_t bdev, dev = (dev_t)vp->v_rdev;
+	int maj = major(dev);
+	int error;
+
+	/*
+	 * Don't allow open if fs is mounted -nodev.
+	 */
+	if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+		return (ENXIO);
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		/* the unsigned compare also rejects a negative major */
+		if ((u_int)maj >= nchrdev)
+			return (ENXIO);
+		if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL))
+			return ENXIO;
+		/* opens made with FSCRED are exempt from the write checks */
+		if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+			/*
+			 * When running in very secure mode, do not allow
+			 * opens for writing of any disk character devices.
+			 */
+			if (securelevel >= 2
+			    && cdevsw[maj]->d_bmaj != -1
+			    && (cdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
+				return (EPERM);
+			/*
+			 * When running in secure mode, do not allow opens
+			 * for writing of /dev/mem, /dev/kmem, or character
+			 * devices whose corresponding block devices are
+			 * currently mounted.
+			 */
+			if (securelevel >= 1) {
+				if ((bdev = chrtoblk(dev)) != NODEV &&
+				    vfinddev(bdev, VBLK, &bvp) &&
+				    bvp->v_usecount > 0 &&
+				    (error = vfs_mountedon(bvp)))
+					return (error);
+				if (iskmemdev(dev))
+					return (EPERM);
+			}
+		}
+		if ((cdevsw[maj]->d_flags & D_TYPEMASK) == D_TTY)
+			vp->v_flag |= VISTTY;
+		/* the vnode lock is dropped around the driver open call */
+		VOP_UNLOCK(vp, 0, p);
+		error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		return (error);
+		/* NOT REACHED */
+	case VBLK:
+		/* the unsigned compare also rejects a negative major */
+		if ((u_int)maj >= nblkdev)
+			return (ENXIO);
+		if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL))
+			return ENXIO;
+		/*
+		 * When running in very secure mode, do not allow
+		 * opens for writing of any disk block devices.
+		 */
+		if (securelevel >= 2 && ap->a_cred != FSCRED &&
+		    (ap->a_mode & FWRITE) &&
+		    (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
+			return (EPERM);
+
+		/*
+		 * Do not allow opens of block devices that are
+		 * currently mounted.
+		 */
+		error = vfs_mountedon(vp);
+		if (error)
+			return (error);
+		/* unlike the VCHR case, the vnode lock is not dropped here */
+		return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p));
+		/* NOT REACHED */
+	default:
+		break;
+	}
+	return (0);
+}
+
+/*
+ * Vnode op for read
+ *
+ * Character devices: the vnode lock is dropped and the request is
+ * handed to the driver's d_read routine.
+ *
+ * Block devices: the data is read through the buffer cache in
+ * bsize-sized blocks.  bsize defaults to BLKDEV_IOSIZE, or to the
+ * filesystem block size (p_frag * p_fsize) when the driver reports
+ * a BSD FFS partition through DIOCGPART.  When a read follows
+ * directly on the previous one, the next block is read ahead.
+ *
+ * Returns 0, EINVAL for a negative offset on a block device, or
+ * the error from the driver, the buffer cache or uiomove().
+ * Panics on a vnode that is neither VCHR nor VBLK.
+ */
+/* ARGSUSED */
+static int
+spec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct buf *bp;
+	daddr_t bn, nextbn;
+	long bsize, bscale;
+	struct partinfo dpart;
+	int n, on;
+	int rasize;
+	d_ioctl_t *ioctl;
+	int error = 0;
+	dev_t dev;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("spec_read mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("spec_read proc");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		VOP_UNLOCK(vp, 0, p);
+		error = (*cdevsw[major(vp->v_rdev)]->d_read)
+			(vp->v_rdev, uio, ap->a_ioflag);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		return (error);
+
+	case VBLK:
+		if (uio->uio_offset < 0)
+			return (EINVAL);
+		bsize = BLKDEV_IOSIZE;
+		dev = vp->v_rdev;
+		if ((ioctl = bdevsw[major(dev)]->d_ioctl) != NULL &&
+		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+		    dpart.part->p_fstype == FS_BSDFFS &&
+		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+			bsize = dpart.part->p_frag * dpart.part->p_fsize;
+		bscale = btodb(bsize);
+		do {
+			/* block containing the offset, and position in it */
+			bn = btodb(uio->uio_offset) & ~(bscale - 1);
+			on = uio->uio_offset % bsize;
+			n = min((unsigned)(bsize - on), uio->uio_resid);
+			if (vp->v_lastr + bscale == bn) {
+				/*
+				 * Sequential access: read ahead one block.
+				 * The read-ahead size goes through a real
+				 * int; the address of the long bsize must
+				 * not be passed off as an int pointer.
+				 */
+				nextbn = bn + bscale;
+				rasize = (int)bsize;
+				error = breadn(vp, bn, (int)bsize, &nextbn,
+					&rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+			vp->v_lastr = bn;
+			/* never hand out more than was actually read */
+			n = min(n, bsize - bp->b_resid);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			error = uiomove((char *)bp->b_data + on, n, uio);
+			brelse(bp);
+		} while (error == 0 && uio->uio_resid > 0 && n != 0);
+		return (error);
+
+	default:
+		panic("spec_read type");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Vnode op for write
+ *
+ * Character devices: the vnode lock is dropped and the request is
+ * handed to the driver's d_write routine.
+ *
+ * Block devices: the data is written through the buffer cache in
+ * bsize-sized blocks.  bsize defaults to BLKDEV_IOSIZE, or to the
+ * filesystem block size (p_frag * p_fsize) when the driver reports
+ * a BSD FFS partition through DIOCGPART.  As in spec_read(), a
+ * driver without an ioctl routine simply keeps the default size.
+ *
+ * Returns 0, EINVAL for a negative offset on a block device, or
+ * the error from the driver, the buffer cache or uiomove().
+ * Panics on a vnode that is neither VCHR nor VBLK.
+ */
+/* ARGSUSED */
+static int
+spec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct buf *bp;
+	daddr_t bn;
+	int bsize, blkmask;
+	struct partinfo dpart;
+	register int n, on;
+	d_ioctl_t *ioctl;
+	int error = 0;
+	dev_t dev;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("spec_write mode");
+	if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+		panic("spec_write proc");
+#endif
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		VOP_UNLOCK(vp, 0, p);
+		error = (*cdevsw[major(vp->v_rdev)]->d_write)
+			(vp->v_rdev, uio, ap->a_ioflag);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		return (error);
+
+	case VBLK:
+		if (uio->uio_resid == 0)
+			return (0);
+		if (uio->uio_offset < 0)
+			return (EINVAL);
+		bsize = BLKDEV_IOSIZE;
+		dev = vp->v_rdev;
+		/* do not call through a missing ioctl entry */
+		if ((ioctl = bdevsw[major(dev)]->d_ioctl) != NULL &&
+		    (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+		    dpart.part->p_fstype == FS_BSDFFS &&
+		    dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+			bsize = dpart.part->p_frag * dpart.part->p_fsize;
+		blkmask = btodb(bsize) - 1;
+		do {
+			/* block containing the offset, and position in it */
+			bn = btodb(uio->uio_offset) & ~blkmask;
+			on = uio->uio_offset % bsize;
+			n = min((unsigned)(bsize - on), uio->uio_resid);
+			/*
+			 * A whole-block write needs no old contents; a
+			 * partial one must read the block in first.
+			 */
+			if (n == bsize)
+				bp = getblk(vp, bn, bsize, 0, 0);
+			else
+				error = bread(vp, bn, bsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			n = min(n, bsize - bp->b_resid);
+			error = uiomove((char *)bp->b_data + on, n, uio);
+			/*
+			 * Start the write at once when the block has been
+			 * filled to its end, otherwise delay it.
+			 */
+			if (n + on == bsize)
+				bawrite(bp);
+			else
+				bdwrite(bp);
+		} while (error == 0 && uio->uio_resid > 0 && n != 0);
+		return (error);
+
+	default:
+		panic("spec_write type");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * spec_ioctl: device ioctl operation.
+ *
+ * Picks the ioctl routine out of the character or block device
+ * switch according to the vnode type, calls it with the caller's
+ * arguments and returns its result.  Panics on a vnode that is
+ * neither VCHR nor VBLK.
+ */
+/* ARGSUSED */
+static int
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+	d_ioctl_t *devioctl;
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		devioctl = cdevsw[major(dev)]->d_ioctl;
+		break;
+
+	case VBLK:
+		devioctl = bdevsw[major(dev)]->d_ioctl;
+		break;
+
+	default:
+		panic("spec_ioctl");
+		/* NOTREACHED */
+	}
+
+	return ((*devioctl)(dev, ap->a_command, ap->a_data, ap->a_fflag,
+	    ap->a_p));
+}
+
+/*
+ * spec_poll: poll operation.
+ *
+ * Character devices are polled through the driver's d_poll
+ * routine; every other vnode type is passed on to vop_defaultop().
+ */
+/* ARGSUSED */
+static int
+spec_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int  a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev;
+
+	if (vp->v_type != VCHR)
+		return (vop_defaultop((struct vop_generic_args *)ap));
+
+	dev = vp->v_rdev;
+	return ((*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p));
+}
+/*
+ * Synch buffers associated with a block device
+ *
+ * Character devices have nothing to flush.  For a block device
+ * every dirty buffer that is not busy is started on its way to
+ * disk; with MNT_WAIT the call also sleeps until all outstanding
+ * writes on the vnode have finished.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+spec_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct buf *nbp;
+	int s;
+
+	if (vp->v_type == VCHR)
+		return (0);
+	/*
+	 * Flush all dirty buffers associated with a block device.
+	 *
+	 * The dirty list is walked at splbio.  Each write is started
+	 * after dropping back to the saved level, so the scan is
+	 * restarted from the top ("goto loop") after every buffer.
+	 */
+loop:
+	s = splbio();
+	for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+		nbp = TAILQ_NEXT(bp, b_vnbufs);
+		/* skip buffers that are currently busy */
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("spec_fsync: not dirty");
+		if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
+			vfs_bio_awrite(bp);
+			splx(s);
+		} else {
+			/* take the buffer off the free list and mark it busy */
+			bremfree(bp);
+			bp->b_flags |= B_BUSY;
+			splx(s);
+			bawrite(bp);
+		}
+		goto loop;
+	}
+	/* still at splbio here: the loop fell through without writing */
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			(void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0);
+		}
+#ifdef DIAGNOSTIC
+		if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
+			vprint("spec_fsync: dirty", vp);
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * spec_inactive: nothing needs to be torn down for a device
+ * vnode; just release the vnode lock and report success.
+ */
+static int
+spec_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+
+	VOP_UNLOCK(vp, 0, p);
+	return (0);
+}
+
+/*
+ * spec_strategy: hand a buffer to the block driver's strategy
+ * routine.
+ *
+ * For a write whose buffer carries dependencies, the bioops
+ * io_start hook (when one is installed) is called first.
+ * Always returns 0.
+ */
+static int
+spec_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	int is_write = ((bp->b_flags & B_READ) == 0);
+
+	if (is_write && LIST_FIRST(&bp->b_dep) != NULL && bioops.io_start)
+		(*bioops.io_start)(bp);
+
+	(*bdevsw[major(bp->b_dev)]->d_strategy)(bp);
+	return (0);
+}
+
+/*
+ * spec_freeblks: pass a "free these blocks" request to the driver.
+ *
+ * Does nothing unless the driver advertises D_CANFREE.  Otherwise
+ * a buffer flagged B_FREEBUF that describes the range (a_addr,
+ * a_length) is built and given to the driver's strategy routine.
+ * Always returns 0.
+ */
+static int
+spec_freeblks(ap)
+	struct vop_freeblks_args /* {
+		struct vnode *a_vp;
+		daddr_t a_addr;
+		daddr_t a_length;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct cdevsw *bsw = bdevsw[major(vp->v_rdev)];
+	struct buf *bp;
+
+	if (bsw->d_flags & D_CANFREE) {
+		bp = geteblk(ap->a_length);
+		bp->b_flags |= B_FREEBUF | B_BUSY;
+		bp->b_dev = vp->v_rdev;
+		bp->b_blkno = ap->a_addr;
+		bp->b_offset = dbtob(ap->a_addr);
+		bp->b_bcount = ap->a_length;
+		(*bsw->d_strategy)(bp);
+	}
+
+	return (0);
+}
+
+/*
+ * spec_bmap: identity block mapping.
+ *
+ * The vnode and block number are returned exactly as given, and
+ * both run lengths are reported as zero.  Each output pointer may
+ * be NULL, in which case that result is skipped.  Always returns 0.
+ */
+static int
+spec_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+	int *runp = ap->a_runp;
+	int *runb = ap->a_runb;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	if (runp != NULL)
+		*runp = 0;
+	if (runb != NULL)
+		*runb = 0;
+
+	return (0);
+}
+
+/*
+ * Device close routine
+ *
+ * The driver's close routine is called only on the last close of
+ * the device (vcount() no greater than 1) or when the vnode is
+ * being closed forcibly (VXLOCK set); otherwise the call returns
+ * 0 without touching the driver.  Returns the driver's result, or
+ * the error from vinvalbuf() for a block device.  Panics on a
+ * vnode that is neither VCHR nor VBLK.
+ */
+/* ARGSUSED */
+static int
+spec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+	d_close_t *devclose;
+	int mode, error;
+
+	switch (vp->v_type) {
+
+	case VCHR:
+		/*
+		 * Hack: a tty device that is a controlling terminal
+		 * has a reference from the session structure.
+		 * We cannot easily tell that a character device is
+		 * a controlling terminal, unless it is the closing
+		 * process' controlling terminal.  In that case,
+		 * if the reference count is 2 (this last descriptor
+		 * plus the session), release the reference from the session.
+		 */
+		if (vcount(vp) == 2 && ap->a_p &&
+		    (vp->v_flag & VXLOCK) == 0 &&
+		    vp == ap->a_p->p_session->s_ttyvp) {
+			vrele(vp);
+			ap->a_p->p_session->s_ttyvp = NULL;
+		}
+		/*
+		 * If the vnode is locked, then we are in the midst
+		 * of forcibly closing the device, otherwise we only
+		 * close on last reference.
+		 */
+		if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+			return (0);
+		devclose = cdevsw[major(dev)]->d_close;
+		mode = S_IFCHR;
+		break;
+
+	case VBLK:
+		/*
+		 * On last close of a block device (that isn't mounted)
+		 * we must invalidate any in core blocks, so that
+		 * we can, for instance, change floppy disks.
+		 */
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+		error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
+		VOP_UNLOCK(vp, 0, ap->a_p);
+		if (error)
+			return (error);
+
+		/*
+		 * We do not want to really close the device if it
+		 * is still in use unless we are trying to close it
+		 * forcibly. Since every use (buffer, vnode, swap, cmap)
+		 * holds a reference to the vnode, and because we mark
+		 * any other vnodes that alias this device, when the
+		 * sum of the reference counts on all the aliased
+		 * vnodes descends to one, we are on last close.
+		 */
+		if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
+			return (0);
+
+		devclose = bdevsw[major(dev)]->d_close;
+		mode = S_IFBLK;
+		break;
+
+	default:
+		panic("spec_close: not special");
+	}
+
+	/* common tail: call the close routine selected above */
+	return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
+}
+
+/*
+ * spec_print: debugging aid that prints the major and minor
+ * device numbers of a special device vnode.  Always returns 0.
+ */
+static int
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	printf("tag VT_NON, dev %d, %d\n",
+	    major(vp->v_rdev),
+	    minor(vp->v_rdev));
+
+	return (0);
+}
+
+/*
+ * Special device advisory byte-level locks are not provided: requests
+ * with F_FLOCK set in a_flags get EOPNOTSUPP, all others get EINVAL.
+ */
+/* ARGSUSED */
+static int
+spec_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	return ((ap->a_flags & F_FLOCK) != 0 ? EOPNOTSUPP : EINVAL);
+}
+
+/*
+ * Special device bad operation: panics whenever it is called.
+ */
+static int
+spec_badop()
+{
+
+	panic("spec_badop called");
+	/* NOTREACHED */
+}
+
+static void
+spec_getpages_iodone(bp)
+	struct buf *bp;
+{
+	/* b_iodone hook of spec_getpages: flag completion, wake the sleeper. */
+	bp->b_flags |= B_DONE;
+	wakeup(bp);
+}
+
+/*
+ * Page in ap->a_m[] from the device behind ap->a_vp with a single read
+ * issued through VOP_STRATEGY on a pbuf, sleeping until the iodone
+ * callback sets B_DONE.  Returns VM_PAGER_OK when the requested page
+ * (a_reqpage) ends up with valid data and VM_PAGER_ERROR otherwise.
+ */
+static int
+spec_getpages(ap)
+	struct vop_getpages_args *ap;
+{
+	vm_offset_t kva;
+	int error;
+	int i, pcount, size, s;
+	daddr_t blkno;
+	struct buf *bp;
+	vm_page_t m;
+	vm_ooffset_t offset;
+	int toff, nextoff, nread;
+	struct vnode *vp = ap->a_vp;
+	int blksiz;
+	int gotreqpage;
+
+	error = 0;
+	pcount = round_page(ap->a_count) / PAGE_SIZE;
+
+	/* Calculate the offset of the transfer. */
+	offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
+
+	/* XXX sanity check before we go into details. */
+	/* XXX limits should be defined elsewhere. */
+#define	DADDR_T_BIT	32
+#define	OFFSET_MAX	((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1)
+	if (offset < 0 || offset > OFFSET_MAX) {
+		/* XXX still no %q in kernel. */
+		printf("spec_getpages: preposterous offset 0x%x%08x\n",
+		       (u_int)((u_quad_t)offset >> 32),
+		       (u_int)(offset & 0xffffffff));
+		return (VM_PAGER_ERROR);
+	}
+
+	blkno = btodb(offset);
+
+	/*
+	 * Round up physical size for real devices, use the
+	 * fundamental blocksize of the fs if possible.
+	 */
+	if (vp && vp->v_mount) {
+		if (vp->v_type != VBLK) {
+			vprint("Non VBLK", vp);
+		}
+		blksiz = vp->v_mount->mnt_stat.f_bsize;
+		if (blksiz < DEV_BSIZE) {
+			blksiz = DEV_BSIZE;
+		}
+	}
+	else
+		blksiz = DEV_BSIZE;
+	size = (ap->a_count + blksiz - 1) & ~(blksiz - 1);
+
+	bp = getpbuf(NULL);
+	kva = (vm_offset_t)bp->b_data;
+
+	/* Map the pages to be read into the kva. */
+	pmap_qenter(kva, ap->a_m, pcount);
+
+	/* Build a minimal buffer header. */
+	bp->b_flags = B_BUSY | B_READ | B_CALL;
+	bp->b_iodone = spec_getpages_iodone;
+
+	/* B_PHYS is not set, but it is nice to fill this in. */
+	bp->b_proc = curproc;
+	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+	if (bp->b_rcred != NOCRED)
+		crhold(bp->b_rcred);
+	if (bp->b_wcred != NOCRED)
+		crhold(bp->b_wcred);
+	bp->b_blkno = blkno;
+	bp->b_lblkno = blkno;
+	pbgetvp(ap->a_vp, bp);
+	bp->b_bcount = size;
+	bp->b_bufsize = size;
+	bp->b_resid = 0;
+
+	cnt.v_vnodein++;
+	cnt.v_vnodepgsin += pcount;
+
+	/* Do the input. */
+	VOP_STRATEGY(bp->b_vp, bp);
+
+	s = splbio();
+
+	/* We definitely need to be at splbio here. */
+	while ((bp->b_flags & B_DONE) == 0)
+		tsleep(bp, PVM, "spread", 0);
+
+	splx(s);
+
+	if ((bp->b_flags & B_ERROR) != 0) {
+		if (bp->b_error)
+			error = bp->b_error;
+		else
+			error = EIO;
+	}
+
+	nread = size - bp->b_resid;
+
+	if (nread < ap->a_count) {
+		bzero((caddr_t)kva + nread,
+			ap->a_count - nread);
+	}
+	pmap_qremove(kva, pcount);
+
+	/* Set each page's valid/dirty state from the byte count read. */
+	gotreqpage = 0;
+	for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
+		nextoff = toff + PAGE_SIZE;
+		m = ap->a_m[i];
+
+		m->flags &= ~PG_ZERO;
+
+		if (nextoff <= nread) {
+			m->valid = VM_PAGE_BITS_ALL;
+			m->dirty = 0;
+		} else if (toff < nread) {
+			int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
+			vm_page_set_validclean(m, 0, nvalid);
+		} else {
+			m->valid = 0;
+			m->dirty = 0;
+		}
+
+		if (i != ap->a_reqpage) {
+			/*
+			 * Just in case someone was asking for this page we
+			 * now tell them that it is ok to use.
+			 */
+			if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
+				if (m->valid) {
+					if (m->flags & PG_WANTED) {
+						vm_page_activate(m);
+					} else {
+						vm_page_deactivate(m);
+					}
+					vm_page_wakeup(m);
+				} else {
+					vm_page_free(m);
+				}
+			} else {
+				vm_page_free(m);
+			}
+		} else if (m->valid) {
+			gotreqpage = 1;
+		}
+	}
+	if (!gotreqpage) {
+		m = ap->a_m[ap->a_reqpage];
+#ifndef MAX_PERF
+		printf(
+	    "spec_getpages: I/O read failure: (error code=%d)\n",
+		    error);
+		printf(
+	    "               size: %d, resid: %ld, a_count: %d, valid: 0x%x\n",
+		    size, bp->b_resid, ap->a_count, m->valid);
+		printf(
+	    "               nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
+		    nread, ap->a_reqpage, (u_long)m->pindex, pcount);
+#endif
+		/* Free the buffer header back to the swap buffer pool. */
+		relpbuf(bp, NULL);
+		return VM_PAGER_ERROR;
+	}
+	/*
+	 * Free the buffer header back to the swap buffer pool.
+	 */
+	relpbuf(bp, NULL);
+	return VM_PAGER_OK;
+}
+
+/* ARGSUSED */
+static int
+spec_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vattr *vap = ap->a_vap;
+	struct partinfo dpart;
+
+	bzero(vap, sizeof (*vap));
+	/* Only va_blocksize, va_bytes and va_size are filled in below. */
+	if (vp->v_type == VBLK)
+		vap->va_blocksize = BLKDEV_IOSIZE;
+	else if (vp->v_type == VCHR)
+		vap->va_blocksize = MAXBSIZE;
+	/* NOTE(review): bdevsw[] is indexed for VCHR vnodes too -- confirm. */
+	if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
+	    (caddr_t)&dpart, FREAD, ap->a_p) == 0) {
+		vap->va_bytes = dbtob(dpart.disklab->d_partitions
+				      [minor(vp->v_rdev)].p_size);
+		vap->va_size = vap->va_bytes;
+	}
+	return (0);
+}
diff --git a/sys/fs/umapfs/umap.h b/sys/fs/umapfs/umap.h
new file mode 100644
index 0000000..0c6ca34
--- /dev/null
+++ b/sys/fs/umapfs/umap.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap.h	8.4 (Berkeley) 8/20/94
+ *
+ * $Id: umap.h,v 1.8 1997/02/22 09:40:37 peter Exp $
+ */
+
+#define MAPFILEENTRIES 64	/* rows in umap_mount.info_mapdata */
+#define GMAPFILEENTRIES 16	/* rows in umap_mount.info_gmapdata */
+#define NOBODY 32767		/* uid used when a uid has no map entry */
+#define NULLGROUP 65534		/* gid used when a gid has no map entry */
+
+struct umap_args {
+	char		*target;	/* user-space path of the lower directory */
+	int 		nentries;       /* # of entries in user map array */
+	int 		gnentries;	/* # of entries in group map array */
+	u_long 		(*mapdata)[2];	/* user-space array of uid pairs */
+	u_long 		(*gmapdata)[2];	/* user-space array of gid pairs */
+};
+
+struct umap_mount {
+	struct mount	*umapm_vfs;	/* mount of the lower filesystem */
+	struct vnode	*umapm_rootvp;	/* Reference to root umap_node */
+	int             info_nentries;  /* number of uid mappings */
+	int		info_gnentries;	/* number of gid mappings */
+	u_long		info_mapdata[MAPFILEENTRIES][2]; /* uid pairs searched
+	    by umap_findid ([0] -> [1]) */
+	u_long		info_gmapdata[GMAPFILEENTRIES][2]; /* gid pairs searched
+	    by umap_findid ([0] -> [1]) */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references: one per umap vnode, reached via v_data.
+ */
+struct umap_node {
+	LIST_ENTRY(umap_node) umap_hash;	/* Hash list */
+	struct vnode	*umap_lowervp;	/* Aliased vnode - VREFed once */
+	struct vnode	*umap_vnode;	/* Back pointer to the umap vnode */
+};
+
+extern int umapfs_init __P((struct vfsconf *vfsp));
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+#define	MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))
+#define	VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)
+#define UMAPTOV(xp) ((xp)->umap_vnode)
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define	UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)
+#else
+#define	UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)
+#endif
+
+extern vop_t **umap_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/umapfs/umap_subr.c b/sys/fs/umapfs/umap_subr.c
new file mode 100644
index 0000000..4974f03
--- /dev/null
+++ b/sys/fs/umapfs/umap_subr.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_subr.c	8.9 (Berkeley) 5/14/95
+ *
+ * $Id: umap_subr.c,v 1.15 1998/11/09 09:21:25 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+#define LOG2_SIZEVNODE 7		/* log2(sizeof struct vnode) */
+#define	NUMAPNODECACHE 16
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target vnode
+ * along with a pointer to the alias vnode.  When an
+ * entry is added the target vnode is VREF'd.  When the
+ * alias is removed the target vnode is vrele'd.
+ */
+
+#define	UMAP_NHASH(vp) \
+	(&umap_node_hashtbl \
+	[((uintptr_t)(void *)(vp) >> LOG2_SIZEVNODE) & umap_node_hash])
+static LIST_HEAD(umap_node_hashhead, umap_node) *umap_node_hashtbl;
+static u_long umap_node_hash;
+
+static u_long	umap_findid __P((u_long id, u_long map[][2], int nentries));
+static int	umap_node_alloc __P((struct mount *mp, struct vnode *lowervp,
+				     struct vnode **vpp));
+static struct vnode *
+		umap_node_find __P((struct mount *mp, struct vnode *targetvp));
+
+/*
+ * Initialise cache headers: build the umap_node hash table; the value
+ * left in umap_node_hash is what UMAP_NHASH masks with.  Returns 0.
+ */
+int
+umapfs_init(vfsp)
+	struct vfsconf *vfsp;
+{
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_init\n");		/* printed during system boot */
+#endif
+	umap_node_hashtbl = hashinit(NUMAPNODECACHE, M_CACHE, &umap_node_hash);
+	return (0);
+}
+
+/*
+ * umap_findid: forward lookup of a user or group id in a map.
+ * Scans the first nentries rows of map for one whose [0] element
+ * equals id and returns that row's [1] element, or (u_long)-1 when
+ * no row matches.
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		/* The first matching row wins. */
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+
+	/* Not mapped. */
+	return (-1);
+}
+
+/*
+ * umap_reverse_findid: reverse lookup of a user or group id in a map.
+ * Scans the first nentries rows of map for one whose [1] element
+ * equals id and returns that row's [0] element, or (u_long)-1 when
+ * no row matches.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		/* The first matching row wins. */
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+
+	/* Not mapped. */
+	return (-1);
+}
+
+/*
+ * Return alias for target vnode (vget'ed) if already exists, else 0.
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+	struct mount *mp;
+	struct vnode *targetvp;
+{
+	struct proc *p = curproc;		/* XXX */
+	struct umap_node_hashhead *hd;
+	struct umap_node *a;
+	struct vnode *vp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+	/*
+	 * Find hash base, and then search the (two-way) linked
+	 * list looking for a umap_node structure which is referencing
+	 * the target vnode on this mount.  If found, take a reference
+	 * on the alias vnode with vget() (but NOT on the target vnode).
+	 */
+	hd = UMAP_NHASH(targetvp);
+loop:
+	for (a = hd->lh_first; a != 0; a = a->umap_hash.le_next) {
+		if (a->umap_lowervp == targetvp &&
+		    a->umap_vnode->v_mount == mp) {
+			vp = UMAPTOV(a);
+			/*
+			 * We need vget for the VXLOCK stuff, but we
+			 * don't want to lock the lower node.  If vget
+			 * fails the chain is rescanned from the top.
+			 */
+			if (vget(vp, 0, p)) {
+#ifdef UMAPFS_DIAGNOSTIC
+				printf ("umap_node_find: vget failed.\n");
+#endif
+				goto loop;
+			}
+			return (vp);
+		}
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+	return (0);
+}
+
+/*
+ * Make a new umap_node node.
+ * *vpp receives the alias vnode, lowervp is the target vnode.
+ * Maintain a reference to (lowervp).
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct umap_node_hashhead *hd;
+	struct umap_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	/* XXX This routine probably needs a node_alloc lock */
+
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced
+	 * elsewhere if MALLOC should block.
+	 */
+	MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+	    M_TEMP, M_WAITOK);
+
+	error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp);
+	if (error) {
+		FREE(xp, M_TEMP);
+		return (error);
+	}
+	vp = *vpp;
+
+	vp->v_type = lowervp->v_type;
+	xp->umap_vnode = vp;
+	vp->v_data = xp;
+	xp->umap_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)  If so, hand back the
+	 * existing alias and discard the vnode we just made.
+	 */
+	othervp = umap_node_find(mp, lowervp);
+	if (othervp) {
+		FREE(xp, M_TEMP);	/* NOTE(review): vp->v_data still points here */
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return (0);
+	}
+	VREF(lowervp);   /* Extra VREF will be vrele'd in umap_node_create */
+	hd = UMAP_NHASH(lowervp);
+	LIST_INSERT_HEAD(hd, xp, umap_hash);
+	return (0);
+}
+
+
+/*
+ * Try to find an existing umap_node vnode referring
+ * to it, otherwise make a new umap_node vnode which
+ * contains a reference to the target vnode.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	aliasvp = umap_node_find(mp, targetvp);
+	if (aliasvp) {
+		/*
+		 * umap_node_find() already vget'ed the alias vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		vprint("umap_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+		/*
+		 * Make new vnode reference the umap_node.
+		 */
+		error = umap_node_alloc(mp, targetvp, &aliasvp);
+		if (error)
+			return (error);
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+	/* On success one reference on targetvp is dropped in either case. */
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+int umap_checkvp_barrier = 1;	/* nonzero: spin before panic, for a debugger */
+/*
+ * Diagnostic UMAPVPTOLOWERVP: check vp's umap_node, return its lowervp.
+ */
+struct vnode *
+umap_checkvp(vp, fil, lno)
+	struct vnode *vp;
+	char *fil;
+	int lno;
+{
+	struct umap_node *a = VTOUMAP(vp);
+#if 0
+	/* Can't do this check: vop_reclaim runs with funny vop vector. */
+	if (vp->v_op != umap_vnodeop_p) {
+		printf ("umap_checkvp: on non-umap-node\n");
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+#endif
+	if (a->umap_lowervp == NULL) {
+		/* Should never happen */
+		int i; u_long *p;
+		printf("vp = %x, ZERO ptr\n", vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)
+			printf(" %x", p[i]);
+		printf("\n");
+		/* wait for debugger */
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic("umap_checkvp");
+	}
+	if (a->umap_lowervp->v_usecount < 1) {
+		int i; u_long *p;
+		printf("vp = %x, unref'ed lowervp\n", vp);
+		for (p = (u_long *) a, i = 0; i < 8; i++)
+			printf(" %x", p[i]);
+		printf("\n");
+		/* wait for debugger */
+		while (umap_checkvp_barrier) /*WAIT*/ ;
+		panic ("umap with unref'ed lowervp");
+	}
+#if 0
+	printf("umap %x/%d -> %x/%d [%s, %d]\n",
+	        a->umap_vnode, a->umap_vnode->v_usecount,
+		a->umap_lowervp, a->umap_lowervp->v_usecount,
+		fil, lno);
+#endif
+	return (a->umap_lowervp);
+}
+#endif
+
+/*
+ * umap_mapids rewrites, in place, the user id and every group id held in
+ * credp using the uid/gid tables of the umap mount v_mount.  Ids without
+ * a table entry become NOBODY / NULLGROUP.  NOCRED is left untouched.
+ */
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	int i;
+	uid_t uid;
+	gid_t gid;
+
+	if (credp == NOCRED)
+		return;
+
+	/* Find uid entry in map */
+	uid = (uid_t) umap_findid(credp->cr_uid,
+				MOUNTTOUMAPMOUNT(v_mount)->info_mapdata,
+				MOUNTTOUMAPMOUNT(v_mount)->info_nentries);
+
+	if (uid != -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+	/* Find gid entry in map */
+	gid = (gid_t) umap_findid(credp->cr_gid,
+				MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata,
+				MOUNTTOUMAPMOUNT(v_mount)->info_gnentries);
+
+	if (gid != -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/*
+	 * Map every group in cr_groups[].  The array holds cr_ngroups
+	 * entries and is not zero-terminated, and gid 0 is a valid member.
+	 */
+	for (i = 0; i < credp->cr_ngroups; i++) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+				MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata,
+				MOUNTTOUMAPMOUNT(v_mount)->info_gnentries);
+
+		if (gid != -1)
+			credp->cr_groups[i] = gid;
+		else
+			credp->cr_groups[i] = NULLGROUP;
+	}
+}
diff --git a/sys/fs/umapfs/umap_vfsops.c b/sys/fs/umapfs/umap_vfsops.c
new file mode 100644
index 0000000..03b4cb4
--- /dev/null
+++ b/sys/fs/umapfs/umap_vfsops.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_vfsops.c	8.8 (Berkeley) 5/14/95
+ *
+ * $Id: umap_vfsops.c,v 1.22 1998/05/06 05:29:36 msmith Exp $
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+static MALLOC_DEFINE(M_UMAPFSMNT, "UMAP mount", "UMAP mount structure");
+
+static int	umapfs_fhtovp __P((struct mount *mp, struct fid *fidp,
+				   struct sockaddr *nam, struct vnode **vpp,
+				   int *exflagsp, struct ucred **credanonp));
+static int	umapfs_mount __P((struct mount *mp, char *path, caddr_t data,
+				  struct nameidata *ndp, struct proc *p));
+static int	umapfs_quotactl __P((struct mount *mp, int cmd, uid_t uid,
+				     caddr_t arg, struct proc *p));
+static int	umapfs_root __P((struct mount *mp, struct vnode **vpp));
+static int	umapfs_start __P((struct mount *mp, int flags, struct proc *p));
+static int	umapfs_statfs __P((struct mount *mp, struct statfs *sbp,
+				   struct proc *p));
+static int	umapfs_sync __P((struct mount *mp, int waitfor,
+				 struct ucred *cred, struct proc *p));
+static int	umapfs_unmount __P((struct mount *mp, int mntflags,
+				    struct proc *p));
+static int	umapfs_vget __P((struct mount *mp, ino_t ino,
+				 struct vnode **vpp));
+static int	umapfs_vptofh __P((struct vnode *vp, struct fid *fhp));
+
+/*
+ * Mount umap layer.
+ *
+ * data points at a user-space struct umap_args naming the lower directory
+ * and the uid/gid mapping tables.  On success the mount holds a reference
+ * on the alias of the lower root (released in umapfs_unmount) and 0 is
+ * returned; otherwise an errno value is returned and nothing is retained.
+ */
+static int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+#ifdef UMAP_DIAGNOSTIC
+	int	i;
+#endif
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/* Updating an existing umap mount is not supported. */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/* Get argument */
+	error = copyin(data, (caddr_t)&args, sizeof(struct umap_args));
+	if (error)
+		return (error);
+
+	/* Find lower node */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	/*
+	 * Sanity checks: the lower vnode must be a directory, and the entry
+	 * counts come from userland, so they must fit info_mapdata[] and
+	 * info_gmapdata[] before they are used as copyin() lengths.
+	 */
+	if (lowerrootvp->v_type != VDIR ||
+	    args.nentries < 0 || args.nentries > MAPFILEENTRIES ||
+	    args.gnentries < 0 || args.gnentries > GMAPFILEENTRIES) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+				M_UMAPFSMNT, M_WAITOK);	/* XXX */
+
+	/* Save reference to underlying FS */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/* Now copy in the number of entries and maps for umap mapping. */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf("   %d maps to %d\n", amp->info_mapdata[i][0],
+		    amp->info_mapdata[i][1]);
+#endif
+
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf("	group %d maps to %d\n",
+		    amp->info_gmapdata[i][0],
+		    amp->info_gmapdata[i][1]);
+#endif
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked; vp is not set on failure,
+	 * so it must not be touched before this check.
+	 */
+	if (error)
+		goto bad;
+	/*
+	 * Unlock the node (either the lower or the alias)
+	 */
+	VOP_UNLOCK(vp, 0, p);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	vfs_getnewfsid(mp);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void)umapfs_statfs(mp, &mp->mnt_stat, p);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/* lowerrootvp is still locked and referenced from namei(). */
+	vput(lowerrootvp);
+	free(amp, M_UMAPFSMNT);	/* XXX */
+	return (error);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.
+ */
+static int
+umapfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	/* Deliberately not forwarded to the lower filesystem's VFS_START. */
+	return (0);
+}
+
+/*
+ * Unmount: refuse if the root alias is busy, else flush and free the layer.
+ */
+static int
+umapfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *umapm_rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+	int error;
+	int flags = 0;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (umapm_rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, umapm_rootvp, flags);
+	if (error)
+		return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("alias root of lower", umapm_rootvp);
+#endif
+	/*
+	 * Release the mount's reference on the root alias vnode
+	 */
+	vrele(umapm_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(umapm_rootvp);
+	/*
+	 * Finally, throw away the umap_mount structure
+	 */
+	free(mp->mnt_data, M_UMAPFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the umap root vnode through *vpp, with a new reference
+ * taken and the vnode exclusively locked.  Always returns 0.
+ */
+static int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+			UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+			);
+#endif
+
+	/* Reference first, then lock on behalf of the current process. */
+	VREF(rootvp);
+	vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY, curproc);	/* XXX */
+	*vpp = rootvp;
+	return (0);
+}
+
+static int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;	/* umap mount; the call goes to its umapm_vfs */
+	int cmd;
+	uid_t uid;		/* passed through unchanged */
+	caddr_t arg;
+	struct proc *p;
+{
+	return (VFS_QUOTACTL(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, cmd, uid, arg, p));
+}
+
+static int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;	/* out; may be &mp->mnt_stat itself */
+	struct proc *p;
+{
+	int error;
+	struct statfs mstat;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+			MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+			UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+			);
+#endif
+
+	bzero(&mstat, sizeof(mstat));
+	/* Ask the lower filesystem; its numbers are what gets reported. */
+	error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &mstat, p);
+	if (error)
+		return (error);
+
+	/* now copy across the "interesting" information and fake the rest */
+	sbp->f_type = mstat.f_type;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+	sbp->f_blocks = mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files = mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+	if (sbp != &mp->mnt_stat) {	/* else these are already in place */
+		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	return (0);
+}
+
+static int
+umapfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	/*
+	 * XXX - Assumes no data cached at umap layer, so nothing is synced.
+	 */
+	return (0);
+}
+
+static int
+umapfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	/* NOTE(review): *vpp is set by the lower fs, not a umap alias. */
+	return (VFS_VGET(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, ino, vpp));
+}
+
+static int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct sockaddr *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	/* NOTE(review): *vpp is set by the lower fs, not a umap alias. */
+	return (VFS_FHTOVP(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, fidp, nam, vpp, exflagsp,credanonp));
+}
+
+static int
+umapfs_vptofh(vp, fhp)
+	struct vnode *vp;	/* umap vnode; its lower vnode is what is used */
+	struct fid *fhp;
+{
+	return (VFS_VPTOFH(UMAPVPTOLOWERVP(vp), fhp));
+}
+
+static struct vfsops umap_vfsops = {	/* positional: struct vfsops order */
+	umapfs_mount,
+	umapfs_start,
+	umapfs_unmount,
+	umapfs_root,
+	umapfs_quotactl,
+	umapfs_statfs,
+	umapfs_sync,
+	umapfs_vget,
+	umapfs_fhtovp,
+	umapfs_vptofh,
+	umapfs_init,
+};
+
+VFS_SET(umap_vfsops, umap, VFCF_LOOPBACK);
diff --git a/sys/fs/umapfs/umap_vnops.c b/sys/fs/umapfs/umap_vnops.c
new file mode 100644
index 0000000..893e1e5
--- /dev/null
+++ b/sys/fs/umapfs/umap_vnops.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1992, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)umap_vnops.c	8.6 (Berkeley) 5/22/95
+ * $Id: umap_vnops.c,v 1.25 1998/07/30 17:40:45 bde Exp $
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+#include <miscfs/nullfs/null.h>
+
+static int umap_bug_bypass = 0;   /* for debugging: enables bypass printf'ing */
+SYSCTL_INT(_debug, OID_AUTO, umapfs_bug_bypass, CTLFLAG_RW,
+	&umap_bug_bypass, 0, "");
+
+static int	umap_bwrite __P((struct vop_bwrite_args *ap));
+static int	umap_bypass __P((struct vop_generic_args *ap));
+static int	umap_getattr __P((struct vop_getattr_args *ap));
+static int	umap_inactive __P((struct vop_inactive_args *ap));
+static int	umap_lock __P((struct vop_lock_args *ap));
+static int	umap_print __P((struct vop_print_args *ap));
+static int	umap_reclaim __P((struct vop_reclaim_args *ap));
+static int	umap_rename __P((struct vop_rename_args *ap));
+static int	umap_strategy __P((struct vop_strategy_args *ap));
+static int	umap_unlock __P((struct vop_unlock_args *ap));
+
+/*
+ * This is the 10-Apr-92 bypass routine (see null_vnops.c:null_bypass): swap in
+ * lower vnodes and mapped creds, call the lower op, then restore the args.
+ */
+static int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp = 0, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];	/* caller's vnodes, NULL if unmapped */
+	struct vnode *vp1 = 0;			/* first vnode, before mapping */
+	struct vnode **vps_p[VDESC_MAX_VPS];	/* where each vnode lives in *ap */
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;	/* shifted so bit 0 belongs to vnode i */
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		if (i == 0) {
+			vp1 = *vps_p[0];	/* its v_mount feeds umap_mapids below */
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (Must map first vp or vclean fails.)
+		 */
+
+		if (i && (*this_vp_p)->v_op != umap_vnodeop_p) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			if (reles & 1)
+				VREF(*this_vp_p);	/* slot now holds the lower vnode */
+		}
+
+	}
+
+	/*
+	 * Fix the credentials.  (That's the purpose of this layer.)
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**,
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values; credp stays NOCRED when the caller passed none */
+
+		savecredp = (*credpp);
+		if (savecredp != NOCRED)
+			(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: user was %lu, group %lu\n",
+			    (u_long)credp->cr_uid, (u_long)credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: user now %lu, group %lu\n",
+			    (u_long)credp->cr_uid, (u_long)credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed.  If there is one, it better get mapped, too.
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**,
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		if (savecompcredp != NOCRED)
+			(*compnamepp)->cn_cred = crdup(savecompcredp);
+		compcredp = (*compnamepp)->cn_cred;
+
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf(
+		    "umap_bypass: component credit user was %lu, group %lu\n",
+			    (u_long)compcredp->cr_uid,
+			    (u_long)compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf(
+		    "umap_bypass: component credit user now %lu, group %lu\n",
+			    (u_long)compcredp->cr_uid,
+			    (u_long)compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));	/* slot holds our vnode again */
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset, ap);
+		if (*vppp)
+			error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	/*
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %lu\n",
+			    (u_long)credp->cr_uid);
+
+		if (savecredp != NOCRED) {
+			crfree(credp);
+			(*credpp) = savecredp;
+			if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+				printf(
+				    "umap_bypass: returning-user now %lu\n\n",
+				    (u_long)(*credpp)->cr_uid);
+		}
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf(
+			    "umap_bypass: returning-component-user was %lu\n",
+			    (u_long)compcredp->cr_uid);
+
+		if (savecompcredp != NOCRED) {
+			crfree(compcredp);
+			(*compnamepp)->cn_cred = savecompcredp;
+			/* compcredp was just released: report the restored cred. */
+			if (umap_bug_bypass && savecompcredp->cr_uid != 0)
+				printf("umap_bypass: returning-component-user now %lu\n",
+				    (u_long)savecompcredp->cr_uid);
+		}
+	}
+
+	return (error);
+}
+
+
+/*
+ *  We handle getattr to change the fsid and to reverse-map the uid/gid.
+ */
+static int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	uid_t uid;	/* full width: a short would truncate ids above 32767 */
+	gid_t gid;
+	int error, tmpid, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+
+	error = umap_bypass((struct vop_generic_args *)ap);
+	if (error)
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site.  This involves
+	 * finding the returned uid in the mapping information,
+	 * translating it into the uid on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap.  The gid is
+	 * reverse-mapped the same way through the group map.
+	 * Ids with no map entry become NOBODY / NULLGROUP.
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %lu, mapped gid = %lu\n",
+		    (u_long)uid, (u_long)gid);
+
+	vp1p = VOPARG_OFFSETTO(struct vnode**, ap->a_desc->vdesc_vp_offsets[0], ap);
+	nentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode.  Since it's a reverse
+		map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid(uid, mapdata, nentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %lu\n", (u_long)uid);
+	} else
+		ap->a_vap->va_uid = (uid_t) NOBODY;
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %lu\n", (u_long)gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;
+
+	return (0);
+}
+
+/*
+ * We need to process our own vnode lock and then clear the
+ * interlock flag as it applies only to our vnode, not the
+ * vnodes below us on the stack.
+ */
+static int
+umap_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* NOTE(review): forwards via null_bypass, not umap_bypass -- confirm. */
+	vop_nolock(ap);
+	if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
+		return (0);	/* drain requests are not passed further down */
+	ap->a_flags &= ~LK_INTERLOCK;
+	return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * We need to process our own vnode unlock and then clear the
+ * interlock flag as it applies only to our vnode, not the
+ * vnodes below us on the stack.  Returns whatever null_bypass returns.
+ */
+static int	/* static, matching the prototype near the top of this file */
+umap_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	vop_nounlock(ap);		/* our own vnode first */
+	ap->a_flags &= ~LK_INTERLOCK;	/* the interlock is ours only */
+	return (null_bypass((struct vop_generic_args *)ap));
+}
+
+static int
+umap_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct umap_node *xp = VTOUMAP(vp);
+	struct vnode *lowervp = xp->umap_lowervp;
+	/*
+	 * Not routed through the bypass: run the lower vnode's
+	 * inactive op directly, then unlock our own vnode.
+	 * Wait to vrele lowervp until reclaim,
+	 * so that until then our umap_node is in the
+	 * cache and reusable.
+	 */
+	VOP_INACTIVE(lowervp, ap->a_p);
+	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+	return (0);
+}
+
+static int
+umap_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct umap_node *xp = VTOUMAP(vp);
+	struct vnode *lowervp = xp->umap_lowervp;	/* saved before the free below */
+
+	/* After this assignment, this node will not be re-used. */
+	xp->umap_lowervp = NULL;
+	LIST_REMOVE(xp, umap_hash);	/* unlink via its umap_hash entry */
+	FREE(vp->v_data, M_TEMP);	/* NOTE(review): presumably the node xp -- confirm */
+	vp->v_data = NULL;
+	vrele(lowervp);			/* the release umap_inactive left for us */
+	return (0);
+}
+
+static int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *buf = ap->a_bp;
+	struct vnode *uppervp = buf->b_vp;	/* put back before returning */
+	int rv;
+
+	/* Point the buffer at the lower vnode for the duration of the call. */
+	buf->b_vp = UMAPVPTOLOWERVP(uppervp);
+
+	rv = VOP_STRATEGY(buf->b_vp, buf);
+
+	buf->b_vp = uppervp;
+
+	return (rv);
+}
+
+static int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *buf = ap->a_bp;
+	struct vnode *uppervp = buf->b_vp;	/* put back before returning */
+	int rv;
+
+	/* Let the write see the lower vnode in the buffer's b_vp. */
+	buf->b_vp = UMAPVPTOLOWERVP(uppervp);
+
+	rv = VOP_BWRITE(buf);
+
+	buf->b_vp = uppervp;
+
+	return (rv);
+}
+
+
+static int
+umap_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *lowervp = UMAPVPTOLOWERVP(ap->a_vp);  /* vnode beneath ours */
+	printf("\ttag VT_UMAPFS, vp=%p, lowervp=%p\n", ap->a_vp, lowervp);
+	return (0);	/* always succeeds */
+}
+
+static int
+umap_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	int error;
+	struct componentname *compnamep;
+	struct ucred *compcredp, *savecompcredp;
+	struct vnode *vp;
+
+	/*
+	 * Rename is irregular, having two componentname structures.
+	 * We need to map the cred in the second structure (a_tcnp),
+	 * and then bypass takes care of the rest.
+	 */
+
+	vp = ap->a_fdvp;	/* only its v_mount is used, for umap_mapids */
+	compnamep = ap->a_tcnp;
+	compcredp = compnamep->cn_cred;
+
+	savecompcredp = compcredp;	/* NOTE(review): no NOCRED check here */
+	compcredp = compnamep->cn_cred = crdup(savecompcredp);
+
+	if (umap_bug_bypass && compcredp->cr_uid != 0)
+		printf(
+	    "umap_rename: rename component credit user was %lu, group %lu\n",
+		    (u_long)compcredp->cr_uid, (u_long)compcredp->cr_gid);
+
+	/* Map all ids in the credential structure. */
+
+	umap_mapids(vp->v_mount, compcredp);
+
+	if (umap_bug_bypass && compcredp->cr_uid != 0)
+		printf(
+	    "umap_rename: rename component credit user now %lu, group %lu\n",
+		    (u_long)compcredp->cr_uid, (u_long)compcredp->cr_gid);
+
+	error = umap_bypass((struct vop_generic_args *)ap);
+
+	/* Restore the additional mapped componentname cred structure. */
+
+	crfree(compcredp);	/* drop the duplicate made above */
+	compnamep->cn_cred = savecompcredp;
+
+	return error;
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently.  They should
+ * go away with a merged buffer/block cache.
+ * Any operation not listed below falls through to umap_bypass.
+ */
+vop_t **umap_vnodeop_p;		/* also compared against v_op in umap_bypass */
+static struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) umap_bypass },
+	{ &vop_bwrite_desc,		(vop_t *) umap_bwrite },
+	{ &vop_getattr_desc,		(vop_t *) umap_getattr },
+	{ &vop_inactive_desc,		(vop_t *) umap_inactive },
+	{ &vop_lock_desc,		(vop_t *) umap_lock },
+	{ &vop_print_desc,		(vop_t *) umap_print },
+	{ &vop_reclaim_desc,		(vop_t *) umap_reclaim },
+	{ &vop_rename_desc,		(vop_t *) umap_rename },
+	{ &vop_strategy_desc,		(vop_t *) umap_strategy },
+	{ &vop_unlock_desc,		(vop_t *) umap_unlock },
+	{ NULL, NULL }			/* end-of-table marker */
+};
+static struct vnodeopv_desc umap_vnodeop_opv_desc =
+	{ &umap_vnodeop_p, umap_vnodeop_entries };
+
+VNODEOP_SET(umap_vnodeop_opv_desc);
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
new file mode 100644
index 0000000..6a4aa22
--- /dev/null
+++ b/sys/fs/unionfs/union.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union.h	8.9 (Berkeley) 12/10/94
+ * $Id: union.h,v 1.12 1998/02/26 03:23:51 kato Exp $
+ */
+
+struct union_args {
+	char		*target;	/* Target of loopback  */
+	int		mntflags;	/* Options on the mount */
+};
+
+#define UNMNT_ABOVE	0x0001		/* Target appears above mount point */
+#define UNMNT_BELOW	0x0002		/* Target appears below mount point */
+#define UNMNT_REPLACE	0x0003		/* Target replaces mount point */
+#define UNMNT_OPMASK	0x0003		/* Mask covering the three values above */
+
+struct union_mount {
+	struct vnode	*um_uppervp;	/* Upper vnode of the union root */
+	struct vnode	*um_lowervp;	/* Lower vnode of the union root */
+	struct ucred	*um_cred;	/* Credentials of user calling mount */
+	int		um_cmode;	/* cmask from mount process */
+	int		um_op;		/* Operation mode */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is the mode bits used to create a shadow directory.
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))	/* rwx, 3 classes */
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))	/* rw, 3 classes */
+
+/*
+ * A cache of vnode references
+ */
+struct union_node {
+	LIST_ENTRY(union_node)	un_cache;	/* Hash chain */
+	struct vnode		*un_vnode;	/* Back pointer */
+	struct vnode	        *un_uppervp;	/* overlaying object */
+	struct vnode	        *un_lowervp;	/* underlying object */
+	struct vnode		*un_dirvp;	/* Parent dir of uppervp */
+	struct vnode		*un_pvp;	/* Parent vnode */
+	char			*un_path;	/* saved component name */
+	int			un_hash;	/* saved un_path hash value */
+	int			un_openl;	/* # of opens on lowervp */
+	unsigned int		un_flags;	/* UN_* bits defined below */
+	struct vnode		**un_dircache;	/* cached union stack */
+	off_t			un_uppersz;	/* size of upper object */
+	off_t			un_lowersz;	/* size of lower object */
+#ifdef DIAGNOSTIC
+	pid_t			un_pid;		/* pid recorded when locked, or -1 */
+#endif
+};
+
+#define UN_WANT		0x01		/* Someone is sleeping for UN_LOCKED */
+#define UN_LOCKED	0x02		/* Node (or hash bucket) is locked */
+#define UN_ULOCK	0x04		/* Upper node is locked */
+#define UN_KLOCK	0x08		/* Keep upper node locked on vput */
+#define UN_CACHED	0x10		/* In union cache */
+
+extern int union_allocvp __P((struct vnode **, struct mount *,
+				struct vnode *, struct vnode *,
+				struct componentname *, struct vnode *,
+				struct vnode *, int));
+extern int union_freevp __P((struct vnode *));
+extern struct vnode *union_dircache __P((struct vnode *, struct proc *));
+extern int union_copyup __P((struct union_node *, int, struct ucred *,
+				struct proc *));
+extern int union_dowhiteout __P((struct union_node *, struct ucred *,
+					struct proc *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+				struct componentname *, struct vnode **));
+extern int union_mkwhiteout __P((struct union_mount *, struct vnode *,
+				struct componentname *, char *));
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newsize __P((struct vnode *, off_t, off_t));
+
+extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
+				 struct file *));
+
+#define	MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
+#define	VTOUNION(vp) ((struct union_node *)(vp)->v_data)
+#define	UNIONTOV(un) ((un)->un_vnode)
+#define	LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
+#define	UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) /* upper if set; vp evaluated twice */
+
+extern vop_t **union_vnodeop_p;
+extern struct vfsops union_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
new file mode 100644
index 0000000..7559b6e
--- /dev/null
+++ b/sys/fs/unionfs/union_subr.c
@@ -0,0 +1,1218 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_subr.c	8.20 (Berkeley) 5/20/95
+ * $Id: union_subr.c,v 1.35 1998/12/07 21:58:34 archie Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/module.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>	/* for vnode_pager_setsize */
+#include <vm/vm_zone.h>
+#include <miscfs/union/union.h>
+
+#include <sys/proc.h>
+
+extern int	union_init __P((void));
+
+/* must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/* bucket index from the two vnode addresses; both arguments parenthesized */
+#define UNION_HASH(u, l) \
+	(((((uintptr_t) (u)) + ((uintptr_t) (l))) >> 8) & (NHASH-1))
+
+static LIST_HEAD(unhead, union_node) unhead[NHASH];	/* hash chains of nodes */
+static int unvplock[NHASH];	/* per-chain UN_LOCKED / UN_WANT bits */
+
+static void	union_dircache_r __P((struct vnode *vp, struct vnode ***vppp,
+				      int *cntp));
+static int	union_list_lock __P((int ix));
+static void	union_list_unlock __P((int ix));
+static int	union_relookup __P((struct union_mount *um, struct vnode *dvp,
+				    struct vnode **vpp,
+				    struct componentname *cnp,
+				    struct componentname *cn, char *path,
+				    int pathlen));
+static void	union_updatevp __P((struct union_node *un,
+				    struct vnode *uppervp,
+				    struct vnode *lowervp));
+static void union_newlower __P((struct union_node *, struct vnode *));
+static void union_newupper __P((struct union_node *, struct vnode *));
+static int union_copyfile __P((struct vnode *, struct vnode *,
+					struct ucred *, struct proc *));
+static int union_vn_create __P((struct vnode **, struct union_node *,
+				struct proc *));
+static int union_vn_close __P((struct vnode *, int, struct ucred *,
+				struct proc *));
+
+int
+union_init()
+{
+	int bucket = NHASH;	/* counts down across every hash chain */
+
+	bzero((caddr_t) unvplock, sizeof(unvplock));	/* no chain locked */
+	while (bucket-- > 0)
+		LIST_INIT(&unhead[bucket]);
+	return (0);
+}
+
+static int
+union_list_lock(ix)
+	int ix;			/* hash bucket index into unvplock[] */
+{
+	int *lockp = &unvplock[ix];
+
+	if ((*lockp & UN_LOCKED) == 0) {
+		*lockp |= UN_LOCKED;	/* free: take it */
+		return (0);
+	}
+	/* Busy: note a waiter, sleep once, and tell the caller to retry. */
+	*lockp |= UN_WANT;
+	(void) tsleep((caddr_t) lockp, PINOD, "unllck", 0);
+	return (1);
+}
+
+static void
+union_list_unlock(ix)
+	int ix;			/* hash bucket index into unvplock[] */
+{
+	int wanted = unvplock[ix] & UN_WANT;	/* sampled before clearing */
+
+	/* Release the bucket; a pending want bit is consumed as well. */
+	unvplock[ix] &= ~(UN_LOCKED | UN_WANT);
+
+	if (wanted)
+		wakeup((caddr_t) &unvplock[ix]);
+}
+
+static void
+union_updatevp(un, uppervp, lowervp)	/* install new layer vnodes and rehash */
+	struct union_node *un;		/* node being updated */
+	struct vnode *uppervp;		/* new upper vnode, may be NULLVP */
+	struct vnode *lowervp;		/* new lower vnode, may be NULLVP */
+{
+	int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);	/* current bucket */
+	int nhash = UNION_HASH(uppervp, lowervp);		/* bucket afterwards */
+	int docache = (lowervp != NULLVP || uppervp != NULLVP);
+	int lhash, uhash;
+
+	/*
+	 * Ensure locking is ordered from lower to higher
+	 * to avoid deadlocks.
+	 */
+	if (nhash < ohash) {
+		lhash = nhash;
+		uhash = ohash;
+	} else {
+		lhash = ohash;
+		uhash = nhash;
+	}
+
+	if (lhash != uhash)	/* same bucket: lock it only once, below */
+		while (union_list_lock(lhash))
+			continue;
+
+	while (union_list_lock(uhash))
+		continue;
+
+	if (ohash != nhash || !docache) {	/* leaving the old chain */
+		if (un->un_flags & UN_CACHED) {
+			un->un_flags &= ~UN_CACHED;
+			LIST_REMOVE(un, un_cache);
+		}
+	}
+
+	if (ohash != nhash)
+		union_list_unlock(ohash);	/* nhash stays locked until the end */
+
+	if (un->un_lowervp != lowervp) {
+		if (un->un_lowervp) {
+			vrele(un->un_lowervp);
+			if (un->un_path) {	/* path/dirvp go only with an old lower */
+				free(un->un_path, M_TEMP);
+				un->un_path = 0;
+			}
+			if (un->un_dirvp) {
+				vrele(un->un_dirvp);
+				un->un_dirvp = NULLVP;
+			}
+		}
+		un->un_lowervp = lowervp;	/* stored as-is; no VREF taken here */
+		un->un_lowersz = VNOVAL;	/* cached size no longer valid */
+	}
+
+	if (un->un_uppervp != uppervp) {
+		if (un->un_uppervp)
+			vrele(un->un_uppervp);
+
+		un->un_uppervp = uppervp;	/* stored as-is; no VREF taken here */
+		un->un_uppersz = VNOVAL;	/* cached size no longer valid */
+	}
+
+	if (docache && (ohash != nhash)) {	/* same bucket: chain left untouched */
+		LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+		un->un_flags |= UN_CACHED;
+	}
+
+	union_list_unlock(nhash);
+}
+
+static void
+union_newlower(un, lowervp)
+	struct union_node *un;		/* node whose lower vnode changes */
+	struct vnode *lowervp;		/* replacement lower vnode */
+{
+	struct vnode *same_upper = un->un_uppervp;	/* upper side kept */
+	union_updatevp(un, same_upper, lowervp);
+}
+
+static void
+union_newupper(un, uppervp)
+	struct union_node *un;		/* node whose upper vnode changes */
+	struct vnode *uppervp;		/* replacement upper vnode */
+{
+	struct vnode *same_lower = un->un_lowervp;	/* lower side kept */
+	union_updatevp(un, uppervp, same_lower);
+}
+
+/*
+ * Record size changes reported for the vnodes under (vp).
+ * When a cached size really changes, notify the vm layer,
+ * letting the upper layer's size win over the lower's.
+ */
+void
+union_newsize(vp, uppersz, lowersz)
+	struct vnode *vp;
+	off_t uppersz, lowersz;
+{
+	struct union_node *un;
+	off_t newsz = VNOVAL;
+	int upper_changed, lower_changed;
+
+	/* only interested in regular files */
+	if (vp->v_type != VREG)
+		return;
+
+	un = VTOUNION(vp);
+	upper_changed = (uppersz != VNOVAL && un->un_uppersz != uppersz);
+	lower_changed = (lowersz != VNOVAL && un->un_lowersz != lowersz);
+
+	/* Apply lower first so that an upper change overrides it below. */
+	if (lower_changed) {
+		un->un_lowersz = lowersz;
+		newsz = lowersz;
+	}
+	if (upper_changed) {
+		un->un_uppersz = uppersz;
+		newsz = uppersz;
+	}
+
+	if (newsz == VNOVAL)
+		return;		/* neither cached size moved */
+#ifdef UNION_DIAGNOSTIC
+	printf("union: %s size now %ld\n",
+		uppersz != VNOVAL ? "upper" : "lower", (long) newsz);
+#endif
+	vnode_pager_setsize(vp, newsz);
+}
+
+/*
+ * allocate a union_node/vnode pair.  the vnode is
+ * referenced and locked.  the new vnode is returned
+ * via (vpp).  (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time.  (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped.  either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ *
+ * all union_nodes are maintained on a singly-linked
+ * list.  new nodes are only allocated when they cannot
+ * be found on this list.  entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list.  this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode.  this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference.  this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
+	struct vnode **vpp;		/* out: union vnode found or created */
+	struct mount *mp;		/* union mount the vnode lives on */
+	struct vnode *undvp;		/* parent union vnode */
+	struct vnode *dvp;		/* may be null */
+	struct componentname *cnp;	/* may be null */
+	struct vnode *uppervp;		/* may be null */
+	struct vnode *lowervp;		/* may be null */
+	int docache;			/* non-zero: search/enter the unhead[] cache */
+{
+	int error;
+	struct union_node *un = 0;
+	struct vnode *xlowervp = NULLVP;	/* lower vnode set aside on type mismatch */
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	int hash = 0;
+	int vflag;			/* VROOT when this is the mount root, else 0 */
+	int try;			/* which hash key is being probed (0..2) */
+
+	if (uppervp == NULLVP && lowervp == NULLVP)
+		panic("union: unidentifiable allocation");
+
+	if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+		xlowervp = lowervp;	/* types differ: ignore the lower layer */
+		lowervp = NULLVP;
+	}
+
+	/*
+	 * detect the root vnode (and aliases).
+	 * when only the upper root was supplied, the mount's lower
+	 * root (if any) is filled in and referenced here.
+	 */
+	vflag = 0;
+	if ((uppervp == um->um_uppervp) &&
+	    ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
+		if (lowervp == NULLVP) {
+			lowervp = um->um_lowervp;
+			if (lowervp != NULLVP)
+				VREF(lowervp);
+		}
+		vflag = VROOT;
+	}
+
+loop:	/* restart point: taken after a failed vget, a sleep, or a lock retry */
+	if (!docache) {
+		un = 0;
+	} else for (try = 0; try < 3; try++) {
+		switch (try) {
+		case 0:	/* key on both layers */
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, lowervp);
+			break;
+
+		case 1:	/* key on the upper layer alone */
+			if (uppervp == NULLVP)
+				continue;
+			hash = UNION_HASH(uppervp, NULLVP);
+			break;
+
+		case 2:	/* key on the lower layer alone */
+			if (lowervp == NULLVP)
+				continue;
+			hash = UNION_HASH(NULLVP, lowervp);
+			break;
+		}
+
+		while (union_list_lock(hash))
+			continue;	/* retry while union_list_lock() returns non-zero */
+
+		for (un = unhead[hash].lh_first; un != 0;
+					un = un->un_cache.le_next) {
+			if ((un->un_lowervp == lowervp ||
+			     un->un_lowervp == NULLVP) &&
+			    (un->un_uppervp == uppervp ||
+			     un->un_uppervp == NULLVP) &&
+			    (UNIONTOV(un)->v_mount == mp)) {
+				if (vget(UNIONTOV(un), 0,
+				    cnp ? cnp->cn_proc : NULL)) {
+					union_list_unlock(hash);
+					goto loop;	/* vget failed: rescan from scratch */
+				}
+				break;
+			}
+		}
+
+		union_list_unlock(hash);
+
+		if (un)
+			break;
+	}
+
+	if (un) {
+		/*
+		 * Obtain a lock on the union_node.
+		 * uppervp is locked, though un->un_uppervp
+		 * may not be.  this doesn't break the locking
+		 * hierarchy since in the case that un->un_uppervp
+		 * is not yet locked it will be vrele'd and replaced
+		 * with uppervp.
+		 */
+
+		if ((dvp != NULLVP) && (uppervp == dvp)) {
+			/*
+			 * Access ``.'', so (un) will already
+			 * be locked.  Since this process has
+			 * the lock on (uppervp) no other
+			 * process can hold the lock on (un).
+			 */
+#ifdef DIAGNOSTIC
+			if ((un->un_flags & UN_LOCKED) == 0)
+				panic("union: . not locked");
+			else if (curproc && un->un_pid != curproc->p_pid &&
+				    un->un_pid > -1 && curproc->p_pid > -1)
+				panic("union: allocvp not lock owner");
+#endif
+		} else {
+			if (un->un_flags & UN_LOCKED) {
+				vrele(UNIONTOV(un));	/* give back the vget reference before sleeping */
+				un->un_flags |= UN_WANT;
+				(void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
+				goto loop;
+			}
+			un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+			if (curproc)
+				un->un_pid = curproc->p_pid;
+			else
+				un->un_pid = -1;
+#endif
+		}
+
+		/*
+		 * At this point, the union_node is locked,
+		 * un->un_uppervp may not be locked, and uppervp
+		 * is locked or nil.
+		 */
+
+		/*
+		 * Save information about the upper layer.
+		 * If the node already records this very vnode, the
+		 * reference that came in with (uppervp) is released.
+		 */
+		if (uppervp != un->un_uppervp) {
+			union_newupper(un, uppervp);
+		} else if (uppervp) {
+			vrele(uppervp);
+		}
+
+		if (un->un_uppervp) {
+			un->un_flags |= UN_ULOCK;
+			un->un_flags &= ~UN_KLOCK;
+		}
+
+		/*
+		 * Save information about the lower layer.
+		 * This needs to keep track of pathname
+		 * and directory information which union_vn_create
+		 * might need.
+		 *
+		 * NOTE(review): un_path and un_dirvp are overwritten
+		 * below without the old values being released (unless
+		 * union_newlower() takes care of that), and xlowervp is
+		 * not vrele'd before the return that follows, unlike on
+		 * the allocation path further down -- confirm.
+		 */
+		if (lowervp != un->un_lowervp) {
+			union_newlower(un, lowervp);
+			if (cnp && (lowervp != NULLVP)) {
+				un->un_hash = cnp->cn_hash;
+				un->un_path = malloc(cnp->cn_namelen+1,
+						M_TEMP, M_WAITOK);
+				bcopy(cnp->cn_nameptr, un->un_path,
+						cnp->cn_namelen);
+				un->un_path[cnp->cn_namelen] = '\0';
+				VREF(dvp);
+				un->un_dirvp = dvp;
+			}
+		} else if (lowervp) {
+			vrele(lowervp);
+		}
+		*vpp = UNIONTOV(un);
+		return (0);
+	}
+
+	if (docache) {
+		/*
+		 * otherwise lock the vp list while we call getnewvnode
+		 * since that can block.  if union_list_lock() returns
+		 * non-zero, start again from the cache lookup.
+		 */
+		hash = UNION_HASH(uppervp, lowervp);
+
+		if (union_list_lock(hash))
+			goto loop;
+	}
+
+	error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+	if (error) {
+		if (uppervp) {
+			if (dvp == uppervp)
+				vrele(uppervp);	/* release only */
+			else
+				vput(uppervp);	/* unlock and release */
+		}
+		if (lowervp)
+			vrele(lowervp);
+
+		goto out;
+	}
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+		M_TEMP, M_WAITOK);
+
+	(*vpp)->v_flag |= vflag;
+	if (uppervp)
+		(*vpp)->v_type = uppervp->v_type;
+	else
+		(*vpp)->v_type = lowervp->v_type;
+	un = VTOUNION(*vpp);
+	un->un_vnode = *vpp;
+	un->un_uppervp = uppervp;
+	un->un_uppersz = VNOVAL;
+	un->un_lowervp = lowervp;
+	un->un_lowersz = VNOVAL;
+	un->un_pvp = undvp;
+	if (undvp != NULLVP)
+		VREF(undvp);
+	un->un_dircache = 0;
+	un->un_openl = 0;
+	un->un_flags = UN_LOCKED;	/* the new node is handed back locked */
+	if (un->un_uppervp)
+		un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+	if (cnp && (lowervp != NULLVP)) {
+		un->un_hash = cnp->cn_hash;
+		un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+		bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+		un->un_path[cnp->cn_namelen] = '\0';
+		VREF(dvp);
+		un->un_dirvp = dvp;
+	} else {
+		un->un_hash = 0;
+		un->un_path = 0;
+		un->un_dirvp = 0;
+	}
+
+	if (docache) {
+		LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+		un->un_flags |= UN_CACHED;
+	}
+
+	if (xlowervp)
+		vrele(xlowervp);	/* the lower vnode that was set aside at the top */
+
+out:
+	if (docache)
+		union_list_unlock(hash);
+
+	return (error);
+}
+/*
+ * union_freevp: release everything hanging off the union_node of (vp).
+ *
+ * The node is unlinked from its hash chain if UN_CACHED is set, the
+ * references on the parent, upper, lower and directory vnodes are
+ * dropped, the saved pathname is freed, and finally the union_node
+ * itself is freed and vp->v_data cleared.  Always returns 0.
+ */
+int
+union_freevp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un = VTOUNION(vp);
+
+	if (un->un_flags & UN_CACHED) {
+		un->un_flags &= ~UN_CACHED;
+		LIST_REMOVE(un, un_cache);
+	}
+
+	if (un->un_pvp != NULLVP)
+		vrele(un->un_pvp);
+	if (un->un_uppervp != NULLVP)
+		vrele(un->un_uppervp);
+	if (un->un_lowervp != NULLVP)
+		vrele(un->un_lowervp);
+	if (un->un_dirvp != NULLVP)
+		vrele(un->un_dirvp);
+	if (un->un_path)
+		free(un->un_path, M_TEMP);
+
+	FREE(vp->v_data, M_TEMP);	/* v_data is the union_node itself */
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * copyfile.  copy the vnode (fvp) to the vnode (tvp)
+ * using a sequence of reads and writes.  both (fvp)
+ * and (tvp) are locked on entry and exit.
+ *
+ * (cred) is passed to every lease, read and write call;
+ * (p) is used for the uio and for the lock operations.
+ * returns 0 once a read transfers no data, otherwise the
+ * first error reported by VOP_READ or VOP_WRITE.
+ */
+static int
+union_copyfile(fvp, tvp, cred, p)
+	struct vnode *fvp;
+	struct vnode *tvp;
+	struct ucred *cred;
+	struct proc *p;
+{
+	char *buf;
+	struct uio uio;
+	struct iovec iov;
+	int error = 0;
+
+	/*
+	 * strategy:
+	 * allocate a buffer of size MAXBSIZE.
+	 * loop doing reads and writes, keeping track
+	 * of the current uio offset.
+	 * give up at the first sign of trouble.
+	 *
+	 * before the loop each vnode is unlocked around its
+	 * VOP_LEASE call and relocked afterwards (the XXX lines).
+	 */
+
+	uio.uio_procp = p;
+	uio.uio_segflg = UIO_SYSSPACE;
+	uio.uio_offset = 0;
+
+	VOP_UNLOCK(fvp, 0, p);				/* XXX */
+	VOP_LEASE(fvp, p, cred, LEASE_READ);
+	vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
+	VOP_UNLOCK(tvp, 0, p);				/* XXX */
+	VOP_LEASE(tvp, p, cred, LEASE_WRITE);
+	vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);	/* XXX */
+
+	buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+	/*
+	 * ugly loop follows...
+	 * the same uio/iov pair is set up once for the read and then
+	 * again for the write of whatever that read returned.
+	 */
+	do {
+		off_t offset = uio.uio_offset;	/* file offset of this chunk */
+
+		uio.uio_iov = &iov;
+		uio.uio_iovcnt = 1;
+		iov.iov_base = buf;
+		iov.iov_len = MAXBSIZE;
+		uio.uio_resid = iov.iov_len;
+		uio.uio_rw = UIO_READ;
+		error = VOP_READ(fvp, &uio, 0, cred);
+
+		if (error == 0) {
+			uio.uio_iov = &iov;
+			uio.uio_iovcnt = 1;
+			iov.iov_base = buf;
+			iov.iov_len = MAXBSIZE - uio.uio_resid;	/* bytes the read delivered */
+			uio.uio_offset = offset;	/* write where the read started */
+			uio.uio_rw = UIO_WRITE;
+			uio.uio_resid = iov.iov_len;
+
+			if (uio.uio_resid == 0)
+				break;	/* nothing was read: the copy is done */
+
+			do {
+				error = VOP_WRITE(tvp, &uio, 0, cred);
+			} while ((uio.uio_resid > 0) && (error == 0));
+		}
+
+	} while (error == 0);
+
+	free(buf, M_TEMP);
+	return (error);
+}
+
+/*
+ * union_copyup: give (un) an upper-layer file, optionally filled
+ * with the contents of its lower-layer file.
+ *
+ * (un) is assumed to be locked on entry and remains
+ * locked on exit.
+ * (docopy) non-zero means copy the lower file's data into the
+ * newly created upper file; zero just creates it.
+ * (cred) is used for the access check, the copy and the
+ * open/close calls; (p) for locking and for union_vn_create.
+ *
+ * The lower vnode is only locked temporarily in here: it is
+ * unlocked again on every path, including a failed VOP_OPEN.
+ * Returns 0 on success or the first error encountered.
+ */
+int
+union_copyup(un, docopy, cred, p)
+	struct union_node *un;
+	int docopy;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+	struct vnode *lvp, *uvp;
+
+	/*
+	 * If the user does not have read permission, the vnode should not
+	 * be copied to upper layer.
+	 */
+	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p);
+	VOP_UNLOCK(un->un_lowervp, 0, p);
+	if (error)
+		return (error);
+
+	error = union_vn_create(&uvp, un, p);
+	if (error)
+		return (error);
+
+	/* at this point, uppervp is locked */
+	union_newupper(un, uvp);
+	un->un_flags |= UN_ULOCK;
+
+	lvp = un->un_lowervp;
+
+	if (docopy) {
+		/*
+		 * XX - should not ignore errors
+		 * from VOP_CLOSE
+		 */
+		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_OPEN(lvp, FREAD, cred, p);
+		if (error == 0) {
+			error = union_copyfile(lvp, uvp, cred, p);
+			VOP_UNLOCK(lvp, 0, p);
+			(void) VOP_CLOSE(lvp, FREAD, cred, p);
+		} else {
+			/*
+			 * The open failed, but the lock taken just
+			 * above is still held; release it so the
+			 * lower vnode is left as it was found.
+			 */
+			VOP_UNLOCK(lvp, 0, p);
+		}
+#ifdef UNION_DIAGNOSTIC
+		if (error == 0)
+			uprintf("union: copied up %s\n", un->un_path);
+#endif
+
+	}
+
+	/*
+	 * Close the write-open made by union_vn_create.  The upper
+	 * vnode is unlocked around the close and relocked after it,
+	 * with UN_ULOCK kept in step.
+	 */
+	un->un_flags &= ~UN_ULOCK;
+	VOP_UNLOCK(uvp, 0, p);
+	union_vn_close(uvp, FWRITE, cred, p);
+	vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
+	un->un_flags |= UN_ULOCK;
+
+	/*
+	 * Subsequent IOs will go to the top layer, so
+	 * call close on the lower vnode and open on the
+	 * upper vnode to ensure that the filesystem keeps
+	 * its references counts right.  This doesn't do
+	 * the right thing with (cred) and (FREAD) though.
+	 * Ignoring error returns is not right, either.
+	 */
+	if (error == 0) {
+		int i;
+
+		for (i = 0; i < un->un_openl; i++) {
+			(void) VOP_CLOSE(lvp, FREAD, cred, p);
+			(void) VOP_OPEN(uvp, FREAD, cred, p);
+		}
+		un->un_openl = 0;
+	}
+
+	return (error);
+
+}
+/*
+ * union_relookup: build a CREATE componentname in (cn) for the name
+ * (path)/(pathlen) and run relookup() on it in directory (dvp).
+ *
+ * (um) and (cnp) supply the credentials: the caller's (cnp->cn_cred)
+ * for an UNMNT_ABOVE mount, the mounter's (um->um_cred) otherwise.
+ * (cnp) also supplies the proc, hash and consume values.
+ * (vpp) receives whatever relookup() returns.
+ * On failure the pathname buffer is given back to namei_zone here;
+ * on success it stays attached to (cn).
+ * NOTE(review): (pathlen) is not checked against the size of a
+ * namei_zone buffer -- presumably callers only pass component names.
+ */
+static int
+union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct vnode **vpp;
+	struct componentname *cnp;
+	struct componentname *cn;
+	char *path;
+	int pathlen;
+{
+	int error;
+
+	/*
+	 * A new componentname structure must be faked up because
+	 * there is no way to know where the upper level cnp came
+	 * from or what it is being used for.  This must duplicate
+	 * some of the work done by NDINIT, some of the work done
+	 * by namei, some of the work done by lookup and some of
+	 * the work done by VOP_LOOKUP when given a CREATE flag.
+	 * Conclusion: Horrible.
+	 *
+	 * The pathname buffer will be FREEed by VOP_MKDIR.
+	 */
+	cn->cn_namelen = pathlen;
+	cn->cn_pnbuf = zalloc(namei_zone);
+	bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
+	cn->cn_pnbuf[cn->cn_namelen] = '\0';
+
+	cn->cn_nameiop = CREATE;
+	cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn->cn_proc = cnp->cn_proc;
+	if (um->um_op == UNMNT_ABOVE)
+		cn->cn_cred = cnp->cn_cred;
+	else
+		cn->cn_cred = um->um_cred;
+	cn->cn_nameptr = cn->cn_pnbuf;
+	cn->cn_hash = cnp->cn_hash;
+	cn->cn_consume = cnp->cn_consume;
+
+	VREF(dvp);
+	error = relookup(dvp, vpp, cn);
+	if (!error)
+		vrele(dvp);	/* drop the reference taken just above */
+	else {
+		zfree(namei_zone, cn->cn_pnbuf);
+		cn->cn_pnbuf = NULL;
+	}
+
+	return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * The new vnode is returned locked.
+ *
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ *
+ * Returns the error from union_relookup() or VOP_MKDIR(), or
+ * EEXIST (with *vpp set to NULLVP) if the name already exists.
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct componentname *cnp;
+	struct vnode **vpp;
+{
+	int error;
+	struct vattr va;
+	struct proc *p = cnp->cn_proc;
+	struct componentname cn;	/* faked-up name, filled in by union_relookup */
+
+	error = union_relookup(um, dvp, vpp, cnp, &cn,
+			cnp->cn_nameptr, cnp->cn_namelen);
+	if (error)
+		return (error);
+
+	if (*vpp) {	/* the name is already present in (dvp) */
+		VOP_ABORTOP(dvp, &cn);
+		VOP_UNLOCK(dvp, 0, p);
+		vrele(*vpp);
+		*vpp = NULLVP;
+		return (EEXIST);
+	}
+
+	/*
+	 * policy: when creating the shadow directory in the
+	 * upper layer, create it owned by the user who did
+	 * the mount, group from parent directory, and mode
+	 * 777 modified by umask (ie mostly identical to the
+	 * mkdir syscall).  (jsp, kb)
+	 */
+
+	VATTR_NULL(&va);
+	va.va_type = VDIR;
+	va.va_mode = um->um_cmode;
+
+	/* VOP_LEASE: dvp is locked */
+	VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
+
+	error = VOP_MKDIR(dvp, vpp, &cn, &va);
+	vput(dvp);
+	return (error);
+}
+
+/*
+ * Create a whiteout entry in the upper layer.
+ *
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the whiteout.
+ * it is locked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (path) is the NUL-terminated name of the entry; its length
+ * is taken with strlen().
+ *
+ * Returns the error from union_relookup() or VOP_WHITEOUT(), or
+ * EEXIST if an entry of that name already exists.
+ */
+int
+union_mkwhiteout(um, dvp, cnp, path)
+	struct union_mount *um;
+	struct vnode *dvp;
+	struct componentname *cnp;
+	char *path;
+{
+	int error;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *wvp;		/* existing entry of that name, if any */
+	struct componentname cn;	/* faked-up name, filled in by union_relookup */
+
+	VOP_UNLOCK(dvp, 0, p);
+	error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
+	if (error) {
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);	/* put the caller's lock back */
+		return (error);
+	}
+
+	if (wvp) {
+		VOP_ABORTOP(dvp, &cn);
+		vrele(dvp);
+		vrele(wvp);
+		return (EEXIST);
+	}
+
+	/* VOP_LEASE: dvp is locked */
+	VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);
+
+	error = VOP_WHITEOUT(dvp, &cn, CREATE);
+	if (error)
+		VOP_ABORTOP(dvp, &cn);
+
+	vrele(dvp);
+
+	return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer.  this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new vnode, opened for writing with its
+ * v_writecount bumped; it is set to NULLVP on any failure.
+ * (un) supplies the name (un_path, un_hash) and the directory
+ * (un_dirvp) to create it in.
+ * (p) supplies the credentials and the umask.
+ *
+ * Returns 0, EEXIST if the name already exists in the upper
+ * directory, or the error from relookup/VOP_CREATE/VOP_OPEN.
+ */
+static int
+union_vn_create(vpp, un, p)
+	struct vnode **vpp;
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *vp;
+	struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+	int error;
+	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+	struct componentname cn;
+
+	*vpp = NULLVP;
+
+	/*
+	 * Build a new componentname structure (for the same
+	 * reasons outlined in union_mkshadow).
+	 * The difference here is that the file is owned by
+	 * the current user, rather than by the person who
+	 * did the mount, since the current user needs to be
+	 * able to write the file (that's why it is being
+	 * copied in the first place).
+	 */
+	cn.cn_namelen = strlen(un->un_path);
+	cn.cn_pnbuf = zalloc(namei_zone);
+	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = p;
+	cn.cn_cred = p->p_ucred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_hash = un->un_hash;
+	cn.cn_consume = 0;
+
+	VREF(un->un_dirvp);
+	error = relookup(un->un_dirvp, &vp, &cn);
+	if (error) {
+		/*
+		 * No later operation will consume the pathname
+		 * buffer, so hand it back to the zone here, the
+		 * same way union_relookup does when relookup fails.
+		 */
+		zfree(namei_zone, cn.cn_pnbuf);
+		cn.cn_pnbuf = NULL;
+		return (error);
+	}
+	vrele(un->un_dirvp);
+
+	if (vp) {
+		/* somebody else already created the name */
+		VOP_ABORTOP(un->un_dirvp, &cn);
+		if (un->un_dirvp == vp)
+			vrele(un->un_dirvp);
+		else
+			vput(un->un_dirvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+
+	/*
+	 * Good - there was no race to create the file
+	 * so go ahead and create it.  The permissions
+	 * on the file will be 0666 modified by the
+	 * current user's umask.  Access to the file, while
+	 * it is unioned, will require access to the top *and*
+	 * bottom files.  Access when not unioned will simply
+	 * require access to the top-level file.
+	 * TODO: confirm choice of access permissions.
+	 */
+	VATTR_NULL(vap);
+	vap->va_type = VREG;
+	vap->va_mode = cmode;
+	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
+	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
+	vput(un->un_dirvp);
+	if (error)
+		return (error);
+
+	error = VOP_OPEN(vp, fmode, cred, p);
+	if (error) {
+		vput(vp);
+		return (error);
+	}
+
+	/* account for the write-open; union_vn_close undoes this */
+	vp->v_writecount++;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * union_vn_close: close (vp) with VOP_CLOSE, first giving back the
+ * v_writecount reference when the open being closed was for writing.
+ * Returns whatever VOP_CLOSE returns.
+ */
+static int
+union_vn_close(vp, fmode, cred, p)
+	struct vnode *vp;
+	int fmode;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int was_writer = (fmode & FWRITE) != 0;
+
+	if (was_writer)
+		vp->v_writecount -= 1;
+
+	return (VOP_CLOSE(vp, fmode, cred, p));
+}
+/*
+ * union_removed_upper: bookkeeping for a union_node whose upper
+ * object has been removed (going by the name; confirm against the
+ * callers).  Releases and frees the directory cache, takes the node
+ * off the hash cache, and unlocks the upper vnode if UN_ULOCK is set.
+ */
+void
+union_removed_upper(un)
+	struct union_node *un;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct vnode **vpp;
+
+	/*
+	 * Do not set the uppervp to NULLVP.  If lowervp is NULLVP,
+	 * union node will have neither uppervp nor lowervp.  We remove
+	 * the union node from cache, so that it will not be referenced.
+	 */
+#if 0
+	union_newupper(un, NULLVP);
+#endif
+	if (un->un_dircache != 0) {
+		/* the array is NULLVP-terminated; each entry holds a reference */
+		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
+			vrele(*vpp);
+		free(un->un_dircache, M_TEMP);
+		un->un_dircache = 0;
+	}
+
+	if (un->un_flags & UN_CACHED) {
+		un->un_flags &= ~UN_CACHED;
+		LIST_REMOVE(un, un_cache);
+	}
+
+	if (un->un_flags & UN_ULOCK) {
+		un->un_flags &= ~UN_ULOCK;
+		VOP_UNLOCK(un->un_uppervp, 0, p);
+	}
+}
+
+#if 0	/* compiled out (note the two-argument vget() call below) */
+struct vnode *
+union_lowervp(vp)
+	struct vnode *vp;
+{
+	struct union_node *un = VTOUNION(vp);
+
+	/* hand out the lower vnode only when its type matches the union vnode's */
+	if ((un->un_lowervp != NULLVP) &&
+	    (vp->v_type == un->un_lowervp->v_type)) {
+		if (vget(un->un_lowervp, 0) == 0)
+			return (un->un_lowervp);
+	}
+
+	return (NULLVP);
+}
+#endif	/* 0 */
+
+/*
+ * Decide whether a remove/rmdir of (un) has to leave a
+ * whiteout behind.  That is the case when the node has a
+ * lower vnode, or when the attributes of its upper vnode can
+ * be fetched (with (cred) and (p)) and carry the OPAQUE flag.
+ * Returns 1 if a whiteout is needed, 0 if not.
+ */
+int
+union_dowhiteout(un, cred, p)
+	struct union_node *un;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct vattr uva;
+	int needed = 0;
+
+	if (un->un_lowervp != NULLVP) {
+		needed = 1;
+	} else if (VOP_GETATTR(un->un_uppervp, &uva, cred, p) == 0) {
+		if (uva.va_flags & OPAQUE)
+			needed = 1;
+	}
+
+	return (needed);
+}
+/*
+ * union_dircache_r: walk the layers below (vp), upper before lower,
+ * recursing through nested union vnodes.
+ *
+ * With (vppp) null this is a counting pass: *cntp is incremented once
+ * for every non-union vnode reached.  Otherwise it is a filling pass:
+ * each non-union vnode is referenced and stored through *vppp (which
+ * is advanced), and *cntp is decremented; if that takes it to zero
+ * the table was too small and the kernel panics.
+ */
+static void
+union_dircache_r(vp, vppp, cntp)
+	struct vnode *vp;
+	struct vnode ***vppp;
+	int *cntp;
+{
+	struct union_node *un;
+
+	if (vp->v_op != union_vnodeop_p) {	/* not a union vnode: a leaf */
+		if (vppp) {
+			VREF(vp);
+			*(*vppp)++ = vp;
+			if (--(*cntp) == 0)
+				panic("union: dircache table too small");
+		} else {
+			(*cntp)++;
+		}
+
+		return;
+	}
+
+	un = VTOUNION(vp);
+	if (un->un_uppervp != NULLVP)
+		union_dircache_r(un->un_uppervp, vppp, cntp);
+	if (un->un_lowervp != NULLVP)
+		union_dircache_r(un->un_lowervp, vppp, cntp);
+}
+
+/*
+ * union_dircache: return a union vnode wrapping the next layer
+ * after the one (vp) currently presents, or NULLVP if there is none
+ * (or if allocating the vnode fails).
+ *
+ * The layers are kept in a NULLVP-terminated array of referenced
+ * vnodes hung off the union_node (un_dircache).  The array is built
+ * on first use and attached to (vp) straight away, so that it is
+ * still owned by a node -- and released with it -- when this routine
+ * leaves without handing it on.  On success ownership of the array
+ * moves to the returned vnode.
+ *
+ * (vp) is locked for the duration of the call and unlocked on return;
+ * (p) is the process used for the lock operations.
+ */
+struct vnode *
+union_dircache(vp, p)
+	struct vnode *vp;
+	struct proc *p;
+{
+	int cnt;
+	struct vnode *nvp;
+	struct vnode **vpp;
+	struct vnode **dircache;
+	struct union_node *un;
+	int error;
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	dircache = VTOUNION(vp)->un_dircache;
+
+	nvp = NULLVP;
+
+	if (dircache == 0) {
+		/* first pass counts the layers, second pass fills the array */
+		cnt = 0;
+		union_dircache_r(vp, 0, &cnt);
+		cnt++;		/* room for the NULLVP terminator */
+		dircache = (struct vnode **)
+				malloc(cnt * sizeof(struct vnode *),
+					M_TEMP, M_WAITOK);
+		vpp = dircache;
+		union_dircache_r(vp, &vpp, &cnt);
+		*vpp = NULLVP;
+
+		/*
+		 * Record the array on (vp) now; otherwise it, and the
+		 * references it holds, would be lost on the early exits
+		 * below.
+		 */
+		VTOUNION(vp)->un_dircache = dircache;
+
+		vpp = dircache + 1;
+	} else {
+		/* step past the entry matching the current upper vnode */
+		vpp = dircache;
+		do {
+			if (*vpp++ == VTOUNION(vp)->un_uppervp)
+				break;
+		} while (*vpp != NULLVP);
+	}
+
+	if (*vpp == NULLVP)
+		goto out;
+
+	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+	VREF(*vpp);
+	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
+	if (error)
+		goto out;
+
+	/* hand the array over to the new node */
+	VTOUNION(vp)->un_dircache = 0;
+	un = VTOUNION(nvp);
+	un->un_dircache = dircache;
+
+out:
+	VOP_UNLOCK(vp, 0, p);
+	return (nvp);
+}
+
+/*
+ * Module glue to remove #ifdef UNION from vfs_syscalls.c
+ *
+ * Decides whether a directory read on (*vp) should continue in
+ * another directory.  Two cases are handled:
+ *  - (*vp) is a union vnode with a further layer that is not hidden
+ *    by an opaque directory: that layer is opened, installed in (fp)
+ *    and (*vp), and the old vnode is closed;
+ *  - (*vp) is the root of a filesystem mounted with MNT_UNION: the
+ *    covered vnode is installed instead.
+ *
+ * Returns -1 when (*vp) and (fp) were switched and the caller should
+ * read again, 0 when there is nothing more to do, or an errno value
+ * from VOP_GETATTR, VOP_OPEN or vn_close.
+ */
+static int
+union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
+{
+	int error = 0;
+
+	if ((*vp)->v_op == union_vnodeop_p) {
+		struct vnode *lvp;
+
+		lvp = union_dircache(*vp, p);
+		if (lvp != NULLVP) {
+			struct vattr va;
+
+			/*
+			 * If the directory is opaque,
+			 * then don't show lower entries
+			 */
+			error = VOP_GETATTR(*vp, &va, fp->f_cred, p);
+			if (error) {
+				/* (va) was not filled in; don't look at it */
+				vput(lvp);
+				return (error);
+			}
+			if (va.va_flags & OPAQUE) {
+				vput(lvp);
+				lvp = NULL;
+			}
+		}
+
+		if (lvp != NULLVP) {
+			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
+			if (error) {
+				vput(lvp);
+				return (error);
+			}
+			VOP_UNLOCK(lvp, 0, p);
+			fp->f_data = (caddr_t) lvp;
+			fp->f_offset = 0;
+			error = vn_close(*vp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			*vp = lvp;
+			return -1;	/* goto unionread */
+		}
+	}
+	if (((*vp)->v_flag & VROOT) && ((*vp)->v_mount->mnt_flag & MNT_UNION)) {
+		struct vnode *tvp = *vp;
+		*vp = (*vp)->v_mount->mnt_vnodecovered;
+		VREF(*vp);
+		fp->f_data = (caddr_t) *vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		return -1;	/* goto unionread */
+	}
+	return error;
+}
+
+/*
+ * Module event handler: publish union_dircheck through the
+ * union_dircheckp hook on load and clear the hook again on unload.
+ * Every other event is ignored.  Always returns 0.
+ */
+static int
+union_modevent(module_t mod, int type, void *data)
+{
+	if (type == MOD_LOAD)
+		union_dircheckp = union_dircheck;
+	else if (type == MOD_UNLOAD)
+		union_dircheckp = NULL;
+
+	return 0;
+}
+static moduledata_t union_mod = {	/* module description handed to DECLARE_MODULE */
+	"union_dircheck",
+	union_modevent,		/* event handler defined above */
+	NULL
+};
+DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
new file mode 100644
index 0000000..db4d4d3
--- /dev/null
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 1994, 1995 The Regents of the University of California.
+ * Copyright (c) 1994, 1995 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vfsops.c	8.20 (Berkeley) 5/20/95
+ * $Id: union_vfsops.c,v 1.30 1998/09/07 13:17:02 bde Exp $
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <miscfs/union/union.h>
+/* Malloc type used for the struct union_mount allocated in union_mount(). */
+static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure");
+/*
+ * Prototypes for the vfsops entry points.  The static ones are
+ * defined in this file.  Of the extern ones, union_fhtovp,
+ * union_quotactl, union_sync, union_vget and union_vptofh are turned
+ * into nullop/eopnotsupp macros further down, before the vfsops table
+ * uses them; union_init is not defined in this file.
+ * NOTE(review): union_fhtovp is declared here with a (struct mbuf *)
+ * third argument while the macro below casts to (struct sockaddr *).
+ */
+extern int	union_init __P((struct vfsconf *));
+
+extern int	union_fhtovp __P((struct mount *mp, struct fid *fidp,
+				  struct mbuf *nam, struct vnode **vpp,
+				  int *exflagsp, struct ucred **credanonp));
+static int	union_mount __P((struct mount *mp, char *path, caddr_t data,
+				 struct nameidata *ndp, struct proc *p));
+extern int	union_quotactl __P((struct mount *mp, int cmd, uid_t uid,
+				    caddr_t arg, struct proc *p));
+static int	union_root __P((struct mount *mp, struct vnode **vpp));
+static int	union_start __P((struct mount *mp, int flags, struct proc *p));
+static int	union_statfs __P((struct mount *mp, struct statfs *sbp,
+				  struct proc *p));
+extern int	union_sync __P((struct mount *mp, int waitfor,
+				struct ucred *cred, struct proc *p));
+static int	union_unmount __P((struct mount *mp, int mntflags,
+				   struct proc *p));
+extern int	union_vget __P((struct mount *mp, ino_t ino,
+				struct vnode **vpp));
+extern int	union_vptofh __P((struct vnode *vp, struct fid *fhp));
+
+/*
+ * Mount union filesystem
+ *
+ * (mp) is the mount being set up; its mnt_vnodecovered is one of
+ * the two layers.
+ * (path) is the user-space mount point name, copied into f_mntonname.
+ * (data) is a user-space struct union_args naming the other layer
+ * (args.target) and the layering mode (args.mntflags).
+ * (ndp) is a scratch nameidata used to look up the target.
+ * (p) is the mounting process; its credentials and umask are saved
+ * in the union_mount.
+ *
+ * Returns 0 on success.  On failure every reference taken here is
+ * released and one of EOPNOTSUPP (update mount), EDEADLK (target is
+ * already the upper layer of the covered union vnode), EINVAL (target
+ * not a directory, or bad mode), or the error from copyin, namei or
+ * VOP_WHITEOUT is returned.
+ */
+static int
+union_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct union_args args;
+	struct vnode *lowerrootvp = NULLVP;
+	struct vnode *upperrootvp = NULLVP;
+	struct union_mount *um = 0;
+	struct ucred *cred = 0;
+	char *cp = 0;
+	int len;
+	u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Disable clustered write, otherwise system becomes unstable.
+	 */
+	mp->mnt_flag |= MNT_NOCLUSTERW;
+
+	/*
+	 * Update mounts are not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/*
+		 * Need to provide.
+		 * 1. a way to convert between rdonly and rdwr mounts.
+		 * 2. support for nfs exports.
+		 */
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+
+	/*
+	 * Get argument
+	 */
+	error = copyin(data, (caddr_t)&args, sizeof(struct union_args));
+	if (error)
+		goto bad;
+
+	lowerrootvp = mp->mnt_vnodecovered;
+	VREF(lowerrootvp);
+
+	/*
+	 * Unlock lower node to avoid deadlock.
+	 */
+	if (lowerrootvp->v_op == union_vnodeop_p)
+		VOP_UNLOCK(lowerrootvp, 0, p);
+
+	/*
+	 * Find upper node.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+	       UIO_USERSPACE, args.target, p);
+
+	error = namei(ndp);
+	if (lowerrootvp->v_op == union_vnodeop_p)
+		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p);
+	if (error)
+		goto bad;
+
+	upperrootvp = ndp->ni_vp;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	/*
+	 * Check multi union mount to avoid `lock myself again' panic.
+	 * The covered vnode's private data is only a union_node when
+	 * the vnode itself belongs to the union layer, so test that
+	 * before looking inside it.
+	 */
+	if (lowerrootvp->v_op == union_vnodeop_p &&
+	    upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) {
+#ifdef DIAGNOSTIC
+		printf("union_mount: multi union mount?\n");
+#endif
+		error = EDEADLK;
+		goto bad;
+	}
+
+	if (upperrootvp->v_type != VDIR) {
+		error = EINVAL;
+		goto bad;
+	}
+
+	um = (struct union_mount *) malloc(sizeof(struct union_mount),
+				M_UNIONFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Keep a held reference to the target vnodes.
+	 * They are vrele'd in union_unmount.
+	 *
+	 * Depending on the _BELOW flag, the filesystems are
+	 * viewed in a different order.  In effect, this is the
+	 * same as providing a mount under option to the mount syscall.
+	 */
+
+	um->um_op = args.mntflags & UNMNT_OPMASK;
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		um->um_lowervp = lowerrootvp;
+		um->um_uppervp = upperrootvp;
+		break;
+
+	case UNMNT_BELOW:
+		um->um_lowervp = upperrootvp;
+		um->um_uppervp = lowerrootvp;
+		break;
+
+	case UNMNT_REPLACE:
+		/* the covered vnode is not part of the union at all */
+		vrele(lowerrootvp);
+		lowerrootvp = NULLVP;
+		um->um_uppervp = upperrootvp;
+		um->um_lowervp = lowerrootvp;
+		break;
+
+	default:
+		error = EINVAL;
+		goto bad;
+	}
+
+	/*
+	 * Unless the mount is readonly, ensure that the top layer
+	 * supports whiteout operations
+	 */
+	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP);
+		if (error)
+			goto bad;
+	}
+
+	um->um_cred = p->p_ucred;
+	crhold(um->um_cred);
+	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+	/*
+	 * Depending on what you think the MNT_LOCAL flag might mean,
+	 * you may want the && to be || on the conditional below.
+	 * At the moment it has been defined that the filesystem is
+	 * only local if it is all local, ie the MNT_LOCAL flag implies
+	 * that the entire namespace is local.  If you think the MNT_LOCAL
+	 * flag implies that some of the files might be stored locally
+	 * then you will want to change the conditional.
+	 */
+	if (um->um_op == UNMNT_ABOVE) {
+		if (((um->um_lowervp == NULLVP) ||
+		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+			mp->mnt_flag |= MNT_LOCAL;
+	}
+
+	/*
+	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
+	 * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion.  This means, that an update
+	 * mount of the underlying filesystem to go from rdonly to rdwr
+	 * will leave the unioned view as read-only.
+	 */
+	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+	mp->mnt_data = (qaddr_t) um;
+	vfs_getnewfsid(mp);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/*
+	 * f_mntfromname is the target path prefixed with a tag
+	 * showing the layering mode.
+	 */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		cp = "<above>:";
+		break;
+	case UNMNT_BELOW:
+		cp = "<below>:";
+		break;
+	case UNMNT_REPLACE:
+		cp = "";
+		break;
+	}
+	len = strlen(cp);
+	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+	cp = mp->mnt_stat.f_mntfromname + len;
+	len = MNAMELEN - len;
+
+	(void) copyinstr(args.target, cp, len - 1, &size);
+	bzero(cp + size, len - size);
+
+	(void)union_statfs(mp, &mp->mnt_stat, p);
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount: from %s, on %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	if (um)
+		free(um, M_UNIONFSMNT);
+	if (cred)
+		crfree(cred);
+	if (upperrootvp)
+		vrele(upperrootvp);
+	if (lowerrootvp)
+		vrele(lowerrootvp);
+	return (error);
+}
+
+/*
+ * VFS start.  Nothing needed here - the start routine
+ * on the underlying filesystem(s) will have been called
+ * when that filesystem was mounted.
+ *
+ * (mp), (flags) and (p) are unused; always returns 0.
+ */
+static int
+union_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Free reference to union layer
+ *
+ * (mp) is the union mount to tear down; MNT_FORCE in (mntflags)
+ * is passed on to vflush() as FORCECLOSE.  (p) is only used by the
+ * diagnostic build.
+ * Returns the error from union_root(), EBUSY if the root vnode is
+ * still referenced after flushing, or 0 once the union_mount has
+ * been freed.
+ */
+static int
+union_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct vnode *um_rootvp;
+	int error;
+	int freeing;	/* vnode count seen on the previous pass */
+	int flags = 0;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+
+	if ((error = union_root(mp, &um_rootvp)) != 0)
+		return (error);
+
+	/*
+	 * Keep flushing vnodes from the mount list.
+	 * This is needed because of the un_pvp held
+	 * reference to the parent vnode.
+	 * If more vnodes have been freed on a given pass,
+	 * then try again.  The loop will iterate at most
+	 * (d) times, where (d) is the maximum tree depth
+	 * in the filesystem.
+	 */
+	for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
+		struct vnode *vp;
+		int n;
+
+		/* count #vnodes held on mount list */
+		for (n = 0, vp = mp->mnt_vnodelist.lh_first;
+				vp != NULLVP;
+				vp = vp->v_mntvnodes.le_next)
+			n++;
+
+		/* if this is unchanged then stop */
+		if (n == freeing)
+			break;
+
+		/* otherwise try one more time */
+		freeing = n;
+	}
+
+	/* At this point the root vnode should have a single reference */
+	if (um_rootvp->v_usecount > 1) {
+		vput(um_rootvp);
+		return (EBUSY);
+	}
+
+#ifdef UNION_DIAGNOSTIC
+	vprint("union root", um_rootvp);
+#endif	 
+	/*
+	 * Discard references to upper and lower target vnodes.
+	 */
+	if (um->um_lowervp)
+		vrele(um->um_lowervp);
+	vrele(um->um_uppervp);
+	crfree(um->um_cred);
+	/*
+	 * Release reference on underlying root vnode
+	 */
+	vput(um_rootvp);
+	/*
+	 * And blow it away for future re-use
+	 */
+	vgone(um_rootvp);
+	/*
+	 * Finally, throw away the union_mount structure
+	 */
+	free(mp->mnt_data, M_UNIONFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+/*
+ * union_root: return in (*vpp) the union vnode for the root of
+ * mount (mp), built from the mount's upper and lower root vnodes
+ * via union_allocvp() with caching enabled.
+ *
+ * lockadj records that the lower root was found locked (and the
+ * mount is not UNMNT_BELOW); it is then unlocked for the duration
+ * and relocked before returning.  loselock records that the upper
+ * root was already locked on a UNMNT_BELOW mount, in which case it
+ * is not locked again here and UN_ULOCK is cleared on the result.
+ * Returns 0 or the error from union_allocvp().
+ */
+static int
+union_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	int error;
+	int loselock;
+	int lockadj = 0;
+
+	if (um->um_lowervp && um->um_op != UNMNT_BELOW &&
+		VOP_ISLOCKED(um->um_lowervp)) {
+		VREF(um->um_lowervp);	/* held until the relock at the end */
+		VOP_UNLOCK(um->um_lowervp, 0, p);
+		lockadj = 1;
+	}
+
+	/*
+	 * Return locked reference to root.
+	 */
+	VREF(um->um_uppervp);
+	if ((um->um_op == UNMNT_BELOW) &&
+	    VOP_ISLOCKED(um->um_uppervp)) {
+		loselock = 1;
+	} else {
+		vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p);
+		loselock = 0;
+	}
+	if (um->um_lowervp)
+		VREF(um->um_lowervp);
+	error = union_allocvp(vpp, mp,
+			      (struct vnode *) 0,
+			      (struct vnode *) 0,
+			      (struct componentname *) 0,
+			      um->um_uppervp,
+			      um->um_lowervp,
+			      1);
+
+	if (error) {
+		/* undo the references (and the lock, if taken here) from above */
+		if (loselock)
+			vrele(um->um_uppervp);
+		else
+			vput(um->um_uppervp);
+		if (um->um_lowervp)
+			vrele(um->um_lowervp);
+	} else {
+		if (loselock)
+			VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+	}
+	if (lockadj) {
+		vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+		vrele(um->um_lowervp);
+	}
+
+	return (error);
+}
+/*
+ * union_statfs: fill (sbp) with statistics for union mount (mp),
+ * combined from the lower layer (if there is one) and the upper
+ * layer, using (p) for the VFS_STATFS calls.
+ *
+ * Totals (f_blocks, f_files) are the sum of both layers; the free
+ * counts, block size, I/O size and flags come from the upper layer.
+ * When (sbp) is not the mount's own mnt_stat, the type, fsid and
+ * mount names are copied in from it as well.
+ * Returns 0, or the error from either VFS_STATFS call.
+ */
+static int
+union_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	int error;
+	struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+	struct statfs mstat;	/* scratch: lower layer first, then upper */
+	int lbsize;		/* block size reported by the lower layer */
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+			um->um_lowervp,
+	       		um->um_uppervp);
+#endif
+
+	bzero(&mstat, sizeof(mstat));
+
+	if (um->um_lowervp) {
+		error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * now copy across the "interesting" information and fake the rest.
+	 * without a lower layer mstat is still all zeroes here.
+	 */
+#if 0
+	sbp->f_type = mstat.f_type;
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+#endif
+	lbsize = mstat.f_bsize;
+	sbp->f_blocks = mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files = mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
+	if (error)
+		return (error);
+
+	sbp->f_flags = mstat.f_flags;
+	sbp->f_bsize = mstat.f_bsize;
+	sbp->f_iosize = mstat.f_iosize;
+
+	/*
+	 * if the lower and upper blocksizes differ, then frig the
+	 * block counts so that the sizes reported by df make some
+	 * kind of sense.  none of this makes sense though.
+	 *
+	 * NOTE(review): this divides by the upper layer's f_bsize,
+	 * which is not checked for zero.
+	 */
+
+	if (mstat.f_bsize != lbsize)
+		sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize;
+
+	/*
+	 * The "total" fields count total resources in all layers,
+	 * the "free" fields count only those resources which are
+	 * free in the upper layer (since only the upper layer
+	 * is writeable).
+	 */
+	sbp->f_blocks += mstat.f_blocks;
+	sbp->f_bfree = mstat.f_bfree;
+	sbp->f_bavail = mstat.f_bavail;
+	sbp->f_files += mstat.f_files;
+	sbp->f_ffree = mstat.f_ffree;
+
+	if (sbp != &mp->mnt_stat) {
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	return (0);
+}
+
+/*
+ * XXX - Assumes no data cached at union layer.
+ *
+ * The operations below have no union-specific implementation:
+ * union_sync is mapped onto nullop and the others onto eopnotsupp,
+ * each cast to the signature of the slot it fills.  union_sysctl is
+ * defined here but not used in the vfsops table that follows.
+ */
+#define union_sync ((int (*) __P((struct mount *, int, struct ucred *, \
+	    struct proc *)))nullop)
+
+#define union_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+	    struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp)
+#define union_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+	    struct proc *)))eopnotsupp)
+#define union_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+	    size_t, struct proc *)))eopnotsupp)
+#define union_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+	    eopnotsupp)
+#define union_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
+
+static struct vfsops union_vfsops = {	/* positional: order must match struct vfsops */
+	union_mount,
+	union_start,
+	union_unmount,
+	union_root,
+	union_quotactl,		/* eopnotsupp stub */
+	union_statfs,
+	union_sync,		/* nullop stub */
+	union_vget,		/* eopnotsupp stub */
+	union_fhtovp,		/* eopnotsupp stub */
+	union_vptofh,		/* eopnotsupp stub */
+	union_init,		/* declared extern above; not defined in this file */
+};
+
+VFS_SET(union_vfsops, union, VFCF_LOOPBACK);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
new file mode 100644
index 0000000..ba9b2a3
--- /dev/null
+++ b/sys/fs/unionfs/union_vnops.c
@@ -0,0 +1,1804 @@
+/*
+ * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
+ * Copyright (c) 1992, 1993, 1994, 1995
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)union_vnops.c	8.32 (Berkeley) 6/23/95
+ * $Id: union_vnops.c,v 1.59 1998/12/14 05:00:59 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/lock.h>
+#include <miscfs/union/union.h>
+
+#define FIXUP(un, p) { \
+	if (((un)->un_flags & UN_ULOCK) == 0) { \
+		union_fixup(un, p); \
+	} \
+}
+
+static int	union_abortop __P((struct vop_abortop_args *ap));
+static int	union_access __P((struct vop_access_args *ap));
+static int	union_advlock __P((struct vop_advlock_args *ap));
+static int	union_bmap __P((struct vop_bmap_args *ap));
+static int	union_close __P((struct vop_close_args *ap));
+static int	union_create __P((struct vop_create_args *ap));
+static void	union_fixup __P((struct union_node *un, struct proc *p));	/* helper behind FIXUP() */
+static int	union_fsync __P((struct vop_fsync_args *ap));
+static int	union_getattr __P((struct vop_getattr_args *ap));
+static int	union_inactive __P((struct vop_inactive_args *ap));
+static int	union_ioctl __P((struct vop_ioctl_args *ap));
+static int	union_islocked __P((struct vop_islocked_args *ap));
+static int	union_lease __P((struct vop_lease_args *ap));
+static int	union_link __P((struct vop_link_args *ap));
+static int	union_lock __P((struct vop_lock_args *ap));
+static int	union_lookup __P((struct vop_lookup_args *ap));
+static int	union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp,
+				   struct vnode **vpp,
+				   struct componentname *cnp));	/* single-layer helper for union_lookup */
+static int	union_mkdir __P((struct vop_mkdir_args *ap));
+static int	union_mknod __P((struct vop_mknod_args *ap));
+static int	union_mmap __P((struct vop_mmap_args *ap));
+static int	union_open __P((struct vop_open_args *ap));
+static int	union_pathconf __P((struct vop_pathconf_args *ap));
+static int	union_print __P((struct vop_print_args *ap));
+static int	union_read __P((struct vop_read_args *ap));
+static int	union_readdir __P((struct vop_readdir_args *ap));
+static int	union_readlink __P((struct vop_readlink_args *ap));
+static int	union_reclaim __P((struct vop_reclaim_args *ap));
+static int	union_remove __P((struct vop_remove_args *ap));
+static int	union_rename __P((struct vop_rename_args *ap));
+static int	union_revoke __P((struct vop_revoke_args *ap));
+static int	union_rmdir __P((struct vop_rmdir_args *ap));
+static int	union_poll __P((struct vop_poll_args *ap));
+static int	union_setattr __P((struct vop_setattr_args *ap));
+static int	union_strategy __P((struct vop_strategy_args *ap));
+static int	union_symlink __P((struct vop_symlink_args *ap));
+static int	union_unlock __P((struct vop_unlock_args *ap));
+static int	union_whiteout __P((struct vop_whiteout_args *ap));
+static int	union_write __P((struct vop_read_args *ap));	/* NOTE(review): vop_read_args, matching the definition; presumably meant vop_write_args -- change both together */
+
+static void
+union_fixup(un, p)
+	struct union_node *un;
+	struct proc *p;
+{
+	/* Lock the upper vnode exclusively, then record that in the union node. */
+	vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p);
+	un->un_flags |= UN_ULOCK;
+}
+
+static int
+union_lookup1(udvp, dvpp, vpp, cnp)	/* look up cnp in one layer, starting at *dvpp */
+	struct vnode *udvp;		/* root of this layer; mount crossing stops here */
+	struct vnode **dvpp;		/* in/out: directory searched (may be replaced for `..') */
+	struct vnode **vpp;		/* out: vnode found, set only on success */
+	struct componentname *cnp;
+{
+	int error;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *tdvp;
+	struct vnode *dvp;
+	struct mount *mp;
+
+	dvp = *dvpp;
+
+	/*
+	 * If stepping up the directory tree, check for going
+	 * back across the mount point, in which case do what
+	 * lookup would do by stepping back down the mount
+	 * hierarchy.
+	 */
+	if (cnp->cn_flags & ISDOTDOT) {
+		while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
+			/*
+			 * Don't do the NOCROSSMOUNT check
+			 * at this level.  By definition,
+			 * union fs deals with namespaces, not
+			 * filesystems.
+			 */
+			tdvp = dvp;
+			*dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
+			vput(tdvp);		/* drop the old root ... */
+			VREF(dvp);		/* ... and reference + lock the covered vnode */
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+		}
+	}
+
+        error = VOP_LOOKUP(dvp, &tdvp, cnp);
+	if (error)
+		return (error);
+
+	/*
+	 * The parent directory will have been unlocked, unless lookup
+	 * found the last component.  In which case, re-lock the node
+	 * here to allow it to be unlocked again (phew) in union_lookup.
+	 */
+	if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+
+	dvp = tdvp;		/* from here on dvp is the vnode that was found */
+
+	/*
+	 * Lastly check if the current node is a mount point in
+	 * which case walk up the mount hierarchy making sure not to
+	 * bump into the root of the mount tree (ie. dvp != udvp).
+	 */
+	while (dvp != udvp && (dvp->v_type == VDIR) &&
+	       (mp = dvp->v_mountedhere)) {
+
+		if (vfs_busy(mp, 0, 0, p))
+			continue;	/* could not busy the mount: re-test the loop condition and retry */
+
+		error = VFS_ROOT(mp, &tdvp);
+		vfs_unbusy(mp, p);
+		if (error) {
+			vput(dvp);
+			return (error);
+		}
+
+		vput(dvp);		/* swap the covered vnode for the mounted root */
+		dvp = tdvp;
+	}
+
+	*vpp = dvp;
+	return (0);
+}
+
+static int
+union_lookup(ap)	/* look the name up in both layers and return a union vnode over the results */
+	struct vop_lookup_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	int error;
+	int uerror, lerror;
+	struct vnode *uppervp, *lowervp;
+	struct vnode *upperdvp, *lowerdvp;
+	struct vnode *dvp = ap->a_dvp;
+	struct union_node *dun = VTOUNION(dvp);
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	int lockparent = cnp->cn_flags & LOCKPARENT;	/* caller's setting; restored before returning */
+	struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+	struct ucred *saved_cred = NULL;
+	int iswhiteout;
+	struct vattr va;
+
+
+	/*
+	 * Disallow write attempts to the filesystem mounted read-only.
+	 */
+	if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+		(cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+		return (EROFS);
+
+#ifdef notyet
+	if (cnp->cn_namelen == 3 &&
+			cnp->cn_nameptr[2] == '.' &&
+			cnp->cn_nameptr[1] == '.' &&
+			cnp->cn_nameptr[0] == '.') {
+		dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
+		if (dvp == NULLVP)
+			return (ENOENT);
+		VREF(dvp);
+		vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+		if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+			VOP_UNLOCK(ap->a_dvp, 0, p);
+		return (0);
+	}
+#endif
+
+	cnp->cn_flags |= LOCKPARENT;	/* force LOCKPARENT for the per-layer lookups */
+
+	upperdvp = dun->un_uppervp;
+	lowerdvp = dun->un_lowervp;
+	uppervp = NULLVP;
+	lowervp = NULLVP;
+	iswhiteout = 0;
+
+	if (cnp->cn_flags & ISDOTDOT) {	/* extra references, dropped again at "out" */
+		if (upperdvp != NULL)
+			VREF(upperdvp);
+		if (lowerdvp != NULL)
+			VREF(lowerdvp);
+	}
+
+	/*
+	 * do the lookup in the upper level.
+	 * if that level consumes additional pathnames,
+	 * then assume that something special is going
+	 * on and just return that vnode.
+	 */
+	if (upperdvp != NULLVP) {
+		FIXUP(dun, p);
+		/*
+		 * If we're doing `..' in the underlying filesystem,
+		 * we must drop our lock on the union node before
+		 * going up the tree in the lower file system--if we block
+		 * on the lowervp lock, and that's held by someone else
+		 * coming down the tree and who's waiting for our lock,
+		 * we would be hosed.
+		 */
+		if (cnp->cn_flags & ISDOTDOT) {
+			/* retain lock on underlying VP: */
+			dun->un_flags |= UN_KLOCK;
+			VOP_UNLOCK(dvp, 0, p);
+		}
+		uerror = union_lookup1(um->um_uppervp, &upperdvp,
+					&uppervp, cnp);
+		/*
+		 * Disallow write attempts to the filesystem mounted read-only.
+		 */
+		if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
+			(dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+			(cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			return (EROFS);	/* NOTE(review): bypasses the ISDOTDOT vrele()s at "out" */
+		}
+
+		if (cnp->cn_flags & ISDOTDOT) {
+			if (dun->un_uppervp == upperdvp) {
+				/*
+				 * We got the underlying bugger back locked...
+				 * now take back the union node lock.  Since we
+				 * hold the uppervp lock, we can diddle union
+				 * locking flags at will. :)
+				 */
+				dun->un_flags |= UN_ULOCK;
+			}
+			/*
+			 * If upperdvp got swapped out, it means we did
+			 * some mount point magic, and we do not have
+			 * dun->un_uppervp locked currently--so we get it
+			 * locked here (don't set the UN_ULOCK flag).
+			 */
+			vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+		}
+
+		/*if (uppervp == upperdvp)
+			dun->un_flags |= UN_KLOCK;*/
+
+		if (cnp->cn_consume != 0) {
+			*ap->a_vpp = uppervp;
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			error = uerror;
+			goto out;
+		}
+		if (uerror == ENOENT || uerror == EJUSTRETURN) {
+			if (cnp->cn_flags & ISWHITEOUT) {
+				iswhiteout = 1;
+			} else if (lowerdvp != NULLVP) {
+				lerror = VOP_GETATTR(upperdvp, &va,
+					cnp->cn_cred, cnp->cn_proc);
+				if (lerror == 0 && (va.va_flags & OPAQUE))
+					iswhiteout = 1;	/* opaque upper dir: skip the lower lookup too */
+			}
+		}
+	} else {
+		uerror = ENOENT;
+	}
+
+	/*
+	 * in a similar way to the upper layer, do the lookup
+	 * in the lower layer.   this time, if there is some
+	 * component magic going on, then vput whatever we got
+	 * back from the upper layer and return the lower vnode
+	 * instead.
+	 */
+	if (lowerdvp != NULLVP && !iswhiteout) {
+		int nameiop;
+
+		vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p);
+
+		/*
+		 * Only do a LOOKUP on the bottom node, since
+		 * we won't be making changes to it anyway.
+		 */
+		nameiop = cnp->cn_nameiop;
+		cnp->cn_nameiop = LOOKUP;
+		if (um->um_op == UNMNT_BELOW) {
+			saved_cred = cnp->cn_cred;
+			cnp->cn_cred = um->um_cred;
+		}
+		/*
+		 * We shouldn't have to worry about locking interactions
+		 * between the lower layer and our union layer (w.r.t.
+		 * `..' processing) because we don't futz with lowervp
+		 * locks in the union-node instantiation code path.
+		 */
+		lerror = union_lookup1(um->um_lowervp, &lowerdvp,
+				&lowervp, cnp);
+		if (um->um_op == UNMNT_BELOW)
+			cnp->cn_cred = saved_cred;
+		cnp->cn_nameiop = nameiop;
+
+		if (lowervp != lowerdvp)
+			VOP_UNLOCK(lowerdvp, 0, p);
+
+		if (cnp->cn_consume != 0 || lerror == EACCES) {
+			if (lerror == EACCES)
+				lowervp = NULLVP;
+			if (uppervp != NULLVP) {
+				if (uppervp == upperdvp)
+					vrele(uppervp);
+				else
+					vput(uppervp);
+				uppervp = NULLVP;
+			}
+			*ap->a_vpp = lowervp;
+			if (!lockparent)
+				cnp->cn_flags &= ~LOCKPARENT;
+			error = lerror;
+			goto out;
+		}
+	} else {
+		lerror = ENOENT;
+		if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
+			lowervp = LOWERVP(dun->un_pvp);	/* `..': reuse the parent union node's lower vnode */
+			if (lowervp != NULLVP) {
+				VREF(lowervp);
+				vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+				lerror = 0;
+			}
+		}
+	}
+
+	if (!lockparent)
+		cnp->cn_flags &= ~LOCKPARENT;
+
+	/*
+	 * at this point, we have uerror and lerror indicating
+	 * possible errors with the lookups in the upper and lower
+	 * layers.  additionally, uppervp and lowervp are (locked)
+	 * references to existing vnodes in the upper and lower layers.
+	 *
+	 * there are now three cases to consider.
+	 * 1. if both layers returned an error, then return whatever
+	 *    error the upper layer generated.
+	 *
+	 * 2. if the top layer failed and the bottom layer succeeded
+	 *    then two subcases occur.
+	 *    a.  the bottom vnode is not a directory, in which
+	 *	  case just return a new union vnode referencing
+	 *	  an empty top layer and the existing bottom layer.
+	 *    b.  the bottom vnode is a directory, in which case
+	 *	  create a new directory in the top-level and
+	 *	  continue as in case 3.
+	 *
+	 * 3. if the top layer succeeded then return a new union
+	 *    vnode referencing whatever the new top layer and
+	 *    whatever the bottom layer returned.
+	 */
+
+	*ap->a_vpp = NULLVP;
+
+	/* case 1. */
+	if ((uerror != 0) && (lerror != 0)) {
+		error = uerror;
+		goto out;
+	}
+
+	/* case 2. */
+	if (uerror != 0 /* && (lerror == 0) */ ) {
+		if (lowervp->v_type == VDIR) { /* case 2b. */
+			dun->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(upperdvp, 0, p);
+			uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+			vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p);
+			dun->un_flags |= UN_ULOCK;
+
+			if (uerror) {
+				if (lowervp != NULLVP) {
+					vput(lowervp);
+					lowervp = NULLVP;
+				}
+				error = uerror;
+				goto out;
+			}
+		}
+	}
+
+	if (lowervp != NULLVP)
+		VOP_UNLOCK(lowervp, 0, p);
+
+	error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+			      uppervp, lowervp, 1);
+
+	if (error) {
+		if (uppervp != NULLVP)
+			vput(uppervp);
+		if (lowervp != NULLVP)
+			vrele(lowervp);
+	} else {
+		if (*ap->a_vpp != dvp)
+			if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+				VOP_UNLOCK(dvp, 0, p);
+#ifdef DIAGNOSTIC
+		if (cnp->cn_namelen == 1 &&
+		    cnp->cn_nameptr[0] == '.' &&
+		    *ap->a_vpp != dvp) {
+			panic("union_lookup returning . (%p) not same as startdir (%p)",
+			    *ap->a_vpp, dvp);	/* print the vnode returned, not the address of the result slot */
+		}
+#endif
+	}
+
+out:
+	if (cnp->cn_flags & ISDOTDOT) {	/* drop the extra references taken on entry */
+		if (upperdvp != NULL)
+			vrele(upperdvp);
+		if (lowerdvp != NULL)
+			vrele(lowerdvp);
+	}
+
+	return (error);
+}
+
+static int
+union_create(ap)	/* create the file in the upper layer and wrap it in a union vnode */
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = dun->un_uppervp;	/* upper-layer directory, if any */
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+
+	if (dvp != NULLVP) {
+		struct vnode *vp;
+		struct mount *mp;
+		int error;
+
+		FIXUP(dun, p);
+
+		dun->un_flags |= UN_KLOCK;	/* as in union_lookup: retain the lock on the underlying vnode */
+		VOP_UNLOCK(ap->a_dvp, 0, p);
+		error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
+		if (error) {
+			dun->un_flags |= UN_ULOCK;	/* set UN_ULOCK again on the error path */
+			return (error);
+		}
+
+		mp = ap->a_dvp->v_mount;
+		VOP_UNLOCK(dvp, 0, p);
+		error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
+				NULLVP, 1);
+		if (error)
+			vput(vp);	/* no union vnode was made: release the new upper vnode */
+		vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p);
+		return (error);
+	}
+
+	return (EROFS);		/* no upper layer to create in */
+}
+
+static int
+union_whiteout(ap)	/* forward the whiteout request to the upper-layer directory */
+	struct vop_whiteout_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+		int a_flags;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+
+	if (un->un_uppervp == NULLVP)
+		return (EOPNOTSUPP);	/* nothing to forward to without an upper layer */
+
+	FIXUP(un, p);
+	return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags));
+}
+
+static int
+union_mknod(ap)		/* make the node in the upper layer; wrap it if a vnode comes back */
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = dun->un_uppervp;	/* upper-layer directory, if any */
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+
+	if (dvp != NULLVP) {
+		struct vnode *vp;
+		struct mount *mp;
+		int error;
+
+		FIXUP(dun, p);
+
+		dun->un_flags |= UN_KLOCK;	/* as in union_lookup: retain the lock on the underlying vnode */
+		VOP_UNLOCK(ap->a_dvp, 0, p);
+		error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
+		if (error) {
+			dun->un_flags |= UN_ULOCK;	/* set UN_ULOCK again on the error path */
+			return (error);
+		}
+
+		if (vp != NULLVP) {	/* the upper layer handed back a vnode: build a union vnode over it */
+			mp = ap->a_dvp->v_mount;
+			VOP_UNLOCK(dvp, 0, p);
+			error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
+					cnp, vp, NULLVP, 1);
+			if (error)
+				vput(vp);
+			vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p);
+		} else {
+			dun->un_flags |= UN_ULOCK;	/* no vnode returned: only the flag is set again */
+		}
+		return (error);
+	}
+
+	return (EROFS);		/* no upper layer to create in */
+}
+
+static int
+union_open(ap)		/* open the upper vnode if there is one, else the lower (copying up for writes) */
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;	/* the layer vnode that actually gets opened */
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 */
+	tvp = un->un_uppervp;
+	if (tvp == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p);	/* flag is 0 when O_TRUNC is set */
+			if (error == 0)
+				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+			return (error);
+		}
+
+		/*
+		 * Just open the lower vnode
+		 */
+		un->un_openl++;		/* count of opens on the lower vnode; union_close decrements it */
+		vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_OPEN(tvp, mode, cred, p);
+		VOP_UNLOCK(tvp, 0, p);
+
+		return (error);
+	}
+
+	FIXUP(un, p);
+
+	error = VOP_OPEN(tvp, mode, cred, p);
+
+	return (error);
+}
+
+static int
+union_close(ap)		/* close the upper vnode if present, otherwise the lower one */
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *node = VTOUNION(ap->a_vp);
+	struct vnode *target = node->un_uppervp;
+
+	if (target == NULLVP) {	/* no upper layer: account for and close the lower vnode */
+#ifdef UNION_DIAGNOSTIC
+		if (node->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		node->un_openl -= 1;
+		target = node->un_lowervp;
+	}
+
+	ap->a_vp = target;	/* retarget the argument block, then forward the call */
+	return (VCALL(target, VOFFSET(vop_close), ap));
+}
+
+/*
+ * Check access permission on the union vnode.
+ * The access check being enforced is to check
+ * against both the underlying vnode, and any
+ * copied vnode.  This ensures that no additional
+ * file permissions are given away simply because
+ * the user caused an implicit file copy.
+ */
+static int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_p;
+	int error = EACCES;	/* result if neither layer has a vnode */
+	struct vnode *vp;
+	struct vnode *savedvp;
+
+	/*
+	 * Disallow write attempts on filesystems mounted read-only.
+	 */
+	if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) {
+		switch (ap->a_vp->v_type) {
+		case VREG: 
+		case VDIR:
+		case VLNK:
+			return (EROFS);
+		default:
+			break;
+		}
+	}
+	if ((vp = un->un_uppervp) != NULLVP) {	/* upper vnode present: its answer is final */
+		FIXUP(un, p);
+		ap->a_vp = vp;
+		return (VCALL(vp, VOFFSET(vop_access), ap));
+	}
+
+	if ((vp = un->un_lowervp) != NULLVP) {
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+		savedvp = ap->a_vp;	/* keep the union vnode: ap->a_vp is overwritten next */
+		ap->a_vp = vp;
+		error = VCALL(vp, VOFFSET(vop_access), ap);
+		if (error == 0) {
+			struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount);
+
+			if (um->um_op == UNMNT_BELOW) {	/* check again with the mount's credentials */
+				ap->a_cred = um->um_cred;
+				error = VCALL(vp, VOFFSET(vop_access), ap);
+			}
+		}
+		VOP_UNLOCK(vp, 0, p);
+		if (error)
+			return (error);	/* (same value the final return below would give) */
+	}
+
+	return (error);
+}
+
+/*
+ * We handle getattr only to change the fsid and
+ * track object sizes
+ */
+static int
+union_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp = un->un_uppervp;
+	struct proc *p = ap->a_p;
+	struct vattr *vap;	/* where the next VOP_GETATTR writes */
+	struct vattr va;	/* scratch for the lower directory's attributes */
+
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	vap = ap->a_vap;
+
+	vp = un->un_uppervp;	/* (same value as the initializer above) */
+	if (vp != NULLVP) {
+		/*
+		 * It's not clear whether VOP_GETATTR is to be
+		 * called with the vnode locked or not.  stat() calls
+		 * it with (vp) locked, and fstat calls it with
+		 * (vp) unlocked.
+		 * In the mean time, compensate here by checking
+		 * the union_node's lock flag.
+		 */
+		if (un->un_flags & UN_LOCKED)
+			FIXUP(un, p);
+
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+		union_newsize(ap->a_vp, vap->va_size, VNOVAL);	/* record the upper size */
+	}
+
+	if (vp == NULLVP) {
+		vp = un->un_lowervp;	/* lower only: its attributes go straight to the caller */
+	} else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
+		vp = un->un_lowervp;	/* directory in both layers: fetch lower attrs into scratch */
+		vap = &va;
+	} else {
+		vp = NULLVP;		/* upper attributes are all that is needed */
+	}
+
+	if (vp != NULLVP) {
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+		union_newsize(ap->a_vp, VNOVAL, vap->va_size);	/* record the lower size */
+	}
+
+	if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+		ap->a_vap->va_nlink += vap->va_nlink;	/* add the lower directory's link count */
+
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];	/* report the union mount's fsid */
+	return (0);
+}
+
+static int
+union_setattr(ap)	/* set attributes on the upper vnode, copying a lower regular file up first */
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_p;
+	struct vattr *vap = ap->a_vap;
+	int error;
+
+	/*
+	 * Disallow write attempts on filesystems mounted read-only.
+	 */
+	if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
+		(vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+		 vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+		 vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL))
+		return (EROFS);
+
+	/*
+	 * Handle case of truncating lower object to zero size,
+	 * by creating a zero length upper object.  This is to
+	 * handle the case of open with O_TRUNC and O_CREAT.
+	 */
+	if ((un->un_uppervp == NULLVP) &&
+	    /* assert(un->un_lowervp != NULLVP) */
+	    (un->un_lowervp->v_type == VREG)) {
+		error = union_copyup(un, (ap->a_vap->va_size != 0),	/* flag is 0 when truncating to zero */
+						ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Try to set attributes in upper layer,
+	 * otherwise return read-only filesystem error.
+	 */
+	if (un->un_uppervp != NULLVP) {
+		FIXUP(un, p);
+		error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
+					ap->a_cred, ap->a_p);
+		if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
+			union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);	/* size was set: record the new upper size */
+	} else {
+		error = EROFS;
+	}
+
+	return (error);
+}
+
+static int
+union_read(ap)		/* read from the vnode OTHERVP() selects, then refresh the recorded size */
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct proc *p = ap->a_uio->uio_procp;
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int dolock = (vp == LOWERVP(ap->a_vp));	/* the lower vnode is locked only around the read */
+
+	if (dolock)
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	else
+		FIXUP(VTOUNION(ap->a_vp), p);
+	error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	if (dolock)
+		VOP_UNLOCK(vp, 0, p);
+
+	/*
+	 * XXX
+	 * perhaps the size of the underlying object has changed under
+	 * our feet.  take advantage of the offset information present
+	 * in the uio structure.
+	 */
+	if (error == 0) {
+		struct union_node *un = VTOUNION(ap->a_vp);
+		off_t cur = ap->a_uio->uio_offset;	/* offset after the read */
+
+		if (vp == un->un_uppervp) {
+			if (cur > un->un_uppersz)
+				union_newsize(ap->a_vp, cur, VNOVAL);
+		} else {
+			if (cur > un->un_lowersz)
+				union_newsize(ap->a_vp, VNOVAL, cur);
+		}
+	}
+
+	return (error);
+}
+
+static int
+union_write(ap)		/* write through to the upper vnode; panics if there is none */
+	struct vop_read_args /* {	NOTE(review): vop_read_args, as in the prototype; presumably meant vop_write_args
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct vnode *vp;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_uio->uio_procp;
+
+	vp = UPPERVP(ap->a_vp);
+	if (vp == NULLVP)
+		panic("union: missing upper layer in write");
+
+	FIXUP(un, p);
+	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+
+	/*
+	 * the size of the underlying object may be changed by the
+	 * write.
+	 */
+	if (error == 0) {
+		off_t cur = ap->a_uio->uio_offset;	/* offset after the write */
+
+		if (cur > un->un_uppersz)
+			union_newsize(ap->a_vp, cur, VNOVAL);
+	}
+
+	return (error);
+}
+
+static int
+union_lease(ap)		/* pass the lease check to the vnode OTHERVP() selects */
+	struct vop_lease_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+		struct ucred *a_cred;
+		int a_flag;
+	} */ *ap;
+{
+	struct vnode *layervp;
+
+	layervp = ap->a_vp = OTHERVP(ap->a_vp);	/* retarget the argument block */
+	return (VCALL(layervp, VOFFSET(vop_lease), ap));
+}
+
+static int
+union_ioctl(ap)		/* pass the ioctl to the vnode OTHERVP() selects */
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *layervp;
+
+	layervp = ap->a_vp = OTHERVP(ap->a_vp);	/* retarget the argument block */
+	return (VCALL(layervp, VOFFSET(vop_ioctl), ap));
+}
+
+static int
+union_poll(ap)		/* pass the poll to the vnode OTHERVP() selects */
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int  a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *layervp;
+
+	layervp = ap->a_vp = OTHERVP(ap->a_vp);	/* retarget the argument block */
+	return (VCALL(layervp, VOFFSET(vop_poll), ap));
+}
+
+static int
+union_revoke(ap)	/* revoke whichever layer vnodes exist, then vgone() the union vnode */
+	struct vop_revoke_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	if (UPPERVP(vp))
+		VOP_REVOKE(UPPERVP(vp), ap->a_flags);	/* return value is not checked */
+	if (LOWERVP(vp))
+		VOP_REVOKE(LOWERVP(vp), ap->a_flags);	/* return value is not checked */
+	vgone(vp);
+	return (0);
+}
+
+static int
+union_mmap(ap)		/* pass the mmap to the vnode OTHERVP() selects */
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *layervp;
+
+	layervp = ap->a_vp = OTHERVP(ap->a_vp);	/* retarget the argument block */
+	return (VCALL(layervp, VOFFSET(vop_mmap), ap));
+}
+
+static int
+union_fsync(ap)		/* fsync the vnode OTHERVP() selects, taking the lock it needs */
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int  a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error = 0;
+	struct proc *p = ap->a_p;
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	struct union_node *un;
+
+	if (targetvp != NULLVP) {
+		int dolock = (targetvp == LOWERVP(ap->a_vp));	/* the lower vnode is locked only around the fsync */
+
+		un = VTOUNION(ap->a_vp);
+		if (dolock)
+			vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p);
+		else  {
+			un = VTOUNION(ap->a_vp);	/* (same value as assigned above) */
+			if ((un->un_flags & UN_ULOCK) == 0 &&	/* flag says unlocked, yet curproc holds the lock */
+			    targetvp->v_data != NULL &&
+			    ((struct lock *)targetvp->v_data)->lk_lockholder
+			        == curproc->p_pid &&
+			    VOP_ISLOCKED(targetvp) != 0)
+				return 0;   /* XXX */
+
+			FIXUP(un, p);
+		}
+
+		error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p);
+		if (dolock)
+			VOP_UNLOCK(targetvp, 0, p);
+	}
+
+	return (error);
+}
+
+static int
+union_remove(ap)	/* remove the upper file if there is one, otherwise make a whiteout */
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	int error;
+
+	if (dun->un_uppervp == NULLVP)
+		panic("union remove: null upper vnode");
+
+	if (un->un_uppervp != NULLVP) {
+		struct vnode *dvp = dun->un_uppervp;
+		struct vnode *vp = un->un_uppervp;
+
+		FIXUP(dun, p);
+		dun->un_flags |= UN_KLOCK;	/* as in union_lookup: retain the lock on the underlying vnode */
+		VOP_UNLOCK(ap->a_dvp, 0, p);
+		FIXUP(un, p);
+		un->un_flags |= UN_KLOCK;	/* likewise for the file being removed */
+		VOP_UNLOCK(ap->a_vp, 0, p);
+
+		if (union_dowhiteout(un, cnp->cn_cred, p))
+			cnp->cn_flags |= DOWHITEOUT;	/* pass the whiteout request down via the componentname */
+		error = VOP_REMOVE(dvp, vp, cnp);
+#if 0
+		/* XXX */
+		if (!error)
+			union_removed_upper(un);
+#endif
+		dun->un_flags |= UN_ULOCK;	/* both nodes get UN_ULOCK set again, error or not */
+		un->un_flags |= UN_ULOCK;
+	} else {
+		FIXUP(dun, p);
+		error = union_mkwhiteout(	/* lower-only file: create a whiteout in the upper directory */
+			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
+			dun->un_uppervp, ap->a_cnp, un->un_path);
+	}
+
+	return (error);
+}
+
+static int
+union_link(ap)		/* link in the upper layer, copying a lower-only source up first */
+	struct vop_link_args /* {
+		struct vnode *a_tdvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct union_node *dun = VTOUNION(ap->a_tdvp);
+	struct vnode *vp;	/* vnode handed to VOP_LINK as the link source */
+	struct vnode *tdvp;	/* upper-layer target directory */
+	int error = 0;
+
+
+	if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
+		vp = ap->a_vp;		/* different vnode ops vector than the target dir: use a_vp as is */
+	} else {
+		struct union_node *tun = VTOUNION(ap->a_vp);
+		if (tun->un_uppervp == NULLVP) {
+			vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p);
+			if (dun->un_uppervp == tun->un_dirvp) {	/* unlock the upper dir around the copyup */
+				dun->un_flags &= ~UN_ULOCK;
+				VOP_UNLOCK(dun->un_uppervp, 0, p);
+			}
+			error = union_copyup(tun, 1, cnp->cn_cred, p);
+			if (dun->un_uppervp == tun->un_dirvp) {	/* and take the lock back afterwards */
+				vn_lock(dun->un_uppervp,
+						LK_EXCLUSIVE | LK_RETRY, p);
+				dun->un_flags |= UN_ULOCK;
+			}
+			VOP_UNLOCK(ap->a_vp, 0, p);
+		}
+		vp = tun->un_uppervp;
+	}
+
+	tdvp = dun->un_uppervp;
+	if (tdvp == NULLVP)
+		error = EROFS;		/* no upper directory to link into */
+
+	if (error)
+		return (error);		/* covers a failed copyup as well */
+
+	FIXUP(dun, p);
+	dun->un_flags |= UN_KLOCK;	/* as in union_lookup: retain the lock on the underlying vnode */
+	VOP_UNLOCK(ap->a_tdvp, 0, p);
+
+	error = VOP_LINK(tdvp, vp, cnp);
+
+	dun->un_flags |= UN_ULOCK;
+
+	return (error);
+}
+
+static int
+union_rename(ap)	/* swap each union vnode for its upper vnode, then rename in the upper layer */
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	int error;
+
+	struct vnode *fdvp = ap->a_fdvp;
+	struct vnode *fvp = ap->a_fvp;
+	struct vnode *tdvp = ap->a_tdvp;
+	struct vnode *tvp = ap->a_tvp;
+
+	if (fdvp->v_op == union_vnodeop_p) {	/* always true */
+		struct union_node *un = VTOUNION(fdvp);
+		if (un->un_uppervp == NULLVP) {
+			/*
+			 * this should never happen in normal
+			 * operation but might if there was
+			 * a problem creating the top-level shadow
+			 * directory.
+			 */
+			error = EXDEV;
+			goto bad;
+		}
+
+		fdvp = un->un_uppervp;
+		VREF(fdvp);		/* reference the upper vnode ... */
+		vrele(ap->a_fdvp);	/* ... and release the union vnode */
+	}
+
+	if (fvp->v_op == union_vnodeop_p) {	/* always true */
+		struct union_node *un = VTOUNION(fvp);
+		if (un->un_uppervp == NULLVP) {
+			/* XXX: should do a copyup */
+			error = EXDEV;
+			goto bad;
+		}
+
+		if (un->un_lowervp != NULLVP)
+			ap->a_fcnp->cn_flags |= DOWHITEOUT;	/* source also exists in the lower layer */
+
+		fvp = un->un_uppervp;
+		VREF(fvp);
+		vrele(ap->a_fvp);
+	}
+
+	if (tdvp->v_op == union_vnodeop_p) {
+		struct union_node *un = VTOUNION(tdvp);
+		if (un->un_uppervp == NULLVP) {
+			/*
+			 * this should never happen in normal
+			 * operation but might if there was
+			 * a problem creating the top-level shadow
+			 * directory.
+			 */
+			error = EXDEV;
+			goto bad;
+		}
+
+		tdvp = un->un_uppervp;
+		VREF(tdvp);
+		un->un_flags |= UN_KLOCK;	/* as in union_lookup: retain the lock on the underlying vnode */
+		vput(ap->a_tdvp);
+	}
+
+	if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
+		struct union_node *un = VTOUNION(tvp);
+
+		tvp = un->un_uppervp;	/* may be NULLVP: no upper vnode for the target */
+		if (tvp != NULLVP) {
+			VREF(tvp);
+			un->un_flags |= UN_KLOCK;
+		}
+		vput(ap->a_tvp);
+	}
+
+	return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad:
+	vrele(fdvp);		/* error exit: release the vnodes as currently held */
+	vrele(fvp);
+	vput(tdvp);
+	if (tvp != NULLVP)
+		vput(tvp);
+
+	return (error);
+}
+
+static int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = dun->un_uppervp;
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	/* Make the directory in the upper layer; EROFS if there is none. */
+	if (dvp != NULLVP) {
+		struct vnode *vp;	/* filled in by VOP_MKDIR() */
+		int error;
+
+		FIXUP(dun, p);
+		dun->un_flags |= UN_KLOCK;	/* upper stays locked ... */
+		VOP_UNLOCK(ap->a_dvp, 0, p);	/* ... union node does not */
+		error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
+		if (error) {
+			dun->un_flags |= UN_ULOCK;	/* flag upper as locked */
+			return (error);
+		}
+
+		VOP_UNLOCK(dvp, 0, p);
+		error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
+				NULLVP, cnp, vp, NULLVP, 1);
+		if (error)
+			vput(vp);	/* allocvp failed: let go of vp */
+		vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p);
+
+		return (error);
+	}
+
+	return (EROFS);
+}
+
+static int
+union_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	int error;
+	/* Remove the upper directory, or make a whiteout if there is none. */
+	if (dun->un_uppervp == NULLVP)
+		panic("union rmdir: null upper vnode");
+
+	if (un->un_uppervp != NULLVP) {
+		struct vnode *dvp = dun->un_uppervp;
+		struct vnode *vp = un->un_uppervp;
+
+		FIXUP(dun, p);
+		dun->un_flags |= UN_KLOCK;	/* keep upper dir locked */
+		VOP_UNLOCK(ap->a_dvp, 0, p);
+		FIXUP(un, p);
+		un->un_flags |= UN_KLOCK;	/* keep upper vnode locked */
+		VOP_UNLOCK(ap->a_vp, 0, p);
+
+		if (union_dowhiteout(un, cnp->cn_cred, p))
+			cnp->cn_flags |= DOWHITEOUT;
+		error = VOP_RMDIR(dvp, vp, ap->a_cnp);
+#if 0
+		/* XXX: compiled out; the reason is not recorded here */
+		if (!error)
+			union_removed_upper(un);
+#endif
+		dun->un_flags |= UN_ULOCK;	/* flag both uppers as locked */
+		un->un_flags |= UN_ULOCK;
+	} else {	/* the directory being removed has no upper vnode */
+		FIXUP(dun, p);
+		error = union_mkwhiteout(
+			MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
+			dun->un_uppervp, ap->a_cnp, un->un_path);
+	}
+
+	return (error);
+}
+
+static int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = dun->un_uppervp;
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	/* Create the symlink in the upper directory; EROFS if none exists. */
+	if (dvp != NULLVP) {
+		struct vnode *vp;	/* passed to VOP_SYMLINK(), unused after */
+		int error;
+
+		FIXUP(dun, p);
+		dun->un_flags |= UN_KLOCK;	/* upper stays locked ... */
+		VOP_UNLOCK(ap->a_dvp, 0, p);	/* ... union node does not */
+		error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target);
+		dun->un_flags |= UN_ULOCK;	/* flag upper as locked */
+		*ap->a_vpp = NULLVP;		/* no vnode is handed back */
+		return (error);
+	}
+
+	return (EROFS);
+}
+
+/*
+ * Read directory entries from the upper layer only.  The union view
+ * is assembled elsewhere: getdirentries walks down the union stack
+ * and readdir(3) removes duplicate names from the returned stream.
+ * Without an upper vnode nothing is read and 0 is returned.
+ */
+static int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+	struct union_node *unode = VTOUNION(ap->a_vp);
+	struct vnode *upper = unode->un_uppervp;
+	struct proc *curp = ap->a_uio->uio_procp;
+	int rv = 0;
+
+	if (upper != NULLVP) {
+		FIXUP(unode, curp);
+		ap->a_vp = upper;	/* redirect the call to the upper vnode */
+		rv = VCALL(upper, VOFFSET(vop_readdir), ap);
+	}
+	return (rv);
+}
+
+/* Read a symlink via the vnode OTHERVP() picks for this union vnode. */
+static int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct proc *curp = ap->a_uio->uio_procp;
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int uselower = (target == LOWERVP(ap->a_vp));
+	int rv;
+
+	if (uselower) {		/* lock the lower vnode just for this call */
+		vn_lock(target, LK_EXCLUSIVE | LK_RETRY, curp);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp), curp);
+	}
+	ap->a_vp = target;
+	rv = VCALL(target, VOFFSET(vop_readlink), ap);
+	if (uselower)
+		VOP_UNLOCK(target, 0, curp);
+	return (rv);
+}
+
+/* Pass an abortop on to the vnode OTHERVP() picks for the directory. */
+static int
+union_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct proc *curp = ap->a_cnp->cn_proc;
+	struct vnode *target = OTHERVP(ap->a_dvp);
+	int held = dun->un_flags & UN_LOCKED;
+	int uselower = (target == LOWERVP(ap->a_dvp));
+	int rv;
+
+	/* Locks are only touched when the union node itself is locked. */
+	if (held && uselower) {
+		vn_lock(target, LK_EXCLUSIVE | LK_RETRY, curp);
+	} else if (held) {
+		FIXUP(dun, curp);
+	}
+	ap->a_dvp = target;
+	rv = VCALL(target, VOFFSET(vop_abortop), ap);
+	if (held && uselower)
+		VOP_UNLOCK(target, 0, curp);
+
+	return (rv);
+}
+
+static int
+union_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	struct union_node *un = VTOUNION(vp);
+	struct vnode **vpp;
+
+	/*
+	 * Do not bypass; the upper and lower vnodes are not
+	 * touched here.  Wait to vrele lowervp until reclaim,
+	 * so that until then our union_node is in the
+	 * cache and reusable.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing
+	 * the lowervp and then trying to reactivate it
+	 * with capabilities (v_id)
+	 * like they do in the name lookup cache code.
+	 * That's too much work for now.
+	 */
+	/* Release the dircache, a NULLVP-terminated array of vnodes. */
+	if (un->un_dircache != 0) {
+		for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
+			vrele(*vpp);
+		free(un->un_dircache, M_TEMP);
+		un->un_dircache = 0;
+	}
+
+	VOP_UNLOCK(vp, 0, p);
+	/* A node without UN_CACHED set is passed to vgone() right away. */
+	if ((un->un_flags & UN_CACHED) == 0)
+		vgone(vp);
+
+	return (0);
+}
+
+static int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);	/* all of the work happens in union_freevp() */
+	return (0);
+}
+
+static int
+union_lock(ap)
+	struct vop_lock_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	int flags = ap->a_flags;
+	struct union_node *un;
+	int error;
+	/* Lock the union node and, when it has one, its upper vnode. */
+	vop_nolock(ap);
+	/*
+	 * Need to do real lockmgr-style locking here.
+	 * In the meantime, draining won't work quite right,
+	 * which could lead to a few race conditions.
+	 * The following test was here, but is not quite right; we
+	 * still need to take the lock:
+	if ((flags & LK_TYPE_MASK) == LK_DRAIN)
+		return (0);
+	 */
+	flags &= ~LK_INTERLOCK;		/* not passed on to vn_lock() below */
+
+start:
+	un = VTOUNION(vp);
+	/* Lock the upper vnode first, unless UN_ULOCK is already set. */
+	if (un->un_uppervp != NULLVP) {
+		if (((un->un_flags & UN_ULOCK) == 0) &&
+		    (vp->v_usecount != 0)) {
+			error = vn_lock(un->un_uppervp, flags, p);
+			if (error)
+				return (error);
+			un->un_flags |= UN_ULOCK;
+		}
+#ifdef DIAGNOSTIC
+		if (un->un_flags & UN_KLOCK) {
+			vprint("dangling upper lock", vp);
+			panic("union: dangling upper lock");
+		}
+#endif
+	}
+	/* Union node already locked: set UN_WANT, sleep, then start over. */
+	if (un->un_flags & UN_LOCKED) {
+#ifdef DIAGNOSTIC
+		if (curproc && un->un_pid == curproc->p_pid &&
+			    un->un_pid > -1 && curproc->p_pid > -1)
+			panic("union: locking against myself");
+#endif
+		un->un_flags |= UN_WANT;
+		tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0);
+		goto start;
+	}
+
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;	/* no current process */
+#endif
+
+	un->un_flags |= UN_LOCKED;
+	return (0);
+}
+
+/*
+ * When operations want to vput() a union node yet retain a lock on
+ * the upper vnode (say, to do some further operations like link(),
+ * mkdir(), ...), they set UN_KLOCK on the union node, then call
+ * vput() which calls VOP_UNLOCK() and comes here.  union_unlock()
+ * unlocks the union node (leaving the upper vnode alone), clears the
+ * KLOCK flag, and then returns to vput().  The caller then does whatever
+ * is left to do with the upper vnode, and ensures that it gets unlocked.
+ *
+ * If UN_KLOCK isn't set, then the upper vnode is unlocked here.
+ */
+static int
+union_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct proc *p = ap->a_p;
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+			curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+	/* Unlock the upper vnode only if UN_ULOCK is set and UN_KLOCK is not. */
+	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp, 0, p);
+
+	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);	/* both flags end here */
+	/* Wake whoever set UN_WANT and sleeps on the flags word. */
+	if (un->un_flags & UN_WANT) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+	vop_nounlock(ap);
+
+	return (0);
+}
+
+/* Forward a bmap request to the vnode OTHERVP() picks. */
+static int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct proc *curp = curproc;		/* XXX */
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int rv;
+
+	if (target != LOWERVP(ap->a_vp)) {
+		FIXUP(VTOUNION(ap->a_vp), curp);
+		ap->a_vp = target;
+		return (VCALL(target, VOFFSET(vop_bmap), ap));
+	}
+	vn_lock(target, LK_EXCLUSIVE | LK_RETRY, curp);	/* lower vnode */
+	ap->a_vp = target;
+	rv = VCALL(target, VOFFSET(vop_bmap), ap);
+	VOP_UNLOCK(target, 0, curp);
+	return (rv);
+}
+
+static int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *upper = UPPERVP(ap->a_vp);	/* may be NULLVP */
+	struct vnode *lower = LOWERVP(ap->a_vp);	/* may be NULLVP */
+
+	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
+	    ap->a_vp, upper, lower);
+	if (upper != NULLVP)
+		vprint("union: upper", upper);
+	if (lower != NULLVP)
+		vprint("union: lower", lower);
+	return (0);
+}
+
+static int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	/* Only the union node's own UN_LOCKED flag is consulted. */
+	return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) != 0);
+}
+
+/* Forward a pathconf query to the vnode OTHERVP() picks. */
+static int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct proc *curp = curproc;		/* XXX */
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int rv;
+
+	if (target != LOWERVP(ap->a_vp)) {
+		FIXUP(VTOUNION(ap->a_vp), curp);
+		ap->a_vp = target;
+		return (VCALL(target, VOFFSET(vop_pathconf), ap));
+	}
+	vn_lock(target, LK_EXCLUSIVE | LK_RETRY, curp);	/* lower vnode */
+	ap->a_vp = target;
+	rv = VCALL(target, VOFFSET(vop_pathconf), ap);
+	VOP_UNLOCK(target, 0, curp);
+	return (rv);
+}
+
+/* Hand an advisory-lock request to the vnode OTHERVP() picks. */
+static int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	/* Plain redirection: no vnode lock is taken or released here. */
+	ap->a_vp = OTHERVP(ap->a_vp);
+	return (VCALL(ap->a_vp, VOFFSET(vop_advlock), ap));
+}
+
+
+/*
+ * XXX - vop_strategy is hand coded: it works on the vnode recorded
+ * in the buffer (bp->b_vp), which is swapped for the underlying
+ * vnode around the call.  Goes away with a merged VM/buffer cache.
+ */
+static int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *unionvp = bp->b_vp;
+	struct vnode *target = OTHERVP(unionvp);
+	int rv;
+
+	/* Point the buffer at the underlying vnode for the call. */
+	bp->b_vp = target;
+
+#ifdef DIAGNOSTIC
+	if (target == NULLVP)
+		panic("union_strategy: nil vp");
+	if ((bp->b_flags & B_READ) == 0 && target == LOWERVP(unionvp))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	rv = VOP_STRATEGY(target, bp);
+	bp->b_vp = unionvp;	/* put the union vnode back */
+
+	return (rv);
+}
+
+/*
+ * Global vfs data structures: the union vnode operations vector.
+ */
+vop_t **union_vnodeop_p;	/* the v_op value union_rename() tests for */
+static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_abortop_desc,		(vop_t *) union_abortop },
+	{ &vop_access_desc,		(vop_t *) union_access },
+	{ &vop_advlock_desc,		(vop_t *) union_advlock },
+	{ &vop_bmap_desc,		(vop_t *) union_bmap },
+	{ &vop_close_desc,		(vop_t *) union_close },
+	{ &vop_create_desc,		(vop_t *) union_create },
+	{ &vop_fsync_desc,		(vop_t *) union_fsync },
+	{ &vop_getattr_desc,		(vop_t *) union_getattr },
+	{ &vop_inactive_desc,		(vop_t *) union_inactive },
+	{ &vop_ioctl_desc,		(vop_t *) union_ioctl },
+	{ &vop_islocked_desc,		(vop_t *) union_islocked },
+	{ &vop_lease_desc,		(vop_t *) union_lease },
+	{ &vop_link_desc,		(vop_t *) union_link },
+	{ &vop_lock_desc,		(vop_t *) union_lock },
+	{ &vop_lookup_desc,		(vop_t *) union_lookup },
+	{ &vop_mkdir_desc,		(vop_t *) union_mkdir },
+	{ &vop_mknod_desc,		(vop_t *) union_mknod },
+	{ &vop_mmap_desc,		(vop_t *) union_mmap },
+	{ &vop_open_desc,		(vop_t *) union_open },
+	{ &vop_pathconf_desc,		(vop_t *) union_pathconf },
+	{ &vop_poll_desc,		(vop_t *) union_poll },
+	{ &vop_print_desc,		(vop_t *) union_print },
+	{ &vop_read_desc,		(vop_t *) union_read },
+	{ &vop_readdir_desc,		(vop_t *) union_readdir },
+	{ &vop_readlink_desc,		(vop_t *) union_readlink },
+	{ &vop_reclaim_desc,		(vop_t *) union_reclaim },
+	{ &vop_remove_desc,		(vop_t *) union_remove },
+	{ &vop_rename_desc,		(vop_t *) union_rename },
+	{ &vop_revoke_desc,		(vop_t *) union_revoke },
+	{ &vop_rmdir_desc,		(vop_t *) union_rmdir },
+	{ &vop_setattr_desc,		(vop_t *) union_setattr },
+	{ &vop_strategy_desc,		(vop_t *) union_strategy },
+	{ &vop_symlink_desc,		(vop_t *) union_symlink },
+	{ &vop_unlock_desc,		(vop_t *) union_unlock },
+	{ &vop_whiteout_desc,		(vop_t *) union_whiteout },
+	{ &vop_write_desc,		(vop_t *) union_write },
+	{ NULL, NULL }			/* end of table */
+};
+static struct vnodeopv_desc union_vnodeop_opv_desc =
+	{ &union_vnodeop_p, union_vnodeop_entries };
+
+VNODEOP_SET(union_vnodeop_opv_desc);