summaryrefslogtreecommitdiffstats
path: root/sys/fs
diff options
context:
space:
mode:
Diffstat (limited to 'sys/fs')
-rw-r--r--sys/fs/cd9660/TODO43
-rw-r--r--sys/fs/cd9660/TODO.hibler14
-rw-r--r--sys/fs/cd9660/cd9660_bmap.c105
-rw-r--r--sys/fs/cd9660/cd9660_lookup.c422
-rw-r--r--sys/fs/cd9660/cd9660_mount.h52
-rw-r--r--sys/fs/cd9660/cd9660_node.c428
-rw-r--r--sys/fs/cd9660/cd9660_node.h126
-rw-r--r--sys/fs/cd9660/cd9660_rrip.c723
-rw-r--r--sys/fs/cd9660/cd9660_rrip.h141
-rw-r--r--sys/fs/cd9660/cd9660_util.c141
-rw-r--r--sys/fs/cd9660/cd9660_vfsops.c894
-rw-r--r--sys/fs/cd9660/cd9660_vnops.c920
-rw-r--r--sys/fs/cd9660/iso.h312
-rw-r--r--sys/fs/cd9660/iso_rrip.h86
-rw-r--r--sys/fs/coda/README60
-rw-r--r--sys/fs/coda/TODO17
-rw-r--r--sys/fs/coda/cnode.h319
-rw-r--r--sys/fs/coda/coda.h761
-rw-r--r--sys/fs/coda/coda_fbsd.c216
-rw-r--r--sys/fs/coda/coda_io.h128
-rw-r--r--sys/fs/coda/coda_kernel.h66
-rw-r--r--sys/fs/coda/coda_namecache.c915
-rw-r--r--sys/fs/coda/coda_namecache.h285
-rw-r--r--sys/fs/coda/coda_opstats.h127
-rw-r--r--sys/fs/coda/coda_pioctl.h133
-rw-r--r--sys/fs/coda/coda_psdev.c788
-rw-r--r--sys/fs/coda/coda_psdev.h39
-rw-r--r--sys/fs/coda/coda_subr.c747
-rw-r--r--sys/fs/coda/coda_subr.h45
-rw-r--r--sys/fs/coda/coda_venus.c660
-rw-r--r--sys/fs/coda/coda_venus.h133
-rw-r--r--sys/fs/coda/coda_vfsops.c770
-rw-r--r--sys/fs/coda/coda_vfsops.h63
-rw-r--r--sys/fs/coda/coda_vnops.c2233
-rw-r--r--sys/fs/coda/coda_vnops.h142
-rw-r--r--sys/fs/deadfs/dead_vnops.c296
-rw-r--r--sys/fs/fdescfs/fdesc.h79
-rw-r--r--sys/fs/fdescfs/fdesc_vfsops.c264
-rw-r--r--sys/fs/fdescfs/fdesc_vnops.c872
-rw-r--r--sys/fs/fifofs/fifo.h44
-rw-r--r--sys/fs/fifofs/fifo_vnops.c537
-rw-r--r--sys/fs/msdosfs/bootsect.h113
-rw-r--r--sys/fs/msdosfs/bpb.h209
-rw-r--r--sys/fs/msdosfs/denode.h286
-rw-r--r--sys/fs/msdosfs/direntry.h143
-rw-r--r--sys/fs/msdosfs/fat.h108
-rw-r--r--sys/fs/msdosfs/msdosfs_conv.c1041
-rw-r--r--sys/fs/msdosfs/msdosfs_denode.c712
-rw-r--r--sys/fs/msdosfs/msdosfs_fat.c1100
-rw-r--r--sys/fs/msdosfs/msdosfs_lookup.c1085
-rw-r--r--sys/fs/msdosfs/msdosfs_vfsops.c1019
-rw-r--r--sys/fs/msdosfs/msdosfs_vnops.c1983
-rw-r--r--sys/fs/msdosfs/msdosfsmount.h239
-rw-r--r--sys/fs/nullfs/null.h76
-rw-r--r--sys/fs/nullfs/null_subr.c303
-rw-r--r--sys/fs/nullfs/null_vfsops.c425
-rw-r--r--sys/fs/nullfs/null_vnops.c669
-rw-r--r--sys/fs/portalfs/portal.h71
-rw-r--r--sys/fs/portalfs/portal_vfsops.c292
-rw-r--r--sys/fs/portalfs/portal_vnops.c607
-rw-r--r--sys/fs/procfs/README113
-rw-r--r--sys/fs/procfs/procfs.h177
-rw-r--r--sys/fs/procfs/procfs_ctl.c315
-rw-r--r--sys/fs/procfs/procfs_fpregs.c98
-rw-r--r--sys/fs/procfs/procfs_map.c185
-rw-r--r--sys/fs/procfs/procfs_mem.c342
-rw-r--r--sys/fs/procfs/procfs_note.c67
-rw-r--r--sys/fs/procfs/procfs_regs.c98
-rw-r--r--sys/fs/procfs/procfs_status.c186
-rw-r--r--sys/fs/procfs/procfs_subr.c392
-rw-r--r--sys/fs/procfs/procfs_type.c85
-rw-r--r--sys/fs/procfs/procfs_vfsops.c215
-rw-r--r--sys/fs/procfs/procfs_vnops.c1013
-rw-r--r--sys/fs/specfs/spec_vnops.c938
-rw-r--r--sys/fs/umapfs/umap.h91
-rw-r--r--sys/fs/umapfs/umap_subr.c402
-rw-r--r--sys/fs/umapfs/umap_vfsops.c431
-rw-r--r--sys/fs/umapfs/umap_vnops.c566
-rw-r--r--sys/fs/unionfs/union.h129
-rw-r--r--sys/fs/unionfs/union_subr.c1218
-rw-r--r--sys/fs/unionfs/union_vfsops.c555
-rw-r--r--sys/fs/unionfs/union_vnops.c1804
82 files changed, 35047 insertions, 0 deletions
diff --git a/sys/fs/cd9660/TODO b/sys/fs/cd9660/TODO
new file mode 100644
index 0000000..cf3fadc
--- /dev/null
+++ b/sys/fs/cd9660/TODO
@@ -0,0 +1,43 @@
+# $Id$
+
+ 2) should understand Rock Ridge
+
+ Yes, we support the following features:
+
+ o Symbolic Link
+ o Real Name(long name)
+ o File Attribute
+ o Time stamp
+ o uid, gid
+ o Devices
+ o Relocated directories
+
+ Except for the following:
+
+ o POSIX device number mapping
+
+ There is some preliminary stuff in there that (ab-)uses the mknod
+ system call, but this needs a writable filesystem
+
+ 5) should have name translation enabled by mount flag
+
+ Yes. We can disable the Rock Ridge Extension with the following option:
+
+ "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+ are slow)
+
+ Not yet.
+
+ 7) ECMA support.
+
+ Not yet. We need not only a technical spec but also an ECMA format
+ cd-rom itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+ Not yet. We should also make the other parts of the system 8 bit
+ clean. As far as I know, if you export the cdrom by NFS, the client
+ can access it 8 bit clean (i.e. Solaris Japanese with EUC code).
+
diff --git a/sys/fs/cd9660/TODO.hibler b/sys/fs/cd9660/TODO.hibler
new file mode 100644
index 0000000..660b268
--- /dev/null
+++ b/sys/fs/cd9660/TODO.hibler
@@ -0,0 +1,14 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+ Since it was modelled after the inode code, we might be able to merge
+ them back. It looks like a separate (but very similar) lookup routine
+ will be needed due to the associated file stuff.
+
+2. It would be nice to be able to use the vfs_cluster code.
+ Unfortunately, if the logical block size is smaller than the page size,
+ it won't work. Also, if throughput is relatively constant for any
+ block size (as it is for the HP drive--150kbs) then clustering may not
+ buy much (or may even hurt when vfs_cluster comes up with a large sync
+ cluster).
+
+3. Seems like there should be a "notrans" or some such mount option to show
+ filenames as they really are without lower-casing. Does this make sense?
diff --git a/sys/fs/cd9660/cd9660_bmap.c b/sys/fs/cd9660/cd9660_bmap.c
new file mode 100644
index 0000000..e787f83
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_bmap.c
@@ -0,0 +1,105 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_bmap.c 8.3 (Berkeley) 1/23/94
+ * $Id: cd9660_bmap.c,v 1.6 1997/02/22 09:38:47 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Bmap converts the logical block number of a file to its physical block
+ * number on the disk. The conversion adds the logical block number to the
+ * start of the file's extent (iso_start) and shifts by (bshift - DEV_BSHIFT).
+ */
+int
+cd9660_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ int *a_runp;
+ int *a_runb;
+ } */ *ap;
+{
+ struct iso_node *ip = VTOI(ap->a_vp);
+ daddr_t lblkno = ap->a_bn;
+ int bshift;
+
+ /*
+ * Hand back the device vnode if the caller asked for it, and return
+ * early when no logical to physical mapping is requested.
+ */
+ if (ap->a_vpp != NULL)
+ *ap->a_vpp = ip->i_devvp;
+ if (ap->a_bnp == NULL)
+ return (0);
+
+ /*
+ * Compute the requested block number: (extent start + lblkno), shifted.
+ */
+ bshift = ip->i_mnt->im_bshift;
+ *ap->a_bnp = (ip->iso_start + lblkno) << (bshift - DEV_BSHIFT);
+
+ /*
+ * Determine maximum number of readahead blocks following the
+ * requested block: 0 at or past end of file, at most (MAXBSIZE >> bshift) - 1.
+ */
+ if (ap->a_runp) {
+ int nblk;
+
+ nblk = (ip->i_size >> bshift) - (lblkno + 1);
+ if (nblk <= 0)
+ *ap->a_runp = 0;
+ else if (nblk >= (MAXBSIZE >> bshift))
+ *ap->a_runp = (MAXBSIZE >> bshift) - 1;
+ else
+ *ap->a_runp = nblk;
+ }
+
+ if (ap->a_runb) { /* a_runb, when requested, is always set to 0 */
+ *ap->a_runb = 0;
+ }
+
+ return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
new file mode 100644
index 0000000..3d0ff74
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -0,0 +1,422 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)ufs_lookup.c 7.33 (Berkeley) 5/19/91
+ *
+ * @(#)cd9660_lookup.c 8.2 (Berkeley) 1/23/94
+ * $Id: cd9660_lookup.c,v 1.20 1997/11/07 08:52:50 phk Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This is a very central and rather complicated routine.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * When CREATE, RENAME, or DELETE is specified, information usable in
+ * creating, renaming, or deleting a directory entry may be calculated.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When creating or renaming and LOCKPARENT is specified, the target may
+ * not be ".". When deleting and LOCKPARENT is specified, the target may
+ * be ".", but the caller must check to ensure it does a vrele and an iput
+ * instead of two iputs.
+ *
+ * Overall outline of ufs_lookup:
+ *
+ * search for name in directory, to found or notfound
+ * notfound:
+ * if creating, return locked directory, leaving info on available slots
+ * else return error
+ * found:
+ * if at end of path and deleting, return information to allow delete
+ * if at end of path and rewriting (RENAME and LOCKPARENT), lock target
+ * inode and return info to allow rewrite
+ * if not at end, add name to cache; if at end and neither creating
+ * nor deleting, add name to cache
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ */
+int
+cd9660_lookup(ap)
+ struct vop_cachedlookup_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vdp; /* vnode for directory being searched */
+ register struct iso_node *dp; /* inode for directory being searched */
+ register struct iso_mnt *imp; /* file system that directory is in */
+ struct buf *bp; /* a buffer of directory entries */
+ struct iso_directory_record *ep = 0;/* the current directory entry */
+ int entryoffsetinblock; /* offset of ep in bp's buffer */
+ int saveoffset = 0; /* offset of last directory entry in dir */
+ int numdirpasses; /* strategy for directory search */
+ doff_t endsearch; /* offset to end directory search */
+ struct vnode *pdp; /* saved dp during symlink work */
+ struct vnode *tdp; /* returned by cd9660_vget_internal */
+ u_long bmask; /* block offset mask */
+ int lockparent; /* 1 => lockparent flag is set */
+ int wantparent; /* 1 => wantparent or lockparent flag */
+ int error;
+ ino_t ino = 0; /* inode number of a matching entry; 0 => none yet */
+ int reclen; /* length of the current directory record */
+ u_short namelen; /* length of the current entry's name */
+ int isoflags; /* flags byte of the current entry */
+ char altname[NAME_MAX]; /* name buffer passed to cd9660_rrip_getname() */
+ int res; /* result of isofncmp() */
+ int assoc, len; /* assoc: want an associated file; len: name length */
+ char *name; /* component name being searched for */
+ struct vnode **vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ int flags = cnp->cn_flags;
+ int nameiop = cnp->cn_nameiop;
+ struct proc *p = cnp->cn_proc;
+
+ bp = NULL;
+ *vpp = NULL;
+ vdp = ap->a_dvp;
+ dp = VTOI(vdp);
+ imp = dp->i_mnt;
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+ /*
+ * We now have a segment name to search for, and a directory to search.
+ */
+
+ len = cnp->cn_namelen;
+ name = cnp->cn_nameptr;
+ /*
+ * On non-Rock Ridge mounts a leading `=' (ASSOCCHAR) requests an associated file
+ */
+ if ((assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)))
+ {
+ len--;
+ name++;
+ }
+
+ /*
+ * If there is cached information on a previous search of
+ * this directory, pick up where we last left off.
+ * We cache only lookups as these are the most common
+ * and have the greatest payoff. Caching CREATE has little
+ * benefit as it usually must search the entire directory
+ * to determine that the entry does not exist. Caching the
+ * location of the last DELETE or RENAME has not reduced
+ * profiling time and hence has been removed in the interest
+ * of simplicity.
+ */
+ bmask = imp->im_bmask;
+ if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+ dp->i_diroff > dp->i_size) {
+ entryoffsetinblock = 0;
+ dp->i_offset = 0;
+ numdirpasses = 1;
+ } else {
+ dp->i_offset = dp->i_diroff;
+ if ((entryoffsetinblock = dp->i_offset & bmask) &&
+ (error = cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
+ return (error);
+ numdirpasses = 2;
+ nchstats.ncs_2passes++;
+ }
+ endsearch = dp->i_size;
+
+searchloop:
+ while (dp->i_offset < endsearch) {
+ /*
+ * If offset is on a block boundary,
+ * read the next directory block.
+ * Release previous if it exists.
+ */
+ if ((dp->i_offset & bmask) == 0) {
+ if (bp != NULL)
+ brelse(bp);
+ if ((error =
+ cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
+ return (error);
+ entryoffsetinblock = 0;
+ }
+ /*
+ * Get pointer to next entry.
+ */
+ ep = (struct iso_directory_record *)
+ ((char *)bp->b_data + entryoffsetinblock);
+
+ reclen = isonum_711(ep->length);
+ if (reclen == 0) {
+ /* skip to next block, if any */
+ dp->i_offset =
+ (dp->i_offset & ~bmask) + imp->logical_block_size;
+ continue;
+ }
+
+ if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+ /* illegal entry, stop */
+ break;
+
+ if (entryoffsetinblock + reclen > imp->logical_block_size)
+ /* entries are not allowed to cross boundaries */
+ break;
+
+ namelen = isonum_711(ep->name_len);
+ isoflags = isonum_711(imp->iso_ftype == ISO_FTYPE_HIGH_SIERRA?
+ &ep->date[6]: ep->flags);
+
+ if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+ /* illegal entry, stop */
+ break;
+
+ /*
+ * Check for a name match.
+ */
+ switch (imp->iso_ftype) {
+ default:
+ if (!(isoflags & 4) == !assoc) { /* flag bit 4 must agree with assoc */
+ if ((len == 1
+ && *name == '.')
+ || (flags & ISDOTDOT)) {
+ if (namelen == 1
+ && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+ /*
+ * Save directory entry's inode number and
+ * release directory buffer.
+ */
+ dp->i_ino = isodirino(ep, imp);
+ goto found;
+ }
+ if (namelen != 1
+ || ep->name[0] != 0)
+ goto notfound;
+ } else if (!(res = isofncmp(name,len,
+ ep->name,namelen))) {
+ if (isoflags & 2) /* flag bit 2: entry is a directory */
+ ino = isodirino(ep, imp);
+ else
+ ino = dbtob(bp->b_blkno)
+ + entryoffsetinblock;
+ saveoffset = dp->i_offset;
+ } else if (ino)
+ goto foundino;
+#ifdef NOSORTBUG /* On some CDs directory entries are not sorted correctly */
+ else if (res < 0)
+ goto notfound;
+ else if (res > 0 && numdirpasses == 2)
+ numdirpasses++;
+#endif
+ }
+ break;
+ case ISO_FTYPE_RRIP:
+ if (isonum_711(ep->flags)&2)
+ ino = isodirino(ep, imp);
+ else
+ ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+ dp->i_ino = ino;
+ cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+ if (namelen == cnp->cn_namelen
+ && !bcmp(name,altname,namelen))
+ goto found;
+ ino = 0;
+ break;
+ }
+ dp->i_offset += reclen;
+ entryoffsetinblock += reclen;
+ }
+ if (ino) {
+foundino:
+ dp->i_ino = ino;
+ if (saveoffset != dp->i_offset) {
+ if (lblkno(imp, dp->i_offset) !=
+ lblkno(imp, saveoffset)) {
+ if (bp != NULL)
+ brelse(bp);
+ if ((error = cd9660_blkatoff(vdp,
+ (off_t)saveoffset, NULL, &bp)) != 0)
+ return (error);
+ }
+ entryoffsetinblock = saveoffset & bmask;
+ ep = (struct iso_directory_record *)
+ ((char *)bp->b_data + entryoffsetinblock);
+ dp->i_offset = saveoffset;
+ }
+ goto found;
+ }
+notfound:
+ /*
+ * If we started in the middle of the directory and failed
+ * to find our target, we must check the beginning as well.
+ */
+ if (numdirpasses == 2) {
+ numdirpasses--;
+ dp->i_offset = 0;
+ endsearch = dp->i_diroff;
+ goto searchloop;
+ }
+ if (bp != NULL)
+ brelse(bp);
+
+ /*
+ * Insert name into cache (as non-existent) if appropriate.
+ */
+ if (cnp->cn_flags & MAKEENTRY)
+ cache_enter(vdp, *vpp, cnp);
+ if (nameiop == CREATE || nameiop == RENAME) /* read-only: nothing can be created */
+ return (EROFS);
+ return (ENOENT);
+
+found:
+ if (numdirpasses == 2)
+ nchstats.ncs_pass2++;
+
+ /*
+ * Found component in pathname.
+ * If the final component of path name, save information
+ * in the cache as to where the entry was found.
+ */
+ if ((flags & ISLASTCN) && nameiop == LOOKUP)
+ dp->i_diroff = dp->i_offset;
+
+ /*
+ * Step through the translation in the name. We do not `iput' the
+ * directory because we may need it again if a symbolic link
+ * is relative to the current directory. Instead we save it
+ * unlocked as "pdp". We must get the target inode before unlocking
+ * the directory to ensure that the inode will not be removed
+ * before we get it. We prevent deadlock by always fetching
+ * inodes from the root, moving down the directory tree. Thus
+ * when following backward pointers ".." we must unlock the
+ * parent directory before getting the requested directory.
+ * There is a potential race condition here if both the current
+ * and parent directories are removed before the `iget' for the
+ * inode associated with ".." returns. We hope that this occurs
+ * infrequently since we cannot avoid this race condition without
+ * implementing a sophisticated deadlock detection algorithm.
+ * Note also that this simple deadlock detection scheme will not
+ * work if the file system has any hard links other than ".."
+ * that point backwards in the directory structure.
+ */
+ pdp = vdp;
+ /*
+ * If ino is different from dp->i_ino,
+ * it's a relocated directory.
+ */
+ if (flags & ISDOTDOT) {
+ VOP_UNLOCK(pdp, 0, p); /* race to get the inode */
+ error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp,
+ dp->i_ino != ino, ep);
+ brelse(bp);
+ if (error) {
+ vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
+ return (error);
+ }
+ if (lockparent && (flags & ISLASTCN) &&
+ (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
+ vput(tdp);
+ return (error);
+ }
+ *vpp = tdp;
+ } else if (dp->i_number == dp->i_ino) {
+ brelse(bp);
+ VREF(vdp); /* we want ourself, ie "." */
+ *vpp = vdp;
+ } else {
+ error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp,
+ dp->i_ino != ino, ep);
+ brelse(bp);
+ if (error)
+ return (error);
+ if (!lockparent || !(flags & ISLASTCN))
+ VOP_UNLOCK(pdp, 0, p);
+ *vpp = tdp;
+ }
+
+ /*
+ * Insert name into cache if appropriate.
+ */
+ if (cnp->cn_flags & MAKEENTRY)
+ cache_enter(vdp, *vpp, cnp);
+ return (0);
+}
+
+/*
+ * Read the directory block of "vp" that holds byte "offset" (as computed by
+ * lblkno()) and return its buffer in *bpp; *bpp is NULL on error. If "res" is
+ * non-NULL, point *res at that offset inside the buffer (b_data + blkoff()).
+ */
+int
+cd9660_blkatoff(vp, offset, res, bpp)
+ struct vnode *vp;
+ off_t offset;
+ char **res;
+ struct buf **bpp;
+{
+ struct iso_node *ip;
+ register struct iso_mnt *imp;
+ struct buf *bp;
+ daddr_t lbn;
+ int bsize, error;
+
+ ip = VTOI(vp);
+ imp = ip->i_mnt;
+ lbn = lblkno(imp, offset);
+ bsize = blksize(imp, ip, lbn);
+
+ if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) {
+ brelse(bp); /* bread is expected to have set bp even on failure */
+ *bpp = NULL;
+ return (error);
+ }
+ if (res)
+ *res = (char *)bp->b_data + blkoff(imp, offset);
+ *bpp = bp;
+ return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_mount.h b/sys/fs/cd9660/cd9660_mount.h
new file mode 100644
index 0000000..9d3f78e
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_mount.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_mount.h 8.1 (Berkeley) 5/24/95
+ */
+
+/*
+ * Arguments to mount ISO 9660 filesystems; flags takes the ISOFSMNT_* bits below.
+ */
+struct iso_args {
+ char *fspec; /* block special device to mount */
+ struct export_args export; /* network export info */
+ int flags; /* mounting flags: OR of ISOFSMNT_* below */
+ int ssector; /* starting sector, 0 for 1st session */
+};
+#define ISOFSMNT_NORRIP 0x00000001 /* disable Rock Ridge Ext. */
+#define ISOFSMNT_GENS 0x00000002 /* enable generation numbers */
+#define ISOFSMNT_EXTATT 0x00000004 /* enable extended attributes */
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
new file mode 100644
index 0000000..9640d6e
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -0,0 +1,428 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.c 8.2 (Berkeley) 1/23/94
+ * $Id: cd9660_node.c,v 1.26 1999/01/02 11:34:54 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_mount.h>
+
+/*
+ * Structures associated with iso_node caching (a chained hash on dev/inum).
+ */
+static struct iso_node **isohashtbl; /* chain heads, indexed by INOHASH() */
+static u_long isohash; /* mask used by INOHASH(); filled in by hashinit() */
+#define INOHASH(device, inum) (((device) + ((inum)>>12)) & isohash)
+#ifndef NULL_SIMPLELOCKS
+static struct simplelock cd9660_ihash_slock; /* held while walking/editing chains */
+#endif
+
+static void cd9660_ihashrem __P((struct iso_node *));
+static unsigned cd9660_chars2ui __P((unsigned char *begin, int len));
+
+/*
+ * Allocate the iso_node hash table and initialize its lock; always returns 0.
+ */
+int
+cd9660_init(vfsp)
+ struct vfsconf *vfsp; /* not referenced in this function */
+{
+
+ isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash);
+ simple_lock_init(&cd9660_ihash_slock);
+ return (0);
+}
+
+
+/*
+ * Use the device/inum pair to find the in-core inode and return its vnode,
+ * taken with vget(LK_EXCLUSIVE). Retries if vget fails; NULL if not hashed.
+ */
+struct vnode *
+cd9660_ihashget(dev, inum)
+ dev_t dev;
+ ino_t inum;
+{
+ struct proc *p = curproc; /* XXX */
+ struct iso_node *ip;
+ struct vnode *vp;
+
+loop:
+ simple_lock(&cd9660_ihash_slock);
+ for (ip = isohashtbl[INOHASH(dev, inum)]; ip; ip = ip->i_next) {
+ if (inum == ip->i_number && dev == ip->i_dev) {
+ vp = ITOV(ip);
+ simple_lock(&vp->v_interlock); /* taken before the hash lock is dropped */
+ simple_unlock(&cd9660_ihash_slock);
+ if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
+ goto loop; /* vget failed: rescan the chain from the start */
+ return (vp);
+ }
+ }
+ simple_unlock(&cd9660_ihash_slock);
+ return (NULL);
+}
+
+/*
+ * Insert the inode at the head of its hash chain, and return it locked.
+ */
+void
+cd9660_ihashins(ip)
+ struct iso_node *ip;
+{
+ struct proc *p = curproc; /* XXX */
+ struct iso_node **ipp, *iq;
+
+ simple_lock(&cd9660_ihash_slock);
+ ipp = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)];
+ if ((iq = *ipp) != NULL)
+ iq->i_prev = &ip->i_next; /* old head now hangs off the new node */
+ ip->i_next = iq;
+ ip->i_prev = ipp; /* i_prev addresses the pointer that points at us */
+ *ipp = ip;
+ simple_unlock(&cd9660_ihash_slock);
+
+ lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p);
+}
+
+/*
+ * Unlink the inode from its hash chain (links are cleared under DIAGNOSTIC).
+ */
+static void
+cd9660_ihashrem(ip)
+ register struct iso_node *ip;
+{
+ register struct iso_node *iq;
+
+ simple_lock(&cd9660_ihash_slock);
+ if ((iq = ip->i_next) != NULL)
+ iq->i_prev = ip->i_prev;
+ *ip->i_prev = iq; /* predecessor's link (or the chain head) skips ip */
+#ifdef DIAGNOSTIC
+ ip->i_next = NULL;
+ ip->i_prev = NULL;
+#endif
+ simple_unlock(&cd9660_ihash_slock);
+}
+
+/*
+ * Last reference to an inode: clear its flags, unlock the vnode and,
+ * if the node has no mode set, recycle it. Always returns 0.
+ */
+int
+cd9660_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct proc *p = ap->a_p;
+ register struct iso_node *ip = VTOI(vp);
+ int error = 0; /* never changed below */
+
+ if (prtactive && vp->v_usecount != 0)
+ vprint("cd9660_inactive: pushing active", vp);
+
+ ip->i_flag = 0;
+ VOP_UNLOCK(vp, 0, p);
+ /*
+ * If we are done with the inode (its iso_mode is 0), reclaim it
+ * so that it can be reused immediately.
+ */
+ if (ip->inode.iso_mode == 0)
+ vrecycle(vp, (struct simplelock *)0, p);
+ return error;
+}
+
+/*
+ * Reclaim an inode so that it can be used for other purposes; returns 0.
+ */
+int
+cd9660_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct iso_node *ip = VTOI(vp);
+
+ if (prtactive && vp->v_usecount != 0)
+ vprint("cd9660_reclaim: pushing active", vp);
+ /*
+ * Remove the inode from its hash chain.
+ */
+ cd9660_ihashrem(ip);
+ /*
+ * Purge old data structures associated with the inode.
+ */
+ cache_purge(vp);
+ if (ip->i_devvp) {
+ vrele(ip->i_devvp); /* release the device vnode */
+ ip->i_devvp = 0;
+ }
+ FREE(vp->v_data, M_ISOFSNODE); /* free the per-vnode private data */
+ vp->v_data = NULL;
+ return (0);
+}
+
+/*
+ * Set default file attributes (mode, links, uid, gid) for an iso_node.
+ */
+void
+cd9660_defattr(isodir, inop, bp, ftype)
+ struct iso_directory_record *isodir;
+ struct iso_node *inop;
+ struct buf *bp;
+ enum ISO_FTYPE ftype;
+{
+ struct buf *bp2 = NULL;
+ struct iso_mnt *imp;
+ struct iso_extended_attributes *ap = NULL;
+ int off;
+
+ /* high sierra does not have timezone data, flag is one byte ahead */
+ if (isonum_711(ftype == ISO_FTYPE_HIGH_SIERRA?
+ &isodir->date[6]: isodir->flags)&2) {
+ inop->inode.iso_mode = S_IFDIR;
+ /*
+ * If we return 2, fts() will assume there are no subdirectories
+ * (just links for the path and .), so instead we return 1.
+ */
+ inop->inode.iso_links = 1;
+ } else {
+ inop->inode.iso_mode = S_IFREG;
+ inop->inode.iso_links = 1;
+ }
+ if (!bp
+ && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT)
+ && (off = isonum_711(isodir->ext_attr_length))) {
+ cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL,
+ &bp2);
+ bp = bp2; /* stays NULL if the read failed */
+ }
+ if (bp) {
+ ap = (struct iso_extended_attributes *)bp->b_data;
+
+ if (isonum_711(ap->version) == 1) {
+ if (!(ap->perm[0]&0x40)) /* a clear perm bit grants the access */
+ inop->inode.iso_mode |= VEXEC >> 6;
+ if (!(ap->perm[0]&0x10))
+ inop->inode.iso_mode |= VREAD >> 6;
+ if (!(ap->perm[0]&4))
+ inop->inode.iso_mode |= VEXEC >> 3;
+ if (!(ap->perm[0]&1))
+ inop->inode.iso_mode |= VREAD >> 3;
+ if (!(ap->perm[1]&0x40))
+ inop->inode.iso_mode |= VEXEC;
+ if (!(ap->perm[1]&0x10))
+ inop->inode.iso_mode |= VREAD;
+ inop->inode.iso_uid = isonum_723(ap->owner); /* what about 0? */
+ inop->inode.iso_gid = isonum_723(ap->group); /* what about 0? */
+ } else
+ ap = NULL;
+ }
+ if (!ap) { /* no usable extended attributes: read+exec for all, uid/gid 0 */
+ inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6;
+ inop->inode.iso_uid = (uid_t)0;
+ inop->inode.iso_gid = (gid_t)0;
+ }
+ if (bp2)
+ brelse(bp2);
+}
+
+/*
+ * Time stamps
+ *
+ * Fill in the access, modification and change times of `inop'.  The
+ * 17-byte stamps of a version 1 extended attribute record are used when
+ * such a record is available (never for High Sierra); each stamp that
+ * fails to convert falls back to a neighbouring one.  Otherwise all
+ * three times are taken from the 7-byte date in the directory record.
+ * The record comes from `bp' or, when `bp' is NULL, is looked up in the
+ * same way as cd9660_defattr() does.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp,ftype)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+	enum ISO_FTYPE ftype;
+{
+	struct iso_extended_attributes *xattr = NULL;
+	struct buf *ownbp = NULL;
+	struct iso_mnt *imp;
+	int off;
+
+	if (bp == NULL) {
+		imp = inop->i_mnt;
+		if ((imp->im_flags & ISOFSMNT_EXTATT) != 0) {
+			off = isonum_711(isodir->ext_attr_length);
+			if (off != 0) {
+				cd9660_blkatoff(ITOV(inop),
+				    (off_t)-(off << imp->im_bshift), NULL,
+				    &ownbp);
+				bp = ownbp;
+			}
+		}
+	}
+
+	if (bp != NULL) {
+		xattr = (struct iso_extended_attributes *)bp->b_data;
+		if (ftype == ISO_FTYPE_HIGH_SIERRA
+		    || isonum_711(xattr->version) != 1) {
+			xattr = NULL;
+		} else {
+			/* atime: file time, else creation time */
+			if (!cd9660_tstamp_conv17(xattr->ftime,
+			    &inop->inode.iso_atime))
+				cd9660_tstamp_conv17(xattr->ctime,
+				    &inop->inode.iso_atime);
+			/* ctime: creation time, else whatever atime got */
+			if (!cd9660_tstamp_conv17(xattr->ctime,
+			    &inop->inode.iso_ctime))
+				inop->inode.iso_ctime = inop->inode.iso_atime;
+			/* mtime: modification time, else ctime */
+			if (!cd9660_tstamp_conv17(xattr->mtime,
+			    &inop->inode.iso_mtime))
+				inop->inode.iso_mtime = inop->inode.iso_ctime;
+		}
+	}
+
+	if (xattr == NULL) {
+		/* Only the directory record date is available. */
+		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime,ftype);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+
+	if (ownbp != NULL)
+		brelse(ownbp);
+}
+
+/*
+ * Convert a 7-byte binary time stamp (years since 1900, month, day,
+ * hour, minute, second, offset from GMT in 15 minute units) into a
+ * struct timespec.
+ *
+ * pi     the seven stamp bytes
+ * pu     receives the result; tv_nsec is always 0
+ * ftype  for ISO_FTYPE_HIGH_SIERRA the offset byte is not used
+ *
+ * Returns 1 on success.  A year before 1970 yields a zeroed *pu and a
+ * return value of 0.
+ */
+int
+cd9660_tstamp_conv7(pi,pu,ftype)
+	u_char *pi;
+	struct timespec *pu;
+	enum ISO_FTYPE ftype;
+{
+	int crtime, days;
+	int y, m, d, hour, minute, second, tz;
+
+	y = pi[0] + 1900;
+	m = pi[1];
+	d = pi[2];
+	hour = pi[3];
+	minute = pi[4];
+	second = pi[5];
+	if(ftype != ISO_FTYPE_HIGH_SIERRA)
+		/*
+		 * The offset is a signed byte.  Reading it as unsigned
+		 * turned every negative offset into 208..255, which then
+		 * failed the range check below and was silently dropped.
+		 */
+		tz = ((signed char *)pi)[6];
+	else
+		/* original high sierra misses timezone data */
+		tz = 0;
+
+	if (y < 1970) {
+		pu->tv_sec = 0;
+		pu->tv_nsec = 0;
+		return 0;
+	} else {
+#ifdef ORIGINAL
+		/* computes day number relative to Sept. 19th,1989 */
+		/* don't even *THINK* about changing formula. It works! */
+		days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+		/*
+		 * Changed :-) to make it relative to Jan. 1st, 1970
+		 * and to disambiguate negative division
+		 */
+		days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+		crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second;
+
+		/* timezone offset is unreliable on some disks */
+		if (-48 <= tz && tz <= 52)
+			crtime -= tz * 15 * 60;
+	}
+	pu->tv_sec = crtime;
+	pu->tv_nsec = 0;
+	return 1;
+}
+
+/*
+ * Read `len' ASCII decimal digits starting at `begin' and return their
+ * value.  The characters are not validated; a `len' of zero or less
+ * yields 0.
+ */
+static u_int
+cd9660_chars2ui(begin,len)
+	u_char *begin;
+	int len;
+{
+	u_int value = 0;
+	int i;
+
+	for (i = 0; i < len; i++)
+		value = value * 10 + (begin[i] - '0');
+
+	return value;
+}
+
+/*
+ * Convert a 17-byte time stamp (ASCII digits "YYYYMMDDHHMMSSss"
+ * followed by one binary byte holding the offset from GMT) into a
+ * struct timespec by repacking it into the 7-byte form and handing it to
+ * cd9660_tstamp_conv7().  The hundredths of a second are ignored.
+ *
+ * Returns 1 on success.  Returns 0 with *pu zeroed when the year lies
+ * before 1970 or cannot be represented in the 7-byte form.
+ */
+int
+cd9660_tstamp_conv17(pi,pu)
+	u_char *pi;
+	struct timespec *pu;
+{
+	u_char buf[7];
+	int year;
+
+	/* year:"0001"-"9999" -> -1900 */
+	year = (int)cd9660_chars2ui(pi,4);
+	/*
+	 * The 7-byte form keeps the year as a single byte counted from
+	 * 1900.  Storing an out-of-range value there wraps around: an
+	 * unset stamp (year "0000") used to come out as 2048 and was
+	 * reported as valid, so callers never fell back to another
+	 * stamp.  Reject such years here instead.
+	 */
+	if (year < 1970 || year - 1900 > 0xff) {
+		pu->tv_sec = 0;
+		pu->tv_nsec = 0;
+		return 0;
+	}
+	buf[0] = year - 1900;
+
+	/* month: " 1"-"12" -> 1 - 12 */
+	buf[1] = cd9660_chars2ui(pi + 4,2);
+
+	/* day: " 1"-"31" -> 1 - 31 */
+	buf[2] = cd9660_chars2ui(pi + 6,2);
+
+	/* hour: " 0"-"23" -> 0 - 23 */
+	buf[3] = cd9660_chars2ui(pi + 8,2);
+
+	/* minute:" 0"-"59" -> 0 - 59 */
+	buf[4] = cd9660_chars2ui(pi + 10,2);
+
+	/* second:" 0"-"59" -> 0 - 59 */
+	buf[5] = cd9660_chars2ui(pi + 12,2);
+
+	/* difference of GMT */
+	buf[6] = pi[16];
+
+	return cd9660_tstamp_conv7(buf, pu, ISO_FTYPE_DEFAULT);
+}
+
+/*
+ * Compute the inode number used for a directory: the block at which the
+ * directory data begins (its extent plus the length of its extended
+ * attribute record), shifted left by the mount's block shift.
+ */
+ino_t
+isodirino(isodir, imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	return ((isonum_733(isodir->extent) +
+	    isonum_711(isodir->ext_attr_length)) << imp->im_bshift);
+}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
new file mode 100644
index 0000000..33b208f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -0,0 +1,126 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.h 8.6 (Berkeley) 5/14/95
+ * $Id: cd9660_node.h,v 1.16 1997/11/18 14:40:34 phk Exp $
+ */
+
+/*
+ * Theoretically, directories can be more than 2GB in length;
+ * in practice, however, this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ * doff_t is the type of the directory offsets kept in struct iso_node.
+ */
+#define doff_t long
+
+typedef struct { /* file attributes of a node; embedded in struct iso_node as `inode' */
+ struct timespec iso_atime; /* time of last access */
+ struct timespec iso_mtime; /* time of last modification */
+ struct timespec iso_ctime; /* time file changed */
+ u_short iso_mode; /* files access mode and type */
+ uid_t iso_uid; /* owner user id */
+ gid_t iso_gid; /* owner group id */
+ short iso_links; /* links of file */
+ dev_t iso_rdev; /* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+
+struct iso_node { /* in-core node; reached from a vnode through VTOI() */
+ struct lock i_lock; /* node lock > Keep this first< */
+ struct iso_node *i_next, **i_prev; /* hash chain; i_prev points at the pointer that refers to this node */
+ struct vnode *i_vnode; /* vnode associated with this inode */
+ struct vnode *i_devvp; /* vnode for block I/O */
+ u_long i_flag; /* see below (IN_ACCESS) */
+ dev_t i_dev; /* device where inode resides */
+ ino_t i_number; /* the identity of the inode */
+ /* we use the actual starting block of the file */
+ struct iso_mnt *i_mnt; /* filesystem associated with this inode */
+ struct lockf *i_lockf; /* head of byte-level lock list */
+ doff_t i_endoff; /* end of useful stuff in directory */
+ doff_t i_diroff; /* offset in dir, where we found last entry */
+ doff_t i_offset; /* offset of free space in directory */
+ ino_t i_ino; /* inode number of found directory */
+
+ long iso_extent; /* extent of file */
+ long i_size; /* NOTE(review): presumably the file length in bytes -- confirm */
+ long iso_start; /* actual start of data of file (may be different */
+ /* from iso_extent, if file has extended attributes) */
+ ISO_RRIP_INODE inode; /* mode, owner, link count and time stamps */
+};
+
+#define i_forw i_chain[0] /* NOTE(review): struct iso_node has no i_chain member; looks stale */
+#define i_back i_chain[1] /* NOTE(review): same as i_forw -- confirm nothing uses these */
+
+/* flags */
+#define IN_ACCESS 0x0020 /* inode access time to be updated */
+
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data) /* vnode -> its iso_node */
+#define ITOV(ip) ((ip)->i_vnode) /* iso_node -> its vnode */
+
+#ifdef KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_ISOFSMNT);
+MALLOC_DECLARE(M_ISOFSNODE);
+#endif
+
+struct buf;
+struct vop_bmap_args;
+struct vop_cachedlookup_args;
+struct vop_inactive_args;
+struct vop_reclaim_args;
+
+/*
+ * Prototypes for ISOFS vnode operations
+ */
+int cd9660_lookup __P((struct vop_cachedlookup_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_blkatoff __P((struct vnode *vp, off_t offset, char **res, struct buf **bpp));
+
+void cd9660_defattr __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *, enum ISO_FTYPE));
+void cd9660_deftstamp __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *, enum ISO_FTYPE));
+struct vnode *cd9660_ihashget __P((dev_t, ino_t));
+void cd9660_ihashins __P((struct iso_node *));
+int cd9660_tstamp_conv7 __P((u_char *, struct timespec *, enum ISO_FTYPE));
+int cd9660_tstamp_conv17 __P((u_char *, struct timespec *));
+
+#endif /* KERNEL */
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
new file mode 100644
index 0000000..b34553f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -0,0 +1,723 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.c 8.6 (Berkeley) 12/5/94
+ * $Id: cd9660_rrip.c,v 1.12 1997/02/22 09:38:49 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+typedef int rrt_func_t __P((void *, ISO_RRIP_ANALYZE *ana)); /* entry handler: (entry, state) -> result bits */
+
+typedef struct { /* one row of a dispatch table walked by cd9660_rrip_loop() */
+ char type[2]; /* two-character entry signature to match */
+ rrt_func_t *func; /* handler for a matching entry; a null pointer ends the handler scan */
+ void (*func2) __P((struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana)); /* default routine, run when `result' was never returned */
+ int result; /* result bit(s) that mean the default is not needed */
+} RRIP_TABLE;
+
+static int cd9660_rrip_altname __P((ISO_RRIP_ALTNAME *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_attr __P((ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_cont __P((ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana));
+static void cd9660_rrip_defattr __P((struct iso_directory_record *isodir,
+ ISO_RRIP_ANALYZE *ana));
+static void cd9660_rrip_defname __P((struct iso_directory_record *isodir,
+ ISO_RRIP_ANALYZE *ana));
+static void cd9660_rrip_deftstamp __P((struct iso_directory_record *isodir,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_device __P((ISO_RRIP_DEVICE *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_extref __P((ISO_RRIP_EXTREF *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_idflag __P((ISO_RRIP_IDFLAG *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_loop __P((struct iso_directory_record *isodir,
+ ISO_RRIP_ANALYZE *ana,
+ RRIP_TABLE *table));
+static int cd9660_rrip_pclink __P((ISO_RRIP_CLINK *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_reldir __P((ISO_RRIP_RELDIR *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_slink __P((ISO_RRIP_SLINK *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_stop __P((ISO_SUSP_HEADER *p,
+ ISO_RRIP_ANALYZE *ana));
+static int cd9660_rrip_tstamp __P((ISO_RRIP_TSTAMP *p,
+ ISO_RRIP_ANALYZE *ana));
+
+/*
+ * POSIX file attribute
+ *
+ * Copy mode, link count, owner and group from a "PX" entry into the
+ * node and mark the attribute field as satisfied.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *attrs = &ana->inop->inode;
+
+	attrs->iso_mode = isonum_733(p->mode);
+	attrs->iso_links = isonum_733(p->links);
+	attrs->iso_uid = isonum_733(p->uid);
+	attrs->iso_gid = isonum_733(p->gid);
+
+	/* Nothing further is wanted for this field. */
+	ana->fields &= ~ISO_SUSP_ATTR;
+	return (ISO_SUSP_ATTR);
+}
+
+/*
+ * Default used when no "PX" entry was seen: complain, then fall back to
+ * the plain ISO attributes computed by cd9660_defattr().
+ */
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *node = ana->inop;
+
+	/* But this is a required field! */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir, node, NULL, ISO_FTYPE_RRIP);
+}
+
+/*
+ * Symbolic Links
+ *
+ * Append the components of one "SL" entry to the link target being
+ * assembled in ana->outbuf. *ana->outlen is the length written so far
+ * and ana->maxlen the limit; ana->cont carries the "previous component
+ * is continued" state from one entry to the next.
+ *
+ * Returns ISO_SUSP_SLINK once an entry with a zero flags byte has been
+ * processed (the target is complete), 0 otherwise. When the target
+ * would exceed ana->maxlen the output is rewound to its start,
+ * ana->fields is cleared and 0 is returned.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+ ISO_RRIP_SLINK *p;
+ ISO_RRIP_ANALYZE *ana;
+{
+ register ISO_RRIP_SLINK_COMPONENT *pcomp;
+ register ISO_RRIP_SLINK_COMPONENT *pcompe;
+ int len, wlen, cont;
+ char *outbuf, *inbuf;
+
+ pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+ pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length));
+ len = *ana->outlen;
+ outbuf = ana->outbuf;
+ cont = ana->cont;
+
+ /*
+ * Gathering a Symbolic name from each component with path.
+ * Each step advances by the component header (ISO_RRIP_SLSIZ)
+ * plus the component's own length.
+ */
+ for (;
+ pcomp < pcompe;
+ pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+ + isonum_711(pcomp->clen))) {
+
+ /* separate components with a slash, unless continuing the previous one */
+ if (!cont) {
+ if (len < ana->maxlen) {
+ len++;
+ *outbuf++ = '/';
+ }
+ }
+ cont = 0;
+
+ /* "." and ".." are produced by copying 1 or 2 bytes of this string */
+ inbuf = "..";
+ wlen = 0;
+
+ switch (*pcomp->cflag) {
+
+ case ISO_SUSP_CFLAG_CURRENT:
+ /* Inserting Current */
+ wlen = 1;
+ break;
+
+ case ISO_SUSP_CFLAG_PARENT:
+ /* Inserting Parent */
+ wlen = 2;
+ break;
+
+ case ISO_SUSP_CFLAG_ROOT:
+ /* Inserting slash for ROOT */
+ /* start over from beginning(?) */
+ outbuf -= len;
+ len = 0;
+ break;
+
+ case ISO_SUSP_CFLAG_VOLROOT:
+ /* Inserting a mount point i.e. "/cdrom" */
+ /* same as above */
+ outbuf -= len;
+ len = 0;
+ inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+ wlen = strlen(inbuf);
+ break;
+
+ case ISO_SUSP_CFLAG_HOST:
+ /* Inserting hostname i.e. "kurt.tools.de" */
+ inbuf = hostname;
+ wlen = strlen(hostname);
+ break;
+
+ case ISO_SUSP_CFLAG_CONTINUE:
+ cont = 1;
+ /* fall thru */
+ case 0:
+ /* Inserting component */
+ wlen = isonum_711(pcomp->clen);
+ inbuf = pcomp->name;
+ break;
+ default:
+ printf("RRIP with incorrect flags?");
+ wlen = ana->maxlen + 1; /* forces the overflow path below */
+ break;
+ }
+
+ if (len + wlen > ana->maxlen) {
+ /* indicate error to caller */
+ ana->cont = 1;
+ ana->fields = 0;
+ ana->outbuf -= *ana->outlen;
+ *ana->outlen = 0;
+ return 0;
+ }
+
+ bcopy(inbuf,outbuf,wlen);
+ outbuf += wlen;
+ len += wlen;
+
+ }
+ /* publish the new write position, length and continuation state */
+ ana->outbuf = outbuf;
+ *ana->outlen = len;
+ ana->cont = cont;
+
+ /* a zero flags byte in the entry ends the link target */
+ if (!isonum_711(p->flags)) {
+ ana->fields &= ~ISO_SUSP_SLINK;
+ return ISO_SUSP_SLINK;
+ }
+ return 0;
+}
+
+/*
+ * Alternate name
+ *
+ * Append the name fragment of one "NM" entry to the name being built in
+ * ana->outbuf and add its length to *ana->outlen.
+ *
+ * Returns ISO_SUSP_ALTNAME when the name is complete, 0 when the entry
+ * announces a continuation or when it had to be rejected.  A rejected
+ * entry (unknown flags, an entry too short to hold the NM header, or a
+ * name longer than ana->maxlen) is treated as if there were no NM field
+ * at all: the output is rewound and *ana->outlen reset to 0.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+
+	/* "." and ".." are produced by copying 1 or 2 bytes of this string */
+	inbuf = "..";
+	wlen = 0;
+	cont = 0;
+
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = strlen(hostname);
+		break;
+
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/* Inserting component */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		/*
+		 * The entry length comes straight from the disc and has
+		 * not been checked yet.  A value below 5 would make wlen
+		 * negative, which would either reach bcopy() as a huge
+		 * count or wrap the length below and rewind outbuf far
+		 * past its start.  Force the overflow path instead.
+		 */
+		if (wlen < 0)
+			wlen = ana->maxlen + 1;
+		break;
+
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		wlen = ana->maxlen + 1;	/* forces the overflow path below */
+		break;
+	}
+
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		ana->outbuf -= *ana->outlen - wlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+/*
+ * Default used when no complete "NM" entry was seen: derive the name
+ * from the ISO directory record.  A first name byte of 0 gives ".", a
+ * first byte of 1 gives "..", anything else is translated by
+ * isofntrans().
+ */
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* Seed the buffer; the length chosen below picks "." or "..". */
+	strcpy(ana->outbuf,"..");
+
+	if (*isodir->name == 0) {
+		*ana->outlen = 1;
+	} else if (*isodir->name == 1) {
+		*ana->outlen = 2;
+	} else {
+		isofntrans(isodir->name, isonum_711(isodir->name_len),
+		    ana->outbuf, ana->outlen,
+		    1, isonum_711(isodir->flags)&4);
+	}
+}
+
+/*
+ * Parent or Child Link
+ *
+ * Shared handler for "CL" and "PL" entries: store the inode number of
+ * the directory the entry points at in *ana->inump and report which of
+ * the two kinds of link was found.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+
+	/* The first signature character tells the two apart. */
+	if (*p->h.type == 'C')
+		return (ISO_SUSP_CLINK);
+	return (ISO_SUSP_PLINK);
+}
+
+/*
+ * Relocated directory
+ *
+ * Handler for an "RE" entry.  Discards any name gathered so far, clears
+ * every outstanding field so that the scan stops, and returns the
+ * RELDIR bit together with the name and link bits.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	ana->fields = 0;
+	*ana->outlen = 0;
+
+	return (ISO_SUSP_RELDIR | ISO_SUSP_ALTNAME |
+	    ISO_SUSP_CLINK | ISO_SUSP_PLINK);
+}
+
+/*
+ * Time stamps
+ *
+ * Handler for a "TF" entry.  The flags byte says which stamps are
+ * recorded and whether they use the 7-byte or the 17-byte form; the
+ * stamps follow each other in the order creation, modification, access,
+ * attribute change.  The creation stamp is skipped.  A missing
+ * modification time is zeroed; missing access and change times are
+ * copied from the modification time.
+ */
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ISO_RRIP_INODE *attrs = &ana->inop->inode;
+	u_char *stamp = p->time;
+	int long_form, width;
+
+	/* Check a format of time stamp (7bytes/17bytes) */
+	long_form = (*p->flags & ISO_SUSP_TSTAMP_FORM17) != 0;
+	width = long_form ? 17 : 7;
+
+	if (*p->flags & ISO_SUSP_TSTAMP_CREAT)
+		stamp += width;
+
+	if (*p->flags & ISO_SUSP_TSTAMP_MODIFY) {
+		if (long_form)
+			cd9660_tstamp_conv17(stamp, &attrs->iso_mtime);
+		else
+			cd9660_tstamp_conv7(stamp, &attrs->iso_mtime,
+			    ISO_FTYPE_RRIP);
+		stamp += width;
+	} else
+		bzero(&attrs->iso_mtime, sizeof(struct timespec));
+
+	if (*p->flags & ISO_SUSP_TSTAMP_ACCESS) {
+		if (long_form)
+			cd9660_tstamp_conv17(stamp, &attrs->iso_atime);
+		else
+			cd9660_tstamp_conv7(stamp, &attrs->iso_atime,
+			    ISO_FTYPE_RRIP);
+		stamp += width;
+	} else
+		attrs->iso_atime = attrs->iso_mtime;
+
+	if (*p->flags & ISO_SUSP_TSTAMP_ATTR) {
+		if (long_form)
+			cd9660_tstamp_conv17(stamp, &attrs->iso_ctime);
+		else
+			cd9660_tstamp_conv7(stamp, &attrs->iso_ctime,
+			    ISO_FTYPE_RRIP);
+	} else
+		attrs->iso_ctime = attrs->iso_mtime;
+
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return (ISO_SUSP_TSTAMP);
+}
+
+/*
+ * Default used when no "TF" entry was seen: take the time stamps from
+ * the plain ISO data through cd9660_deftstamp().
+ */
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *node = ana->inop;
+
+	cd9660_deftstamp(isodir, node, NULL, ISO_FTYPE_RRIP);
+}
+
+/*
+ * POSIX device modes
+ *
+ * Handler for a "PN" entry: build the node's device number from the two
+ * recorded halves.  With a non-zero high half that value is the major
+ * number and the minor number is taken from the low half; otherwise the
+ * low half supplies both.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	u_int high, low;
+
+	high = isonum_733(p->dev_t_high);
+	low = isonum_733(p->dev_t_low);
+
+	if (high != 0)
+		ana->inop->inode.iso_rdev = makedev(high, minor(low));
+	else
+		ana->inop->inode.iso_rdev = makedev(major(low), minor(low));
+
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return (ISO_SUSP_DEVICE);
+}
+
+/*
+ * Flag indicating which fields are recorded
+ *
+ * Handler for an "RR" entry.  Its flags byte is masked into the low
+ * eight bits of ana->fields, so that fields the entry does not announce
+ * are no longer searched for.  If a relocated directory is still wanted
+ * afterwards, the entry is handed on to cd9660_rrip_reldir().
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* don't touch high bits */
+	ana->fields &= (isonum_711(p->flags) | ~0xff);
+
+	/* special handling of RE field */
+	return ((ana->fields & ISO_SUSP_RELDIR) ?
+	    cd9660_rrip_reldir(/* XXX */ (ISO_RRIP_RELDIR *)p, ana) :
+	    ISO_SUSP_IDFLAG);
+}
+
+/*
+ * Continuation pointer
+ *
+ * Handler for a "CE" entry: remember block, offset and length of the
+ * continuation area so that cd9660_rrip_loop() can carry on there.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_len = isonum_733(p->length);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_blk = isonum_733(p->location);
+
+	return (ISO_SUSP_CONT);
+}
+
+/*
+ * System Use end
+ *
+ * Handler for an "ST" entry: nothing to decode, just report the stop
+ * bit so that cd9660_rrip_loop() ends its scan of the current area.
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	return (ISO_SUSP_STOP);
+}
+
+/*
+ * Extension reference
+ *
+ * Handler for an "ER" entry.  Accepts it only if it names the
+ * ten-character identifier "RRIP_1991A" with extension version 1;
+ * returns ISO_SUSP_EXTREF in that case and 0 otherwise.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) == 10
+	    && bcmp((char *)p + 8,"RRIP_1991A",10) == 0
+	    && isonum_711(p->version) == 1) {
+		ana->fields &= ~ISO_SUSP_EXTREF;
+		return (ISO_SUSP_EXTREF);
+	}
+
+	return (0);
+}
+
+/*
+ * Walk the system use entries of a directory record, and of any
+ * continuation areas they point to, dispatching every version 1 entry
+ * whose signature appears in `table' to its handler.
+ *
+ * isodir  directory record whose system use area is scanned
+ * ana     scan state; ana->fields holds the fields still wanted and the
+ *         scan stops as soon as it becomes zero
+ * table   handlers to try, ended by a row with a null `func'
+ *
+ * After the scan, every default routine (`func2') whose `result' bit
+ * was not returned by any handler is run.  Returns the OR of all
+ * handler results, with ISO_SUSP_STOP removed.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int result;
+
+	/*
+	 * Note: If name length is even,
+	 * there is 1 byte of padding after the name
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/*
+		 * Note: "pend" should be more than one SUSP header
+		 */
+		while (pend >= phead + 1) {
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			if (result&ISO_SUSP_STOP) {
+				result &= ~ISO_SUSP_STOP;
+				break;
+			}
+			/* plausibility check */
+			if (isonum_711(phead->length) < sizeof(*phead))
+				break;
+			/*
+			 * move to next SUSP
+			 * Hopefully this works with newer versions, too
+			 */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length));
+		}
+
+		if (ana->fields && ana->iso_ce_len) {
+			if (ana->iso_ce_blk >= ana->imp->volume_space_size
+			    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size)
+				break;
+			/*
+			 * Let go of the previous continuation block before
+			 * reading the next one.  Only the last buffer is
+			 * released after the loop, so a chain of several
+			 * continuation areas used to leak a buffer for every
+			 * additional hop.  Nothing points into the old
+			 * block any more: the continuation location has been
+			 * copied into `ana'.
+			 */
+			if (bp != NULL) {
+				brelse(bp);
+				bp = NULL;
+			}
+			if (bread(ana->imp->im_devvp,
+			    ana->iso_ce_blk <<
+			    (ana->imp->im_bshift - DEV_BSHIFT),
+			    ana->imp->logical_block_size, NOCRED, &bp))
+				/* what to do now? */
+				break;
+			phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off);
+			pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+		} else
+			break;
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * If we don't find the Basic SUSP stuffs, just set default value
+	 * (attribute/time stamp)
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+
+	return result;
+}
+
+/*
+ * Get Attributes.
+ *
+ * Dispatch table used by cd9660_rrip_analyze(). Each row gives an entry
+ * signature, its handler, an optional default routine that
+ * cd9660_rrip_loop() runs when the handler's result bit was never
+ * returned, and that result bit. The row with a null handler ends the
+ * table.
+ */
+/*
+ * XXX the casts are bogus but will do for now.
+ * BC converts each typed handler to the generic rrt_func_t pointer
+ * stored in RRIP_TABLE.
+ */
+#define BC (rrt_func_t *)
+static RRIP_TABLE rrip_table_analyze[] = {
+ { "PX", BC cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR },
+ { "TF", BC cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP },
+ { "PN", BC cd9660_rrip_device, 0, ISO_SUSP_DEVICE },
+ { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Fill in the attributes, time stamps and device number of `inop' from
+ * the Rock Ridge entries of `isodir'.  Returns the result bits of
+ * cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE state;
+
+	state.imp = imp;
+	state.inop = inop;
+	/* The three fields this scan is looking for. */
+	state.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+
+	return (cd9660_rrip_loop(isodir, &state, rrip_table_analyze));
+}
+
+/*
+ * Get Alternate Name.
+ *
+ * Dispatch table used by cd9660_rrip_getname(). The "NM" row must stay
+ * first: for the "." and ".." records the caller starts at the second
+ * row to leave it out.
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+ { "NM", BC cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME },
+ { "CL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "PL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "RE", BC cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR },
+ { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Produce the name of a directory entry, preferring the Rock Ridge
+ * alternate name.
+ *
+ * outbuf  receives the name (at most NAME_MAX bytes are accepted)
+ * outlen  receives its length
+ * inump   receives the inode number when a child or parent link entry
+ *         is found
+ *
+ * The "." and ".." records get their name straight from the directory
+ * record and are scanned without the alternate name handler.  Returns
+ * the result bits of cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE state;
+	RRIP_TABLE *handlers = rrip_table_getname;
+
+	*outlen = 0;
+	state.outbuf = outbuf;
+	state.outlen = outlen;
+	state.maxlen = NAME_MAX;
+	state.inump = inump;
+	state.imp = imp;
+	state.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+
+	if (*isodir->name == 0 || *isodir->name == 1) {
+		cd9660_rrip_defname(isodir, &state);
+
+		/* The name is settled: skip the first ("NM") row. */
+		state.fields &= ~ISO_SUSP_ALTNAME;
+		handlers = &rrip_table_getname[1];
+	}
+
+	return (cd9660_rrip_loop(isodir, &state, handlers));
+}
+
+/*
+ * Get Symbolic Link.
+ *
+ * Dispatch table used by cd9660_rrip_getsymname(); no row has a default
+ * routine.
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+ { "SL", BC cd9660_rrip_slink, 0, ISO_SUSP_SLINK },
+ { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 }
+};
+
+/*
+ * Assemble the target of a symbolic link from the "SL" entries of
+ * `isodir' into `outbuf' (at most MAXPATHLEN bytes are accepted) and
+ * store its length in *outlen.  Returns ISO_SUSP_SLINK if a complete
+ * target was found, 0 otherwise.
+ */
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE state;
+	int found;
+
+	*outlen = 0;
+	state.outbuf = outbuf;
+	state.outlen = outlen;
+	state.maxlen = MAXPATHLEN;
+	state.cont = 1;			/* don't start with a slash */
+	state.imp = imp;
+	state.fields = ISO_SUSP_SLINK;
+
+	found = cd9660_rrip_loop(isodir, &state, rrip_table_getsymname);
+	return (found & ISO_SUSP_SLINK);
+}
+
+static RRIP_TABLE rrip_table_extref[] = { /* used by cd9660_rrip_offset() to look for the ER entry */
+ { "ER", BC cd9660_rrip_extref, 0, ISO_SUSP_EXTREF },
+ { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 } /* end of table */
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields.
+ * Note: We insist on the ER field.
+ *
+ * The "SP" entry is expected right behind the one-byte name of the
+ * record, or 15 bytes further on; imp->rr_skip0 records which of the
+ * two applied.  Returns the skip value stored in the SP entry, or -1 if
+ * either the SP entry or an acceptable ER entry is missing.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE state;
+	ISO_RRIP_OFFSET *sp;
+
+	imp->rr_skip0 = 0;
+	sp = (ISO_RRIP_OFFSET *)(isodir->name + 1);
+	if (bcmp(sp,"SP\7\1\276\357",6) != 0) {
+		/* Maybe, it's a CDROM XA disc? */
+		imp->rr_skip0 = 15;
+		sp = (ISO_RRIP_OFFSET *)((char *)sp + 15);
+		if (bcmp(sp,"SP\7\1\276\357",6) != 0)
+			return (-1);
+	}
+
+	state.imp = imp;
+	state.fields = ISO_SUSP_EXTREF;
+	if ((cd9660_rrip_loop(isodir, &state, rrip_table_extref)
+	    & ISO_SUSP_EXTREF) == 0)
+		return (-1);
+
+	return (isonum_711(sp->skip));
+}
diff --git a/sys/fs/cd9660/cd9660_rrip.h b/sys/fs/cd9660/cd9660_rrip.h
new file mode 100644
index 0000000..cacee39
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.h
@@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.h 8.2 (Berkeley) 12/5/94
+ * $Id: cd9660_rrip.h,v 1.3.2000.1 1996/09/30 12:46:48 dfr Exp $
+ */
+
+typedef struct { /* header found at the start of every system use entry */
+ char type [ISODCL ( 0, 1)]; /* two-character signature, e.g. PX */
+ u_char length [ISODCL ( 2, 2)]; /* 711 */ /* length of the whole entry, header included */
+ u_char version [ISODCL ( 3, 3)];
+} ISO_SUSP_HEADER;
+
+typedef struct { /* PX entry: POSIX file attributes */
+ ISO_SUSP_HEADER h;
+ char mode [ISODCL ( 4, 11)]; /* 733 */
+ char links [ISODCL ( 12, 19)]; /* 733 */
+ char uid [ISODCL ( 20, 27)]; /* 733 */
+ char gid [ISODCL ( 28, 35)]; /* 733 */
+} ISO_RRIP_ATTR;
+
+typedef struct { /* PN entry: device number of a special file */
+ ISO_SUSP_HEADER h;
+ char dev_t_high [ISODCL ( 4, 11)]; /* 733 */
+ char dev_t_low [ISODCL ( 12, 19)]; /* 733 */
+} ISO_RRIP_DEVICE;
+
+#define ISO_SUSP_CFLAG_CONTINUE 0x01 /* component/name continues in the next one */
+#define ISO_SUSP_CFLAG_CURRENT 0x02 /* stands for the current directory */
+#define ISO_SUSP_CFLAG_PARENT 0x04 /* stands for the parent directory */
+#define ISO_SUSP_CFLAG_ROOT 0x08 /* stands for the root directory */
+#define ISO_SUSP_CFLAG_VOLROOT 0x10 /* stands for the mount point of the volume */
+#define ISO_SUSP_CFLAG_HOST 0x20 /* stands for the local host name */
+
+typedef struct { /* one path component inside an SL entry */
+ u_char cflag [ISODCL ( 1, 1)]; /* one of ISO_SUSP_CFLAG_*, or 0 for a plain name */
+ u_char clen [ISODCL ( 2, 2)]; /* length of name[] */
+ u_char name [1]; /* XXX */
+} ISO_RRIP_SLINK_COMPONENT;
+#define ISO_RRIP_SLSIZ 2 /* bytes in a component ahead of name[] */
+
+typedef struct { /* SL entry: symbolic link target */
+ ISO_SUSP_HEADER h;
+ u_char flags [ISODCL ( 4, 4)]; /* zero in the last SL entry of a link */
+ u_char component [ISODCL ( 5, 5)]; /* first ISO_RRIP_SLINK_COMPONENT starts here */
+} ISO_RRIP_SLINK;
+
+typedef struct { /* NM entry: alternate name; the name bytes follow at offset 5 */
+ ISO_SUSP_HEADER h;
+ char flags [ISODCL ( 4, 4)]; /* one of ISO_SUSP_CFLAG_*, or 0 */
+} ISO_RRIP_ALTNAME;
+
+typedef struct { /* CL entry: child link */
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_CLINK;
+
+typedef struct { /* PL entry: parent link; same layout as ISO_RRIP_CLINK */
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_PLINK;
+
+typedef struct { /* RE entry: relocated directory; header only */
+ ISO_SUSP_HEADER h;
+} ISO_RRIP_RELDIR;
+
+#define ISO_SUSP_TSTAMP_FORM17 0x80 /* stamps are recorded in the 17-byte form */
+#define ISO_SUSP_TSTAMP_FORM7 0x00 /* stamps are recorded in the 7-byte form */
+#define ISO_SUSP_TSTAMP_CREAT 0x01 /* creation time recorded */
+#define ISO_SUSP_TSTAMP_MODIFY 0x02 /* modification time recorded */
+#define ISO_SUSP_TSTAMP_ACCESS 0x04 /* access time recorded */
+#define ISO_SUSP_TSTAMP_ATTR 0x08 /* attribute change time recorded */
+#define ISO_SUSP_TSTAMP_BACKUP 0x10
+#define ISO_SUSP_TSTAMP_EXPIRE 0x20
+#define ISO_SUSP_TSTAMP_EFFECT 0x40
+
+typedef struct { /* TF entry: time stamps */
+ ISO_SUSP_HEADER h;
+ u_char flags [ISODCL ( 4, 4)]; /* ISO_SUSP_TSTAMP_* bits */
+ u_char time [ISODCL ( 5, 5)]; /* first recorded stamp starts here */
+} ISO_RRIP_TSTAMP;
+
+typedef struct { /* RR entry: flags telling which fields are recorded */
+ ISO_SUSP_HEADER h;
+ u_char flags [ISODCL ( 4, 4)];
+} ISO_RRIP_IDFLAG;
+
+typedef struct { /* ER entry: extension reference; the identifier follows at offset 8 */
+ ISO_SUSP_HEADER h;
+ char len_id [ISODCL ( 4, 4)]; /* length of the identifier */
+ char len_des [ISODCL ( 5, 5)];
+ char len_src [ISODCL ( 6, 6)];
+ char version [ISODCL ( 7, 7)]; /* extension version */
+} ISO_RRIP_EXTREF;
+
+typedef struct { /* SP entry: marks the start of the system use fields */
+ ISO_SUSP_HEADER h;
+ char check [ISODCL ( 4, 5)]; /* check bytes */
+ char skip [ISODCL ( 6, 6)]; /* value returned by cd9660_rrip_offset() */
+} ISO_RRIP_OFFSET;
+
+typedef struct { /* CE entry: pointer to a continuation area */
+ ISO_SUSP_HEADER h;
+ char location [ISODCL ( 4, 11)]; /* block holding the continuation area */
+ char offset [ISODCL ( 12, 19)]; /* where the area starts inside that block */
+ char length [ISODCL ( 20, 27)]; /* length of the area */
+} ISO_RRIP_CONT;
diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c
new file mode 100644
index 0000000..090f10d
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_util.c
@@ -0,0 +1,141 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_util.c 8.3 (Berkeley) 12/5/94
+ * $Id: cd9660_util.c,v 1.9 1997/02/22 09:38:50 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#include <isofs/cd9660/iso.h>
+
+/*
+ * Translate and compare a filename.
+ *
+ * fn/fnlen	the name being looked up
+ * isofn/isolen	the name as recorded in the ISO directory record
+ *
+ * Upper-case letters in isofn match either case in fn.  The version
+ * suffix (';' plus digits) may be omitted from fn, and a bare trailing
+ * '.' immediately before the ';' in isofn is ignored as well.
+ *
+ * Returns 0 when the names match and a non-zero value otherwise.
+ */
+int
+isofncmp(fn, fnlen, isofn, isolen)
+	u_char *fn;
+	int fnlen;
+	u_char *isofn;
+	int isolen;
+{
+	int i, j;
+	unsigned char c;
+
+	while (--fnlen >= 0) {
+		/* fn is longer than the recorded name: no match. */
+		if (--isolen < 0)
+			return *fn;
+		if ((c = *isofn++) == ';') {
+			/* Reached the version separator of the ISO name. */
+			switch (*fn++) {
+			default:
+				return *--fn;
+			case 0:
+				return 0;
+			case ';':
+				break;
+			}
+			/* Both names carry a version: compare numerically. */
+			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {
+				if (*fn < '0' || *fn > '9') {
+					return -1;
+				}
+			}
+			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0');
+			return i - j;
+		}
+		if (c != *fn) {
+			if (c >= 'A' && c <= 'Z') {
+				/* Upper case on disc matches lower case in fn. */
+				if (c + ('a' - 'A') != *fn) {
+					if (*fn >= 'a' && *fn <= 'z')
+						return *fn - ('a' - 'A') - c;
+					else
+						return *fn - c;
+				}
+			} else
+				return *fn - c;
+		}
+		fn++;
+	}
+	/* fn is exhausted; accept only an omitted "[.];version" tail. */
+	if (isolen > 0) {
+		switch (*isofn) {
+		default:
+			return -1;
+		case '.':
+			/*
+			 * Only look at isofn[1] when it is still part of the
+			 * recorded name; otherwise we would read past it.
+			 */
+			if (isolen < 2 || isofn[1] != ';')
+				return -1;
+			/* FALLTHROUGH */
+		case ';':
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Translate a filename from its on-disc form.
+ *
+ * infn/infnlen	recorded name and its length
+ * outfn	destination buffer (the caller must provide room for
+ *		infnlen bytes, plus one when assoc is set)
+ * outfnlen	receives the number of bytes written to outfn
+ * original	non-zero: copy the name unchanged
+ *		zero: fold 'A'-'Z' to lower case and stop at the
+ *		version separator ';' (also dropping a '.' that
+ *		immediately precedes it)
+ * assoc	non-zero: prefix the result with ASSOCCHAR
+ */
+void
+isofntrans(infn, infnlen, outfn, outfnlen, original, assoc)
+	u_char *infn;
+	int infnlen;
+	u_char *outfn;
+	u_short *outfnlen;
+	int original;
+	int assoc;
+{
+	int fnidx = 0;
+
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		/* Account for the prefix in both the index and the bound. */
+		fnidx++;
+		infnlen++;
+	}
+	for (; fnidx < infnlen; fnidx++) {
+		char c = *infn++;
+
+		if (!original && c >= 'A' && c <= 'Z')
+			*outfn++ = c + ('a' - 'A');
+		else if (!original && c == '.' &&
+		    fnidx + 1 < infnlen && *infn == ';')
+			/*
+			 * Peek at the next byte only while it is still inside
+			 * the input name; a trailing '.' is copied as-is.
+			 */
+			break;
+		else if (!original && c == ';')
+			break;
+		else
+			*outfn++ = c;
+	}
+	*outfnlen = fnidx;
+}
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
new file mode 100644
index 0000000..ba4e385
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,894 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95
+ * $Id: cd9660_vfsops.c,v 1.50 1999/01/30 12:26:22 phk Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/cdio.h>
+#include <sys/conf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_mount.h>
+
+MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure");
+MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part");
+
+/*
+ * Prototypes for the VFS entry points implemented in this file.
+ */
+static int cd9660_mount __P((struct mount *,
+ char *, caddr_t, struct nameidata *, struct proc *));
+static int cd9660_start __P((struct mount *, int, struct proc *));
+static int cd9660_unmount __P((struct mount *, int, struct proc *));
+static int cd9660_root __P((struct mount *, struct vnode **));
+static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t,
+ struct proc *));
+static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+static int cd9660_sync __P((struct mount *, int, struct ucred *,
+ struct proc *));
+static int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *,
+ struct vnode **, int *, struct ucred **));
+static int cd9660_vptofh __P((struct vnode *, struct fid *));
+
+/*
+ * Operations vector for the cd9660 filesystem.  The initializer is
+ * positional, so the order of entries must follow the member order of
+ * struct vfsops.  cd9660_init is not declared in this file.
+ */
+static struct vfsops cd9660_vfsops = {
+ cd9660_mount,
+ cd9660_start,
+ cd9660_unmount,
+ cd9660_root,
+ cd9660_quotactl,
+ cd9660_statfs,
+ cd9660_sync,
+ cd9660_vget,
+ cd9660_fhtovp,
+ cd9660_vptofh,
+ cd9660_init
+};
+/* Register the vector under the name cd9660, flagged read-only. */
+VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY);
+
+
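+/*
+ * Root-mount support.  iso_mountroot() below is used when iso is going to
+ * be mounted as root; it is entered from cd9660_mount() when MNT_ROOTFS
+ * is set.  The two helpers it relies on are declared here.
+ */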
+
+static int iso_get_ssector __P((dev_t dev, struct proc *p));
+static int iso_mountfs __P((struct vnode *devvp, struct mount *mp,
+ struct proc *p, struct iso_args *argp));
+
+/*
+ * Find the start of the last data track on this CD-ROM so that the last
+ * session of a multi-session disc can be mounted.  Every failure simply
+ * yields 0, which is always a safe answer.
+ */
+static int
+iso_get_ssector(dev, p)
+	dev_t dev;
+	struct proc *p;
+{
+	struct ioc_toc_header tochdr;
+	struct ioc_read_toc_single_entry tocent;
+	struct cdevsw *devsw;
+	d_ioctl_t *do_ioctl;
+	int track;
+
+	devsw = bdevsw[major(dev)];
+	do_ioctl = devsw->d_ioctl;
+	if (do_ioctl == NULL)
+		return 0;
+
+	if (do_ioctl(dev, CDIOREADTOCHEADER, (caddr_t)&tochdr, FREAD, p) != 0)
+		return 0;
+
+	/* Walk the table of contents backwards from the last track. */
+	track = tochdr.ending_track;
+	while (track >= 0) {
+		tocent.address_format = CD_LBA_FORMAT;
+		tocent.track = track;
+		if (do_ioctl(dev, CDIOREADTOCENTRY, (caddr_t)&tocent, FREAD, p) != 0)
+			return 0;
+		/* Control bit 4 set: found a data track. */
+		if ((tocent.entry.control & 4) != 0)
+			return ntohl(tocent.entry.addr.lba);
+		track--;
+	}
+
+	/* No data track at all. */
+	return 0;
+}
+
+static int iso_mountroot __P((struct mount *mp, struct proc *p));
+
+/*
+ * Mount the cd9660 filesystem found on rootdev as the root filesystem.
+ *
+ * Obtains the block-device vnode for rootdev (stored in rootvp), picks
+ * the session start via iso_get_ssector(), mounts it with iso_mountfs()
+ * and finally fills in mp->mnt_stat.
+ *
+ * Returns 0 on success or the error from bdevvp()/iso_mountfs().
+ */
+static int
+iso_mountroot(mp, p)
+	struct mount *mp;
+	struct proc *p;
+{
+	struct iso_args args;
+	int error;
+
+	if ((error = bdevvp(rootdev, &rootvp))) {
+		/* Terminate the line so later console output is not glued on. */
+		printf("iso_mountroot: can't find rootvp\n");
+		return (error);
+	}
+	args.flags = ISOFSMNT_ROOT;
+	args.ssector = iso_get_ssector(rootdev, p);
+	if (bootverbose)
+		printf("iso_mountroot(): using session at block %d\n",
+		    args.ssector);
+	if ((error = iso_mountfs(rootvp, mp, p, &args)) != 0)
+		return (error);
+
+	(void)cd9660_statfs(mp, &mp->mnt_stat, p);
+	return (0);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * mp	mount point being set up or updated
+ * path	user-space address of the mount-point path
+ * data	user-space address of a struct iso_args
+ * ndp	nameidata used to look up args.fspec
+ * p	calling process
+ *
+ * A root mount (MNT_ROOTFS) is handed to iso_mountroot().  Otherwise the
+ * arguments are copied in, read/write requests are rejected with EROFS,
+ * the device named by args.fspec is looked up and checked, and either
+ * iso_mountfs() is called (new mount) or the device is verified to be
+ * the one already mounted (MNT_UPDATE).  Returns 0 or an errno value.
+ */
+static int
+cd9660_mount(mp, path, data, ndp, p)
+ register struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp;
+ struct iso_args args;
+ size_t size;
+ int error;
+ mode_t accessmode;
+ struct iso_mnt *imp = 0;
+
+ /* Root mounts take no user arguments; handle them separately. */
+ if ((mp->mnt_flag & MNT_ROOTFS) != 0) {
+ if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR)
+ mp->mnt_flag |= MNT_NOCLUSTERR;
+ return (iso_mountroot(mp, p));
+ }
+ if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args))))
+ return (error);
+
+ /* This filesystem can only be mounted read-only. */
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ return (EROFS);
+
+ /*
+ * If updating, check whether changing from read-only to
+ * read/write; if there is no device name, that's all we do.
+ * Disallow clearing MNT_NOCLUSTERR flag, if block device requests.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ imp = VFSTOISOFS(mp);
+ if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags &
+ D_NOCLUSTERR)
+ mp->mnt_flag |= MNT_NOCLUSTERR;
+ if (args.fspec == 0)
+ return (vfs_export(mp, &imp->im_export, &args.export));
+ }
+ /*
+ * Not an update, or updating the name: look up the name
+ * and verify that it refers to a sensible block device.
+ */
+ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+ if ((error = namei(ndp)))
+ return (error);
+ devvp = ndp->ni_vp;
+
+ if (devvp->v_type != VBLK) {
+ vrele(devvp);
+ return ENOTBLK;
+ }
+ if (major(devvp->v_rdev) >= nblkdev ||
+ bdevsw[major(devvp->v_rdev)] == NULL) {
+ vrele(devvp);
+ return ENXIO;
+ }
+
+ /*
+ * Verify that user has necessary permissions on the device,
+ * or has superuser abilities
+ */
+ accessmode = VREAD;
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
+ if (error)
+ error = suser(p->p_ucred, &p->p_acflag);
+ if (error) {
+ /* devvp is still locked here, so vput() rather than vrele(). */
+ vput(devvp);
+ return (error);
+ }
+ VOP_UNLOCK(devvp, 0, p);
+
+ if ((mp->mnt_flag & MNT_UPDATE) == 0) {
+ if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR)
+ mp->mnt_flag |= MNT_NOCLUSTERR;
+ error = iso_mountfs(devvp, mp, p, &args);
+ } else {
+ /* An update may not switch to a different device. */
+ if (devvp != imp->im_devvp)
+ error = EINVAL; /* needs translation */
+ else
+ vrele(devvp);
+ }
+ if (error) {
+ vrele(devvp);
+ return error;
+ }
+ imp = VFSTOISOFS(mp);
+ /* Record the mount-point and device names, zero-padded to MNAMELEN. */
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) cd9660_statfs(mp, &mp->mnt_stat, p);
+ return 0;
+}
+
+/*
+ * Common code for mount and mountroot.
+ *
+ * devvp	vnode of the block device to mount
+ * mp	mount structure to fill in (must carry MNT_RDONLY)
+ * p	calling process
+ * argp	mount arguments; argp->ssector is the session start block and
+ *	argp->flags may be updated to reflect Rock Ridge detection
+ *
+ * Opens the device, scans the volume descriptor set for the primary
+ * descriptor (ISO 9660 or High Sierra), validates the logical block
+ * size, allocates and fills the struct iso_mnt and probes for Rock
+ * Ridge unless ISOFSMNT_NORRIP is given.
+ *
+ * Returns 0 on success.  On failure every resource acquired here is
+ * released and an errno value is returned.
+ */
+static int
+iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int high_sierra = 0;
+	int iso_bsize;
+	int iso_blknum;
+	struct iso_volume_descriptor *vdp = 0;
+	struct iso_primary_descriptor *pri;
+	struct iso_sierra_primary_descriptor *pri_sierra;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+
+	if (!(mp->mnt_flag & MNT_RDONLY))
+		return EROFS;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if ((error = vfs_mountedon(devvp)))
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)))
+		return (error);
+
+	if ((error = VOP_OPEN(devvp, FREAD, FSCRED, p)))
+		return error;
+	needclose = 1;
+
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+
+	for (iso_blknum = 16 + argp->ssector;
+	     iso_blknum < 100 + argp->ssector;
+	     iso_blknum++) {
+		if ((error = bread(devvp, iso_blknum * btodb(iso_bsize),
+				  iso_bsize, NOCRED, &bp)) != 0)
+			goto out;
+
+		vdp = (struct iso_volume_descriptor *)bp->b_data;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			if (bcmp (vdp->id_sierra, ISO_SIERRA_ID,
+				  sizeof vdp->id) != 0) {
+				error = EINVAL;
+				goto out;
+			} else
+				high_sierra = 1;
+		}
+
+		if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY)
+			break;
+		brelse(bp);
+		/*
+		 * The buffer is gone: forget it so that neither the check
+		 * below nor the error path touches or releases it again.
+		 */
+		bp = NULL;
+		vdp = NULL;
+	}
+
+	/*
+	 * bp is NULL when the scan ran out of candidate blocks without
+	 * seeing a primary descriptor.
+	 */
+	if (bp == NULL ||
+	    isonum_711 (high_sierra? vdp->type_sierra: vdp->type) != ISO_VD_PRIMARY) {
+		error = EINVAL;
+		goto out;
+	}
+
+	pri = (struct iso_primary_descriptor *)vdp;
+	pri_sierra = (struct iso_sierra_primary_descriptor *)vdp;
+
+	logical_block_size =
+		isonum_723 (high_sierra?
+			    pri_sierra->logical_block_size:
+			    pri->logical_block_size);
+
+	/* Must be a power of two between DEV_BSIZE and MAXBSIZE. */
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	rootp = (struct iso_directory_record *)
+		(high_sierra?
+		 pri_sierra->root_directory_record:
+		 pri->root_directory_record);
+
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size =
+		isonum_733 (high_sierra?
+			    pri_sierra->volume_space_size:
+			    pri->volume_space_size);
+	/*
+	 * Since an ISO9660 multi-session CD can also access previous
+	 * sessions, we have to include them into the space consider-
+	 * ations.  This doesn't yield a very accurate number since
+	 * parts of the old sessions might be inaccessible now, but we
+	 * can't do much better.  This is also important for the NFS
+	 * filehandle validation.
+	 */
+	isomp->volume_space_size += argp->ssector;
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+
+	/* Derive the block mask and shift from the (power of two) size. */
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;
+
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+
+	devvp->v_specmountpoint = mp;
+
+	/* Check the Rock Ridge Extension support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if ((error = bread(isomp->im_devvp,
+				  (isomp->root_extent + isonum_711(rootp->ext_attr_length)) <<
+				  (isomp->im_bshift - DEV_BSHIFT),
+				  isomp->logical_block_size, NOCRED, &bp)) != 0)
+		    goto out;
+
+		rootp = (struct iso_directory_record *)bp->b_data;
+
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+		    argp->flags	 |= ISOFSMNT_NORRIP;
+		} else {
+		    argp->flags	 &= ~ISOFSMNT_GENS;
+		}
+
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+
+	if(high_sierra)
+		/* this effectively ignores all the mount flags */
+		isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA;
+	else
+		switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+		  default:
+			  isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+			  break;
+		  case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+			  isomp->iso_ftype = ISO_FTYPE_9660;
+			  break;
+		  case 0:
+			  isomp->iso_ftype = ISO_FTYPE_RRIP;
+			  break;
+		}
+
+	return 0;
+out:
+	/* Undo everything acquired above, in whatever state we got to. */
+	devvp->v_specmountpoint = NULL;
+	if (bp)
+		brelse(bp);
+	if (needclose)
+		(void)VOP_CLOSE(devvp, FREAD, NOCRED, p);
+	if (isomp) {
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Bring a mounted filesystem into operation.
+ * cd9660 needs no work here, so this always succeeds.
+ */
+/* ARGSUSED */
+static int
+cd9660_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * Flushes all vnodes of the mount (forcibly when MNT_FORCE is given),
+ * then closes and releases the device vnode and frees the per-mount
+ * structure.  Returns the vflush() error if flushing fails, otherwise
+ * the result of closing the device.
+ */
+static int
+cd9660_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ register struct iso_mnt *isomp;
+ int error, flags = 0;
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+#if 0
+ mntflushbuf(mp, 0);
+ if (mntinvalbuf(mp))
+ return EBUSY;
+#endif
+ if ((error = vflush(mp, NULLVP, flags)))
+ return (error);
+
+ isomp = VFSTOISOFS(mp);
+
+
+ /* Detach the device from this mount before closing it. */
+ isomp->im_devvp->v_specmountpoint = NULL;
+ error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p);
+ vrele(isomp->im_devvp);
+ free((caddr_t)isomp, M_ISOFSMNT);
+ mp->mnt_data = (qaddr_t)0;
+ mp->mnt_flag &= ~MNT_LOCAL;
+ return (error);
+}
+
+/*
+ * Return the root vnode of a mounted filesystem in *vpp.
+ */
+static int
+cd9660_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct iso_mnt *isomp;
+	struct iso_directory_record *rootrec;
+	ino_t rootino;
+	int use_dot_entry;
+
+	isomp = VFSTOISOFS(mp);
+	rootrec = (struct iso_directory_record *)isomp->root;
+	rootino = isodirino(rootrec, isomp);
+
+	/*
+	 * Under RRIP the `.' entry of the root directory has to be used,
+	 * which is achieved by presenting the root to vget as a relocated
+	 * directory.
+	 */
+	use_dot_entry = (isomp->iso_ftype == ISO_FTYPE_RRIP);
+
+	return (cd9660_vget_internal(mp, rootino, vpp, use_dot_entry, rootrec));
+}
+
+/*
+ * Quota control.  Quotas are not supported on cd9660, so every request
+ * is refused with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+static int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	int unsupported = EOPNOTSUPP;
+
+	return (unsupported);
+}
+
+/*
+ * Fill *sbp with the statistics of the mounted filesystem.
+ * Sizes come from the per-mount data; all "free" counters are zero.
+ * Always returns 0.
+ */
+int
+cd9660_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	struct iso_mnt *imp = VFSTOISOFS(mp);
+
+	sbp->f_bsize = imp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;		/* XXX */
+	sbp->f_blocks = imp->volume_space_size;
+
+	sbp->f_bfree = 0;	/* total free blocks */
+	sbp->f_bavail = 0;	/* blocks free for non superuser */
+	sbp->f_files = 0;	/* total files */
+	sbp->f_ffree = 0;	/* free file nodes */
+
+	/* The mount's own statfs already holds the type and names. */
+	if (sbp == &mp->mnt_stat)
+		return 0;
+
+	sbp->f_type = mp->mnt_vfc->vfc_typenum;
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return 0;
+}
+
+/*
+ * Sync request for the filesystem.  No work is done here; the call
+ * always succeeds.
+ */
+/* ARGSUSED */
+static int
+cd9660_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int status = 0;
+
+	return (status);
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is in range
+ * - call iget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the generation number matches
+ */
+
+/*
+ * File-handle layout used by cd9660_fhtovp() and cd9660_vptofh(); it is
+ * overlaid on the generic struct fid by a pointer cast.
+ */
+struct ifid {
+ ushort ifid_len; /* set to sizeof(struct ifid) by cd9660_vptofh() */
+ ushort ifid_pad; /* not written by cd9660_vptofh() */
+ int ifid_ino; /* inode number (i_number) */
+ long ifid_start; /* iso_start of the node */
+};
+
+/*
+ * Translate a file handle into a vnode.
+ *
+ * mp		mount the handle belongs to
+ * fhp		file handle, interpreted as a struct ifid
+ * nam		client address used for the export lookup
+ * vpp		receives the vnode, or NULLVP on a vget/stale failure
+ * exflagsp	receives the export flags for this client
+ * credanonp	receives the anonymous credential for this client
+ *
+ * Returns EACCES if the client has no export entry, the VFS_VGET()
+ * error if the node cannot be obtained, ESTALE if the node has a zero
+ * mode, and 0 otherwise.  Only ifid_ino is consulted here; ifid_start
+ * is merely printed under ISOFS_DBG.
+ */
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+ register struct mount *mp;
+ struct fid *fhp;
+ struct sockaddr *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred **credanonp;
+{
+ struct ifid *ifhp = (struct ifid *)fhp;
+ register struct iso_node *ip;
+ register struct netcred *np;
+ register struct iso_mnt *imp = VFSTOISOFS(mp);
+ struct vnode *nvp;
+ int error;
+
+#ifdef ISOFS_DBG
+ printf("fhtovp: ino %d, start %ld\n",
+ ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+
+ /*
+ * Get the export permission structure for this <mp, client> tuple.
+ */
+ np = vfs_export_lookup(mp, &imp->im_export, nam);
+ if (np == NULL)
+ return (EACCES);
+
+ if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) {
+ *vpp = NULLVP;
+ return (error);
+ }
+ ip = VTOI(nvp);
+ /* A zero mode is treated as a stale handle. */
+ if (ip->inode.iso_mode == 0) {
+ vput(nvp);
+ *vpp = NULLVP;
+ return (ESTALE);
+ }
+ *vpp = nvp;
+ *exflagsp = np->netc_exflags;
+ *credanonp = &np->netc_anon;
+ return (0);
+}
+
+/*
+ * Get the vnode for inode number ino on mount mp into *vpp.
+ * Thin wrapper around cd9660_vget_internal() that passes relocated = 0
+ * (the RRIP-dependent alternative is compiled out) and no directory
+ * record, so the record is read from disk.
+ */
+int
+cd9660_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ /*
+ * XXXX
+ * It would be nice if we didn't always set the `relocated' flag
+ * and force the extra read, but I don't want to think about fixing
+ * that right now.
+ */
+ return (cd9660_vget_internal(mp, ino, vpp,
+#if 0
+ VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP,
+#else
+ 0,
+#endif
+ (struct iso_directory_record *)0));
+}
+
+/*
+ * Look up or create the vnode for inode number ino.
+ *
+ * mp		mount the inode lives on
+ * ino		inode number; its block and in-block offset are derived
+ *		with lblkno()/blkoff()
+ * vpp		receives the vnode
+ * relocated	non-zero: take the attributes from the first record of
+ *		the directory itself (its `.' entry)
+ * isodir	directory record to use, or 0 to read it from disk
+ *
+ * Returns 0 on success.  ESTALE is returned when the computed location
+ * is outside the volume or the record would cross a block boundary;
+ * read errors are passed through.
+ */
+int
+cd9660_vget_internal(mp, ino, vpp, relocated, isodir)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+ int relocated;
+ struct iso_directory_record *isodir;
+{
+ struct iso_mnt *imp;
+ struct iso_node *ip;
+ struct buf *bp;
+ struct vnode *vp, *nvp;
+ dev_t dev;
+ int error;
+
+ imp = VFSTOISOFS(mp);
+ dev = imp->im_dev;
+ /* Already known: hand back the hashed vnode. */
+ if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP)
+ return (0);
+
+ /* Allocate a new vnode/iso_node. */
+ if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) {
+ *vpp = NULLVP;
+ return (error);
+ }
+ MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE,
+ M_WAITOK);
+ bzero((caddr_t)ip, sizeof(struct iso_node));
+ lockinit(&ip->i_lock, PINOD, "isonode", 0, 0);
+ vp->v_data = ip;
+ ip->i_vnode = vp;
+ ip->i_dev = dev;
+ ip->i_number = ino;
+
+ /*
+ * Put it onto its hash chain and lock it so that other requests for
+ * this inode will block if they arrive while we are sleeping waiting
+ * for old data structures to be purged or for the contents of the
+ * disk portion of this inode to be read.
+ */
+ cd9660_ihashins(ip);
+
+ if (isodir == 0) {
+ /* No record supplied: locate and read it from the device. */
+ int lbn, off;
+
+ lbn = lblkno(imp, ino);
+ if (lbn >= imp->volume_space_size) {
+ vput(vp);
+ printf("fhtovp: lbn exceed volume space %d\n", lbn);
+ return (ESTALE);
+ }
+
+ off = blkoff(imp, ino);
+ if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+ vput(vp);
+ printf("fhtovp: crosses block boundary %d\n",
+ off + ISO_DIRECTORY_RECORD_SIZE);
+ return (ESTALE);
+ }
+
+ error = bread(imp->im_devvp,
+ lbn << (imp->im_bshift - DEV_BSHIFT),
+ imp->logical_block_size, NOCRED, &bp);
+ if (error) {
+ vput(vp);
+ brelse(bp);
+ printf("fhtovp: bread error %d\n",error);
+ return (error);
+ }
+ isodir = (struct iso_directory_record *)(bp->b_data + off);
+
+ /* Re-check with the record's own length now that it is readable. */
+ if (off + isonum_711(isodir->length) >
+ imp->logical_block_size) {
+ vput(vp);
+ if (bp != 0)
+ brelse(bp);
+ printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+ off +isonum_711(isodir->length), off,
+ isonum_711(isodir->length));
+ return (ESTALE);
+ }
+
+#if 0
+ if (isonum_733(isodir->extent) +
+ isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) {
+ if (bp != 0)
+ brelse(bp);
+ printf("fhtovp: file start miss %d vs %d\n",
+ isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length),
+ ifhp->ifid_start);
+ return (ESTALE);
+ }
+#endif
+ } else
+ bp = 0;
+
+ ip->i_mnt = imp;
+ ip->i_devvp = imp->im_devvp;
+ VREF(ip->i_devvp);
+
+ if (relocated) {
+ /*
+ * On relocated directories we must
+ * read the `.' entry out of a dir.
+ */
+ ip->iso_start = ino >> imp->im_bshift;
+ if (bp != 0)
+ brelse(bp);
+ if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) {
+ vput(vp);
+ return (error);
+ }
+ isodir = (struct iso_directory_record *)bp->b_data;
+ }
+
+ ip->iso_extent = isonum_733(isodir->extent);
+ ip->i_size = isonum_733(isodir->size);
+ ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+
+ /*
+ * Setup time stamp, attribute
+ */
+ vp->v_type = VNON;
+ switch (imp->iso_ftype) {
+ default: /* ISO_FTYPE_9660 */
+ {
+ struct buf *bp2;
+ int off;
+ /* Fetch the extended attribute record only if asked for and present. */
+ if ((imp->im_flags & ISOFSMNT_EXTATT)
+ && (off = isonum_711(isodir->ext_attr_length)))
+ cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL,
+ &bp2);
+ else
+ bp2 = NULL;
+ cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660);
+ cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660);
+ if (bp2)
+ brelse(bp2);
+ break;
+ }
+ case ISO_FTYPE_RRIP:
+ cd9660_rrip_analyze(isodir, ip, imp);
+ break;
+ }
+
+ if (bp != 0)
+ brelse(bp);
+
+ /*
+ * Initialize the associated vnode
+ */
+ switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) {
+ case VFIFO:
+ vp->v_op = cd9660_fifoop_p;
+ break;
+ case VCHR:
+ case VBLK:
+ /*
+ * if device, look at device number table for translation
+ */
+ vp->v_op = cd9660_specop_p;
+ if ((nvp = checkalias(vp, ip->inode.iso_rdev, mp)) != NULL) {
+ /*
+ * Discard unneeded vnode, but save its iso_node.
+ * Note that the lock is carried over in the iso_node
+ * to the replacement vnode.
+ */
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased inode.
+ */
+ vp = nvp;
+ ip->i_vnode = vp;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* A node whose extent is the root extent is the filesystem root. */
+ if (ip->iso_extent == imp->root_extent)
+ vp->v_flag |= VROOT;
+
+ /*
+ * XXX need generation number?
+ */
+
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Build the file handle for a vnode.
+ * The handle is written through fhp as a struct ifid holding the
+ * handle length, the inode number and the node's iso_start.
+ * Always returns 0.
+ */
+/* ARGSUSED */
+int
+cd9660_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct iso_node *node;
+	struct ifid *handle;
+
+	node = VTOI(vp);
+	handle = (struct ifid *)fhp;
+
+	handle->ifid_len = sizeof(struct ifid);
+	handle->ifid_ino = node->i_number;
+	handle->ifid_start = node->iso_start;
+
+#ifdef ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	    handle->ifid_ino,handle->ifid_start);
+#endif
+	return 0;
+}
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
new file mode 100644
index 0000000..5ec970a
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -0,0 +1,920 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vnops.c 8.19 (Berkeley) 5/27/95
+ * $Id: cd9660_vnops.c,v 1.53 1998/07/04 20:45:30 julian Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/unistd.h>
+
+#include <vm/vm.h>
+#include <vm/vm_zone.h>
+#include <vm/vnode_pager.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * Forward declarations of the vnode operations and the readdir helpers
+ * defined later in this file.  All of them are declared static here.
+ */
+static int cd9660_setattr __P((struct vop_setattr_args *));
+static int cd9660_access __P((struct vop_access_args *));
+static int cd9660_getattr __P((struct vop_getattr_args *));
+static int cd9660_pathconf __P((struct vop_pathconf_args *));
+static int cd9660_read __P((struct vop_read_args *));
+/* Incomplete type; the full definition precedes iso_uiodir() below. */
+struct isoreaddir;
+static int iso_uiodir __P((struct isoreaddir *idp, struct dirent *dp,
+ off_t off));
+static int iso_shipdir __P((struct isoreaddir *idp));
+static int cd9660_readdir __P((struct vop_readdir_args *));
+static int cd9660_readlink __P((struct vop_readlink_args *ap));
+static int cd9660_abortop __P((struct vop_abortop_args *));
+static int cd9660_strategy __P((struct vop_strategy_args *));
+static int cd9660_print __P((struct vop_print_args *));
+static int cd9660_getpages __P((struct vop_getpages_args *));
+static int cd9660_putpages __P((struct vop_putpages_args *));
+
+/*
+ * Setattr vnode op.
+ *
+ * Any attempt to change flags, owner, group, access/modify time or mode
+ * is rejected with EROFS.  A size change is rejected with EISDIR on a
+ * directory and with EROFS on a regular file or symbolic link; for every
+ * other vnode type it is accepted and ignored.  A request that changes
+ * nothing returns 0.
+ */
+int
+cd9660_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vattr *attrs = ap->a_vap;
+
+	if (vap_changes_metadata:
+	    0) ;
+	if (attrs->va_flags != (u_long)VNOVAL ||
+	    attrs->va_uid != (uid_t)VNOVAL ||
+	    attrs->va_gid != (gid_t)VNOVAL ||
+	    attrs->va_atime.tv_sec != VNOVAL ||
+	    attrs->va_mtime.tv_sec != VNOVAL ||
+	    attrs->va_mode != (mode_t)VNOVAL)
+		return (EROFS);
+
+	/* Nothing else can be refused unless a new size was supplied. */
+	if (attrs->va_size == (u_quad_t)VNOVAL)
+		return (0);
+
+	switch (ap->a_vp->v_type) {
+	case VDIR:
+		return (EISDIR);
+	case VLNK:
+	case VREG:
+		return (EROFS);
+	default:
+		/* VCHR, VBLK, VSOCK, VFIFO, VNON, VBAD: silently accepted. */
+		break;
+	}
+	return (0);
+}
+
+/*
+ * Access vnode op: check the requested VREAD/VWRITE/VEXEC bits against
+ * the mode stored in the iso_node.
+ *
+ * Write access to directories, symbolic links and regular files is always
+ * refused with EROFS.  Uid 0 is granted everything else.  Otherwise the
+ * owner bits apply if the credential's uid matches the file's uid, the
+ * group bits if any of the credential's groups matches the file's gid,
+ * and the "other" bits in all remaining cases.  Returns 0 or EACCES.
+ */
+/* ARGSUSED */
+static int
+cd9660_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct iso_node *ip = VTOI(vp);
+	struct ucred *cred = ap->a_cred;
+	mode_t want = ap->a_mode;
+	mode_t rbit, wbit, xbit, need;
+	int idx, ingroup;
+
+	/*
+	 * Writes are only conceivable on sockets, fifos and device nodes
+	 * that live on the file system.
+	 */
+	if ((want & VWRITE) &&
+	    (vp->v_type == VDIR || vp->v_type == VLNK || vp->v_type == VREG))
+		return (EROFS);
+
+	/* User id 0 always gets access. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	/* Select which permission triple applies to this credential. */
+	if (cred->cr_uid == ip->inode.iso_uid) {
+		rbit = S_IRUSR;
+		wbit = S_IWUSR;
+		xbit = S_IXUSR;
+	} else {
+		ingroup = 0;
+		for (idx = 0; idx < cred->cr_ngroups; idx++)
+			if (ip->inode.iso_gid == cred->cr_groups[idx]) {
+				ingroup = 1;
+				break;
+			}
+		if (ingroup) {
+			rbit = S_IRGRP;
+			wbit = S_IWGRP;
+			xbit = S_IXGRP;
+		} else {
+			rbit = S_IROTH;
+			wbit = S_IWOTH;
+			xbit = S_IXOTH;
+		}
+	}
+
+	/* Every requested bit must be present in the file's mode. */
+	need = 0;
+	if (want & VEXEC)
+		need |= xbit;
+	if (want & VREAD)
+		need |= rbit;
+	if (want & VWRITE)
+		need |= wbit;
+	return ((ip->inode.iso_mode & need) == need ? 0 : EACCES);
+}
+
+/*
+ * Getattr vnode op: fill *ap->a_vap from the in-core iso_node.
+ *
+ * For a symbolic link whose recorded size is 0 the link target is read
+ * through cd9660_readlink() into a temporary MAXPATHLEN buffer so that
+ * va_size can report the target's length; if that read fails va_size
+ * stays at the recorded size.  Always returns 0.
+ */
+static int
+cd9660_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+
+{
+	struct vnode *vp = ap->a_vp;
+	register struct vattr *vap = ap->a_vap;
+	register struct iso_node *ip = VTOI(vp);
+
+	vap->va_fsid = ip->i_dev;
+	vap->va_fileid = ip->i_number;
+
+	vap->va_mode = ip->inode.iso_mode;
+	vap->va_nlink = ip->inode.iso_links;
+	vap->va_uid = ip->inode.iso_uid;
+	vap->va_gid = ip->inode.iso_gid;
+	vap->va_atime = ip->inode.iso_atime;
+	vap->va_mtime = ip->inode.iso_mtime;
+	vap->va_ctime = ip->inode.iso_ctime;
+	vap->va_rdev = ip->inode.iso_rdev;
+
+	vap->va_size = (u_quad_t) ip->i_size;
+	if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) {
+		struct vop_readlink_args rl_args;
+		struct iovec iov;
+		struct uio io;
+		char *linkbuf;
+
+		MALLOC(linkbuf, char *, MAXPATHLEN, M_TEMP, M_WAITOK);
+
+		/* One kernel-space segment covering the scratch buffer. */
+		iov.iov_base = linkbuf;
+		iov.iov_len = MAXPATHLEN;
+		io.uio_iov = &iov;
+		io.uio_iovcnt = 1;
+		io.uio_offset = 0;
+		io.uio_resid = MAXPATHLEN;
+		io.uio_rw = UIO_READ;
+		io.uio_segflg = UIO_SYSSPACE;
+		io.uio_procp = ap->a_p;
+
+		rl_args.a_vp = ap->a_vp;
+		rl_args.a_uio = &io;
+		rl_args.a_cred = ap->a_cred;
+
+		/* Bytes consumed from the buffer == length of the target. */
+		if (cd9660_readlink(&rl_args) == 0)
+			vap->va_size = MAXPATHLEN - io.uio_resid;
+		FREE(linkbuf, M_TEMP);
+	}
+	vap->va_flags = 0;
+	vap->va_gen = 1;
+	vap->va_blocksize = ip->i_mnt->logical_block_size;
+	vap->va_bytes = (u_quad_t) ip->i_size;
+	vap->va_type = vp->v_type;
+	vap->va_filerev = 0;
+	return (0);
+}
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies file data to ap->a_uio one logical block at a time until the
+ * request is satisfied, end of file is reached, or an error occurs.
+ * Returns 0 on success or at/after end of file, EINVAL for a negative
+ * offset, or the error from the block read / uiomove().
+ */
+static int
+cd9660_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ register struct iso_node *ip = VTOI(vp);
+ register struct iso_mnt *imp;
+ struct buf *bp;
+ daddr_t lbn, rablock;
+ off_t diff;
+ int rasize, error = 0;
+ long size, n, on;
+
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ ip->i_flag |= IN_ACCESS;
+ imp = ip->i_mnt;
+ do {
+ /* Logical block, offset within it, and bytes wanted from it. */
+ lbn = lblkno(imp, uio->uio_offset);
+ on = blkoff(imp, uio->uio_offset);
+ n = min((u_int)(imp->logical_block_size - on),
+ uio->uio_resid);
+ /* Clamp to the bytes left before end of file. */
+ diff = (off_t)ip->i_size - uio->uio_offset;
+ if (diff <= 0)
+ return (0);
+ if (diff < n)
+ n = diff;
+ size = blksize(imp, ip, lbn);
+ rablock = lbn + 1;
+ if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ /* Clustered read only if the next block is inside the file. */
+ if (lblktosize(imp, rablock) < ip->i_size)
+ error = cluster_read(vp, (off_t)ip->i_size,
+ lbn, size, NOCRED, uio->uio_resid,
+ (ap->a_ioflag >> 16), &bp);
+ else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+ } else {
+ /*
+ * Clustering disabled: read one block ahead only when this
+ * block follows the previously read one and the next block
+ * is still inside the file.
+ */
+ if (vp->v_lastr + 1 == lbn &&
+ lblktosize(imp, rablock) < ip->i_size) {
+ rasize = blksize(imp, ip, rablock);
+ error = breadn(vp, lbn, size, &rablock,
+ &rasize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, lbn, size, NOCRED, &bp);
+ }
+ vp->v_lastr = lbn;
+ /*
+ * NOTE(review): bp->b_resid is read before error is tested; this
+ * assumes the read routines always hand back a buffer, even on
+ * failure -- confirm.
+ */
+ n = min(n, size - bp->b_resid);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+
+ error = uiomove(bp->b_data + on, (int)n, uio);
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+}
+
+/*
+ * Structure for reading directories.
+ *
+ * Working state shared by cd9660_readdir(), iso_shipdir() and
+ * iso_uiodir() for the duration of one readdir call.
+ */
+struct isoreaddir {
+ struct dirent saveent; /* held-back regular entry (iso_shipdir) */
+ struct dirent assocent; /* held-back associated-file entry (iso_shipdir) */
+ struct dirent current; /* entry being built from the current record */
+ off_t saveoff; /* curroff at the time saveent was stashed */
+ off_t assocoff; /* curroff at the time assocent was stashed */
+ off_t curroff; /* directory offset being scanned */
+ struct uio *uio; /* destination for finished entries */
+ off_t uio_off; /* offset passed with the last entry copied out */
+ int eofflag; /* cleared by iso_uiodir() when output space runs out */
+ u_long *cookies; /* next free cookie slot, or NULL if none wanted */
+ int ncookies; /* cookie slots still free */
+};
+
+/*
+ * Copy one finished directory entry out to the caller.
+ *
+ * NUL-terminates the name, computes d_reclen, stores `off' as the entry's
+ * cookie when cookies are being collected, and uiomove()s the entry.
+ * Returns 0 on success (and remembers `off' in idp->uio_off), -1 with
+ * idp->eofflag cleared when the uio or the cookie array has no room left,
+ * or the error from uiomove().
+ */
+int
+iso_uiodir(idp,dp,off)
+	struct isoreaddir *idp;
+	struct dirent *dp;
+	off_t off;
+{
+	int rv;
+
+	dp->d_name[dp->d_namlen] = 0;
+	dp->d_reclen = GENERIC_DIRSIZ(dp);
+
+	/* Out of room in either the output buffer or the cookie array. */
+	if (idp->uio->uio_resid < dp->d_reclen ||
+	    (idp->cookies && idp->ncookies <= 0)) {
+		idp->eofflag = 0;
+		return (-1);
+	}
+
+	if (idp->cookies) {
+		*idp->cookies++ = off;
+		--idp->ncookies;
+	}
+
+	rv = uiomove((caddr_t) dp, dp->d_reclen, idp->uio);
+	if (rv == 0)
+		idp->uio_off = off;
+	return (rv);
+}
+
+/*
+ * Queue idp->current for output, flushing previously held entries first
+ * when the name changes.
+ *
+ * An entry whose name is longer than one character and starts with
+ * ASSOCCHAR is treated as an associated file; the comparison below uses
+ * its name without that leading character.  At most one regular entry
+ * (saveent) and one associated entry (assocent) are held back.  When the
+ * current name differs from the held name, the held entries are written
+ * with iso_uiodir() (associated one first) and then current replaces the
+ * matching slot.  Returns 0, or whatever iso_uiodir() returned on failure.
+ */
+int
+iso_shipdir(idp)
+ struct isoreaddir *idp;
+{
+ struct dirent *dp;
+ int cl, sl, assoc;
+ int error;
+ char *cname, *sname;
+
+ /* Current name, minus the ASSOCCHAR prefix if present. */
+ cl = idp->current.d_namlen;
+ cname = idp->current.d_name;
+assoc = (cl > 1) && (*cname == ASSOCCHAR);
+ if (assoc) {
+ cl--;
+ cname++;
+ }
+
+ /*
+ * Held name to compare against: saveent if it is in use, otherwise
+ * assocent with its first character skipped.
+ */
+ dp = &idp->saveent;
+ sname = dp->d_name;
+ if (!(sl = dp->d_namlen)) {
+ dp = &idp->assocent;
+ sname = dp->d_name + 1;
+ sl = dp->d_namlen - 1;
+ }
+ if (sl > 0) {
+ /* Different name: write out whatever is being held. */
+ if (sl != cl
+ || bcmp(sname,cname,sl)) {
+ if (idp->assocent.d_namlen) {
+ if ((error = iso_uiodir(idp,&idp->assocent,idp->assocoff)) != 0)
+ return (error);
+ idp->assocent.d_namlen = 0;
+ }
+ if (idp->saveent.d_namlen) {
+ if ((error = iso_uiodir(idp,&idp->saveent,idp->saveoff)) != 0)
+ return (error);
+ idp->saveent.d_namlen = 0;
+ }
+ }
+ }
+ /* Stash the current entry (and its offset) in the matching slot. */
+ idp->current.d_reclen = GENERIC_DIRSIZ(&idp->current);
+ if (assoc) {
+ idp->assocoff = idp->curroff;
+ bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+ } else {
+ idp->saveoff = idp->curroff;
+ bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+ }
+ return (0);
+}
+
+/*
+ * Vnode op for readdir.
+ *
+ * Walks the directory records starting at uio->uio_offset, converts each
+ * one to a struct dirent and hands it to the caller through iso_uiodir()
+ * (directly, or via iso_shipdir() on ISO_FTYPE_DEFAULT mounts).
+ *
+ * When ap->a_ncookies is non-NULL an array of per-entry offsets is
+ * allocated here; on success it is returned through *ap->a_cookies with
+ * the number of slots used in *ap->a_ncookies, on error it is freed.
+ *
+ * Returns 0 or an errno value.  On return uio->uio_offset is the offset
+ * recorded with the last entry copied out (or the starting offset if no
+ * entry was copied), and *ap->a_eofflag is 0 only if iso_uiodir() ran out
+ * of output or cookie space.
+ */
+static int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		int *a_ncookies;
+		u_long **a_cookies;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	struct vnode *vdp = ap->a_vp;
+	struct iso_node *dp;
+	struct iso_mnt *imp;
+	struct buf *bp = NULL;
+	struct iso_directory_record *ep;
+	int entryoffsetinblock;
+	doff_t endsearch;
+	u_long bmask;
+	int error = 0;
+	int reclen;
+	u_short namelen;
+	int ncookies = 0;
+	u_long *cookies = NULL;
+
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	bmask = imp->im_bmask;
+
+	MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK);
+	idp->saveent.d_namlen = idp->assocent.d_namlen = 0;
+	/*
+	 * XXX
+	 * Is it worth trying to figure out the type?
+	 */
+	idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type =
+	    DT_UNKNOWN;
+	idp->uio = uio;
+	if (ap->a_ncookies == NULL) {
+		idp->cookies = NULL;
+	} else {
+		/*
+		 * Guess the number of cookies needed.
+		 * The array holds u_long elements, so size it from the
+		 * element type rather than from u_int.
+		 */
+		ncookies = uio->uio_resid / 16;
+		MALLOC(cookies, u_long *, ncookies * sizeof(*cookies), M_TEMP,
+		    M_WAITOK);
+		idp->cookies = cookies;
+		idp->ncookies = ncookies;
+	}
+	idp->eofflag = 1;
+	idp->curroff = uio->uio_offset;
+	/*
+	 * iso_uiodir() only updates uio_off when it copies an entry out;
+	 * start from the caller's offset so that an empty pass does not
+	 * write an uninitialized value back into uio->uio_offset.
+	 */
+	idp->uio_off = uio->uio_offset;
+
+	/* Starting in mid-block: read that block now. */
+	if ((entryoffsetinblock = idp->curroff & bmask) &&
+	    (error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp))) {
+		/* Do not leak the cookie array on this early exit. */
+		if (cookies != NULL)
+			free(cookies, M_TEMP);
+		FREE(idp, M_TEMP);
+		return (error);
+	}
+	endsearch = dp->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if ((idp->curroff & bmask) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if ((error =
+			    cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp)) != 0)
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			((char *)bp->b_data + entryoffsetinblock);
+
+		reclen = isonum_711(ep->length);
+		if (reclen == 0) {
+			/* skip to next block, if any */
+			idp->curroff =
+			    (idp->curroff & ~bmask) + imp->logical_block_size;
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711(ep->name_len);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		/*
+		 * Flag bit 2 set: take the file number from isodirino();
+		 * otherwise use the byte address of this record.
+		 */
+		if (isonum_711(ep->flags)&2)
+			idp->current.d_fileno = isodirino(ep, imp);
+		else
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+				entryoffsetinblock;
+
+		idp->curroff += reclen;
+
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			cd9660_rrip_getname(ep,idp->current.d_name, &namelen,
+					   &idp->current.d_fileno,imp);
+			idp->current.d_namlen = (u_char)namelen;
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 || ISO_FTYPE_HIGH_SIERRA*/
+			/*
+			 * Pre-load "..": a first name byte of 0 is reported
+			 * as "." (length 1), a first byte of 1 as "..".
+			 */
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+					   idp->current.d_name, &namelen,
+					   imp->iso_ftype == ISO_FTYPE_9660,
+					   isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)namelen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	/* Flush whatever iso_shipdir() is still holding back. */
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	/* -1 from iso_uiodir() only means "output full", not a failure. */
+	if (error < 0)
+		error = 0;
+
+	if (ap->a_ncookies != NULL) {
+		if (error)
+			free(cookies, M_TEMP);
+		else {
+			/*
+			 * Work out the number of cookies actually used.
+			 */
+			*ap->a_ncookies = ncookies - idp->ncookies;
+			*ap->a_cookies = cookies;
+		}
+	}
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+	*ap->a_eofflag = idp->eofflag;
+
+	FREE(idp, M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return the target name of a symbolic link.
+ *
+ * Only supported on Rock Ridge (ISO_FTYPE_RRIP) mounts; everything else,
+ * and every failure below, is reported as EINVAL (except an error from
+ * the final uiomove(), which is returned as is).
+ *
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node ISONODE;
+typedef struct iso_mnt ISOMNT;
+static int
+cd9660_readlink(ap)
+ struct vop_readlink_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ ISONODE *ip;
+ ISODIR *dirp;
+ ISOMNT *imp;
+ struct buf *bp;
+ struct uio *uio;
+ u_short symlen;
+ int error;
+ char *symname;
+
+ ip = VTOI(ap->a_vp);
+ imp = ip->i_mnt;
+ uio = ap->a_uio;
+
+ if (imp->iso_ftype != ISO_FTYPE_RRIP)
+ return (EINVAL);
+
+ /*
+ * Read, from the underlying device, the logical block that holds
+ * this inode's directory record.  i_number is used as the byte
+ * address of that record.
+ */
+ error = bread(imp->im_devvp,
+ (ip->i_number >> imp->im_bshift) <<
+ (imp->im_bshift - DEV_BSHIFT),
+ imp->logical_block_size, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (EINVAL);
+ }
+
+ /*
+ * Set up the directory record pointer for this inode.
+ */
+ dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask));
+
+ /*
+ * Sanity check: the record must not cross the end of the
+ * logical block.
+ */
+ if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length)
+ > (unsigned)imp->logical_block_size) {
+ brelse(bp);
+ return (EINVAL);
+ }
+
+ /*
+ * Now get a buffer for the name.  A kernel-space caller's own
+ * buffer is written directly; otherwise borrow a namei buffer.
+ * NOTE(review): in the UIO_SYSSPACE case nothing visible here bounds
+ * the name length by iov_len -- confirm the callers' buffer sizes.
+ */
+ if (uio->uio_segflg == UIO_SYSSPACE)
+ symname = uio->uio_iov->iov_base;
+ else
+ symname = zalloc(namei_zone);
+
+ /*
+ * Gather the symbolic link name from the record; a return of 0
+ * is treated as failure.
+ */
+ if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) {
+ if (uio->uio_segflg != UIO_SYSSPACE)
+ zfree(namei_zone, symname);
+ brelse(bp);
+ return (EINVAL);
+ }
+ /*
+ * The directory block is no longer needed.
+ */
+ brelse(bp);
+
+ /*
+ * Return the symbolic name to the caller: copy out and free the
+ * borrowed buffer, or, for a kernel-space buffer that was filled
+ * in place, just advance the uio past the name.
+ */
+ if (uio->uio_segflg != UIO_SYSSPACE) {
+ error = uiomove(symname, symlen, uio);
+ zfree(namei_zone, symname);
+ return (error);
+ }
+ uio->uio_resid -= symlen;
+ uio->uio_iov->iov_base += symlen;
+ uio->uio_iov->iov_len -= symlen;
+ return (0);
+}
+
+/*
+ * Abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  The pathname buffer is returned to namei_zone when the component
+ * name has HASBUF set and SAVESTART clear.  Always returns 0.
+ */
+static int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		zfree(namei_zone, cnp->cn_pnbuf);
+	return (0);
+}
+
+/*
+ * Calculate the logical to physical mapping if not done already,
+ * then call the device strategy routine.
+ *
+ * Panics if invoked on a block or character device vnode.  Returns the
+ * VOP_BMAP() error if the mapping fails (the buffer is completed with
+ * B_ERROR set), otherwise 0.
+ */
+static int
+cd9660_strategy(ap)
+ struct vop_strategy_args /* {
+ struct vnode *a_vp;
+ struct buf *a_bp;
+ } */ *ap;
+{
+ register struct buf *bp = ap->a_bp;
+ register struct vnode *vp = bp->b_vp;
+ register struct iso_node *ip;
+ int error;
+
+ ip = VTOI(vp);
+ if (vp->v_type == VBLK || vp->v_type == VCHR)
+ panic("cd9660_strategy: spec");
+ /* b_blkno still equal to b_lblkno means "not yet mapped". */
+ if (bp->b_blkno == bp->b_lblkno) {
+ if ((error =
+ VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL))) {
+ bp->b_error = error;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return (error);
+ }
+ /* No backing block: hand back a cleared buffer. */
+ if ((long)bp->b_blkno == -1)
+ clrbuf(bp);
+ }
+ if ((long)bp->b_blkno == -1) {
+ biodone(bp);
+ return (0);
+ }
+ /* Pass the request on to the device the file system lives on. */
+ vp = ip->i_devvp;
+ bp->b_dev = vp->v_rdev;
+ VOP_STRATEGY(vp, bp);
+ return (0);
+}
+
+/*
+ * Print vnode op.
+ *
+ * Only emits a fixed tag line identifying the vnode as an isofs vnode;
+ * no per-inode fields are printed.  Always returns 0.
+ */
+static int
+cd9660_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ printf("tag VT_ISOFS, isofs vnode\n");
+ return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to cd9660 filesystems.
+ *
+ * Stores the answer in *ap->a_retval and returns 0, or returns EINVAL
+ * (leaving *ap->a_retval untouched) for a name that is not handled.
+ */
+static int
+cd9660_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		register_t *a_retval;
+	} */ *ap;
+{
+	int value;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+	case _PC_CHOWN_RESTRICTED:
+	case _PC_NO_TRUNC:
+		value = 1;
+		break;
+	case _PC_NAME_MAX:
+		/* Rock Ridge names may use the full NAME_MAX; others 37. */
+		if (VTOI(ap->a_vp)->i_mnt->iso_ftype == ISO_FTYPE_RRIP)
+			value = NAME_MAX;
+		else
+			value = 37;
+		break;
+	case _PC_PATH_MAX:
+		value = PATH_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		value = PIPE_BUF;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * get page routine
+ *
+ * Forwards the request unchanged to vnode_pager_generic_getpages() and
+ * returns its result.
+ *
+ * XXX By default, wimp out... note that a_offset is ignored (and always
+ * XXX has been).
+ */
+int
+cd9660_getpages(ap)
+ struct vop_getpages_args *ap;
+{
+ return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+ ap->a_reqpage);
+}
+
+/*
+ * put page routine
+ *
+ * Forwards the request unchanged to vnode_pager_generic_putpages() and
+ * returns its result.
+ *
+ * XXX By default, wimp out... note that a_offset is ignored (and always
+ * XXX has been).
+ */
+int
+cd9660_putpages(ap)
+ struct vop_putpages_args *ap;
+{
+ return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
+ ap->a_sync, ap->a_rtvals);
+}
+
+/*
+ * Global vfs data structures for cd9660
+ *
+ * Operation vector for ordinary cd9660 vnodes.  Operations not listed
+ * here go to vop_defaultop.  Lookups enter through vfs_cache_lookup,
+ * with cd9660_lookup registered as the cached-lookup handler.
+ */
+vop_t **cd9660_vnodeop_p;
+static struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_abortop_desc, (vop_t *) cd9660_abortop },
+ { &vop_access_desc, (vop_t *) cd9660_access },
+ { &vop_bmap_desc, (vop_t *) cd9660_bmap },
+ { &vop_cachedlookup_desc, (vop_t *) cd9660_lookup },
+ { &vop_getattr_desc, (vop_t *) cd9660_getattr },
+ { &vop_inactive_desc, (vop_t *) cd9660_inactive },
+ { &vop_islocked_desc, (vop_t *) vop_stdislocked },
+ { &vop_lock_desc, (vop_t *) vop_stdlock },
+ { &vop_lookup_desc, (vop_t *) vfs_cache_lookup },
+ { &vop_pathconf_desc, (vop_t *) cd9660_pathconf },
+ { &vop_print_desc, (vop_t *) cd9660_print },
+ { &vop_read_desc, (vop_t *) cd9660_read },
+ { &vop_readdir_desc, (vop_t *) cd9660_readdir },
+ { &vop_readlink_desc, (vop_t *) cd9660_readlink },
+ { &vop_reclaim_desc, (vop_t *) cd9660_reclaim },
+ { &vop_setattr_desc, (vop_t *) cd9660_setattr },
+ { &vop_strategy_desc, (vop_t *) cd9660_strategy },
+ { &vop_unlock_desc, (vop_t *) vop_stdunlock },
+ { &vop_getpages_desc, (vop_t *) cd9660_getpages },
+ { &vop_putpages_desc, (vop_t *) cd9660_putpages },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+ { &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+VNODEOP_SET(cd9660_vnodeop_opv_desc);
+
+/*
+ * Special device vnode ops
+ *
+ * Operation vector for block and character device nodes found on the
+ * file system.  The cd9660 routines listed here override the defaults;
+ * everything else goes to spec_vnoperate.
+ */
+vop_t **cd9660_specop_p;
+static struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+ { &vop_default_desc, (vop_t *) spec_vnoperate },
+ { &vop_access_desc, (vop_t *) cd9660_access },
+ { &vop_getattr_desc, (vop_t *) cd9660_getattr },
+ { &vop_inactive_desc, (vop_t *) cd9660_inactive },
+ { &vop_islocked_desc, (vop_t *) vop_stdislocked },
+ { &vop_lock_desc, (vop_t *) vop_stdlock },
+ { &vop_print_desc, (vop_t *) cd9660_print },
+ { &vop_reclaim_desc, (vop_t *) cd9660_reclaim },
+ { &vop_setattr_desc, (vop_t *) cd9660_setattr },
+ { &vop_unlock_desc, (vop_t *) vop_stdunlock },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc cd9660_specop_opv_desc =
+ { &cd9660_specop_p, cd9660_specop_entries };
+VNODEOP_SET(cd9660_specop_opv_desc);
+
+/*
+ * Fifo vnode ops
+ *
+ * Operation vector for fifos found on the file system.  The cd9660
+ * routines listed here override the defaults; everything else goes to
+ * fifo_vnoperate.
+ */
+vop_t **cd9660_fifoop_p;
+static struct vnodeopv_entry_desc cd9660_fifoop_entries[] = {
+ { &vop_default_desc, (vop_t *) fifo_vnoperate },
+ { &vop_access_desc, (vop_t *) cd9660_access },
+ { &vop_getattr_desc, (vop_t *) cd9660_getattr },
+ { &vop_inactive_desc, (vop_t *) cd9660_inactive },
+ { &vop_islocked_desc, (vop_t *) vop_stdislocked },
+ { &vop_lock_desc, (vop_t *) vop_stdlock },
+ { &vop_print_desc, (vop_t *) cd9660_print },
+ { &vop_reclaim_desc, (vop_t *) cd9660_reclaim },
+ { &vop_setattr_desc, (vop_t *) cd9660_setattr },
+ { &vop_unlock_desc, (vop_t *) vop_stdunlock },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc cd9660_fifoop_opv_desc =
+ { &cd9660_fifoop_p, cd9660_fifoop_entries };
+
+VNODEOP_SET(cd9660_fifoop_opv_desc);
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
new file mode 100644
index 0000000..7b50fb6
--- /dev/null
+++ b/sys/fs/cd9660/iso.h
@@ -0,0 +1,312 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso.h 8.6 (Berkeley) 5/10/95
+ * $Id: iso.h,v 1.15 1997/05/04 16:17:49 joerg Exp $
+ */
+
+/*
+ * Size in bytes of an on-disk field that occupies the 1-based byte
+ * positions `from' through `to' inclusive.  Both arguments are
+ * parenthesized so that an expression argument cannot regroup the
+ * subtraction.
+ */
+#define ISODCL(from, to) ((to) - (from) + 1)
+
+/*
+ * Generic view of one 2048-byte volume descriptor.  Field sizes come
+ * from ISODCL() applied to the 1-based byte positions in the sector.
+ * The type/id/version triple appears twice: at bytes 1-7 and, in the
+ * *_sierra fields, at bytes 9-15 (presumably the High Sierra layout,
+ * cf. ISO_SIERRA_ID -- confirm against the mount code).
+ */
+struct iso_volume_descriptor {
+ char type[ISODCL(1,1)]; /* 711 */
+ char id[ISODCL(2,6)];
+ char version[ISODCL(7,7)];
+ char unused[ISODCL(8,8)];
+ char type_sierra[ISODCL(9,9)]; /* 711 */
+ char id_sierra[ISODCL(10,14)];
+ char version_sierra[ISODCL(15,15)];
+ char data[ISODCL(16,2048)];
+};
+
+/* volume descriptor types (values of the descriptor's `type' byte) */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+
+/* 5-character identifier strings matching the descriptor's `id' field */
+#define ISO_STANDARD_ID "CD001"
+#define ISO_ECMA_ID "CDW01"
+
+/* identifier matching the `id_sierra' field */
+#define ISO_SIERRA_ID "CDROM"
+
+/*
+ * On-disk layout of the primary volume descriptor (2048 bytes).
+ * Every member is a byte array sized with ISODCL() from its 1-based
+ * byte positions.  The trailing comments name the encoding of the field:
+ * 711/723/733 fields are decoded with the isonum_711(), isonum_723() and
+ * isonum_733() helpers; the other tags presumably refer to sections of
+ * the ISO 9660 standard -- confirm.
+ */
+struct iso_primary_descriptor {
+ char type [ISODCL ( 1, 1)]; /* 711 */
+ char id [ISODCL ( 2, 6)];
+ char version [ISODCL ( 7, 7)]; /* 711 */
+ char unused1 [ISODCL ( 8, 8)];
+ char system_id [ISODCL ( 9, 40)]; /* achars */
+ char volume_id [ISODCL ( 41, 72)]; /* dchars */
+ char unused2 [ISODCL ( 73, 80)];
+ char volume_space_size [ISODCL ( 81, 88)]; /* 733 */
+ char unused3 [ISODCL ( 89, 120)];
+ char volume_set_size [ISODCL (121, 124)]; /* 723 */
+ char volume_sequence_number [ISODCL (125, 128)]; /* 723 */
+ char logical_block_size [ISODCL (129, 132)]; /* 723 */
+ char path_table_size [ISODCL (133, 140)]; /* 733 */
+ char type_l_path_table [ISODCL (141, 144)]; /* 731 */
+ char opt_type_l_path_table [ISODCL (145, 148)]; /* 731 */
+ char type_m_path_table [ISODCL (149, 152)]; /* 732 */
+ char opt_type_m_path_table [ISODCL (153, 156)]; /* 732 */
+ char root_directory_record [ISODCL (157, 190)]; /* 9.1 */
+ char volume_set_id [ISODCL (191, 318)]; /* dchars */
+ char publisher_id [ISODCL (319, 446)]; /* achars */
+ char preparer_id [ISODCL (447, 574)]; /* achars */
+ char application_id [ISODCL (575, 702)]; /* achars */
+ char copyright_file_id [ISODCL (703, 739)]; /* 7.5 dchars */
+ char abstract_file_id [ISODCL (740, 776)]; /* 7.5 dchars */
+ char bibliographic_file_id [ISODCL (777, 813)]; /* 7.5 dchars */
+ char creation_date [ISODCL (814, 830)]; /* 8.4.26.1 */
+ char modification_date [ISODCL (831, 847)]; /* 8.4.26.1 */
+ char expiration_date [ISODCL (848, 864)]; /* 8.4.26.1 */
+ char effective_date [ISODCL (865, 881)]; /* 8.4.26.1 */
+ char file_structure_version [ISODCL (882, 882)]; /* 711 */
+ char unused4 [ISODCL (883, 883)];
+ char application_data [ISODCL (884, 1395)];
+ char unused5 [ISODCL (1396, 2048)];
+};
+/* Equals the size of the descriptor structures above (2048 bytes). */
+#define ISO_DEFAULT_BLOCK_SIZE 2048
+
+/*
+ * On-disk layout of the High Sierra variant of the primary volume
+ * descriptor (2048 bytes).  Compared with struct iso_primary_descriptor
+ * the type/id/version fields start at byte 9 instead of byte 1 and the
+ * later fields sit at different offsets; several fields are of unknown
+ * meaning (unknown1..unknown5) and the date encodings are marked "?".
+ */
+struct iso_sierra_primary_descriptor {
+ char unknown1 [ISODCL ( 1, 8)]; /* 733 */
+ char type [ISODCL ( 9, 9)]; /* 711 */
+ char id [ISODCL ( 10, 14)];
+ char version [ISODCL ( 15, 15)]; /* 711 */
+ char unused1 [ISODCL ( 16, 16)];
+ char system_id [ISODCL ( 17, 48)]; /* achars */
+ char volume_id [ISODCL ( 49, 80)]; /* dchars */
+ char unused2 [ISODCL ( 81, 88)];
+ char volume_space_size [ISODCL ( 89, 96)]; /* 733 */
+ char unused3 [ISODCL ( 97, 128)];
+ char volume_set_size [ISODCL (129, 132)]; /* 723 */
+ char volume_sequence_number [ISODCL (133, 136)]; /* 723 */
+ char logical_block_size [ISODCL (137, 140)]; /* 723 */
+ char path_table_size [ISODCL (141, 148)]; /* 733 */
+ char type_l_path_table [ISODCL (149, 152)]; /* 731 */
+ char opt_type_l_path_table [ISODCL (153, 156)]; /* 731 */
+ char unknown2 [ISODCL (157, 160)]; /* 731 */
+ char unknown3 [ISODCL (161, 164)]; /* 731 */
+ char type_m_path_table [ISODCL (165, 168)]; /* 732 */
+ char opt_type_m_path_table [ISODCL (169, 172)]; /* 732 */
+ char unknown4 [ISODCL (173, 176)]; /* 732 */
+ char unknown5 [ISODCL (177, 180)]; /* 732 */
+ char root_directory_record [ISODCL (181, 214)]; /* 9.1 */
+ char volume_set_id [ISODCL (215, 342)]; /* dchars */
+ char publisher_id [ISODCL (343, 470)]; /* achars */
+ char preparer_id [ISODCL (471, 598)]; /* achars */
+ char application_id [ISODCL (599, 726)]; /* achars */
+ char copyright_id [ISODCL (727, 790)]; /* achars */
+ char creation_date [ISODCL (791, 806)]; /* ? */
+ char modification_date [ISODCL (807, 822)]; /* ? */
+ char expiration_date [ISODCL (823, 838)]; /* ? */
+ char effective_date [ISODCL (839, 854)]; /* ? */
+ char file_structure_version [ISODCL (855, 855)]; /* 711 */
+ char unused4 [ISODCL (856, 2048)];
+};
+
+/*
+ * On-disk layout of one directory record.  `length' is the size of the
+ * whole record and `name_len' the size of the name that starts at
+ * `name'; the name is variable-length, so `name' is declared with a
+ * single element only as a placeholder.
+ */
+struct iso_directory_record {
+ char length [ISODCL (1, 1)]; /* 711 */
+ char ext_attr_length [ISODCL (2, 2)]; /* 711 */
+ u_char extent [ISODCL (3, 10)]; /* 733 */
+ u_char size [ISODCL (11, 18)]; /* 733 */
+ char date [ISODCL (19, 25)]; /* 7 by 711 */
+ char flags [ISODCL (26, 26)];
+ char file_unit_size [ISODCL (27, 27)]; /* 711 */
+ char interleave [ISODCL (28, 28)]; /* 711 */
+ char volume_sequence_number [ISODCL (29, 32)]; /* 723 */
+ char name_len [ISODCL (33, 33)]; /* 711 */
+ char name [1]; /* XXX */
+};
+/* can't take sizeof(iso_directory_record), because of possible alignment
+ of the last entry (34 instead of 33) */
+/* Size of the fixed part of a record, i.e. everything before `name'. */
+#define ISO_DIRECTORY_RECORD_SIZE 33
+
+/*
+ * On-disk layout of an extended attribute record (250 bytes as declared
+ * here).  Field sizes come from ISODCL() applied to the 1-based byte
+ * positions; the trailing comments name each field's encoding.
+ */
+struct iso_extended_attributes {
+ u_char owner [ISODCL (1, 4)]; /* 723 */
+ u_char group [ISODCL (5, 8)]; /* 723 */
+ u_char perm [ISODCL (9, 10)]; /* 9.5.3 */
+ char ctime [ISODCL (11, 27)]; /* 8.4.26.1 */
+ char mtime [ISODCL (28, 44)]; /* 8.4.26.1 */
+ char xtime [ISODCL (45, 61)]; /* 8.4.26.1 */
+ char ftime [ISODCL (62, 78)]; /* 8.4.26.1 */
+ char recfmt [ISODCL (79, 79)]; /* 711 */
+ char recattr [ISODCL (80, 80)]; /* 711 */
+ u_char reclen [ISODCL (81, 84)]; /* 723 */
+ char system_id [ISODCL (85, 116)]; /* achars */
+ char system_use [ISODCL (117, 180)];
+ char version [ISODCL (181, 181)]; /* 711 */
+ char len_esc [ISODCL (182, 182)]; /* 711 */
+ char reserved [ISODCL (183, 246)];
+ u_char len_au [ISODCL (247, 250)]; /* 723 */
+};
+
+#ifdef KERNEL
+
+/*
+ * CD-ROM Format type, stored per mount in iso_mnt.iso_ftype and used to
+ * select how directory names and attributes are interpreted.
+ */
+enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP,
+ ISO_FTYPE_ECMA, ISO_FTYPE_HIGH_SIERRA };
+
+/* Fallback of 0 when no other header has defined ISOFSMNT_ROOT. */
+#ifndef ISOFSMNT_ROOT
+#define ISOFSMNT_ROOT 0
+#endif
+
+/*
+ * Per-mount state of a cd9660 file system; reached from a struct mount
+ * through VFSTOISOFS().
+ */
+struct iso_mnt {
+ int im_flags; /* mount flags (ISOFSMNT_*) */
+
+ struct mount *im_mountp;
+ dev_t im_dev;
+ struct vnode *im_devvp; /* vnode of the underlying device */
+
+ int logical_block_size; /* block size in bytes */
+ int im_bshift; /* shift used by lblkno()/lblktosize() */
+ int im_bmask; /* mask used by blkoff() */
+
+ int volume_space_size;
+ struct netexport im_export;
+
+ char root[ISODCL (157, 190)]; /* sized like root_directory_record */
+ int root_extent; /* extent compared against to flag VROOT */
+ int root_size;
+ enum ISO_FTYPE iso_ftype; /* format variant of this volume */
+
+ int rr_skip;
+ int rr_skip0;
+};
+
+/* Fetch the cd9660 per-mount data hanging off a struct mount. */
+#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data))
+
+/*
+ * Block arithmetic on a mount's logical block size:
+ * blkoff - byte offset of `loc' within its logical block
+ * lblktosize - byte offset at which logical block `blk' starts
+ * lblkno - logical block number containing byte offset `loc'
+ * blksize - size of a logical block (ip and lbn are ignored)
+ */
+#define blkoff(imp, loc) ((loc) & (imp)->im_bmask)
+#define lblktosize(imp, blk) ((blk) << (imp)->im_bshift)
+#define lblkno(imp, loc) ((loc) >> (imp)->im_bshift)
+#define blksize(imp, ip, lbn) ((imp)->logical_block_size)
+
+/* Entry points shared between the cd9660 source files. */
+int cd9660_vget_internal __P((struct mount *, ino_t, struct vnode **, int,
+ struct iso_directory_record *));
+int cd9660_init __P((struct vfsconf *));
+/* No sysctl support: the slot is filled with eopnotsupp, suitably cast. */
+#define cd9660_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+ size_t, struct proc *)))eopnotsupp)
+
+/* Operation vectors defined in cd9660_vnops.c. */
+extern vop_t **cd9660_vnodeop_p;
+extern vop_t **cd9660_specop_p;
+extern vop_t **cd9660_fifoop_p;
+
+/* File name and inode number helpers. */
+int isofncmp __P((u_char *, int, u_char *, int));
+void isofntrans __P((u_char *, int, u_char *, u_short *, int, int));
+ino_t isodirino __P((struct iso_directory_record *, struct iso_mnt *));
+
+#endif /* KERNEL */
+
+/*
+ * The isonum_xxx functions are inlined anyway, and could come handy even
+ * outside the kernel. Thus we don't hide them here.
+ */
+
+/*
+ * Decode a 711 field: return the single byte at p as an unsigned value
+ * (0..255).
+ */
+static __inline int isonum_711 __P((u_char *));
+static __inline int
+isonum_711(p)
+ u_char *p;
+{
+ return *p;
+}
+
+/*
+ * Decode a 712 field: return the single byte at p read through a plain
+ * char pointer.
+ * NOTE(review): the result is only negative for bytes >= 0x80 where
+ * plain char is signed -- confirm for the target platforms.
+ */
+static __inline int isonum_712 __P((char *));
+static __inline int
+isonum_712(p)
+ char *p;
+{
+ return *p;
+}
+
+#ifndef UNALIGNED_ACCESS
+
+/*
+ * Decode a 723 field: assemble a 16-bit value from the first two bytes
+ * at p, least significant byte first.  Works for any alignment and
+ * host byte order.
+ */
+static __inline int isonum_723 __P((u_char *));
+static __inline int
+isonum_723(p)
+	u_char *p;
+{
+	return (p[1] << 8) | p[0];
+}
+
+/*
+ * Decode a 733 field: assemble a 32-bit value from the first four bytes
+ * at p, least significant byte first.  Works for any alignment and
+ * host byte order.
+ *
+ * The bytes are combined as unsigned values: shifting a promoted int
+ * left by 24 overflows when p[3] >= 0x80.
+ */
+static __inline int isonum_733 __P((u_char *));
+static __inline int
+isonum_733(p)
+	u_char *p;
+{
+	return (int)((u_int)p[0] | ((u_int)p[1] << 8) |
+	    ((u_int)p[2] << 16) | ((u_int)p[3] << 24));
+}
+
+#else /* UNALIGNED_ACCESS */
+
+#if BYTE_ORDER == LITTLE_ENDIAN
+
+/*
+ * Little-endian hosts that tolerate unaligned loads: the value is read
+ * directly from the start of the field with a single 16-bit load.
+ */
+static __inline int
+isonum_723(p)
+	u_char *p;
+{
+	return *(u_int16_t *)p;
+}
+
+/* As above, for 733 fields: a single 32-bit load from the field start. */
+static __inline int
+isonum_733(p)
+	u_char *p;
+{
+	return *(u_int32_t *)p;
+}
+
+#endif
+
+#if BYTE_ORDER == BIG_ENDIAN
+
+/*
+ * Big-endian hosts that tolerate unaligned loads: the value is read
+ * with a single 16-bit load from the second half of the 4-byte field
+ * (offset 2).
+ */
+static __inline int
+isonum_723(p)
+	u_char *p;
+{
+	return *(u_int16_t *)(p + 2);
+}
+
+/*
+ * As above, for 733 fields: a single 32-bit load from the second half
+ * of the 8-byte field (offset 4).
+ */
+static __inline int
+isonum_733(p)
+	u_char *p;
+{
+	return *(u_int32_t *)(p + 4);
+}
+
+#endif
+
+#endif /* UNALIGNED_ACCESS */
+
+/*
+ * Associated files have a leading '='.
+ * iso_shipdir() tests the first character of a name against this value
+ * to recognize them.
+ */
+#define ASSOCCHAR '='
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
new file mode 100644
index 0000000..2b256d5
--- /dev/null
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -0,0 +1,86 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_rrip.h 8.2 (Berkeley) 1/23/94
+ * $Id: iso_rrip.h,v 1.4 1997/02/22 09:38:52 peter Exp $
+ */
+
+
+/*
+ * Analyze function flag (similar to RR field bits)
+ */
+#define ISO_SUSP_ATTR 0x0001
+#define ISO_SUSP_DEVICE 0x0002
+#define ISO_SUSP_SLINK 0x0004
+#define ISO_SUSP_ALTNAME 0x0008
+#define ISO_SUSP_CLINK 0x0010
+#define ISO_SUSP_PLINK 0x0020
+#define ISO_SUSP_RELDIR 0x0040
+#define ISO_SUSP_TSTAMP 0x0080
+#define ISO_SUSP_IDFLAG 0x0100
+#define ISO_SUSP_EXTREF 0x0200
+#define ISO_SUSP_CONT 0x0400
+#define ISO_SUSP_OFFSET 0x0800
+#define ISO_SUSP_STOP 0x1000
+#define ISO_SUSP_UNKNOWN 0x8000
+
+typedef struct {
+ struct iso_node *inop;
+ int fields; /* interesting fields in this analysis */
+ daddr_t iso_ce_blk; /* block of continuation area */
+ off_t iso_ce_off; /* offset of continuation area */
+ int iso_ce_len; /* length of continuation area */
+ struct iso_mnt *imp; /* mount structure */
+ ino_t *inump; /* inode number pointer */
+ char *outbuf; /* name/symbolic link output area */
+ u_short *outlen; /* length of above */
+ u_short maxlen; /* maximum length of above */
+ int cont; /* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+struct iso_directory_record;
+
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+ struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+ struct iso_mnt *imp));
diff --git a/sys/fs/coda/README b/sys/fs/coda/README
new file mode 100644
index 0000000..f9bf3c3
--- /dev/null
+++ b/sys/fs/coda/README
@@ -0,0 +1,60 @@
+ Announcing the Availability of the
+ Coda Distributed
+ Filesystem
+ for
+ BSD Unix Systems
+
+ Coda is a distributed file system like NFS and AFS. It is
+freely available, like NFS. But it functions much like AFS in being a
+"stateful" file system. Coda and AFS cache files on your local
+machine to improve performance. But Coda goes a step further than AFS
+by letting you access the cached files when there is no available
+network, viz. disconnected laptops and network outages. In Coda, both
+the client and server are outside the kernel which makes them easier
+to experiment with.
+
+To get more information on Coda, I would like to refer people to
+ http://www.coda.cs.cmu.edu
+There is a wealth of documents, papers, and theses there. There is
+also a good introduction to the Coda File System in
+ http://www.coda.cs.cmu.edu/ljpaper/lj.html
+
+Coda was originally developed as an academic prototype/testbed. It is
+being polished and rewritten where necessary. Coda is a work in
+progress and does have bugs. It is, though, very usable. Our
+interest is in making Coda available to as many people as possible and
+to have Coda evolve and flourish.
+
+The bulk of the Coda file system code supports the Coda client
+program, the Coda server program and the utilities needed by both.
+All these programs are Unix programs and can run equally well on any
+Unix platform. Our main development thrust is improving these
+programs. There is a small part of Coda that deals with the kernel to
+file system interface. This code is OS specific (but should not be
+platform specific).
+
+Coda is currently available for several OS's and platforms:
+ FreeBSD-2.2.5: i386
+ FreeBSD-2.2.6: i386
+ FreeBSD -current: i386
+ Linux 2.0: i386 & sparc
+ Linux 2.1: i386 & sparc
+ NetBSD 1.3: i386
+ NetBSD -current: i386
+The relevant sources, binaries, and docs can be found in
+ ftp://ftp.coda.cs.cmu.edu/pub/coda/
+
+We intend to come out with new Coda releases often, not daily. We
+don't want to slight any OS/platform not mentioned above. We are just
+limited in our resources as to what we can support internally. We
+will be happy to integrate OpenBSD support as well as other OS
+support. Also, adding platform support should be relatively easy and
+we can discuss this. The only difficulty is that Coda has a lightweight
+process package. It does some manipulations in assembler which would
+have to be redone for a different platform.
+
+There are several mailing lists @coda.cs.cmu.edu that discuss coda:
+coda-announce and linux-coda. We are going to revise linux-coda to be
+OS neutral, since it is mainly Coda we want to discuss. We appreciate
+comments, feedback, bug reports, bug fixes, enhancements, etc.
+
diff --git a/sys/fs/coda/TODO b/sys/fs/coda/TODO
new file mode 100644
index 0000000..eac5143
--- /dev/null
+++ b/sys/fs/coda/TODO
@@ -0,0 +1,17 @@
+OOPS:
+ FreeBSD does not fsync!!!
+
+Near term:
+ Fix bug in executing/mapping new files.
+ cfs_mount bug: interaction with cfs_inactive no cfs_unsave.
+ vref/vn_lock == vget except no VXWANT which may be on.
+ Review locks: vn_lock/VOP_UNLOCK/lockmgr ...
+
+Medium term:
+ Add missing VFS methods.
+ Do performance profile.
+ Tune hash algorithm used in cfs_namecache.
+ Tune hash algorithm used in cfs_subr.
+
+Eventually:
+ Use standard queue macros.
diff --git a/sys/fs/coda/cnode.h b/sys/fs/coda/cnode.h
new file mode 100644
index 0000000..bf6f632
--- /dev/null
+++ b/sys/fs/coda/cnode.h
@@ -0,0 +1,319 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/cnode.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: cnode.h,v 1.4 1998/09/13 13:57:59 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: cnode.h,v $
+ * Revision 1.4 1998/09/13 13:57:59 rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.10 1998/08/28 18:12:25 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.9 1998/08/18 17:05:24 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.8 1998/08/18 16:31:49 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.7 98/02/24 22:22:53 rvb
+ * Fixes up mainly to flush iopen and friends
+ *
+ * Revision 1.6 98/01/31 20:53:19 rvb
+ * First version that works on FreeBSD 2.2.5
+ *
+ * Revision 1.5 98/01/23 11:53:51 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.4.2.5 98/01/23 11:21:14 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.4.2.4 98/01/22 13:03:38 rvb
+ * Had Breaken ls .
+ *
+ * Revision 1.4.2.3 97/12/19 14:26:09 rvb
+ * session id
+ *
+ * Revision 1.4.2.2 97/12/16 12:40:24 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.4.2.1 97/12/06 17:41:28 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.4 97/12/05 10:39:30 rvb
+ * Read CHANGES
+ *
+ * Revision 1.3.18.2 97/11/12 12:09:45 rvb
+ * reorg pass1
+ *
+ * Revision 1.3.18.1 97/10/29 16:06:31 rvb
+ * Kill DYING
+ *
+ * Revision 1.3 1996/12/12 22:11:03 bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more.
+ *
+ * Revision 1.2 1996/01/02 16:57:26 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:53 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:08:23 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:08:23 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.2 1994/12/06 13:39:18 dcs
+ * Add a flag value to indicate a cnode was orphaned, e.g. the venus
+ * that created it has exited. This will allow one to restart venus
+ * even though some process may be cd'd into /coda.
+ *
+ * Revision 2.1 94/07/21 16:25:33 satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.2.7.1 94/06/16 11:26:02 raiff
+ * Branch for release beta-16Jun1994_39118
+ *
+ * Revision 1.2 92/10/27 17:58:41 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 2.3 92/09/30 14:16:53 mja
+ * Picked up fixed #ifdef _KERNEL. Also...
+ *
+ * Substituted rvb's history blurb so that we agree with Mach 2.5 sources.
+ * [91/02/09 jjk]
+ *
+ * Added contributors blurb.
+ * [90/12/13 jjk]
+ *
+ * Revision 2.2 90/07/05 11:27:24 mrt
+ * Created for the Coda File System.
+ * [90/05/23 dcs]
+ *
+ * Revision 1.4 90/05/31 17:02:16 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ *
+ *
+ */
+
+#ifndef _CNODE_H_
+#define _CNODE_H_
+
+#include <sys/vnode.h>
+#include <sys/lock.h>
+#include <machine/clock.h>
+
+MALLOC_DECLARE(M_CODA);
+
+/*
+ * tmp below since we need struct queue
+ */
+#include <coda/coda_kernel.h>
+
+/*
+ * Cnode lookup stuff.
+ * NOTE: CODA_CACHESIZE must be a power of 2 for cfshash to work!
+ */
+#define CODA_CACHESIZE 512
+
+#define CODA_ALLOC(ptr, cast, size) \
+do { \
+ ptr = (cast)malloc((unsigned long) size, M_CODA, M_WAITOK); \
+ if (ptr == 0) { \
+ panic("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \
+ } \
+} while (0)
+
+#define CODA_FREE(ptr, size) free((ptr), M_CODA)
+
+/*
+ * global cache state control
+ */
+extern int coda_nc_use;
+
+/*
+ * Used to select debugging statements throughout the cfs code.
+ */
+extern int codadebug;
+extern int coda_nc_debug;
+extern int coda_printf_delay;
+extern int coda_vnop_print_entry;
+extern int coda_psdev_print_entry;
+extern int coda_vfsop_print_entry;
+
+#define CODADBGMSK(N) (1 << (N)) /* bit mask for debug category N; N parenthesized so expression arguments bind correctly */
+#define CODADEBUG(N, STMT) { if (codadebug & CODADBGMSK(N)) { STMT } }
+#define myprintf(args) \
+do { \
+ if (coda_printf_delay) \
+ DELAY(coda_printf_delay);\
+ printf args ; \
+} while (0)
+
+struct cnode {
+ struct vnode *c_vnode; /* vnode returned by CTOV() for this cnode */
+ u_short c_flags; /* flags (see below) */
+ ViceFid c_fid; /* file handle */
+ struct lock c_lock; /* new lock protocol */
+ struct vnode *c_ovp; /* open vnode pointer */
+ u_short c_ocount; /* count of openers */
+ u_short c_owrite; /* count of open for write */
+ struct vattr c_vattr; /* attributes */
+ char *c_symlink; /* pointer to symbolic link */
+ u_short c_symlen; /* length of symbolic link */
+ dev_t c_device; /* associated vnode device */
+ ino_t c_inode; /* associated vnode inode */
+ struct cnode *c_next; /* links if on NetBSD machine */
+};
+#define VTOC(vp) ((struct cnode *)(vp)->v_data)
+#define CTOV(cp) ((struct vnode *)((cp)->c_vnode))
+
+/* flags */
+#define C_VATTR 0x01 /* Validity of vattr in the cnode */
+#define C_SYMLINK 0x02 /* Validity of symlink pointer in the cnode */
+#define C_WANTED 0x08 /* Set if lock wanted */
+#define C_LOCKED 0x10 /* Set if lock held */
+#define C_UNMOUNTING 0X20 /* Set if unmounting */
+#define C_PURGING 0x40 /* Set if purging a fid */
+
+#define VALID_VATTR(cp) (((cp)->c_flags) & C_VATTR) /* nonzero when C_VATTR is set in cp->c_flags */
+#define VALID_SYMLINK(cp) (((cp)->c_flags) & C_SYMLINK) /* nonzero when C_SYMLINK is set in cp->c_flags */
+#define IS_UNMOUNTING(cp) ((cp)->c_flags & C_UNMOUNTING)
+
+struct vcomm {
+ u_long vc_seq;
+ struct selinfo vc_selproc;
+ struct queue vc_requests;
+ struct queue vc_replys;
+};
+
+#define VC_OPEN(vcp) ((vcp)->vc_requests.forw != NULL)
+#define MARK_VC_CLOSED(vcp) (vcp)->vc_requests.forw = NULL;
+#define MARK_VC_OPEN(vcp) /* MT */
+
+struct coda_clstat {
+ int ncalls; /* client requests */
+ int nbadcalls; /* upcall failures */
+ int reqs[CODA_NCALLS]; /* count of each request */
+};
+extern struct coda_clstat coda_clstat;
+
+/*
+ * CODA structure to hold mount/file system information
+ */
+struct coda_mntinfo {
+ struct vnode *mi_rootvp;
+ struct mount *mi_vfsp;
+ struct vcomm mi_vcomm;
+};
+extern struct coda_mntinfo coda_mnttbl[]; /* indexed by minor device number */
+
+/*
+ * vfs pointer to mount info
+ */
+#define vftomi(vfsp) ((struct coda_mntinfo *)((vfsp)->mnt_data)) /* mount -> its mnt_data viewed as coda_mntinfo; argument parenthesized */
+#define CODA_MOUNTED(vfsp) (vftomi((vfsp)) != (struct coda_mntinfo *)0)
+
+/*
+ * vnode pointer to mount info
+ */
+#define vtomi(vp) ((struct coda_mntinfo *)((vp)->v_mount->mnt_data)) /* vnode -> mnt_data of its v_mount viewed as coda_mntinfo; argument parenthesized */
+
+/*
+ * Used for identifying usage of "Control" object
+ */
+extern struct vnode *coda_ctlvp;
+#define IS_CTL_VP(vp) ((vp) == coda_ctlvp)
+#define IS_CTL_NAME(vp, name, l)(((l) == CODA_CONTROLLEN) \
+ && ((vp) == vtomi((vp))->mi_rootvp) \
+ && strncmp((name), CODA_CONTROL, (l)) == 0) /* true when vp is the mount's root vnode and name is CODA_CONTROL of length l */
+
+/*
+ * An enum to tell us whether something that will remove a reference
+ * to a cnode was a downcall or not
+ */
+enum dc_status {
+ IS_DOWNCALL = 6,
+ NOT_DOWNCALL = 7
+};
+
+/* cfs_psdev.h */
+extern int coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize, caddr_t buffer);
+extern int coda_kernel_version;
+
+/* cfs_subr.h */
+extern int handleDownCall(int opcode, union outputArgs *out);
+extern void coda_unmounting(struct mount *whoIam);
+extern int coda_vmflush(struct cnode *cp);
+
+/* cfs_vnodeops.h */
+extern struct cnode *make_coda_node(ViceFid *fid, struct mount *vfsp, short type);
+extern int coda_vnodeopstats_init(void);
+
+/* coda_vfsops.h */
+extern struct mount *devtomp(dev_t dev);
+
+/* sigh */
+#define CODA_RDWR ((u_long) 31)
+
+#endif /* _CNODE_H_ */
+
diff --git a/sys/fs/coda/coda.h b/sys/fs/coda/coda.h
new file mode 100644
index 0000000..7b67ea9
--- /dev/null
+++ b/sys/fs/coda/coda.h
@@ -0,0 +1,761 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda.h,v 1.5 1998/10/28 19:33:49 rvb Exp $
+ *
+ */
+
+
+/*
+ *
+ * Based on cfs.h from Mach, but revamped for increased simplicity.
+ * Linux modifications by Peter Braam, Aug 1996
+ */
+
+#ifndef _CODA_HEADER_
+#define _CODA_HEADER_
+
+
+
+/* Catch new _KERNEL defn for NetBSD */
+#ifdef __NetBSD__
+#include <sys/types.h>
+#endif
+
+#ifndef CODA_MAXSYMLINKS
+#define CODA_MAXSYMLINKS 10
+#endif
+
+#if defined(DJGPP) || defined(__CYGWIN32__)
+#ifdef KERNEL
+typedef unsigned long u_long;
+typedef unsigned int u_int;
+typedef unsigned short u_short;
+typedef u_long ino_t;
+typedef u_long dev_t;
+typedef void * caddr_t;
+#ifdef DOS
+typedef unsigned __int64 u_quad_t;
+#else
+typedef unsigned long long u_quad_t;
+#endif
+
+#define inline
+
+struct timespec {
+ long ts_sec;
+ long ts_nsec;
+};
+#else /* DJGPP but not KERNEL */
+#include <sys/types.h>
+#include <sys/time.h>
+typedef unsigned long long u_quad_t;
+#endif /* !KERNEL */
+#endif /* !DJGPP */
+
+
+#if defined(__linux__)
+#define cdev_t u_quad_t
+#if !defined(_UQUAD_T_) && (!defined(__GLIBC__) || __GLIBC__ < 2)
+#define _UQUAD_T_ 1
+typedef unsigned long long u_quad_t;
+#endif
+#else
+#define cdev_t dev_t
+#endif
+
+#ifdef __CYGWIN32__
+typedef unsigned char u_int8_t;
+struct timespec {
+ time_t tv_sec; /* seconds */
+ long tv_nsec; /* nanoseconds */
+};
+#endif
+
+
+/*
+ * Cfs constants
+ */
+#define CODA_MAXNAMLEN 255
+#define CODA_MAXPATHLEN 1024
+#define CODA_MAXSYMLINK 10
+
+/* these are Coda's version of O_RDONLY etc combinations
+ * to deal with VFS open modes
+ */
+#define C_O_READ 0x001
+#define C_O_WRITE 0x002
+#define C_O_TRUNC 0x010
+#define C_O_EXCL 0x100
+#define C_O_CREAT 0x200
+
+/* these are to find mode bits in Venus */
+#define C_M_READ 00400
+#define C_M_WRITE 00200
+
+/* for access Venus will use */
+#define C_A_C_OK 8 /* Test for writing upon create. */
+#define C_A_R_OK 4 /* Test for read permission. */
+#define C_A_W_OK 2 /* Test for write permission. */
+#define C_A_X_OK 1 /* Test for execute permission. */
+#define C_A_F_OK 0 /* Test for existence. */
+
+
+
+#ifndef _VENUS_DIRENT_T_
+#define _VENUS_DIRENT_T_ 1
+struct venus_dirent {
+ unsigned long d_fileno; /* file number of entry */
+ unsigned short d_reclen; /* length of this record */
+ char d_type; /* file type, see below */
+ char d_namlen; /* length of string in d_name */
+ char d_name[CODA_MAXNAMLEN + 1];/* name must be no longer than this */
+};
+#undef DIRSIZ
+#define DIRSIZ(dp) ((sizeof (struct venus_dirent) - (CODA_MAXNAMLEN+1)) + \
+ (((dp)->d_namlen+1 + 3) &~ 3))
+
+/*
+ * File types
+ */
+#define CDT_UNKNOWN 0
+#define CDT_FIFO 1
+#define CDT_CHR 2
+#define CDT_DIR 4
+#define CDT_BLK 6
+#define CDT_REG 8
+#define CDT_LNK 10
+#define CDT_SOCK 12
+#define CDT_WHT 14
+
+/*
+ * Convert between stat structure types and directory types.
+ */
+#define IFTOCDT(mode) (((mode) & 0170000) >> 12)
+#define CDTTOIF(dirtype) ((dirtype) << 12)
+
+#endif
+
+#ifndef _FID_T_
+#define _FID_T_ 1
+typedef u_long VolumeId;
+typedef u_long VnodeId;
+typedef u_long Unique_t;
+typedef u_long FileVersion;
+#endif
+
+#ifndef _VICEFID_T_
+#define _VICEFID_T_ 1
+typedef struct ViceFid {
+ VolumeId Volume;
+ VnodeId Vnode;
+ Unique_t Unique;
+} ViceFid;
+#endif /* VICEFID */
+
+
+#ifdef __linux__
+static __inline__ ino_t coda_f2i(struct ViceFid *fid)
+{
+ if ( ! fid )
+ return 0;
+ if (fid->Vnode == 0xfffffffe || fid->Vnode == 0xffffffff)
+ return ((fid->Volume << 20) | (fid->Unique & 0xfffff));
+ else
+ return (fid->Unique + (fid->Vnode<<10) + (fid->Volume<<20));
+}
+
+#else
+#define coda_f2i(fid)\
+ ((fid) ? ((fid)->Unique + ((fid)->Vnode<<10) + ((fid)->Volume<<20)) : 0)
+#endif
+
+
+#ifndef __BIT_TYPES_DEFINED__
+#define u_int32_t unsigned int
+#endif
+
+
+#ifndef _VUID_T_
+#define _VUID_T_
+typedef u_int32_t vuid_t;
+typedef u_int32_t vgid_t;
+#endif /*_VUID_T_ */
+
+#ifndef _CODACRED_T_
+#define _CODACRED_T_
+struct coda_cred {
+ vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/
+ vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */
+};
+#endif
+
+#ifndef _VENUS_VATTR_T_
+#define _VENUS_VATTR_T_
+/*
+ * Vnode types. VNON means no type.
+ */
+enum coda_vtype { C_VNON, C_VREG, C_VDIR, C_VBLK, C_VCHR, C_VLNK, C_VSOCK, C_VFIFO, C_VBAD };
+
+struct coda_vattr {
+ int va_type; /* vnode type (for create) */
+ u_short va_mode; /* files access mode and type */
+ short va_nlink; /* number of references to file */
+ vuid_t va_uid; /* owner user id */
+ vgid_t va_gid; /* owner group id */
+ long va_fileid; /* file id */
+ u_quad_t va_size; /* file size in bytes */
+ long va_blocksize; /* blocksize preferred for i/o */
+ struct timespec va_atime; /* time of last access */
+ struct timespec va_mtime; /* time of last modification */
+ struct timespec va_ctime; /* time file changed */
+ u_long va_gen; /* generation number of file */
+ u_long va_flags; /* flags defined for file */
+ cdev_t va_rdev; /* device special file represents */
+ u_quad_t va_bytes; /* bytes of disk space held by file */
+ u_quad_t va_filerev; /* file modification number */
+};
+
+#endif
+
+/*
+ * Kernel <--> Venus communications.
+ */
+
+#define CODA_ROOT 2
+#define CODA_SYNC 3
+#define CODA_OPEN 4
+#define CODA_CLOSE 5
+#define CODA_IOCTL 6
+#define CODA_GETATTR 7
+#define CODA_SETATTR 8
+#define CODA_ACCESS 9
+#define CODA_LOOKUP 10
+#define CODA_CREATE 11
+#define CODA_REMOVE 12
+#define CODA_LINK 13
+#define CODA_RENAME 14
+#define CODA_MKDIR 15
+#define CODA_RMDIR 16
+#define CODA_READDIR 17
+#define CODA_SYMLINK 18
+#define CODA_READLINK 19
+#define CODA_FSYNC 20
+#define CODA_INACTIVE 21
+#define CODA_VGET 22
+#define CODA_SIGNAL 23
+#define CODA_REPLACE 24
+#define CODA_FLUSH 25
+#define CODA_PURGEUSER 26
+#define CODA_ZAPFILE 27
+#define CODA_ZAPDIR 28
+#define CODA_PURGEFID 30
+#define CODA_OPEN_BY_PATH 31
+#define CODA_RESOLVE 32
+#define CODA_REINTEGRATE 33
+#define CODA_NCALLS 34
+
+#define DOWNCALL(opcode) ((opcode) >= CODA_REPLACE && (opcode) <= CODA_PURGEFID) /* true for opcodes in the range CODA_REPLACE..CODA_PURGEFID */
+
+#define VC_MAXDATASIZE 8192
+#define VC_MAXMSGSIZE (sizeof(union inputArgs)+sizeof(union outputArgs) +\
+ VC_MAXDATASIZE) /* whole sum parenthesized so the macro is safe inside larger expressions */
+
+#define CIOC_KERNEL_VERSION _IOWR('c', 10, sizeof (int))
+#if 0
+ /* don't care about kernel version number */
+#define CODA_KERNEL_VERSION 0
+ /* The old venus 4.6 compatible interface */
+#define CODA_KERNEL_VERSION 1
+#endif
+ /* venus_lookup gets an extra parameter to aid windows.*/
+#define CODA_KERNEL_VERSION 2
+
+/*
+ * Venus <-> Coda RPC arguments
+ */
+struct coda_in_hdr {
+ unsigned long opcode;
+ unsigned long unique; /* Keep multiple outstanding msgs distinct */
+ u_short pid; /* Common to all */
+ u_short pgid; /* Common to all */
+ u_short sid; /* Common to all */
+ struct coda_cred cred; /* Common to all */
+};
+
+/* Really important that opcode and unique are 1st two fields! */
+struct coda_out_hdr {
+ unsigned long opcode;
+ unsigned long unique;
+ unsigned long result;
+};
+
+/* coda_root: NO_IN */
+struct coda_root_out {
+ struct coda_out_hdr oh;
+ ViceFid VFid;
+};
+
+struct coda_root_in {
+ struct coda_in_hdr in;
+};
+
+/* coda_sync: */
+/* Nothing needed for coda_sync */
+
+/* coda_open: */
+struct coda_open_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int flags;
+};
+
+struct coda_open_out {
+ struct coda_out_hdr oh;
+ cdev_t dev;
+ ino_t inode;
+};
+
+
+/* coda_close: */
+struct coda_close_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int flags;
+};
+
+struct coda_close_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_ioctl: */
+struct coda_ioctl_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int cmd;
+ int len;
+ int rwflag;
+ char *data; /* Place holder for data. */
+};
+
+struct coda_ioctl_out {
+ struct coda_out_hdr oh;
+ int len;
+ caddr_t data; /* Place holder for data. */
+};
+
+
+/* coda_getattr: */
+struct coda_getattr_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+};
+
+struct coda_getattr_out {
+ struct coda_out_hdr oh;
+ struct coda_vattr attr;
+};
+
+
+/* coda_setattr: NO_OUT */
+struct coda_setattr_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ struct coda_vattr attr;
+};
+
+struct coda_setattr_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_access: NO_OUT */
+struct coda_access_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int flags;
+};
+
+struct coda_access_out {
+ struct coda_out_hdr out;
+};
+
+
+/* lookup flags */
+#define CLU_CASE_SENSITIVE 0x01
+#define CLU_CASE_INSENSITIVE 0x02
+
+/* coda_lookup: */
+struct coda_lookup_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int name; /* Place holder for data. */
+ int flags;
+};
+
+struct coda_lookup_out {
+ struct coda_out_hdr oh;
+ ViceFid VFid;
+ int vtype;
+};
+
+
+/* coda_create: */
+struct coda_create_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ struct coda_vattr attr;
+ int excl;
+ int mode;
+ int name; /* Place holder for data. */
+};
+
+struct coda_create_out {
+ struct coda_out_hdr oh;
+ ViceFid VFid;
+ struct coda_vattr attr;
+};
+
+
+/* coda_remove: NO_OUT */
+struct coda_remove_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int name; /* Place holder for data. */
+};
+
+struct coda_remove_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_link: NO_OUT */
+struct coda_link_in {
+ struct coda_in_hdr ih;
+ ViceFid sourceFid; /* cnode to link *to* */
+ ViceFid destFid; /* Directory in which to place link */
+ int tname; /* Place holder for data. */
+};
+
+struct coda_link_out {
+ struct coda_out_hdr out;
+};
+
+
+/* coda_rename: NO_OUT */
+struct coda_rename_in {
+ struct coda_in_hdr ih;
+ ViceFid sourceFid;
+ int srcname;
+ ViceFid destFid;
+ int destname;
+};
+
+struct coda_rename_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_mkdir: */
+struct coda_mkdir_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ struct coda_vattr attr;
+ int name; /* Place holder for data. */
+};
+
+struct coda_mkdir_out {
+ struct coda_out_hdr oh;
+ ViceFid VFid;
+ struct coda_vattr attr;
+};
+
+
+/* coda_rmdir: NO_OUT */
+struct coda_rmdir_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int name; /* Place holder for data. */
+};
+
+struct coda_rmdir_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_readdir: */
+struct coda_readdir_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int count;
+ int offset;
+};
+
+struct coda_readdir_out {
+ struct coda_out_hdr oh;
+ int size;
+ caddr_t data; /* Place holder for data. */
+};
+
+/* coda_symlink: NO_OUT */
+struct coda_symlink_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid; /* Directory to put symlink in */
+ int srcname;
+ struct coda_vattr attr;
+ int tname;
+};
+
+struct coda_symlink_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_readlink: */
+struct coda_readlink_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+};
+
+struct coda_readlink_out {
+ struct coda_out_hdr oh;
+ int count;
+ caddr_t data; /* Place holder for data. */
+};
+
+
+/* coda_fsync: NO_OUT */
+struct coda_fsync_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+};
+
+struct coda_fsync_out {
+ struct coda_out_hdr out;
+};
+
+/* coda_inactive: NO_OUT */
+struct coda_inactive_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+};
+
+/* coda_vget: */
+struct coda_vget_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+};
+
+struct coda_vget_out {
+ struct coda_out_hdr oh;
+ ViceFid VFid;
+ int vtype;
+};
+
+
+/* CODA_SIGNAL is out-of-band, doesn't need data. */
+/* CODA_INVALIDATE is a venus->kernel call */
+/* CODA_FLUSH is a venus->kernel call */
+
+/* coda_purgeuser: */
+/* CODA_PURGEUSER is a venus->kernel call */
+struct coda_purgeuser_out {
+ struct coda_out_hdr oh;
+ struct coda_cred cred;
+};
+
+/* coda_zapfile: */
+/* CODA_ZAPFILE is a venus->kernel call */
+struct coda_zapfile_out {
+ struct coda_out_hdr oh;
+ ViceFid CodaFid;
+};
+
+/* coda_zapdir: */
+/* CODA_ZAPDIR is a venus->kernel call */
+struct coda_zapdir_out {
+ struct coda_out_hdr oh;
+ ViceFid CodaFid;
+};
+
+/* coda_zapvnode: */
+/* CODA_ZAPVNODE is a venus->kernel call */
+struct coda_zapvnode_out {
+ struct coda_out_hdr oh;
+ struct coda_cred cred;
+ ViceFid VFid;
+};
+
+/* coda_purgefid: */
+/* CODA_PURGEFID is a venus->kernel call */
+struct coda_purgefid_out {
+ struct coda_out_hdr oh;
+ ViceFid CodaFid;
+};
+
+/* coda_rdwr: */
+struct coda_rdwr_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int rwflag;
+ int count;
+ int offset;
+ int ioflag;
+ caddr_t data; /* Place holder for data. */
+};
+
+struct coda_rdwr_out {
+ struct coda_out_hdr oh;
+ int rwflag;
+ int count;
+ caddr_t data; /* Place holder for data. */
+};
+
+
+/* coda_replace: */
+/* CODA_REPLACE is a venus->kernel call */
+struct coda_replace_out { /* coda_replace is a venus->kernel call */
+ struct coda_out_hdr oh;
+ ViceFid NewFid;
+ ViceFid OldFid;
+};
+
+/* coda_open_by_path: */
+struct coda_open_by_path_in {
+ struct coda_in_hdr ih;
+ ViceFid VFid;
+ int flags;
+};
+
+struct coda_open_by_path_out {
+ struct coda_out_hdr oh;
+ int path;
+};
+
+/*
+ * Occasionally, we don't cache the fid returned by CODA_LOOKUP.
+ * For instance, if the fid is inconsistent.
+ * This case is handled by setting the top bit of the type result parameter.
+ */
+#define CODA_NOCACHE 0x80000000
+
+union inputArgs {
+ struct coda_in_hdr ih; /* NB: every struct below begins with an ih */
+ struct coda_open_in coda_open;
+ struct coda_close_in coda_close;
+ struct coda_ioctl_in coda_ioctl;
+ struct coda_getattr_in coda_getattr;
+ struct coda_setattr_in coda_setattr;
+ struct coda_access_in coda_access;
+ struct coda_lookup_in coda_lookup;
+ struct coda_create_in coda_create;
+ struct coda_remove_in coda_remove;
+ struct coda_link_in coda_link;
+ struct coda_rename_in coda_rename;
+ struct coda_mkdir_in coda_mkdir;
+ struct coda_rmdir_in coda_rmdir;
+ struct coda_readdir_in coda_readdir;
+ struct coda_symlink_in coda_symlink;
+ struct coda_readlink_in coda_readlink;
+ struct coda_fsync_in coda_fsync;
+ struct coda_inactive_in coda_inactive;
+ struct coda_vget_in coda_vget;
+ struct coda_rdwr_in coda_rdwr;
+ struct coda_open_by_path_in coda_open_by_path;
+};
+
+union outputArgs {
+ struct coda_out_hdr oh; /* NB: every struct below begins with an oh */
+ struct coda_root_out coda_root;
+ struct coda_open_out coda_open;
+ struct coda_ioctl_out coda_ioctl;
+ struct coda_getattr_out coda_getattr;
+ struct coda_lookup_out coda_lookup;
+ struct coda_create_out coda_create;
+ struct coda_mkdir_out coda_mkdir;
+ struct coda_readdir_out coda_readdir;
+ struct coda_readlink_out coda_readlink;
+ struct coda_vget_out coda_vget;
+ struct coda_purgeuser_out coda_purgeuser;
+ struct coda_zapfile_out coda_zapfile;
+ struct coda_zapdir_out coda_zapdir;
+ struct coda_zapvnode_out coda_zapvnode;
+ struct coda_purgefid_out coda_purgefid;
+ struct coda_rdwr_out coda_rdwr;
+ struct coda_replace_out coda_replace;
+ struct coda_open_by_path_out coda_open_by_path;
+};
+
+union coda_downcalls {
+ /* CODA_INVALIDATE is a venus->kernel call */
+ /* CODA_FLUSH is a venus->kernel call */
+ struct coda_purgeuser_out purgeuser;
+ struct coda_zapfile_out zapfile;
+ struct coda_zapdir_out zapdir;
+ struct coda_zapvnode_out zapvnode;
+ struct coda_purgefid_out purgefid;
+ struct coda_replace_out replace;
+};
+
+
+/*
+ * Used for identifying usage of "Control" and pioctls
+ */
+
+#define PIOCPARM_MASK 0x0000ffff
+struct ViceIoctl {
+ caddr_t in, out; /* Data to be transferred in, or out */
+ short in_size; /* Size of input buffer <= 2K */
+ short out_size; /* Maximum size of output buffer, <= 2K */
+};
+
+#if defined(__CYGWIN32__) || defined(DJGPP)
+struct PioctlData {
+ unsigned long cmd;
+ const char *path;
+ int follow;
+ struct ViceIoctl vi;
+};
+#else
+struct PioctlData {
+ const char *path;
+ int follow;
+ struct ViceIoctl vi;
+};
+#endif
+
+#define CODA_CONTROL ".CONTROL"
+#define CODA_CONTROLLEN 8
+#define CTL_VOL -1
+#define CTL_VNO -1
+#define CTL_UNI -1
+#define CTL_INO -1
+#define CTL_FILE "/coda/.CONTROL"
+
+
+#define IS_CTL_FID(fidp) ((fidp)->Volume == CTL_VOL &&\
+ (fidp)->Vnode == CTL_VNO &&\
+ (fidp)->Unique == CTL_UNI)
+#endif
+
diff --git a/sys/fs/coda/coda_fbsd.c b/sys/fs/coda/coda_fbsd.c
new file mode 100644
index 0000000..703708c
--- /dev/null
+++ b/sys/fs/coda/coda_fbsd.c
@@ -0,0 +1,216 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_fbsd.cr,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_fbsd.c,v 1.12 1999/01/27 20:09:17 dillon Exp $
+ *
+ */
+
+#include "vcoda.h"
+#include "opt_devfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+#include <sys/ucred.h>
+#include <sys/vnode.h>
+#include <sys/conf.h>
+
+#include <vm/vm.h>
+#include <vm/vnode_pager.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vnops.h>
+#include <coda/coda_psdev.h>
+
+#ifdef DEVFS
+#include <sys/devfsext.h>
+
+static void *cfs_devfs_token[NVCODA];
+static void *coda_devfs_token[NVCODA];
+#endif
+
+/*
+ From: "Jordan K. Hubbard" <jkh@time.cdrom.com>
+ Subject: Re: New 3.0 SNAPshot CDROM about ready for production..
+ To: "Robert.V.Baron" <rvb@GLUCK.CODA.CS.CMU.EDU>
+ Date: Fri, 20 Feb 1998 15:57:01 -0800
+
+ > Also I need a character device major number. (and might want to reserve
+ > a block of 10 syscalls.)
+
+ Just one char device number? No block devices? Very well, cdev 93 is yours!
+*/
+
+#define VC_DEV_NO 93
+
+static struct cdevsw codadevsw =
+{
+ vc_nb_open, vc_nb_close, vc_nb_read, vc_nb_write, /*93*/
+ vc_nb_ioctl, nostop, nullreset, nodevtotty,
+ vc_nb_poll, nommap, NULL, "Coda", NULL, -1
+};
+
+/*
+ * Run-time switch for VCDEBUG tracing; enabled by default.
+ *
+ * VCDEBUG(fmt, ...) calls printf only while vcdebug is non-zero.  The
+ * macro expands to an unbraced "if", so when it is used as the body of
+ * another if/else the caller must add braces, otherwise a following
+ * "else" binds to the macro's hidden "if".
+ */
+int vcdebug = 1;
+#define VCDEBUG if (vcdebug) printf
+
+/*
+ * Module event handler for the Coda character device.
+ *
+ * MOD_LOAD:   hands codadevsw to cdevsw_add() for device
+ *             makedev(VC_DEV_NO, 0), keeping whatever cdevsw_add() stores
+ *             through its third argument in oldcdevsw, and (with DEVFS)
+ *             creates the "cfs%d" and "coda%d" nodes.
+ * MOD_UNLOAD: removes the DEVFS nodes and hands oldcdevsw back to
+ *             cdevsw_add() for the same device number.
+ * Any other event type is ignored.
+ *
+ * mod and data are unused.  Always returns 0.
+ */
+static int
+codadev_modevent(module_t mod, int type, void *data)
+{
+    /*
+     * Computed up front so that every path passes a valid device number
+     * to cdevsw_add().  Previously dev was assigned only under MOD_LOAD,
+     * so MOD_UNLOAD passed the address of an uninitialized dev_t.
+     */
+    dev_t dev = makedev(VC_DEV_NO, 0);
+#ifdef DEVFS
+    int i;
+#endif
+    /* static: must survive from the load event to the unload event */
+    static struct cdevsw *oldcdevsw;
+
+    switch (type) {
+    case MOD_LOAD:
+        cdevsw_add(&dev,&codadevsw, &oldcdevsw);
+#ifdef DEVFS
+        /* tmp */
+#undef NVCODA
+#define NVCODA 1
+        /* Each unit is published under both the old and the new name. */
+        for (i = 0; i < NVCODA; i++) {
+            cfs_devfs_token[i] =
+                devfs_add_devswf(&codadevsw, i,
+                    DV_CHR, UID_ROOT, GID_WHEEL, 0666,
+                    "cfs%d", i);
+            coda_devfs_token[i] =
+                devfs_add_devswf(&codadevsw, i,
+                    DV_CHR, UID_ROOT, GID_WHEEL, 0666,
+                    "coda%d", i);
+        }
+#endif
+        break;
+    case MOD_UNLOAD:
+#ifdef DEVFS
+        for (i = 0; i < NVCODA; i++) {
+            devfs_remove_dev(cfs_devfs_token[i]);
+            devfs_remove_dev(coda_devfs_token[i]);
+        }
+#endif
+        /* Put back the entry saved at load time. */
+        cdevsw_add(&dev, oldcdevsw, NULL);
+        break;
+    default:
+        break;
+    }
+    return 0;
+}
+static moduledata_t codadev_mod = {
+ "codadev",
+ codadev_modevent,
+ NULL
+};
+DECLARE_MODULE(codadev, codadev_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+VC_DEV_NO);
+
+/*
+ * getpages vnode operation for Coda.
+ *
+ * v is a struct vop_getpages_args.  As compiled (#if 1) the request is
+ * handed straight to vnode_pager_generic_getpages() on the Coda vnode and
+ * that call's result is returned; a_offset is not passed on (the original
+ * author left this as an open question).
+ *
+ * The #else branch is a disabled experiment that would forward the
+ * request to the container vnode (cp->c_ovp) instead, opening the file
+ * internally when no container is attached.  It is never compiled while
+ * the #if 1 stands.
+ */
+int
+coda_fbsd_getpages(v)
+ void *v;
+{
+ struct vop_getpages_args *ap = v;
+ int ret = 0;
+
+#if 1
+ /* ??? a_offset */
+ ret = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+ ap->a_reqpage);
+ return ret;
+#else
+ /* Disabled alternative: page in through the container file. */
+ {
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp = VTOC(vp);
+ struct vnode *cfvp = cp->c_ovp;
+ int opened_internally = 0;
+ struct ucred *cred = (struct ucred *) 0;
+ struct proc *p = curproc;
+ int error = 0;
+
+ /* The control vnode has no pages to fetch. */
+ if (IS_CTL_VP(vp)) {
+ return(EINVAL);
+ }
+
+ /* Redirect the request to UFS. */
+
+ /* No container attached yet: open the file ourselves to obtain one. */
+ if (cfvp == NULL) {
+ opened_internally = 1;
+
+ error = VOP_OPEN(vp, FREAD, cred, p);
+printf("coda_getp: Internally Opening %p\n", vp);
+
+ if (error) {
+ printf("coda_getpage: VOP_OPEN on container failed %d\n", error);
+ return (error);
+ }
+ if (vp->v_type == VREG) {
+ error = vfs_object_create(vp, p, cred);
+ if (error != 0) {
+ printf("coda_getpage: vfs_object_create() returns %d\n", error);
+ /*
+ * NOTE(review): this path returns without the VOP_CLOSE that
+ * balances the internal VOP_OPEN above, and calls vput() on a
+ * vnode this function did not reference -- verify before
+ * ever enabling this branch.
+ */
+ vput(vp);
+ return(error);
+ }
+ }
+
+ /* Re-read the container pointer after the internal open. */
+ cfvp = cp->c_ovp;
+ } else {
+printf("coda_getp: has container %p\n", cfvp);
+ }
+
+printf("coda_fbsd_getpages: using container ");
+/*
+ error = vnode_pager_generic_getpages(cfvp, ap->a_m, ap->a_count,
+ ap->a_reqpage);
+*/
+ error = VOP_GETPAGES(cfvp, ap->a_m, ap->a_count,
+ ap->a_reqpage, ap->a_offset);
+printf("error = %d\n", error);
+
+ /* Do an internal close if necessary. */
+ if (opened_internally) {
+ (void)VOP_CLOSE(vp, FREAD, cred, p);
+ }
+
+ return(error);
+ }
+#endif
+}
+
+/*
+ * putpages vnode operation for Coda.
+ *
+ * v is a struct vop_putpages_args.  The request is passed unchanged to
+ * vnode_pager_generic_putpages() on the Coda vnode and that call's result
+ * is returned.  a_offset is not passed on; the original author marked
+ * this as an open question.
+ */
+int
+coda_fbsd_putpages(void *v)
+{
+    struct vop_putpages_args *args = v;
+    int status;
+
+    /*??? a_offset */
+    status = vnode_pager_generic_putpages(args->a_vp, args->a_m,
+        args->a_count, args->a_sync, args->a_rtvals);
+
+    return (status);
+}
diff --git a/sys/fs/coda/coda_io.h b/sys/fs/coda/coda_io.h
new file mode 100644
index 0000000..dd12fa1
--- /dev/null
+++ b/sys/fs/coda/coda_io.h
@@ -0,0 +1,128 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_io.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_io.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_io.h,v $
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.5 1998/08/18 17:05:23 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.4 1998/08/18 16:31:47 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.3 98/01/23 11:53:49 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.2.38.1 97/12/16 12:40:22 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.2 96/01/02 16:57:15 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:42 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:08:20 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:08:20 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.1 1994/07/21 16:25:25 satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.3 94/06/14 16:53:47 dcs
+ * Added support for ODY-like mounting in the kernel (SETS)
+ *
+ * Revision 1.3 94/06/14 16:48:03 dcs
+ * Added support for ODY-like mounting in the kernel (SETS)
+ *
+ * Revision 1.2 92/10/27 17:58:28 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 1.1 92/04/03 17:35:34 satya
+ * Initial revision
+ *
+ * Revision 1.5 91/02/09 12:53:26 jjk
+ * Substituted rvb's history blurb so that we agree with Mach 2.5 sources.
+ *
+ * Revision 2.2.1.1 91/01/06 22:08:22 rvb
+ * Created for the Coda File System.
+ * [90/05/23 dcs]
+ *
+ * Revision 1.3 90/07/19 10:23:05 dcs
+ * Added ; to cfs_resize definition for port to 386.
+ *
+ * Revision 1.2 90/05/31 17:02:09 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ *
+ *
+ */
+
+#ifndef _CODAIO_H_
+#define _CODAIO_H_
+
+/* Define ioctl commands for vcioctl, /dev/cfs */
+
+/*
+ * All commands are in ioctl group 'c'.  Only CODARESIZE carries an
+ * argument (a struct coda_resize, copied in); the others take none.
+ * NOTE(review): CODATEST carries the same "Print Cache" remark as
+ * CODAPRINT, which looks like a copy/paste leftover -- confirm what
+ * CODATEST really does in the ioctl handler.
+ */
+#define CODARESIZE _IOW('c', 1, struct coda_resize ) /* Resize CODA NameCache */
+#define CODASTATS _IO('c', 2) /* Collect stats */
+#define CODAPRINT _IO('c', 3) /* Print Cache */
+#define CODATEST _IO('c', 4) /* Print Cache */
+
+/* Argument of CODARESIZE: the requested hash-table and heap sizes. */
+struct coda_resize { int hashsize, heapsize; };
+
+#endif
diff --git a/sys/fs/coda/coda_kernel.h b/sys/fs/coda/coda_kernel.h
new file mode 100644
index 0000000..33b372f
--- /dev/null
+++ b/sys/fs/coda/coda_kernel.h
@@ -0,0 +1,66 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_kernel.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_kernel.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ *
+ */
+
+/*
+ * Macros to manipulate the queue
+ *
+ * A queue is a circular doubly-linked list threaded through struct queue
+ * links.  The macros take the objects themselves (not pointers) and cast
+ * their addresses to struct queue *.
+ * NOTE(review): this presumably requires the links to be the first member
+ * of any element type -- confirm against the users of these macros.
+ * The whole file is skipped if INIT_QUEUE is already defined.
+ */
+#ifndef INIT_QUEUE
+struct queue {
+ struct queue *forw, *back;
+};
+
+/* Make head an empty queue: both links point back at head itself. */
+#define INIT_QUEUE(head) \
+do { \
+ (head).forw = (struct queue *)&(head); \
+ (head).back = (struct queue *)&(head); \
+} while (0)
+
+/* First element of the queue (head itself when the queue is empty). */
+#define GETNEXT(head) (head).forw
+
+/* True when head's forward link points at head, i.e. no elements. */
+#define EMPTY(head) ((head).forw == &(head))
+
+/* End-of-queue test for a traversal: true when el is the head. */
+#define EOQ(el, head) ((struct queue *)(el) == (struct queue *)&(head))
+
+/* Append el at the tail of the queue, i.e. link it just before head. */
+#define INSQUE(el, head) \
+do { \
+ (el).forw = ((head).back)->forw; \
+ (el).back = (head).back; \
+ ((head).back)->forw = (struct queue *)&(el); \
+ (head).back = (struct queue *)&(el); \
+} while (0)
+
+/*
+ * Unlink el from whatever queue it is on.  el's own forw/back links are
+ * left untouched, so they still point at its former neighbours.
+ */
+#define REMQUE(el) \
+do { \
+ ((el).forw)->back = (el).back; \
+ (el).back->forw = (el).forw; \
+} while (0)
+
+#endif
diff --git a/sys/fs/coda/coda_namecache.c b/sys/fs/coda/coda_namecache.c
new file mode 100644
index 0000000..2da7b09
--- /dev/null
+++ b/sys/fs/coda/coda_namecache.c
@@ -0,0 +1,915 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_namecache.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_namecache.c,v 1.7 1998/09/28 20:52:58 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_namecache.c,v $
+ * Revision 1.7 1998/09/28 20:52:58 rvb
+ * Cleanup and fix THE bug
+ *
+ * Revision 1.6 1998/09/25 17:38:31 rvb
+ * Put "stray" printouts under DIAGNOSTIC. Make everything build
+ * with DEBUG on. Add support for lkm. (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.5 1998/09/13 13:57:59 rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.4 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.11 1998/08/28 18:12:16 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10 1998/08/18 17:05:14 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9 1998/08/18 16:31:39 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8 98/01/31 20:53:10 rvb
+ * First version that works on FreeBSD 2.2.5
+ *
+ * Revision 1.7 98/01/23 11:53:39 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.6.2.4 98/01/23 11:21:02 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.6.2.3 97/12/16 12:40:03 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.6.2.2 97/12/09 16:07:10 rvb
+ * Sync with vfs/include/coda.h
+ *
+ * Revision 1.6.2.1 97/12/06 17:41:18 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.6 97/12/05 10:39:13 rvb
+ * Read CHANGES
+ *
+ * Revision 1.5.4.7 97/11/25 08:08:43 rvb
+ * cfs_venus ... done; until cred/vattr change
+ *
+ * Revision 1.5.4.6 97/11/24 15:44:43 rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ *
+ * Revision 1.5.4.5 97/11/20 11:46:38 rvb
+ * Capture current cfs_venus
+ *
+ * Revision 1.5.4.4 97/11/18 10:27:13 rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ *
+ * Revision 1.5.4.3 97/11/13 22:02:57 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.5.4.2 97/11/12 12:09:35 rvb
+ * reorg pass1
+ *
+ * Revision 1.5.4.1 97/10/28 23:10:12 rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.5 97/08/05 11:08:01 lily
+ * Removed cfsnc_replace, replaced it with a coda_find, unhash, and
+ * rehash. This fixes a cnode leak and a bug in which the fid is
+ * not actually replaced. (cfs_namecache.c, cfsnc.h, cfs_subr.c)
+ *
+ * Revision 1.4 96/12/12 22:10:57 bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.3 1996/11/08 18:06:09 bnoble
+ * Minor changes in vnode operation signature, VOP_UPDATE signature, and
+ * some newly defined bits in the include files.
+ *
+ * Revision 1.2 1996/01/02 16:56:50 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:15 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:07:57 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:07:56 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.3 1994/10/14 09:57:54 dcs
+ * Made changes 'cause sun4s have braindead compilers
+ *
+ * Revision 2.2 94/08/28 19:37:35 luqi
+ * Add a new CODA_REPLACE call to allow venus to replace a ViceFid in the
+ * mini-cache.
+ *
+ * In "cfs.h":
+ * Add CODA_REPLACE decl.
+ *
+ * In "cfs_namecache.c":
+ * Add routine cfsnc_replace.
+ *
+ * In "cfs_subr.c":
+ * Add case-statement to process CODA_REPLACE.
+ *
+ * In "cfsnc.h":
+ * Add decl for CODA_NC_REPLACE.
+ *
+ *
+ * Revision 2.1 94/07/21 16:25:15 satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.2 92/10/27 17:58:21 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 2.3 92/09/30 14:16:20 mja
+ * call coda_flush instead of calling inode_uncache_try directly
+ * (from dcs). Also...
+ *
+ * Substituted rvb's history blurb so that we agree with Mach 2.5 sources.
+ * [91/02/09 jjk]
+ *
+ * Added contributors blurb.
+ * [90/12/13 jjk]
+ *
+ * Revision 2.2 90/07/05 11:26:30 mrt
+ * Created for the Coda File System.
+ * [90/05/23 dcs]
+ *
+ * Revision 1.3 90/05/31 17:01:24 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ *
+ */
+
+/*
+ * This module contains the routines to implement the CODA name cache. The
+ * purpose of this cache is to reduce the cost of translating pathnames
+ * into Vice FIDs. Each entry in the cache contains the name of the file,
+ * the vnode (FID) of the parent directory, and the cred structure of the
+ * user accessing the file.
+ *
+ * The first time a file is accessed, it is looked up by the local Venus
+ * which first ensures that the user has access to the file. In addition
+ * we are guaranteed that Venus will invalidate any name cache entries in
+ * case the user no longer should be able to access the file. For these
+ * reasons we do not need to keep access list information as well as a
+ * cred structure for each entry.
+ *
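+ * The table can be accessed through the routines coda_nc_init(),
+ * coda_nc_enter(), coda_nc_lookup(), coda_nc_zapParentfid(),
+ * coda_nc_zapfid(), coda_nc_zapvnode(), coda_nc_zapfile(),
+ * coda_nc_purge_user(), and coda_nc_flush() (older revisions of this
+ * comment referred to them by their former cnc_* names).
+ * There are several other routines which aid in the implementation of the
+ * hash table.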
+ */
+
+/*
+ * NOTES: rvb@cs
+ * 1. The name cache holds a reference to every vnode in it. Hence files can not be
+ * closed or made inactive until they are released.
+ * 2. coda_nc_name(cp) was added to get a name for a cnode pointer for debugging.
+ * 3. coda_nc_find() has debug code to detect when entries are stored with different
+ * credentials. We don't understand yet, if/how entries are NOT EQ but still
+ * EQUAL
+ * 4. I wonder if this name cache could be replaced by the vnode name cache.
+ * The latter has no zapping functions, so probably not.
+ */
+
+#include <sys/param.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/ucred.h>
+#include <sys/select.h>
+
+#ifndef insque
+#include <sys/systm.h>
+#endif /* insque */
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_namecache.h>
+
+#ifdef DEBUG
+#include <coda/coda_vnops.h>
+#endif
+
+/*
+ * Declaration of the name cache data structure.
+ */
+
+int coda_nc_use = 1; /* Indicate use of CODA Name Cache */
+int coda_nc_size = CODA_NC_CACHESIZE; /* size of the cache */
+int coda_nc_hashsize = CODA_NC_HASHSIZE; /* size of the primary hash */
+
+struct coda_cache *coda_nc_heap; /* pointer to the cache entries */
+struct coda_hash *coda_nc_hash; /* hash table of coda_cache pointers */
+struct coda_lru coda_nc_lru; /* head of lru chain */
+
+struct coda_nc_statistics coda_nc_stat; /* Keep various stats */
+
+/*
+ * for testing purposes
+ */
+int coda_nc_debug = 0;
+
+/*
+ * Entry points for the CODA Name Cache
+ */
+static struct coda_cache *coda_nc_find(struct cnode *dcp, const char *name, int namelen,
+ struct ucred *cred, int hash);
+static void coda_nc_remove(struct coda_cache *cncp, enum dc_status dcstat);
+
+/*
+ * Initialize the cache, the LRU structure and the Hash structure(s)
+ */
+
+#define TOTAL_CACHE_SIZE (sizeof(struct coda_cache) * coda_nc_size)
+#define TOTAL_HASH_SIZE (sizeof(struct coda_hash) * coda_nc_hashsize)
+
+int coda_nc_initialized = 0; /* Initially the cache has not been initialized */
+
+/*
+ * Build an empty name cache.
+ *
+ * Clears the statistics, allocates the entry heap (coda_nc_size entries)
+ * and the hash table (coda_nc_hashsize buckets), threads every heap entry
+ * onto the LRU chain with a null hash chain and no cnodes, nulls every
+ * hash bucket, and finally bumps coda_nc_initialized.
+ *
+ * Takes no arguments and returns nothing.  The allocation results are not
+ * checked here.
+ */
+void
+coda_nc_init(void)
+{
+    int i;
+
+    /* zero the statistics structure */
+
+    bzero(&coda_nc_stat, (sizeof(struct coda_nc_statistics)));
+
+#ifdef CODA_VERBOSE
+    /*
+     * Report the sizes the allocations below really use.  The run-time
+     * variables are printed rather than the compile-time defaults so the
+     * banner stays truthful if the variables have been changed.
+     */
+    printf("CODA NAME CACHE: CACHE %d, HASH TBL %d\n", coda_nc_size, coda_nc_hashsize);
+#endif
+    CODA_ALLOC(coda_nc_heap, struct coda_cache *, TOTAL_CACHE_SIZE);
+    CODA_ALLOC(coda_nc_hash, struct coda_hash *, TOTAL_HASH_SIZE);
+
+    /* Empty LRU chain: both links refer back to the chain head. */
+    coda_nc_lru.lru_next =
+        coda_nc_lru.lru_prev = (struct coda_cache *)LRU_PART(&coda_nc_lru);
+
+
+    for (i=0; i < coda_nc_size; i++) {	/* initialize the heap */
+        CODA_NC_LRUINS(&coda_nc_heap[i], &coda_nc_lru);
+        CODA_NC_HSHNUL(&coda_nc_heap[i]);
+        coda_nc_heap[i].cp = coda_nc_heap[i].dcp = (struct cnode *)0;
+    }
+
+    for (i=0; i < coda_nc_hashsize; i++) {	/* initialize the hashtable */
+        CODA_NC_HSHNUL((struct coda_cache *)&coda_nc_hash[i]);
+    }
+
+    coda_nc_initialized++;
+}
+
+/*
+ * Auxiliary routines -- shouldn't be entry points
+ */
+
+/*
+ * Search one hash bucket for the entry naming (dcp, name).
+ *
+ * dcp, name, namelen: the parent directory cnode and the component name.
+ * cred: credential the entry must have been entered with; a null cred
+ *       accepts an entry with any credential.
+ * hash: index of the bucket to walk (the caller computes it).
+ *
+ * Returns the matching entry, or a null pointer when the bucket holds
+ * none.  On a hit the number of entries examined is added to the
+ * Search_len statistic.  With DEBUG defined, entries that match by name
+ * but not by credential are reported on the console.
+ */
+static struct coda_cache *
+coda_nc_find(struct cnode *dcp, const char *name, int namelen,
+    struct ucred *cred, int hash)
+{
+    /* The bucket head doubles as the end-of-chain sentinel. */
+    struct coda_cache *sentinel = (struct coda_cache *)&coda_nc_hash[hash];
+    struct coda_cache *ent;
+    int probes;
+
+    CODA_NC_DEBUG(CODA_NC_FIND,
+        myprintf(("coda_nc_find(dcp %p, name %s, len %d, cred %p, hash %d\n",
+            dcp, name, namelen, cred, hash));)
+
+    probes = 1;
+    ent = coda_nc_hash[hash].hash_next;
+    while (ent != sentinel) {
+        if (CODA_NAMEMATCH(ent, name, namelen, dcp)) {
+            if (cred == 0 || ent->cred == cred) {
+                /* compare cr_uid instead */
+                coda_nc_stat.Search_len += probes;
+                return (ent);
+            }
+#ifdef DEBUG
+            /* Same name, different credential pointer: report both. */
+            printf("coda_nc_find: name %s, new cred = %p, cred = %p\n",
+                name, cred, ent->cred);
+            printf("nref %d, nuid %d, ngid %d // oref %d, ocred %d, ogid %d\n",
+                cred->cr_ref, cred->cr_uid, cred->cr_gid,
+                ent->cred->cr_ref, ent->cred->cr_uid, ent->cred->cr_gid);
+            print_cred(cred);
+            print_cred(ent->cred);
+#endif
+        }
+        ent = ent->hash_next;
+        probes++;
+    }
+
+    return ((struct coda_cache *)0);
+}
+
+/*
+ * Enter a new (dir cnode, name) pair into the cache, updating the
+ * LRU and Hash as needed.
+ *
+ * dcp/name/namelen identify the directory entry, cp is the cnode it
+ * resolves to and cred the credential it was resolved under.
+ *
+ * Nothing is entered when the cache is switched off, when the name is
+ * longer than CODA_NC_NAMELEN, or when an entry for the same
+ * (dcp, name, cred) is already cached; the last two cases are counted in
+ * the statistics.  Otherwise the entry returned by CODA_NC_LRUGET is
+ * recycled: if it is still valid its references are dropped first, then
+ * new references are taken on cp, dcp and cred.
+ */
+void
+coda_nc_enter(dcp, name, namelen, cred, cp)
+ struct cnode *dcp;
+ const char *name;
+ int namelen;
+ struct ucred *cred;
+ struct cnode *cp;
+{
+ struct coda_cache *cncp;
+ int hash;
+
+ if (coda_nc_use == 0) /* Cache is off */
+ return;
+
+ CODA_NC_DEBUG(CODA_NC_ENTER,
+ myprintf(("Enter: dcp %p cp %p name %s cred %p \n",
+ dcp, cp, name, cred)); )
+
+ /* Names that do not fit in an entry are never cached. */
+ if (namelen > CODA_NC_NAMELEN) {
+ CODA_NC_DEBUG(CODA_NC_ENTER,
+ myprintf(("long name enter %s\n",name));)
+ coda_nc_stat.long_name_enters++; /* record stats */
+ return;
+ }
+
+ /* Refuse to enter the same (dcp, name, cred) twice. */
+ hash = CODA_NC_HASH(name, namelen, dcp);
+ cncp = coda_nc_find(dcp, name, namelen, cred, hash);
+ if (cncp != (struct coda_cache *) 0) {
+ coda_nc_stat.dbl_enters++; /* duplicate entry */
+ return;
+ }
+
+ coda_nc_stat.enters++; /* record the enters statistic */
+
+ /* Grab the next element in the lru chain */
+ cncp = CODA_NC_LRUGET(coda_nc_lru);
+
+ CODA_NC_LRUREM(cncp); /* remove it from the lists */
+
+ /* The recycled slot may still hold a live entry: evict it. */
+ if (CODA_NC_VALID(cncp)) {
+ /* Seems really ugly, but we have to decrement the appropriate
+ hash bucket length here, so we have to find the hash bucket
+ */
+ coda_nc_hash[CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp)].length--;
+
+ coda_nc_stat.lru_rm++; /* zapped a valid entry */
+ CODA_NC_HSHREM(cncp);
+ /* Drop the references the evicted entry was holding. */
+ vrele(CTOV(cncp->dcp));
+ vrele(CTOV(cncp->cp));
+ crfree(cncp->cred);
+ }
+
+ /*
+ * Put a hold on the current vnodes and fill in the cache entry.
+ */
+ vref(CTOV(cp));
+ vref(CTOV(dcp));
+ crhold(cred);
+ cncp->dcp = dcp;
+ cncp->cp = cp;
+ cncp->namelen = namelen;
+ cncp->cred = cred;
+
+ /* Copies exactly namelen bytes; no terminating NUL is written here. */
+ bcopy(name, cncp->name, (unsigned)namelen);
+
+ /* Insert into the lru and hash chains. */
+
+ CODA_NC_LRUINS(cncp, &coda_nc_lru);
+ CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]);
+ coda_nc_hash[hash].length++; /* Used for tuning */
+
+ CODA_NC_DEBUG(CODA_NC_PRINTCODA_NC, print_coda_nc(); )
+}
+
+/*
+ * Look up the (dir cnode, name) pair on behalf of cred.
+ *
+ * Returns the cached child cnode when an entry with a matching credential
+ * exists, otherwise a null pointer.  A null pointer is also returned when
+ * the cache is switched off or the name is longer than CODA_NC_NAMELEN.
+ * Hits, misses and over-long names are counted in coda_nc_stat.
+ *
+ * A hit re-inserts the entry into both the LRU chain and its hash chain;
+ * the bucket length is unchanged by that.
+ */
+struct cnode *
+coda_nc_lookup(struct cnode *dcp, const char *name, int namelen,
+    struct ucred *cred)
+{
+    struct coda_cache *hit;
+    int bucket;
+
+    /* Cache is off */
+    if (coda_nc_use == 0)
+        return ((struct cnode *)0);
+
+    if (namelen > CODA_NC_NAMELEN) {
+        CODA_NC_DEBUG(CODA_NC_LOOKUP,
+            myprintf(("long name lookup %s\n",name));)
+        coda_nc_stat.long_name_lookups++;	/* record stats */
+        return ((struct cnode *)0);
+    }
+
+    /*
+     * The hash picks the bucket; coda_nc_find() then walks that bucket
+     * looking for the entry with the right credential.
+     */
+    bucket = CODA_NC_HASH(name, namelen, dcp);
+    hit = coda_nc_find(dcp, name, namelen, cred, bucket);
+
+    if (hit != (struct coda_cache *)0) {
+        coda_nc_stat.hits++;
+
+        /* put this entry at the end of the LRU */
+        CODA_NC_LRUREM(hit);
+        CODA_NC_LRUINS(hit, &coda_nc_lru);
+
+        /* move it to the front of the hash chain */
+        CODA_NC_HSHREM(hit);
+        CODA_NC_HSHINS(hit, &coda_nc_hash[bucket]);
+
+        CODA_NC_DEBUG(CODA_NC_LOOKUP,
+            printf("lookup: dcp %p, name %s, cred %p = cp %p\n",
+                dcp, name, cred, hit->cp); )
+
+        return (hit->cp);
+    }
+
+    coda_nc_stat.misses++;	/* record miss */
+    return ((struct cnode *)0);
+}
+
+/*
+ * Invalidate one cache entry and return it to the pool.
+ *
+ * cncp is the entry to drop.  dcstat tells whether the removal is part of
+ * a downcall: if it is, a cnode whose vnode use count is exactly 1 (so
+ * the vrele() below is about to drop the last reference) is flagged
+ * C_PURGING first.
+ *
+ * The caller is responsible for decrementing the hash bucket length; this
+ * routine does not touch it.
+ */
+static void
+coda_nc_remove(cncp, dcstat)
+ struct coda_cache *cncp;
+ enum dc_status dcstat;
+{
+ /*
+ * remove an entry -- vrele(cncp->dcp, cp), crfree(cred),
+ * remove it from its hash chain, and
+ * place it at the head of the lru list.
+ */
+ CODA_NC_DEBUG(CODA_NC_REMOVE,
+ myprintf(("coda_nc_remove %s from parent %lx.%lx.%lx\n",
+ cncp->name, (cncp->dcp)->c_fid.Volume,
+ (cncp->dcp)->c_fid.Vnode, (cncp->dcp)->c_fid.Unique));)
+
+ CODA_NC_HSHREM(cncp);
+
+ CODA_NC_HSHNUL(cncp); /* have it be a null chain */
+ /* Flag the parent before releasing what may be its last reference. */
+ if ((dcstat == IS_DOWNCALL) && (CTOV(cncp->dcp)->v_usecount == 1)) {
+ cncp->dcp->c_flags |= C_PURGING;
+ }
+ vrele(CTOV(cncp->dcp));
+
+ /* Same treatment for the child cnode. */
+ if ((dcstat == IS_DOWNCALL) && (CTOV(cncp->cp)->v_usecount == 1)) {
+ cncp->cp->c_flags |= C_PURGING;
+ }
+ vrele(CTOV(cncp->cp));
+
+ crfree(cncp->cred);
+ /* Wipe the payload so the entry no longer holds stale data. */
+ bzero(DATA_PART(cncp),DATA_SIZE);
+
+ /* Put the null entry just after the least-recently-used entry */
+ /* LRU_TOP adjusts the pointer to point to the top of the structure. */
+ /*
+ * NOTE(review): the comment at the top of this function says "head of
+ * the lru list" while the one above says "just after the
+ * least-recently-used entry"; check the CODA_NC_LRUINS definition to see
+ * which position is really used.
+ */
+ CODA_NC_LRUREM(cncp);
+ CODA_NC_LRUINS(cncp, LRU_TOP(coda_nc_lru.lru_prev));
+}
+
+/*
+ * Remove all entries with a parent which has the input fid.
+ *
+ * fid is compared field by field (Volume, Vnode, Unique) against the fid
+ * of each entry's parent cnode.  dcstat is passed through to
+ * coda_nc_remove().  Does nothing when the cache is switched off.
+ */
+void
+coda_nc_zapParentfid(fid, dcstat)
+ ViceFid *fid;
+ enum dc_status dcstat;
+{
+ /* To get to a specific fid, we might either have another hashing
+ function or do a sequential search through the cache for the
+ appropriate entries. The latter may be acceptable since I don't
+ think callbacks or whatever Case 1 covers are frequent occurrences.
+ */
+ struct coda_cache *cncp, *ncncp;
+ int i;
+
+ if (coda_nc_use == 0) /* Cache is off */
+ return;
+
+ CODA_NC_DEBUG(CODA_NC_ZAPPFID,
+ myprintf(("ZapParent: fid 0x%lx, 0x%lx, 0x%lx \n",
+ fid->Volume, fid->Vnode, fid->Unique)); )
+
+ coda_nc_stat.zapPfids++;
+
+ /* Sequential sweep over every bucket of the hash table. */
+ for (i = 0; i < coda_nc_hashsize; i++) {
+
+ /*
+ * Need to save the hash_next pointer in case we remove the
+ * entry. remove causes hash_next to point to itself.
+ */
+
+ for (cncp = coda_nc_hash[i].hash_next;
+ cncp != (struct coda_cache *)&coda_nc_hash[i];
+ cncp = ncncp) {
+ ncncp = cncp->hash_next;
+ if ((cncp->dcp->c_fid.Volume == fid->Volume) &&
+ (cncp->dcp->c_fid.Vnode == fid->Vnode) &&
+ (cncp->dcp->c_fid.Unique == fid->Unique)) {
+ coda_nc_hash[i].length--; /* Used for tuning */
+ coda_nc_remove(cncp, dcstat);
+ }
+ }
+ }
+}
+
+
+/*
+ * Remove all entries which have the same fid as the input
+ *
+ * fid is compared field by field (Volume, Vnode, Unique) against the fid
+ * of each entry's own cnode (cp), not its parent.  dcstat is passed
+ * through to coda_nc_remove().  Does nothing when the cache is switched
+ * off.
+ */
+void
+coda_nc_zapfid(fid, dcstat)
+ ViceFid *fid;
+ enum dc_status dcstat;
+{
+ /* See comment for zapParentfid. This routine will be used
+ if attributes are being cached.
+ */
+ struct coda_cache *cncp, *ncncp;
+ int i;
+
+ if (coda_nc_use == 0) /* Cache is off */
+ return;
+
+ CODA_NC_DEBUG(CODA_NC_ZAPFID,
+ myprintf(("Zapfid: fid 0x%lx, 0x%lx, 0x%lx \n",
+ fid->Volume, fid->Vnode, fid->Unique)); )
+
+ coda_nc_stat.zapFids++;
+
+ /* Sweep every bucket of the hash table. */
+ for (i = 0; i < coda_nc_hashsize; i++) {
+ for (cncp = coda_nc_hash[i].hash_next;
+ cncp != (struct coda_cache *)&coda_nc_hash[i];
+ cncp = ncncp) {
+ /* Fetch the successor first: it is read before the entry is removed. */
+ ncncp = cncp->hash_next;
+ if ((cncp->cp->c_fid.Volume == fid->Volume) &&
+ (cncp->cp->c_fid.Vnode == fid->Vnode) &&
+ (cncp->cp->c_fid.Unique == fid->Unique)) {
+ coda_nc_hash[i].length--; /* Used for tuning */
+ coda_nc_remove(cncp, dcstat);
+ }
+ }
+ }
+}
+
+/*
+ * Remove all entries which match the fid and the cred
+ *
+ * Deliberately unimplemented: apart from the optional debug trace this
+ * routine removes nothing and ignores dcstat.
+ */
+void
+coda_nc_zapvnode(fid, cred, dcstat)
+ ViceFid *fid;
+ struct ucred *cred;
+ enum dc_status dcstat;
+{
+ /* See comment for zapfid. I don't think that one would ever
+ want to zap a file with a specific cred from the kernel.
+ We'll leave this one unimplemented.
+ */
+ if (coda_nc_use == 0) /* Cache is off */
+ return;
+
+ /* Trace the request only; no cache entry is touched. */
+ CODA_NC_DEBUG(CODA_NC_ZAPVNODE,
+ myprintf(("Zapvnode: fid 0x%lx, 0x%lx, 0x%lx cred %p\n",
+ fid->Volume, fid->Vnode, fid->Unique, cred)); )
+
+}
+
+/*
+ * Drop every cached entry for the (dir cnode, name) pair, whatever
+ * credential it was entered under.
+ *
+ * Does nothing when the cache is switched off.  Names longer than
+ * CODA_NC_NAMELEN are only counted (long_remove).  Entries are removed
+ * with NOT_DOWNCALL.
+ */
+void
+coda_nc_zapfile(struct cnode *dcp, const char *name, int namelen)
+{
+    struct coda_cache *victim;
+    int bucket;
+
+    /* Cache is off */
+    if (coda_nc_use == 0)
+        return;
+
+    CODA_NC_DEBUG(CODA_NC_ZAPFILE,
+        myprintf(("Zapfile: dcp %p name %s \n",
+            dcp, name)); )
+
+    if (namelen > CODA_NC_NAMELEN) {
+        coda_nc_stat.long_remove++;	/* record stats */
+        return;
+    }
+
+    coda_nc_stat.zapFile++;
+
+    bucket = CODA_NC_HASH(name, namelen, dcp);
+
+    /*
+     * A null cred makes coda_nc_find() accept any credential, so keep
+     * asking until the bucket has no entry left for this name.
+     */
+    for (victim = coda_nc_find(dcp, name, namelen, 0, bucket);
+        victim != (struct coda_cache *)0;
+        victim = coda_nc_find(dcp, name, namelen, 0, bucket)) {
+        coda_nc_hash[bucket].length--;	/* Used for tuning */
+        coda_nc_remove(victim, NOT_DOWNCALL);
+    }
+}
+
+/*
+ * Remove all the entries for a particular user. Used when tokens expire.
+ * A user is determined by his/her effective user id (id_uid).
+ *
+ * In this implementation the match is made against the cr_uid field of
+ * each entry's credential.  dcstat is passed through to coda_nc_remove().
+ * Does nothing when the cache is switched off.
+ */
+void
+coda_nc_purge_user(uid, dcstat)
+ vuid_t uid;
+ enum dc_status dcstat;
+{
+ /*
+ * I think the best approach is to go through the entire cache
+ * via HASH or whatever and zap all entries which match the
+ * input cred. Or just flush the whole cache. It might be
+ * best to go through on basis of LRU since cache will almost
+ * always be full and LRU is more straightforward.
+ */
+
+ struct coda_cache *cncp, *ncncp;
+ int hash;
+
+ if (coda_nc_use == 0) /* Cache is off */
+ return;
+
+ CODA_NC_DEBUG(CODA_NC_PURGEUSER,
+ myprintf(("ZapDude: uid %x\n", uid)); )
+ coda_nc_stat.zapUsers++;
+
+ /* Walk the whole LRU chain; it contains every entry, valid or not. */
+ for (cncp = CODA_NC_LRUGET(coda_nc_lru);
+ cncp != (struct coda_cache *)(&coda_nc_lru);
+ cncp = ncncp) {
+ /* Fetch the successor first: coda_nc_remove() relinks cncp in the LRU. */
+ ncncp = CODA_NC_LRUGET(*cncp);
+
+ /* Only valid entries are examined; their cred is then dereferenced. */
+ if ((CODA_NC_VALID(cncp)) &&
+ ((cncp->cred)->cr_uid == uid)) {
+ /* Seems really ugly, but we have to decrement the appropriate
+ hash bucket length here, so we have to find the hash bucket
+ */
+ hash = CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp);
+ coda_nc_hash[hash].length--; /* For performance tuning */
+
+ coda_nc_remove(cncp, dcstat);
+ }
+ }
+}
+
+/*
+ * Flush the entire name cache. In response to a flush of the Venus cache.
+ *
+ * Every valid entry is unhashed, its references on the parent cnode, the
+ * file cnode and the cred are dropped, and its data part is zeroed. All
+ * hash bucket lengths are then reset to 0.
+ *
+ * NOTE: this is a no-op while coda_nc_use == 0, so a caller that wants
+ * the entries released must call it before turning the cache off.
+ */
+void
+coda_nc_flush(dcstat)
+ enum dc_status dcstat;
+{
+ /* One option is to deallocate the current name cache and
+ call init to start again. Or just deallocate, then rebuild.
+ Or again, we could just go through the array and zero the
+ appropriate fields.
+ */
+
+ /*
+ * Go through the whole lru chain and kill everything as we go.
+ * I don't use remove since that would rebuild the lru chain
+ * as it went and that seemed unnecessary.
+ */
+ struct coda_cache *cncp;
+ int i;
+
+ if (coda_nc_use == 0) /* Cache is off */
+ return;
+
+ coda_nc_stat.Flushes++;
+
+ for (cncp = CODA_NC_LRUGET(coda_nc_lru);
+ cncp != (struct coda_cache *)&coda_nc_lru;
+ cncp = CODA_NC_LRUGET(*cncp)) {
+ if (CODA_NC_VALID(cncp)) {
+
+ CODA_NC_HSHREM(cncp); /* only zero valid nodes */
+ CODA_NC_HSHNUL(cncp);
+ /*
+ * On a downcall, flag the parent as C_PURGING when its use
+ * count is 1, i.e. (presumably) when the vrele() below drops
+ * the last reference.
+ */
+ if ((dcstat == IS_DOWNCALL)
+ && (CTOV(cncp->dcp)->v_usecount == 1))
+ {
+ cncp->dcp->c_flags |= C_PURGING;
+ }
+ vrele(CTOV(cncp->dcp));
+
+ /* A vnode flagged VTEXT gets a coda_vmflush(); failure is only logged. */
+ if (CTOV(cncp->cp)->v_flag & VTEXT) {
+ if (coda_vmflush(cncp->cp))
+ CODADEBUG(CODA_FLUSH,
+ myprintf(("coda_nc_flush: (%lx.%lx.%lx) busy\n", cncp->cp->c_fid.Volume, cncp->cp->c_fid.Vnode, cncp->cp->c_fid.Unique)); )
+ }
+
+ /* Same C_PURGING treatment for the file's own cnode. */
+ if ((dcstat == IS_DOWNCALL)
+ && (CTOV(cncp->cp)->v_usecount == 1))
+ {
+ cncp->cp->c_flags |= C_PURGING;
+ }
+ vrele(CTOV(cncp->cp));
+
+ crfree(cncp->cred);
+ /*
+ * DATA_PART starts after the four chain pointers, so the entry
+ * keeps its place on the lru chain while cp/dcp/cred/name are
+ * cleared (which also makes CODA_NC_VALID false).
+ */
+ bzero(DATA_PART(cncp),DATA_SIZE);
+ }
+ }
+
+ /* Every chain is empty now; reset the tuning counters to match. */
+ for (i = 0; i < coda_nc_hashsize; i++)
+ coda_nc_hash[i].length = 0;
+}
+
+/*
+ * Debugging routines
+ */
+
+/*
+ * Print all the hash chains to the console, one cache entry per line.
+ *
+ * The cached name is a counted string: struct coda_cache keeps it in a
+ * fixed char array next to a separate namelen, and nothing guarantees a
+ * terminating NUL (coda_nc_name() copies and terminates it by hand before
+ * printing). The %s conversion is therefore bounded by namelen so the
+ * print never runs past the end of the name array.
+ */
+void
+print_coda_nc(void)
+{
+	int hash;
+	struct coda_cache *cncp;
+
+	for (hash = 0; hash < coda_nc_hashsize; hash++) {
+		myprintf(("\nhash %d\n",hash));
+
+		/* The bucket head doubles as the end-of-chain sentinel. */
+		for (cncp = coda_nc_hash[hash].hash_next;
+		     cncp != (struct coda_cache *)&coda_nc_hash[hash];
+		     cncp = cncp->hash_next) {
+			myprintf(("cp %p dcp %p cred %p name %.*s\n",
+				  cncp->cp, cncp->dcp,
+				  cncp->cred, cncp->namelen, cncp->name));
+		}
+	}
+}
+
+/*
+ * Recompute the hash-distribution figures in coda_nc_stat from the
+ * per-bucket length counters: total entries, number of empty buckets,
+ * longest chain, and the sum of squared deviations from the mean chain
+ * length (mean taken over the non-empty buckets only, integer division).
+ */
+void
+coda_nc_gather_stats(void)
+{
+	int bucket, len, used, mean, dev;
+	int total = 0, longest = 0, empty = 0, sq_total = 0;
+
+	/* First pass: totals, maximum and count of empty buckets. */
+	for (bucket = 0; bucket < coda_nc_hashsize; bucket++) {
+		len = coda_nc_hash[bucket].length;
+
+		if (len == 0)
+			empty++;
+		else
+			total += len;
+
+		if (len > longest)
+			longest = len;
+	}
+
+	coda_nc_stat.Sum_bucket_len = total;
+	coda_nc_stat.Num_zero_len = empty;
+	coda_nc_stat.Max_bucket_len = longest;
+
+	/* Arithmetic mean over the buckets that actually hold entries. */
+	used = coda_nc_hashsize - empty;
+	mean = (used > 0) ? (total / used) : 0;
+
+	/* Second pass: squared deviation of every non-empty bucket. */
+	for (bucket = 0; bucket < coda_nc_hashsize; bucket++) {
+		len = coda_nc_hash[bucket].length;
+		if (len == 0)
+			continue;
+		dev = len - mean;
+		sq_total += dev * dev;
+	}
+	coda_nc_stat.Sum2_bucket_len = sq_total;
+}
+
+/*
+ * The purpose of this routine is to allow the hash and cache sizes to be
+ * changed dynamically. This should only be used in controlled environments,
+ * it makes no effort to lock other users from accessing the cache while it
+ * is in an improper state (except by turning the cache off).
+ *
+ * hashsize and heapsize are the new number of hash buckets and of cache
+ * entries; dcstat is passed on to coda_nc_flush(). Returns EINVAL when a
+ * size is not a positive even number, 0 otherwise. The cache is always
+ * left turned on after a successful resize.
+ */
+int
+coda_nc_resize(hashsize, heapsize, dcstat)
+	int hashsize, heapsize;
+	enum dc_status dcstat;
+{
+	/*
+	 * Illegal hash or cache sizes. Zero and negative values are even too,
+	 * but a zero-sized table makes the (coda_nc_hashsize-1) hash mask
+	 * index outside the table, so reject them explicitly.
+	 */
+	if ((hashsize <= 0) || (heapsize <= 0) ||
+	    (hashsize % 2) || (heapsize % 2)) {
+		return(EINVAL);
+	}
+
+	/*
+	 * Free any cnodes in the cache. This has to happen while the cache
+	 * is still on: coda_nc_flush() returns immediately when coda_nc_use
+	 * is 0, which would leave every cached vnode and cred reference held
+	 * while the heap that records them is freed below.
+	 */
+	coda_nc_flush(dcstat);
+
+	coda_nc_use = 0;			/* Turn the cache off */
+
+	/* WARNING: free must happen *before* size is reset */
+	CODA_FREE(coda_nc_heap,TOTAL_CACHE_SIZE);
+	CODA_FREE(coda_nc_hash,TOTAL_HASH_SIZE);
+
+	coda_nc_hashsize = hashsize;
+	coda_nc_size = heapsize;
+
+	coda_nc_init();				/* Set up a cache with the new size */
+
+	coda_nc_use = 1;			/* Turn the cache back on */
+	return(0);
+}
+
+#ifdef DEBUG
+char coda_nc_name_buf[CODA_MAXNAMLEN+1];
+
+/*
+ * Debug aid: print every cached name that refers to cnode cp, together
+ * with the entry's cp, dcp and its own address. The name is copied into
+ * coda_nc_name_buf and NUL-terminated there before printing.
+ */
+void
+coda_nc_name(struct cnode *cp)
+{
+	struct coda_cache *entry, *head;
+	int bucket;
+
+	if (coda_nc_use == 0)			/* Cache is off */
+		return;
+
+	for (bucket = 0; bucket < coda_nc_hashsize; bucket++) {
+		/* The bucket head terminates its own chain. */
+		head = (struct coda_cache *)&coda_nc_hash[bucket];
+
+		for (entry = coda_nc_hash[bucket].hash_next;
+		     entry != head;
+		     entry = entry->hash_next) {
+			if (entry->cp != cp)
+				continue;
+
+			bcopy(entry->name, coda_nc_name_buf, entry->namelen);
+			coda_nc_name_buf[entry->namelen] = 0;
+			printf(" is %s (%p,%p)@%p",
+			       coda_nc_name_buf, entry->cp, entry->dcp, entry);
+		}
+	}
+}
+#endif
diff --git a/sys/fs/coda/coda_namecache.h b/sys/fs/coda/coda_namecache.h
new file mode 100644
index 0000000..f7b3194
--- /dev/null
+++ b/sys/fs/coda/coda_namecache.h
@@ -0,0 +1,285 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_namecache.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_namecache.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon University.
+ * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_namecache.h,v $
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.8 1998/08/28 18:12:25 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.7 1998/08/18 17:05:24 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.6 1998/08/18 16:31:49 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.5 98/01/23 11:53:51 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.4.2.1 97/12/16 12:40:23 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.4 97/12/05 10:39:29 rvb
+ * Read CHANGES
+ *
+ * Revision 1.3.4.3 97/11/24 15:44:51 rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ *
+ * Revision 1.3.4.2 97/11/12 12:09:44 rvb
+ * reorg pass1
+ *
+ * Revision 1.3.4.1 97/11/06 21:06:05 rvb
+ * don't include headers in headers
+ *
+ * Revision 1.3 97/08/05 11:08:19 lily
+ * Removed cfsnc_replace, replaced it with a coda_find, unhash, and
+ * rehash. This fixes a cnode leak and a bug in which the fid is
+ * not actually replaced. (cfs_namecache.c, cfsnc.h, cfs_subr.c)
+ *
+ * Revision 1.2 96/01/02 16:57:19 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:45 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:08:22 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:08:21 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.2 1994/08/28 19:37:39 luqi
+ * Add a new CODA_REPLACE call to allow venus to replace a ViceFid in the
+ * mini-cache.
+ *
+ * In "cfs.h":
+ * Add CODA_REPLACE decl.
+ *
+ * In "cfs_namecache.c":
+ * Add routine cfsnc_replace.
+ *
+ * In "cfs_subr.c":
+ * Add case-statement to process CODA_REPLACE.
+ *
+ * In "cfsnc.h":
+ * Add decl for CODA_NC_REPLACE.
+ *
+ * Revision 2.1 94/07/21 16:25:27 satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.2 92/10/27 17:58:34 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 2.2 90/07/05 11:27:04 mrt
+ * Created for the Coda File System.
+ * [90/05/23 dcs]
+ *
+ * Revision 1.4 90/05/31 17:02:12 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ *
+ */
+#ifndef _CODA_NC_HEADER_
+#define _CODA_NC_HEADER_
+
+/*
+ * Coda constants
+ */
+#define CODA_NC_NAMELEN 15 /* longest name stored in cache */
+#define CODA_NC_CACHESIZE 256 /* Default cache size */
+#define CODA_NC_HASHSIZE 64 /* Must be multiple of 2 */
+
+/*
+ * Hash function for the primary hash.
+ */
+
+/*
+ * First try -- (first + last letters + length + (int)cp) mod size
+ * 2nd try -- same, except dir fid.vnode instead of cp
+ */
+
+/*
+ * Bucket index for the counted string (name, namelen) in directory cp.
+ * The result is masked with (coda_nc_hashsize-1).
+ * NOTE(review): the pointer is cast to int, which truncates it where
+ * pointers are wider than int -- confirm this is acceptable on such ports.
+ */
+#ifdef oldhash
+#define CODA_NC_HASH(name, namelen, cp) \
+	(((name)[0] + (name)[(namelen)-1] + (namelen) + (int)(cp)) & (coda_nc_hashsize-1))
+#else
+#define CODA_NC_HASH(name, namelen, cp) \
+	(((name)[0] + ((name)[(namelen)-1]<<4) + (namelen) + (((int)(cp))>>8)) & (coda_nc_hashsize-1))
+#endif
+
+/*
+ * True when cache entry cp holds the counted string (nm, nmlen) for
+ * directory dp. The parameters are deliberately not called name, namelen
+ * or dcp: the preprocessor would substitute them into the member names
+ * after "->" as well, so the macro would only work for callers whose
+ * variables happen to carry exactly those names.
+ */
+#define CODA_NAMEMATCH(cp, nm, nmlen, dp) \
+	(((nmlen) == (cp)->namelen) && ((dp) == (cp)->dcp) && \
+	 (bcmp((cp)->name,(nm),(nmlen)) == 0))
+
+/*
+ * Functions to modify the hash and lru chains.
+ * insque and remque assume that the pointers are the first thing
+ * in the list node, thus the trickery for lru.
+ * None of these expansions ends in a semicolon; the caller supplies it.
+ */
+
+#define CODA_NC_HSHINS(elem, pred)	insque(elem,pred)
+#define CODA_NC_HSHREM(elem)		remque(elem)
+#define CODA_NC_HSHNUL(elem)		((elem)->hash_next = \
+					 (elem)->hash_prev = (elem))
+
+#define CODA_NC_LRUINS(elem, pred)	insque(LRU_PART(elem), LRU_PART(pred))
+#define CODA_NC_LRUREM(elem)		remque(LRU_PART(elem))
+#define CODA_NC_LRUGET(lruhead)		LRU_TOP((lruhead).lru_prev)
+
+/* An entry is in use when it has a parent cnode. */
+#define CODA_NC_VALID(cncp)	((cncp)->dcp != (struct cnode *)0)
+
+/*
+ * LRU_PART: address of the lru pointer pair inside an entry (it follows
+ * the two hash pointers). LRU_TOP: the inverse, from the lru pointer pair
+ * back to the start of the entry. DATA_PART/DATA_SIZE: everything after
+ * the four chain pointers.
+ */
+#define LRU_PART(cncp)	((struct coda_cache *) \
+			 ((char *)(cncp) + (2*sizeof(struct coda_cache *))))
+#define LRU_TOP(cncp)	((struct coda_cache *) \
+			 ((char *)(cncp) - (2*sizeof(struct coda_cache *))))
+#define DATA_PART(cncp)	((struct coda_cache *) \
+			 ((char *)(cncp) + (4*sizeof(struct coda_cache *))))
+#define DATA_SIZE	(sizeof(struct coda_cache)-(4*sizeof(struct coda_cache *)))
+
+/*
+ * Structure for an element in the CODA Name Cache.
+ * NOTE: I use the position of arguments and their size in the
+ * implementation of the functions CODA_NC_LRUINS, CODA_NC_LRUREM, and
+ * DATA_PART.
+ * Do not reorder the four leading pointers: LRU_PART/LRU_TOP assume the
+ * lru pair sits two pointers into the entry and DATA_PART assumes the
+ * payload starts four pointers in.
+ */
+
+struct coda_cache {
+ struct coda_cache *hash_next,*hash_prev; /* Hash list */
+ struct coda_cache *lru_next, *lru_prev; /* LRU list */
+ struct cnode *cp; /* vnode of the file */
+ struct cnode *dcp; /* parent's cnode */
+ struct ucred *cred; /* user credentials */
+ char name[CODA_NC_NAMELEN]; /* segment name */
+ /* name is a counted string; no room is reserved for a NUL */
+ int namelen; /* length of name */
+};
+
+/*
+ * Head of the LRU chain. The two dummies pad the lru pointers to the same
+ * offset they have in struct coda_cache, so LRU_PART/LRU_TOP work on the
+ * head too.
+ */
+struct coda_lru { /* Start of LRU chain */
+ char *dummy1, *dummy2; /* place holders */
+ struct coda_cache *lru_next, *lru_prev; /* position of pointers is important */
+};
+
+
+/*
+ * Head of one hash chain. Chain walks cast its address to
+ * struct coda_cache * and use it as the end-of-chain sentinel.
+ */
+struct coda_hash { /* Start of Hash chain */
+ struct coda_cache *hash_next, *hash_prev; /* NOTE: chain pointers must be first */
+ int length; /* used for tuning purposes */
+};
+
+
+/*
+ * Symbols to aid in debugging the namecache code. Assumes the existence
+ * of the variable coda_nc_debug, which is defined in coda_namecache.c
+ *
+ * N is a bit number (one of the CODA_NC_* debug constants); STMT runs
+ * only when that bit is set in coda_nc_debug. The expansion is a bare
+ * brace block, so STMT must carry its own terminating semicolon and the
+ * invocation is written without one after the closing parenthesis.
+ */
+#define CODA_NC_DEBUG(N, STMT) { if (coda_nc_debug & (1 <<N)) { STMT } }
+
+/* Prototypes of functions exported within coda */
+extern void coda_nc_init(void);
+extern void coda_nc_enter(struct cnode *, const char *, int, struct ucred *, struct cnode *);
+extern struct cnode *coda_nc_lookup(struct cnode *, const char *, int, struct ucred *);
+
+extern void coda_nc_zapParentfid(ViceFid *, enum dc_status);
+extern void coda_nc_zapfid(ViceFid *, enum dc_status);
+extern void coda_nc_zapvnode(ViceFid *, struct ucred *, enum dc_status);
+extern void coda_nc_zapfile(struct cnode *, const char *, int);
+extern void coda_nc_purge_user(vuid_t, enum dc_status);
+extern void coda_nc_flush(enum dc_status);
+
+extern void print_coda_nc(void);
+extern void coda_nc_gather_stats(void);
+extern int coda_nc_resize(int, int, enum dc_status);
+extern void coda_nc_name(struct cnode *cp);
+
+/*
+ * Structure to contain statistics on the cache usage
+ * (only the counters whose updates are visible next to the zap/flush and
+ * gather_stats routines are annotated here).
+ */
+
+struct coda_nc_statistics {
+ unsigned hits;
+ unsigned misses;
+ unsigned enters;
+ unsigned dbl_enters;
+ unsigned long_name_enters;
+ unsigned long_name_lookups;
+ unsigned long_remove; /* zapfile calls skipped: name > CODA_NC_NAMELEN */
+ unsigned lru_rm;
+ unsigned zapPfids;
+ unsigned zapFids;
+ unsigned zapFile; /* coda_nc_zapfile calls that were carried out */
+ unsigned zapUsers; /* coda_nc_purge_user calls */
+ unsigned Flushes; /* coda_nc_flush calls with the cache on */
+ unsigned Sum_bucket_len; /* sum of all bucket lengths */
+ unsigned Sum2_bucket_len; /* sum of squared deviations from the mean */
+ unsigned Max_bucket_len; /* longest bucket */
+ unsigned Num_zero_len; /* number of empty buckets */
+ unsigned Search_len;
+};
+
+/*
+ * Debug selectors for CODA_NC_DEBUG: each value is a bit number, tested
+ * as (coda_nc_debug & (1 << N)).
+ */
+#define CODA_NC_FIND ((u_long) 1)
+#define CODA_NC_REMOVE ((u_long) 2)
+#define CODA_NC_INIT ((u_long) 3)
+#define CODA_NC_ENTER ((u_long) 4)
+#define CODA_NC_LOOKUP ((u_long) 5)
+#define CODA_NC_ZAPPFID ((u_long) 6)
+#define CODA_NC_ZAPFID ((u_long) 7)
+#define CODA_NC_ZAPVNODE ((u_long) 8)
+#define CODA_NC_ZAPFILE ((u_long) 9)
+#define CODA_NC_PURGEUSER ((u_long) 10)
+#define CODA_NC_FLUSH ((u_long) 11)
+#define CODA_NC_PRINTCODA_NC ((u_long) 12)
+#define CODA_NC_PRINTSTATS ((u_long) 13)
+
+#endif
diff --git a/sys/fs/coda/coda_opstats.h b/sys/fs/coda/coda_opstats.h
new file mode 100644
index 0000000..e62c04d
--- /dev/null
+++ b/sys/fs/coda/coda_opstats.h
@@ -0,0 +1,127 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_opstats.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_opstats.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ *
+ */
+
+/*
+ * operation stats: what the minicache can intercept that
+ * *isn't* seen by venus. These stats are kept to augment
+ * the stats maintained by the Volume-Session mechanism.
+ */
+
+/* vfsops:
+ * mount: not currently bounced to Venus
+ * umount: nope
+ * root: only first call, rest is cached.
+ * statfs: none (bogus)
+ * sync: none (bogus)
+ * vget: all
+ */
+
+#define CODA_MOUNT_STATS 0
+#define CODA_UMOUNT_STATS 1
+#define CODA_ROOT_STATS 2
+#define CODA_STATFS_STATS 3
+#define CODA_SYNC_STATS 4
+#define CODA_VGET_STATS 5
+#define CODA_VFSOPS_SIZE 6
+
+/* vnodeops:
+ * open: all to venus
+ * close: all to venus
+ * rdrw: bogus. Maybe redirected to UFS.
+ * May call open/close for internal opens/closes
+ * (Does exec not call open?)
+ * ioctl: causes a lookupname
+ * passes through
+ * select: can't get there from here.
+ * getattr: can be satisfied by cache
+ * setattr: all go through
+ * access: can be satisfied by cache
+ * readlink: can be satisfied by cache
+ * fsync: passes through
+ * inactive: passes through
+ * lookup: can be satisfied by cache
+ * create: passes through
+ * remove: passes through
+ * link: passes through
+ * rename: passes through
+ * mkdir: passes through
+ * rmdir: passes through
+ * symlink: passes through
+ * readdir: may be redirected to UFS
+ * may cause an "internal" open/close
+ */
+
+#define CODA_OPEN_STATS 0
+#define CODA_CLOSE_STATS 1
+#define CODA_RDWR_STATS 2
+#define CODA_IOCTL_STATS 3
+#define CODA_SELECT_STATS 4
+#define CODA_GETATTR_STATS 5
+#define CODA_SETATTR_STATS 6
+#define CODA_ACCESS_STATS 7
+#define CODA_READLINK_STATS 8
+#define CODA_FSYNC_STATS 9
+#define CODA_INACTIVE_STATS 10
+#define CODA_LOOKUP_STATS 11
+#define CODA_CREATE_STATS 12
+#define CODA_REMOVE_STATS 13
+#define CODA_LINK_STATS 14
+#define CODA_RENAME_STATS 15
+#define CODA_MKDIR_STATS 16
+#define CODA_RMDIR_STATS 17
+#define CODA_SYMLINK_STATS 18
+#define CODA_READDIR_STATS 19
+#define CODA_VNODEOPS_SIZE 20
+
+/*
+ * I propose the following structures:
+ */
+
+/* Per-operation counters kept by the minicache, one record per opcode. */
+struct coda_op_stats {
+ int opcode; /* vfs opcode */
+ long entries; /* number of times call attempted */
+ long sat_intrn; /* number of times call satisfied by cache */
+ long unsat_intrn; /* number of times call failed in cache, but
+ was not bounced to venus proper. */
+ long gen_intrn; /* number of times call generated internally */
+ /* (do we need that?) */
+};
+
+/*
+ * With each call to the minicache, we'll bump the counters whenever
+ * a call is satisfied internally (through the cache or through a
+ * redirect), and whenever an operation is caused internally.
+ * Then, we can add the total operations caught by the minicache
+ * to the world-wide totals, and leave a caveat for the specific
+ * graphs later.
+ */
diff --git a/sys/fs/coda/coda_pioctl.h b/sys/fs/coda/coda_pioctl.h
new file mode 100644
index 0000000..2aa55bb
--- /dev/null
+++ b/sys/fs/coda/coda_pioctl.h
@@ -0,0 +1,133 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_pioctl.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_pioctl.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * Copyright (c) 1988 Carnegie-Mellon University
+ * Copyright (c) 1987 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_pioctl.h,v $
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.7 1998/08/28 18:12:26 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.6 1998/08/18 17:05:26 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.5 1998/08/18 16:31:51 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.4 98/01/23 11:53:54 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.3.2.1 97/12/06 17:41:29 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.3 97/12/05 10:39:31 rvb
+ * Read CHANGES
+ *
+ * Revision 1.2.34.2 97/11/13 22:03:06 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.2.34.1 97/11/12 12:38:11 rvb
+ * mach_vioctl.h -> pioctl.h
+ *
+ * Revision 1.2 96/01/02 16:57:27 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:54 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 2.4 90/08/30 11:51:12 bohman
+ * Ioctl changes for STDC.
+ * [90/08/28 bohman]
+ *
+ * Revision 2.3 89/03/09 22:10:26 rpd
+ * More cleanup.
+ *
+ * Revision 2.2 89/02/25 17:58:32 gm0w
+ * Changes for cleanup.
+ *
+ * 7-Feb-87 Avadis Tevanian (avie) at Carnegie-Mellon University
+ * No need for VICE conditional.
+ *
+ * 22-Oct-86 Jay Kistler (jjk) at Carnegie-Mellon University
+ * Created from Andrew's vice.h and viceioctl.h.
+ *
+ */
+/*
+ * ITC Remote file system - vice ioctl interface module
+ */
+
+/*
+ * TODO: Find /usr/local/include/viceioctl.h.
+ */
+
+#ifndef _SYS_PIOCTL_H_
+#define _SYS_PIOCTL_H_
+
+/* The 2K limits above are a consequence of the size of the kernel buffer
+ used to buffer requests from the user to venus--2*MAXPATHLEN.
+ The buffer pointers may be null, or the counts may be 0 if there
+ are no input or output parameters
+ */
+
+#define _VICEIOCTL(id) ((unsigned int ) _IOW('V', id, struct ViceIoctl))
+
+/* Use this macro to define up to 256 vice ioctl's. These ioctl's
+ all potentially have in/out parameters--this depends upon the
+ values in the ViceIoctl structure. This structure is itself passed
+ into the kernel by the normal ioctl parameter passing mechanism.
+ */
+
+/*
+ * True when com is one of the 256 vice ioctl numbers. com is evaluated
+ * twice, so do not pass an expression with side effects.
+ */
+#define _VALIDVICEIOCTL(com) ((com) >= _VICEIOCTL(0) && (com) <= _VICEIOCTL(255))
+
+#endif
diff --git a/sys/fs/coda/coda_psdev.c b/sys/fs/coda/coda_psdev.c
new file mode 100644
index 0000000..0d29f6e
--- /dev/null
+++ b/sys/fs/coda/coda_psdev.c
@@ -0,0 +1,788 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_psdev.c,v 1.9 1998/11/11 20:32:20 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University. Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan. */
+
+/*
+ * These routines define the pseudo device for communication between
+ * Coda's Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c,
+ * but I moved them to make it easier to port the Minicache without
+ * porting coda. -- DCS 10/12/94
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_psdev.c,v $
+ * Revision 1.9 1998/11/11 20:32:20 rvb
+ * coda_lookup now passes up an extra flag. But old veni will
+ * be ok; new veni will check /dev/cfs0 to make sure that a new
+ * kernel is running.
+ * Also, a bug in vc_nb_close iff CODA_SIGNAL's were seen has been
+ * fixed.
+ *
+ * Revision 1.8 1998/10/28 20:31:13 rvb
+ * Change the way unmounting happens to guarantee that the
+ * client programs are allowed to finish up (coda_call is
+ * forced to complete) and release their locks. Thus there
+ * is a reasonable chance that the vflush implicit in the
+ * unmount will not get hung on held locks.
+ *
+ * Revision 1.7 1998/09/29 20:19:45 rvb
+ * Fixes for lkm:
+ * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL
+ * 2. don't pass -DCODA to lkm build
+ *
+ * Revision 1.6 1998/09/28 20:52:58 rvb
+ * Cleanup and fix THE bug
+ *
+ * Revision 1.5 1998/09/25 17:38:31 rvb
+ * Put "stray" printouts under DIAGNOSTIC. Make everything build
+ * with DEBUG on. Add support for lkm. (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.4 1998/09/13 13:57:59 rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.9 1998/08/28 18:12:17 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.8 1998/08/18 17:05:15 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.7 1998/08/18 16:31:41 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8 1998/06/09 23:30:42 rvb
+ * Try to allow ^C -- take 1
+ *
+ * Revision 1.5.2.8 98/01/23 11:21:04 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.5.2.7 98/01/22 22:22:21 rvb
+ * sync 1.2 and 1.3
+ *
+ * Revision 1.5.2.6 98/01/22 13:11:24 rvb
+ * Move make_coda_node ctlfid later so vfsp is known; work on ^c and ^z
+ *
+ * Revision 1.5.2.5 97/12/16 22:01:27 rvb
+ * Oops add cfs_subr.h cfs_venus.h; sync with peter
+ *
+ * Revision 1.5.2.4 97/12/16 12:40:05 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.5.2.3 97/12/10 14:08:24 rvb
+ * Fix O_ flags; check result in coda_call
+ *
+ * Revision 1.5.2.2 97/12/10 11:40:24 rvb
+ * No more ody
+ *
+ * Revision 1.5.2.1 97/12/06 17:41:20 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.5 97/12/05 10:39:16 rvb
+ * Read CHANGES
+ *
+ * Revision 1.4.18.9 97/12/05 08:58:07 rvb
+ * peter found this one
+ *
+ * Revision 1.4.18.8 97/11/26 15:28:57 rvb
+ * Cant make downcall pbuf == union cfs_downcalls yet
+ *
+ * Revision 1.4.18.7 97/11/25 09:40:49 rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ *
+ * Revision 1.4.18.6 97/11/20 11:46:41 rvb
+ * Capture current cfs_venus
+ *
+ * Revision 1.4.18.5 97/11/18 10:27:15 rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ *
+ * Revision 1.4.18.4 97/11/13 22:02:59 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.4.18.3 97/11/12 12:09:38 rvb
+ * reorg pass1
+ *
+ * Revision 1.4.18.2 97/10/29 16:06:09 rvb
+ * Kill DYING
+ *
+ * Revision 1.4.18.1 1997/10/28 23:10:15 rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.4 1996/12/12 22:10:58 bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.3 1996/11/13 04:14:20 bnoble
+ * Merging BNOBLE_WORK_6_20_96 into main line
+ *
+ * Revision 1.2.8.1 1996/08/22 14:25:04 bnoble
+ * Added a return code from vc_nb_close
+ *
+ * Revision 1.2 1996/01/02 16:56:58 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:24 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 1.1 1995/03/14 20:52:15 bnoble
+ * Initial revision
+ *
+ */
+
+/* These routines are the device entry points for Venus. */
+
+extern int coda_nc_initialized; /* Set if cache has been initialized */
+
+#include <vcoda.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/file.h>
+#include <sys/ioccom.h>
+#include <sys/poll.h>
+#include <sys/conf.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_namecache.h>
+#include <coda/coda_io.h>
+#include <coda/coda_psdev.h>
+
+#define CTL_C
+
+/* Non-zero makes ENTRY print the name of each function it appears in. */
+int coda_psdev_print_entry = 0;
+/*
+ * Count of woken requests that vc_nb_close() is waiting for; its address
+ * is also the tsleep() channel used there.
+ */
+static
+int outstanding_upcalls = 0;
+/* Priority argument passed to tsleep(). */
+int coda_call_sleep = PZERO - 1;
+/* CTL_C is defined just above, so coda_pcatch is always PCATCH here. */
+#ifdef CTL_C
+int coda_pcatch = PCATCH;
+#else
+#endif
+
+/*
+ * Function-entry trace. Expands to an unbraced if statement, so use it
+ * only as a stand-alone "ENTRY;" statement, never directly before an else.
+ */
+#define ENTRY if(coda_psdev_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
+
+void vcodaattach(int n);
+
+/*
+ * One message queued on a vcomm request or reply queue.
+ */
+struct vmsg {
+ struct queue vm_chain; /* queue linkage, followed with GETNEXT() */
+ caddr_t vm_data; /* message buffer */
+ u_short vm_flags; /* presumably a mask of the VM_* bits below -- confirm */
+ u_short vm_inSize; /* Size is at most 5000 bytes */
+ u_short vm_outSize;
+ u_short vm_opcode; /* copied from data to save ptr lookup */
+ int vm_unique;
+ caddr_t vm_sleep; /* Not used by Mach. */
+ /* &vm_sleep is the wakeup() channel for the waiting client */
+};
+
+#define VM_READ 1
+#define VM_WRITE 2
+#define VM_INTR 4
+
+/*
+ * vcodaattach: do nothing
+ * The argument n is accepted and ignored.
+ */
+void
+vcodaattach(n)
+ int n;
+{
+}
+
+/*
+ * Open entry point of the Venus communication device.
+ *
+ * Returns ENXIO when the minor number is outside [0, NVCODA), EBUSY when
+ * the unit is already open, and 0 on success. The name cache is set up
+ * on first use. A successful open resets the unit's select info, both
+ * message queues and its mount information. flag, mode and p are unused.
+ */
+int
+vc_nb_open(dev, flag, mode, p)
+	dev_t dev;
+	int flag;
+	int mode;
+	struct proc *p;		/* NetBSD only */
+{
+	struct coda_mntinfo *mi;
+	struct vcomm *vcp;
+
+	ENTRY;
+
+	if (minor(dev) < 0 || minor(dev) >= NVCODA)
+		return(ENXIO);
+
+	if (coda_nc_initialized == 0)
+		coda_nc_init();
+
+	mi = &coda_mnttbl[minor(dev)];
+	vcp = &mi->mi_vcomm;
+
+	/* Only one opener per unit. */
+	if (VC_OPEN(vcp))
+		return(EBUSY);
+
+	bzero(&(vcp->vc_selproc), sizeof (struct selinfo));
+	INIT_QUEUE(vcp->vc_requests);
+	INIT_QUEUE(vcp->vc_replys);
+	MARK_VC_OPEN(vcp);
+
+	/* Nothing is mounted through this unit yet. */
+	mi->mi_vfsp = NULL;
+	mi->mi_rootvp = NULL;
+
+	return(0);
+}
+
+/*
+ * vc_nb_close: close one Coda pseudo-device.
+ *
+ * Returns ENXIO for a bad minor number and panics if the device is not
+ * open.  If nothing was mounted through the device it is simply marked
+ * closed.  Otherwise every cnode of the mount is flagged as unmounting,
+ * all queued and in-progress upcalls are woken, the device is marked
+ * closed, the caller sleeps until those upcalls have drained, and the
+ * file system is unmounted.  An unmount error is only logged; the
+ * function still returns 0.
+ */
+int
+vc_nb_close (dev, flag, mode, p)
+ dev_t dev;
+ int flag;
+ int mode;
+ struct proc *p;
+{
+ register struct vcomm *vcp;
+ register struct vmsg *vmp, *nvmp = NULL;
+ struct coda_mntinfo *mi;
+ int err;
+
+ ENTRY;
+
+ if (minor(dev) >= NVCODA || minor(dev) < 0)
+ return(ENXIO);
+
+ mi = &coda_mnttbl[minor(dev)];
+ vcp = &(mi->mi_vcomm);
+
+ if (!VC_OPEN(vcp))
+ panic("vcclose: not open");
+
+ /* prevent future operations on this vfs from succeeding by auto-
+ * unmounting any vfs mounted via this device. This frees user or
+ * sysadm from having to remember where all mount points are located.
+ * Put this before WAKEUPs to avoid queuing new messages between
+ * the WAKEUP and the unmount (which can happen if we're unlucky)
+ */
+ if (!mi->mi_rootvp) {
+ /* just a simple open/close w no mount */
+ MARK_VC_CLOSED(vcp);
+ return 0;
+ }
+
+ /* Let unmount know this is for real */
+ VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING;
+ coda_unmounting(mi->mi_vfsp);
+
+ /* Recount from scratch: one for every sleeper woken below. */
+ outstanding_upcalls = 0;
+ /* Wakeup clients so they can return. */
+ for (vmp = (struct vmsg *)GETNEXT(vcp->vc_requests);
+ !EOQ(vmp, vcp->vc_requests);
+ vmp = nvmp)
+ {
+ /* Fetch the successor first: vmp may be freed in this iteration. */
+ nvmp = (struct vmsg *)GETNEXT(vmp->vm_chain);
+ /* Free signal request messages and don't wakeup cause
+ no one is waiting. */
+ if (vmp->vm_opcode == CODA_SIGNAL) {
+ CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA);
+ CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg));
+ continue;
+ }
+ outstanding_upcalls++;
+ wakeup(&vmp->vm_sleep);
+ }
+
+ /* Requests Venus has already read are waiting on the reply queue. */
+ for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys);
+ !EOQ(vmp, vcp->vc_replys);
+ vmp = (struct vmsg *)GETNEXT(vmp->vm_chain))
+ {
+ outstanding_upcalls++;
+ wakeup(&vmp->vm_sleep);
+ }
+
+ MARK_VC_CLOSED(vcp);
+
+ /* coda_call() wakes this channel when the last woken upcall has finished. */
+ if (outstanding_upcalls) {
+#ifdef CODA_VERBOSE
+ printf("presleep: outstanding_upcalls = %d\n", outstanding_upcalls);
+ (void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
+ printf("postsleep: outstanding_upcalls = %d\n", outstanding_upcalls);
+#else
+ (void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0);
+#endif
+ }
+
+ err = dounmount(mi->mi_vfsp, flag, p);
+ if (err)
+ myprintf(("Error %d unmounting vfs in vcclose(%d)\n",
+ err, minor(dev)));
+ return 0;
+}
+
+/*
+ * vc_nb_read: hand the request at the head of the request queue to Venus.
+ *
+ * Returns ENXIO for a bad minor number and 0 when no request is queued.
+ * Otherwise the request is copied out with uiomove() and taken off the
+ * request queue; any copy failure is reported as EINVAL.  A CODA_SIGNAL
+ * message is freed once read, every other message is flagged VM_READ and
+ * moved to the reply queue.
+ */
+int
+vc_nb_read(dev, uiop, flag)
+    dev_t dev;
+    struct uio *uiop;
+    int flag;
+{
+    struct vcomm *vcp;
+    struct vmsg *msg;
+    int error = 0;
+
+    ENTRY;
+
+    if (minor(dev) < 0 || minor(dev) >= NVCODA)
+        return (ENXIO);
+
+    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;
+    if (EMPTY(vcp->vc_requests))
+        return (0); /* Nothing to read */
+
+    /* Take the oldest pending request. */
+    msg = (struct vmsg *)GETNEXT(vcp->vc_requests);
+
+    /* Copy the input arguments out to the reader. */
+    uiop->uio_rw = UIO_READ;
+    error = uiomove(msg->vm_data, msg->vm_inSize, uiop);
+    if (error != 0) {
+        myprintf(("vcread: error (%d) on uiomove\n", error));
+        error = EINVAL;
+    }
+
+#ifdef OLD_DIAGNOSTIC
+    if (msg->vm_chain.forw == 0 || msg->vm_chain.back == 0)
+        panic("vc_nb_read: bad chain");
+#endif
+
+    REMQUE(msg->vm_chain);
+
+    if (msg->vm_opcode != CODA_SIGNAL) {
+        /* A real request: park it until the reply is written. */
+        msg->vm_flags |= VM_READ;
+        INSQUE(msg->vm_chain, vcp->vc_replys);
+    } else {
+        /* Signal messages get no reply, so release them right away. */
+        if (codadebug) {
+            myprintf(("vcread: signal msg (%d, %d)\n",
+                msg->vm_opcode, msg->vm_unique));
+        }
+        CODA_FREE((caddr_t)msg->vm_data, (u_int)VC_IN_NO_DATA);
+        CODA_FREE((caddr_t)msg, (u_int)sizeof(struct vmsg));
+    }
+
+    return (error);
+}
+
+/*
+ * vc_nb_write: accept a message written by Venus.
+ *
+ * The first two ints of the message are the opcode and the sequence
+ * number.  If DOWNCALL(opcode) holds, the rest is read into a local
+ * union outputArgs and passed to handleDownCall(), whose result is
+ * returned.  Otherwise the message is a reply: the matching request is
+ * looked up by sequence number on the reply queue, removed from it, the
+ * reply body is copied into the request's buffer and the sleeping caller
+ * is woken.
+ *
+ * Returns ENXIO for a bad minor number, EINVAL when a uiomove() fails or
+ * the reply is larger than the caller allowed for, ESRCH when no request
+ * with that sequence number is waiting, and 0 on success.
+ */
+int
+vc_nb_write(dev, uiop, flag)
+ dev_t dev;
+ struct uio *uiop;
+ int flag;
+{
+ register struct vcomm * vcp;
+ register struct vmsg *vmp;
+ struct coda_out_hdr *out;
+ u_long seq;
+ u_long opcode;
+ int buf[2];
+ int error = 0;
+
+ ENTRY;
+
+ if (minor(dev) >= NVCODA || minor(dev) < 0)
+ return(ENXIO);
+
+ vcp = &coda_mnttbl[minor(dev)].mi_vcomm;
+
+ /* Peek at the opcode, unique without transfering the data. */
+ uiop->uio_rw = UIO_WRITE;
+ error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop);
+ if (error) {
+ myprintf(("vcwrite: error (%d) on uiomove\n", error));
+ return(EINVAL);
+ }
+
+ opcode = buf[0];
+ seq = buf[1];
+
+ if (codadebug)
+ myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq));
+
+ if (DOWNCALL(opcode)) {
+ union outputArgs pbuf;
+
+ /* get the rest of the data. */
+ /* The two ints already consumed are skipped: the copy starts at the result field. */
+ uiop->uio_rw = UIO_WRITE;
+ error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result, sizeof(pbuf) - (sizeof(int)*2), uiop);
+ if (error) {
+ myprintf(("vcwrite: error (%d) on uiomove (Op %ld seq %ld)\n",
+ error, opcode, seq));
+ return(EINVAL);
+ }
+
+ return handleDownCall(opcode, &pbuf);
+ }
+
+ /* Look for the message on the (waiting for) reply queue. */
+ for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys);
+ !EOQ(vmp, vcp->vc_replys);
+ vmp = (struct vmsg *)GETNEXT(vmp->vm_chain))
+ {
+ if (vmp->vm_unique == seq) break;
+ }
+
+ if (EOQ(vmp, vcp->vc_replys)) {
+ if (codadebug)
+ myprintf(("vcwrite: msg (%ld, %ld) not found\n", opcode, seq));
+
+ return(ESRCH);
+ }
+
+ /* Remove the message from the reply queue */
+ REMQUE(vmp->vm_chain);
+
+ /* move data into response buffer. */
+ out = (struct coda_out_hdr *)vmp->vm_data;
+ /* Don't need to copy opcode and uniquifier. */
+
+ /* get the rest of the data. */
+ if (vmp->vm_outSize < uiop->uio_resid) {
+ myprintf(("vcwrite: more data than asked for (%d < %d)\n",
+ vmp->vm_outSize, uiop->uio_resid));
+ wakeup(&vmp->vm_sleep); /* Notify caller of the error. */
+ return(EINVAL);
+ }
+
+ buf[0] = uiop->uio_resid; /* Save this value. */
+ uiop->uio_rw = UIO_WRITE;
+ error = uiomove((caddr_t) &out->result, vmp->vm_outSize - (sizeof(int) * 2), uiop);
+ if (error) {
+ /*
+ * NOTE(review): unlike the size check above, this path returns
+ * without a wakeup() although the message has already been taken
+ * off the reply queue; the sleeper in coda_call() then appears to
+ * depend on its tsleep() timeout -- confirm this is intended.
+ */
+ myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n",
+ error, opcode, seq));
+ return(EINVAL);
+ }
+
+ /* I don't think these are used, but just in case. */
+ /* XXX - aren't these two already correct? -bnoble */
+ out->opcode = opcode;
+ out->unique = seq;
+ vmp->vm_outSize = buf[0]; /* Amount of data transferred? */
+ /* VM_WRITE tells coda_call() that the operation completed. */
+ vmp->vm_flags |= VM_WRITE;
+ wakeup(&vmp->vm_sleep);
+
+ return(0);
+}
+
+/*
+ * vc_nb_ioctl: control operations on the Coda pseudo-device.
+ *
+ *   CODARESIZE           resize the name cache; returns coda_nc_resize()'s result
+ *   CODASTATS            gather name cache statistics (ENODEV if the cache is off)
+ *   CODAPRINT            print the name cache (ENODEV if the cache is off)
+ *   CIOC_KERNEL_VERSION  0 stores the kernel version through addr; 1 or 2
+ *                        succeed only if equal to the kernel version;
+ *                        anything else, or a mismatch, yields ENOENT
+ *
+ * Any other command returns EINVAL.
+ */
+int
+vc_nb_ioctl(dev, cmd, addr, flag, p)
+    dev_t dev;
+    u_long cmd;
+    caddr_t addr;
+    int flag;
+    struct proc *p;
+{
+    ENTRY;
+
+    switch (cmd) {
+    case CODARESIZE: {
+        struct coda_resize *rsz = (struct coda_resize *)addr;
+
+        return (coda_nc_resize(rsz->hashsize, rsz->heapsize, IS_DOWNCALL));
+    }
+
+    case CODASTATS:
+        if (!coda_nc_use)
+            return (ENODEV);
+        coda_nc_gather_stats();
+        return (0);
+
+    case CODAPRINT:
+        if (!coda_nc_use)
+            return (ENODEV);
+        print_coda_nc();
+        return (0);
+
+    case CIOC_KERNEL_VERSION: {
+        u_int *verp = (u_int *)addr;
+
+        if (*verp == 0) {
+            /* Query: report the version we implement. */
+            *verp = coda_kernel_version;
+            return 0;
+        }
+        if (*verp == 1 || *verp == 2) {
+            /* Check: the caller's version must match ours exactly. */
+            if (coda_kernel_version != *verp)
+                return ENOENT;
+            return 0;
+        }
+        return ENOENT;
+    }
+
+    default:
+        return (EINVAL);
+    }
+}
+
+/*
+ * vc_nb_poll: poll entry point for the Coda pseudo-device.
+ *
+ * Only POLLIN and POLLRDNORM are considered.  If the caller asked for
+ * either and a request is queued, the requested subset of those two bits
+ * is returned.  If none is queued the caller is recorded for a later
+ * selwakeup() and 0 is returned.  A bad minor number returns ENXIO.
+ */
+int
+vc_nb_poll(dev, events, p)
+    dev_t dev;
+    int events;
+    struct proc *p;
+{
+    struct vcomm *vcp;
+    int wanted;
+
+    ENTRY;
+
+    if (minor(dev) < 0 || minor(dev) >= NVCODA)
+        return (ENXIO);
+
+    wanted = events & (POLLIN|POLLRDNORM);
+    if (wanted == 0)
+        return (0);
+
+    vcp = &coda_mnttbl[minor(dev)].mi_vcomm;
+    if (EMPTY(vcp->vc_requests)) {
+        /* Nothing to read yet: remember who to poke when a request arrives. */
+        selrecord(p, &(vcp->vc_selproc));
+        return (0);
+    }
+
+    return (wanted);
+}
+
+/*
+ * Statistics
+ *
+ * coda_call() bumps ncalls and the per-opcode reqs[] counter for every
+ * upcall it is asked to make.
+ */
+struct coda_clstat coda_clstat;
+
+/*
+ * Key question: whether to sleep interruptibly or uninterruptibly when
+ * waiting for Venus. The former seems better (because you can ^C a
+ * job), but then GNU-EMACS completion breaks. Use tsleep with no
+ * timeout, and no longjmp happens. But, when sleeping
+ * "uninterruptibly", we don't get told if it returns abnormally
+ * (e.g. kill -9).
+ */
+
+/*
+ * coda_call: send one request to Venus and wait for the answer.
+ *
+ * mntinfo  mount whose communication channel is used
+ * inSize   number of bytes of request in buffer
+ * outSize  in: room for the reply (0 means "same as inSize");
+ *          out: size of the reply, set only when a reply arrived
+ * buffer   holds the request on entry and the reply on return
+ *
+ * The request is appended to the request queue, Venus is poked with
+ * selwakeup(), and the caller sleeps on the message's vm_sleep address.
+ *
+ * Returns ENODEV when mntinfo is NULL or the device is not open (before
+ * or after the sleep), EINTR when the sleep ended without a reply while
+ * the device is still open, and otherwise the result field of the reply
+ * header.
+ */
+int
+coda_call(mntinfo, inSize, outSize, buffer)
+ struct coda_mntinfo *mntinfo; int inSize; int *outSize; caddr_t buffer;
+{
+ struct vcomm *vcp;
+ struct vmsg *vmp;
+ int error;
+#ifdef CTL_C
+ struct proc *p = curproc;
+ unsigned int psig_omask = p->p_sigmask; /* restored after the sleep loop */
+ int i;
+#endif
+ if (mntinfo == NULL) {
+ /* Unlikely, but could be a race condition with a dying warden */
+ return ENODEV;
+ }
+
+ vcp = &(mntinfo->mi_vcomm);
+
+ coda_clstat.ncalls++;
+ coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++;
+
+ if (!VC_OPEN(vcp))
+ return(ENODEV);
+
+ CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg));
+ /* Format the request message. */
+ vmp->vm_data = buffer;
+ vmp->vm_flags = 0;
+ vmp->vm_inSize = inSize;
+ vmp->vm_outSize
+ = *outSize ? *outSize : inSize; /* |buffer| >= inSize */
+ vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode;
+ vmp->vm_unique = ++vcp->vc_seq;
+ if (codadebug)
+ myprintf(("Doing a call for %d.%d\n",
+ vmp->vm_opcode, vmp->vm_unique));
+
+ /* Fill in the common input args. */
+ ((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique;
+
+ /* Append msg to request queue and poke Venus. */
+ INSQUE(vmp->vm_chain, vcp->vc_requests);
+ selwakeup(&(vcp->vc_selproc));
+
+ /* We can be interrupted while we wait for Venus to process
+ * our request. If the interrupt occurs before Venus has read
+ * the request, we dequeue and return. If it occurs after the
+ * read but before the reply, we dequeue, send a signal
+ * message, and return. If it occurs after the reply we ignore
+ * it. In no case do we want to restart the syscall. If it
+ * was interrupted by a venus shutdown (vcclose), return
+ * ENODEV. */
+
+ /* Ignore return, We have to check anyway */
+#ifdef CTL_C
+ /* This is work in progress. Setting coda_pcatch lets tsleep reawaken
+ on a ^c or ^z. The problem is that emacs sets certain interrupts
+ as SA_RESTART. This means that we should exit sleep handle the
+ "signal" and then go to sleep again. Mostly this is done by letting
+ the syscall complete and be restarted. We are not idempotent and
+ can not do this. A better solution is necessary.
+ */
+ /*
+ * Sleep in slices of hz*2 ticks.  A timeout, or a pending set that is
+ * exactly SIGIO or exactly SIGALRM, goes back to sleep (the signal is
+ * added to the mask first); any other non-zero return ends the wait.
+ */
+ i = 0;
+ do {
+ error = tsleep(&vmp->vm_sleep, (coda_call_sleep|coda_pcatch), "coda_call", hz*2);
+ if (error == 0)
+ break;
+ else if (error == EWOULDBLOCK) {
+#ifdef CODA_VERBOSE
+ printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i);
+#endif
+ } else if (p->p_siglist == sigmask(SIGIO)) {
+ p->p_sigmask |= p->p_siglist;
+#ifdef CODA_VERBOSE
+ printf("coda_call: tsleep returns %d SIGIO, cnt %d\n", error, i);
+#endif
+ } else if (p->p_siglist == sigmask(SIGALRM)) {
+ p->p_sigmask |= p->p_siglist;
+#ifdef CODA_VERBOSE
+ printf("coda_call: tsleep returns %d SIGALRM, cnt %d\n", error, i);
+#endif
+ } else {
+ printf("coda_call: tsleep returns %d, cnt %d\n", error, i);
+ printf("coda_call: siglist = %x, sigmask = %x, mask %x\n",
+ p->p_siglist, p->p_sigmask,
+ p->p_siglist & ~p->p_sigmask);
+ break;
+#ifdef notyet
+ p->p_sigmask |= p->p_siglist;
+ printf("coda_call: new mask, siglist = %x, sigmask = %x, mask %x\n",
+ p->p_siglist, p->p_sigmask,
+ p->p_siglist & ~p->p_sigmask);
+#endif
+ }
+ } while (error && i++ < 128 && VC_OPEN(vcp));
+ p->p_sigmask = psig_omask;
+#else
+ (void) tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0);
+#endif
+ /* What happened is decided from the message flags, not from tsleep's result. */
+ if (VC_OPEN(vcp)) { /* Venus is still alive */
+ /* Op went through, interrupt or not... */
+ if (vmp->vm_flags & VM_WRITE) {
+ error = 0;
+ *outSize = vmp->vm_outSize;
+ }
+
+ else if (!(vmp->vm_flags & VM_READ)) {
+ /* Interrupted before venus read it. */
+#ifdef CODA_VERBOSE
+ if (1)
+#else
+ if (codadebug)
+#endif
+ myprintf(("interrupted before read: op = %d.%d, flags = %x\n",
+ vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+ REMQUE(vmp->vm_chain);
+ error = EINTR;
+ }
+
+ else {
+ /* (!(vmp->vm_flags & VM_WRITE)) means interrupted after
+ upcall started */
+ /* Interrupted after start of upcall, send venus a signal */
+ struct coda_in_hdr *dog;
+ struct vmsg *svmp;
+
+#ifdef CODA_VERBOSE
+ if (1)
+#else
+ if (codadebug)
+#endif
+ myprintf(("Sending Venus a signal: op = %d.%d, flags = %x\n",
+ vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+
+ REMQUE(vmp->vm_chain);
+ error = EINTR;
+
+ CODA_ALLOC(svmp, struct vmsg *, sizeof (struct vmsg));
+
+ CODA_ALLOC((svmp->vm_data), char *, sizeof (struct coda_in_hdr));
+ dog = (struct coda_in_hdr *)svmp->vm_data;
+
+ /* The signal message carries the sequence number of the abandoned request. */
+ svmp->vm_flags = 0;
+ dog->opcode = svmp->vm_opcode = CODA_SIGNAL;
+ dog->unique = svmp->vm_unique = vmp->vm_unique;
+ svmp->vm_inSize = sizeof (struct coda_in_hdr);
+/*??? rvb */ svmp->vm_outSize = sizeof (struct coda_in_hdr);
+
+ if (codadebug)
+ myprintf(("coda_call: enqueing signal msg (%d, %d)\n",
+ svmp->vm_opcode, svmp->vm_unique));
+
+ /* insert at head of queue! */
+ INSQUE(svmp->vm_chain, vcp->vc_requests);
+ selwakeup(&(vcp->vc_selproc));
+ }
+ }
+
+ else { /* If venus died (!VC_OPEN(vcp)) */
+ if (codadebug)
+ myprintf(("vcclose woke op %d.%d flags %d\n",
+ vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags));
+
+ error = ENODEV;
+ }
+
+ CODA_FREE(vmp, sizeof(struct vmsg));
+
+ /* Let a closer sleeping in vc_nb_close() go once the last upcall is done. */
+ if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0))
+ wakeup(&outstanding_upcalls);
+
+ if (!error)
+ error = ((struct coda_out_hdr *)buffer)->result;
+ return(error);
+}
diff --git a/sys/fs/coda/coda_psdev.h b/sys/fs/coda/coda_psdev.h
new file mode 100644
index 0000000..11922ad
--- /dev/null
+++ b/sys/fs/coda/coda_psdev.h
@@ -0,0 +1,39 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_psdev.c,v 1.4 1998/09/13 13:57:59 rvb Exp $
+ *
+ */
+
+/* Entry points of the Coda pseudo-device through which Venus talks to the kernel. */
+int vc_nb_open(dev_t dev, int flag, int mode, struct proc *p);
+int vc_nb_close (dev_t dev, int flag, int mode, struct proc *p);
+int vc_nb_read(dev_t dev, struct uio *uiop, int flag);
+int vc_nb_write(dev_t dev, struct uio *uiop, int flag);
+int vc_nb_ioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p);
+int vc_nb_poll(dev_t dev, int events, struct proc *p);
diff --git a/sys/fs/coda/coda_subr.c b/sys/fs/coda/coda_subr.c
new file mode 100644
index 0000000..40d2d0b
--- /dev/null
+++ b/sys/fs/coda/coda_subr.c
@@ -0,0 +1,747 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_subr.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_subr.c,v 1.8 1998/10/28 19:33:50 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University. Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan. */
+
+/*
+ * HISTORY
+ * $Log: coda_subr.c,v $
+ * Revision 1.8 1998/10/28 19:33:50 rvb
+ * Venus must be passed O_CREAT flag on VOP_OPEN iff this is
+ * a creat so that we can will allow a mode 444 file to be
+ * written into. Sync with the latest coda.h and deal with
+ * collateral damage.
+ *
+ * Revision 1.7 1998/09/29 20:19:45 rvb
+ * Fixes for lkm:
+ * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL
+ * 2. don't pass -DCODA to lkm build
+ *
+ * Revision 1.6 1998/09/25 17:38:31 rvb
+ * Put "stray" printouts under DIAGNOSTIC. Make everything build
+ * with DEBUG on. Add support for lkm. (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.5 1998/09/13 13:57:59 rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.4 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.11 1998/08/28 18:12:18 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10 1998/08/18 17:05:16 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9 1998/08/18 16:31:41 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8 98/01/31 20:53:12 rvb
+ * First version that works on FreeBSD 2.2.5
+ *
+ * Revision 1.7 98/01/23 11:53:42 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.6.2.3 98/01/23 11:21:05 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.6.2.2 97/12/16 12:40:06 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.6.2.1 97/12/06 17:41:21 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.6 97/12/05 10:39:17 rvb
+ * Read CHANGES
+ *
+ * Revision 1.5.4.8 97/11/26 15:28:58 rvb
+ * Cant make downcall pbuf == union cfs_downcalls yet
+ *
+ * Revision 1.5.4.7 97/11/20 11:46:42 rvb
+ * Capture current cfs_venus
+ *
+ * Revision 1.5.4.6 97/11/18 10:27:16 rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ *
+ * Revision 1.5.4.5 97/11/13 22:03:00 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.5.4.4 97/11/12 12:09:39 rvb
+ * reorg pass1
+ *
+ * Revision 1.5.4.3 97/11/06 21:02:38 rvb
+ * first pass at ^c ^z
+ *
+ * Revision 1.5.4.2 97/10/29 16:06:27 rvb
+ * Kill DYING
+ *
+ * Revision 1.5.4.1 97/10/28 23:10:16 rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.5 97/08/05 11:08:17 lily
+ * Removed cfsnc_replace, replaced it with a coda_find, unhash, and
+ * rehash. This fixes a cnode leak and a bug in which the fid is
+ * not actually replaced. (cfs_namecache.c, cfsnc.h, cfs_subr.c)
+ *
+ * Revision 1.4 96/12/12 22:10:59 bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.3 1996/12/05 16:20:15 bnoble
+ * Minor debugging aids
+ *
+ * Revision 1.2 1996/01/02 16:57:01 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:27 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:07:59 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:07:58 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.8 1995/03/03 17:00:04 dcs
+ * Fixed kernel bug involving sleep and upcalls. Basically if you killed
+ * a job waiting on venus, the venus upcall queues got trashed. Depending
+ * on luck, you could kill the kernel or not.
+ * (mods to cfs_subr.c and cfs_mach.d)
+ *
+ * Revision 2.7 95/03/02 22:45:21 dcs
+ * Sun4 compatibility
+ *
+ * Revision 2.6 95/02/17 16:25:17 dcs
+ * These versions represent several changes:
+ * 1. Allow venus to restart even if outstanding references exist.
+ * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d
+ * 3. Allow ody_expand to return many members, not just one.
+ *
+ * Revision 2.5 94/11/09 15:56:26 dcs
+ * Had the thread sleeping on the wrong thing!
+ *
+ * Revision 2.4 94/10/14 09:57:57 dcs
+ * Made changes 'cause sun4s have braindead compilers
+ *
+ * Revision 2.3 94/10/12 16:46:26 dcs
+ * Cleaned kernel/venus interface by removing XDR junk, plus
+ * so cleanup to allow this code to be more easily ported.
+ *
+ * Revision 1.2 92/10/27 17:58:22 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 2.4 92/09/30 14:16:26 mja
+ * Incorporated Dave Steere's fix for the GNU-Emacs bug.
+ * Also, included his coda_flush routine in place of the former coda_nc_flush.
+ * [91/02/07 jjk]
+ *
+ * Added contributors blurb.
+ * [90/12/13 jjk]
+ *
+ * Hack to allow users to keep coda venus calls uninterruptible. THis
+ * basically prevents the Gnu-emacs bug from appearing, in which a call
+ * was being interrupted, and return EINTR, but gnu didn't check for the
+ * error and figured the file was buggered.
+ * [90/12/09 dcs]
+ *
+ * Revision 2.3 90/08/10 10:23:20 mrt
+ * Removed include of vm/vm_page.h as it no longer exists.
+ * [90/08/10 mrt]
+ *
+ * Revision 2.2 90/07/05 11:26:35 mrt
+ * Initialize name cache on first call to vcopen.
+ * [90/05/23 dcs]
+ *
+ * Created for the Coda File System.
+ * [90/05/23 dcs]
+ *
+ * Revision 1.5 90/05/31 17:01:35 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ * Revision 1.2 90/03/19 15:56:25 dcs
+ * Initialize name cache on first call to vcopen.
+ *
+ * Revision 1.1 90/03/15 10:43:26 jjk
+ * Initial revision
+ *
+ */
+
+/* NOTES: rvb
+ * 1. Added coda_unmounting to mark all cnodes as being UNMOUNTING. This has to
+ * be done before dounmount is called, because some of the routines that
+ * dounmount calls before coda_unmounted might try to force flushes to venus.
+ * The vnode pager does this.
+ * 2. coda_unmounting marks all cnodes by scanning coda_cache.
+ * 3. coda_checkunmounting (under DEBUG) checks all cnodes by chasing the vnodes
+ * under the /coda mount point.
+ * 4. coda_cacheprint (under DEBUG) prints names with vnode/cnode address
+ */
+
+#include <vcoda.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/select.h>
+#include <sys/mount.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_namecache.h>
+
+/* Counters: successful coda_find() lookups, cnodes reused from the free
+ * list, and cnodes freshly allocated by coda_alloc(). */
+int coda_active = 0;
+int coda_reuse = 0;
+int coda_new = 0;
+
+/* Free list of released cnodes and the fid-hashed table of live ones;
+ * both are singly linked through CNODE_NEXT. */
+struct cnode *coda_freelist = NULL;
+struct cnode *coda_cache[CODA_CACHESIZE];
+
+/* Bucket index for a fid; the mask assumes CODA_CACHESIZE is a power of
+ * two -- TODO confirm against its definition. */
+#define coda_hash(fid) (((fid)->Volume + (fid)->Vnode) & (CODA_CACHESIZE-1))
+#define CNODE_NEXT(cp) ((cp)->c_next)
+/* Odd vnode numbers denote directories (see handleDownCall). */
+#define ODD(vnode) ((vnode) & 0x1)
+
+/*
+ * coda_alloc: return a zero-filled cnode, taken from the free list when
+ * one is available and freshly allocated otherwise.
+ */
+struct cnode *
+coda_alloc(void)
+{
+    struct cnode *node = coda_freelist;
+
+    if (node != NULL) {
+        /* Recycle the head of the free list. */
+        coda_freelist = CNODE_NEXT(node);
+        coda_reuse++;
+    } else {
+        CODA_ALLOC(node, struct cnode *, sizeof(struct cnode));
+        /* NetBSD vnodes don't have any Pager info in them ('cause there are
+           no external pagers, duh!) */
+#define VNODE_VM_INFO_INIT(vp) /* MT */
+        VNODE_VM_INFO_INIT(CTOV(node));
+        coda_new++;
+    }
+
+    bzero(node, sizeof (struct cnode));
+    return (node);
+}
+
+/*
+ * coda_free: give a cnode back by pushing it onto the head of the free
+ * list; the memory itself is kept for reuse by coda_alloc().
+ */
+void
+coda_free(cp)
+    struct cnode *cp;
+{
+    struct cnode *old_head = coda_freelist;
+
+    CNODE_NEXT(cp) = old_head;
+    coda_freelist = cp;
+}
+
+/*
+ * coda_save: insert a cnode at the head of the hash chain selected by
+ * its fid.
+ */
+void
+coda_save(cp)
+    struct cnode *cp;
+{
+    struct cnode **bucket = &coda_cache[coda_hash(&cp->c_fid)];
+
+    CNODE_NEXT(cp) = *bucket;
+    *bucket = cp;
+}
+
+/*
+ * coda_unsave: unlink a cnode from the hash chain selected by its fid and
+ * clear its next pointer.  Nothing happens if the cnode is not on that
+ * chain.
+ */
+void
+coda_unsave(cp)
+    struct cnode *cp;
+{
+    struct cnode **linkp;
+
+    /* Walk the links themselves so the head needs no special case. */
+    for (linkp = &coda_cache[coda_hash(&cp->c_fid)];
+         *linkp != NULL;
+         linkp = &CNODE_NEXT(*linkp)) {
+        if (*linkp != cp)
+            continue;
+
+        *linkp = CNODE_NEXT(cp);
+        CNODE_NEXT(cp) = (struct cnode *)NULL;
+        return;
+    }
+}
+
+/*
+ * coda_find: look up a cnode by fid.  Cnodes that are being unmounted are
+ * treated as absent.  The first match on the chain is returned and
+ * coda_active is incremented; NULL means no usable cnode was found.
+ * NOTE: this allows multiple cnodes with same fid -- dcs 1/25/95
+ */
+struct cnode *
+coda_find(fid)
+    ViceFid *fid;
+{
+    struct cnode *cand;
+
+    for (cand = coda_cache[coda_hash(fid)];
+         cand != NULL;
+         cand = CNODE_NEXT(cand)) {
+        if (cand->c_fid.Vnode != fid->Vnode)
+            continue;
+        if (cand->c_fid.Volume != fid->Volume)
+            continue;
+        if (cand->c_fid.Unique != fid->Unique)
+            continue;
+        if (IS_UNMOUNTING(cand))
+            continue;
+
+        coda_active++;
+        return (cand);
+    }
+
+    return (NULL);
+}
+
+/*
+ * coda_kill is called as a side effect to vcopen. To prevent any
+ * cnodes left around from an earlier run of a venus or warden from
+ * causing problems with the new instance, mark any outstanding cnodes
+ * as dying. Future operations on these cnodes should fail (excepting
+ * coda_inactive of course!). Since multiple venii/wardens can be
+ * running, only kill the cnodes for a particular entry in the
+ * coda_mnttbl. -- DCS 12/1/94
+ *
+ * NOTE(review): as written here the function flushes the name cache and
+ * returns the number of cnodes whose vnode belongs to whoIam; it does not
+ * itself set any flag on them.
+ */
+
+int
+coda_kill(whoIam, dcstat)
+ struct mount *whoIam;
+ enum dc_status dcstat;
+{
+ int hash, count = 0;
+ struct cnode *cp;
+
+ /*
+ * Algorithm is as follows:
+ * First, flush whatever vnodes we can from the name cache.
+ *
+ * Then, step through whatever is left and count (and, under
+ * debugging, report) the cnodes that belong to this mount.
+ */
+
+ /* This is slightly overkill, but should work. Eventually it'd be
+ * nice to only flush those entries from the namecache that
+ * reference a vnode in this vfs. */
+ coda_nc_flush(dcstat);
+
+ for (hash = 0; hash < CODA_CACHESIZE; hash++) {
+ for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) {
+ if (CTOV(cp)->v_mount == whoIam) {
+#ifdef DEBUG
+ printf("coda_kill: vp %p, cp %p\n", CTOV(cp), cp);
+#endif
+ count++;
+ CODADEBUG(CODA_FLUSH,
+ myprintf(("Live cnode fid %lx.%lx.%lx flags %d count %d\n",
+ (cp->c_fid).Volume,
+ (cp->c_fid).Vnode,
+ (cp->c_fid).Unique,
+ cp->c_flags,
+ CTOV(cp)->v_usecount)); );
+ }
+ }
+ }
+ return count;
+}
+
+/*
+ * coda_flush: flush everything that can be flushed.
+ *
+ * A cnode can be held either by the name cache or because it is being
+ * executed, so the name cache is flushed first and coda_vmflush() is then
+ * called on every cnode with an even vnode number (only files can be
+ * executed).  The call is counted in coda_clstat under CODA_FLUSH.
+ */
+void
+coda_flush(dcstat)
+    enum dc_status dcstat;
+{
+    struct cnode *node;
+    int bucket;
+
+    coda_clstat.ncalls++;
+    coda_clstat.reqs[CODA_FLUSH]++;
+
+    /* flush files from the name cache */
+    coda_nc_flush(dcstat);
+
+    for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+        node = coda_cache[bucket];
+        while (node != NULL) {
+            if (!ODD(node->c_fid.Vnode)) {
+                coda_vmflush(node);
+            }
+            node = CNODE_NEXT(node);
+        }
+    }
+}
+
+/*
+ * coda_testflush: debugging aid that prints the fid and use count of
+ * every cnode still present in the hash table, i.e. those that survived a
+ * name cache flush.
+ */
+void
+coda_testflush(void)
+{
+    struct cnode *node;
+    int bucket;
+
+    for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+        node = coda_cache[bucket];
+        while (node != NULL) {
+            myprintf(("Live cnode fid %lx.%lx.%lx count %d\n",
+                (node->c_fid).Volume, (node->c_fid).Vnode,
+                (node->c_fid).Unique, CTOV(node)->v_usecount));
+            node = CNODE_NEXT(node);
+        }
+    }
+}
+
+/*
+ * coda_unmounting: flag every cnode of the given mount as unmounting.
+ * NetBSD kernels may try to fsync them now that venus is dead, which
+ * would be a bad thing.
+ *
+ * A cnode that is locked or wanted is first unlocked (with a console
+ * message) and its waiters are woken.
+ */
+void
+coda_unmounting(whoIam)
+    struct mount *whoIam;
+{
+    struct cnode *node;
+    int bucket;
+
+    for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+        node = coda_cache[bucket];
+        while (node != NULL) {
+            if (CTOV(node)->v_mount == whoIam) {
+                if ((node->c_flags & (C_LOCKED|C_WANTED)) != 0) {
+                    printf("coda_unmounting: Unlocking %p\n", node);
+                    node->c_flags &= ~(C_LOCKED|C_WANTED);
+                    wakeup((caddr_t) node);
+                }
+                node->c_flags |= C_UNMOUNTING;
+            }
+            node = CNODE_NEXT(node);
+        }
+    }
+}
+
+#ifdef DEBUG
+/*
+ * coda_checkunmounting: debugging check that walks the vnode list of a
+ * mount and reports (and then flags) every cnode that does not yet carry
+ * C_UNMOUNTING.
+ */
+void
+coda_checkunmounting(mp)
+ struct mount *mp;
+{
+ register struct vnode *vp, *nvp;
+ struct cnode *cp;
+ /* NOTE(review): count and bad are tallied but never printed or returned. */
+ int count = 0, bad = 0;
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ /* A vnode no longer on this mount restarts the scan from the list head. */
+ if (vp->v_mount != mp)
+ goto loop;
+ nvp = vp->v_mntvnodes.le_next;
+ cp = VTOC(vp);
+ count++;
+ if (!(cp->c_flags & C_UNMOUNTING)) {
+ bad++;
+ printf("vp %p, cp %p missed\n", vp, cp);
+ cp->c_flags |= C_UNMOUNTING;
+ }
+ }
+}
+
+/*
+ * coda_cacheprint: debugging aid that prints the control vnode and then
+ * every cached cnode belonging to the given mount, each with its
+ * vnode/cnode addresses and its name-cache name, followed by the number
+ * of cnodes printed.
+ */
+void
+coda_cacheprint(whoIam)
+    struct mount *whoIam;
+{
+    struct cnode *node;
+    int bucket;
+    int printed = 0;
+
+    printf("coda_cacheprint: coda_ctlvp %p, cp %p", coda_ctlvp, VTOC(coda_ctlvp));
+    coda_nc_name(VTOC(coda_ctlvp));
+    printf("\n");
+
+    for (bucket = 0; bucket < CODA_CACHESIZE; bucket++) {
+        node = coda_cache[bucket];
+        while (node != NULL) {
+            if (CTOV(node)->v_mount == whoIam) {
+                printf("coda_cacheprint: vp %p, cp %p", CTOV(node), node);
+                coda_nc_name(node);
+                printf("\n");
+                printed++;
+            }
+            node = CNODE_NEXT(node);
+        }
+    }
+    printf("coda_cacheprint: count %d\n", printed);
+}
+#endif
+
+/*
+ * There are 6 cases where invalidations occur. The semantics of each
+ * is listed here.
+ *
+ * CODA_FLUSH -- flush all entries from the name cache and the cnode cache.
+ * CODA_PURGEUSER -- flush all entries from the name cache for a specific user
+ * This call is a result of token expiration.
+ *
+ * The next two are the result of callbacks on a file or directory.
+ * CODA_ZAPDIR -- flush the attributes for the dir from its cnode.
+ * Zap all children of this directory from the namecache.
+ * CODA_ZAPFILE -- flush the attributes for a file.
+ *
+ * The fifth is a result of Venus detecting an inconsistent file.
+ * CODA_PURGEFID -- flush the attribute for the file
+ * If it is a dir (odd vnode), purge its
+ * children from the namecache
+ * remove the file from the namecache.
+ *
+ * The sixth allows Venus to replace local fids with global ones
+ * during reintegration.
+ *
+ * CODA_REPLACE -- replace one ViceFid with another throughout the name cache
+ */
+
+/*
+ * handleDownCall: carry out an invalidation request (downcall) that Venus
+ * wrote to the pseudo-device.
+ *
+ * opcode  one of CODA_FLUSH, CODA_PURGEUSER, CODA_ZAPFILE, CODA_ZAPDIR,
+ *         CODA_PURGEFID or CODA_REPLACE
+ * out     the message body, interpreted according to opcode
+ *
+ * Returns 0 on success, the result of coda_vmflush() for CODA_ZAPFILE and
+ * CODA_PURGEFID when a text file had to be flushed, and EINVAL for an
+ * unknown opcode.  A fid that is not in the cnode cache is not an error.
+ *
+ * While a cnode is being worked on an extra vnode reference is held
+ * (vref/vrele); if that reference is the only one left, the cnode is
+ * flagged C_PURGING before it is released.
+ */
+int handleDownCall(opcode, out)
+    int opcode; union outputArgs *out;
+{
+    int error;
+
+    /* Handle invalidate requests. */
+    switch (opcode) {
+    case CODA_FLUSH : {
+
+        coda_flush(IS_DOWNCALL);
+
+        CODADEBUG(CODA_FLUSH,coda_testflush();) /* print remaining cnodes */
+        return(0);
+    }
+
+    case CODA_PURGEUSER : {
+        coda_clstat.ncalls++;
+        coda_clstat.reqs[CODA_PURGEUSER]++;
+
+        /* XXX - need to prevent fsync's */
+        coda_nc_purge_user(out->coda_purgeuser.cred.cr_uid, IS_DOWNCALL);
+        return(0);
+    }
+
+    case CODA_ZAPFILE : {
+        struct cnode *cp;
+
+        error = 0;
+        coda_clstat.ncalls++;
+        coda_clstat.reqs[CODA_ZAPFILE]++;
+
+        cp = coda_find(&out->coda_zapfile.CodaFid);
+        if (cp != NULL) {
+            vref(CTOV(cp));
+
+            /* Cached attributes are no longer valid. */
+            cp->c_flags &= ~C_VATTR;
+            if (CTOV(cp)->v_flag & VTEXT)
+                error = coda_vmflush(cp);
+            /* The format is built from adjacent literals: a string
+             * literal may not span source lines. */
+            CODADEBUG(CODA_ZAPFILE, myprintf(("zapfile: fid = (%lx.%lx.%lx), "
+                "refcnt = %d, error = %d\n",
+                cp->c_fid.Volume,
+                cp->c_fid.Vnode,
+                cp->c_fid.Unique,
+                CTOV(cp)->v_usecount - 1, error)););
+            if (CTOV(cp)->v_usecount == 1) {
+                cp->c_flags |= C_PURGING;
+            }
+            vrele(CTOV(cp));
+        }
+
+        return(error);
+    }
+
+    case CODA_ZAPDIR : {
+        struct cnode *cp;
+
+        coda_clstat.ncalls++;
+        coda_clstat.reqs[CODA_ZAPDIR]++;
+
+        cp = coda_find(&out->coda_zapdir.CodaFid);
+        if (cp != NULL) {
+            vref(CTOV(cp));
+
+            /* Drop the directory's attributes and its children's names. */
+            cp->c_flags &= ~C_VATTR;
+            coda_nc_zapParentfid(&out->coda_zapdir.CodaFid, IS_DOWNCALL);
+
+            CODADEBUG(CODA_ZAPDIR, myprintf(("zapdir: fid = (%lx.%lx.%lx), "
+                "refcnt = %d\n",cp->c_fid.Volume,
+                cp->c_fid.Vnode,
+                cp->c_fid.Unique,
+                CTOV(cp)->v_usecount - 1)););
+            if (CTOV(cp)->v_usecount == 1) {
+                cp->c_flags |= C_PURGING;
+            }
+            vrele(CTOV(cp));
+        }
+
+        return(0);
+    }
+
+    case CODA_PURGEFID : {
+        struct cnode *cp;
+
+        error = 0;
+        coda_clstat.ncalls++;
+        coda_clstat.reqs[CODA_PURGEFID]++;
+
+        cp = coda_find(&out->coda_purgefid.CodaFid);
+        if (cp != NULL) {
+            vref(CTOV(cp));
+            if (ODD(out->coda_purgefid.CodaFid.Vnode)) { /* Vnode is a directory */
+                coda_nc_zapParentfid(&out->coda_purgefid.CodaFid,
+                    IS_DOWNCALL);
+            }
+            cp->c_flags &= ~C_VATTR;
+            coda_nc_zapfid(&out->coda_purgefid.CodaFid, IS_DOWNCALL);
+            if (!(ODD(out->coda_purgefid.CodaFid.Vnode))
+                && (CTOV(cp)->v_flag & VTEXT)) {
+
+                error = coda_vmflush(cp);
+            }
+            CODADEBUG(CODA_PURGEFID, myprintf(("purgefid: fid = (%lx.%lx.%lx), refcnt = %d, error = %d\n",
+                cp->c_fid.Volume, cp->c_fid.Vnode,
+                cp->c_fid.Unique,
+                CTOV(cp)->v_usecount - 1, error)););
+            if (CTOV(cp)->v_usecount == 1) {
+                cp->c_flags |= C_PURGING;
+            }
+            vrele(CTOV(cp));
+        }
+        return(error);
+    }
+
+    case CODA_REPLACE : {
+        struct cnode *cp = NULL;
+
+        coda_clstat.ncalls++;
+        coda_clstat.reqs[CODA_REPLACE]++;
+
+        cp = coda_find(&out->coda_replace.OldFid);
+        if (cp != NULL) {
+            /* remove the cnode from the hash table, replace the fid, and reinsert */
+            vref(CTOV(cp));
+            coda_unsave(cp);
+            cp->c_fid = out->coda_replace.NewFid;
+            coda_save(cp);
+
+            CODADEBUG(CODA_REPLACE, myprintf(("replace: oldfid = (%lx.%lx.%lx), newfid = (%lx.%lx.%lx), cp = %p\n",
+                out->coda_replace.OldFid.Volume,
+                out->coda_replace.OldFid.Vnode,
+                out->coda_replace.OldFid.Unique,
+                cp->c_fid.Volume, cp->c_fid.Vnode,
+                cp->c_fid.Unique, cp));)
+            vrele(CTOV(cp));
+        }
+        return (0);
+    }
+    default:
+        myprintf(("handleDownCall: unknown opcode %d\n", opcode));
+        return (EINVAL);
+    }
+}
+
+/* coda_grab_vnode: lives in either cfs_mach.c or cfs_nbsd.c */
+
+/*
+ * coda_vmflush: placeholder for flushing a cnode's pages; this version
+ * does nothing and always reports success.
+ */
+int
+coda_vmflush(cp)
+    struct cnode *cp;
+{
+    return (0);
+}
+
+
+/*
+ * kernel-internal debugging switches
+ *
+ * coda_debugon: turn on every debug mask and all entry tracing.
+ */
+void
+coda_debugon(void)
+{
+    /* all bits set in both debug masks */
+    codadebug = -1;
+    coda_nc_debug = -1;
+
+    /* function-entry tracing for vnode ops, the pseudo-device and vfs ops */
+    coda_vnop_print_entry = 1;
+    coda_psdev_print_entry = 1;
+    coda_vfsop_print_entry = 1;
+}
+
+/*
+ * coda_debugoff: clear every debug mask and switch off all entry tracing.
+ */
+void
+coda_debugoff(void)
+{
+    codadebug = 0;
+    coda_nc_debug = 0;
+
+    coda_vnop_print_entry = 0;
+    coda_psdev_print_entry = 0;
+    coda_vfsop_print_entry = 0;
+}
+
+/*
+ * Utilities used by both client and server
+ * Standard levels:
+ * 0) no debugging
+ * 1) hard failures
+ * 2) soft failures
+ * 3) current test software
+ * 4) main procedure entry points
+ * 5) main procedure exit points
+ * 6) utility procedure entry points
+ * 7) utility procedure exit points
+ * 8) obscure procedure entry points
+ * 9) obscure procedure exit points
+ * 10) random stuff
+ * 11) all <= 1
+ * 12) all <= 2
+ * 13) all <= 3
+ * ...
+ */
diff --git a/sys/fs/coda/coda_subr.h b/sys/fs/coda/coda_subr.h
new file mode 100644
index 0000000..fe27bfd
--- /dev/null
+++ b/sys/fs/coda/coda_subr.h
@@ -0,0 +1,45 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_subr.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_subr.h,v 1.5 1998/09/13 13:57:59 rvb Exp $
+ *
+ */
+
+struct cnode *coda_alloc(void);
+void coda_free(struct cnode *cp);
+struct cnode *coda_find(ViceFid *fid);
+void coda_flush(enum dc_status dcstat);
+void coda_testflush(void);
+void coda_checkunmounting(struct mount *mp);
+void coda_cacheprint(struct mount *whoIam);
+void coda_debugon(void);
+void coda_debugoff(void);
+int coda_kill(struct mount *whoIam, enum dc_status dcstat);
+void coda_save(struct cnode *cp);
+void coda_unsave(struct cnode *cp);
diff --git a/sys/fs/coda/coda_venus.c b/sys/fs/coda/coda_venus.c
new file mode 100644
index 0000000..96228f1
--- /dev/null
+++ b/sys/fs/coda/coda_venus.c
@@ -0,0 +1,660 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/cfs/coda_venus.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_venus.c,v 1.5 1998/10/28 19:33:50 rvb Exp $
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/select.h>
+#include <sys/ioccom.h>
+#include <sys/fcntl.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_pioctl.h>
+
+#define DECL_NO_IN(name) /* locals for a call whose request is only a coda_in_hdr */ \
+ struct coda_in_hdr *inp; \
+ struct name ## _out *outp; \
+ int name ## _size = sizeof (struct coda_in_hdr); \
+ int Isize = sizeof (struct coda_in_hdr); \
+ int Osize = sizeof (struct name ## _out); \
+ int error
+
+#define DECL(name) /* locals for a call with struct name_in request and struct name_out reply */ \
+ struct name ## _in *inp; \
+ struct name ## _out *outp; \
+ int name ## _size = sizeof (struct name ## _in); \
+ int Isize = sizeof (struct name ## _in); \
+ int Osize = sizeof (struct name ## _out); \
+ int error
+
+#define DECL_NO_OUT(name) /* locals for a call whose reply is only a coda_out_hdr */ \
+ struct name ## _in *inp; \
+ struct coda_out_hdr *outp; \
+ int name ## _size = sizeof (struct name ## _in); \
+ int Isize = sizeof (struct name ## _in); \
+ int Osize = sizeof (struct coda_out_hdr); \
+ int error
+
+#define ALLOC_NO_IN(name) \
+ if (Osize > name ## _size) \
+ name ## _size = Osize; \
+ CODA_ALLOC(inp, struct coda_in_hdr *, name ## _size);\
+ outp = (struct name ## _out *) inp
+
+#define ALLOC(name) \
+ if (Osize > name ## _size) \
+ name ## _size = Osize; \
+ CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
+ outp = (struct name ## _out *) inp
+
+#define ALLOC_NO_OUT(name) \
+ if (Osize > name ## _size) \
+ name ## _size = Osize; \
+ CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
+ outp = (struct coda_out_hdr *) inp
+
+#define STRCPY(struc, name, len) \
+ bcopy(name, (char *)inp + (int)inp->struc, len); \
+ ((char*)inp + (int)inp->struc)[len++] = 0; \
+ Isize += len
+
+#define INIT_IN(in, op, ident, p) \
+ (in)->opcode = (op); \
+ (in)->pid = p ? p->p_pid : -1; \
+ (in)->pgid = p ? p->p_pgid : -1; \
+ (in)->sid = (p && p->p_session && p->p_session->s_leader) ? (p->p_session->s_leader->p_pid) : -1; \
+ if (ident != NOCRED) { \
+ (in)->cred.cr_uid = ident->cr_uid; \
+ (in)->cred.cr_groupid = ident->cr_gid; \
+ } else { \
+ bzero(&((in)->cred),sizeof(struct coda_cred)); \
+ (in)->cred.cr_uid = -1; \
+ (in)->cred.cr_groupid = -1; \
+ } \
+
+#define CNV_OFLAG(to, from) /* map FREAD/FWRITE/O_TRUNC/O_EXCL/O_CREAT in from onto C_O_* bits in to */ \
+ do { \
+ to = 0; \
+ if (from & FREAD) to |= C_O_READ; \
+ if (from & FWRITE) to |= C_O_WRITE; \
+ if (from & O_TRUNC) to |= C_O_TRUNC; \
+ if (from & O_EXCL) to |= C_O_EXCL; \
+ if (from & O_CREAT) to |= C_O_CREAT; \
+ } while (0)
+
+#define CNV_VV2V_ATTR(top, fromp) /* field-by-field copy *fromp -> *top; used here on reply attrs into a struct vattr */ \
+ do { \
+ (top)->va_type = (fromp)->va_type; \
+ (top)->va_mode = (fromp)->va_mode; \
+ (top)->va_nlink = (fromp)->va_nlink; \
+ (top)->va_uid = (fromp)->va_uid; \
+ (top)->va_gid = (fromp)->va_gid; \
+ (top)->va_fsid = VNOVAL; /* not taken from *fromp */ \
+ (top)->va_fileid = (fromp)->va_fileid; \
+ (top)->va_size = (fromp)->va_size; \
+ (top)->va_blocksize = (fromp)->va_blocksize; \
+ (top)->va_atime = (fromp)->va_atime; \
+ (top)->va_mtime = (fromp)->va_mtime; \
+ (top)->va_ctime = (fromp)->va_ctime; \
+ (top)->va_gen = (fromp)->va_gen; \
+ (top)->va_flags = (fromp)->va_flags; \
+ (top)->va_rdev = (fromp)->va_rdev; \
+ (top)->va_bytes = (fromp)->va_bytes; \
+ (top)->va_filerev = (fromp)->va_filerev; \
+ (top)->va_vaflags = VNOVAL; /* not taken from *fromp */ \
+ (top)->va_spare = VNOVAL; /* not taken from *fromp */ \
+ } while (0)
+
+#define CNV_V2VV_ATTR(top, fromp) /* reverse copy (struct vattr -> request attr); va_fsid, va_vaflags, va_spare are not written */ \
+ do { \
+ (top)->va_type = (fromp)->va_type; \
+ (top)->va_mode = (fromp)->va_mode; \
+ (top)->va_nlink = (fromp)->va_nlink; \
+ (top)->va_uid = (fromp)->va_uid; \
+ (top)->va_gid = (fromp)->va_gid; \
+ (top)->va_fileid = (fromp)->va_fileid; \
+ (top)->va_size = (fromp)->va_size; \
+ (top)->va_blocksize = (fromp)->va_blocksize; \
+ (top)->va_atime = (fromp)->va_atime; \
+ (top)->va_mtime = (fromp)->va_mtime; \
+ (top)->va_ctime = (fromp)->va_ctime; \
+ (top)->va_gen = (fromp)->va_gen; \
+ (top)->va_flags = (fromp)->va_flags; \
+ (top)->va_rdev = (fromp)->va_rdev; \
+ (top)->va_bytes = (fromp)->va_bytes; \
+ (top)->va_filerev = (fromp)->va_filerev; \
+ } while (0)
+
+
+int coda_kernel_version = CODA_KERNEL_VERSION; /* run-time copy of the compile-time constant */
+
+int
+venus_root(void *mdp,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid)
+{
+ DECL_NO_IN(coda_root); /* sets Isize & Osize */
+ ALLOC_NO_IN(coda_root); /* sets inp & outp */
+
+ /* CODA_ROOT request: header only, no arguments. */
+ INIT_IN(inp, CODA_ROOT, cred, p);
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) /* *VFid is written only when coda_call returns 0 */
+ *VFid = outp->VFid;
+
+ CODA_FREE(inp, coda_root_size);
+ return error; /* coda_call's result, passed through unchanged */
+}
+
+int
+venus_open(void *mdp, ViceFid *fid, int flag,
+ struct ucred *cred, struct proc *p,
+/*out*/ dev_t *dev, ino_t *inode)
+{
+ int cflag;
+ DECL(coda_open); /* sets Isize & Osize */
+ ALLOC(coda_open); /* sets inp & outp */
+
+ /* CODA_OPEN request for fid, with flag translated to C_O_* bits. */
+ INIT_IN(&inp->ih, CODA_OPEN, cred, p);
+ inp->VFid = *fid;
+ CNV_OFLAG(cflag, flag); /* FREAD/FWRITE/O_* -> C_O_* */
+ inp->flags = cflag;
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* outputs are written only when coda_call returns 0 */
+ *dev = outp->dev;
+ *inode = outp->inode;
+ }
+
+ CODA_FREE(inp, coda_open_size);
+ return error;
+}
+
+int
+venus_close(void *mdp, ViceFid *fid, int flag,
+ struct ucred *cred, struct proc *p)
+{
+ int cflag;
+ DECL_NO_OUT(coda_close); /* sets Isize & Osize */
+ ALLOC_NO_OUT(coda_close); /* sets inp & outp */
+
+ INIT_IN(&inp->ih, CODA_CLOSE, cred, p); /* CODA_CLOSE request; reply is a bare header */
+ inp->VFid = *fid;
+ CNV_OFLAG(cflag, flag); /* FREAD/FWRITE/O_* -> C_O_* */
+ inp->flags = cflag;
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_close_size);
+ return error; /* coda_call's result, passed through unchanged */
+}
+
+/*
+ * Empty placeholders: no read/write upcalls exist, because the container
+ * file is read/written directly.
+ */
+void
+venus_read(void)
+{
+}
+
+void
+venus_write(void)
+{
+}
+
+/*
+ * venus_ioctl: forward a pioctl on the control file (not a normal file) to
+ * venus. Returns 0 or an errno; sizes that do not fit the buffer give EINVAL.
+ */
+int
+venus_ioctl(void *mdp, ViceFid *fid,
+ int com, int flag, caddr_t data,
+ struct ucred *cred, struct proc *p)
+{
+ DECL(coda_ioctl); /* sets Isize & Osize */
+ struct PioctlData *iap = (struct PioctlData *)data;
+ int tmp;
+
+ if (iap->vi.in_size < 0 || iap->vi.in_size > VC_MAXMSGSIZE - Isize)
+ return (EINVAL); /* user payload must fit behind the fixed header */
+
+ coda_ioctl_size = VC_MAXMSGSIZE;
+ ALLOC(coda_ioctl); /* sets inp & outp */
+
+ INIT_IN(&inp->ih, CODA_IOCTL, cred, p);
+ inp->VFid = *fid;
+
+ /* com's size field also counts the path and follow args; strip them. */
+ inp->cmd = (com & ~(IOCPARM_MASK << 16));
+ tmp = ((com >> 16) & IOCPARM_MASK) - sizeof (char *) - sizeof (int);
+ inp->cmd |= (tmp & IOCPARM_MASK) << 16;
+
+ inp->rwflag = flag;
+ inp->len = iap->vi.in_size;
+ inp->data = (char *)(sizeof (struct coda_ioctl_in));
+
+ error = copyin(iap->vi.in, (char*)inp + (int)inp->data,
+ iap->vi.in_size);
+ if (error) {
+ CODA_FREE(inp, coda_ioctl_size);
+ return(error);
+ }
+
+ Osize = VC_MAXMSGSIZE;
+ error = coda_call(mdp, Isize + iap->vi.in_size, &Osize, (char *)inp);
+
+ /* Copy out only what venus returned, and only if it lies in the buffer. */
+ if (!error) {
+ tmp = (int)outp->data; /* tmp reused: offset of the reply payload */
+ if (tmp < 0 || outp->len < 0 || outp->len > iap->vi.out_size ||
+ tmp > coda_ioctl_size - outp->len)
+ error = EINVAL;
+ else
+ error = copyout((char *)outp + tmp, iap->vi.out, outp->len);
+ }
+
+ CODA_FREE(inp, coda_ioctl_size);
+ return error;
+}
+
+int
+venus_getattr(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p,
+/*out*/ struct vattr *vap)
+{
+ DECL(coda_getattr); /* sets Isize & Osize */
+ ALLOC(coda_getattr); /* sets inp & outp */
+
+ /* CODA_GETATTR request for fid. */
+ INIT_IN(&inp->ih, CODA_GETATTR, cred, p);
+ inp->VFid = *fid;
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* *vap is written only when coda_call returns 0 */
+ CNV_VV2V_ATTR(vap, &outp->attr);
+ }
+
+ CODA_FREE(inp, coda_getattr_size);
+ return error;
+}
+
+int
+venus_setattr(void *mdp, ViceFid *fid, struct vattr *vap,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_setattr); /* sets Isize & Osize */
+ ALLOC_NO_OUT(coda_setattr); /* sets inp & outp */
+
+ /* CODA_SETATTR request: fid plus the attributes copied from *vap. */
+ INIT_IN(&inp->ih, CODA_SETATTR, cred, p);
+ inp->VFid = *fid;
+ CNV_V2VV_ATTR(&inp->attr, vap);
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_setattr_size);
+ return error; /* coda_call's result, passed through unchanged */
+}
+
+int
+venus_access(void *mdp, ViceFid *fid, int mode,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_access); /* sets Isize & Osize */
+ ALLOC_NO_OUT(coda_access); /* sets inp & outp */
+
+ /* CODA_ACCESS request for fid. */
+ INIT_IN(&inp->ih, CODA_ACCESS, cred, p);
+ inp->VFid = *fid;
+ /* NOTE:
+ * NetBSD and Venus internals use the "data" in the low 3 bits.
+ * Hence, the conversion.
+ */
+ inp->flags = mode>>6; /* bits 6..8 of mode land in bits 0..2; higher bits are not masked off */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_access_size);
+ return error;
+}
+
+int
+venus_readlink(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p,
+/*out*/ char **str, int *len)
+{
+ DECL(coda_readlink); /* sets Isize & Osize */
+ coda_readlink_size += CODA_MAXPATHLEN; /* room for the link text in the reply */
+ ALLOC(coda_readlink); /* sets inp & outp */
+
+ /* CODA_READLINK request for fid. */
+ INIT_IN(&inp->ih, CODA_READLINK, cred, p);
+ inp->VFid = *fid;
+
+ Osize += CODA_MAXPATHLEN;
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* NOTE(review): outp->count and outp->data are used without range checks */
+ CODA_ALLOC(*str, char *, outp->count); /* not freed here; presumably the caller's job */
+ *len = outp->count;
+ bcopy((char *)outp + (int)outp->data, *str, *len); /* exactly *len bytes; no NUL is added here */
+ }
+
+ CODA_FREE(inp, coda_readlink_size);
+ return error;
+}
+
+int
+venus_fsync(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_fsync); /* sets Isize & Osize */
+ ALLOC_NO_OUT(coda_fsync); /* sets inp & outp */
+
+ /* CODA_FSYNC request for fid; reply is a bare header. */
+ INIT_IN(&inp->ih, CODA_FSYNC, cred, p);
+ inp->VFid = *fid;
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_fsync_size);
+ return error; /* coda_call's result, passed through unchanged */
+}
+
+int
+venus_lookup(void *mdp, ViceFid *fid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, int *vtype)
+{
+ DECL(coda_lookup); /* sets Isize & Osize */
+ coda_lookup_size += len + 1; /* room for nm plus its NUL */
+ ALLOC(coda_lookup); /* sets inp & outp */
+
+ /* CODA_LOOKUP request: look up nm in directory fid. */
+ INIT_IN(&inp->ih, CODA_LOOKUP, cred, p);
+ inp->VFid = *fid;
+
+ /* NOTE:
+ * Between version 1 and version 2 we have added an extra flag field
+ * to this structure. But because the string was at the end and because
+ * of the weird way we represent strings by having the slot point to
+ * where the string characters are in the "heap", we can just slip the
+ * flag parameter in after the string slot pointer and veni that don't
+ * know better won't see this new flag field ...
+ * Otherwise we'd need two different venus_lookup functions.
+ */
+ inp->name = Isize; /* string offset: just past the fixed struct */
+ inp->flags = CLU_CASE_SENSITIVE; /* doesn't really matter for BSD */
+ STRCPY(name, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* outputs are written only when coda_call returns 0 */
+ *VFid = outp->VFid;
+ *vtype = outp->vtype;
+ }
+
+ CODA_FREE(inp, coda_lookup_size);
+ return error;
+}
+
+int
+venus_create(void *mdp, ViceFid *fid,
+ const char *nm, int len, int exclusive, int mode, struct vattr *va,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, struct vattr *attr)
+{
+ DECL(coda_create); /* sets Isize & Osize */
+ coda_create_size += len + 1; /* room for nm plus its NUL */
+ ALLOC(coda_create); /* sets inp & outp */
+
+ /* CODA_CREATE request: create nm in directory fid with attributes *va. */
+ INIT_IN(&inp->ih, CODA_CREATE, cred, p);
+ inp->VFid = *fid;
+ inp->excl = exclusive ? C_O_EXCL : 0; /* any nonzero exclusive becomes C_O_EXCL */
+ inp->mode = mode;
+ CNV_V2VV_ATTR(&inp->attr, va);
+
+ inp->name = Isize; /* string offset: just past the fixed struct */
+ STRCPY(name, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* outputs are written only when coda_call returns 0 */
+ *VFid = outp->VFid;
+ CNV_VV2V_ATTR(attr, &outp->attr);
+ }
+
+ CODA_FREE(inp, coda_create_size);
+ return error;
+}
+
+int
+venus_remove(void *mdp, ViceFid *fid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_remove); /* sets Isize & Osize */
+ coda_remove_size += len + 1; /* room for nm plus its NUL */
+ ALLOC_NO_OUT(coda_remove); /* sets inp & outp */
+
+ /* CODA_REMOVE request: remove nm from directory fid. */
+ INIT_IN(&inp->ih, CODA_REMOVE, cred, p);
+ inp->VFid = *fid;
+
+ inp->name = Isize; /* string offset: just past the fixed struct */
+ STRCPY(name, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_remove_size);
+ return error;
+}
+
+int
+venus_link(void *mdp, ViceFid *fid, ViceFid *tfid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_link); /* sets Isize & Osize */
+ coda_link_size += len + 1; /* room for nm plus its NUL */
+ ALLOC_NO_OUT(coda_link); /* sets inp & outp */
+
+ /* CODA_LINK request: fid is sent as sourceFid, tfid as destFid, nm as tname. */
+ INIT_IN(&inp->ih, CODA_LINK, cred, p);
+ inp->sourceFid = *fid;
+ inp->destFid = *tfid;
+
+ inp->tname = Isize; /* string offset: just past the fixed struct */
+ STRCPY(tname, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_link_size);
+ return error;
+}
+
+int
+venus_rename(void *mdp, ViceFid *fid, ViceFid *tfid,
+ const char *nm, int len, const char *tnm, int tlen,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_rename); /* sets Isize & Osize */
+ coda_rename_size += len + 1 + tlen + 1; /* room for both names plus their NULs */
+ ALLOC_NO_OUT(coda_rename); /* sets inp & outp */
+
+ /* CODA_RENAME request: nm under fid (sourceFid) to tnm under tfid (destFid). */
+ INIT_IN(&inp->ih, CODA_RENAME, cred, p);
+ inp->sourceFid = *fid;
+ inp->destFid = *tfid;
+
+ inp->srcname = Isize; /* first string: just past the fixed struct */
+ STRCPY(srcname, nm, len); /* increments Isize */
+
+ inp->destname = Isize; /* second string: right after the first one's NUL */
+ STRCPY(destname, tnm, tlen); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_rename_size);
+ return error;
+}
+
+int
+venus_mkdir(void *mdp, ViceFid *fid,
+ const char *nm, int len, struct vattr *va,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, struct vattr *ova)
+{
+ DECL(coda_mkdir); /* sets Isize & Osize */
+ coda_mkdir_size += len + 1; /* room for nm plus its NUL */
+ ALLOC(coda_mkdir); /* sets inp & outp */
+
+ /* CODA_MKDIR request: make directory nm under fid with attributes *va. */
+ INIT_IN(&inp->ih, CODA_MKDIR, cred, p);
+ inp->VFid = *fid;
+ CNV_V2VV_ATTR(&inp->attr, va);
+
+ inp->name = Isize; /* string offset: just past the fixed struct */
+ STRCPY(name, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* outputs are written only when coda_call returns 0 */
+ *VFid = outp->VFid;
+ CNV_VV2V_ATTR(ova, &outp->attr);
+ }
+
+ CODA_FREE(inp, coda_mkdir_size);
+ return error;
+}
+
+int
+venus_rmdir(void *mdp, ViceFid *fid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_rmdir); /* sets Isize & Osize */
+ coda_rmdir_size += len + 1; /* room for nm plus its NUL */
+ ALLOC_NO_OUT(coda_rmdir); /* sets inp & outp */
+
+ /* CODA_RMDIR request: remove directory nm from fid. */
+ INIT_IN(&inp->ih, CODA_RMDIR, cred, p);
+ inp->VFid = *fid;
+
+ inp->name = Isize; /* string offset: just past the fixed struct */
+ STRCPY(name, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_rmdir_size);
+ return error;
+}
+
+int
+venus_symlink(void *mdp, ViceFid *fid,
+ const char *lnm, int llen, const char *nm, int len, struct vattr *va,
+ struct ucred *cred, struct proc *p)
+{
+ DECL_NO_OUT(coda_symlink); /* sets Isize & Osize */
+ coda_symlink_size += llen + 1 + len + 1; /* room for both strings plus their NULs */
+ ALLOC_NO_OUT(coda_symlink); /* sets inp & outp */
+
+ /* CODA_SYMLINK request: lnm is sent as srcname, nm as tname, under fid. */
+ INIT_IN(&inp->ih, CODA_SYMLINK, cred, p);
+ inp->VFid = *fid;
+ CNV_V2VV_ATTR(&inp->attr, va);
+
+ inp->srcname = Isize; /* first string: just past the fixed struct */
+ STRCPY(srcname, lnm, llen); /* increments Isize */
+
+ inp->tname = Isize; /* second string: right after the first one's NUL */
+ STRCPY(tname, nm, len); /* increments Isize */
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+
+ CODA_FREE(inp, coda_symlink_size);
+ return error;
+}
+
+int
+venus_readdir(void *mdp, ViceFid *fid,
+ int count, int offset,
+ struct ucred *cred, struct proc *p,
+/*out*/ char *buffer, int *len)
+{
+ DECL(coda_readdir); /* sets Isize & Osize */
+ coda_readdir_size = VC_MAXMSGSIZE; /* fixed maximum-size buffer, not sized from count */
+ ALLOC(coda_readdir); /* sets inp & outp */
+
+ /* CODA_READDIR request: count and offset are passed through as given. */
+ INIT_IN(&inp->ih, CODA_READDIR, cred, p);
+ inp->VFid = *fid;
+ inp->count = count;
+ inp->offset = offset;
+
+ Osize = VC_MAXMSGSIZE;
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* NOTE(review): outp->size is not checked against count or the buffer size */
+ bcopy((char *)outp + (int)outp->data, buffer, outp->size);
+ *len = outp->size;
+ }
+
+ CODA_FREE(inp, coda_readdir_size);
+ return error;
+}
+
+int
+venus_fhtovp(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, int *vtype)
+{
+ DECL(coda_vget); /* sets Isize & Osize */
+ ALLOC(coda_vget); /* sets inp & outp */
+
+ /* CODA_VGET request for fid (this function uses the coda_vget message structs). */
+ INIT_IN(&inp->ih, CODA_VGET, cred, p);
+ inp->VFid = *fid;
+
+ error = coda_call(mdp, Isize, &Osize, (char *)inp);
+ if (!error) { /* outputs are written only when coda_call returns 0 */
+ *VFid = outp->VFid;
+ *vtype = outp->vtype;
+ }
+
+ CODA_FREE(inp, coda_vget_size);
+ return error;
+}
diff --git a/sys/fs/coda/coda_venus.h b/sys/fs/coda/coda_venus.h
new file mode 100644
index 0000000..6fdd15c
--- /dev/null
+++ b/sys/fs/coda/coda_venus.h
@@ -0,0 +1,133 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_venus.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_venus.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ *
+ */
+
+int
+venus_root(void *mdp,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid);
+
+int
+venus_open(void *mdp, ViceFid *fid, int flag,
+ struct ucred *cred, struct proc *p,
+/*out*/ dev_t *dev, ino_t *inode);
+
+int
+venus_close(void *mdp, ViceFid *fid, int flag,
+ struct ucred *cred, struct proc *p);
+
+void
+venus_read(void);
+
+void
+venus_write(void);
+
+int
+venus_ioctl(void *mdp, ViceFid *fid,
+ int com, int flag, caddr_t data,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_getattr(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p,
+/*out*/ struct vattr *vap);
+
+int
+venus_setattr(void *mdp, ViceFid *fid, struct vattr *vap,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_access(void *mdp, ViceFid *fid, int mode,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_readlink(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p,
+/*out*/ char **str, int *len);
+
+int
+venus_fsync(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_lookup(void *mdp, ViceFid *fid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, int *vtype);
+
+int
+venus_create(void *mdp, ViceFid *fid,
+ const char *nm, int len, int exclusive, int mode, struct vattr *va,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, struct vattr *attr);
+
+int
+venus_remove(void *mdp, ViceFid *fid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_link(void *mdp, ViceFid *fid, ViceFid *tfid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_rename(void *mdp, ViceFid *fid, ViceFid *tfid,
+ const char *nm, int len, const char *tnm, int tlen,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_mkdir(void *mdp, ViceFid *fid,
+ const char *nm, int len, struct vattr *va,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, struct vattr *ova);
+
+int
+venus_rmdir(void *mdp, ViceFid *fid,
+ const char *nm, int len,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_symlink(void *mdp, ViceFid *fid,
+ const char *lnm, int llen, const char *nm, int len, struct vattr *va,
+ struct ucred *cred, struct proc *p);
+
+int
+venus_readdir(void *mdp, ViceFid *fid,
+ int count, int offset,
+ struct ucred *cred, struct proc *p,
+/*out*/ char *buffer, int *len);
+
+int
+venus_fhtovp(void *mdp, ViceFid *fid,
+ struct ucred *cred, struct proc *p,
+/*out*/ ViceFid *VFid, int *vtype);
diff --git a/sys/fs/coda/coda_vfsops.c b/sys/fs/coda/coda_vfsops.c
new file mode 100644
index 0000000..8f6befe
--- /dev/null
+++ b/sys/fs/coda/coda_vfsops.c
@@ -0,0 +1,770 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_vfsops.c,v 1.10 1998/12/04 22:54:43 archie Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University. Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_vfsops.c,v $
+ * Revision 1.10 1998/12/04 22:54:43 archie
+ * Examine all occurrences of sprintf(), strcat(), and str[n]cpy()
+ * for possible buffer overflow problems. Replaced most sprintf()'s
+ * with snprintf(); for others cases, added terminating NUL bytes where
+ * appropriate, replaced constants like "16" with sizeof(), etc.
+ *
+ * These changes include several bug fixes, but most changes are for
+ * maintainability's sake. Any instance where it wasn't "immediately
+ * obvious" that a buffer overflow could not occur was made safer.
+ *
+ * Reviewed by: Bruce Evans <bde@zeta.org.au>
+ * Reviewed by: Matthew Dillon <dillon@apollo.backplane.com>
+ * Reviewed by: Mike Spengler <mks@networkcs.com>
+ *
+ * Revision 1.9 1998/11/16 19:48:26 rvb
+ * A few bug fixes for Robert Watson
+ *
+ * Revision 1.8 1998/11/03 08:55:06 peter
+ * Support KLD. We register and unregister two modules. "coda" (the vfs)
+ * via VFS_SET(), and "codadev" for the cdevsw entry. From kldstat -v:
+ * 3 1 0xf02c5000 115d8 coda.ko
+ * Contains modules:
+ * Id Name
+ * 2 codadev
+ * 3 coda
+ *
+ * Revision 1.7 1998/09/29 20:19:45 rvb
+ * Fixes for lkm:
+ * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL
+ * 2. don't pass -DCODA to lkm build
+ *
+ * Revision 1.6 1998/09/25 17:38:32 rvb
+ * Put "stray" printouts under DIAGNOSTIC. Make everything build
+ * with DEBUG on. Add support for lkm. (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.5 1998/09/13 13:57:59 rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.4 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.11 1998/08/28 18:12:22 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10 1998/08/18 17:05:19 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9 1998/08/18 16:31:44 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8 98/02/24 22:22:48 rvb
+ * Fixes up mainly to flush iopen and friends
+ *
+ * Revision 1.7 98/01/23 11:53:45 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.6.2.6 98/01/23 11:21:07 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.6.2.5 98/01/22 13:05:33 rvb
+ * Move make_coda_node ctlfid later so vfsp is known
+ *
+ * Revision 1.6.2.4 97/12/19 14:26:05 rvb
+ * session id
+ *
+ * Revision 1.6.2.3 97/12/16 12:40:11 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.6.2.2 97/12/10 11:40:25 rvb
+ * No more ody
+ *
+ * Revision 1.6.2.1 97/12/06 17:41:24 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.6 97/12/05 10:39:21 rvb
+ * Read CHANGES
+ *
+ * Revision 1.5.14.8 97/11/24 15:44:46 rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ *
+ * Revision 1.5.14.7 97/11/21 13:22:03 rvb
+ * Catch a few coda_calls in coda_vfsops.c
+ *
+ * Revision 1.5.14.6 97/11/20 11:46:48 rvb
+ * Capture current cfs_venus
+ *
+ * Revision 1.5.14.5 97/11/18 10:27:17 rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ *
+ * Revision 1.5.14.4 97/11/13 22:03:01 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.5.14.3 97/11/12 12:09:40 rvb
+ * reorg pass1
+ *
+ * Revision 1.5.14.2 97/10/29 16:06:28 rvb
+ * Kill DYING
+ *
+ * Revision 1.5.14.1 1997/10/28 23:10:17 rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.5 1997/01/13 17:11:07 bnoble
+ * Coda statfs needs to return something other than -1 for blocks avail. and
+ * files available for wabi (and other windowsish) programs to install
+ * there correctly.
+ *
+ * Revision 1.4 1996/12/12 22:11:00 bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.3 1996/11/08 18:06:12 bnoble
+ * Minor changes in vnode operation signature, VOP_UPDATE signature, and
+ * some newly defined bits in the include files.
+ *
+ * Revision 1.2 1996/01/02 16:57:04 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:32 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:08:02 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:08:01 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.4 1995/02/17 16:25:22 dcs
+ * These versions represent several changes:
+ * 1. Allow venus to restart even if outstanding references exist.
+ * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d
+ * 3. Allow ody_expand to return many members, not just one.
+ *
+ * Revision 2.3 94/10/14 09:58:21 dcs
+ * Made changes 'cause sun4s have braindead compilers
+ *
+ * Revision 2.2 94/10/12 16:46:33 dcs
+ * Cleaned kernel/venus interface by removing XDR junk, plus
+ * so cleanup to allow this code to be more easily ported.
+ *
+ * Revision 1.3 93/05/28 16:24:29 bnoble
+ * *** empty log message ***
+ *
+ * Revision 1.2 92/10/27 17:58:24 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 2.3 92/09/30 14:16:32 mja
+ * Added call to coda_flush to coda_unmount.
+ * [90/12/15 dcs]
+ *
+ * Added contributors blurb.
+ * [90/12/13 jjk]
+ *
+ * Revision 2.2 90/07/05 11:26:40 mrt
+ * Created for the Coda File System.
+ * [90/05/23 dcs]
+ *
+ * Revision 1.3 90/05/31 17:01:42 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ *
+ */
+
+#include <vcoda.h>
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/mount.h>
+#include <sys/select.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vfsops.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_opstats.h>
+
+#include <miscfs/specfs/specdev.h>
+
+MALLOC_DEFINE(M_CODA, "CODA storage", "Various Coda Structures");
+
+int codadebug = 0; /* debug word, off by default */
+int coda_vfsop_print_entry = 0; /* nonzero makes ENTRY print the function name */
+#define ENTRY if(coda_vfsop_print_entry) myprintf(("Entered %s\n",__FUNCTION__)) /* NOTE(review): bare if; a following else would bind to it */
+
+struct vnode *coda_ctlvp;
+struct coda_mntinfo coda_mnttbl[NVCODA]; /* indexed by minor device number */
+
+/* structure to keep statistics of internally generated/satisfied calls */
+
+struct coda_op_stats coda_vfsopstats[CODA_VFSOPS_SIZE];
+
+#define MARK_ENTRY(op) (coda_vfsopstats[op].entries++) /* each macro bumps one counter of slot op */
+#define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++)
+#define MRAK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++) /* NOTE(review): name looks like a typo for MARK_INT_GEN; check users before renaming */
+
+extern int coda_nc_initialized; /* Set if cache has been initialized */
+extern int vc_nb_open __P((dev_t, int, int, struct proc *));
+
+/*
+ * Reset the vfs-operation statistics table: every slot records its own
+ * index as the opcode and all of its counters are cleared.
+ * Always returns 0.
+ */
+int
+coda_vfsopstats_init(void)
+{
+    struct coda_op_stats *slot;
+    int op;
+
+    for (op = 0; op < CODA_VFSOPS_SIZE; op++) {
+        slot = &coda_vfsopstats[op];
+        slot->opcode = op;
+        slot->entries = 0;
+        slot->sat_intrn = 0;
+        slot->unsat_intrn = 0;
+        slot->gen_intrn = 0;
+    }
+
+    return 0;
+}
+
+/*
+ * cfs mount vfsop
+ * Set up mount info record and attach it to vfs struct.
+ *
+ * `data' is handed to namei() as a user-space path; the vnode it names
+ * must be a character device whose cdevsw d_open entry is vc_nb_open and
+ * whose minor number indexes coda_mnttbl[].
+ *
+ * Returns 0 on success, EBUSY if CODA_MOUNTED(vfsp) already holds, the
+ * namei() error if the lookup fails, ENXIO if the device does not pass
+ * the checks above, and ENODEV if VC_OPEN(&mi->mi_vcomm) is false.
+ */
+/*ARGSUSED*/
+int
+coda_mount(vfsp, path, data, ndp, p)
+    struct mount *vfsp; /* Allocated and initialized by mount(2) */
+    char *path; /* path covered: ignored by the fs-layer */
+    caddr_t data; /* Need to define a data type for this in netbsd? */
+    struct nameidata *ndp; /* Clobber this to lookup the device name */
+    struct proc *p; /* The ever-famous proc pointer */
+{
+    struct vnode *dvp;
+    struct cnode *cp;
+    dev_t dev;
+    struct coda_mntinfo *mi;
+    struct vnode *rootvp;
+    ViceFid rootfid;
+    ViceFid ctlfid;
+    int error;
+
+    ENTRY;
+
+    coda_vfsopstats_init();
+    coda_vnodeopstats_init();
+
+    MARK_ENTRY(CODA_MOUNT_STATS);
+    if (CODA_MOUNTED(vfsp)) {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        return(EBUSY);
+    }
+
+    /* Validate mount device. Similar to getmdev(). */
+
+    NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, data, p);
+    error = namei(ndp);
+    if (error) {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        return (error);
+    }
+
+    /* Only look at the lookup result once namei() has succeeded. */
+    dvp = ndp->ni_vp;
+    if (dvp->v_type != VCHR) {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        vrele(dvp);
+        return(ENXIO);
+    }
+    dev = dvp->v_specinfo->si_rdev;
+    vrele(dvp);
+    if (major(dev) >= nchrdev || major(dev) < 0) {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        return(ENXIO);
+    }
+
+    /*
+     * See if the device table matches our expectations. cdevsw[] holds
+     * pointers, so reject an empty slot before dereferencing it.
+     */
+    if (cdevsw[major(dev)] == NULL ||
+        cdevsw[major(dev)]->d_open != vc_nb_open)
+    {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        return(ENXIO);
+    }
+
+    if (minor(dev) >= NVCODA || minor(dev) < 0) {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        return(ENXIO);
+    }
+
+    /*
+     * Initialize the mount record and link it to the vfs struct
+     */
+    mi = &coda_mnttbl[minor(dev)];
+
+    if (!VC_OPEN(&mi->mi_vcomm)) {
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+        return(ENODEV);
+    }
+
+    /* No initialization (here) of mi_vcomm! */
+    vfsp->mnt_data = (qaddr_t)mi;
+    vfs_getnewfsid (vfsp);
+
+    mi->mi_vfsp = vfsp;
+
+    /*
+     * Make a root vnode to placate the Vnode interface, but don't
+     * actually make the CODA_ROOT call to venus until the first call
+     * to coda_root in case a server is down while venus is starting.
+     */
+    rootfid.Volume = 0;
+    rootfid.Vnode = 0;
+    rootfid.Unique = 0;
+    cp = make_coda_node(&rootfid, vfsp, VDIR);
+    rootvp = CTOV(cp);
+    rootvp->v_flag |= VROOT;
+
+    ctlfid.Volume = CTL_VOL;
+    ctlfid.Vnode = CTL_VNO;
+    ctlfid.Unique = CTL_UNI;
+/* cp = make_coda_node(&ctlfid, vfsp, VCHR);
+   The above code seems to cause a loop in the cnode links.
+   I don't totally understand when it happens, it is caught
+   when closing down the system.
+ */
+    cp = make_coda_node(&ctlfid, 0, VCHR);
+
+    coda_ctlvp = CTOV(cp);
+
+    /* Add rootvp to the mntinfo; mi_vfsp was already recorded above. */
+    mi->mi_rootvp = rootvp;
+
+    /* set filesystem block size */
+    vfsp->mnt_stat.f_bsize = 8192; /* XXX -JJK */
+
+    /* Set f_iosize. XXX -- inamura@isl.ntt.co.jp.
+       For vnode_pager_haspage() references. The value should be obtained
+       from underlying UFS. */
+    /* Checked UFS. iosize is set as 8192 */
+    vfsp->mnt_stat.f_iosize = 8192;
+
+    /* error is currently guaranteed to be zero, but in case some
+       code changes... */
+    CODADEBUG(1,
+        myprintf(("coda_mount returned %d\n",error)););
+    if (error)
+        MARK_INT_FAIL(CODA_MOUNT_STATS);
+    else
+        MARK_INT_SAT(CODA_MOUNT_STATS);
+
+    return(error);
+}
+
+/*
+ * Start vfsop: Coda has nothing to do here, so just report success.
+ */
+int
+coda_start(struct mount *vfsp, int flags, struct proc *p)
+{
+    ENTRY;
+
+    return 0;
+}
+
+/*
+ * Unmount vfsop.
+ *
+ * Returns EINVAL if vfsp is not Coda-mounted or is not the mount recorded
+ * in its coda_mntinfo, EBUSY while the root cnode is not marked as
+ * unmounting, and 0 otherwise.  The vflush() result is only printed; it
+ * never fails the unmount.  Every exit path after MARK_ENTRY is accounted
+ * for in the statistics as either satisfied or failed.
+ */
+int
+coda_unmount(vfsp, mntflags, p)
+    struct mount *vfsp;
+    int mntflags;
+    struct proc *p;
+{
+    struct coda_mntinfo *mi = vftomi(vfsp);
+    int active, error = 0;
+
+    ENTRY;
+    MARK_ENTRY(CODA_UMOUNT_STATS);
+    if (!CODA_MOUNTED(vfsp)) {
+        MARK_INT_FAIL(CODA_UMOUNT_STATS);
+        return(EINVAL);
+    }
+
+    if (mi->mi_vfsp != vfsp) {
+        /* Not the mount this mntinfo knows about. */
+        MARK_INT_FAIL(CODA_UMOUNT_STATS);
+        return (EINVAL);
+    }
+
+    /* We found the victim */
+    if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp))) {
+        MARK_INT_FAIL(CODA_UMOUNT_STATS);
+        return (EBUSY); /* Venus is still running */
+    }
+
+#ifdef DEBUG
+    printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp));
+#endif
+    vrele(mi->mi_rootvp);
+
+    active = coda_kill(vfsp, NOT_DOWNCALL);
+    mi->mi_rootvp->v_flag &= ~VROOT;
+    error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE);
+    printf("coda_unmount: active = %d, vflush active %d\n", active, error);
+    /* The vflush() outcome is reported above and then deliberately ignored. */
+
+    /* I'm going to take this out to allow lookups to go through. I'm
+     * not sure it's important anyway. -- DCS 2/2/94
+     */
+    /* vfsp->VFS_DATA = NULL; */
+
+    /* No more vfsp's to hold onto */
+    mi->mi_vfsp = NULL;
+    mi->mi_rootvp = NULL;
+
+    MARK_INT_SAT(CODA_UMOUNT_STATS);
+    return(0);
+}
+
+/*
+ * find root of cfs
+ *
+ * Hands back the mount's root vnode in *vpp, referenced (vref) and locked
+ * (vn_lock, LK_EXCLUSIVE).  If the root cnode still carries the all-zero
+ * placeholder fid, venus_root() is asked for the real fid first and the
+ * cnode is rehashed under it.
+ *
+ * Returns 0 on success.  ENODEV and EINTR from venus_root() are also turned
+ * into 0 (see the "gross hack" note below); any other venus_root() error is
+ * returned unchanged and *vpp is left untouched.
+ */
+int
+coda_root(vfsp, vpp)
+    struct mount *vfsp;
+    struct vnode **vpp;
+{
+    struct coda_mntinfo *mi = vftomi(vfsp);
+    int error;
+    struct proc *p = curproc; /* XXX - bnoble */
+    ViceFid VFid;
+
+    ENTRY;
+    MARK_ENTRY(CODA_ROOT_STATS);
+
+    if (vfsp == mi->mi_vfsp &&
+        ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) ||
+         (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) ||
+         (VTOC(mi->mi_rootvp)->c_fid.Unique != 0)))
+    {
+        /* Found valid root. */
+        MARK_INT_SAT(CODA_ROOT_STATS);
+    } else {
+        error = venus_root(vftomi(vfsp), p->p_cred->pc_ucred, p, &VFid);
+
+        if (!error) {
+            /*
+             * Save the new rootfid in the cnode, and rehash the cnode into the
+             * cnode hash with the new fid key.
+             */
+            coda_unsave(VTOC(mi->mi_rootvp));
+            VTOC(mi->mi_rootvp)->c_fid = VFid;
+            coda_save(VTOC(mi->mi_rootvp));
+
+            MARK_INT_SAT(CODA_ROOT_STATS);
+        } else if (error == ENODEV || error == EINTR) {
+            /* Gross hack here! */
+            /*
+             * If Venus fails to respond to the CODA_ROOT call, coda_call returns
+             * ENODEV. Return the uninitialized root vnode to allow vfs
+             * operations such as unmount to continue. Without this hack,
+             * there is no way to do an unmount if Venus dies before a
+             * successful CODA_ROOT call is done. All vnode operations
+             * will fail.
+             */
+            MARK_INT_FAIL(CODA_ROOT_STATS);
+        } else {
+            CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); );
+            MARK_INT_FAIL(CODA_ROOT_STATS);
+
+            return(error);
+        }
+    }
+
+    /* On Mach, this is vref. On NetBSD, VOP_LOCK */
+    *vpp = mi->mi_rootvp;
+    vref(*vpp);
+    vn_lock(*vpp, LK_EXCLUSIVE, p);
+
+    return(0);
+}
+
+/*
+ * Quota control vfsop: not supported; always fails with EOPNOTSUPP.
+ */
+int
+coda_quotactl(struct mount *vfsp, int cmd, uid_t uid, caddr_t arg,
+    struct proc *p)
+{
+    ENTRY;
+
+    return EOPNOTSUPP;
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Fills *sbp with fixed, made-up figures (see NB_SFS_SIZ), the fsid taken
+ * from the mount's own mnt_stat, and constant mount names.
+ * Returns EINVAL if vfsp is not Coda-mounted, 0 otherwise.
+ */
+int
+coda_nb_statfs(struct mount *vfsp, struct statfs *sbp, struct proc *p)
+{
+    ENTRY;
+/* MARK_ENTRY(CODA_STATFS_STATS); */
+    if (!CODA_MOUNTED(vfsp)) {
+/* MARK_INT_FAIL(CODA_STATFS_STATS);*/
+        return(EINVAL);
+    }
+
+    /* Start from an all-zero record. */
+    bzero(sbp, sizeof(*sbp));
+
+    /* XXX - what to do about f_flags, others? --bnoble */
+    /* Below This is what AFS does
+       #define NB_SFS_SIZ 0x895440
+     */
+#define NB_SFS_SIZ 0x8AB75D
+
+    sbp->f_type = vfsp->mnt_vfc->vfc_typenum;
+
+    /* Note: Normal fs's have a bsize of 0x400 == 1024 */
+    sbp->f_bsize = 8192; /* XXX */
+    sbp->f_iosize = 8192; /* XXX */
+
+    /* Every capacity and file count reports the same constant. */
+    sbp->f_blocks = NB_SFS_SIZ;
+    sbp->f_bfree = NB_SFS_SIZ;
+    sbp->f_bavail = NB_SFS_SIZ;
+    sbp->f_files = NB_SFS_SIZ;
+    sbp->f_ffree = NB_SFS_SIZ;
+
+    sbp->f_fsid = vfsp->mnt_stat.f_fsid;
+
+    snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda");
+    snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA");
+/* MARK_INT_SAT(CODA_STATFS_STATS); */
+    return(0);
+}
+
+/*
+ * Flush any pending I/O.
+ *
+ * Nothing is actually flushed here: the call is counted as entered and
+ * internally satisfied, and 0 is returned.
+ */
+int
+coda_sync(struct mount *vfsp, int waitfor, struct ucred *cred,
+    struct proc *p)
+{
+    ENTRY;
+
+    MARK_ENTRY(CODA_SYNC_STATS);
+    MARK_INT_SAT(CODA_SYNC_STATS);
+
+    return 0;
+}
+
+/*
+ * vget vfsop (lookup by inode number): not supported; always fails with
+ * EOPNOTSUPP.
+ */
+int
+coda_vget(struct mount *vfsp, ino_t ino, struct vnode **vpp)
+{
+    ENTRY;
+
+    return EOPNOTSUPP;
+}
+
+/*
+ * fhtovp is now what vget used to be in 4.3-derived systems. For
+ * some silly reason, vget is now keyed by a 32 bit ino_t, rather than
+ * a type-specific fid.
+ *
+ * fhp is treated as a struct cfid.  The control fid maps straight to
+ * coda_ctlvp (with an extra reference); any other fid is resolved through
+ * venus_fhtovp() and turned into a cnode-backed vnode.
+ * Returns 0 with *vpp set, or the venus_fhtovp() error with *vpp cleared.
+ */
+int
+coda_fhtovp(struct mount *vfsp, struct fid *fhp, struct mbuf *nam,
+    struct vnode **vpp, int *exflagsp, struct ucred **creadanonp)
+{
+    struct cfid *handle = (struct cfid *)fhp;
+    struct proc *curp = curproc; /* XXX -mach */
+    struct cnode *node;
+    ViceFid fid;
+    int type;
+    int rc;
+
+    ENTRY;
+
+    MARK_ENTRY(CODA_VGET_STATS);
+
+    /* Check for vget of control object. */
+    if (IS_CTL_FID(&handle->cfid_fid)) {
+        *vpp = coda_ctlvp;
+        vref(coda_ctlvp);
+        MARK_INT_SAT(CODA_VGET_STATS);
+        return(0);
+    }
+
+    rc = venus_fhtovp(vftomi(vfsp), &handle->cfid_fid, curp->p_cred->pc_ucred, curp, &fid, &type);
+    if (rc) {
+        CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",rc));)
+        *vpp = (struct vnode *)0;
+        return(rc);
+    }
+
+    CODADEBUG(CODA_VGET,
+        myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n",
+            fid.Volume, fid.Vnode, fid.Unique, type, rc)); )
+
+    node = make_coda_node(&fid, vfsp, type);
+    *vpp = CTOV(node);
+    return(rc);
+}
+
+/*
+ * vptofh vfsop (vnode to file handle): not supported; always fails with
+ * EOPNOTSUPP.
+ */
+int
+coda_vptofh(struct vnode *vnp, struct fid *fidp)
+{
+    ENTRY;
+
+    return EOPNOTSUPP;
+}
+
+/*
+ * init vfsop: there is nothing to set up here; always returns 0.
+ */
+int
+coda_init(struct vfsconf *vfsp)
+{
+    ENTRY;
+
+    return (0);
+}
+
+/*
+ * To allow for greater ease of use, some vnodes may be orphaned when
+ * Venus dies. Certain operations should still be allowed to go
+ * through, but without propagating orphan-ness. So this function will
+ * get a new vnode for the file from the current run of Venus.
+ *
+ * *vpp names the old vnode on entry; the replacement is looked up through
+ * coda_fhtovp() using the old vnode's fid.  Returns ENODEV when the
+ * mntinfo has no vfsp recorded, otherwise whatever coda_fhtovp() returns.
+ */
+
+int
+getNewVnode(vpp)
+    struct vnode **vpp;
+{
+    struct coda_mntinfo *mntinfo = vftomi((*vpp)->v_mount);
+    struct cfid handle;
+
+    ENTRY;
+
+    /* We're guessing that if set, the 1st element on the list is a
+     * valid vnode to use. If not, return ENODEV as venus is dead.
+     */
+    if (mntinfo->mi_vfsp == NULL)
+        return ENODEV;
+
+    /* Build a file handle carrying the old vnode's fid. */
+    handle.cfid_len = (short)sizeof(ViceFid);
+    handle.cfid_fid = VTOC(*vpp)->c_fid; /* Structure assignment. */
+    /* XXX ? */
+
+    return coda_fhtovp(mntinfo->mi_vfsp, (struct fid*)&handle, NULL, vpp,
+        NULL, NULL);
+}
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+/* get the mount structure corresponding to a given device. Assume
+ * device corresponds to a UFS. Return NULL if no device is found.
+ *
+ * NOTE(review): every entry on mountlist is passed through VFSTOUFS()
+ * without first checking that it is a UFS mount -- confirm that this is
+ * safe for the other filesystem types that can be mounted.
+ */
+struct mount *devtomp(dev)
+    dev_t dev;
+{
+    struct mount *cur = mountlist.cqh_first;
+
+    /* The list head itself acts as the end-of-list sentinel. */
+    while (cur != (void*)&mountlist) {
+        if (VFSTOUFS(cur)->um_dev == (dev_t) dev) {
+            /* mount corresponds to UFS and the device matches one we want */
+            return(cur);
+        }
+        cur = cur->mnt_list.cqe_next;
+    }
+
+    /* mount structure wasn't found */
+    return(NULL);
+}
+
+/*
+ * The Coda vfs operation vector.  The initializer is positional, so the
+ * entries must follow the member order of struct vfsops (declared
+ * elsewhere).
+ *
+ * Two slots are filled with eopnotsupp cast to the slot's function type
+ * rather than with a Coda routine.  Judging by the argument lists in the
+ * casts these are the file-handle slots (presumably fhtovp and vptofh --
+ * confirm against struct vfsops); coda_fhtovp() and coda_vptofh() defined
+ * in this file are not installed here.
+ */
+struct vfsops coda_vfsops = {
+ coda_mount,
+ coda_start,
+ coda_unmount,
+ coda_root,
+ coda_quotactl,
+ coda_nb_statfs,
+ coda_sync,
+ coda_vget,
+ (int (*) (struct mount *, struct fid *, struct sockaddr *, struct vnode **,
+ int *, struct ucred **))
+ eopnotsupp,
+ (int (*) (struct vnode *, struct fid *)) eopnotsupp,
+ coda_init,
+};
+
+/* Register the vector under the name "coda" with the VFCF_NETWORK flag. */
+VFS_SET(coda_vfsops, coda, VFCF_NETWORK);
diff --git a/sys/fs/coda/coda_vfsops.h b/sys/fs/coda/coda_vfsops.h
new file mode 100644
index 0000000..ef23c3f
--- /dev/null
+++ b/sys/fs/coda/coda_vfsops.h
@@ -0,0 +1,63 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/cfs/coda_vfsops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_vfsops.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ *
+ */
+
+/*
+ * cfid structure:
+ * This overlays the fid structure (see vfs.h)
+ * Only used below and will probably go away.
+ *
+ * coda_fhtovp() casts its struct fid argument to this type and reads
+ * cfid_fid from it; getNewVnode() builds one on the stack and passes it
+ * the other way.
+ */
+
+struct cfid {
+ u_short cfid_len; /* getNewVnode() stores sizeof(ViceFid) here */
+ u_short padding; /* presumably alignment filler -- TODO confirm */
+ ViceFid cfid_fid; /* the Coda fid carried by the handle */
+};
+
+/* Forward declaration so the prototypes below can mention struct mount. */
+struct mount;
+
+/* Statistics reset, followed by the Coda vfs operations. */
+int coda_vfsopstats_init(void);
+int coda_mount(struct mount *, char *, caddr_t, struct nameidata *,
+ struct proc *);
+int coda_start(struct mount *, int, struct proc *);
+int coda_unmount(struct mount *, int, struct proc *);
+int coda_root(struct mount *, struct vnode **);
+int coda_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *);
+int coda_nb_statfs(struct mount *, struct statfs *, struct proc *);
+int coda_sync(struct mount *, int, struct ucred *, struct proc *);
+int coda_vget(struct mount *, ino_t, struct vnode **);
+int coda_fhtovp(struct mount *, struct fid *, struct mbuf *, struct vnode **,
+ int *, struct ucred **);
+int coda_vptofh(struct vnode *, struct fid *);
+int coda_init(struct vfsconf *vfsp);
+
+/* Replace *vpp with a vnode for the same fid, looked up afresh. */
+int getNewVnode(struct vnode **vpp);
diff --git a/sys/fs/coda/coda_vnops.c b/sys/fs/coda/coda_vnops.c
new file mode 100644
index 0000000..efa0dda
--- /dev/null
+++ b/sys/fs/coda/coda_vnops.c
@@ -0,0 +1,2233 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_vnops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_vnops.c,v 1.14 1999/01/27 20:09:17 dillon Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University. Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_vnops.c,v $
+ * Revision 1.14 1999/01/27 20:09:17 dillon
+ * Fix warnings preparing for -Wall -Wcast-qual
+ *
+ * Also disable one usb module in LINT due to fatal compilation errors,
+ * temporary.
+ *
+ * Revision 1.13 1999/01/20 14:49:05 eivind
+ * Add 'options DEBUG_LOCKS', which stores extra information in struct
+ * lock, and add some macros and function parameters to make sure that
+ * the information get to the point where it can be put in the lock
+ * structure.
+ *
+ * While I'm here, add DEBUG_VFS_LOCKS to LINT.
+ *
+ * Revision 1.12 1999/01/07 16:14:12 bde
+ * Don't pass unused unused timestamp args to UFS_UPDATE() or waste
+ * time initializing them. This almost finishes centralizing (in-core)
+ * timestamp updates in ufs_itimes().
+ *
+ * Revision 1.11 1999/01/05 18:49:51 eivind
+ * Remove the 'waslocked' parameter to vfs_object_create().
+ *
+ * Revision 1.10 1998/12/04 18:44:21 rvb
+ * Don't print diagnostic anymore
+ *
+ * Revision 1.9 1998/11/16 19:48:26 rvb
+ * A few bug fixes for Robert Watson
+ *
+ * Revision 1.8 1998/10/28 20:31:13 rvb
+ * Change the way unmounting happens to guarantee that the
+ * client programs are allowed to finish up (coda_call is
+ * forced to complete) and release their locks. Thus there
+ * is a reasonable chance that the vflush implicit in the
+ * unmount will not get hung on held locks.
+ *
+ * Revision 1.7 1998/10/25 17:44:41 phk
+ * Nitpicking and dusting performed on a train. Removes trivial warnings
+ * about unused variables, labels and other lint.
+ *
+ * Revision 1.6 1998/09/28 20:52:58 rvb
+ * Cleanup and fix THE bug
+ *
+ * Revision 1.5 1998/09/25 17:38:32 rvb
+ * Put "stray" printouts under DIAGNOSTIC. Make everything build
+ * with DEBUG on. Add support for lkm. (The macro's don't work
+ * for me; for a good chuckle look at the end of coda_fbsd.c.)
+ *
+ * Revision 1.4 1998/09/13 13:57:59 rvb
+ * Finish conversion of cfs -> coda
+ *
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.12 1998/08/28 18:28:00 rvb
+ * NetBSD -current is stricter!
+ *
+ * Revision 1.11 1998/08/28 18:12:23 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.10 1998/08/18 17:05:21 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.9 1998/08/18 16:31:46 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.8 98/02/24 22:22:50 rvb
+ * Fixes up mainly to flush iopen and friends
+ *
+ * Revision 1.7 98/01/31 20:53:15 rvb
+ * First version that works on FreeBSD 2.2.5
+ *
+ * Revision 1.6 98/01/23 11:53:47 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.5.2.8 98/01/23 11:21:11 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.5.2.7 97/12/19 14:26:08 rvb
+ * session id
+ *
+ * Revision 1.5.2.6 97/12/16 22:01:34 rvb
+ * Oops add cfs_subr.h cfs_venus.h; sync with peter
+ *
+ * Revision 1.5.2.5 97/12/16 12:40:14 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.5.2.4 97/12/10 14:08:31 rvb
+ * Fix O_ flags; check result in coda_call
+ *
+ * Revision 1.5.2.3 97/12/10 11:40:27 rvb
+ * No more ody
+ *
+ * Revision 1.5.2.2 97/12/09 16:07:15 rvb
+ * Sync with vfs/include/coda.h
+ *
+ * Revision 1.5.2.1 97/12/06 17:41:25 rvb
+ * Sync with peters coda.h
+ *
+ * Revision 1.5 97/12/05 10:39:23 rvb
+ * Read CHANGES
+ *
+ * Revision 1.4.14.10 97/11/25 08:08:48 rvb
+ * cfs_venus ... done; until cred/vattr change
+ *
+ * Revision 1.4.14.9 97/11/24 15:44:48 rvb
+ * Final cfs_venus.c w/o macros, but one locking bug
+ *
+ * Revision 1.4.14.8 97/11/21 11:28:04 rvb
+ * cfs_venus.c is done: first pass
+ *
+ * Revision 1.4.14.7 97/11/20 11:46:51 rvb
+ * Capture current cfs_venus
+ *
+ * Revision 1.4.14.6 97/11/18 10:27:19 rvb
+ * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c
+ * cfs_nb_foo and cfs_foo are joined
+ *
+ * Revision 1.4.14.5 97/11/13 22:03:03 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.4.14.4 97/11/12 12:09:42 rvb
+ * reorg pass1
+ *
+ * Revision 1.4.14.3 97/11/06 21:03:28 rvb
+ * don't include headers in headers
+ *
+ * Revision 1.4.14.2 97/10/29 16:06:30 rvb
+ * Kill DYING
+ *
+ * Revision 1.4.14.1 1997/10/28 23:10:18 rvb
+ * >64Meg; venus can be killed!
+ *
+ * Revision 1.4 1997/02/20 13:54:50 lily
+ * check for NULL return from coda_nc_lookup before CTOV
+ *
+ * Revision 1.3 1996/12/12 22:11:02 bnoble
+ * Fixed the "downcall invokes venus operation" deadlock in all known cases.
+ * There may be more
+ *
+ * Revision 1.2 1996/01/02 16:57:07 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:34 bnoble
+ * Added CODA-specific files
+ *
+ * Revision 3.1.1.1 1995/03/04 19:08:06 bnoble
+ * Branch for NetBSD port revisions
+ *
+ * Revision 3.1 1995/03/04 19:08:04 bnoble
+ * Bump to major revision 3 to prepare for NetBSD port
+ *
+ * Revision 2.6 1995/02/17 16:25:26 dcs
+ * These versions represent several changes:
+ * 1. Allow venus to restart even if outstanding references exist.
+ * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d
+ * 3. Allow ody_expand to return many members, not just one.
+ *
+ * Revision 2.5 94/11/09 20:29:27 dcs
+ * Small bug in remove dealing with hard links and link counts was fixed.
+ *
+ * Revision 2.4 94/10/14 09:58:42 dcs
+ * Made changes 'cause sun4s have braindead compilers
+ *
+ * Revision 2.3 94/10/12 16:46:37 dcs
+ * Cleaned kernel/venus interface by removing XDR junk, plus
+ * so cleanup to allow this code to be more easily ported.
+ *
+ * Revision 2.2 94/09/20 14:12:41 dcs
+ * Fixed bug in rename when moving a directory.
+ *
+ * Revision 2.1 94/07/21 16:25:22 satya
+ * Conversion to C++ 3.0; start of Coda Release 2.0
+ *
+ * Revision 1.4 93/12/17 01:38:01 luqi
+ * Changes made for kernel to pass process info to Venus:
+ *
+ * (1) in file cfs.h
+ * add process id and process group id in most of the cfs argument types.
+ *
+ * (2) in file cfs_vnodeops.c
+ * add process info passing in most of the cfs vnode operations.
+ *
+ * (3) in file cfs_xdr.c
+ * expand xdr routines according changes in (1).
+ * add variable pass_process_info to allow venus for kernel version checking.
+ *
+ * Revision 1.3 93/05/28 16:24:33 bnoble
+ * *** empty log message ***
+ *
+ * Revision 1.2 92/10/27 17:58:25 lily
+ * merge kernel/latest and alpha/src/cfs
+ *
+ * Revision 2.4 92/09/30 14:16:37 mja
+ * Redid buffer allocation so that it does kmem_{alloc,free} for all
+ * architectures. Zone allocation, previously used on the 386, caused
+ * panics if it was invoked repeatedly. Stack allocation, previously
+ * used on all other architectures, tickled some Mach bug that appeared
+ * with large stack frames.
+ * [91/02/09 jjk]
+ *
+ * Added contributors blurb.
+ * [90/12/13 jjk]
+ *
+ * Revision 2.3 90/07/26 15:50:09 mrt
+ * Fixed fix to rename to remove .. from moved directories.
+ * [90/06/28 dcs]
+ *
+ * Revision 1.7 90/06/28 16:24:25 dcs
+ * Fixed bug with moving directories, we weren't flushing .. for the moved directory.
+ *
+ * Revision 1.6 90/05/31 17:01:47 dcs
+ * Prepare for merge with facilities kernel.
+ *
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/acct.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+#include <sys/namei.h>
+#include <sys/ioccom.h>
+#include <sys/select.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vnops.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_opstats.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_namecache.h>
+#include <coda/coda_pioctl.h>
+
+/*
+ * These flags select various performance enhancements.
+ * All three are enabled (1) by default.
+ */
+int coda_attr_cache = 1; /* Set to cache attributes in the kernel */
+int coda_symlink_cache = 1; /* Set to cache symbolic link information */
+int coda_access_cache = 1; /* Set to handle some access checks directly */
+
+/* structure to keep track of vnode operation calls, one slot per operation */
+
+struct coda_op_stats coda_vnodeopstats[CODA_VNODEOPS_SIZE];
+
+/* Counter helpers for coda_vnodeopstats[]; `op' is an index into that table. */
+#define MARK_ENTRY(op) (coda_vnodeopstats[op].entries++)
+#define MARK_INT_SAT(op) (coda_vnodeopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vnodeopstats[op].unsat_intrn++)
+#define MARK_INT_GEN(op) (coda_vnodeopstats[op].gen_intrn++)
+
+/* What we are delaying for in printf */
+int coda_printf_delay = 0; /* in microseconds */
+int coda_vnop_print_entry = 0; /* non-zero makes ENTRY print the function name */
+static int coda_lockdebug = 0;
+
+/* Definition of the vfs operation vector */
+
+/*
+ * Some NetBSD details:
+ *
+ * coda_start is called at the end of the mount syscall.
+ * coda_init is called at boot time.
+ */
+
+/* Announce entry into a vnode operation when coda_vnop_print_entry is set. */
+#define ENTRY if(coda_vnop_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
+
+/*
+ * Definition of the vnode operation vector
+ *
+ * Each entry pairs a vnode operation descriptor with the Coda routine
+ * that implements it; the table ends with a NULL/NULL pair.  Operations
+ * mapped to coda_vop_error (including the default entry) log a message
+ * and fail with EIO; those mapped to coda_vop_nop succeed without doing
+ * anything.  The "#if 0" section is compiled out and only records
+ * operations that still need attention.
+ */
+
+struct vnodeopv_entry_desc coda_vnodeop_entries[] = {
+ { &vop_default_desc, coda_vop_error },
+ { &vop_lookup_desc, coda_lookup }, /* lookup */
+ { &vop_create_desc, coda_create }, /* create */
+ { &vop_mknod_desc, coda_vop_error }, /* mknod */
+ { &vop_open_desc, coda_open }, /* open */
+ { &vop_close_desc, coda_close }, /* close */
+ { &vop_access_desc, coda_access }, /* access */
+ { &vop_getattr_desc, coda_getattr }, /* getattr */
+ { &vop_setattr_desc, coda_setattr }, /* setattr */
+ { &vop_read_desc, coda_read }, /* read */
+ { &vop_write_desc, coda_write }, /* write */
+ { &vop_ioctl_desc, coda_ioctl }, /* ioctl */
+ { &vop_mmap_desc, coda_vop_error }, /* mmap */
+ { &vop_fsync_desc, coda_fsync }, /* fsync */
+ { &vop_remove_desc, coda_remove }, /* remove */
+ { &vop_link_desc, coda_link }, /* link */
+ { &vop_rename_desc, coda_rename }, /* rename */
+ { &vop_mkdir_desc, coda_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, coda_rmdir }, /* rmdir */
+ { &vop_symlink_desc, coda_symlink }, /* symlink */
+ { &vop_readdir_desc, coda_readdir }, /* readdir */
+ { &vop_readlink_desc, coda_readlink }, /* readlink */
+ { &vop_abortop_desc, coda_abortop }, /* abortop */
+ { &vop_inactive_desc, coda_inactive }, /* inactive */
+ { &vop_reclaim_desc, coda_reclaim }, /* reclaim */
+ { &vop_lock_desc, coda_lock }, /* lock */
+ { &vop_unlock_desc, coda_unlock }, /* unlock */
+ { &vop_bmap_desc, coda_bmap }, /* bmap */
+ { &vop_strategy_desc, coda_strategy }, /* strategy */
+ { &vop_print_desc, coda_vop_error }, /* print */
+ { &vop_islocked_desc, coda_islocked }, /* islocked */
+ { &vop_pathconf_desc, coda_vop_error }, /* pathconf */
+ { &vop_advlock_desc, coda_vop_nop }, /* advlock */
+ { &vop_bwrite_desc, coda_vop_error }, /* bwrite */
+ { &vop_lease_desc, coda_vop_nop }, /* lease */
+ { &vop_poll_desc, (vop_t *) vop_stdpoll },
+ { &vop_getpages_desc, coda_fbsd_getpages }, /* pager intf.*/
+ { &vop_putpages_desc, coda_fbsd_putpages }, /* pager intf.*/
+
+#if 0
+
+ we need to define these someday
+#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
+#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
+#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
+#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
+#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
+
+ missing
+ { &vop_reallocblks_desc, (vop_t *) ufs_missingop },
+ { &vop_cachedlookup_desc, (vop_t *) ufs_lookup },
+ { &vop_whiteout_desc, (vop_t *) ufs_whiteout },
+#endif
+ { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
+};
+
+/* Ties the entry table above to the coda_vnodeop_p vector pointer. */
+static struct vnodeopv_desc coda_vnodeop_opv_desc =
+ { &coda_vnodeop_p, coda_vnodeop_entries };
+
+VNODEOP_SET(coda_vnodeop_opv_desc);
+
+/*
+ * Catch-all for vnode operations Coda has not defined.  The argument block
+ * is read as starting with a pointer to the operation's descriptor, whose
+ * name is logged.  The panic that once followed is disabled, so the call
+ * simply fails with EIO.
+ */
+int
+coda_vop_error(void *anon) {
+    struct vnodeop_desc *op = *(struct vnodeop_desc **)anon;
+
+    myprintf(("coda_vop_error: Vnode operation %s called, but not defined.\n",
+        op->vdesc_name));
+    /*
+    panic("coda_vop_error");
+    */
+    return EIO;
+}
+
+/*
+ * A generic do-nothing. For lease_check, advlock.
+ * Always returns 0; when codadebug is set the operation's name is logged
+ * first.
+ */
+int
+coda_vop_nop(void *anon) {
+    struct vnodeop_desc **opp = (struct vnodeop_desc **)anon;
+
+    if (!codadebug)
+        return (0);
+
+    myprintf(("Vnode operation %s called, but unsupported\n",
+        (*opp)->vdesc_name));
+    return (0);
+}
+
+/*
+ * Reset the vnode-operation statistics table: every slot records its own
+ * index as the opcode and all of its counters are cleared.
+ * Always returns 0.
+ */
+int
+coda_vnodeopstats_init(void)
+{
+    struct coda_op_stats *slot;
+    int op;
+
+    for (op = 0; op < CODA_VNODEOPS_SIZE; op++) {
+        slot = &coda_vnodeopstats[op];
+        slot->opcode = op;
+        slot->entries = 0;
+        slot->sat_intrn = 0;
+        slot->unsat_intrn = 0;
+        slot->gen_intrn = 0;
+    }
+
+    return 0;
+}
+
+/*
+ * coda_open calls Venus to return the device, inode pair of the cache
+ * file holding the data. Using iget, coda_open finds the vnode of the
+ * cache file, and then opens it.
+ *
+ * v is a struct vop_open_args.  Returns 0 on success; EACCES for a
+ * write/truncate/create open of the control vnode; otherwise the error
+ * from venus_open(), coda_grab_vnode(), VOP_OPEN() on the container, or
+ * vfs_object_create().
+ */
+int
+coda_open(v)
+ void *v;
+{
+ /*
+ * NetBSD can pass the O_EXCL flag in mode, even though the check
+ * has already happened. Venus defensively assumes that if open
+ * is passed the EXCL, it must be a bug. We strip the flag here.
+ */
+/* true args */
+ struct vop_open_args *ap = v;
+ register struct vnode **vpp = &(ap->a_vp);
+ struct cnode *cp = VTOC(*vpp);
+ int flag = ap->a_mode & (~O_EXCL);
+ struct ucred *cred = ap->a_cred;
+ struct proc *p = ap->a_p;
+/* locals */
+ int error;
+ struct vnode *vp;
+ dev_t dev;
+ ino_t inode;
+
+ MARK_ENTRY(CODA_OPEN_STATS);
+
+ /* Check for open of control file. */
+ if (IS_CTL_VP(*vpp)) {
+ /* XXX */
+ /* if (WRITEABLE(flag)) */
+ /* O_EXCL was already cleared from flag above, so that bit never matches. */
+ if (flag & (FWRITE | O_TRUNC | O_CREAT | O_EXCL)) {
+ MARK_INT_FAIL(CODA_OPEN_STATS);
+ return(EACCES);
+ }
+ MARK_INT_SAT(CODA_OPEN_STATS);
+ return(0);
+ }
+
+ error = venus_open(vtomi((*vpp)), &cp->c_fid, flag, cred, p, &dev, &inode);
+ if (error)
+ return (error);
+ /* error is necessarily zero here; the test below is always true. */
+ if (!error) {
+ CODADEBUG( CODA_OPEN,myprintf(("open: dev %d inode %d result %d\n",
+ dev, inode, error)); )
+ }
+
+ /* Translate the <device, inode> pair for the cache file into
+ an inode pointer. */
+ error = coda_grab_vnode(dev, inode, &vp);
+ if (error)
+ return (error);
+
+ /* We get the vnode back locked. Needs unlocked */
+ VOP_UNLOCK(vp, 0, p);
+ /* Keep a reference until the close comes in. */
+ vref(*vpp);
+
+ /* Save the vnode pointer for the cache file. */
+ if (cp->c_ovp == NULL) {
+ cp->c_ovp = vp;
+ } else {
+ /* A second open must resolve to the same container vnode. */
+ if (cp->c_ovp != vp)
+ panic("coda_open: cp->c_ovp != ITOV(ip)");
+ }
+ cp->c_ocount++;
+
+ /* Flush the attribute cached if writing the file. */
+ if (flag & FWRITE) {
+ cp->c_owrite++;
+ cp->c_flags &= ~C_VATTR;
+ }
+
+ /* Save the <device, inode> pair for the cache file to speed
+ up subsequent page_read's. */
+ cp->c_device = dev;
+ cp->c_inode = inode;
+
+ /* Open the cache file. */
+ error = VOP_OPEN(vp, flag, cred, p);
+ if (error) {
+ /*
+ * NOTE(review): this return leaves the vref() on *vpp and the
+ * c_ocount/c_owrite increments above in place -- confirm that the
+ * caller's error handling balances them.
+ */
+ printf("coda_open: VOP_OPEN on container failed %d\n", error);
+ return (error);
+ }
+/* grab (above) does this when it calls newvnode unless it's in the cache*/
+ if (vp->v_type == VREG) {
+ error = vfs_object_create(vp, p, cred);
+ if (error != 0) {
+ printf("coda_open: vfs_object_create() returns %d\n", error);
+ /*
+ * NOTE(review): vp was unlocked by VOP_UNLOCK() above; confirm
+ * that vput() rather than vrele() is intended here, and that
+ * cp->c_ovp may keep pointing at vp afterwards.
+ */
+ vput(vp);
+ }
+ }
+
+ return(error);
+}
+
+/*
+ * Close the cache file used for I/O and notify Venus.
+ *
+ * Closing the control vnode is a no-op that returns 0.  While the file
+ * system is unmounting, the container vnode (if any) is closed and
+ * released (or vgone()'d when built with "hmm" defined) and ENODEV is
+ * returned; the open counts are left alone and Venus is not contacted.
+ * Otherwise the container is closed and released, c_ocount and (for
+ * FWRITE closes) c_owrite are decremented, Venus is told about the close,
+ * and one reference on the coda vnode is dropped.  In that case the
+ * result of venus_close() is returned.
+ */
+int
+coda_close(v)
+ void *v;
+{
+/* true args */
+ struct vop_close_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp = VTOC(vp);
+ int flag = ap->a_fflag;
+ struct ucred *cred = ap->a_cred;
+ struct proc *p = ap->a_p;
+/* locals */
+ int error;
+
+ MARK_ENTRY(CODA_CLOSE_STATS);
+
+ /* Check for close of control file. */
+ if (IS_CTL_VP(vp)) {
+ MARK_INT_SAT(CODA_CLOSE_STATS);
+ return(0);
+ }
+
+ if (IS_UNMOUNTING(cp)) {
+ if (cp->c_ovp) {
+#ifdef CODA_VERBOSE
+ printf("coda_close: destroying container ref %d, ufs vp %p of vp %p/cp %p\n",
+ vp->v_usecount, cp->c_ovp, vp, cp);
+#endif
+#ifdef hmm
+ vgone(cp->c_ovp);
+#else
+ VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
+ vrele(cp->c_ovp);
+#endif
+ } else {
+#ifdef CODA_VERBOSE
+ printf("coda_close: NO container vp %p/cp %p\n", vp, cp);
+#endif
+ }
+ return ENODEV; /* c_ocount/c_owrite untouched, Venus not notified */
+ } else {
+ /* NOTE(review): c_ovp is used without a NULL check on this path. */
+ VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
+ vrele(cp->c_ovp);
+ }
+
+ /* Forget the container vnode once the last open has been closed. */
+ if (--cp->c_ocount == 0)
+ cp->c_ovp = NULL;
+
+ if (flag & FWRITE) /* file was opened for write */
+ --cp->c_owrite;
+
+ error = venus_close(vtomi(vp), &cp->c_fid, flag, cred, p);
+ vrele(CTOV(cp)); /* pairs with the vref() done in coda_open */
+
+ CODADEBUG(CODA_CLOSE, myprintf(("close: result %d\n",error)); )
+ return(error);
+}
+
+int
+coda_read(v)
+    void *v;
+{
+    /* VOP_READ entry point: unpack the arguments and defer to coda_rdwr(). */
+    struct vop_read_args *ap = v;
+    struct uio *uiop = ap->a_uio;
+
+    ENTRY;
+    return (coda_rdwr(ap->a_vp, uiop, UIO_READ, ap->a_ioflag, ap->a_cred,
+        uiop->uio_procp));
+}
+
+int
+coda_write(v)
+    void *v;
+{
+    /* VOP_WRITE entry point: unpack the arguments and defer to coda_rdwr(). */
+    struct vop_write_args *ap = v;
+    struct uio *uiop = ap->a_uio;
+
+    ENTRY;
+    return (coda_rdwr(ap->a_vp, uiop, UIO_WRITE, ap->a_ioflag, ap->a_cred,
+        uiop->uio_procp));
+}
+
+int
+coda_rdwr(vp, uiop, rw, ioflag, cred, p)
+ struct vnode *vp;
+ struct uio *uiop;
+ enum uio_rw rw;
+ int ioflag;
+ struct ucred *cred;
+ struct proc *p;
+{
+/*
+ * Common body of coda_read() and coda_write(): perform the transfer
+ * described by uiop on the container (cache) vnode of vp.
+ *
+ * rw selects UIO_READ or UIO_WRITE.  When the cnode has no open
+ * container (c_ovp == NULL), one is obtained either from the saved
+ * <device, inode> pair or through an internal VOP_OPEN/VOP_CLOSE of vp.
+ * Reads or writes on the control vnode fail with EINVAL.  After a write
+ * the size known to the pager for vp is refreshed from the container's
+ * attributes and the cached attributes (C_VATTR) are invalidated.
+ * Returns 0 or an errno value.
+ */
+ /* NOTE: container file operation!!! */
+/* locals */
+ struct cnode *cp = VTOC(vp);
+ struct vnode *cfvp = cp->c_ovp;
+ int igot_internally = 0; /* NOTE(review): set below but never read */
+ int opened_internally = 0;
+ int error = 0;
+
+ MARK_ENTRY(CODA_RDWR_STATS);
+
+ CODADEBUG(CODA_RDWR, myprintf(("coda_rdwr(%d, %p, %d, %qd, %d)\n", rw,
+ uiop->uio_iov->iov_base, uiop->uio_resid,
+ uiop->uio_offset, uiop->uio_segflg)); )
+
+ /* Check for rdwr of control object. */
+ if (IS_CTL_VP(vp)) {
+ MARK_INT_FAIL(CODA_RDWR_STATS);
+ return(EINVAL);
+ }
+
+ /*
+ * If file is not already open this must be a page
+ * {read,write} request. Iget the cache file's inode
+ * pointer if we still have its <device, inode> pair.
+ * Otherwise, we must do an internal open to derive the
+ * pair.
+ */
+ if (cfvp == NULL) {
+ /*
+ * If we're dumping core, do the internal open. Otherwise
+ * venus won't have the correct size of the core when
+ * it's completely written.
+ */
+ if (cp->c_inode != 0 && !(p && (p->p_acflag & ACORE))) {
+ igot_internally = 1;
+ error = coda_grab_vnode(cp->c_device, cp->c_inode, &cfvp);
+ if (error) {
+ MARK_INT_FAIL(CODA_RDWR_STATS);
+ return(error);
+ }
+ /*
+ * We get the vnode back locked in both Mach and
+ * NetBSD. Needs unlocked
+ *
+ * NOTE(review): nothing in this function releases the vnode
+ * obtained from coda_grab_vnode() on this path -- confirm
+ * whether a reference is leaked.
+ */
+ VOP_UNLOCK(cfvp, 0, p);
+ }
+ else {
+ opened_internally = 1;
+ MARK_INT_GEN(CODA_OPEN_STATS);
+ error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE),
+ cred, p);
+printf("coda_rdwr: Internally Opening %p\n", vp); /* NOTE(review): unconditional, printed even if the open failed */
+ if (error) {
+ printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
+ return (error);
+ }
+ if (vp->v_type == VREG) {
+ error = vfs_object_create(vp, p, cred);
+ if (error != 0) {
+ printf("coda_rdwr: vfs_object_create() returns %d\n", error);
+ vput(vp); /* NOTE(review): vp is the caller's coda vnode -- confirm vput() is intended */
+ }
+ }
+ if (error) {
+ MARK_INT_FAIL(CODA_RDWR_STATS);
+ return(error);
+ }
+ cfvp = cp->c_ovp; /* container recorded by the internal open */
+ }
+ }
+
+ /* Have UFS handle the call. */
+ CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = (%lx.%lx.%lx), refcnt = %d\n",
+ cp->c_fid.Volume, cp->c_fid.Vnode,
+ cp->c_fid.Unique, CTOV(cp)->v_usecount)); )
+
+
+ if (rw == UIO_READ) {
+ error = VOP_READ(cfvp, uiop, ioflag, cred);
+ } else {
+ error = VOP_WRITE(cfvp, uiop, ioflag, cred);
+ /* ufs_write updates the vnode_pager_setsize for the vnode/object */
+
+ { struct vattr attr;
+
+ /* Mirror the container's new size onto the coda vnode's pager. */
+ if (VOP_GETATTR(cfvp, &attr, cred, p) == 0) {
+ vnode_pager_setsize(vp, attr.va_size);
+ }
+ }
+ }
+
+ if (error)
+ MARK_INT_FAIL(CODA_RDWR_STATS);
+ else
+ MARK_INT_SAT(CODA_RDWR_STATS);
+
+ /* Do an internal close if necessary. */
+ if (opened_internally) {
+ MARK_INT_GEN(CODA_CLOSE_STATS);
+ (void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, p);
+ }
+
+ /* Invalidate cached attributes if writing. */
+ if (rw == UIO_WRITE)
+ cp->c_flags &= ~C_VATTR;
+ return(error);
+}
+
+int
+coda_ioctl(v)
+ void *v;
+{
+/*
+ * ioctl on the coda control vnode.  The request data is a struct
+ * PioctlData naming a path; that path is looked up with namei(), must
+ * resolve to a coda vnode (v_tag == VT_CODA), and the request is then
+ * forwarded to Venus for that object through venus_ioctl().
+ *
+ * Returns EOPNOTSUPP when vp is not the control vnode, the namei() error
+ * when the lookup fails, EINVAL for a non-coda target or an in_size
+ * above VC_MAXDATASIZE, and otherwise the result of venus_ioctl().
+ *
+ * true args
+ */
+ struct vop_ioctl_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+ int com = ap->a_command;
+ caddr_t data = ap->a_data;
+ int flag = ap->a_fflag;
+ struct ucred *cred = ap->a_cred;
+ struct proc *p = ap->a_p;
+/* locals */
+ int error;
+ struct vnode *tvp;
+ struct nameidata ndp;
+ struct PioctlData *iap = (struct PioctlData *)data;
+
+ MARK_ENTRY(CODA_IOCTL_STATS);
+
+ CODADEBUG(CODA_IOCTL, myprintf(("in coda_ioctl on %s\n", iap->path));)
+
+ /* Don't check for operation on a dying object, for ctlvp it
+ shouldn't matter */
+
+ /* Must be control object to succeed. */
+ if (!IS_CTL_VP(vp)) {
+ MARK_INT_FAIL(CODA_IOCTL_STATS);
+ CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: vp != ctlvp"));)
+ return (EOPNOTSUPP);
+ }
+ /* Look up the pathname. */
+
+ /* Should we use the name cache here? It would get it from
+ lookupname sooner or later anyway, right? */
+
+ NDINIT(&ndp, LOOKUP, (iap->follow ? FOLLOW : NOFOLLOW), UIO_USERSPACE, iap->path, p);
+ error = namei(&ndp);
+ tvp = ndp.ni_vp; /* only used after the error check below */
+
+ if (error) {
+ MARK_INT_FAIL(CODA_IOCTL_STATS);
+ CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: lookup returns %d\n",
+ error));)
+ return(error);
+ }
+
+ /*
+ * Make sure this is a coda style cnode, but it may be a
+ * different vfsp
+ */
+ /* XXX: this totally violates the comment about vtagtype in vnode.h */
+ if (tvp->v_tag != VT_CODA) {
+ vrele(tvp);
+ MARK_INT_FAIL(CODA_IOCTL_STATS);
+ CODADEBUG(CODA_IOCTL,
+ myprintf(("coda_ioctl error: %s not a coda object\n",
+ iap->path));)
+ return(EINVAL);
+ }
+
+ /* Reject oversized input; this path is not counted in the stats. */
+ if (iap->vi.in_size > VC_MAXDATASIZE) {
+ vrele(tvp);
+ return(EINVAL);
+ }
+ error = venus_ioctl(vtomi(tvp), &((VTOC(tvp))->c_fid), com, flag, data, cred, p);
+
+ if (error)
+ MARK_INT_FAIL(CODA_IOCTL_STATS);
+ else
+ CODADEBUG(CODA_IOCTL, myprintf(("Ioctl returns %d \n", error)); )
+
+ vrele(tvp); /* presumably the reference namei() returned -- TODO confirm */
+ return(error);
+}
+
+/*
+ * To reduce the cost of a user-level Venus, we cache attributes in
+ * the kernel.  Each cnode has storage allocated for an attribute.  If
+ * c_vattr is valid, a copy of it is returned.  Otherwise, the
+ * attributes are fetched from Venus and, unless the file is open for
+ * writing or attribute caching is disabled, stored in the cnode.  There
+ * is some question if this method is a security leak.  But I think that
+ * in order to make this call, the user must have done a lookup and
+ * opened the file, and therefore should already have access.
+ *
+ * Returns ENODEV while unmounting, ENOENT for the control object, and
+ * otherwise 0 or the error from venus_getattr().
+ */
+int
+coda_getattr(v)
+    void *v;
+{
+/* true args */
+    struct vop_getattr_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct vattr *vap = ap->a_vap;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_GETATTR_STATS);
+
+    if (IS_UNMOUNTING(cp))
+        return ENODEV;
+
+    /* Check for getattr of control object. */
+    if (IS_CTL_VP(vp)) {
+        MARK_INT_FAIL(CODA_GETATTR_STATS);
+        return(ENOENT);
+    }
+
+    /* Check to see if the attributes have already been cached */
+    if (VALID_VATTR(cp)) {
+        CODADEBUG(CODA_GETATTR, { myprintf(("attr cache hit: (%lx.%lx.%lx)\n",
+            cp->c_fid.Volume,
+            cp->c_fid.Vnode,
+            cp->c_fid.Unique));});
+        CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
+            print_vattr(&cp->c_vattr); );
+
+        *vap = cp->c_vattr;
+        MARK_INT_SAT(CODA_GETATTR_STATS);
+        return(0);
+    }
+
+    error = venus_getattr(vtomi(vp), &cp->c_fid, cred, p, vap);
+
+    if (!error) {
+        CODADEBUG(CODA_GETATTR, myprintf(("getattr miss (%lx.%lx.%lx): result %d\n",
+            cp->c_fid.Volume,
+            cp->c_fid.Vnode,
+            cp->c_fid.Unique,
+            error)); )
+
+        CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR))
+            print_vattr(vap); );
+
+        /*
+         * Tell the pager the size Venus reported for an open container.
+         * va_size is handed over unchanged: staging it in an int first
+         * would truncate any size that does not fit in an int.
+         */
+        if (cp->c_ovp != (struct vnode *)0) {
+            vnode_pager_setsize(cp->c_ovp, vap->va_size);
+        }
+
+        /* If not open for write, store attributes in cnode */
+        if ((cp->c_owrite == 0) && (coda_attr_cache)) {
+            cp->c_vattr = *vap;
+            cp->c_flags |= C_VATTR;
+        }
+
+    }
+    return(error);
+}
+
+int
+coda_setattr(v)
+    void *v;
+{
+/*
+ * Push new attributes for vp to Venus.  The control object cannot be
+ * modified (ENOENT).  On success the cached attributes are invalidated.
+ * If the request carries a size and a container is open, the pager is
+ * told about the new size.  Returns the result of venus_setattr().
+ *
+ * true args
+ */
+    struct vop_setattr_args *ap = v;
+    register struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    register struct vattr *vap = ap->a_vap;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_SETATTR_STATS);
+
+    /* Check for setattr of control object. */
+    if (IS_CTL_VP(vp)) {
+        MARK_INT_FAIL(CODA_SETATTR_STATS);
+        return(ENOENT);
+    }
+
+    if (codadebug & CODADBGMSK(CODA_SETATTR)) {
+        print_vattr(vap);
+    }
+    error = venus_setattr(vtomi(vp), &cp->c_fid, vap, cred, p);
+
+    if (!error)
+        cp->c_flags &= ~C_VATTR;
+
+    /*
+     * Test and pass va_size at its full width.  Copying it into an int
+     * first would truncate large sizes, and would make any size whose
+     * low bits are all ones look like VNOVAL ("not being set").
+     *
+     * NOTE(review): as before, this runs even when venus_setattr()
+     * failed -- confirm that is intended.
+     */
+    if (vap->va_size != VNOVAL && cp->c_ovp != (struct vnode *)0) {
+        vnode_pager_setsize(cp->c_ovp, vap->va_size);
+    }
+
+    CODADEBUG(CODA_SETATTR, myprintf(("setattr %d\n", error)); )
+    return(error);
+}
+
+int
+coda_access(v)
+    void *v;
+{
+    /* Unpacked VOP arguments. */
+    struct vop_access_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    int mode = ap->a_mode;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+
+    MARK_ENTRY(CODA_ACCESS_STATS);
+
+    /*
+     * The control object allows read access only.  Bogus hack: every
+     * outcome here is counted as a success in the statistics.
+     */
+    if (IS_CTL_VP(vp)) {
+        MARK_INT_SAT(CODA_ACCESS_STATS);
+        if (!(mode & VREAD))
+            return (EACCES);
+        if (mode & (VWRITE | VEXEC))
+            return (EACCES);
+        return (0);
+    }
+
+    /*
+     * Exec (i.e. lookup) permission on a directory whose "." entry is in
+     * the name cache for these credentials has already been granted, so
+     * Venus need not be asked again.
+     */
+    if (coda_access_cache && (vp->v_type == VDIR) && (mode & VEXEC) &&
+        coda_nc_lookup(cp, ".", 1, cred)) {
+        MARK_INT_SAT(CODA_ACCESS_STATS);
+        return (0);    /* it was in the cache */
+    }
+
+    /* Everything else is decided by Venus. */
+    return (venus_access(vtomi(vp), &cp->c_fid, mode, cred, p));
+}
+
+/*
+ * CODA abort op, called after namei() when a CREATE/DELETE isn't
+ * actually done.  If a pathname buffer was saved in anticipation of a
+ * coda_create or a coda_remove, release it.  Always returns 0.
+ */
+/* ARGSUSED */
+int
+coda_abortop(v)
+    void *v;
+{
+    struct vop_abortop_args /* {
+        struct vnode *a_dvp;
+        struct componentname *a_cnp;
+    } */ *ap = v;
+    struct componentname *cnp = ap->a_cnp;
+
+    /* Free the buffer only when HASBUF is set and SAVESTART is clear. */
+    if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+        zfree(namei_zone, cnp->cn_pnbuf);
+
+    return (0);
+}
+
+int
+coda_readlink(v)
+    void *v;
+{
+    /* Unpacked VOP arguments. */
+    struct vop_readlink_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    struct uio *uiop = ap->a_uio;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_uio->uio_procp;
+    /* Link text handed back by Venus, and the usual status. */
+    char *str;
+    int len;
+    int error;
+
+    MARK_ENTRY(CODA_READLINK_STATS);
+
+    /* The control object is not a symlink. */
+    if (IS_CTL_VP(vp)) {
+        MARK_INT_FAIL(CODA_READLINK_STATS);
+        return (ENOENT);
+    }
+
+    /* Serve the request from the cnode when the link text is cached. */
+    if (coda_symlink_cache && VALID_SYMLINK(cp)) {
+        uiop->uio_rw = UIO_READ;
+        error = uiomove(cp->c_symlink, (int)cp->c_symlen, uiop);
+        if (error) {
+            MARK_INT_FAIL(CODA_READLINK_STATS);
+        } else {
+            MARK_INT_SAT(CODA_READLINK_STATS);
+        }
+        return (error);
+    }
+
+    /* Cache miss (or caching disabled): ask Venus for the link text. */
+    error = venus_readlink(vtomi(vp), &cp->c_fid, cred, p, &str, &len);
+    if (error == 0) {
+        uiop->uio_rw = UIO_READ;
+        error = uiomove(str, len, uiop);
+
+        /* Either keep the string in the cnode or give it back. */
+        if (!coda_symlink_cache) {
+            CODA_FREE(str, len);
+        } else {
+            cp->c_symlink = str;
+            cp->c_symlen = len;
+            cp->c_flags |= C_SYMLINK;
+        }
+    }
+
+    CODADEBUG(CODA_READLINK, myprintf(("in readlink result %d\n",error));)
+    return (error);
+}
+
+int
+coda_fsync(v)
+ void *v;
+{
+/*
+ * Flush a coda file.  Returns ENODEV while unmounting and 0 for the
+ * control object.  Otherwise an open container vnode, if any, is
+ * synced with VOP_FSYNC(..., MNT_WAIT, ...) and 0 is returned; as the
+ * code stands, Venus is never contacted (see the early return below).
+ *
+ * true args
+ */
+ struct vop_fsync_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp = VTOC(vp);
+ struct ucred *cred = ap->a_cred;
+ struct proc *p = ap->a_p;
+/* locals */
+ struct vnode *convp = cp->c_ovp;
+ int error;
+
+ MARK_ENTRY(CODA_FSYNC_STATS);
+
+ /* Check for fsync on an unmounting object */
+ /* The NetBSD kernel, in its infinite wisdom, can try to fsync
+ * after an unmount has been initiated. This is a Bad Thing,
+ * which we have to avoid. Not a legitimate failure for stats.
+ */
+ if (IS_UNMOUNTING(cp)) {
+ return(ENODEV);
+ }
+
+ /* Check for fsync of control object. */
+ if (IS_CTL_VP(vp)) {
+ MARK_INT_SAT(CODA_FSYNC_STATS);
+ return(0);
+ }
+
+ if (convp)
+ VOP_FSYNC(convp, cred, MNT_WAIT, p); /* result is ignored */
+
+ /*
+ * We see fsyncs with usecount == 1 then usecount == 0.
+ * For now we ignore them.
+ */
+ /*
+ if (!vp->v_usecount) {
+ printf("coda_fsync on vnode %p with %d usecount. c_flags = %x (%x)\n",
+ vp, vp->v_usecount, cp->c_flags, cp->c_flags&C_PURGING);
+ }
+ */
+
+ /*
+ * We can expect fsync on any vnode at all if venus is purging it.
+ * Venus can't very well answer the fsync request, now can it?
+ * Hopefully, it won't have to, because hopefully, venus preserves
+ * the (possibly untrue) invariant that it never purges an open
+ * vnode. Hopefully.
+ */
+ if (cp->c_flags & C_PURGING) {
+ return(0);
+ }
+
+ /* needs research */
+ return 0; /* NOTE(review): unconditional, so everything below is unreachable */
+ error = venus_fsync(vtomi(vp), &cp->c_fid, cred, p);
+
+ CODADEBUG(CODA_FSYNC, myprintf(("in fsync result %d\n",error)); );
+ return(error);
+}
+
+int
+coda_inactive(v)
+ void *v;
+{
+ /* XXX - at the moment, inactive doesn't look at cred, and doesn't
+ have a proc pointer. Oops. */
+/*
+ * Called when a coda vnode becomes inactive.  Nothing is done for the
+ * control vnode.  Otherwise any cached symlink text is freed and the
+ * cnode is removed from the lookup table with coda_unsave().  While
+ * unmounting only the cnode lock is released; in the normal case the
+ * vnode is unlocked and then destroyed with vgone().  Always returns 0
+ * (or panics when the mount's private data pointer is NULL).
+ *
+ * true args
+ */
+ struct vop_inactive_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp = VTOC(vp);
+ struct ucred *cred __attribute__((unused)) = NULL;
+ struct proc *p __attribute__((unused)) = curproc;
+/* upcall decl */
+/* locals */
+
+ /* We don't need to send inactive to venus - DCS */
+ MARK_ENTRY(CODA_INACTIVE_STATS);
+
+ if (IS_CTL_VP(vp)) {
+ MARK_INT_SAT(CODA_INACTIVE_STATS);
+ return 0;
+ }
+
+ CODADEBUG(CODA_INACTIVE, myprintf(("in inactive, %lx.%lx.%lx. vfsp %p\n",
+ cp->c_fid.Volume, cp->c_fid.Vnode,
+ cp->c_fid.Unique, vp->v_mount));)
+
+ /* If an array has been allocated to hold the symlink, deallocate it */
+ if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) {
+ if (cp->c_symlink == NULL)
+ panic("coda_inactive: null symlink pointer in cnode");
+
+ CODA_FREE(cp->c_symlink, cp->c_symlen);
+ cp->c_flags &= ~C_SYMLINK;
+ cp->c_symlen = 0;
+ }
+
+ /* Remove it from the table so it can't be found. */
+ coda_unsave(cp);
+ if ((struct coda_mntinfo *)(vp->v_mount->mnt_data) == NULL) {
+ myprintf(("Help! vfsp->vfs_data was NULL, but vnode %p wasn't dying\n", vp));
+ panic("badness in coda_inactive\n");
+ }
+
+ if (IS_UNMOUNTING(cp)) {
+#ifdef DEBUG
+ printf("coda_inactive: IS_UNMOUNTING use %d: vp %p, cp %p\n", vp->v_usecount, vp, cp);
+ if (cp->c_ovp != NULL)
+ printf("coda_inactive: cp->ovp != NULL use %d: vp %p, cp %p\n",
+ vp->v_usecount, vp, cp);
+#endif
+ /* Unmounting: just drop the cnode lock; the vnode is not vgone()'d here. */
+ lockmgr(&cp->c_lock, LK_RELEASE, &vp->v_interlock, p);
+ } else {
+#ifdef OLD_DIAGNOSTIC
+ if (CTOV(cp)->v_usecount) {
+ panic("coda_inactive: nonzero reference count");
+ }
+ if (cp->c_ovp != NULL) {
+ panic("coda_inactive: cp->ovp != NULL");
+ }
+#endif
+ /* Normal case: unlock first, then destroy the vnode. */
+ VOP_UNLOCK(vp, 0, p);
+ vgone(vp);
+ }
+
+ MARK_INT_SAT(CODA_INACTIVE_STATS);
+ return(0);
+}
+
+/*
+ * Remote file system operations having to do with directory manipulation.
+ */
+
+/*
+ * It appears that in NetBSD, lookup is supposed to return the vnode locked
+ *
+ * coda_lookup resolves one path component (cnp) in directory dvp and
+ * stores the resulting vnode in *vpp.  The control name yields
+ * coda_ctlvp; a name with len+1 > CODA_MAXNAMLEN fails with EINVAL;
+ * otherwise the coda name cache is tried first and Venus is asked on a
+ * miss.  A vnode fetched from Venus is entered in the name cache unless
+ * its type carries CODA_NOCACHE.
+ *
+ * For CREATE/RENAME on the last component, ENOENT is turned into
+ * EJUSTRETURN with SAVENAME set and *vpp NULL.  For a successful DELETE
+ * on the last component SAVENAME is set as well.  On any other error
+ * *vpp is NULL and locking is left unchanged.
+ */
+int
+coda_lookup(v)
+ void *v;
+{
+/* true args */
+ struct vop_lookup_args *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct cnode *dcp = VTOC(dvp);
+ struct vnode **vpp = ap->a_vpp;
+ /*
+ * It looks as though ap->a_cnp->ni_cnd->cn_nameptr holds the rest
+ * of the string to xlate, and that we must try to get at least
+ * ap->a_cnp->ni_cnd->cn_namelen of those characters to match. I
+ * could be wrong.
+ */
+ struct componentname *cnp = ap->a_cnp;
+ struct ucred *cred = cnp->cn_cred;
+ struct proc *p = cnp->cn_proc;
+/* locals */
+ struct cnode *cp;
+ const char *nm = cnp->cn_nameptr;
+ int len = cnp->cn_namelen;
+ ViceFid VFid;
+ int vtype;
+ int error = 0;
+
+ MARK_ENTRY(CODA_LOOKUP_STATS);
+
+ CODADEBUG(CODA_LOOKUP, myprintf(("lookup: %s in %lx.%lx.%lx\n",
+ nm, dcp->c_fid.Volume,
+ dcp->c_fid.Vnode, dcp->c_fid.Unique)););
+
+ /* Check for lookup of control object. */
+ if (IS_CTL_NAME(dvp, nm, len)) {
+ *vpp = coda_ctlvp;
+ vref(*vpp);
+ MARK_INT_SAT(CODA_LOOKUP_STATS);
+ goto exit;
+ }
+
+ if (len+1 > CODA_MAXNAMLEN) {
+ MARK_INT_FAIL(CODA_LOOKUP_STATS);
+ CODADEBUG(CODA_LOOKUP, myprintf(("name too long: lookup, %lx.%lx.%lx(%s)\n",
+ dcp->c_fid.Volume, dcp->c_fid.Vnode,
+ dcp->c_fid.Unique, nm)););
+ *vpp = (struct vnode *)0;
+ error = EINVAL;
+ goto exit;
+ }
+ /* First try to look the file up in the cfs name cache */
+ /* lock the parent vnode? */
+ cp = coda_nc_lookup(dcp, nm, len, cred);
+ if (cp) {
+ *vpp = CTOV(cp);
+ vref(*vpp);
+ CODADEBUG(CODA_LOOKUP,
+ myprintf(("lookup result %d vpp %p\n",error,*vpp));)
+ } else {
+
+ /* The name wasn't cached, so we need to contact Venus */
+ error = venus_lookup(vtomi(dvp), &dcp->c_fid, nm, len, cred, p, &VFid, &vtype);
+
+ if (error) {
+ MARK_INT_FAIL(CODA_LOOKUP_STATS);
+ CODADEBUG(CODA_LOOKUP, myprintf(("lookup error on %lx.%lx.%lx(%s)%d\n",
+ dcp->c_fid.Volume, dcp->c_fid.Vnode, dcp->c_fid.Unique, nm, error));)
+ *vpp = (struct vnode *)0;
+ } else {
+ MARK_INT_SAT(CODA_LOOKUP_STATS);
+ CODADEBUG(CODA_LOOKUP,
+ myprintf(("lookup: vol %lx vno %lx uni %lx type %o result %d\n",
+ VFid.Volume, VFid.Vnode, VFid.Unique, vtype,
+ error)); )
+
+ cp = make_coda_node(&VFid, dvp->v_mount, vtype);
+ *vpp = CTOV(cp);
+
+ /* enter the new vnode in the Name Cache only if the top bit isn't set */
+ /* And don't enter a new vnode for an invalid one! */
+ if (!(vtype & CODA_NOCACHE))
+ coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+ }
+ }
+
+ exit:
+ /*
+ * If we are creating, and this was the last name to be looked up,
+ * and the error was ENOENT, then there really shouldn't be an
+ * error and we can make the leaf NULL and return success. Since
+ * this is supposed to work under Mach as well as NetBSD, we're
+ * leaving this fn wrapped. We also must tell lookup/namei that
+ * we need to save the last component of the name. (Create will
+ * have to free the name buffer later...lucky us...)
+ */
+ if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME))
+ && (cnp->cn_flags & ISLASTCN)
+ && (error == ENOENT))
+ {
+ error = EJUSTRETURN;
+ cnp->cn_flags |= SAVENAME;
+ *ap->a_vpp = NULL;
+ }
+
+ /*
+ * If we are removing, and we are at the last element, and we
+ * found it, then we need to keep the name around so that the
+ * removal will go ahead as planned. Unfortunately, this will
+ * probably also lock the to-be-removed vnode, which may or may
+ * not be a good idea. I'll have to look at the bits of
+ * coda_remove to make sure. We'll only save the name if we did in
+ * fact find the name, otherwise coda_remove won't have a chance
+ * to free the pathname.
+ */
+ if ((cnp->cn_nameiop == DELETE)
+ && (cnp->cn_flags & ISLASTCN)
+ && !error)
+ {
+ cnp->cn_flags |= SAVENAME;
+ }
+
+ /*
+ * If the lookup went well, we need to (potentially?) unlock the
+ * parent, and lock the child. We are only responsible for
+ * checking to see if the parent is supposed to be unlocked before
+ * we return. We must always lock the child (provided there is
+ * one, and (the parent isn't locked or it isn't the same as the
+ * parent.) Simple, huh? We can never leave the parent locked unless
+ * we are ISLASTCN
+ */
+ if (!error || (error == EJUSTRETURN)) {
+ if (!(cnp->cn_flags & LOCKPARENT) || !(cnp->cn_flags & ISLASTCN)) {
+ if ((error = VOP_UNLOCK(dvp, 0, p))) {
+ return error;
+ }
+ /*
+ * The parent is unlocked. As long as there is a child,
+ * lock it without bothering to check anything else.
+ */
+ if (*ap->a_vpp) {
+ if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
+ printf("coda_lookup: ");
+ panic("unlocked parent but couldn't lock child");
+ }
+ }
+ } else {
+ /* The parent is locked, and may be the same as the child */
+ if (*ap->a_vpp && (*ap->a_vpp != dvp)) {
+ /* Different, go ahead and lock it. */
+ /* NOTE(review): the panic text below says "unlocked parent" although the parent stays locked on this branch. */
+ if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
+ printf("coda_lookup: ");
+ panic("unlocked parent but couldn't lock child");
+ }
+ }
+ }
+ } else {
+ /* If the lookup failed, we need to ensure that the leaf is NULL */
+ /* Don't change any locking? */
+ *ap->a_vpp = NULL;
+ }
+ return(error);
+}
+
+/*ARGSUSED*/
+int
+coda_create(v)
+ void *v;
+{
+/*
+ * Create the file named by cnp in directory dvp through Venus and
+ * return its vnode in *vpp.  Creating the control name fails with
+ * EACCES.  Every create is issued as exclusive.  On success the new
+ * cnode is built, *va and (if enabled) the attribute cache are filled
+ * from the attributes Venus returned, the parent's cached attributes
+ * are invalidated, the name is entered in the name cache, and the new
+ * vnode is locked when LOCKLEAF is set.  The pathname buffer is freed
+ * unless SAVESTART is set.  Returns 0 or an errno value.
+ *
+ * true args
+ */
+ struct vop_create_args *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct cnode *dcp = VTOC(dvp);
+ struct vattr *va = ap->a_vap;
+ int exclusive = 1;
+ int mode = ap->a_vap->va_mode;
+ struct vnode **vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ struct ucred *cred = cnp->cn_cred;
+ struct proc *p = cnp->cn_proc;
+/* locals */
+ int error;
+ struct cnode *cp;
+ const char *nm = cnp->cn_nameptr;
+ int len = cnp->cn_namelen;
+ ViceFid VFid;
+ struct vattr attr;
+
+ MARK_ENTRY(CODA_CREATE_STATS);
+
+ /* All creates are exclusive XXX */
+ /* I'm assuming the 'mode' argument is the file mode bits XXX */
+
+ /* Check for create of control object. */
+ /* NOTE(review): this early return does not free cnp->cn_pnbuf, unlike the normal exit. */
+ if (IS_CTL_NAME(dvp, nm, len)) {
+ *vpp = (struct vnode *)0;
+ MARK_INT_FAIL(CODA_CREATE_STATS);
+ return(EACCES);
+ }
+
+ error = venus_create(vtomi(dvp), &dcp->c_fid, nm, len, exclusive, mode, va, cred, p, &VFid, &attr);
+
+ if (!error) {
+
+ /* If this is an exclusive create, panic if the file already exists. */
+ /* Venus should have detected the file and reported EEXIST. */
+
+ if ((exclusive == 1) &&
+ (coda_find(&VFid) != NULL))
+ panic("cnode existed for newly created file!");
+
+ cp = make_coda_node(&VFid, dvp->v_mount, attr.va_type);
+ *vpp = CTOV(cp);
+
+ /* Update va to reflect the new attributes. */
+ (*va) = attr;
+
+ /* Update the attribute cache and mark it as valid */
+ if (coda_attr_cache) {
+ VTOC(*vpp)->c_vattr = attr;
+ VTOC(*vpp)->c_flags |= C_VATTR;
+ }
+
+ /* Invalidate the parent's attr cache, the modification time has changed */
+ VTOC(dvp)->c_flags &= ~C_VATTR;
+
+ /* enter the new vnode in the Name Cache */
+ coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+
+ CODADEBUG(CODA_CREATE,
+ myprintf(("create: (%lx.%lx.%lx), result %d\n",
+ VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
+ } else {
+ *vpp = (struct vnode *)0;
+ CODADEBUG(CODA_CREATE, myprintf(("create error %d\n", error));)
+ }
+
+ if (!error) {
+ /* Hand the new vnode back locked when the caller asked for LOCKLEAF. */
+ if (cnp->cn_flags & LOCKLEAF) {
+ if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) {
+ printf("coda_create: ");
+ panic("unlocked parent but couldn't lock child");
+ }
+ }
+#ifdef OLD_DIAGNOSTIC
+ else {
+ printf("coda_create: LOCKLEAF not set!\n");
+ }
+#endif
+ }
+ /* Have to free the previously saved name */
+ /*
+ * This condition is stolen from ufs_makeinode. I have no idea
+ * why it's here, but what the hey...
+ */
+ if ((cnp->cn_flags & SAVESTART) == 0) {
+ zfree(namei_zone, cnp->cn_pnbuf);
+ }
+ return(error);
+}
+
+int
+coda_remove(v)
+ void *v;
+{
+/*
+ * Remove the entry named by cnp from directory dvp through Venus.
+ * Before Venus is contacted the name is dropped from the name cache
+ * (adjusting a cached hard-link count above 1) and the parent's cached
+ * attributes are invalidated.  Removing the control name fails with
+ * ENOENT.  The pathname buffer is freed unless SAVESTART is set.
+ * Returns the result of venus_remove().
+ *
+ * true args
+ */
+ struct vop_remove_args *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct cnode *cp = VTOC(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ struct ucred *cred = cnp->cn_cred;
+ struct proc *p = cnp->cn_proc;
+/* locals */
+ int error;
+ const char *nm = cnp->cn_nameptr;
+ int len = cnp->cn_namelen;
+ struct cnode *tp;
+
+ MARK_ENTRY(CODA_REMOVE_STATS);
+
+ CODADEBUG(CODA_REMOVE, myprintf(("remove: %s in %lx.%lx.%lx\n",
+ nm, cp->c_fid.Volume, cp->c_fid.Vnode,
+ cp->c_fid.Unique)););
+
+ /* Remove the file's entry from the CODA Name Cache */
+ /* We're being conservative here, it might be that this person
+ * doesn't really have sufficient access to delete the file
+ * but we feel zapping the entry won't really hurt anyone -- dcs
+ */
+ /* I'm gonna go out on a limb here. If a file and a hardlink to it
+ * exist, and one is removed, the link count on the other will be
+ * off by 1. We could either invalidate the attrs if cached, or
+ * fix them. I'll try to fix them. DCS 11/8/94
+ */
+ tp = coda_nc_lookup(VTOC(dvp), nm, len, cred);
+ if (tp) {
+ if (VALID_VATTR(tp)) { /* If attrs are cached */
+ if (tp->c_vattr.va_nlink > 1) { /* If it's a hard link */
+ tp->c_vattr.va_nlink--;
+ }
+ }
+
+ coda_nc_zapfile(VTOC(dvp), nm, len);
+ /* No need to flush it if it doesn't exist! */
+ }
+ /* Invalidate the parent's attr cache, the modification time has changed */
+ VTOC(dvp)->c_flags &= ~C_VATTR;
+
+ /* Check for remove of control object. */
+ /* NOTE(review): this return happens after the cache updates above and without freeing cnp->cn_pnbuf. */
+ if (IS_CTL_NAME(dvp, nm, len)) {
+ MARK_INT_FAIL(CODA_REMOVE_STATS);
+ return(ENOENT);
+ }
+
+ error = venus_remove(vtomi(dvp), &cp->c_fid, nm, len, cred, p);
+
+ CODADEBUG(CODA_REMOVE, myprintf(("in remove result %d\n",error)); )
+
+ if ((cnp->cn_flags & SAVESTART) == 0) {
+ zfree(namei_zone, cnp->cn_pnbuf);
+ }
+ return(error);
+}
+
+int
+coda_link(v)
+    void *v;
+{
+    /*
+     * Create a hard link to vp named by cnp in directory tdvp, via
+     * Venus.  Links involving the control object are refused with
+     * EACCES.
+     */
+    struct vop_link_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct vnode *tdvp = ap->a_tdvp;
+    struct componentname *cnp = ap->a_cnp;
+    struct cnode *cp = VTOC(vp);
+    struct cnode *tdcp = VTOC(tdvp);
+    struct ucred *cred = cnp->cn_cred;
+    struct proc *p = cnp->cn_proc;
+    const char *nm = cnp->cn_nameptr;
+    int len = cnp->cn_namelen;
+    int error;
+
+    MARK_ENTRY(CODA_LINK_STATS);
+
+    /* Debug trace: both the "nb_link" and the "link" flavour are emitted. */
+    if (codadebug & CODADBGMSK(CODA_LINK)) {
+        myprintf(("nb_link: vp fid: (%lx.%lx.%lx)\n",
+            cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
+        myprintf(("nb_link: tdvp fid: (%lx.%lx.%lx)\n",
+            tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
+        myprintf(("link: vp fid: (%lx.%lx.%lx)\n",
+            cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
+        myprintf(("link: tdvp fid: (%lx.%lx.%lx)\n",
+            tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique));
+    }
+
+    /* Neither end of the link may be the control object. */
+    if (IS_CTL_NAME(tdvp, nm, len) || IS_CTL_VP(vp)) {
+        MARK_INT_FAIL(CODA_LINK_STATS);
+        return (EACCES);
+    }
+
+    error = venus_link(vtomi(vp), &cp->c_fid, &tdcp->c_fid, nm, len, cred, p);
+
+    /*
+     * Whatever Venus answered, drop the cached attributes of the
+     * target directory and of the linked file.
+     */
+    tdcp->c_flags &= ~C_VATTR;
+    cp->c_flags &= ~C_VATTR;
+
+    CODADEBUG(CODA_LINK, myprintf(("in link result %d\n",error)); )
+
+    /* Drop the name buffer if we don't need to SAVESTART */
+    if (!(cnp->cn_flags & SAVESTART)) {
+        zfree(namei_zone, cnp->cn_pnbuf);
+    }
+    return (error);
+}
+
+int
+coda_rename(v)
+ void *v;
+{
+/*
+ * Rename fcnp in directory a_fdvp to tcnp in directory a_tdvp through
+ * Venus.  A rename that involves the control name fails with EACCES
+ * before anything else is touched.  Otherwise the affected name-cache
+ * entries are zapped (including ".." of a directory that changes
+ * parents) and both parents' cached attributes are invalidated; names
+ * with length+1 > CODA_MAXNAMLEN then fail with EINVAL.  On the way
+ * out through "exit" the from-vnodes are released and the to-vnodes are
+ * unlocked and released.  Returns 0 or an errno value.
+ *
+ * true args
+ */
+ struct vop_rename_args *ap = v;
+ struct vnode *odvp = ap->a_fdvp;
+ struct cnode *odcp = VTOC(odvp);
+ struct componentname *fcnp = ap->a_fcnp;
+ struct vnode *ndvp = ap->a_tdvp;
+ struct cnode *ndcp = VTOC(ndvp);
+ struct componentname *tcnp = ap->a_tcnp;
+ struct ucred *cred = fcnp->cn_cred;
+ struct proc *p = fcnp->cn_proc;
+/* locals */
+ int error;
+ const char *fnm = fcnp->cn_nameptr;
+ int flen = fcnp->cn_namelen;
+ const char *tnm = tcnp->cn_nameptr;
+ int tlen = tcnp->cn_namelen;
+
+ MARK_ENTRY(CODA_RENAME_STATS);
+
+ /* Hmmm. The vnodes are already looked up. Perhaps they are locked?
+ This could be Bad. XXX */
+#ifdef OLD_DIAGNOSTIC
+ if ((fcnp->cn_cred != tcnp->cn_cred)
+ || (fcnp->cn_proc != tcnp->cn_proc))
+ {
+ panic("coda_rename: component names don't agree");
+ }
+#endif
+
+ /* Check for rename involving control object. */
+ /* NOTE(review): unlike the "exit" path, this return releases none of the four vnodes. */
+ if (IS_CTL_NAME(odvp, fnm, flen) || IS_CTL_NAME(ndvp, tnm, tlen)) {
+ MARK_INT_FAIL(CODA_RENAME_STATS);
+ return(EACCES);
+ }
+
+ /* Problem with moving directories -- need to flush entry for .. */
+ if (odvp != ndvp) {
+ struct cnode *ovcp = coda_nc_lookup(VTOC(odvp), fnm, flen, cred);
+ if (ovcp) {
+ struct vnode *ovp = CTOV(ovcp);
+ if ((ovp) &&
+ (ovp->v_type == VDIR)) /* If it's a directory */
+ coda_nc_zapfile(VTOC(ovp),"..", 2);
+ }
+ }
+
+ /* Remove the entries for both source and target files */
+ coda_nc_zapfile(VTOC(odvp), fnm, flen);
+ coda_nc_zapfile(VTOC(ndvp), tnm, tlen);
+
+ /* Invalidate the parent's attr cache, the modification time has changed */
+ VTOC(odvp)->c_flags &= ~C_VATTR;
+ VTOC(ndvp)->c_flags &= ~C_VATTR;
+
+ if (flen+1 > CODA_MAXNAMLEN) {
+ MARK_INT_FAIL(CODA_RENAME_STATS);
+ error = EINVAL;
+ goto exit;
+ }
+
+ if (tlen+1 > CODA_MAXNAMLEN) {
+ MARK_INT_FAIL(CODA_RENAME_STATS);
+ error = EINVAL;
+ goto exit;
+ }
+
+ error = venus_rename(vtomi(odvp), &odcp->c_fid, &ndcp->c_fid, fnm, flen, tnm, tlen, cred, p);
+
+ exit:
+ CODADEBUG(CODA_RENAME, myprintf(("in rename result %d\n",error));)
+ /* XXX - do we need to call cache purge on the moved vnode? */
+ cache_purge(ap->a_fvp);
+
+ /* It seems to be incumbent on us to drop locks on all four vnodes */
+ /* From-vnodes are not locked, only ref'd. To-vnodes are locked. */
+
+ vrele(ap->a_fvp);
+ vrele(odvp);
+
+ if (ap->a_tvp) {
+ /* When the target is the target directory itself, only drop the reference; ndvp is vput() below. */
+ if (ap->a_tvp == ndvp) {
+ vrele(ap->a_tvp);
+ } else {
+ vput(ap->a_tvp);
+ }
+ }
+
+ vput(ndvp);
+ return(error);
+}
+
+int
+coda_mkdir(v)
+ void *v;
+{
+/*
+ * Create the directory named by cnp in dvp through Venus and return its
+ * vnode in *vpp.  The control name and names with
+ * len+1 > CODA_MAXNAMLEN are refused with EACCES.  On success the new
+ * cnode is built, the name plus "." and ".." are entered in the name
+ * cache, the attributes Venus returned are cached (if enabled) and the
+ * parent's cached attributes are invalidated.  The pathname buffer is
+ * freed on the normal exit regardless of SAVESTART.  Returns 0 or an
+ * errno value.
+ *
+ * true args
+ */
+ struct vop_mkdir_args *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct cnode *dcp = VTOC(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ register struct vattr *va = ap->a_vap;
+ struct vnode **vpp = ap->a_vpp;
+ struct ucred *cred = cnp->cn_cred;
+ struct proc *p = cnp->cn_proc;
+/* locals */
+ int error;
+ const char *nm = cnp->cn_nameptr;
+ int len = cnp->cn_namelen;
+ struct cnode *cp;
+ ViceFid VFid;
+ struct vattr ova;
+
+ MARK_ENTRY(CODA_MKDIR_STATS);
+
+ /* Check for mkdir of control object. */
+ if (IS_CTL_NAME(dvp, nm, len)) {
+ *vpp = (struct vnode *)0;
+ MARK_INT_FAIL(CODA_MKDIR_STATS);
+ return(EACCES);
+ }
+
+ /* NOTE(review): coda_lookup and coda_rename report an over-long name as EINVAL; EACCES here looks inconsistent. */
+ if (len+1 > CODA_MAXNAMLEN) {
+ *vpp = (struct vnode *)0;
+ MARK_INT_FAIL(CODA_MKDIR_STATS);
+ return(EACCES);
+ }
+
+ error = venus_mkdir(vtomi(dvp), &dcp->c_fid, nm, len, va, cred, p, &VFid, &ova);
+
+ if (!error) {
+ if (coda_find(&VFid) != NULL)
+ panic("cnode existed for newly created directory!");
+
+
+ cp = make_coda_node(&VFid, dvp->v_mount, va->va_type);
+ *vpp = CTOV(cp);
+
+ /* enter the new vnode in the Name Cache */
+ coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp));
+
+ /* as a side effect, enter "." and ".." for the directory */
+ coda_nc_enter(VTOC(*vpp), ".", 1, cred, VTOC(*vpp));
+ coda_nc_enter(VTOC(*vpp), "..", 2, cred, VTOC(dvp));
+
+ if (coda_attr_cache) {
+ VTOC(*vpp)->c_vattr = ova; /* update the attr cache */
+ VTOC(*vpp)->c_flags |= C_VATTR; /* Valid attributes in cnode */
+ }
+
+ /* Invalidate the parent's attr cache, the modification time has changed */
+ VTOC(dvp)->c_flags &= ~C_VATTR;
+
+ CODADEBUG( CODA_MKDIR, myprintf(("mkdir: (%lx.%lx.%lx) result %d\n",
+ VFid.Volume, VFid.Vnode, VFid.Unique, error)); )
+ } else {
+ *vpp = (struct vnode *)0;
+ CODADEBUG(CODA_MKDIR, myprintf(("mkdir error %d\n",error));)
+ }
+
+ /* Have to free the previously saved name */
+ /*
+ * ufs_mkdir doesn't check for SAVESTART before freeing the
+ * pathname buffer, but ufs_create does. For the moment, I'll
+ * follow their lead, but this seems like it is probably
+ * incorrect.
+ */
+ zfree(namei_zone, cnp->cn_pnbuf);
+ return(error);
+}
+
+int
+coda_rmdir(v)
+ void *v;
+{
+/*
+ * Remove the directory named by cnp from dvp through Venus.  Removing
+ * the control name fails with ENOENT.  Before Venus is contacted, the
+ * name-cache entries whose parent is the doomed directory and the entry
+ * for the directory itself are zapped, and the parent's cached
+ * attributes are invalidated.  The pathname buffer is freed unless
+ * SAVESTART is set.  Returns the result of venus_rmdir().
+ *
+ * true args
+ */
+ struct vop_rmdir_args *ap = v;
+ struct vnode *dvp = ap->a_dvp;
+ struct cnode *dcp = VTOC(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ struct ucred *cred = cnp->cn_cred;
+ struct proc *p = cnp->cn_proc;
+/* locals */
+ int error;
+ const char *nm = cnp->cn_nameptr;
+ int len = cnp->cn_namelen;
+ struct cnode *cp;
+
+ MARK_ENTRY(CODA_RMDIR_STATS);
+
+ /* Check for rmdir of control object. */
+ if (IS_CTL_NAME(dvp, nm, len)) {
+ MARK_INT_FAIL(CODA_RMDIR_STATS);
+ return(ENOENT);
+ }
+
+ /* We're being conservative here, it might be that this person
+ * doesn't really have sufficient access to delete the file
+ * but we feel zapping the entry won't really hurt anyone -- dcs
+ */
+ /*
+ * As a side effect of the rmdir, remove any entries for children of
+ * the directory, especially "." and "..".
+ */
+ cp = coda_nc_lookup(dcp, nm, len, cred);
+ if (cp) coda_nc_zapParentfid(&(cp->c_fid), NOT_DOWNCALL);
+
+ /* Remove the file's entry from the CODA Name Cache */
+ coda_nc_zapfile(dcp, nm, len);
+
+ /* Invalidate the parent's attr cache, the modification time has changed */
+ dcp->c_flags &= ~C_VATTR;
+
+ error = venus_rmdir(vtomi(dvp), &dcp->c_fid, nm, len, cred, p);
+
+ CODADEBUG(CODA_RMDIR, myprintf(("in rmdir result %d\n", error)); )
+
+ if ((cnp->cn_flags & SAVESTART) == 0) {
+ zfree(namei_zone, cnp->cn_pnbuf);
+ }
+ return(error);
+}
+
+/*
+ * Create a symbolic link.  The new link is named by the componentname
+ * inside directory a_dvp and its contents are the string a_target.
+ * Returns EACCES for the control object, EINVAL when either the link
+ * name or the target is too long, otherwise the venus_symlink() result.
+ */
+int
+coda_symlink(v)
+	void *v;
+{
+/* true args */
+	struct vop_symlink_args *ap = v;
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *tdvp = ap->a_dvp;
+	struct cnode *tdcp = VTOC(tdvp);
+	struct vattr *tva = ap->a_vap;
+	char *path = ap->a_target;
+	struct ucred *cred = cnp->cn_cred;
+	struct proc *p = cnp->cn_proc;
+/* locals */
+	/*
+	 * XXX Assumptions about the arguments:
+	 *	t(foo) is the new name/parent/etc being created.
+	 *	path is the contents of the new symlink.
+	 */
+	char *nm = cnp->cn_nameptr;
+	int len = cnp->cn_namelen;
+	int plen = strlen(path);
+	int error;
+
+	/* XXX What about the vpp argument?  Do we need it? */
+	/*
+	 * Current strategy: perform the symlink, then let a later lookup
+	 * fetch the resulting vnode.  That costs two exchanges with Venus
+	 * for a new symbolic link, but the Mach symlink path is left alone
+	 * for now.  Once Mach support is deprecated, symlink should hand
+	 * back the resulting vnode through a vpp argument in the common
+	 * case.
+	 */
+
+	MARK_ENTRY(CODA_SYMLINK_STATS);
+
+	/* Refuse to create a link on top of the control object. */
+	if (IS_CTL_NAME(tdvp, nm, len)) {
+		MARK_INT_FAIL(CODA_SYMLINK_STATS);
+		return (EACCES);
+	}
+
+	/* Link contents (plus terminator) must fit in a Coda path. */
+	if (plen + 1 > CODA_MAXPATHLEN) {
+		MARK_INT_FAIL(CODA_SYMLINK_STATS);
+		return (EINVAL);
+	}
+
+	/* Link name (plus terminator) must fit in a Coda name. */
+	if (len + 1 > CODA_MAXNAMLEN) {
+		MARK_INT_FAIL(CODA_SYMLINK_STATS);
+		error = EINVAL;
+		goto out;
+	}
+
+	error = venus_symlink(vtomi(tdvp), &tdcp->c_fid, path, plen, nm, len,
+	    tva, cred, p);
+
+	/* The parent's mtime changes, so its cached attributes are stale. */
+	tdcp->c_flags &= ~C_VATTR;
+
+	/* Free the name buffer unless the caller asked to keep it. */
+	if (!(cnp->cn_flags & SAVESTART))
+		zfree(namei_zone, cnp->cn_pnbuf);
+
+out:
+	CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); )
+	return (error);
+}
+
+/*
+ * Read directory entries.
+ *
+ * The request is forwarded to the container vnode (cp->c_ovp) through
+ * VOP_READDIR.  If the cnode has no container vnode yet, the directory
+ * is opened with VOP_OPEN for the duration of the call and closed again
+ * afterwards.  Returns ENOENT for the control vnode, otherwise the error
+ * from the open, the object creation or the forwarded readdir.
+ */
+int
+coda_readdir(v)
+ void *v;
+{
+/* true args */
+ struct vop_readdir_args *ap = v;
+ struct vnode *vp = ap->a_vp;
+ struct cnode *cp = VTOC(vp);
+ register struct uio *uiop = ap->a_uio;
+ struct ucred *cred = ap->a_cred;
+ int *eofflag = ap->a_eofflag;
+ u_long **cookies = ap->a_cookies;
+ int *ncookies = ap->a_ncookies;
+ struct proc *p = ap->a_uio->uio_procp;
+/* upcall decl */
+/* locals */
+ int error = 0;
+
+ MARK_ENTRY(CODA_READDIR_STATS);
+
+ CODADEBUG(CODA_READDIR, myprintf(("coda_readdir(%p, %d, %qd, %d)\n", uiop->uio_iov->iov_base, uiop->uio_resid, uiop->uio_offset, uiop->uio_segflg)); )
+
+ /* Check for readdir of control object. */
+ if (IS_CTL_VP(vp)) {
+ MARK_INT_FAIL(CODA_READDIR_STATS);
+ return(ENOENT);
+ }
+
+ {
+ /* If directory is not already open do an "internal open" on it. */
+ int opened_internally = 0;
+ if (cp->c_ovp == NULL) {
+ opened_internally = 1;
+ MARK_INT_GEN(CODA_OPEN_STATS);
+ error = VOP_OPEN(vp, FREAD, cred, p);
+ /* NOTE(review): this message is printed whether or not VOP_OPEN succeeded. */
+printf("coda_readdir: Internally Opening %p\n", vp);
+ if (error) {
+ printf("coda_readdir: VOP_OPEN on container failed %d\n", error);
+ return (error);
+ }
+ if (vp->v_type == VREG) {
+ error = vfs_object_create(vp, p, cred);
+ if (error != 0) {
+ printf("coda_readdir: vfs_object_create() returns %d\n", error);
+ /* NOTE(review): vput() here drops a lock/reference this function did not take -- confirm against the caller's expectations. */
+ vput(vp);
+ }
+ }
+ /* NOTE(review): this return skips the matching VOP_CLOSE for the internal open above. */
+ if (error) return(error);
+ }
+
+ /* Have UFS handle the call. */
+ CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = (%lx.%lx.%lx), refcnt = %d\n",cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique, vp->v_usecount)); )
+ error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies,
+ cookies);
+
+ if (error)
+ MARK_INT_FAIL(CODA_READDIR_STATS);
+ else
+ MARK_INT_SAT(CODA_READDIR_STATS);
+
+ /* Do an "internal close" if necessary. */
+ if (opened_internally) {
+ MARK_INT_GEN(CODA_CLOSE_STATS);
+ /* The close result is deliberately ignored; the readdir result is what gets returned. */
+ (void)VOP_CLOSE(vp, FREAD, cred, p);
+ }
+ }
+
+ return(error);
+}
+
+/*
+ * Convert from file system blocks to device blocks.
+ *
+ * No mapping is actually performed.  The forwarding call to the
+ * container vnode's VOP_BMAP sat behind an unconditional return and
+ * could never execute, so it has been removed together with the locals
+ * that only fed it.  The observable behaviour is unchanged:
+ *	EINVAL		the cnode has a container vnode (c_ovp set)
+ *	EOPNOTSUPP	the cnode has no container vnode
+ */
+int
+coda_bmap(v)
+	void *v;
+{
+/* true args */
+	struct vop_bmap_args *ap = v;
+/* locals */
+	struct cnode *cp = VTOC(ap->a_vp);
+
+	if (cp->c_ovp)
+		return (EINVAL);
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * I don't think the following two things are used anywhere, so I've
+ * commented them out
+ *
+ * struct buf *async_bufhead;
+ * int async_daemon_count;
+ */
+/*
+ * Strategy entry point.  Coda does not expect to be called here: the
+ * routine logs the call and rejects it with EOPNOTSUPP.  The argument
+ * structure is not examined.
+ */
+int
+coda_strategy(v)
+	void *v;
+{
+
+	printf("coda_strategy: called ???\n");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Reclaim a coda vnode: purge it from the name cache, free its cnode
+ * and clear the vnode's cnode pointer.  Always returns 0.
+ *
+ * The DEBUG and OLD_DIAGNOSTIC sections only report (or panic on) a
+ * cnode that still has a container vnode attached.
+ */
+int
+coda_reclaim(v)
+	void *v;
+{
+/* true args */
+	struct vop_reclaim_args *ap = v;
+	struct vnode *vp = ap->a_vp;
+	struct cnode *cp = VTOC(vp);
+/* upcall decl */
+/* locals */
+
+/*
+ * Forced unmount/flush will let vnodes with non zero use be destroyed!
+ */
+	ENTRY;
+
+	if (IS_UNMOUNTING(cp)) {
+#ifdef DEBUG
+		/* Already known to be unmounting; no need to test again. */
+		if (VTOC(vp)->c_ovp)
+			printf("coda_reclaim: c_ovp not void: vp %p, cp %p\n", vp, cp);
+#endif
+	} else {
+#ifdef OLD_DIAGNOSTIC
+		/* Was "print(", which is not a kernel function. */
+		if (vp->v_usecount != 0)
+			printf("coda_reclaim: pushing active %p\n", vp);
+		if (VTOC(vp)->c_ovp) {
+			panic("coda_reclaim: c_ovp not void");
+		}
+#endif
+	}
+	cache_purge(vp);
+	coda_free(VTOC(vp));
+	VTOC(vp) = NULL;
+	return (0);
+}
+
+/*
+ * Lock a coda vnode by handing the request to the lock manager on the
+ * cnode's c_lock, using the vnode interlock.  With DEBUG_LOCKS the
+ * debugging variant of the lock manager is used instead.
+ */
+int
+coda_lock(v)
+	void *v;
+{
+/* true args */
+	struct vop_lock_args *ap = v;
+	struct vnode *vp = ap->a_vp;
+	struct cnode *cp = VTOC(vp);
+	struct proc *p = ap->a_p;
+/* locals */
+	int rv;
+
+	ENTRY;
+
+	if (coda_lockdebug) {
+		myprintf(("Attempting lock on %lx.%lx.%lx\n",
+		    cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
+	}
+
+#ifdef DEBUG_LOCKS
+	rv = debuglockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, p,
+	    "coda_lock", vp->filename, vp->line);
+#else
+	rv = lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, p);
+#endif
+	return (rv);
+}
+
+/*
+ * Unlock a coda vnode: the caller's flags are combined with LK_RELEASE
+ * and passed to the lock manager on the cnode's c_lock.
+ */
+int
+coda_unlock(v)
+	void *v;
+{
+/* true args */
+	struct vop_unlock_args *ap = v;
+	struct vnode *vp = ap->a_vp;
+	struct cnode *cp = VTOC(vp);
+	struct proc *p = ap->a_p;
+/* locals */
+	int rv;
+
+	ENTRY;
+
+	if (coda_lockdebug) {
+		myprintf(("Attempting unlock on %lx.%lx.%lx\n",
+		    cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique));
+	}
+
+	rv = lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE, &vp->v_interlock, p);
+	return (rv);
+}
+
+/*
+ * Report the lock state of a coda vnode, as given by lockstatus() on
+ * the cnode's c_lock.
+ */
+int
+coda_islocked(v)
+	void *v;
+{
+/* true args */
+	struct vop_islocked_args *ap = v;
+	struct vnode *vp = ap->a_vp;
+	struct cnode *cp = VTOC(vp);
+
+	ENTRY;
+
+	return (lockstatus(&cp->c_lock));
+}
+
+/*
+ * Look up a vnode from a device/inode pair (comparable to VFS_VGET()
+ * or igetinode()).  The mount is found with devtomp() and the vnode is
+ * fetched through VFS_VGET() into *vpp.
+ *
+ * Returns 0 on success, ENXIO when no mount matches the device and
+ * ENOENT when VFS_VGET() fails.
+ */
+int
+coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp)
+{
+	struct mount *mp;
+	int error;
+
+	mp = devtomp(dev);
+	if (mp == NULL) {
+		myprintf(("coda_grab_vnode: devtomp(%d) returns NULL\n", dev));
+		return (ENXIO);
+	}
+
+	/* XXX - ensure that nonzero-return means failure */
+	error = VFS_VGET(mp, ino, vpp);
+	if (error == 0)
+		return (0);
+
+	myprintf(("coda_grab_vnode: iget/vget(%d, %d) returns %p, err %d\n",
+	    dev, ino, *vpp, error));
+	return (ENOENT);
+}
+
+/*
+ * Debugging aid: dump the fields of a vattr through myprintf().  The
+ * vnode type is shown as a four character tag; unknown types print as
+ * "????".
+ */
+void
+print_vattr( attr )
+	struct vattr *attr;
+{
+	char *typestr;
+
+	switch (attr->va_type) {
+	case VNON:	typestr = "VNON"; break;
+	case VREG:	typestr = "VREG"; break;
+	case VDIR:	typestr = "VDIR"; break;
+	case VBLK:	typestr = "VBLK"; break;
+	case VCHR:	typestr = "VCHR"; break;
+	case VLNK:	typestr = "VLNK"; break;
+	case VSOCK:	typestr = "VSCK"; break;
+	case VFIFO:	typestr = "VFFO"; break;
+	case VBAD:	typestr = "VBAD"; break;
+	default:	typestr = "????"; break;
+	}
+
+	/* Identity and ownership. */
+	myprintf(("attr: type %s mode %d uid %d gid %d fsid %d rdev %d\n",
+	    typestr, (int)attr->va_mode, (int)attr->va_uid,
+	    (int)attr->va_gid, (int)attr->va_fsid, (int)attr->va_rdev));
+
+	/* Sizes and link count. */
+	myprintf((" fileid %d nlink %d size %d blocksize %d bytes %d\n",
+	    (int)attr->va_fileid, (int)attr->va_nlink,
+	    (int)attr->va_size,
+	    (int)attr->va_blocksize, (int)attr->va_bytes));
+
+	/* Generation and flag words. */
+	myprintf((" gen %ld flags %ld vaflags %d\n",
+	    attr->va_gen, attr->va_flags, attr->va_vaflags));
+
+	/* Timestamps. */
+	myprintf((" atime sec %d nsec %d\n",
+	    (int)attr->va_atime.tv_sec, (int)attr->va_atime.tv_nsec));
+	myprintf((" mtime sec %d nsec %d\n",
+	    (int)attr->va_mtime.tv_sec, (int)attr->va_mtime.tv_nsec));
+	myprintf((" ctime sec %d nsec %d\n",
+	    (int)attr->va_ctime.tv_sec, (int)attr->va_ctime.tv_nsec));
+}
+
+/*
+ * Debugging aid: print a ucred -- reference count and uid first, then
+ * one line per group, then a blank line.
+ */
+void
+print_cred(cred)
+	struct ucred *cred;
+{
+	int g;
+
+	myprintf(("ref %d\tuid %d\n",cred->cr_ref,cred->cr_uid));
+
+	g = 0;
+	while (g < cred->cr_ngroups) {
+		myprintf(("\tgroup %d: (%d)\n",g,cred->cr_groups[g]));
+		g++;
+	}
+	myprintf(("\n"));
+}
+
+/*
+ * Return the cnode for the given fid.
+ *
+ * If coda_find() already knows the fid, an extra reference is taken on
+ * the cnode's vnode with vref() and the existing cnode is returned.
+ * Otherwise a cnode is allocated, its lock initialised, a new vnode of
+ * the requested type is attached to it and the pair is registered with
+ * coda_save().  A getnewvnode() failure is fatal (panic).
+ */
+struct cnode *
+make_coda_node(fid, vfsp, type)
+	ViceFid *fid; struct mount *vfsp; short type;
+{
+	struct cnode *cp;
+	struct vnode *vp;
+	int err;
+
+	cp = coda_find(fid);
+	if (cp != NULL) {
+		/* Known fid: just take another reference. */
+		vref(CTOV(cp));
+		return cp;
+	}
+
+	/* Unknown fid: build a fresh cnode ... */
+	cp = coda_alloc();
+	lockinit(&cp->c_lock, PINOD, "cnode", 0, 0);
+	cp->c_fid = *fid;
+
+	/* ... and a vnode to go with it. */
+	err = getnewvnode(VT_CODA, vfsp, coda_vnodeop_p, &vp);
+	if (err) {
+		panic("coda: getnewvnode returned error %d\n", err);
+	}
+	vp->v_data = cp;
+	vp->v_type = type;
+	cp->c_vnode = vp;
+	coda_save(cp);
+
+	return cp;
+}
diff --git a/sys/fs/coda/coda_vnops.h b/sys/fs/coda/coda_vnops.h
new file mode 100644
index 0000000..6c787d5
--- /dev/null
+++ b/sys/fs/coda/coda_vnops.h
@@ -0,0 +1,142 @@
+/*
+ *
+ * Coda: an Experimental Distributed File System
+ * Release 3.1
+ *
+ * Copyright (c) 1987-1998 Carnegie Mellon University
+ * All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ * @(#) src/sys/coda/coda_vnops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ * $Id: coda_vnops.h,v 1.3 1998/09/11 18:50:17 rvb Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved. The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University. Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.
+ */
+
+/*
+ * HISTORY
+ * $Log: coda_vnops.h,v $
+ * Revision 1.3 1998/09/11 18:50:17 rvb
+ * All the references to cfs, in symbols, structs, and strings
+ * have been changed to coda. (Same for CFS.)
+ *
+ * Revision 1.2 1998/09/02 19:09:53 rvb
+ * Pass2 complete
+ *
+ * Revision 1.1.1.1 1998/08/29 21:14:52 rvb
+ * Very Preliminary Coda
+ *
+ * Revision 1.7 1998/08/28 18:12:24 rvb
+ * Now it also works on FreeBSD -current. This code will be
+ * committed to the FreeBSD -current and NetBSD -current
+ * trees. It will then be tailored to the particular platform
+ * by flushing conditional code.
+ *
+ * Revision 1.6 1998/08/18 17:05:22 rvb
+ * Don't use __RCSID now
+ *
+ * Revision 1.5 1998/08/18 16:31:47 rvb
+ * Sync the code for NetBSD -current; test on 1.3 later
+ *
+ * Revision 1.4 98/01/23 11:53:49 rvb
+ * Bring RVB_CODA1_1 to HEAD
+ *
+ * Revision 1.3.2.3 98/01/23 11:21:13 rvb
+ * Sync with 2.2.5
+ *
+ * Revision 1.3.2.2 97/12/16 12:40:20 rvb
+ * Sync with 1.3
+ *
+ * Revision 1.3.2.1 97/12/10 14:08:34 rvb
+ * Fix O_ flags; check result in coda_call
+ *
+ * Revision 1.3 97/12/05 10:39:25 rvb
+ * Read CHANGES
+ *
+ * Revision 1.2.34.2 97/11/20 11:46:54 rvb
+ * Capture current cfs_venus
+ *
+ * Revision 1.2.34.1 97/11/13 22:03:04 rvb
+ * pass2 cfs_NetBSD.h mt
+ *
+ * Revision 1.2 96/01/02 16:57:14 bnoble
+ * Added support for Coda MiniCache and raw inode calls (final commit)
+ *
+ * Revision 1.1.2.1 1995/12/20 01:57:40 bnoble
+ * Added CODA-specific files
+ *
+ */
+
+/*
+ * NetBSD interfaces to the vnodeops.
+ * Every entry point takes its vop argument structure as an untyped
+ * pointer and returns an int.
+ */
+int coda_open __P((void *));
+int coda_close __P((void *));
+int coda_read __P((void *));
+int coda_write __P((void *));
+int coda_ioctl __P((void *));
+/* 1.3 int cfs_select __P((void *));*/
+int coda_getattr __P((void *));
+int coda_setattr __P((void *));
+int coda_access __P((void *));
+int coda_abortop __P((void *));
+int coda_readlink __P((void *));
+int coda_fsync __P((void *));
+int coda_inactive __P((void *));
+int coda_lookup __P((void *));
+int coda_create __P((void *));
+int coda_remove __P((void *));
+int coda_link __P((void *));
+int coda_rename __P((void *));
+int coda_mkdir __P((void *));
+int coda_rmdir __P((void *));
+int coda_symlink __P((void *));
+int coda_readdir __P((void *));
+int coda_bmap __P((void *));
+int coda_strategy __P((void *));
+int coda_reclaim __P((void *));
+int coda_lock __P((void *));
+int coda_unlock __P((void *));
+int coda_islocked __P((void *));
+int coda_vop_error __P((void *));
+int coda_vop_nop __P((void *));
+int coda_fbsd_getpages __P((void *));
+int coda_fbsd_putpages __P((void *));
+
+/*
+ * Pointer to the coda vnode operations vector.
+ * NOTE(review): declared without "extern", so every file including this
+ * header gets its own tentative definition -- confirm this is intended.
+ */
+int (**coda_vnodeop_p)(void *);
+
+/* Helper routines with conventional (typed) prototypes. */
+int coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw,
+ int ioflag, struct ucred *cred, struct proc *p);
+int coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp);
+void print_vattr(struct vattr *attr);
+void print_cred(struct ucred *cred);
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
new file mode 100644
index 0000000..4e3853c
--- /dev/null
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -0,0 +1,296 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93
+ * $Id: dead_vnops.c,v 1.24 1998/08/23 11:43:29 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/vnode.h>
+#include <sys/buf.h>
+#include <sys/poll.h>
+
+static int chkvnlock __P((struct vnode *));
+/*
+ * Prototypes for dead operations on vnodes.
+ */
+static int dead_badop __P((void));
+static int dead_bmap __P((struct vop_bmap_args *));
+static int dead_ioctl __P((struct vop_ioctl_args *));
+static int dead_lock __P((struct vop_lock_args *));
+static int dead_lookup __P((struct vop_lookup_args *));
+static int dead_open __P((struct vop_open_args *));
+static int dead_poll __P((struct vop_poll_args *));
+static int dead_print __P((struct vop_print_args *));
+static int dead_read __P((struct vop_read_args *));
+static int dead_write __P((struct vop_write_args *));
+
+/*
+ * Vnode operations vector for dead vnodes.
+ * Each entry pairs an operation descriptor with its handler: the local
+ * dead_* routines, dead_badop (which panics), or the generic
+ * vop_ebadf / vop_null / vop_defaultop handlers.  The list is
+ * terminated by a { NULL, NULL } entry.
+ */
+vop_t **dead_vnodeop_p;
+static struct vnodeopv_entry_desc dead_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_access_desc, (vop_t *) vop_ebadf },
+ { &vop_advlock_desc, (vop_t *) vop_ebadf },
+ { &vop_bmap_desc, (vop_t *) dead_bmap },
+ { &vop_create_desc, (vop_t *) dead_badop },
+ { &vop_getattr_desc, (vop_t *) vop_ebadf },
+ { &vop_inactive_desc, (vop_t *) vop_null },
+ { &vop_ioctl_desc, (vop_t *) dead_ioctl },
+ { &vop_link_desc, (vop_t *) dead_badop },
+ { &vop_lock_desc, (vop_t *) dead_lock },
+ { &vop_lookup_desc, (vop_t *) dead_lookup },
+ { &vop_mkdir_desc, (vop_t *) dead_badop },
+ { &vop_mknod_desc, (vop_t *) dead_badop },
+ { &vop_mmap_desc, (vop_t *) dead_badop },
+ { &vop_open_desc, (vop_t *) dead_open },
+ { &vop_pathconf_desc, (vop_t *) vop_ebadf }, /* per pathconf(2) */
+ { &vop_poll_desc, (vop_t *) dead_poll },
+ { &vop_print_desc, (vop_t *) dead_print },
+ { &vop_read_desc, (vop_t *) dead_read },
+ { &vop_readdir_desc, (vop_t *) vop_ebadf },
+ { &vop_readlink_desc, (vop_t *) vop_ebadf },
+ { &vop_reclaim_desc, (vop_t *) vop_null },
+ { &vop_remove_desc, (vop_t *) dead_badop },
+ { &vop_rename_desc, (vop_t *) dead_badop },
+ { &vop_rmdir_desc, (vop_t *) dead_badop },
+ { &vop_setattr_desc, (vop_t *) vop_ebadf },
+ { &vop_symlink_desc, (vop_t *) dead_badop },
+ { &vop_write_desc, (vop_t *) dead_write },
+ { NULL, NULL }
+};
+/* Ties the entry table to the dead_vnodeop_p vector pointer. */
+static struct vnodeopv_desc dead_vnodeop_opv_desc =
+ { &dead_vnodeop_p, dead_vnodeop_entries };
+
+VNODEOP_SET(dead_vnodeop_opv_desc);
+
+/*
+ * Lookup on a dead vnode never succeeds: the result pointer is cleared
+ * and ENOTDIR is returned.
+ */
+/* ARGSUSED */
+static int
+dead_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+
+	*vpp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Opening a dead vnode always fails with ENXIO, exactly as if the
+ * device were not there.  The arguments are not examined.
+ */
+/* ARGSUSED */
+static int
+dead_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (ENXIO);
+}
+
+/*
+ * Read from a dead vnode.  Panics if the vnode turned out to be in
+ * transition (chkvnlock() had to wait); otherwise tty vnodes report
+ * end-of-file (0) and everything else gets EIO.
+ */
+/* ARGSUSED */
+static int
+dead_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		panic("dead_read: lock");
+
+	/* EOF for tty devices, EIO for the rest. */
+	if (ap->a_vp->v_flag & VISTTY)
+		return (0);
+	return (EIO);
+}
+
+/*
+ * Write to a dead vnode.  Panics if the vnode turned out to be in
+ * transition (chkvnlock() had to wait); otherwise always fails with
+ * EIO.
+ */
+/* ARGSUSED */
+static int
+dead_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int waited = chkvnlock(ap->a_vp);
+
+	if (waited)
+		panic("dead_write: lock");
+	return (EIO);
+}
+
+/*
+ * Device ioctl on a dead vnode.  If chkvnlock() had to wait for the
+ * vnode to finish changing state, the ioctl is re-dispatched through
+ * the vnode's operations vector; otherwise ENOTTY is returned.
+ */
+/* ARGSUSED */
+static int
+dead_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap));
+	return (ENOTTY);
+}
+
+
+/*
+ * Lock request on a dead vnode.  Waits until the vnode has finished
+ * changing state; if a wait was needed the request is re-dispatched
+ * through the vnode's operations vector, otherwise it succeeds
+ * immediately.
+ */
+static int
+dead_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/*
+	 * The lock manager is not involved here, so the interlock has
+	 * to be released by hand.
+	 */
+	if ((ap->a_flags & LK_INTERLOCK) != 0) {
+		simple_unlock(&vp->v_interlock);
+		ap->a_flags &= ~LK_INTERLOCK;
+	}
+
+	if (chkvnlock(vp))
+		return (VCALL(vp, VOFFSET(vop_lock), ap));
+	return (0);
+}
+
+/*
+ * Block mapping on a dead vnode.  Waits until the vnode has finished
+ * changing state; if a wait was needed the request is re-issued with
+ * VOP_BMAP, otherwise EIO is returned.
+ */
+static int
+dead_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+
+	if (chkvnlock(ap->a_vp))
+		return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp,
+		    ap->a_runp, ap->a_runb));
+	return (EIO);
+}
+
+/*
+ * Describe a dead vnode: prints a fixed one-line message and returns 0.
+ */
+/* ARGSUSED */
+static int
+dead_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON, dead vnode\n");
+	return (0);
+}
+
+/*
+ * Handler for operations that must never reach a dead vnode: panics.
+ */
+static int
+dead_badop()
+{
+
+	panic("dead_badop called");
+	/* NOTREACHED */
+}
+
+/*
+ * Wait while the vnode is in a state of change.
+ *
+ * While VXLOCK is set on the vnode, VXWANT is set and the caller sleeps
+ * on the vnode.  Returns 1 if at least one sleep was needed, 0 if the
+ * vnode was not locked on entry.
+ *
+ * The definition is now marked static to match its prototype; the
+ * linkage was already internal because of that earlier declaration.
+ */
+static int
+chkvnlock(vp)
+	register struct vnode *vp;
+{
+	int locked = 0;
+
+	while (vp->v_flag & VXLOCK) {
+		vp->v_flag |= VXWANT;
+		(void) tsleep((caddr_t)vp, PINOD, "ckvnlk", 0);
+		locked = 1;
+	}
+	return (locked);
+}
+
+/*
+ * Poll on a dead vnode always reports POLLHUP, so that a process
+ * polling a file is notified once that file has been revoke()d.
+ */
+static int
+dead_poll(ap)
+	struct vop_poll_args *ap;
+{
+
+	return (POLLHUP);
+}
diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h
new file mode 100644
index 0000000..bbba54d
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc.h
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc.h 8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.5 1997/02/22 09:40:14 peter Exp $
+ */
+
+#ifdef KERNEL
+/* Per-mount data for an fdesc mount; reached through VFSTOFDESC(). */
+struct fdescmount {
+ struct vnode *f_root; /* Root node */
+};
+
+/*
+ * Fixed node indices.
+ * NOTE(review): presumably these are the filesystem index (fd_ix)
+ * values of the well-known nodes -- confirm against fdesc_vnops.c.
+ */
+#define FD_ROOT 2
+#define FD_DEVFD 3
+#define FD_STDIN 4
+#define FD_STDOUT 5
+#define FD_STDERR 6
+#define FD_CTTY 7
+#define FD_DESC 8
+#define FD_MAX 12
+
+/* Kind of node an fdescnode represents (stored in fd_type). */
+typedef enum {
+ Froot,
+ Fdevfd,
+ Fdesc,
+ Flink,
+ Fctty
+} fdntype;
+
+/* Per-vnode data for fdesc; reached through VTOFDESC(). */
+struct fdescnode {
+ LIST_ENTRY(fdescnode) fd_hash; /* Hash list */
+ struct vnode *fd_vnode; /* Back ptr to vnode */
+ fdntype fd_type; /* Type of this node */
+ unsigned fd_fd; /* Fd to be dup'ed */
+ char *fd_link; /* Link to fd/n */
+ int fd_ix; /* filesystem index */
+};
+
+/* Casts from the generic mount / vnode private-data pointers. */
+#define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data))
+#define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)
+
+extern dev_t devctty;
+extern int fdesc_init __P((struct vfsconf *));
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));
+#endif /* KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
new file mode 100644
index 0000000..758f3b5
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -0,0 +1,264 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.17 1999/01/12 11:49:30 eivind Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+static MALLOC_DEFINE(M_FDESCMNT, "FDESC mount", "FDESC mount structure");
+
+static int fdesc_mount __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+static int fdesc_start __P((struct mount *mp, int flags, struct proc *p));
+static int fdesc_unmount __P((struct mount *mp, int mntflags,
+ struct proc *p));
+static int fdesc_statfs __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+static int fdesc_sync __P((struct mount *mp, int waitfor,
+ struct ucred *cred, struct proc *p));
+
+/*
+ * Mount the per-process file descriptors (/dev/fd).
+ *
+ * Allocates the root vnode and the fdescmount structure, attaches them
+ * to the mount point and fills in the statfs names.  Mount updates are
+ * rejected with EOPNOTSUPP; a failure to allocate the root vnode is
+ * passed back to the caller.
+ */
+static int
+fdesc_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct fdescmount *fmp;
+	struct vnode *rvp;
+	u_int size;
+	int error = 0;
+
+	/* Updating an existing mount is not supported. */
+	if ((mp->mnt_flag & MNT_UPDATE) != 0)
+		return (EOPNOTSUPP);
+
+	if ((error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp)) != 0)
+		return (error);
+
+	MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
+	    M_FDESCMNT, M_WAITOK);	/* XXX */
+
+	/* The new vnode becomes the root directory of the mount. */
+	rvp->v_flag |= VROOT;
+	rvp->v_type = VDIR;
+	fmp->f_root = rvp;
+
+	/*
+	 * XXX -- the mount is deliberately not flagged MNT_LOCAL, to work
+	 * around fts() problems.
+	 */
+	mp->mnt_data = (qaddr_t) fmp;
+	vfs_getnewfsid(mp);
+
+	/* Record where we are mounted and a fixed "from" name. */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc"));
+	(void)fdesc_statfs(mp, &mp->mnt_stat, p);
+
+	return (0);
+}
+
+/*
+ * Nothing needs to be started for an fdesc mount; always succeeds.
+ */
+static int
+fdesc_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Unmount an fdesc filesystem.
+ *
+ * Fails with EBUSY while the root vnode has more than one user, and
+ * passes back any vflush() error.  On success the root vnode is
+ * released and destroyed and the fdescmount structure is freed.
+ */
+static int
+fdesc_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+	int flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
+	int error;
+
+	/*
+	 * Clear out buffer cache.  Probably nothing is ever cached at
+	 * this level at the moment, but who knows...
+	 */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+
+	error = vflush(mp, rootvp, flags);
+	if (error != 0)
+		return (error);
+
+	/* Drop our reference on the root vnode ... */
+	vrele(rootvp);
+	/* ... and blow it away for future re-use. */
+	vgone(rootvp);
+
+	/* Finally, throw away the fdescmount structure. */
+	free(mp->mnt_data, M_FDESCMNT);	/* XXX */
+	mp->mnt_data = 0;
+
+	return (0);
+}
+
+/*
+ * Return the root vnode of an fdesc mount in *vpp, referenced and
+ * exclusively locked.  Always succeeds.
+ */
+int
+fdesc_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+	/* Hand back a locked reference to the root. */
+	VREF(rootvp);
+	vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY, p);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * Fill in filesystem statistics for an fdesc mount.
+ *
+ * The file counts come from the calling process: f_files is its open
+ * file limit plus one, f_ffree the number of descriptor slots still
+ * free under that limit.  When sbp is not the mount's own statfs
+ * buffer, the type, fsid and mount names are copied from the mount.
+ */
+static int
+fdesc_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct filedesc *fdp;
+	int lim;
+	int last;
+	int freefd;
+	int slot;
+
+	/*
+	 * Count the free file descriptors.
+	 * [ Strange results will ensue if the open file limit is ever
+	 *   reduced below the current number of open files... ]
+	 */
+	lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+	fdp = p->p_fd;
+	last = min(fdp->fd_nfiles, lim);
+	freefd = 0;
+	slot = fdp->fd_freefile;
+	while (slot < last) {
+		if (fdp->fd_ofiles[slot] == NULL)
+			freefd++;
+		slot++;
+	}
+
+	/*
+	 * The descriptor array may not have grown to the limit yet;
+	 * slots that do not exist yet count as free.
+	 */
+	if (fdp->fd_nfiles < lim)
+		freefd += (lim - fdp->fd_nfiles);
+
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = lim + 1;		/* Allow for "." */
+	sbp->f_ffree = freefd;		/* See comments above */
+
+	if (sbp != &mp->mnt_stat) {
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+
+	return (0);
+}
+
+/*
+ * There is nothing to write back for an fdesc mount; always succeeds.
+ */
+static int
+fdesc_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Operations fdesc does not implement: each name is eopnotsupp cast to
+ * the function pointer type of the corresponding vfsops slot.
+ */
+#define fdesc_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+ struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp)
+#define fdesc_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+ struct proc *)))eopnotsupp)
+#define fdesc_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+ size_t, struct proc *)))eopnotsupp)
+#define fdesc_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+ eopnotsupp)
+#define fdesc_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
+
+/*
+ * VFS operations table for fdesc.  The initializer is positional, so
+ * the order of the entries must match the members of struct vfsops.
+ */
+static struct vfsops fdesc_vfsops = {
+ fdesc_mount,
+ fdesc_start,
+ fdesc_unmount,
+ fdesc_root,
+ fdesc_quotactl,
+ fdesc_statfs,
+ fdesc_sync,
+ fdesc_vget,
+ fdesc_fhtovp,
+ fdesc_vptofh,
+ fdesc_init,
+};
+
+VFS_SET(fdesc_vfsops, fdesc, VFCF_SYNTHETIC);
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
new file mode 100644
index 0000000..6bdea5f
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -0,0 +1,872 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.40 1998/12/14 05:00:57 dillon Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/kernel.h> /* boottime */
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/dirent.h>
+#include <sys/socketvar.h>
+#include <sys/conf.h>
+#include <miscfs/fdesc/fdesc.h>
+
+extern struct cdevsw ctty_cdevsw;
+
+/*
+ * Controlling-terminal vnode of process p (its session's s_ttyvp), or
+ * NULL when P_CONTROLT is not set in p_flag.
+ */
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL)
+
+/* Bits kept in fdcache_lock; see fdesc_allocvp(). */
+#define FDL_WANT 0x01
+#define FDL_LOCKED 0x02
+static int fdcache_lock;
+
+static vop_t **fdesc_vnodeop_p;
+
+dev_t devctty;
+
+/*
+ * Compile-time check: the bare text between #if and #endif is only
+ * seen by the compiler (and then breaks the build) when the three
+ * constants are not consecutive.
+ */
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+#define NFDCACHE 4
+/* Hash chain for index ix: the fdhashtbl slot selected by (ix & fdhash). */
+#define FD_NHASH(ix) \
+ (&fdhashtbl[(ix) & fdhash])
+static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl;
+static u_long fdhash;
+
+static int fdesc_attr __P((int fd, struct vattr *vap, struct ucred *cred,
+ struct proc *p));
+static int fdesc_badop __P((void));
+static int fdesc_getattr __P((struct vop_getattr_args *ap));
+static int fdesc_inactive __P((struct vop_inactive_args *ap));
+static int fdesc_ioctl __P((struct vop_ioctl_args *ap));
+static int fdesc_lookup __P((struct vop_lookup_args *ap));
+static int fdesc_open __P((struct vop_open_args *ap));
+static int fdesc_print __P((struct vop_print_args *ap));
+static int fdesc_read __P((struct vop_read_args *ap));
+static int fdesc_readdir __P((struct vop_readdir_args *ap));
+static int fdesc_readlink __P((struct vop_readlink_args *ap));
+static int fdesc_reclaim __P((struct vop_reclaim_args *ap));
+static int fdesc_poll __P((struct vop_poll_args *ap));
+static int fdesc_setattr __P((struct vop_setattr_args *ap));
+static int fdesc_write __P((struct vop_write_args *ap));
+
+/*
+ * Initialise cache headers: set devctty from makedev(nchrdev, 0) and
+ * create the fdesc node hash table with hashinit(), which is given
+ * &fdhash (presumably to receive the mask used by FD_NHASH -- confirm).
+ * vfsp is not used.  Always returns 0.
+ */
+int
+fdesc_init(vfsp)
+ struct vfsconf *vfsp;
+{
+
+ devctty = makedev(nchrdev, 0);
+ fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
+ return (0);
+}
+
+/*
+ * Return in *vpp the fdesc vnode for index ix on mount mp, creating it
+ * if it does not exist yet.
+ *
+ * The hash chain for ix is searched first.  A node with the same fd_ix
+ * on the same mount is returned once vget() succeeds; if vget() fails
+ * the search starts over.  Otherwise a new fdescnode and vnode are
+ * allocated, the node is initialised with type ftype and inserted at
+ * the head of the chain.  Creation is serialised through the FDL_LOCKED
+ * and FDL_WANT bits of fdcache_lock.
+ *
+ * Returns 0 on success or the error from getnewvnode().
+ */
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+ fdntype ftype;
+ int ix;
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct fdhashhead *fc;
+ struct fdescnode *fd;
+ int error = 0;
+
+ fc = FD_NHASH(ix);
+loop:
+ /* Look for an existing node; restart from the top if vget() fails. */
+ for (fd = fc->lh_first; fd != 0; fd = fd->fd_hash.le_next) {
+ if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
+ if (vget(fd->fd_vnode, 0, p))
+ goto loop;
+ *vpp = fd->fd_vnode;
+ return (error);
+ }
+ }
+
+ /*
+ * otherwise lock the array while we call getnewvnode
+ * since that can block.
+ */
+ if (fdcache_lock & FDL_LOCKED) {
+ /* Someone else is creating a node: wait, then search again. */
+ fdcache_lock |= FDL_WANT;
+ (void) tsleep((caddr_t) &fdcache_lock, PINOD, "fdalvp", 0);
+ goto loop;
+ }
+ fdcache_lock |= FDL_LOCKED;
+
+ /*
+ * Do the MALLOC before the getnewvnode since doing so afterward
+ * might cause a bogus v_data pointer to get dereferenced
+ * elsewhere if MALLOC should block.
+ */
+ MALLOC(fd, struct fdescnode *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+
+ error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+ if (error) {
+ FREE(fd, M_TEMP);
+ goto out;
+ }
+ /* Tie vnode and node together; fd_fd and fd_link start out unset. */
+ (*vpp)->v_data = fd;
+ fd->fd_vnode = *vpp;
+ fd->fd_type = ftype;
+ fd->fd_fd = -1;
+ fd->fd_link = 0;
+ fd->fd_ix = ix;
+ LIST_INSERT_HEAD(fc, fd, fd_hash);
+
+out:;
+ /* Drop the creation lock and wake anyone who slept waiting for it. */
+ fdcache_lock &= ~FDL_LOCKED;
+
+ if (fdcache_lock & FDL_WANT) {
+ fdcache_lock &= ~FDL_WANT;
+ wakeup((caddr_t) &fdcache_lock);
+ }
+
+ return (error);
+}
+
+/*
+ * Look up the component a_cnp in the fdesc directory a_dvp and return
+ * the resulting vnode in *a_vpp.
+ *
+ * DELETE and RENAME lookups fail with EROFS.  "." returns a_dvp itself
+ * with an extra reference.  In the root directory (Froot) the names
+ * "fd", "tty", "stdin", "stdout" and "stderr" are recognised; anything
+ * else is ENOENT.  In the "fd" directory (Fdevfd) ".." returns the root
+ * and a decimal number names an open descriptor of the calling process
+ * (EBADF when out of range or not open, ENOENT when the name is not
+ * purely numeric).  Any other directory type yields ENOTDIR.
+ *
+ * On failure *a_vpp is set to NULL and the error is returned.
+ */
+static int
+fdesc_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ struct componentname *cnp = ap->a_cnp;
+ char *pname = cnp->cn_nameptr;
+ struct proc *p = cnp->cn_proc;
+ int nfiles = p->p_fd->fd_nfiles;
+ unsigned fd = -1;
+ int error;
+ struct vnode *fvp;
+ char *ln;
+
+ /*
+ * NOTE(review): this path jumps to "bad", which calls vn_lock() on
+ * dvp, although VOP_UNLOCK() below has not run yet -- confirm that
+ * re-locking is harmless for fdesc vnodes.
+ */
+ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
+ error = EROFS;
+ goto bad;
+ }
+
+ VOP_UNLOCK(dvp, 0, p);
+ if (cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ vn_lock(dvp, LK_SHARED | LK_RETRY, p);
+ return (0);
+ }
+
+ switch (VTOFDESC(dvp)->fd_type) {
+ default:
+ case Flink:
+ case Fdesc:
+ case Fctty:
+ error = ENOTDIR;
+ goto bad;
+
+ case Froot:
+ /* "fd": the directory of per-descriptor nodes. */
+ if (cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+ error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VDIR;
+ vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+ return (0);
+ }
+
+ /* "tty": only present when the process has a controlling tty. */
+ if (cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+ struct vnode *ttyvp = cttyvp(p);
+ if (ttyvp == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VFIFO;
+ vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+ return (0);
+ }
+
+ /* "stdin", "stdout", "stderr": symbolic links into fd/. */
+ ln = 0;
+ switch (cnp->cn_namelen) {
+ case 5:
+ if (bcmp(pname, "stdin", 5) == 0) {
+ ln = "fd/0";
+ fd = FD_STDIN;
+ }
+ break;
+ case 6:
+ if (bcmp(pname, "stdout", 6) == 0) {
+ ln = "fd/1";
+ fd = FD_STDOUT;
+ } else
+ if (bcmp(pname, "stderr", 6) == 0) {
+ ln = "fd/2";
+ fd = FD_STDERR;
+ }
+ break;
+ }
+
+ if (ln) {
+ error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_link = ln;
+ *vpp = fvp;
+ fvp->v_type = VLNK;
+ vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+ return (0);
+ } else {
+ error = ENOENT;
+ goto bad;
+ }
+
+ /* Not reached: both arms above return or jump to "bad". */
+ /* FALL THROUGH */
+
+ case Fdevfd:
+ if (cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+ if ((error = fdesc_root(dvp->v_mount, vpp)) != 0)
+ goto bad;
+ return (0);
+ }
+
+ /*
+ * Parse a decimal descriptor number; stop early once the value
+ * can no longer be a valid descriptor.
+ */
+ fd = 0;
+ while (*pname >= '0' && *pname <= '9') {
+ fd = 10 * fd + *pname++ - '0';
+ if (fd >= nfiles)
+ break;
+ }
+
+ /* Anything left over means the name was not purely numeric. */
+ if (*pname != '\0') {
+ error = ENOENT;
+ goto bad;
+ }
+
+ if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+ error = EBADF;
+ goto bad;
+ }
+
+ error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_fd = fd;
+ vn_lock(fvp, LK_SHARED | LK_RETRY, p);
+ *vpp = fvp;
+ return (0);
+ }
+
+bad:;
+ /* Re-lock the directory and report failure. */
+ vn_lock(dvp, LK_SHARED | LK_RETRY, p);
+ *vpp = NULL;
+ return (error);
+}
+
+/*
+ * Open an fdesc vnode.  Descriptor nodes (Fdesc) record the descriptor
+ * number in p_dupfd and fail with ENODEV; the controlling-terminal
+ * node (Fctty) is opened through the ctty device's d_open entry.  All
+ * other node types open successfully without further action.
+ */
+static int
+fdesc_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ int error = 0;
+
+ switch (VTOFDESC(vp)->fd_type) {
+ case Fdesc:
+ /*
+ * XXX Kludge: set p->p_dupfd to contain the value of the
+ * file descriptor being sought for duplication. The error
+ * return ensures that the vnode for this device will be
+ * released by vn_open. Open will detect this special error and
+ * take the actions in dupfdopen. Other callers of vn_open or
+ * VOP_OPEN will simply report the error.
+ */
+ ap->a_p->p_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */
+ error = ENODEV;
+ break;
+
+ case Fctty:
+ error = (*ctty_cdevsw.d_open)(devctty, ap->a_mode, 0, ap->a_p);
+ break;
+ }
+
+ return (error);
+}
+
+/*
+ * Fill in *vap with the attributes of the object behind descriptor fd
+ * of process p.
+ *
+ * DTYPE_FIFO and DTYPE_VNODE descriptors are answered by VOP_GETATTR()
+ * on the underlying vnode; for directories the execute bits are then
+ * cleared.  DTYPE_SOCKET descriptors are answered by soo_stat() and
+ * the result is copied field by field into *vap.
+ *
+ * Returns EBADF when fd is out of range, not open, or of a type this
+ * routine cannot describe; otherwise the error from the underlying
+ * call.
+ */
+static int
+fdesc_attr(fd, vap, cred, p)
+	int fd;
+	struct vattr *vap;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct stat stb;
+	int error;
+
+	/* fd is signed here, so reject negative values explicitly. */
+	if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	switch (fp->f_type) {
+	case DTYPE_FIFO:
+	case DTYPE_VNODE:
+		error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+		if (error == 0 && vap->va_type == VDIR) {
+			/*
+			 * directories can cause loops in the namespace,
+			 * so turn off the 'x' bits to avoid trouble.
+			 */
+			vap->va_mode &= ~((VEXEC)|(VEXEC>>3)|(VEXEC>>6));
+		}
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &stb);
+		if (error == 0) {
+			vattr_null(vap);
+			vap->va_type = VSOCK;
+			vap->va_mode = stb.st_mode;
+			vap->va_nlink = stb.st_nlink;
+			vap->va_uid = stb.st_uid;
+			vap->va_gid = stb.st_gid;
+			vap->va_fsid = stb.st_dev;
+			vap->va_fileid = stb.st_ino;
+			vap->va_size = stb.st_size;
+			vap->va_blocksize = stb.st_blksize;
+			vap->va_atime = stb.st_atimespec;
+			vap->va_mtime = stb.st_mtimespec;
+			vap->va_ctime = stb.st_ctimespec;
+			vap->va_gen = stb.st_gen;
+			vap->va_flags = stb.st_flags;
+			vap->va_rdev = stb.st_rdev;
+			vap->va_bytes = stb.st_blocks * stb.st_blksize;
+		}
+		break;
+
+	default:
+		/*
+		 * Any other descriptor type an ordinary process holds
+		 * must not bring the system down; report it the same
+		 * way fdesc_setattr() does.
+		 */
+		error = EBADF;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Return the attributes of an fdesc vnode in *a_vap.
+ *
+ * Descriptor nodes (Fdesc) are delegated to fdesc_attr().  The root,
+ * the "fd" directory, the symbolic links and the tty node get
+ * synthesised attributes: owner and group 0, all three timestamps set
+ * to boot time, and a mode, type, link count and size that depend on
+ * the node type.  On success the vnode's v_type is updated to match
+ * the reported type.  An unknown node type panics.
+ */
+static int
+fdesc_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct fdescnode *node = VTOFDESC(vp);
+	unsigned fd;
+	int error = 0;
+
+	switch (node->fd_type) {
+	case Fdesc:
+		fd = node->fd_fd;
+		error = fdesc_attr(fd, vap, ap->a_cred, ap->a_p);
+		break;
+
+	case Froot:
+	case Fdevfd:
+	case Flink:
+	case Fctty:
+		bzero((caddr_t) vap, sizeof(*vap));
+		vattr_null(vap);
+		vap->va_fileid = node->fd_ix;
+
+		/* Type-specific part. */
+		if (node->fd_type == Flink) {
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VLNK;
+			vap->va_nlink = 1;
+			vap->va_size = strlen(node->fd_link);
+		} else if (node->fd_type == Fctty) {
+			vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+			vap->va_type = VFIFO;
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+		} else {
+			/* Froot and Fdevfd are directories. */
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VDIR;
+			vap->va_nlink = 2;
+			vap->va_size = DEV_BSIZE;
+		}
+
+		/* Part common to all synthesised nodes. */
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		vap->va_blocksize = DEV_BSIZE;
+		vap->va_atime.tv_sec = boottime.tv_sec;
+		vap->va_atime.tv_nsec = 0;
+		vap->va_mtime = vap->va_atime;
+		vap->va_ctime = vap->va_mtime;
+		vap->va_gen = 0;
+		vap->va_flags = 0;
+		vap->va_rdev = 0;
+		vap->va_bytes = 0;
+		break;
+
+	default:
+		panic("fdesc_getattr");
+		break;
+	}
+
+	if (error == 0)
+		vp->v_type = vap->va_type;
+
+	return (error);
+}
+
+/*
+ * Set attributes through an fdesc vnode.
+ *
+ * Only descriptor nodes (Fdesc) forward the request.  The tty node
+ * accepts everything except a flags change (EOPNOTSUPP); every other
+ * node type is refused with EACCES.
+ *
+ * For a descriptor node the descriptor is looked up in the calling
+ * process (EBADF when out of range or not open).  DTYPE_FIFO and
+ * DTYPE_VNODE descriptors are passed to VOP_SETATTR() on the
+ * underlying vnode.  Sockets and pipes have no vnode behind them: a
+ * flags change is refused with EOPNOTSUPP and anything else succeeds
+ * without effect.  Unknown descriptor types yield EBADF.
+ */
+static int
+fdesc_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct filedesc *fdp = ap->a_p->p_fd;
+	struct vattr *vap = ap->a_vap;
+	struct file *fp;
+	unsigned fd;
+	int error;
+
+	/*
+	 * Can't mess with the root vnode
+	 */
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fdesc:
+		break;
+
+	case Fctty:
+		if (vap->va_flags != VNOVAL)
+			return (EOPNOTSUPP);
+		return (0);
+
+	default:
+		return (EACCES);
+	}
+
+	fd = VTOFDESC(ap->a_vp)->fd_fd;
+	if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) {
+		return (EBADF);
+	}
+
+	/*
+	 * Can setattr the underlying vnode, but not sockets or pipes!
+	 */
+	switch (fp->f_type) {
+	case DTYPE_FIFO:
+	case DTYPE_VNODE:
+		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap, ap->a_cred, ap->a_p);
+		break;
+
+	case DTYPE_PIPE:
+		/*
+		 * f_data of a DTYPE_PIPE file is the pipe itself, not a
+		 * vnode, so it must never reach VOP_SETATTR(); handle it
+		 * like a socket.
+		 */
+		/* FALLTHROUGH */
+	case DTYPE_SOCKET:
+		if (vap->va_flags != VNOVAL)
+			error = EOPNOTSUPP;
+		else
+			error = 0;
+		break;
+
+	default:
+		error = EBADF;
+		break;
+	}
+
+	return (error);
+}
+
+/* Size in bytes of every directory record produced by fdesc_readdir(). */
+#define UIO_MX 16
+
+/*
+ * Fixed entries of the fdesc root directory, in the order in which
+ * fdesc_readdir() reports them.  d_namlen is the length of d_name
+ * without its terminating NUL.
+ */
+static struct dirtmp {
+ u_long d_fileno;
+ u_short d_reclen;
+ u_short d_namlen;
+ char d_name[8];
+} rootent[] = {
+ { FD_DEVFD, UIO_MX, 2, "fd" },
+ { FD_STDIN, UIO_MX, 5, "stdin" },
+ { FD_STDOUT, UIO_MX, 6, "stdout" },
+ { FD_STDERR, UIO_MX, 6, "stderr" },
+ { FD_CTTY, UIO_MX, 3, "tty" },
+};
+
+/*
+ * Read directory entries from the fdesc root or from its "fd"
+ * directory.
+ *
+ * Every entry is a fixed UIO_MX-byte record, so uio_offset must be a
+ * non-negative multiple of UIO_MX that fits in an int, and at least
+ * one record must fit in the caller's buffer; otherwise EINVAL is
+ * returned.  offset / UIO_MX is the index of the next slot to look at:
+ * an index into rootent[] for the root, a descriptor number for the
+ * "fd" directory.  Slots that are not valid for the calling process
+ * (no controlling terminal, descriptor not open) are skipped but still
+ * consume an index.
+ *
+ * Returns 0 or the error from uiomove(); uio_offset is left at the
+ * first slot that was not processed.  Panics when cookies are
+ * requested or the vnode is not one of the two directories.
+ */
+static int
+fdesc_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	int error, i, off;
+
+	/*
+	 * We don't allow exporting fdesc mounts, and currently local
+	 * requests do not need cookies.
+	 */
+	if (ap->a_ncookies)
+		panic("fdesc_readdir: not hungry");
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Froot &&
+	    VTOFDESC(ap->a_vp)->fd_type != Fdevfd)
+		panic("fdesc_readdir: not dir");
+
+	off = (int)uio->uio_offset;
+	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
+	    uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	i = (u_int)off / UIO_MX;
+	fdp = uio->uio_procp->p_fd;
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct dirtmp *dt;
+
+		error = 0;
+
+		/*
+		 * The index is advanced in the for-increment so that the
+		 * "continue" statements below really move on to the next
+		 * entry instead of re-examining the same one forever.  A
+		 * "break" on error leaves i at the entry that failed.
+		 */
+		for (; i < sizeof(rootent) / sizeof(rootent[0]) &&
+		    uio->uio_resid >= UIO_MX; i++) {
+			dt = &rootent[i];
+			switch (dt->d_fileno) {
+			case FD_CTTY:
+				/* No controlling terminal: hide "tty". */
+				if (cttyvp(uio->uio_procp) == NULL)
+					continue;
+				break;
+
+			case FD_STDIN:
+			case FD_STDOUT:
+			case FD_STDERR:
+				/* Hide the link when its descriptor is not open. */
+				if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+					continue;
+				if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+					continue;
+				break;
+			}
+			bzero((caddr_t) dp, UIO_MX);
+			dp->d_fileno = dt->d_fileno;
+			dp->d_namlen = dt->d_namlen;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_reclen = dt->d_reclen;
+			bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		uio->uio_offset = i * UIO_MX;
+		return (error);
+	}
+
+	/* The "fd" directory: one entry per open descriptor. */
+	error = 0;
+	while (i < fdp->fd_nfiles && uio->uio_resid >= UIO_MX) {
+		if (fdp->fd_ofiles[i] != NULL) {
+			struct dirent d;
+			struct dirent *dp = &d;
+
+			bzero((caddr_t) dp, UIO_MX);
+
+			dp->d_namlen = sprintf(dp->d_name, "%d", i);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_fileno = i + FD_STDIN;
+			/*
+			 * And ship to userland
+			 */
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+/*
+ * Copy the target of an fdesc symbolic link to the caller.  Fails with
+ * EPERM when the vnode is not of type VLNK and with EOPNOTSUPP when it
+ * is a link vnode that is not an Flink node; otherwise returns the
+ * result of uiomove().  The target is copied without a terminating NUL.
+ */
+static int
+fdesc_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	char *target;
+
+	if (vp->v_type != VLNK)
+		return (EPERM);
+
+	if (VTOFDESC(vp)->fd_type != Flink)
+		return (EOPNOTSUPP);
+
+	target = VTOFDESC(vp)->fd_link;
+	return (uiomove(target, strlen(target), ap->a_uio));
+}
+
+/*
+ * Read from an fdesc vnode.  Only the controlling-terminal node
+ * (Fctty) can be read; the request goes to the ctty device's d_read
+ * entry.  Every other node type yields EOPNOTSUPP.
+ */
+static int
+fdesc_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int rv;
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	rv = (*ctty_cdevsw.d_read)(devctty, ap->a_uio, ap->a_ioflag);
+	return (rv);
+}
+
+/*
+ * Write to an fdesc vnode.  Only the controlling-terminal node (Fctty)
+ * can be written; the request goes to the ctty device's d_write entry.
+ * Every other node type yields EOPNOTSUPP.
+ */
+static int
+fdesc_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int rv;
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	rv = (*ctty_cdevsw.d_write)(devctty, ap->a_uio, ap->a_ioflag);
+	return (rv);
+}
+
+/*
+ * ioctl on an fdesc vnode.  Only the controlling-terminal node (Fctty)
+ * supports it; the request goes to the ctty device's d_ioctl entry.
+ * Every other node type yields EOPNOTSUPP.
+ */
+static int
+fdesc_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int rv;
+
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	rv = (*ctty_cdevsw.d_ioctl)(devctty, ap->a_command,
+	    ap->a_data, ap->a_fflag, ap->a_p);
+	return (rv);
+}
+
+/*
+ * Poll an fdesc vnode.  The controlling-terminal node (Fctty) asks the
+ * ctty device's d_poll entry; every other node type returns whatever
+ * seltrue() reports for the requested events.
+ */
+static int
+fdesc_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Fctty)
+		return ((*ctty_cdevsw.d_poll)(devctty, ap->a_events, ap->a_p));
+
+	return (seltrue(0, ap->a_events, ap->a_p));
+}
+
+/*
+ * Called when the vnode becomes inactive: unlock it and reset its
+ * type to VNON.  Always returns 0.
+ */
+static int
+fdesc_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ /*
+ * Clear out the v_type field to avoid
+ * nasty things happening in vgone().
+ */
+ VOP_UNLOCK(vp, 0, ap->a_p);
+ vp->v_type = VNON;
+ return (0);
+}
+
+/*
+ * Detach and free the fdescnode of a vnode that is being reclaimed:
+ * remove it from its hash chain, free it (M_TEMP, matching the MALLOC
+ * in fdesc_allocvp()) and clear v_data.  Always returns 0.
+ */
+static int
+fdesc_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct fdescnode *fd = VTOFDESC(vp);
+
+ LIST_REMOVE(fd, fd_hash);
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = 0;
+
+ return (0);
+}
+
+/*
+ * Print out the contents of a /dev/fd vnode.  The output is a fixed
+ * string; the vnode argument is not examined.
+ * NOTE(review): the message says VT_NON although fdesc_allocvp()
+ * creates these vnodes with tag VT_FDESC -- confirm which is intended.
+ */
+/* ARGSUSED */
+static int
+fdesc_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ printf("tag VT_NON, fdesc vnode\n");
+ return (0);
+}
+
+/*
+ * /dev/fd "should never get here" operation.  Installed in the vnode
+ * operations table for vop_bmap; panics unconditionally.
+ */
+static int
+fdesc_badop()
+{
+
+ panic("fdesc: bad op");
+ /* NOTREACHED */
+}
+
+/*
+ * Vnode operations implemented by fdesc.  Operations without an entry
+ * here go to the vop_default_desc handler, vop_defaultop.  The table is
+ * tied to fdesc_vnodeop_p by fdesc_vnodeop_opv_desc and passed to
+ * VNODEOP_SET below.
+ */
+static struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_access_desc, (vop_t *) vop_null },
+ { &vop_bmap_desc, (vop_t *) fdesc_badop },
+ { &vop_getattr_desc, (vop_t *) fdesc_getattr },
+ { &vop_inactive_desc, (vop_t *) fdesc_inactive },
+ { &vop_ioctl_desc, (vop_t *) fdesc_ioctl },
+ { &vop_lookup_desc, (vop_t *) fdesc_lookup },
+ { &vop_open_desc, (vop_t *) fdesc_open },
+ { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
+ { &vop_poll_desc, (vop_t *) fdesc_poll },
+ { &vop_print_desc, (vop_t *) fdesc_print },
+ { &vop_read_desc, (vop_t *) fdesc_read },
+ { &vop_readdir_desc, (vop_t *) fdesc_readdir },
+ { &vop_readlink_desc, (vop_t *) fdesc_readlink },
+ { &vop_reclaim_desc, (vop_t *) fdesc_reclaim },
+ { &vop_setattr_desc, (vop_t *) fdesc_setattr },
+ { &vop_write_desc, (vop_t *) fdesc_write },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc fdesc_vnodeop_opv_desc =
+ { &fdesc_vnodeop_p, fdesc_vnodeop_entries };
+
+VNODEOP_SET(fdesc_vnodeop_opv_desc);
diff --git a/sys/fs/fifofs/fifo.h b/sys/fs/fifofs/fifo.h
new file mode 100644
index 0000000..ec186d0d
--- /dev/null
+++ b/sys/fs/fifofs/fifo.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo.h 8.6 (Berkeley) 5/21/95
+ * $Id: fifo.h,v 1.14 1997/09/14 02:57:51 peter Exp $
+ */
+
+extern vop_t **fifo_vnodeop_p;
+
+/*
+ * Prototypes for fifo operations on vnodes.
+ */
+int fifo_vnoperate __P((struct vop_generic_args *));
+int fifo_printinfo __P((struct vnode *));
+
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
new file mode 100644
index 0000000..f7e47e1
--- /dev/null
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 1990, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo_vnops.c 8.10 (Berkeley) 5/27/95
+ * $Id: fifo_vnops.c,v 1.42 1998/02/04 22:32:45 eivind Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/unistd.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/filio.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/poll.h>
+#include <sys/un.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * This structure is associated with the FIFO vnode and stores
+ * the state associated with the FIFO.  It is allocated by the first
+ * fifo_open() and freed by fifo_close().
+ */
+struct fifoinfo {
+ struct socket *fi_readsock; /* socket fifo_read() receives from */
+ struct socket *fi_writesock; /* socket fifo_write() sends on */
+ long fi_readers; /* opens with FREAD not yet closed */
+ long fi_writers; /* opens with FWRITE not yet closed */
+};
+
+static int fifo_badop __P((void));
+static int fifo_print __P((struct vop_print_args *));
+static int fifo_lookup __P((struct vop_lookup_args *));
+static int fifo_open __P((struct vop_open_args *));
+static int fifo_close __P((struct vop_close_args *));
+static int fifo_read __P((struct vop_read_args *));
+static int fifo_write __P((struct vop_write_args *));
+static int fifo_ioctl __P((struct vop_ioctl_args *));
+static int fifo_poll __P((struct vop_poll_args *));
+static int fifo_inactive __P((struct vop_inactive_args *));
+static int fifo_bmap __P((struct vop_bmap_args *));
+static int fifo_pathconf __P((struct vop_pathconf_args *));
+static int fifo_advlock __P((struct vop_advlock_args *));
+
+
+/*
+ * Vnode operations for fifos.  Operations that make no sense on a fifo
+ * are routed to fifo_badop, which panics; access, getattr and setattr
+ * go to vop_ebadf.  The table is tied to fifo_vnodeop_p by
+ * fifo_vnodeop_opv_desc and passed to VNODEOP_SET below.
+ */
+vop_t **fifo_vnodeop_p;
+static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_abortop_desc, (vop_t *) fifo_badop },
+ { &vop_access_desc, (vop_t *) vop_ebadf },
+ { &vop_advlock_desc, (vop_t *) fifo_advlock },
+ { &vop_bmap_desc, (vop_t *) fifo_bmap },
+ { &vop_close_desc, (vop_t *) fifo_close },
+ { &vop_create_desc, (vop_t *) fifo_badop },
+ { &vop_getattr_desc, (vop_t *) vop_ebadf },
+ { &vop_inactive_desc, (vop_t *) fifo_inactive },
+ { &vop_ioctl_desc, (vop_t *) fifo_ioctl },
+ { &vop_lease_desc, (vop_t *) vop_null },
+ { &vop_link_desc, (vop_t *) fifo_badop },
+ { &vop_lookup_desc, (vop_t *) fifo_lookup },
+ { &vop_mkdir_desc, (vop_t *) fifo_badop },
+ { &vop_mknod_desc, (vop_t *) fifo_badop },
+ { &vop_open_desc, (vop_t *) fifo_open },
+ { &vop_pathconf_desc, (vop_t *) fifo_pathconf },
+ { &vop_poll_desc, (vop_t *) fifo_poll },
+ { &vop_print_desc, (vop_t *) fifo_print },
+ { &vop_read_desc, (vop_t *) fifo_read },
+ { &vop_readdir_desc, (vop_t *) fifo_badop },
+ { &vop_readlink_desc, (vop_t *) fifo_badop },
+ { &vop_reallocblks_desc, (vop_t *) fifo_badop },
+ { &vop_reclaim_desc, (vop_t *) vop_null },
+ { &vop_remove_desc, (vop_t *) fifo_badop },
+ { &vop_rename_desc, (vop_t *) fifo_badop },
+ { &vop_rmdir_desc, (vop_t *) fifo_badop },
+ { &vop_setattr_desc, (vop_t *) vop_ebadf },
+ { &vop_symlink_desc, (vop_t *) fifo_badop },
+ { &vop_write_desc, (vop_t *) fifo_write },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc fifo_vnodeop_opv_desc =
+ { &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+VNODEOP_SET(fifo_vnodeop_opv_desc);
+
+/*
+ * Generic entry point: dispatch the operation described by ap->a_desc
+ * through the fifo operations vector and return its result.
+ */
+int
+fifo_vnoperate(ap)
+ struct vop_generic_args /* {
+ struct vnodeop_desc *a_desc;
+ <other random data follows, presumably>
+ } */ *ap;
+{
+ return (VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, ap));
+}
+
+/*
+ * Trivial lookup routine that always fails: a fifo is not a directory,
+ * so *a_vpp is cleared and ENOTDIR is returned.
+ */
+/* ARGSUSED */
+static int
+fifo_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+
+ *ap->a_vpp = NULL;
+ return (ENOTDIR);
+}
+
+/*
+ * Open called to set up a new instance of a fifo or
+ * to find an active instance of a fifo.
+ *
+ * The first open allocates the fifoinfo and creates a connected pair
+ * of AF_LOCAL stream sockets; a failure at any step undoes the earlier
+ * steps and returns the error.  Reader and writer counts are then
+ * updated according to a_mode, waking sleepers waiting for the other
+ * side.  Unless O_NONBLOCK is set, a reader sleeps until there is a
+ * writer and a writer sleeps until there is a reader; a non-blocking
+ * writer with no reader fails with ENXIO.  If a sleep is interrupted or
+ * ENXIO is raised, the open is undone with VOP_CLOSE().
+ */
+/* ARGSUSED */
+static int
+fifo_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct fifoinfo *fip;
+ struct proc *p = ap->a_p;
+ struct socket *rso, *wso;
+ int error;
+
+ if ((fip = vp->v_fifoinfo) == NULL) {
+ /* First open: build the socket pair behind the fifo. */
+ MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+ vp->v_fifoinfo = fip;
+ error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, ap->a_p);
+ if (error) {
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readsock = rso;
+ error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, ap->a_p);
+ if (error) {
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_writesock = wso;
+ error = unp_connect2(wso, rso);
+ if (error) {
+ (void)soclose(wso);
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readers = fip->fi_writers = 0;
+ wso->so_snd.sb_lowat = PIPE_BUF;
+ /* No writer yet, so the read side starts out at end-of-data. */
+ rso->so_state |= SS_CANTRCVMORE;
+ }
+ if (ap->a_mode & FREAD) {
+ fip->fi_readers++;
+ if (fip->fi_readers == 1) {
+ /* First reader: writes are possible again; wake writers. */
+ fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+ if (fip->fi_writers > 0)
+ wakeup((caddr_t)&fip->fi_writers);
+ }
+ }
+ if (ap->a_mode & FWRITE) {
+ fip->fi_writers++;
+ if (fip->fi_writers == 1) {
+ /* First writer: reads are possible again; wake readers. */
+ fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+ if (fip->fi_readers > 0)
+ wakeup((caddr_t)&fip->fi_readers);
+ }
+ }
+ if ((ap->a_mode & FREAD) && (ap->a_mode & O_NONBLOCK) == 0) {
+ /* Blocking reader: wait, with the vnode unlocked, for a writer. */
+ while (fip->fi_writers == 0) {
+ VOP_UNLOCK(vp, 0, p);
+ error = tsleep((caddr_t)&fip->fi_readers,
+ PCATCH | PSOCK, "fifoor", 0);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (error)
+ goto bad;
+ }
+ }
+ if (ap->a_mode & FWRITE) {
+ if (ap->a_mode & O_NONBLOCK) {
+ if (fip->fi_readers == 0) {
+ error = ENXIO;
+ goto bad;
+ }
+ } else {
+ /* Blocking writer: wait, with the vnode unlocked, for a reader. */
+ while (fip->fi_readers == 0) {
+ VOP_UNLOCK(vp, 0, p);
+ error = tsleep((caddr_t)&fip->fi_writers,
+ PCATCH | PSOCK, "fifoow", 0);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (error)
+ goto bad;
+ }
+ }
+ }
+ return (0);
+bad:
+ /* Undo the count changes made above. */
+ VOP_CLOSE(vp, ap->a_mode, ap->a_cred, p);
+ return (error);
+}
+
+/*
+ * Vnode op for read
+ *
+ * Receives from the fifo's read socket.  A zero-length request returns
+ * 0 at once.  When IO_NDELAY is set in a_ioflag the socket is put in
+ * non-blocking mode (SS_NBIO) for the duration of the call.  The vnode
+ * is unlocked around soreceive() and re-locked exclusively afterwards.
+ * Returns the result of soreceive().
+ */
+/* ARGSUSED */
+static int
+fifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock;
+	struct proc *p = uio->uio_procp;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_READ)
+		panic("fifo_read mode");
+#endif
+	if (uio->uio_resid == 0)
+		return (0);
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state |= SS_NBIO;
+	VOP_UNLOCK(ap->a_vp, 0, p);
+	error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0,
+	    (struct mbuf **)0, (int *)0);
+	vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p);
+	if (ap->a_ioflag & IO_NDELAY)
+		rso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Vnode op for write
+ *
+ * Sends on the fifo's write socket.  When IO_NDELAY is set in a_ioflag
+ * the socket is put in non-blocking mode (SS_NBIO) for the duration of
+ * the call.  The vnode is unlocked around sosend() and re-locked
+ * exclusively afterwards.  Returns the result of sosend().
+ */
+/* ARGSUSED */
+static int
+fifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uio = ap->a_uio;
+	struct socket *wso = vp->v_fifoinfo->fi_writesock;
+	struct proc *p = uio->uio_procp;
+	int nonblock = ap->a_ioflag & IO_NDELAY;
+	int error;
+
+#ifdef DIAGNOSTIC
+	if (uio->uio_rw != UIO_WRITE)
+		panic("fifo_write mode");
+#endif
+	if (nonblock)
+		wso->so_state |= SS_NBIO;
+
+	VOP_UNLOCK(vp, 0, p);
+	error = sosend(wso, (struct sockaddr *)0, uio, 0,
+	    (struct mbuf *)0, 0, p);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+
+	if (nonblock)
+		wso->so_state &= ~SS_NBIO;
+	return (error);
+}
+
+/*
+ * Device ioctl operation.
+ *
+ * FIONBIO is accepted without doing anything.  Every other command is
+ * passed to soo_ioctl() on the read socket when the file is open for
+ * reading and then on the write socket when it is open for writing,
+ * using a temporary struct file that carries only f_data.  The first
+ * error stops processing and is returned; otherwise 0.
+ */
+/* ARGSUSED */
+static int
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file fake;
+	int rv;
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+
+	if ((ap->a_fflag & FREAD) != 0) {
+		fake.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
+		rv = soo_ioctl(&fake, ap->a_command, ap->a_data, ap->a_p);
+		if (rv != 0)
+			return (rv);
+	}
+
+	if ((ap->a_fflag & FWRITE) != 0) {
+		fake.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+		rv = soo_ioctl(&fake, ap->a_command, ap->a_data, ap->a_p);
+		if (rv != 0)
+			return (rv);
+	}
+
+	return (0);
+}
+
+/*
+ * Poll a fifo.  Read-type events are checked with soo_poll() on the
+ * read socket and write-type events on the write socket, each through
+ * a temporary struct file that carries only f_data; a socket pointer
+ * that is NULL is skipped.  Returns the union of the reported events.
+ */
+/* ARGSUSED */
+static int
+fifo_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct file fake;
+	int ready = 0;
+
+	if ((ap->a_events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) != 0) {
+		fake.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
+		if (fake.f_data) {
+			ready |= soo_poll(&fake, ap->a_events, ap->a_cred,
+			    ap->a_p);
+		}
+	}
+
+	if ((ap->a_events & (POLLOUT | POLLWRNORM | POLLWRBAND)) != 0) {
+		fake.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
+		if (fake.f_data) {
+			ready |= soo_poll(&fake, ap->a_events, ap->a_cred,
+			    ap->a_p);
+		}
+	}
+
+	return (ready);
+}
+
+/*
+ * Called when the vnode becomes inactive: just unlock it.  Always
+ * returns 0.
+ */
+static int
+fifo_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ return (0);
+}
+
+/*
+ * Identity block mapping: every output the caller asked for (non-NULL
+ * pointer) is filled in with what was passed in -- the same vnode, the
+ * same block number -- and both run lengths are reported as 0.
+ * Always returns 0.
+ */
+static int
+fifo_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+
+	if (ap->a_vpp)
+		*ap->a_vpp = ap->a_vp;
+
+	if (ap->a_bnp)
+		*ap->a_bnp = ap->a_bn;
+
+	if (ap->a_runp)
+		*ap->a_runp = 0;
+
+	if (ap->a_runb)
+		*ap->a_runb = 0;
+
+	return (0);
+}
+
+/*
+ * Device close routine
+ *
+ * Drops the reader and/or writer count according to a_fflag.  When the
+ * last reader goes away the write socket is marked unable to send;
+ * when the last writer goes away the read socket is marked unable to
+ * receive.  If the vnode's use count is still above one nothing else
+ * happens.  Otherwise both sockets are closed and the fifoinfo is
+ * freed; the first soclose() error, if any, is returned.
+ */
+/* ARGSUSED */
+static int
+fifo_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct fifoinfo *fip = vp->v_fifoinfo;
+ int error1, error2;
+
+ if (ap->a_fflag & FREAD) {
+ fip->fi_readers--;
+ if (fip->fi_readers == 0)
+ socantsendmore(fip->fi_writesock);
+ }
+ if (ap->a_fflag & FWRITE) {
+ fip->fi_writers--;
+ if (fip->fi_writers == 0)
+ socantrcvmore(fip->fi_readsock);
+ }
+ /* Other users remain: keep the sockets. */
+ if (vp->v_usecount > 1)
+ return (0);
+ error1 = soclose(fip->fi_readsock);
+ error2 = soclose(fip->fi_writesock);
+ FREE(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ if (error1)
+ return (error1);
+ return (error2);
+}
+
+
+/*
+ * Print the reader and writer counts of a fifo vnode, as a fragment
+ * meant to follow other output on the same line.  Always returns 0.
+ */
+int
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	struct fifoinfo *info;
+
+	info = vp->v_fifoinfo;
+	printf(", fifo with %ld readers and %ld writers",
+	    info->fi_readers, info->fi_writers);
+
+	return (0);
+}
+
+/*
+ * Print out the contents of a fifo vnode: a fixed tag string, the
+ * reader/writer counts from fifo_printinfo(), and a newline.
+ * Always returns 0.
+ */
+static int
+fifo_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ printf("tag VT_NON");
+ fifo_printinfo(ap->a_vp);
+ printf("\n");
+ return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to fifo's.
+ *
+ * Handles _PC_LINK_MAX (LINK_MAX), _PC_PIPE_BUF (PIPE_BUF) and
+ * _PC_CHOWN_RESTRICTED (1), storing the value in *a_retval and
+ * returning 0.  Any other name yields EINVAL and leaves *a_retval
+ * untouched.
+ *
+ * Defined static to agree with the prototype at the top of the file.
+ */
+static int
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Fifo advisory byte-level locks.  Locking is never performed: a
+ * request carrying F_FLOCK in a_flags is answered with EOPNOTSUPP and
+ * any other request with EINVAL.
+ */
+/* ARGSUSED */
+static int
+fifo_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+
+	if (ap->a_flags & F_FLOCK)
+		return (EOPNOTSUPP);
+
+	return (EINVAL);
+}
+
+/*
+ * Fifo bad operation.  Installed in the fifo operations table for
+ * every operation that must never be invoked on a fifo; panics
+ * unconditionally.
+ */
+static int
+fifo_badop()
+{
+
+ panic("fifo_badop called");
+ /* NOTREACHED */
+}
diff --git a/sys/fs/msdosfs/bootsect.h b/sys/fs/msdosfs/bootsect.h
new file mode 100644
index 0000000..11b93371a
--- /dev/null
+++ b/sys/fs/msdosfs/bootsect.h
@@ -0,0 +1,113 @@
+/* $Id: bootsect.h,v 1.5 1997/02/22 09:40:43 peter Exp $ */
+/* $NetBSD: bootsect.h,v 1.9 1997/11/17 15:36:17 ws Exp $ */
+
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * Format of a boot sector. This is the first sector on a DOS floppy disk
+ * or the first sector of a partition on a hard disk. But, it is not the
+ * first sector of a partitioned hard disk.
+ */
+struct bootsector33 {
+ u_int8_t bsJump[3]; /* jump inst E9xxxx or EBxx90 */
+ int8_t bsOemName[8]; /* OEM name and version */
+ int8_t bsBPB[19]; /* BIOS parameter block */
+ int8_t bsDriveNumber; /* drive number (0x80) */
+ int8_t bsBootCode[479]; /* pad so struct is 512b */
+ u_int8_t bsBootSectSig0;
+ u_int8_t bsBootSectSig1;
+#define BOOTSIG0 0x55
+#define BOOTSIG1 0xaa
+};
+
+struct extboot {
+ int8_t exDriveNumber; /* drive number (0x80) */
+ int8_t exReserved1; /* reserved */
+ int8_t exBootSignature; /* ext. boot signature (0x29) */
+#define EXBOOTSIG 0x29
+ int8_t exVolumeID[4]; /* volume ID number */
+ int8_t exVolumeLabel[11]; /* volume label */
+ int8_t exFileSysType[8]; /* fs type (FAT12 or FAT16) */
+};
+
+struct bootsector50 {
+ u_int8_t bsJump[3]; /* jump inst E9xxxx or EBxx90 */
+ int8_t bsOemName[8]; /* OEM name and version */
+ int8_t bsBPB[25]; /* BIOS parameter block */
+ int8_t bsExt[26]; /* Bootsector Extension */
+ int8_t bsBootCode[448]; /* pad so structure is 512b */
+ u_int8_t bsBootSectSig0;
+ u_int8_t bsBootSectSig1;
+#define BOOTSIG0 0x55
+#define BOOTSIG1 0xaa
+};
+
+ struct bootsector710 { /* members add up to 512 bytes: 3+8+53+26+418+4 */
+ u_int8_t bsJump[3]; /* jump inst E9xxxx or EBxx90 */
+ int8_t bsOEMName[8]; /* OEM name and version (spelled bsOemName in bootsector33/50) */
+ int8_t bsPBP[53]; /* BIOS parameter block (sic: the member is bsBPB in bootsector33/50) */
+ int8_t bsExt[26]; /* Bootsector Extension */
+ int8_t bsBootCode[418]; /* pad so structure is 512b */
+ u_int8_t bsBootSectSig2; /* 2 & 3 are only defined for FAT32? */
+ u_int8_t bsBootSectSig3;
+ u_int8_t bsBootSectSig0; /* compared against BOOTSIG0 -- presumably; verify against users */
+ u_int8_t bsBootSectSig1; /* compared against BOOTSIG1 -- presumably; verify against users */
+ #define BOOTSIG0 0x55
+ #define BOOTSIG1 0xaa
+ #define BOOTSIG2 0
+ #define BOOTSIG3 0
+ };
+#ifdef atari
+/*
+ * The boot sector on a gemdos fs is a little bit different from the msdos fs
+ * format. Currently there is no need to declare a separate structure, the
+ * bootsector33 struct will do.
+ */
+#if 0
+struct bootsec_atari {
+ u_int8_t bsBranch[2]; /* branch inst if auto-boot */
+ int8_t bsFiller[6]; /* anything or nothing */
+ int8_t bsSerial[3]; /* serial no. for mediachange */
+ int8_t bsBPB[19]; /* BIOS parameter block */
+ int8_t bsBootCode[482]; /* pad so struct is 512b */
+};
+#endif
+#endif /* atari */
+
+union bootsector {
+ struct bootsector33 bs33;
+ struct bootsector50 bs50;
+ struct bootsector710 bs710;
+};
+
+#if 0
+/*
+ * Shorthand for fields in the bpb.
+ */
+#define bsBytesPerSec bsBPB.bpbBytesPerSec
+#define bsSectPerClust bsBPB.bpbSectPerClust
+#define bsResSectors bsBPB.bpbResSectors
+#define bsFATS bsBPB.bpbFATS
+#define bsRootDirEnts bsBPB.bpbRootDirEnts
+#define bsSectors bsBPB.bpbSectors
+#define bsMedia bsBPB.bpbMedia
+#define bsFATsecs bsBPB.bpbFATsecs
+#define bsSectPerTrack bsBPB.bpbSectPerTrack
+#define bsHeads bsBPB.bpbHeads
+#define bsHiddenSecs bsBPB.bpbHiddenSecs
+#define bsHugeSectors bsBPB.bpbHugeSectors
+#endif
diff --git a/sys/fs/msdosfs/bpb.h b/sys/fs/msdosfs/bpb.h
new file mode 100644
index 0000000..bc00a75
--- /dev/null
+++ b/sys/fs/msdosfs/bpb.h
@@ -0,0 +1,209 @@
+/* $Id: bpb.h,v 1.5 1997/02/22 09:40:44 peter Exp $ */
+/* $NetBSD: bpb.h,v 1.7 1997/11/17 15:36:24 ws Exp $ */
+
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * BIOS Parameter Block (BPB) for DOS 3.3
+ */
+struct bpb33 {
+ u_int16_t bpbBytesPerSec; /* bytes per sector */
+ u_int8_t bpbSecPerClust; /* sectors per cluster */
+ u_int16_t bpbResSectors; /* number of reserved sectors */
+ u_int8_t bpbFATs; /* number of FATs */
+ u_int16_t bpbRootDirEnts; /* number of root directory entries */
+ u_int16_t bpbSectors; /* total number of sectors */
+ u_int8_t bpbMedia; /* media descriptor */
+ u_int16_t bpbFATsecs; /* number of sectors per FAT */
+ u_int16_t bpbSecPerTrack; /* sectors per track */
+ u_int16_t bpbHeads; /* number of heads */
+ u_int16_t bpbHiddenSecs; /* number of hidden sectors */
+};
+
+/*
+ * BPB for DOS 5.0. The difference is that bpbHiddenSecs is a short for DOS 3.3,
+ * and bpbHugeSectors is not in the 3.3 bpb.
+ */
+struct bpb50 {
+ u_int16_t bpbBytesPerSec; /* bytes per sector */
+ u_int8_t bpbSecPerClust; /* sectors per cluster */
+ u_int16_t bpbResSectors; /* number of reserved sectors */
+ u_int8_t bpbFATs; /* number of FATs */
+ u_int16_t bpbRootDirEnts; /* number of root directory entries */
+ u_int16_t bpbSectors; /* total number of sectors */
+ u_int8_t bpbMedia; /* media descriptor */
+ u_int16_t bpbFATsecs; /* number of sectors per FAT */
+ u_int16_t bpbSecPerTrack; /* sectors per track */
+ u_int16_t bpbHeads; /* number of heads */
+ u_int32_t bpbHiddenSecs; /* # of hidden sectors */
+ u_int32_t bpbHugeSectors; /* # of sectors if bpbSectors == 0 */
+};
+
+/*
+ * BPB for DOS 7.10 (FAT32). This one has a few extensions to bpb50.
+ */
+struct bpb710 {
+ u_int16_t bpbBytesPerSec; /* bytes per sector */
+ u_int8_t bpbSecPerClust; /* sectors per cluster */
+ u_int16_t bpbResSectors; /* number of reserved sectors */
+ u_int8_t bpbFATs; /* number of FATs */
+ u_int16_t bpbRootDirEnts; /* number of root directory entries */
+ u_int16_t bpbSectors; /* total number of sectors */
+ u_int8_t bpbMedia; /* media descriptor */
+ u_int16_t bpbFATsecs; /* number of sectors per FAT */
+ u_int16_t bpbSecPerTrack; /* sectors per track */
+ u_int16_t bpbHeads; /* number of heads */
+ u_int32_t bpbHiddenSecs; /* # of hidden sectors */
+ u_int32_t bpbHugeSectors; /* # of sectors if bpbSectors == 0 */
+ u_int32_t bpbBigFATsecs; /* like bpbFATsecs for FAT32 */
+ u_int16_t bpbExtFlags; /* extended flags: */
+#define FATNUM 0xf /* mask for numbering active FAT */
+#define FATMIRROR 0x80 /* FAT is mirrored (like it always was) */
+ u_int16_t bpbFSVers; /* filesystem version */
+#define FSVERS 0 /* currently only 0 is understood */
+ u_int32_t bpbRootClust; /* start cluster for root directory */
+ u_int16_t bpbFSInfo; /* filesystem info structure sector */
+ u_int16_t bpbBackup; /* backup boot sector */
+ /* There is a 12 byte filler here, but we ignore it */
+};
+
+#ifdef atari
+/*
+ * BPB for gemdos filesystems. Atari leaves the obsolete stuff undefined.
+ * Currently there is no need for a separate BPB structure.
+ */
+#if 0
+struct bpb_a {
+ u_int16_t bpbBytesPerSec; /* bytes per sector */
+ u_int8_t bpbSecPerClust; /* sectors per cluster */
+ u_int16_t bpbResSectors; /* number of reserved sectors */
+ u_int8_t bpbFATs; /* number of FATs */
+ u_int16_t bpbRootDirEnts; /* number of root directory entries */
+ u_int16_t bpbSectors; /* total number of sectors */
+ u_int8_t bpbUseless1; /* meaningless on gemdos fs */
+ u_int16_t bpbFATsecs; /* number of sectors per FAT */
+ u_int16_t bpbUseless2; /* meaningless for harddisk fs */
+ u_int16_t bpbUseless3; /* meaningless for harddisk fs */
+ u_int16_t bpbHiddenSecs; /* the TOS-BIOS ignores this */
+};
+#endif
+#endif /* atari */
+
+/*
+ * The following structures represent how the bpb's look on disk. shorts
+ * and longs are just character arrays of the appropriate length. This is
+ * because the compiler forces shorts and longs to align on word or
+ * halfword boundaries.
+ *
+ * XXX The little-endian code here assumes that the processor can access
+ * 16-bit and 32-bit quantities on byte boundaries. If this is not true,
+ * use the macros for the big-endian case.
+ */
+#include <machine/endian.h>
+ #if (BYTE_ORDER == LITTLE_ENDIAN) /* && defined(UNALIGNED_ACCESS) */
+ /*
+ * Little-endian host: read and write the on-disk value in place.  The
+ * pointer may be unaligned (see the XXX note above).  Each expansion is
+ * fully parenthesized so it composes safely with surrounding operators.
+ */
+ #define getushort(x) (*((u_int16_t *)(x)))
+ #define getulong(x) (*((u_int32_t *)(x)))
+ #define putushort(p, v) (*((u_int16_t *)(p)) = (v))
+ #define putulong(p, v) (*((u_int32_t *)(p)) = (v))
+ #else
+ /*
+ * Byte-wise access: assemble or split the little-endian on-disk value one
+ * byte at a time, which needs no particular alignment.
+ *
+ * In getulong() every byte is widened to u_int32_t before it is shifted.
+ * A bare u_int8_t would be promoted to int, and shifting a byte >= 0x80
+ * left by 24 overflows a 32-bit signed int.  The result type is
+ * u_int32_t, matching the little-endian variant.
+ */
+ #define getushort(x) (((u_int8_t *)(x))[0] + (((u_int8_t *)(x))[1] << 8))
+ #define getulong(x) ((u_int32_t)((u_int8_t *)(x))[0] \
+ 	| ((u_int32_t)((u_int8_t *)(x))[1] << 8) \
+ 	| ((u_int32_t)((u_int8_t *)(x))[2] << 16) \
+ 	| ((u_int32_t)((u_int8_t *)(x))[3] << 24))
+ #define putushort(p, v) (((u_int8_t *)(p))[0] = (v), \
+ 	((u_int8_t *)(p))[1] = (v) >> 8)
+ #define putulong(p, v) (((u_int8_t *)(p))[0] = (v), \
+ 	((u_int8_t *)(p))[1] = (v) >> 8, \
+ 	((u_int8_t *)(p))[2] = (v) >> 16,\
+ 	((u_int8_t *)(p))[3] = (v) >> 24)
+ #endif
+
+/*
+ * BIOS Parameter Block (BPB) for DOS 3.3
+ */
+struct byte_bpb33 {
+ int8_t bpbBytesPerSec[2]; /* bytes per sector */
+ int8_t bpbSecPerClust; /* sectors per cluster */
+ int8_t bpbResSectors[2]; /* number of reserved sectors */
+ int8_t bpbFATs; /* number of FATs */
+ int8_t bpbRootDirEnts[2]; /* number of root directory entries */
+ int8_t bpbSectors[2]; /* total number of sectors */
+ int8_t bpbMedia; /* media descriptor */
+ int8_t bpbFATsecs[2]; /* number of sectors per FAT */
+ int8_t bpbSecPerTrack[2]; /* sectors per track */
+ int8_t bpbHeads[2]; /* number of heads */
+ int8_t bpbHiddenSecs[2]; /* number of hidden sectors */
+};
+
+/*
+ * BPB for DOS 5.0. The difference is that bpbHiddenSecs is a short for DOS 3.3,
+ * and bpbHugeSectors is not in the 3.3 bpb.
+ */
+struct byte_bpb50 {
+ int8_t bpbBytesPerSec[2]; /* bytes per sector */
+ int8_t bpbSecPerClust; /* sectors per cluster */
+ int8_t bpbResSectors[2]; /* number of reserved sectors */
+ int8_t bpbFATs; /* number of FATs */
+ int8_t bpbRootDirEnts[2]; /* number of root directory entries */
+ int8_t bpbSectors[2]; /* total number of sectors */
+ int8_t bpbMedia; /* media descriptor */
+ int8_t bpbFATsecs[2]; /* number of sectors per FAT */
+ int8_t bpbSecPerTrack[2]; /* sectors per track */
+ int8_t bpbHeads[2]; /* number of heads */
+ int8_t bpbHiddenSecs[4]; /* number of hidden sectors */
+ int8_t bpbHugeSectors[4]; /* # of sectors if bpbSectors == 0 */
+};
+
+/*
+ * BPB for DOS 7.10 (FAT32). This one has a few extensions to bpb50.
+ */
+struct byte_bpb710 {
+ u_int8_t bpbBytesPerSec[2]; /* bytes per sector */
+ u_int8_t bpbSecPerClust; /* sectors per cluster */
+ u_int8_t bpbResSectors[2]; /* number of reserved sectors */
+ u_int8_t bpbFATs; /* number of FATs */
+ u_int8_t bpbRootDirEnts[2]; /* number of root directory entries */
+ u_int8_t bpbSectors[2]; /* total number of sectors */
+ u_int8_t bpbMedia; /* media descriptor */
+ u_int8_t bpbFATsecs[2]; /* number of sectors per FAT */
+ u_int8_t bpbSecPerTrack[2]; /* sectors per track */
+ u_int8_t bpbHeads[2]; /* number of heads */
+ u_int8_t bpbHiddenSecs[4]; /* # of hidden sectors */
+ u_int8_t bpbHugeSectors[4]; /* # of sectors if bpbSectors == 0 */
+ u_int8_t bpbBigFATsecs[4]; /* like bpbFATsecs for FAT32 */
+ u_int8_t bpbExtFlags[2]; /* extended flags: */
+ u_int8_t bpbFSVers[2]; /* filesystem version */
+ u_int8_t bpbRootClust[4]; /* start cluster for root directory */
+ u_int8_t bpbFSInfo[2]; /* filesystem info structure sector */
+ u_int8_t bpbBackup[2]; /* backup boot sector */
+ /* There is a 12 byte filler here, but we ignore it */
+};
+
+/*
+ * FAT32 FSInfo block.
+ */
+struct fsinfo {
+ u_int8_t fsisig1[4];
+ u_int8_t fsifill1[480];
+ u_int8_t fsisig2[4];
+ u_int8_t fsinfree[4];
+ u_int8_t fsinxtfree[4];
+ u_int8_t fsifill2[12];
+ u_int8_t fsisig3[4];
+ u_int8_t fsifill3[508];
+ u_int8_t fsisig4[4];
+};
diff --git a/sys/fs/msdosfs/denode.h b/sys/fs/msdosfs/denode.h
new file mode 100644
index 0000000..ba2ef8c
--- /dev/null
+++ b/sys/fs/msdosfs/denode.h
@@ -0,0 +1,286 @@
+/* $Id: denode.h,v 1.17 1998/11/21 00:20:24 dt Exp $ */
+/* $NetBSD: denode.h,v 1.25 1997/11/17 15:36:28 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * This is the pc filesystem specific portion of the vnode structure.
+ *
+ * To describe a file uniquely the de_dirclust, de_diroffset, and
+ * de_StartCluster fields are used.
+ *
+ * de_dirclust contains the cluster number of the directory cluster
+ * containing the entry for a file or directory.
+ * de_diroffset is the index into the cluster for the entry describing
+ * a file or directory.
+ * de_StartCluster is the number of the first cluster of the file or directory.
+ *
+ * Now to describe the quirks of the pc filesystem.
+ * - Clusters 0 and 1 are reserved.
+ * - The first allocatable cluster is 2.
+ * - The root directory is of fixed size and all blocks that make it up
+ * are contiguous.
+ * - Cluster 0 refers to the root directory when it is found in the
+ * startcluster field of a directory entry that points to another directory.
+ * - Cluster 0 implies a 0 length file when found in the start cluster field
+ * of a directory entry that points to a file.
+ * - You can't use the cluster number 0 to derive the address of the root
+ * directory.
+ * - Multiple directory entries can point to a directory. The entry in the
+ * parent directory points to a child directory. Any directories in the
+ * child directory contain a ".." entry that points back to the parent.
+ * The child directory itself contains a "." entry that points to itself.
+ * - The root directory does not contain a "." or ".." entry.
+ * - Directory entries for directories are never changed once they are created
+ * (except when removed). The size stays 0, and the last modification time
+ * is never changed. This is because so many directory entries can point to
+ * the physical clusters that make up a directory. It would lead to an
+ * update nightmare.
+ * - The length field in a directory entry pointing to a directory contains 0
+ * (always). The only way to find the end of a directory is to follow the
+ * cluster chain until the "last cluster" marker is found.
+ *
+ * My extensions to make this house of cards work. These apply only to the in
+ * memory copy of the directory entry.
+ * - A reference count for each denode will be kept since dos doesn't keep such
+ * things.
+ */
+
+/*
+ * Internal pseudo-offset for (nonexistent) directory entry for the root
+ * dir in the root dir
+ */
+#define MSDOSFSROOT_OFS 0x1fffffff
+
+/*
+ * The fat cache structure. fc_fsrcn is the filesystem relative cluster
+ * number that corresponds to the file relative cluster number in this
+ * structure (fc_frcn).
+ */
+struct fatcache {
+ u_long fc_frcn; /* file relative cluster number */
+ u_long fc_fsrcn; /* filesystem relative cluster number */
+};
+
+/*
+ * The fat entry cache as it stands helps make extending files a "quick"
+ * operation by avoiding having to scan the fat to discover the last
+ * cluster of the file. The cache also helps sequential reads by
+ * remembering the last cluster read from the file. This also prevents us
+ * from having to rescan the fat to find the next cluster to read. This
+ * cache is probably pretty worthless if a file is opened by multiple
+ * processes.
+ */
+#define FC_SIZE 2 /* number of entries in the cache */
+#define FC_LASTMAP 0 /* entry the last call to pcbmap() resolved
+ * to */
+#define FC_LASTFC 1 /* entry for the last cluster in the file */
+
+#define FCE_EMPTY 0xffffffff /* doesn't represent an actual cluster # */
+
+/*
+ * Set a slot in the fat cache.
+ */
+#define fc_setcache(dep, slot, frcn, fsrcn) \
+ (dep)->de_fc[slot].fc_frcn = frcn; \
+ (dep)->de_fc[slot].fc_fsrcn = fsrcn;
+
+/*
+ * This is the in memory variant of a dos directory entry. It is usually
+ * contained within a vnode.
+ */
+struct denode {
+ struct lock de_lock; /* denode lock >Keep this first< */
+ struct denode *de_next; /* Hash chain forward */
+ struct denode **de_prev; /* Hash chain back */
+ struct vnode *de_vnode; /* addr of vnode we are part of */
+ struct vnode *de_devvp; /* vnode of blk dev we live on */
+ u_long de_flag; /* flag bits */
+ dev_t de_dev; /* device where direntry lives */
+ u_long de_dirclust; /* cluster of the directory file containing this entry */
+ u_long de_diroffset; /* offset of this entry in the directory cluster */
+ u_long de_fndoffset; /* offset of found dir entry */
+ int de_fndcnt; /* number of slots before de_fndoffset */
+ long de_refcnt; /* reference count */
+ struct msdosfsmount *de_pmp; /* addr of our mount struct */
+ u_char de_Name[12]; /* name, from DOS directory entry */
+ u_char de_Attributes; /* attributes, from directory entry */
+ u_char de_LowerCase; /* NT VFAT lower case flags */
+ u_char de_CHun; /* Hundredth of second of CTime*/
+ u_short de_CTime; /* creation time */
+ u_short de_CDate; /* creation date */
+ u_short de_ADate; /* access date */
+ u_short de_MTime; /* modification time */
+ u_short de_MDate; /* modification date */
+ u_long de_StartCluster; /* starting cluster of file */
+ u_long de_FileSize; /* size of file in bytes */
+ struct fatcache de_fc[FC_SIZE]; /* fat cache */
+ u_quad_t de_modrev; /* Revision level for lease. */
+};
+
+/*
+ * Values for the de_flag field of the denode.
+ */
+#define DE_UPDATE 0x0004 /* Modification time update request */
+#define DE_CREATE 0x0008 /* Creation time update */
+#define DE_ACCESS 0x0010 /* Access time update */
+#define DE_MODIFIED 0x0020 /* Denode has been modified */
+#define DE_RENAME 0x0040 /* Denode is in the process of being renamed */
+
+
+/*
+ * Transfer directory entries between internal and external form.
+ * dep is a struct denode * (internal form),
+ * dp is a struct direntry * (external form).
+ */
+#define DE_INTERNALIZE32(dep, dp) \
+ ((dep)->de_StartCluster |= getushort((dp)->deHighClust) << 16)
+#define DE_INTERNALIZE(dep, dp) \
+ (bcopy((dp)->deName, (dep)->de_Name, 11), \
+ (dep)->de_Attributes = (dp)->deAttributes, \
+ (dep)->de_LowerCase = (dp)->deLowerCase, \
+ (dep)->de_CHun = (dp)->deCHundredth, \
+ (dep)->de_CTime = getushort((dp)->deCTime), \
+ (dep)->de_CDate = getushort((dp)->deCDate), \
+ (dep)->de_ADate = getushort((dp)->deADate), \
+ (dep)->de_MTime = getushort((dp)->deMTime), \
+ (dep)->de_MDate = getushort((dp)->deMDate), \
+ (dep)->de_StartCluster = getushort((dp)->deStartCluster), \
+ (dep)->de_FileSize = getulong((dp)->deFileSize), \
+ (FAT32((dep)->de_pmp) ? DE_INTERNALIZE32((dep), (dp)) : 0))
+
+#define DE_EXTERNALIZE(dp, dep) \
+ (bcopy((dep)->de_Name, (dp)->deName, 11), \
+ (dp)->deAttributes = (dep)->de_Attributes, \
+ (dp)->deLowerCase = (dep)->de_LowerCase, \
+ (dp)->deCHundredth = (dep)->de_CHun, \
+ putushort((dp)->deCTime, (dep)->de_CTime), \
+ putushort((dp)->deCDate, (dep)->de_CDate), \
+ putushort((dp)->deADate, (dep)->de_ADate), \
+ putushort((dp)->deMTime, (dep)->de_MTime), \
+ putushort((dp)->deMDate, (dep)->de_MDate), \
+ putushort((dp)->deStartCluster, (dep)->de_StartCluster), \
+ putulong((dp)->deFileSize, \
+ ((dep)->de_Attributes & ATTR_DIRECTORY) ? 0 : (dep)->de_FileSize), \
+ putushort((dp)->deHighClust, (dep)->de_StartCluster >> 16))
+
+ #define de_forw de_chain[0] /* NOTE(review): struct denode in this header has no de_chain member (it has de_next/de_prev); looks stale -- confirm */
+ #define de_back de_chain[1] /* NOTE(review): relies on the same de_chain member as de_forw */
+
+#ifdef KERNEL
+
+#define VTODE(vp) ((struct denode *)(vp)->v_data)
+#define DETOV(de) ((de)->de_vnode)
+
+ /*
+ * Apply the pending timestamp requests recorded in dep->de_flag.
+ *
+ * dep is a struct denode *.  acc, mod and cre are passed to unix2dostime()
+ * as the access, modification and creation times.
+ *
+ * - DE_UPDATE: store mod into de_MDate/de_MTime, set ATTR_ARCHIVE and mark
+ *   the denode DE_MODIFIED.
+ * - If the mount has MSDOSFSMNT_NOWIN95 set, the access and creation stamps
+ *   are not touched: the request flags are cleared and the "break" leaves
+ *   the enclosing do/while, i.e. the macro.
+ * - DE_ACCESS: convert acc to a DOS date and, only if it differs from
+ *   de_ADate, store it and mark the denode DE_MODIFIED.
+ * - DE_CREATE: store cre into de_CDate/de_CTime/de_CHun and mark the denode
+ *   DE_MODIFIED.
+ * Finally DE_UPDATE, DE_CREATE and DE_ACCESS are cleared.
+ *
+ * The expansion ends without a semicolon so that "DETIMES(...);" is
+ * exactly one statement and can be followed by an else.
+ */
+ #define DETIMES(dep, acc, mod, cre) do { \
+ 	if ((dep)->de_flag & DE_UPDATE) { \
+ 		(dep)->de_flag |= DE_MODIFIED; \
+ 		unix2dostime((mod), &(dep)->de_MDate, &(dep)->de_MTime, \
+ 		    NULL); \
+ 		(dep)->de_Attributes |= ATTR_ARCHIVE; \
+ 	} \
+ 	if ((dep)->de_pmp->pm_flags & MSDOSFSMNT_NOWIN95) { \
+ 		(dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS); \
+ 		break; \
+ 	} \
+ 	if ((dep)->de_flag & DE_ACCESS) { \
+ 		u_int16_t adate; \
+ 		\
+ 		unix2dostime((acc), &adate, NULL, NULL); \
+ 		if (adate != (dep)->de_ADate) { \
+ 			(dep)->de_flag |= DE_MODIFIED; \
+ 			(dep)->de_ADate = adate; \
+ 		} \
+ 	} \
+ 	if ((dep)->de_flag & DE_CREATE) { \
+ 		unix2dostime((cre), &(dep)->de_CDate, &(dep)->de_CTime, \
+ 		    &(dep)->de_CHun); \
+ 		(dep)->de_flag |= DE_MODIFIED; \
+ 	} \
+ 	(dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS); \
+ } while (0)
+
+/*
+ * This overlays the fid structure (see mount.h)
+ */
+struct defid {
+ u_short defid_len; /* length of structure */
+ u_short defid_pad; /* force long alignment */
+
+ u_long defid_dirclust; /* cluster this dir entry came from */
+ u_long defid_dirofs; /* offset of entry within the cluster */
+#if 0
+ u_long defid_gen; /* generation number */
+#endif
+};
+
+extern vop_t **msdosfs_vnodeop_p;
+
+int msdosfs_lookup __P((struct vop_cachedlookup_args *));
+int msdosfs_inactive __P((struct vop_inactive_args *));
+int msdosfs_reclaim __P((struct vop_reclaim_args *));
+
+/*
+ * Internal service routine prototypes.
+ */
+int deget __P((struct msdosfsmount *, u_long, u_long, struct denode **));
+int uniqdosname __P((struct denode *, struct componentname *, u_char *));
+int findwin95 __P((struct denode *));
+
+int readep __P((struct msdosfsmount *pmp, u_long dirclu, u_long dirofs, struct buf **bpp, struct direntry **epp));
+int readde __P((struct denode *dep, struct buf **bpp, struct direntry **epp));
+int deextend __P((struct denode *dep, u_long length, struct ucred *cred));
+int fillinusemap __P((struct msdosfsmount *pmp));
+void reinsert __P((struct denode *dep));
+int dosdirempty __P((struct denode *dep));
+int createde __P((struct denode *dep, struct denode *ddep, struct denode **depp, struct componentname *cnp));
+int deupdat __P((struct denode *dep, int waitfor));
+int removede __P((struct denode *pdep, struct denode *dep));
+int detrunc __P((struct denode *dep, u_long length, int flags, struct ucred *cred, struct proc *p));
+int doscheckpath __P(( struct denode *source, struct denode *target));
+#endif /* KERNEL */
diff --git a/sys/fs/msdosfs/direntry.h b/sys/fs/msdosfs/direntry.h
new file mode 100644
index 0000000..796fe78
--- /dev/null
+++ b/sys/fs/msdosfs/direntry.h
@@ -0,0 +1,143 @@
+/* $Id: direntry.h,v 1.12 1998/02/26 06:45:42 msmith Exp $ */
+/* $NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * Structure of a dos directory entry.
+ */
+struct direntry {
+ u_int8_t deName[8]; /* filename, blank filled */
+#define SLOT_EMPTY 0x00 /* slot has never been used */
+#define SLOT_E5 0x05 /* the real value is 0xe5 */
+#define SLOT_DELETED 0xe5 /* file in this slot deleted */
+ u_int8_t deExtension[3]; /* extension, blank filled */
+ u_int8_t deAttributes; /* file attributes */
+#define ATTR_NORMAL 0x00 /* normal file */
+#define ATTR_READONLY 0x01 /* file is readonly */
+#define ATTR_HIDDEN 0x02 /* file is hidden */
+#define ATTR_SYSTEM 0x04 /* file is a system file */
+#define ATTR_VOLUME 0x08 /* entry is a volume label */
+#define ATTR_DIRECTORY 0x10 /* entry is a directory name */
+#define ATTR_ARCHIVE 0x20 /* file is new or modified */
+ u_int8_t deLowerCase; /* NT VFAT lower case flags */
+#define LCASE_BASE 0x08 /* filename base in lower case */
+#define LCASE_EXT 0x10 /* filename extension in lower case */
+ u_int8_t deCHundredth; /* hundredth of seconds in CTime */
+ u_int8_t deCTime[2]; /* create time */
+ u_int8_t deCDate[2]; /* create date */
+ u_int8_t deADate[2]; /* access date */
+ u_int8_t deHighClust[2]; /* high bytes of cluster number */
+ u_int8_t deMTime[2]; /* last update time */
+ u_int8_t deMDate[2]; /* last update date */
+ u_int8_t deStartCluster[2]; /* starting cluster of file */
+ u_int8_t deFileSize[4]; /* size of file in bytes */
+};
+
+/*
+ * Structure of a Win95 long name directory entry
+ */
+struct winentry {
+ u_int8_t weCnt;
+#define WIN_LAST 0x40
+#define WIN_CNT 0x3f
+ u_int8_t wePart1[10];
+ u_int8_t weAttributes;
+#define ATTR_WIN95 0x0f
+ u_int8_t weReserved1;
+ u_int8_t weChksum;
+ u_int8_t wePart2[12];
+ u_int16_t weReserved2;
+ u_int8_t wePart3[4];
+};
+#define WIN_CHARS 13 /* Number of chars per winentry */
+
+/*
+ * Maximum filename length in Win95
+ * Note: Must be < sizeof(dirent.d_name)
+ */
+#define WIN_MAXLEN 255
+
+/*
+ * This is the format of the contents of the time fields (deCTime and
+ * deMTime) in the direntry structure.
+ * We don't use bitfields because we don't know how compilers for
+ * arbitrary machines will lay them out.
+ */
+#define DT_2SECONDS_MASK 0x1F /* seconds divided by 2 */
+#define DT_2SECONDS_SHIFT 0
+#define DT_MINUTES_MASK 0x7E0 /* minutes */
+#define DT_MINUTES_SHIFT 5
+#define DT_HOURS_MASK 0xF800 /* hours */
+#define DT_HOURS_SHIFT 11
+
+/*
+ * This is the format of the contents of the date fields (deCDate, deADate
+ * and deMDate) in the direntry structure.
+ */
+#define DD_DAY_MASK 0x1F /* day of month */
+#define DD_DAY_SHIFT 0
+#define DD_MONTH_MASK 0x1E0 /* month */
+#define DD_MONTH_SHIFT 5
+#define DD_YEAR_MASK 0xFE00 /* year - 1980 */
+#define DD_YEAR_SHIFT 9
+
+#ifdef KERNEL
+struct dirent;
+void unix2dostime __P((struct timespec *tsp, u_int16_t *ddp,
+ u_int16_t *dtp, u_int8_t *dhp));
+void dos2unixtime __P((u_int dd, u_int dt, u_int dh, struct timespec *tsp));
+int dos2unixfn __P((u_char dn[11], u_char *un, int lower, int d2u_loaded, u_int8_t *d2u, int ul_loaded, u_int8_t *ul));
+int unix2dosfn __P((const u_char *un, u_char dn[12], int unlen, u_int gen, int u2d_loaded, u_int8_t *u2d, int lu_loaded, u_int8_t *lu));
+int unix2winfn __P((const u_char *un, int unlen, struct winentry *wep, int cnt, int chksum, int table_loaded, u_int16_t *u2w));
+int winChkName __P((const u_char *un, int unlen, struct winentry *wep, int chksum, int u2w_loaded, u_int16_t *u2w, int ul_loaded, u_int8_t *ul));
+int win2unixfn __P((struct winentry *wep, struct dirent *dp, int chksum, int table_loaded, u_int16_t *u2w));
+u_int8_t winChksum __P((u_int8_t *name));
+int winSlotCnt __P((const u_char *un, int unlen));
+int winLenFixup __P((const u_char *un, int unlen));
+#endif /* KERNEL */
diff --git a/sys/fs/msdosfs/fat.h b/sys/fs/msdosfs/fat.h
new file mode 100644
index 0000000..74b05e2
--- /dev/null
+++ b/sys/fs/msdosfs/fat.h
@@ -0,0 +1,108 @@
+/* $Id: fat.h,v 1.6 1997/02/22 09:40:45 peter Exp $ */
+/* $NetBSD: fat.h,v 1.12 1997/11/17 15:36:36 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * Some useful cluster numbers.
+ * The reserved, bad and end-of-file values are written as full 32 bit
+ * constants. MSDOSFSEOF() further down in this header ors the bits outside
+ * pm_fatmask into a cluster number before comparing it with CLUST_EOFS,
+ * so narrower FAT entries can be tested against the same constants.
+ */
+#define MSDOSFSROOT 0 /* cluster 0 means the root dir */
+#define CLUST_FREE 0 /* cluster 0 also means a free cluster */
+#define MSDOSFSFREE CLUST_FREE
+#define CLUST_FIRST 2 /* first legal cluster number */
+#define CLUST_RSRVD 0xfffffff6 /* reserved cluster range */
+#define CLUST_BAD 0xfffffff7 /* a cluster with a defect */
+#define CLUST_EOFS 0xfffffff8 /* start of eof cluster range */
+#define CLUST_EOFE 0xffffffff /* end of eof cluster range */
+
+#define FAT12_MASK 0x00000fff /* mask for 12 bit cluster numbers */
+#define FAT16_MASK 0x0000ffff /* mask for 16 bit cluster numbers */
+#define FAT32_MASK 0x0fffffff /* mask for FAT32 cluster numbers */
+
+/*
+ * FAT12(), FAT16() and FAT32() tell which FAT width a mounted filesystem
+ * uses. They only compare the mask kept in pmp->pm_fatmask with the
+ * FATxx_MASK constants; the decision itself is made elsewhere.
+ *
+ * Historical description of how the width is chosen (note that the names
+ * CLUST_RSRVS and pm_fatentrysize used in it do not appear in this header;
+ * the macros below test pm_fatmask):
+ *
+ * MSDOSFS:
+ * Return true if filesystem uses 12 bit fats. Microsoft Programmer's
+ * Reference says if the maximum cluster number in a filesystem is greater
+ * than 4078 ((CLUST_RSRVS - CLUST_FIRST) & FAT12_MASK) then we've got a
+ * 16 bit fat filesystem. While mounting, the result of this test is stored
+ * in pm_fatentrysize.
+ * GEMDOS-flavour (atari):
+ * If the filesystem is on floppy we've got a 12 bit fat filesystem, otherwise
+ * 16 bit. We check the d_type field in the disklabel struct while mounting
+ * and store the result in the pm_fatentrysize. Note that this kind of
+ * detection gets flakey when mounting a vnd-device.
+ *
+ * The argument is parenthesized so that any pointer expression can be
+ * passed, as MSDOSFSEOF() already allows.
+ */
+#define FAT12(pmp) ((pmp)->pm_fatmask == FAT12_MASK)
+#define FAT16(pmp) ((pmp)->pm_fatmask == FAT16_MASK)
+#define FAT32(pmp) ((pmp)->pm_fatmask == FAT32_MASK)
+
+/*
+ * True if cluster number cn, with all bits outside pm_fatmask forced to
+ * one, has every bit of CLUST_EOFS set, i.e. marks the end of a chain.
+ */
+#define MSDOSFSEOF(pmp, cn) ((((cn) | ~(pmp)->pm_fatmask) & CLUST_EOFS) == CLUST_EOFS)
+
+#ifdef KERNEL
+/*
+ * These are the values for the function argument to the function
+ * fatentry().
+ * They are single bits, so both operations can be requested in one call
+ * (FAT_GET_AND_SET).
+ */
+#define FAT_GET 0x0001 /* get a fat entry */
+#define FAT_SET 0x0002 /* set a fat entry */
+#define FAT_GET_AND_SET (FAT_GET | FAT_SET)
+
+/*
+ * Flags to extendfile:
+ * (passed in its last argument, "flags")
+ */
+#define DE_CLEAR 1 /* Zero out the blocks allocated */
+
+int pcbmap __P((struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int* sp));
+int clusterfree __P((struct msdosfsmount *pmp, u_long cn, u_long *oldcnp));
+int clusteralloc __P((struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got));
+int fatentry __P((int function, struct msdosfsmount *pmp, u_long cluster, u_long *oldcontents, u_long newcontents)); /* function is FAT_GET, FAT_SET or both */
+int freeclusterchain __P((struct msdosfsmount *pmp, u_long startchain));
+int extendfile __P((struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags)); /* flags: DE_CLEAR or 0 */
+void fc_purge __P((struct denode *dep, u_int frcn));
+
+#endif /* KERNEL */
diff --git a/sys/fs/msdosfs/msdosfs_conv.c b/sys/fs/msdosfs/msdosfs_conv.c
new file mode 100644
index 0000000..2c792eb
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_conv.c
@@ -0,0 +1,1041 @@
+/* $Id: msdosfs_conv.c,v 1.27 1998/05/17 21:18:08 dt Exp $ */
+/* $NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $ */
+
+/*-
+ * Copyright (C) 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * System include files.
+ */
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h> /* defines tz */
+#include <sys/systm.h>
+#include <machine/clock.h>
+#include <sys/dirent.h>
+
+/*
+ * MSDOSFS include files.
+ */
+#include <msdosfs/direntry.h>
+
+/*
+ * Total number of days that have passed for each month in a regular year.
+ * Entry i is the day count at the end of month i + 1 (index 0 is January),
+ * so for a 1-based month m the days before it are table[m - 2].
+ */
+static u_short regyear[] = {
+ 31, 59, 90, 120, 151, 181,
+ 212, 243, 273, 304, 334, 365
+};
+
+/*
+ * Total number of days that have passed for each month in a leap year.
+ * Same indexing as regyear[]; every entry from February on is one larger.
+ */
+static u_short leapyear[] = {
+ 31, 60, 91, 121, 152, 182,
+ 213, 244, 274, 305, 335, 366
+};
+
+/*
+ * Variables used to remember parts of the last time conversion. Maybe we
+ * can avoid a full conversion.
+ * They are written and read only by unix2dostime(); no locking around
+ * them is visible in this file.
+ */
+static u_long lasttime; /* adjusted, even-second time of the last conversion */
+static u_long lastday; /* that time expressed in whole days since 1970 */
+static u_short lastddate; /* DOS date word computed for lastday */
+static u_short lastdtime; /* DOS time word computed for lasttime */
+
+static __inline u_int8_t find_lcode __P((u_int16_t code, u_int16_t *u2w)); /* defined further down, before its first use */
+
+/*
+ * Turn a unix timestamp (taken to be GMT) into the DOS timestamp fields
+ * used in directory entries.
+ *
+ * tsp - time to convert
+ * ddp - receives the DOS date word; always written
+ * dtp - receives the DOS time word (2 second resolution); may be NULL
+ * dhp - receives the odd second and the 10ms ticks as a count of
+ *       hundredths of a second; may be NULL
+ *
+ * The date and time words of the previous call are cached in lasttime,
+ * lastday, lastddate and lastdtime and reused when the adjusted time, or
+ * at least its day number, has not changed.
+ */
+void
+unix2dostime(tsp, ddp, dtp, dhp)
+	struct timespec *tsp;
+	u_int16_t *ddp;
+	u_int16_t *dtp;
+	u_int8_t *dhp;
+{
+	u_long now;		/* adjusted time, odd second dropped */
+	u_long daynum;		/* days since 1970, then day within year/month */
+	u_long yearlen;
+	u_long yr;
+	u_long mon;
+	u_short *cumdays;
+
+	/*
+	 * Apply the timezone and clock adjustments, then clear bit 0:
+	 * the DOS time word only counts 2 second units.
+	 */
+	now = tsp->tv_sec - (tz.tz_minuteswest * 60)
+	    - (wall_cmos_clock ? adjkerntz : 0);
+	    /* - daylight savings time correction */
+	now &= ~1;
+
+	if (now != lasttime) {
+		lasttime = now;
+		lastdtime = (((now / 2) % 30) << DT_2SECONDS_SHIFT)
+		    + (((now / 60) % 60) << DT_MINUTES_SHIFT)
+		    + (((now / 3600) % 24) << DT_HOURS_SHIFT);
+
+		/*
+		 * The year/month/day split is only redone when the day
+		 * number differs from the one cached by the last call.
+		 */
+		daynum = now / (24 * 60 * 60);
+		if (daynum != lastday) {
+			lastday = daynum;
+
+			/* Peel off whole years, starting at 1970. */
+			yr = 1970;
+			for (;;) {
+				yearlen = yr & 0x03 ? 365 : 366;
+				if (daynum < yearlen)
+					break;
+				daynum -= yearlen;
+				yr++;
+			}
+
+			/* Find the month containing the remaining day. */
+			cumdays = yr & 0x03 ? regyear : leapyear;
+			mon = 0;
+			while (daynum >= cumdays[mon])
+				mon++;
+			if (mon > 0)
+				daynum -= cumdays[mon - 1];
+
+			lastddate = ((daynum + 1) << DD_DAY_SHIFT)
+			    + ((mon + 1) << DD_MONTH_SHIFT);
+			/*
+			 * DOS years count from 1980, unix years from 1970.
+			 * For anything not after 1980 the year field is
+			 * simply left at zero.
+			 */
+			if (yr > 1980)
+				lastddate += (yr - 1980) << DD_YEAR_SHIFT;
+		}
+	}
+
+	*ddp = lastddate;
+	if (dtp)
+		*dtp = lastdtime;
+	if (dhp)
+		*dhp = (tsp->tv_sec & 1) * 100 + tsp->tv_nsec / 10000000;
+}
+
+/*
+ * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
+ * interval there were 8 regular years and 2 leap years.
+ */
+#define SECONDSTO1980 (((8 * 365) + (2 * 366)) * (24 * 60 * 60))
+
+/*
+ * One-entry cache for dos2unixtime(): the DOS date word converted last and
+ * the number of seconds from 1970 to midnight of that date.
+ */
+static u_short lastdosdate;
+static u_long lastseconds;
+
+/*
+ * Convert from dos' idea of time to unix'. This will probably only be
+ * called from the stat(), and fstat() system calls and so probably need
+ * not be too efficient.
+ *
+ * dd  - DOS date word (DD_* layout); 0 is taken as "never set" and
+ *       yields the epoch
+ * dt  - DOS time word (DT_* layout)
+ * dh  - hundredths of a second; dh / 100 whole seconds are added and the
+ *       remainder becomes the nanosecond part
+ * tsp - receives the result
+ *
+ * A month field outside 1..12 is reported with printf() and treated as
+ * January.
+ *
+ * NOTE(review): unix2dostime() subtracts adjkerntz only when
+ * wall_cmos_clock is set, while this routine adds it unconditionally;
+ * confirm that the difference is intended.
+ */
+void
+dos2unixtime(dd, dt, dh, tsp)
+	u_int dd;
+	u_int dt;
+	u_int dh;
+	struct timespec *tsp;
+{
+	u_long seconds;
+	u_long month;
+	u_long year;
+	u_long days;
+	u_short *months;
+
+	if (dd == 0) {
+		/*
+		 * Uninitialized field, return the epoch.
+		 */
+		tsp->tv_sec = 0;
+		tsp->tv_nsec = 0;
+		return;
+	}
+	seconds = (((dt & DT_2SECONDS_MASK) >> DT_2SECONDS_SHIFT) << 1)
+	    + ((dt & DT_MINUTES_MASK) >> DT_MINUTES_SHIFT) * 60
+	    + ((dt & DT_HOURS_MASK) >> DT_HOURS_SHIFT) * 3600
+	    + dh / 100;
+	/*
+	 * If the year, month, and day from the last conversion are the
+	 * same then use the saved value.
+	 */
+	if (lastdosdate != dd) {
+		lastdosdate = dd;
+		year = (dd & DD_YEAR_MASK) >> DD_YEAR_SHIFT;
+		days = year * 365;
+		/*
+		 * year / 4 + 1 counts the leap years from 1980 up to and
+		 * including the current one; the current year's leap day
+		 * is accounted for by the leapyear[] table instead.
+		 */
+		days += year / 4 + 1;	/* add in leap days */
+		if ((year & 0x03) == 0)
+			days--;		/* if year is a leap year */
+		months = year & 0x03 ? regyear : leapyear;
+		month = (dd & DD_MONTH_MASK) >> DD_MONTH_SHIFT;
+		if (month < 1 || month > 12) {
+			/* month is unsigned long, hence %lu */
+			printf("dos2unixtime(): month value out of range (%lu)\n",
+			    month);
+			month = 1;
+		}
+		if (month > 1)
+			days += months[month - 2];
+		days += ((dd & DD_DAY_MASK) >> DD_DAY_SHIFT) - 1;
+		lastseconds = (days * 24 * 60 * 60) + SECONDSTO1980;
+	}
+	tsp->tv_sec = seconds + lastseconds + (tz.tz_minuteswest * 60)
+	    + adjkerntz;
+	    /* + daylight savings time correction */
+	tsp->tv_nsec = (dh % 100) * 10000000;
+}
+
+/*
+ * 0 - character disallowed in long file name.
+ * 1 - character should be replaced by '_' in DOS file name,
+ * and generation number inserted.
+ * 2 - character ('.' and ' ') should be skipped in DOS file name,
+ * and generation number inserted.
+ * Any other value is the byte stored in the DOS name for that character.
+ * The table is indexed by the unix character; the rows for a-z hold the
+ * codes of A-Z, and rows e0-ff repeat rows c0-df except at f7 and ff.
+ * unix2dosfn() reads it through its U2D() macro.
+ */
+static u_char
+unix2dos[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 00-07 */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 08-0f */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 10-17 */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 18-1f */
+ 2, 0x21, 0, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
+ 0x28, 0x29, 0, 1, 1, 0x2d, 2, 0, /* 28-2f */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
+ 0x38, 0x39, 0, 1, 0, 1, 0, 0, /* 38-3f */
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */
+ 0x58, 0x59, 0x5a, 1, 0, 1, 0x5e, 0x5f, /* 58-5f */
+ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 60-67 */
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 68-6f */
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 70-77 */
+ 0x58, 0x59, 0x5a, 0x7b, 0, 0x7d, 0x7e, 0, /* 78-7f */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 80-87 */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 88-8f */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 90-97 */
+ 0, 0, 0, 0, 0, 0, 0, 0, /* 98-9f */
+ 0, 0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5, /* a0-a7 */
+ 0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee, /* a8-af */
+ 0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa, /* b0-b7 */
+ 0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8, /* b8-bf */
+ 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* c0-c7 */
+ 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* c8-cf */
+ 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e, /* d0-d7 */
+ 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1, /* d8-df */
+ 0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80, /* e0-e7 */
+ 0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8, /* e8-ef */
+ 0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0xf6, /* f0-f7 */
+ 0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0x98, /* f8-ff */
+};
+
+static u_char
+dos2unix[256] = { /* indexed by a DOS name byte, yields the unix byte; 0x3f ('?') presumably stands in where there is no equivalent */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 00-07 */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 08-0f */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 10-17 */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, /* 18-1f */
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */
+ 0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7, /* 80-87 */
+ 0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5, /* 88-8f */
+ 0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9, /* 90-97 */
+ 0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x3f, /* 98-9f */
+ 0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba, /* a0-a7 */
+ 0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb, /* a8-af */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xc1, 0xc2, 0xc0, /* b0-b7 */
+ 0xa9, 0x3f, 0x3f, 0x3f, 0x3f, 0xa2, 0xa5, 0x3f, /* b8-bf */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xe3, 0xc3, /* c0-c7 */
+ 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xa4, /* c8-cf */
+ 0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x3f, 0xcd, 0xce, /* d0-d7 */
+ 0xcf, 0x3f, 0x3f, 0x3f, 0x3f, 0xa6, 0xcc, 0x3f, /* d8-df */
+ 0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe, /* e0-e7 */
+ 0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0x3f, /* e8-ef */
+ 0xad, 0xb1, 0x3f, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8, /* f0-f7 */
+ 0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x3f, 0x3f, /* f8-ff */
+};
+
+static u_char
+u2l[256] = { /* upper -> lower case: 0x41-0x5a and 0xc0-0xde (except 0xd7) map to the value 0x20 higher, everything else to itself */
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */
+ 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 40-47 */
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 48-4f */
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 50-57 */
+ 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */
+ 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 60-67 */
+ 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 68-6f */
+ 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 70-77 */
+ 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* c0-c7 */
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* c8-cf */
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7, /* d0-d7 */
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf, /* d8-df */
+ 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* e0-e7 */
+ 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* e8-ef */
+ 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* f0-f7 */
+ 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
+};
+
+/*
+ * Lower case to upper case, the mirror image of u2l[]: 0x61-0x7a and
+ * 0xe0-0xfe (except 0xf7) map to the value 0x20 lower, every other byte
+ * maps to itself (0xdf and 0xff have no upper case partner in the range
+ * u2l[] maps from). Indexed by the unix character; unix2dosfn() applies
+ * it before its U2D() lookup when no loaded lu table covers the
+ * character.
+ */
+static u_char
+l2u[256] = {
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 00-07 */
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 08-0f */
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 10-17 */
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 18-1f */
+ 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 20-27 */
+ 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 28-2f */
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 30-37 */
+ 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 38-3f */
+ 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 40-47 */
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 48-4f */
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 50-57 */
+ 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 58-5f */
+ 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 60-67 */
+ 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 68-6f */
+ 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 70-77 */
+ 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 78-7f */
+ 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 80-87 */
+ 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 88-8f */
+ 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 90-97 */
+ 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 98-9f */
+ 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* a0-a7 */
+ 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* a8-af */
+ 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* b0-b7 */
+ 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* b8-bf */
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* c0-c7 */
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* c8-cf */
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* d0-d7 */
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* d8-df */
+ 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* e0-e7 */
+ 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* e8-ef */
+ 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xf7, /* f0-f7 */
+ 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xff, /* f8-ff */
+};
+
+/*
+ * DOS filenames are made of 2 parts, the name part and the extension part.
+ * The name part is 8 characters long and the extension part is 3
+ * characters long. They may contain trailing blanks if the name or
+ * extension are not long enough to fill their respective fields.
+ */
+
+/*
+ * Convert a DOS filename to a unix filename. And, return the number of
+ * characters in the resulting unix filename excluding the terminating
+ * null.
+ */
+int
+dos2unixfn(dn, un, lower, d2u_loaded, d2u, ul_loaded, ul)
+ u_char dn[11];
+ u_char *un;
+ int lower;
+ int d2u_loaded;
+ u_int8_t *d2u;
+ int ul_loaded;
+ u_int8_t *ul;
+{
+ int i;
+ int thislong = 1;
+ u_char c;
+
+ /*
+ * If first char of the filename is SLOT_E5 (0x05), then the real
+ * first char of the filename should be 0xe5. But, they couldn't
+ * just have a 0xe5 mean 0xe5 because that is used to mean a freed
+ * directory slot. Another dos quirk.
+ */
+ if (*dn == SLOT_E5)
+ c = d2u_loaded ? d2u[0xe5 & 0x7f] : dos2unix[0xe5];
+ else
+ c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] :
+ dos2unix[*dn];
+ *un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ?
+ ul[c & 0x7f] : u2l[c]) : c;
+ dn++;
+
+ /*
+ * Copy the name portion into the unix filename string.
+ */
+ for (i = 1; i < 8 && *dn != ' '; i++) {
+ c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] :
+ dos2unix[*dn];
+ dn++;
+ *un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ?
+ ul[c & 0x7f] : u2l[c]) : c;
+ thislong++;
+ }
+ dn += 8 - i;
+
+ /*
+ * Now, if there is an extension then put in a period and copy in
+ * the extension.
+ */
+ if (*dn != ' ') {
+ *un++ = '.';
+ thislong++;
+ for (i = 0; i < 3 && *dn != ' '; i++) {
+ c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] :
+ dos2unix[*dn];
+ dn++;
+ *un++ = (lower & LCASE_EXT) ? (ul_loaded && (c & 0x80) ?
+ ul[c & 0x7f] : u2l[c]) : c;
+ thislong++;
+ }
+ }
+ *un++ = 0;
+
+ return (thislong);
+}
+
+/*
+ * Convert a unix filename to a DOS filename according to Win95 rules.
+ * If applicable and gen is not 0, it is inserted into the converted
+ * filename as a generation number.
+ * Returns
+ * 0 if name couldn't be converted
+ * 1 if the converted name is the same as the original
+ * (no long filename entry necessary for Win95)
+ * 2 if conversion was successful
+ * 3 if conversion was successful and generation number was inserted
+ * (3 is also returned, without inserting anything, when characters had
+ * to be dropped or replaced and gen is 0)
+ *
+ * un, unlen - the unix name and its length
+ * dn - receives 11 blank padded name bytes followed by a 0 byte
+ * gen - generation number, written as "~" plus decimal digits
+ * u2d, lu - optional tables (used when the matching *_loaded flag is set)
+ * for characters with the high bit set; they are indexed with the low
+ * 7 bits of the character. Otherwise unix2dos[] and l2u[] are used.
+ */
+int
+unix2dosfn(un, dn, unlen, gen, u2d_loaded, u2d, lu_loaded, lu)
+ const u_char *un;
+ u_char dn[12];
+ int unlen;
+ u_int gen;
+ int u2d_loaded;
+ u_int8_t *u2d;
+ int lu_loaded;
+ u_int8_t *lu;
+{
+ int i, j, l;
+ int conv = 1; /* becomes the return value: 1 unchanged, 2 converted, 3 characters dropped or replaced */
+ const u_char *cp, *dp, *dp1;
+ u_char gentext[6], *wcp; /* decimal digits of gen, built from the end of the buffer */
+ u_int8_t c;
+#define U2D(c) (u2d_loaded && ((c) & 0x80) ? u2d[(c) & 0x7f] : unix2dos[c])
+
+ /*
+ * Fill the dos filename string with blanks. These are DOS's pad
+ * characters.
+ */
+ for (i = 0; i < 11; i++)
+ dn[i] = ' ';
+ dn[11] = 0;
+
+ /*
+ * The filenames "." and ".." are handled specially, since they
+ * don't follow dos filename rules.
+ * For them the result is 1 when gen is 0 or 1, and 0 otherwise.
+ */
+ if (un[0] == '.' && unlen == 1) {
+ dn[0] = '.';
+ return gen <= 1;
+ }
+ if (un[0] == '.' && un[1] == '.' && unlen == 2) {
+ dn[0] = '.';
+ dn[1] = '.';
+ return gen <= 1;
+ }
+
+ /*
+ * Filenames with only blanks and dots are not allowed!
+ * (i only drops below 0 if the loop never hit another character.)
+ */
+ for (cp = un, i = unlen; --i >= 0; cp++)
+ if (*cp != ' ' && *cp != '.')
+ break;
+ if (i < 0)
+ return 0;
+
+
+ /*
+ * Filenames with some characters are not allowed!
+ * (those whose table entry is 0)
+ */
+ for (cp = un, i = unlen; --i >= 0; cp++)
+ if (U2D(*cp) == 0)
+ return 0;
+
+ /*
+ * Now find the extension
+ * Note: dot as first char doesn't start extension
+ * and trailing dots and blanks are ignored
+ *
+ * dp1 points just past the first dot seen since the last ordinary
+ * character. When an ordinary character follows, dp takes that
+ * position over and dp1 is cleared. After the loop dp is the start
+ * of the extension (or 0 if there is none) and a non-zero dp1 means
+ * the name ends in dots/blanks that contain a dot.
+ */
+ dp = dp1 = 0;
+ for (cp = un + 1, i = unlen - 1; --i >= 0;) {
+ switch (*cp++) {
+ case '.':
+ if (!dp1)
+ dp1 = cp;
+ break;
+ case ' ':
+ break;
+ default:
+ if (dp1)
+ dp = dp1;
+ dp1 = 0;
+ break;
+ }
+ }
+
+ /*
+ * Now convert it
+ * The extension goes to dn[8..10]. A table value of 1 is stored as
+ * '_', a value of 2 is skipped (the slot is blanked and reused);
+ * both set conv to 3, as does an extension that does not fit.
+ */
+ if (dp) {
+ if (dp1)
+ l = dp1 - dp;
+ else
+ l = unlen - (dp - un);
+ for (i = 0, j = 8; i < l && j < 11; i++, j++) {
+ c = dp[i];
+ c = lu_loaded && (c & 0x80) ?
+ lu[c & 0x7f] : l2u[c];
+ c = U2D(c);
+ if (dp[i] != (dn[j] = c)
+ && conv != 3)
+ conv = 2;
+ if (dn[j] == 1) {
+ conv = 3;
+ dn[j] = '_';
+ }
+ if (dn[j] == 2) {
+ conv = 3;
+ dn[j--] = ' ';
+ }
+ }
+ if (i < l)
+ conv = 3;
+ dp--; /* back onto the dot, so the base name loop below stops in front of it */
+ } else {
+ for (dp = cp; *--dp == ' ' || *dp == '.';); /* no extension: walk back over trailing blanks and dots */
+ dp++;
+ }
+
+ /*
+ * Now convert the rest of the name
+ * (everything before dp, into dn[0..7], with the same rules as for
+ * the extension)
+ */
+ for (i = j = 0; un < dp && j < 8; i++, j++, un++) {
+ c = lu_loaded && (*un & 0x80) ?
+ lu[*un & 0x7f] : l2u[*un];
+ c = U2D(c);
+ if (*un != (dn[j] = c)
+ && conv != 3)
+ conv = 2;
+ if (dn[j] == 1) {
+ conv = 3;
+ dn[j] = '_';
+ }
+ if (dn[j] == 2) {
+ conv = 3;
+ dn[j--] = ' ';
+ }
+ }
+ if (un < dp)
+ conv = 3;
+ /*
+ * If we didn't have any chars in filename,
+ * generate a default
+ */
+ if (!j)
+ dn[0] = '_';
+
+ /*
+ * The first character cannot be E5,
+ * because that means a deleted entry
+ */
+ if (dn[0] == 0xe5)
+ dn[0] = SLOT_E5;
+
+ /*
+ * If there wasn't any char dropped,
+ * there is no place for generation numbers
+ * (so a gen above 1 makes the conversion fail)
+ */
+ if (conv != 3) {
+ if (gen > 1)
+ return 0;
+ return conv;
+ }
+
+ /*
+ * Now insert the generation number into the filename part
+ * The digits are produced right to left in gentext; a gen with more
+ * digits than gentext holds makes the conversion fail. The "~digits"
+ * suffix goes after the last non-blank name character, or further
+ * left if it would not fit into the 8 name bytes otherwise.
+ */
+ if (gen == 0)
+ return conv;
+ for (wcp = gentext + sizeof(gentext); wcp > gentext && gen; gen /= 10)
+ *--wcp = gen % 10 + '0';
+ if (gen)
+ return 0;
+ for (i = 8; dn[--i] == ' ';);
+ i++;
+ if (gentext + sizeof(gentext) - wcp + 1 > 8 - i)
+ i = 8 - (gentext + sizeof(gentext) - wcp + 1);
+ dn[i++] = '~';
+ while (wcp < gentext + sizeof(gentext))
+ dn[i++] = *wcp++;
+ return 3;
+#undef U2D
+}
+
+static int win_fill_part __P((u_int8_t *wcp, int nchars, const u_char **unp,
+    int *unlenp, int table_loaded, u_int16_t *u2w));
+
+/*
+ * Store up to nchars characters of the unix name at wcp, each as a 16 bit
+ * value with the low byte first. Characters with the high bit set are
+ * looked up in u2w (indexed with their low 7 bits) when table_loaded is
+ * set; all others are stored with a zero high byte.
+ * *unp and *unlenp are advanced past the characters consumed.
+ * Returns 0 if the part was filled completely, or 1 if the name ran out
+ * first, in which case a 16 bit zero terminator has been stored.
+ */
+static int
+win_fill_part(wcp, nchars, unp, unlenp, table_loaded, u2w)
+	u_int8_t *wcp;
+	int nchars;
+	const u_char **unp;
+	int *unlenp;
+	int table_loaded;
+	u_int16_t *u2w;
+{
+	const u_char *un = *unp;
+	int unlen = *unlenp;
+	int ended = 0;
+	u_int16_t code;
+
+	while (--nchars >= 0) {
+		if (--unlen < 0) {
+			*wcp++ = 0;
+			*wcp++ = 0;
+			ended = 1;
+			break;
+		}
+		if (table_loaded && (*un & 0x80)) {
+			code = u2w[*un++ & 0x7f];
+			*wcp++ = code;
+			*wcp++ = code >> 8;
+		} else {
+			*wcp++ = *un++;
+			*wcp++ = 0;
+		}
+	}
+	*unp = un;
+	*unlenp = unlen;
+	return ended;
+}
+
+/*
+ * Create a Win95 long name directory entry
+ * Note: assumes that the filename is valid,
+ * i.e. doesn't consist solely of blanks and dots
+ *
+ * un, unlen    - the complete unix name
+ * wep          - the slot to fill
+ * cnt          - 1-based number of this slot; it covers the WIN_CHARS
+ *                characters starting at (cnt - 1) * WIN_CHARS
+ * chksum       - checksum stored in the slot
+ * table_loaded - non-zero if u2w is to be used for characters with the
+ *                high bit set
+ *
+ * Returns the number of name characters still left for further slots;
+ * when that is 0 the slot has been marked with WIN_LAST.
+ */
+int
+unix2winfn(un, unlen, wep, cnt, chksum, table_loaded, u2w)
+	const u_char *un;
+	int unlen;
+	struct winentry *wep;
+	int cnt;
+	int chksum;
+	int table_loaded;
+	u_int16_t *u2w;
+{
+	const u_int8_t *cp;
+	u_int8_t *wcp;
+	int i;
+
+	/*
+	 * Drop trailing blanks and dots
+	 */
+	for (cp = un + unlen; *--cp == ' ' || *cp == '.'; unlen--);
+
+	/* Move to the part of the name this slot is responsible for. */
+	un += (cnt - 1) * WIN_CHARS;
+	unlen -= (cnt - 1) * WIN_CHARS;
+
+	/*
+	 * Initialize winentry to some useful default
+	 * (every byte 0xff, then the fixed fields)
+	 */
+	for (wcp = (u_int8_t *)wep, i = sizeof(*wep); --i >= 0; *wcp++ = 0xff);
+	wep->weCnt = cnt;
+	wep->weAttributes = ATTR_WIN95;
+	wep->weReserved1 = 0;
+	wep->weChksum = chksum;
+	wep->weReserved2 = 0;
+
+	/*
+	 * Now convert the filename parts. As soon as the name runs out
+	 * inside a part, that part has been terminated and this is the
+	 * last slot.
+	 */
+	if (win_fill_part(wep->wePart1, (int)(sizeof(wep->wePart1) / 2),
+		&un, &unlen, table_loaded, u2w)
+	    || win_fill_part(wep->wePart2, (int)(sizeof(wep->wePart2) / 2),
+		&un, &unlen, table_loaded, u2w)
+	    || win_fill_part(wep->wePart3, (int)(sizeof(wep->wePart3) / 2),
+		&un, &unlen, table_loaded, u2w)) {
+		wep->weCnt |= WIN_LAST;
+		return 0;
+	}
+
+	/* The name filled the slot exactly or continues in the next one. */
+	if (!unlen)
+		wep->weCnt |= WIN_LAST;
+	return unlen;
+}
+
+/*
+ * Reverse lookup in a 128 entry table: find the first entry of u2w that
+ * equals code and return its index with the high bit set, i.e. the 8 bit
+ * character that the table maps to code. Returns '?' if no entry matches.
+ */
+static __inline u_int8_t
+find_lcode(code, u2w)
+	u_int16_t code;
+	u_int16_t *u2w;
+{
+	u_int16_t *entry;
+
+	for (entry = u2w; entry < u2w + 128; entry++) {
+		if (*entry == code)
+			return ((entry - u2w) | 0x80);
+	}
+	return '?';
+}
+
+static int win_cmp_part __P((u_int8_t *cp, int nchars, const u_char **unp,
+    int *unlenp, int u2w_loaded, u_int16_t *u2w, int ul_loaded,
+    u_int8_t *ul));
+
+/*
+ * Compare up to nchars 16 bit characters stored at cp (low byte first)
+ * with the unix name at *unp, ignoring case. *unp and *unlenp are
+ * advanced past the characters compared.
+ * Returns
+ *  0 if all nchars characters matched (the caller goes on with the next
+ *    part),
+ *  1 if the unix name ended and the stored name has its 16 bit zero
+ *    terminator at the same position,
+ * -1 on any mismatch.
+ */
+static int
+win_cmp_part(cp, nchars, unp, unlenp, u2w_loaded, u2w, ul_loaded, ul)
+	u_int8_t *cp;
+	int nchars;
+	const u_char **unp;
+	int *unlenp;
+	int u2w_loaded;
+	u_int16_t *u2w;
+	int ul_loaded;
+	u_int8_t *ul;
+{
+	const u_char *un = *unp;
+	int unlen = *unlenp;
+	u_int16_t code;
+	u_int8_t c1, c2;
+
+	while (--nchars >= 0) {
+		if (--unlen < 0) {
+			if (!cp[0] && !cp[1])
+				return 1;
+			return -1;
+		}
+		/*
+		 * Reduce the stored character to 8 bits: through the
+		 * reverse table lookup if a table is loaded, otherwise
+		 * anything above 0xff becomes '?'.
+		 */
+		code = (cp[1] << 8) | cp[0];
+		if (code & 0xff80) {
+			if (u2w_loaded)
+				code = find_lcode(code, u2w);
+			else if (code & 0xff00)
+				code = '?';
+		}
+		/* Lower-case both sides before comparing. */
+		c1 = ul_loaded && (code & 0x80) ?
+		    ul[code & 0x7f] : u2l[code];
+		c2 = ul_loaded && (*un & 0x80) ?
+		    ul[*un & 0x7f] : u2l[*un];
+		if (c1 != c2)
+			return -1;
+		cp += 2;
+		un++;
+	}
+	*unp = un;
+	*unlenp = unlen;
+	return 0;
+}
+
+/*
+ * Compare our filename to the one in the Win95 entry
+ * Returns the checksum or -1 if no match
+ *
+ * un, unlen - the complete unix name
+ * wep       - one long name slot; only the WIN_CHARS characters it
+ *             covers are compared
+ * chksum    - checksum carried over from the previous slot; a slot
+ *             marked WIN_LAST supplies a new one instead
+ * u2w, ul   - optional tables, used when the matching *_loaded flag is
+ *             set
+ */
+int
+winChkName(un, unlen, wep, chksum, u2w_loaded, u2w, ul_loaded, ul)
+	const u_char *un;
+	int unlen;
+	struct winentry *wep;
+	int chksum;
+	int u2w_loaded;
+	u_int16_t *u2w;
+	int ul_loaded;
+	u_int8_t *ul;
+{
+	int i;
+	int r;
+
+	/*
+	 * First compare checksums
+	 */
+	if (wep->weCnt&WIN_LAST)
+		chksum = wep->weChksum;
+	else if (chksum != wep->weChksum)
+		chksum = -1;
+	if (chksum == -1)
+		return -1;
+
+	/*
+	 * Offset of this entry
+	 */
+	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
+	un += i;
+	if ((unlen -= i) <= 0)
+		return -1;
+	if ((wep->weCnt&WIN_LAST) && unlen > WIN_CHARS)
+		return -1;
+
+	/*
+	 * Compare the name parts
+	 */
+	r = win_cmp_part(wep->wePart1, (int)(sizeof(wep->wePart1) / 2),
+	    &un, &unlen, u2w_loaded, u2w, ul_loaded, ul);
+	if (r == 0)
+		r = win_cmp_part(wep->wePart2,
+		    (int)(sizeof(wep->wePart2) / 2),
+		    &un, &unlen, u2w_loaded, u2w, ul_loaded, ul);
+	if (r == 0)
+		r = win_cmp_part(wep->wePart3,
+		    (int)(sizeof(wep->wePart3) / 2),
+		    &un, &unlen, u2w_loaded, u2w, ul_loaded, ul);
+	if (r < 0)
+		return -1;
+	return chksum;
+}
+
+/*
+ * Convert Win95 filename to dirbuf.
+ * Returns the checksum or -1 if impossible
+ *
+ * wep          - one long name slot
+ * dp           - dirent being assembled; the characters of this slot are
+ *                written to d_name at offset (slot number - 1) * WIN_CHARS
+ * chksum       - checksum carried over from the previous slot; a slot
+ *                marked WIN_LAST supplies a new one and also initializes
+ *                d_namlen
+ * table_loaded - non-zero if u2w is to be used (through find_lcode())
+ *                for characters outside the 7 bit range
+ *
+ * A 16 bit zero ends the name: d_name is terminated there and d_namlen is
+ * reduced by the number of unused character positions in the slot. A '/'
+ * in the name, or a name longer than WIN_MAXLEN, makes the conversion
+ * fail.
+ */
+int
+win2unixfn(wep, dp, chksum, table_loaded, u2w)
+	struct winentry *wep;
+	struct dirent *dp;
+	int chksum;
+	int table_loaded;
+	u_int16_t *u2w;
+{
+	u_int8_t *cp;
+	/* ep: first position past the longest permitted name */
+	u_int8_t *np, *ep = (u_int8_t *)dp->d_name + WIN_MAXLEN;
+	u_int16_t code;
+	int i;
+
+	/* Reject slot numbers that are 0 or beyond the longest name. */
+	if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
+	    || !(wep->weCnt&WIN_CNT))
+		return -1;
+
+	/*
+	 * First compare checksums
+	 */
+	if (wep->weCnt&WIN_LAST) {
+		chksum = wep->weChksum;
+		/*
+		 * This works even though d_namlen is one byte!
+		 */
+		dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS;
+	} else if (chksum != wep->weChksum)
+		chksum = -1;
+	if (chksum == -1)
+		return -1;
+
+	/*
+	 * Offset of this entry
+	 */
+	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
+	np = (u_int8_t *)dp->d_name + i;
+
+	/*
+	 * Convert the name parts
+	 */
+	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
+		code = (cp[1] << 8) | cp[0];
+		switch (code) {
+		case 0:
+			*np = '\0';
+			/* unused: rest of this part plus parts 2 and 3 */
+			dp->d_namlen -= sizeof(wep->wePart2)/2
+			    + sizeof(wep->wePart3)/2 + i + 1;
+			return chksum;
+		case '/':
+			*np = '\0';
+			return -1;
+		default:
+			if (code & 0xff80) {
+				if (table_loaded)
+					code = find_lcode(code, u2w);
+				else if (code & 0xff00)
+					code = '?';
+			}
+			*np++ = code;
+			break;
+		}
+		/*
+		 * The size comparison should result in the compiler
+		 * optimizing the whole if away
+		 */
+		if (WIN_MAXLEN % WIN_CHARS < sizeof(wep->wePart1) / 2
+		    && np > ep) {
+			np[-1] = 0;
+			return -1;
+		}
+		cp += 2;
+	}
+	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
+		code = (cp[1] << 8) | cp[0];
+		switch (code) {
+		case 0:
+			*np = '\0';
+			/* unused: rest of this part plus part 3 */
+			dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1;
+			return chksum;
+		case '/':
+			*np = '\0';
+			return -1;
+		default:
+			if (code & 0xff80) {
+				if (table_loaded)
+					code = find_lcode(code, u2w);
+				else if (code & 0xff00)
+					code = '?';
+			}
+			*np++ = code;
+			break;
+		}
+		/*
+		 * The size comparisons should be optimized away
+		 */
+		if (WIN_MAXLEN % WIN_CHARS >= sizeof(wep->wePart1) / 2
+		    && WIN_MAXLEN % WIN_CHARS < (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
+		    && np > ep) {
+			np[-1] = 0;
+			return -1;
+		}
+		cp += 2;
+	}
+	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
+		code = (cp[1] << 8) | cp[0];
+		switch (code) {
+		case 0:
+			*np = '\0';
+			/* unused: rest of this part */
+			dp->d_namlen -= i + 1;
+			return chksum;
+		case '/':
+			*np = '\0';
+			return -1;
+		default:
+			if (code & 0xff80) {
+				if (table_loaded)
+					code = find_lcode(code, u2w);
+				else if (code & 0xff00)
+					code = '?';
+			}
+			*np++ = code;
+			break;
+		}
+		/*
+		 * See above
+		 */
+		if (WIN_MAXLEN % WIN_CHARS >= (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
+		    && np > ep) {
+			np[-1] = 0;
+			return -1;
+		}
+		cp += 2;
+	}
+	return chksum;
+}
+
+/*
+ * Checksum of an 11 byte DOS filename as stored in Win95 long name
+ * entries: for every byte, rotate the 8 bit sum right by one bit and then
+ * add the byte.
+ */
+u_int8_t
+winChksum(name)
+	u_int8_t *name;
+{
+	u_int8_t sum = 0;
+	int left = 11;
+
+	while (left-- > 0) {
+		sum = (sum << 7)|(sum >> 1);
+		sum += *name++;
+	}
+	return sum;
+}
+
+/*
+ * Number of Win95 long name slots needed for a unix name, after trailing
+ * blanks and dots have been discounted. Returns 0 if the name is longer
+ * than WIN_MAXLEN.
+ */
+int
+winSlotCnt(un, unlen)
+	const u_char *un;
+	int unlen;
+{
+	int len = winLenFixup(un, unlen);
+
+	return (len > WIN_MAXLEN) ? 0 : howmany(len, WIN_CHARS);
+}
+
+/*
+ * Length of a unix name as it counts for Win95 names, i.e. without any
+ * trailing blanks and dots. Returns 0 if nothing else is left.
+ */
+int
+winLenFixup(un, unlen)
+	const u_char* un;
+	int unlen;
+{
+	while (unlen > 0 && (un[unlen - 1] == ' ' || un[unlen - 1] == '.'))
+		unlen--;
+	return unlen;
+}
diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c
new file mode 100644
index 0000000..74be5c4
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_denode.c
@@ -0,0 +1,712 @@
+/* $Id: msdosfs_denode.c,v 1.43 1998/12/07 21:58:34 archie Exp $ */
+/* $NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/fat.h>
+
+ static MALLOC_DEFINE(M_MSDOSFSNODE, "MSDOSFS node", "MSDOSFS vnode private part");
+
+ static struct denode **dehashtbl; /* denode hash chains; allocated by hashinit() in msdosfs_init() */
+ static u_long dehash; /* size of hash table - 1; used as the index mask in DEHASH() */
+ #define DEHASH(dev, dcl, doff) (dehashtbl[((dev) + (dcl) + (doff) / \
+ sizeof(struct direntry)) & dehash])
+ #ifndef NULL_SIMPLELOCKS
+ static struct simplelock dehash_slock; /* held by msdosfs_hashget/hashins/hashrem while they touch the chains */
+ #endif
+
+ union _qcvt { /* lets SETHIGH()/SETLOW() overwrite one word of a quad_t in place */
+ quad_t qcvt; /* the whole quad */
+ long val[2]; /* its words, indexed by _QUAD_HIGHWORD/_QUAD_LOWWORD; NOTE(review): assumes two longs exactly cover a quad_t -- confirm on LP64 targets */
+ };
+ #define SETHIGH(q, h) { \
+ union _qcvt tmp; \
+ tmp.qcvt = (q); \
+ tmp.val[_QUAD_HIGHWORD] = (h); \
+ (q) = tmp.qcvt; \
+ }
+ #define SETLOW(q, l) { \
+ union _qcvt tmp; \
+ tmp.qcvt = (q); \
+ tmp.val[_QUAD_LOWWORD] = (l); \
+ (q) = tmp.qcvt; \
+ }
+
+ static struct denode *
+ msdosfs_hashget __P((dev_t dev, u_long dirclust,
+ u_long diroff));
+ static void msdosfs_hashins __P((struct denode *dep));
+ static void msdosfs_hashrem __P((struct denode *dep));
+
+ /*
+ * Filesystem initialization hook: set up the lock protecting the denode
+ * hash and allocate the hash table itself, sized from desiredvnodes / 2.
+ * hashinit() stores the table mask in dehash.  vfsp is not used.
+ * Always returns 0.
+ */
+ /*ARGSUSED*/
+ int
+ msdosfs_init(vfsp)
+ 	struct vfsconf *vfsp;
+ {
+ 	int nelements = desiredvnodes / 2;
+
+ 	simple_lock_init(&dehash_slock);
+ 	dehashtbl = hashinit(nelements, M_MSDOSFSMNT, &dehash);
+ 	return (0);
+ }
+
+ /*
+ * Look up the cached denode for directory entry (dev, dirclust, diroff).
+ *
+ * Entries whose de_refcnt is 0 are skipped.  On a hit the vnode is
+ * acquired with vget(LK_EXCLUSIVE | LK_INTERLOCK) and the denode is
+ * returned; if vget() fails the whole search is repeated.  Returns NULL
+ * when no matching entry is on the hash chain.
+ */
+ static struct denode *
+ msdosfs_hashget(dev, dirclust, diroff)
+ 	dev_t dev;
+ 	u_long dirclust;
+ 	u_long diroff;
+ {
+ 	struct proc *p = curproc;	/* XXX */
+ 	struct denode *cand;
+ 	struct vnode *vp;
+
+ 	for (;;) {
+ 		simple_lock(&dehash_slock);
+ 		cand = DEHASH(dev, dirclust, diroff);
+ 		while (cand != NULL) {
+ 			if (dirclust == cand->de_dirclust
+ 			    && diroff == cand->de_diroffset
+ 			    && dev == cand->de_dev
+ 			    && cand->de_refcnt != 0)
+ 				break;
+ 			cand = cand->de_next;
+ 		}
+ 		if (cand == NULL) {
+ 			simple_unlock(&dehash_slock);
+ 			return (NULL);
+ 		}
+
+ 		/* take the vnode interlock before letting go of the hash lock */
+ 		vp = DETOV(cand);
+ 		simple_lock(&vp->v_interlock);
+ 		simple_unlock(&dehash_slock);
+ 		if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p) == 0)
+ 			return (cand);
+ 		/* vget() failed: rescan the chain from the start */
+ 	}
+ }
+
+ /*
+ * Link dep at the head of the hash chain selected by its de_dev,
+ * de_dirclust and de_diroffset.  de_prev is a back pointer to whatever
+ * pointer currently points at the denode (bucket head or a de_next).
+ */
+ static void
+ msdosfs_hashins(dep)
+ 	struct denode *dep;
+ {
+ 	struct denode **bucket, *oldhead;
+
+ 	simple_lock(&dehash_slock);
+ 	bucket = &DEHASH(dep->de_dev, dep->de_dirclust, dep->de_diroffset);
+ 	oldhead = *bucket;
+ 	dep->de_next = oldhead;
+ 	dep->de_prev = bucket;
+ 	if (oldhead != NULL)
+ 		oldhead->de_prev = &dep->de_next;
+ 	*bucket = dep;
+ 	simple_unlock(&dehash_slock);
+ }
+
+ /*
+ * Unlink dep from its hash chain.  With DIAGNOSTIC the denode's own
+ * link fields are cleared afterwards.
+ */
+ static void
+ msdosfs_hashrem(dep)
+ 	struct denode *dep;
+ {
+ 	struct denode *succ;
+
+ 	simple_lock(&dehash_slock);
+ 	succ = dep->de_next;
+ 	if (succ != NULL)
+ 		succ->de_prev = dep->de_prev;
+ 	/* make whatever pointed at dep point at its successor instead */
+ 	*dep->de_prev = succ;
+ #ifdef DIAGNOSTIC
+ 	dep->de_next = NULL;
+ 	dep->de_prev = NULL;
+ #endif
+ 	simple_unlock(&dehash_slock);
+ }
+
+/*
+ * If deget() succeeds it returns with the gotten denode locked().
+ *
+ * pmp - address of msdosfsmount structure of the filesystem containing
+ * the denode of interest. The pm_dev field and the address of
+ * the msdosfsmount structure are used.
+ * dirclust - which cluster bp contains, if dirclust is 0 (root directory)
+ * diroffset is relative to the beginning of the root directory,
+ * otherwise it is cluster relative.
+ * diroffset - offset past begin of cluster of denode we want
+ * depp - returns the address of the gotten denode.
+ */
+int
+deget(pmp, dirclust, diroffset, depp)
+ struct msdosfsmount *pmp; /* so we know the maj/min number */
+ u_long dirclust; /* cluster this dir entry came from */
+ u_long diroffset; /* index of entry within the cluster */
+ struct denode **depp; /* returns the addr of the gotten denode */
+{
+ int error;
+ dev_t dev = pmp->pm_dev;
+ struct mount *mntp = pmp->pm_mountp;
+ struct direntry *direntptr;
+ struct denode *ldep;
+ struct vnode *nvp;
+ struct buf *bp;
+ struct proc *p = curproc; /* XXX */
+ struct timeval tv;
+
+#ifdef MSDOSFS_DEBUG
+ printf("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n",
+ pmp, dirclust, diroffset, depp);
+#endif
+
+ /*
+ * On FAT32 filesystems, root is a (more or less) normal
+ * directory
+ */
+ if (FAT32(pmp) && dirclust == MSDOSFSROOT)
+ dirclust = pmp->pm_rootdirblk;
+
+ /*
+ * See if the denode is in the denode cache. Use the location of
+ * the directory entry to compute the hash value. For subdir use
+ * address of "." entry. For root dir (if not FAT32) use cluster
+ * MSDOSFSROOT, offset MSDOSFSROOT_OFS
+ *
+ * NOTE: The check for de_refcnt > 0 below insures the denode being
+ * examined does not represent an unlinked but still open file.
+ * These files are not to be accessible even when the directory
+ * entry that represented the file happens to be reused while the
+ * deleted file is still open.
+ */
+ ldep = msdosfs_hashget(dev, dirclust, diroffset);
+ if (ldep) {
+ *depp = ldep;
+ return (0);
+ }
+
+ /*
+ * Do the MALLOC before the getnewvnode since doing so afterward
+ * might cause a bogus v_data pointer to get dereferenced
+ * elsewhere if MALLOC should block.
+ */
+ MALLOC(ldep, struct denode *, sizeof(struct denode), M_MSDOSFSNODE, M_WAITOK);
+
+ /*
+ * Directory entry was not in cache, have to create a vnode and
+ * copy it from the passed disk buffer.
+ */
+ /* getnewvnode() does a VREF() on the vnode */
+ error = getnewvnode(VT_MSDOSFS, mntp, msdosfs_vnodeop_p, &nvp);
+ if (error) {
+ *depp = NULL;
+ FREE(ldep, M_MSDOSFSNODE);
+ return error;
+ }
+ bzero((caddr_t)ldep, sizeof *ldep);
+ lockinit(&ldep->de_lock, PINOD, "denode", 0, 0);
+ nvp->v_data = ldep;
+ ldep->de_vnode = nvp;
+ ldep->de_flag = 0;
+ ldep->de_devvp = 0;
+ ldep->de_dev = dev;
+ ldep->de_dirclust = dirclust;
+ ldep->de_diroffset = diroffset;
+ fc_purge(ldep, 0); /* init the fat cache for this denode */
+
+ /*
+ * Lock the denode so that it can't be accessed until we've read
+ * it in and have done what we need to it. Do this here instead
+ * of at the start of msdosfs_hashins() so that reinsert() can
+ * call msdosfs_hashins() with a locked denode.
+ */
+ if (lockmgr(&ldep->de_lock, LK_EXCLUSIVE, (struct simplelock *)0, p))
+ panic("deget: unexpected lock failure");
+
+ /*
+ * Insert the denode into the hash queue.
+ */
+ msdosfs_hashins(ldep);
+
+ ldep->de_pmp = pmp;
+ ldep->de_refcnt = 1;
+ /*
+ * Copy the directory entry into the denode area of the vnode.
+ */
+ if ((dirclust == MSDOSFSROOT
+ || (FAT32(pmp) && dirclust == pmp->pm_rootdirblk))
+ && diroffset == MSDOSFSROOT_OFS) {
+ /*
+ * Directory entry for the root directory. There isn't one,
+ * so we manufacture one. We should probably rummage
+ * through the root directory and find a label entry (if it
+ * exists), and then use the time and date from that entry
+ * as the time and date for the root denode.
+ */
+ nvp->v_flag |= VROOT; /* should be further down XXX */
+
+ ldep->de_Attributes = ATTR_DIRECTORY;
+ ldep->de_LowerCase = 0;
+ if (FAT32(pmp))
+ ldep->de_StartCluster = pmp->pm_rootdirblk;
+ /* de_FileSize will be filled in further down */
+ else {
+ ldep->de_StartCluster = MSDOSFSROOT;
+ ldep->de_FileSize = pmp->pm_rootdirsize * pmp->pm_BytesPerSec;
+ }
+ /*
+ * fill in time and date so that dos2unixtime() doesn't
+ * spit up when called from msdosfs_getattr() with root
+ * denode
+ */
+ ldep->de_CHun = 0;
+ ldep->de_CTime = 0x0000; /* 00:00:00 */
+ ldep->de_CDate = (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT)
+ | (1 << DD_DAY_SHIFT);
+ /* Jan 1, 1980 */
+ ldep->de_ADate = ldep->de_CDate;
+ ldep->de_MTime = ldep->de_CTime;
+ ldep->de_MDate = ldep->de_CDate;
+ /* leave the other fields as garbage */
+ } else {
+ error = readep(pmp, dirclust, diroffset, &bp, &direntptr);
+ if (error) {
+ /*
+ * The denode does not contain anything useful, so
+ * it would be wrong to leave it on its hash chain.
+ * Arrange for vput() to just forget about it.
+ */
+ ldep->de_Name[0] = SLOT_DELETED;
+
+ vput(nvp);
+ *depp = NULL;
+ return (error);
+ }
+ DE_INTERNALIZE(ldep, direntptr);
+ brelse(bp);
+ }
+
+ /*
+ * Fill in a few fields of the vnode and finish filling in the
+ * denode. Then return the address of the found denode.
+ */
+ if (ldep->de_Attributes & ATTR_DIRECTORY) {
+ /*
+ * Since DOS directory entries that describe directories
+ * have 0 in the filesize field, we take this opportunity
+ * to find out the length of the directory and plug it into
+ * the denode structure.
+ */
+ u_long size;
+
+ nvp->v_type = VDIR;
+ if (ldep->de_StartCluster != MSDOSFSROOT) {
+ error = pcbmap(ldep, 0xffff, 0, &size, 0);
+ if (error == E2BIG) {
+ ldep->de_FileSize = de_cn2off(pmp, size);
+ error = 0;
+ } else
+ printf("deget(): pcbmap returned %d\n", error);
+ }
+ } else
+ nvp->v_type = VREG;
+ getmicrouptime(&tv);
+ SETHIGH(ldep->de_modrev, tv.tv_sec);
+ SETLOW(ldep->de_modrev, tv.tv_usec * 4294);
+ ldep->de_devvp = pmp->pm_devvp;
+ VREF(ldep->de_devvp);
+ *depp = ldep;
+ return (0);
+}
+
+ /*
+ * Push the in-core denode back to its directory entry.
+ *
+ * Nothing is done on a read-only mount.  Otherwise the timestamps are
+ * brought up to date with DETIMES(); if DE_MODIFIED is not set
+ * afterwards the function returns, else the flag is cleared.  No
+ * directory entry is written for directories or for denodes whose
+ * de_refcnt is <= 0.
+ *
+ * waitfor - non-zero: write with bwrite() and return its result;
+ *           zero: queue a delayed write with bdwrite().
+ *
+ * Returns 0, the error from readde(), or the result of bwrite().
+ */
+ int
+ deupdat(dep, waitfor)
+ 	struct denode *dep;
+ 	int waitfor;
+ {
+ 	struct timespec now;
+ 	struct direntry *slot;
+ 	struct buf *dbuf;
+ 	int error;
+
+ 	if (DETOV(dep)->v_mount->mnt_flag & MNT_RDONLY)
+ 		return (0);
+
+ 	getnanotime(&now);
+ 	DETIMES(dep, &now, &now, &now);
+ 	if (!(dep->de_flag & DE_MODIFIED))
+ 		return (0);
+ 	dep->de_flag &= ~DE_MODIFIED;
+
+ 	if ((dep->de_Attributes & ATTR_DIRECTORY) || dep->de_refcnt <= 0)
+ 		return (0);
+
+ 	error = readde(dep, &dbuf, &slot);
+ 	if (error != 0)
+ 		return (error);
+ 	DE_EXTERNALIZE(slot, dep);
+
+ 	if (!waitfor) {
+ 		bdwrite(dbuf);
+ 		return (0);
+ 	}
+ 	return (bwrite(dbuf));
+ }
+
+/*
+ * Truncate the file described by dep to the length specified by length.
+ */
+int
+detrunc(dep, length, flags, cred, p)
+ struct denode *dep;
+ u_long length;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+{
+ int error;
+ int allerror;
+ u_long eofentry;
+ u_long chaintofree;
+ daddr_t bn;
+ int boff;
+ int isadir = dep->de_Attributes & ATTR_DIRECTORY;
+ struct buf *bp;
+ struct msdosfsmount *pmp = dep->de_pmp;
+
+#ifdef MSDOSFS_DEBUG
+ printf("detrunc(): file %s, length %lu, flags %x\n", dep->de_Name, length, flags);
+#endif
+
+ /*
+ * Disallow attempts to truncate the root directory since it is of
+ * fixed size. That's just the way dos filesystems are. We use
+ * the VROOT bit in the vnode because checking for the directory
+ * bit and a startcluster of 0 in the denode is not adequate to
+ * recognize the root directory at this point in a file or
+ * directory's life.
+ */
+ if ((DETOV(dep)->v_flag & VROOT) && !FAT32(pmp)) {
+ printf("detrunc(): can't truncate root directory, clust %ld, offset %ld\n",
+ dep->de_dirclust, dep->de_diroffset);
+ return (EINVAL);
+ }
+
+
+ if (dep->de_FileSize < length) {
+ vnode_pager_setsize(DETOV(dep), length);
+ return deextend(dep, length, cred);
+ }
+
+ /*
+ * If the desired length is 0 then remember the starting cluster of
+ * the file and set the StartCluster field in the directory entry
+ * to 0. If the desired length is not zero, then get the number of
+ * the last cluster in the shortened file. Then get the number of
+ * the first cluster in the part of the file that is to be freed.
+ * Then set the next cluster pointer in the last cluster of the
+ * file to CLUST_EOFE.
+ */
+ if (length == 0) {
+ chaintofree = dep->de_StartCluster;
+ dep->de_StartCluster = 0;
+ eofentry = ~0;
+ } else {
+ error = pcbmap(dep, de_clcount(pmp, length) - 1, 0,
+ &eofentry, 0);
+ if (error) {
+#ifdef MSDOSFS_DEBUG
+ printf("detrunc(): pcbmap fails %d\n", error);
+#endif
+ return (error);
+ }
+ }
+
+ fc_purge(dep, de_clcount(pmp, length));
+
+ /*
+ * If the new length is not a multiple of the cluster size then we
+ * must zero the tail end of the new last cluster in case it
+ * becomes part of the file again because of a seek.
+ */
+ if ((boff = length & pmp->pm_crbomask) != 0) {
+ if (isadir) {
+ bn = cntobn(pmp, eofentry);
+ error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
+ NOCRED, &bp);
+ } else {
+ bn = de_blk(pmp, length);
+ error = bread(DETOV(dep), bn, pmp->pm_bpcluster,
+ NOCRED, &bp);
+ }
+ if (error) {
+ brelse(bp);
+#ifdef MSDOSFS_DEBUG
+ printf("detrunc(): bread fails %d\n", error);
+#endif
+ return (error);
+ }
+ /*
+ * is this the right place for it?
+ */
+ bzero(bp->b_data + boff, pmp->pm_bpcluster - boff);
+ if (flags & IO_SYNC)
+ bwrite(bp);
+ else
+ bdwrite(bp);
+ }
+
+ /*
+ * Write out the updated directory entry. Even if the update fails
+ * we free the trailing clusters.
+ */
+ dep->de_FileSize = length;
+ if (!isadir)
+ dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+ allerror = vtruncbuf(DETOV(dep), cred, p, length, pmp->pm_bpcluster);
+#ifdef MSDOSFS_DEBUG
+ if (allerror)
+ printf("detrunc(): vtruncbuf error %d\n", allerror);
+#endif
+ error = deupdat(dep, 1);
+ if (error && (allerror == 0))
+ allerror = error;
+#ifdef MSDOSFS_DEBUG
+ printf("detrunc(): allerror %d, eofentry %lu\n",
+ allerror, eofentry);
+#endif
+
+ /*
+ * If we need to break the cluster chain for the file then do it
+ * now.
+ */
+ if (eofentry != ~0) {
+ error = fatentry(FAT_GET_AND_SET, pmp, eofentry,
+ &chaintofree, CLUST_EOFE);
+ if (error) {
+#ifdef MSDOSFS_DEBUG
+ printf("detrunc(): fatentry errors %d\n", error);
+#endif
+ return (error);
+ }
+ fc_setcache(dep, FC_LASTFC, de_cluster(pmp, length - 1),
+ eofentry);
+ }
+
+ /*
+ * Now free the clusters removed from the file because of the
+ * truncation.
+ */
+ if (chaintofree != 0 && !MSDOSFSEOF(pmp, chaintofree))
+ freeclusterchain(pmp, chaintofree);
+
+ return (allerror);
+}
+
+/*
+ * Extend the file described by dep to length specified by length.
+ */
+int
+deextend(dep, length, cred)
+ struct denode *dep;
+ u_long length;
+ struct ucred *cred;
+{
+ struct msdosfsmount *pmp = dep->de_pmp;
+ u_long count;
+ int error;
+
+ /*
+ * The root of a DOS filesystem cannot be extended.
+ */
+ if ((DETOV(dep)->v_flag & VROOT) && !FAT32(pmp))
+ return (EINVAL);
+
+ /*
+ * Directories cannot be extended.
+ */
+ if (dep->de_Attributes & ATTR_DIRECTORY)
+ return (EISDIR);
+
+ if (length <= dep->de_FileSize)
+ panic("deextend: file too large");
+
+ /*
+ * Compute the number of clusters to allocate.
+ */
+ count = de_clcount(pmp, length) - de_clcount(pmp, dep->de_FileSize);
+ if (count > 0) {
+ if (count > pmp->pm_freeclustercount)
+ return (ENOSPC);
+ error = extendfile(dep, count, NULL, NULL, DE_CLEAR);
+ if (error) {
+ /* truncate the added clusters away again */
+ (void) detrunc(dep, dep->de_FileSize, 0, cred, NULL);
+ return (error);
+ }
+ }
+ dep->de_FileSize = length;
+ dep->de_flag |= DE_UPDATE|DE_MODIFIED;
+ return (deupdat(dep, 1));
+}
+
+/*
+ * Move a denode to its correct hash queue after the file it represents has
+ * been moved to a new directory.
+ */
+void
+reinsert(dep)
+ struct denode *dep;
+{
+ /*
+ * Fix up the denode cache. If the denode is for a directory,
+ * there is nothing to do since the hash is based on the starting
+ * cluster of the directory file and that hasn't changed. If for a
+ * file the hash is based on the location of the directory entry,
+ * so we must remove it from the cache and re-enter it with the
+ * hash based on the new location of the directory entry.
+ */
+ if (dep->de_Attributes & ATTR_DIRECTORY)
+ return;
+ msdosfs_hashrem(dep);
+ msdosfs_hashins(dep);
+}
+
+ /*
+ * Reclaim a vnode: detach and free the denode behind it.
+ *
+ * The denode is removed from the hash, the name cache entries of the
+ * vnode are purged, the reference on the device vnode (if any) is
+ * dropped, and the denode memory is released.  Always returns 0.
+ */
+ int
+ msdosfs_reclaim(ap)
+ 	struct vop_reclaim_args /* {
+ 		struct vnode *a_vp;
+ 	} */ *ap;
+ {
+ 	struct vnode *node = ap->a_vp;
+ 	struct denode *de = VTODE(node);
+
+ #ifdef MSDOSFS_DEBUG
+ 	printf("msdosfs_reclaim(): dep %p, file %s, refcnt %ld\n",
+ 	    de, de->de_Name, de->de_refcnt);
+ #endif
+
+ 	if (prtactive && node->v_usecount != 0)
+ 		vprint("msdosfs_reclaim(): pushing active", node);
+
+ 	/* take the denode off its hash chain */
+ 	msdosfs_hashrem(de);
+
+ 	/* drop everything else that still refers to it */
+ 	cache_purge(node);
+ 	if (de->de_devvp != NULL) {
+ 		vrele(de->de_devvp);
+ 		de->de_devvp = NULL;
+ 	}
+ #if 0 /* XXX */
+ 	de->de_flag = 0;
+ #endif
+ 	FREE(de, M_MSDOSFSNODE);
+ 	node->v_data = NULL;
+
+ 	return (0);
+ }
+
+ /*
+ * Called when the last use of a vnode goes away.
+ *
+ * Denodes already marked SLOT_DELETED (stale file handles) are only
+ * unlocked and recycled.  Otherwise, if the file has no references left
+ * in the directory tree (de_refcnt <= 0) and the mount is writable, it
+ * is truncated to zero length and its slot marked deleted; the denode
+ * is then written back with deupdat().  The vnode is unlocked in every
+ * case and recycled at once when the slot is marked deleted.
+ *
+ * Returns the result of detrunc(), or 0 when no truncation was done.
+ */
+ int
+ msdosfs_inactive(ap)
+ 	struct vop_inactive_args /* {
+ 		struct vnode *a_vp;
+ 		struct proc *a_p;
+ 	} */ *ap;
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct proc *p = ap->a_p;
+ 	struct denode *dep = VTODE(vp);
+ 	int error = 0;
+
+ #ifdef MSDOSFS_DEBUG
+ 	printf("msdosfs_inactive(): dep %p, de_Name[0] %x\n", dep, dep->de_Name[0]);
+ #endif
+
+ 	if (prtactive && vp->v_usecount != 0)
+ 		vprint("msdosfs_inactive(): pushing active", vp);
+
+ 	/* denodes related to stale file handles skip straight to the unlock */
+ 	if (dep->de_Name[0] != SLOT_DELETED) {
+ #ifdef MSDOSFS_DEBUG
+ 		printf("msdosfs_inactive(): dep %p, refcnt %ld, mntflag %x, MNT_RDONLY %x\n",
+ 		    dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY);
+ #endif
+ 		/*
+ 		 * A deleted file on a read/write filesystem: give back its
+ 		 * clusters and mark the directory slot as empty.
+ 		 */
+ 		if (!(vp->v_mount->mnt_flag & MNT_RDONLY) && dep->de_refcnt <= 0) {
+ 			error = detrunc(dep, (u_long) 0, 0, NOCRED, p);
+ 			dep->de_flag |= DE_UPDATE;
+ 			dep->de_Name[0] = SLOT_DELETED;
+ 		}
+ 		deupdat(dep, 0);
+ 	}
+
+ 	VOP_UNLOCK(vp, 0, p);
+ #ifdef MSDOSFS_DEBUG
+ 	printf("msdosfs_inactive(): v_usecount %d, de_Name[0] %x\n", vp->v_usecount,
+ 	    dep->de_Name[0]);
+ #endif
+ 	/* done with the denode for good: let the vnode be reused immediately */
+ 	if (dep->de_Name[0] == SLOT_DELETED)
+ 		vrecycle(vp, (struct simplelock *)0, p);
+ 	return (error);
+ }
diff --git a/sys/fs/msdosfs/msdosfs_fat.c b/sys/fs/msdosfs/msdosfs_fat.c
new file mode 100644
index 0000000..1ec29db
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_fat.c
@@ -0,0 +1,1100 @@
+/* $Id: msdosfs_fat.c,v 1.20 1998/04/06 11:39:04 phk Exp $ */
+/* $NetBSD: msdosfs_fat.c,v 1.28 1997/11/17 15:36:49 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+/*
+ * kernel include files.
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/mount.h> /* to define statfs structure */
+#include <sys/vnode.h> /* to define vattr structure */
+
+/*
+ * msdosfs include files.
+ */
+#include <msdosfs/bpb.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/fat.h>
+
+/*
+ * Fat cache stats.
+ */
+static int fc_fileextends; /* # of file extends */
+static int fc_lfcempty; /* # of time last file cluster cache entry
+ * was empty */
+static int fc_bmapcalls; /* # of times pcbmap was called */
+
+#define LMMAX 20
+static int fc_lmdistance[LMMAX];/* counters for how far off the last
+ * cluster mapped entry was. */
+static int fc_largedistance; /* off by more than LMMAX */
+
+static int chainalloc __P((struct msdosfsmount *pmp, u_long start,
+ u_long count, u_long fillwith,
+ u_long *retcluster, u_long *got));
+static int chainlength __P((struct msdosfsmount *pmp, u_long start,
+ u_long count));
+static void fatblock __P((struct msdosfsmount *pmp, u_long ofs,
+ u_long *bnp, u_long *sizep, u_long *bop));
+static int fatchain __P((struct msdosfsmount *pmp, u_long start,
+ u_long count, u_long fillwith));
+static void fc_lookup __P((struct denode *dep, u_long findcn,
+ u_long *frcnp, u_long *fsrcnp));
+static void updatefats __P((struct msdosfsmount *pmp, struct buf *bp,
+ u_long fatbn));
+static __inline void
+ usemap_alloc __P((struct msdosfsmount *pmp, u_long cn));
+static __inline void
+ usemap_free __P((struct msdosfsmount *pmp, u_long cn));
+
+ /*
+ * Translate a byte offset into the FAT into the block that holds it.
+ *
+ * pmp   - mount whose FAT geometry is used
+ * ofs   - byte offset into the FAT
+ * bnp   - if non-null, receives the block number of the FAT block,
+ *         taken within the FAT copy selected by pm_curfat
+ * sizep - if non-null, receives the size of that block in bytes (the
+ *         last block of the FAT may be shorter than a full FAT block)
+ * bop   - if non-null, receives the offset of ofs within the block
+ */
+ static void
+ fatblock(pmp, ofs, bnp, sizep, bop)
+ 	struct msdosfsmount *pmp;
+ 	u_long ofs;
+ 	u_long *bnp;
+ 	u_long *sizep;
+ 	u_long *bop;
+ {
+ 	u_long fatsec, nbytes;
+
+ 	/* sector, relative to the start of one FAT copy, of the block */
+ 	fatsec = ofs / pmp->pm_fatblocksize * pmp->pm_fatblocksec;
+ 	nbytes = min(pmp->pm_fatblocksec, pmp->pm_FATsecs - fatsec)
+ 	    * pmp->pm_BytesPerSec;
+
+ 	if (bnp != NULL)
+ 		*bnp = fatsec
+ 		    + (pmp->pm_fatblk + pmp->pm_curfat * pmp->pm_FATsecs);
+ 	if (sizep != NULL)
+ 		*sizep = nbytes;
+ 	if (bop != NULL)
+ 		*bop = ofs % pmp->pm_fatblocksize;
+ }
+
+/*
+ * Map the logical cluster number of a file into a physical disk sector
+ * that is filesystem relative.
+ *
+ * dep - address of denode representing the file of interest
+ * findcn - file relative cluster whose filesystem relative cluster number
+ * and/or block number are/is to be found
+ * bnp - address of where to place the file system relative block number.
+ * If this pointer is null then don't return this quantity.
+ * cnp - address of where to place the file system relative cluster number.
+ * If this pointer is null then don't return this quantity.
+ *
+ * NOTE: Either bnp or cnp must be non-null.
+ * This function has one side effect. If the requested file relative cluster
+ * is beyond the end of file, then the actual number of clusters in the file
+ * is returned in *cnp. This is useful for determining how long a directory is.
+ * If cnp is null, nothing is returned.
+ */
+int
+pcbmap(dep, findcn, bnp, cnp, sp)
+ struct denode *dep;
+ u_long findcn; /* file relative cluster to get */
+ daddr_t *bnp; /* returned filesys relative blk number */
+ u_long *cnp; /* returned cluster number */
+ int *sp; /* returned block size */
+{
+ int error;
+ u_long i;
+ u_long cn;
+ u_long prevcn = 0; /* XXX: prevcn could be used unititialized */
+ u_long byteoffset;
+ u_long bn;
+ u_long bo;
+ struct buf *bp = NULL;
+ u_long bp_bn = -1;
+ struct msdosfsmount *pmp = dep->de_pmp;
+ u_long bsize;
+
+ fc_bmapcalls++;
+
+ /*
+ * If they don't give us someplace to return a value then don't
+ * bother doing anything.
+ */
+ if (bnp == NULL && cnp == NULL && sp == NULL)
+ return (0);
+
+ cn = dep->de_StartCluster;
+ /*
+ * The "file" that makes up the root directory is contiguous,
+ * permanently allocated, of fixed size, and is not made up of
+ * clusters. If the cluster number is beyond the end of the root
+ * directory, then return the number of clusters in the file.
+ */
+ if (cn == MSDOSFSROOT) {
+ if (dep->de_Attributes & ATTR_DIRECTORY) {
+ if (de_cn2off(pmp, findcn) >= dep->de_FileSize) {
+ if (cnp)
+ *cnp = de_bn2cn(pmp, pmp->pm_rootdirsize);
+ return (E2BIG);
+ }
+ if (bnp)
+ *bnp = pmp->pm_rootdirblk + de_cn2bn(pmp, findcn);
+ if (cnp)
+ *cnp = MSDOSFSROOT;
+ if (sp)
+ *sp = min(pmp->pm_bpcluster,
+ dep->de_FileSize - de_cn2off(pmp, findcn));
+ return (0);
+ } else { /* just an empty file */
+ if (cnp)
+ *cnp = 0;
+ return (E2BIG);
+ }
+ }
+
+ /*
+ * All other files do I/O in cluster sized blocks
+ */
+ if (sp)
+ *sp = pmp->pm_bpcluster;
+
+ /*
+ * Rummage around in the fat cache, maybe we can avoid tromping
+ * thru every fat entry for the file. And, keep track of how far
+ * off the cache was from where we wanted to be.
+ */
+ i = 0;
+ fc_lookup(dep, findcn, &i, &cn);
+ if ((bn = findcn - i) >= LMMAX)
+ fc_largedistance++;
+ else
+ fc_lmdistance[bn]++;
+
+ /*
+ * Handle all other files or directories the normal way.
+ */
+ for (; i < findcn; i++) {
+ /*
+ * Stop with all reserved clusters, not just with EOF.
+ */
+ if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+ goto hiteof;
+ byteoffset = FATOFS(pmp, cn);
+ fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+ if (bn != bp_bn) {
+ if (bp)
+ brelse(bp);
+ error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ bp_bn = bn;
+ }
+ prevcn = cn;
+ if (FAT32(pmp))
+ cn = getulong(&bp->b_data[bo]);
+ else
+ cn = getushort(&bp->b_data[bo]);
+ if (FAT12(pmp) && (prevcn & 1))
+ cn >>= 4;
+ cn &= pmp->pm_fatmask;
+
+ /*
+ * Force the special cluster numbers
+ * to be the same for all cluster sizes
+ * to let the rest of msdosfs handle
+ * all cases the same.
+ */
+ if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+ cn |= ~pmp->pm_fatmask;
+ }
+
+ if (!MSDOSFSEOF(pmp, cn)) {
+ if (bp)
+ brelse(bp);
+ if (bnp)
+ *bnp = cntobn(pmp, cn);
+ if (cnp)
+ *cnp = cn;
+ fc_setcache(dep, FC_LASTMAP, i, cn);
+ return (0);
+ }
+
+hiteof:;
+ if (cnp)
+ *cnp = i;
+ if (bp)
+ brelse(bp);
+ /* update last file cluster entry in the fat cache */
+ fc_setcache(dep, FC_LASTFC, i - 1, prevcn);
+ return (E2BIG);
+}
+
+/*
+ * Find the closest entry in the fat cache to the cluster we are looking
+ * for.
+ */
+static void
+fc_lookup(dep, findcn, frcnp, fsrcnp)
+ struct denode *dep;
+ u_long findcn;
+ u_long *frcnp;
+ u_long *fsrcnp;
+{
+ int i;
+ u_long cn;
+ struct fatcache *closest = 0;
+
+ for (i = 0; i < FC_SIZE; i++) {
+ cn = dep->de_fc[i].fc_frcn;
+ if (cn != FCE_EMPTY && cn <= findcn) {
+ if (closest == 0 || cn > closest->fc_frcn)
+ closest = &dep->de_fc[i];
+ }
+ }
+ if (closest) {
+ *frcnp = closest->fc_frcn;
+ *fsrcnp = closest->fc_fsrcn;
+ }
+}
+
+/*
+ * Purge the fat cache in denode dep of all entries relating to file
+ * relative cluster frcn and beyond.
+ */
+void
+fc_purge(dep, frcn)
+ struct denode *dep;
+ u_int frcn;
+{
+ int i;
+ struct fatcache *fcp;
+
+ fcp = dep->de_fc;
+ for (i = 0; i < FC_SIZE; i++, fcp++) {
+ if (fcp->fc_frcn >= frcn)
+ fcp->fc_frcn = FCE_EMPTY;
+ }
+}
+
+/*
+ * Update the fat.
+ * If mirroring the fat, update all copies, with the first copy as last.
+ * Else update only the current fat (ignoring the others).
+ *
+ * pmp - msdosfsmount structure for filesystem to update
+ * bp - addr of modified fat block
+ * fatbn - block number relative to begin of filesystem of the modified fat block.
+ */
+static void
+updatefats(pmp, bp, fatbn)
+ struct msdosfsmount *pmp;
+ struct buf *bp;
+ u_long fatbn;
+{
+ int i;
+ struct buf *bpn;
+
+#ifdef MSDOSFS_DEBUG
+ printf("updatefats(pmp %p, bp %p, fatbn %lu)\n", pmp, bp, fatbn);
+#endif
+
+ /*
+ * If we have an FSInfo block, update it.
+ */
+ if (pmp->pm_fsinfo) {
+ u_long cn = pmp->pm_nxtfree;
+
+ if (pmp->pm_freeclustercount
+ && (pmp->pm_inusemap[cn / N_INUSEBITS]
+ & (1 << (cn % N_INUSEBITS)))) {
+ /*
+ * The cluster indicated in FSInfo isn't free
+ * any longer. Got get a new free one.
+ */
+ for (cn = 0; cn < pmp->pm_maxcluster; cn += N_INUSEBITS)
+ if (pmp->pm_inusemap[cn / N_INUSEBITS] != (u_int)-1)
+ break;
+ pmp->pm_nxtfree = cn
+ + ffs(pmp->pm_inusemap[cn / N_INUSEBITS]
+ ^ (u_int)-1) - 1;
+ }
+ if (bread(pmp->pm_devvp, pmp->pm_fsinfo, 1024, NOCRED, &bpn) != 0) {
+ /*
+ * Ignore the error, but turn off FSInfo update for the future.
+ */
+ pmp->pm_fsinfo = 0;
+ brelse(bpn);
+ } else {
+ struct fsinfo *fp = (struct fsinfo *)bpn->b_data;
+
+ putulong(fp->fsinfree, pmp->pm_freeclustercount);
+ putulong(fp->fsinxtfree, pmp->pm_nxtfree);
+ if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT)
+ bwrite(bpn);
+ else
+ bdwrite(bpn);
+ }
+ }
+
+ if (pmp->pm_flags & MSDOSFS_FATMIRROR) {
+ /*
+ * Now copy the block(s) of the modified fat to the other copies of
+ * the fat and write them out. This is faster than reading in the
+ * other fats and then writing them back out. This could tie up
+ * the fat for quite a while. Preventing others from accessing it.
+ * To prevent us from going after the fat quite so much we use
+ * delayed writes, unless they specfied "synchronous" when the
+ * filesystem was mounted. If synch is asked for then use
+ * bwrite()'s and really slow things down.
+ */
+ for (i = 1; i < pmp->pm_FATs; i++) {
+ fatbn += pmp->pm_FATsecs;
+ /* getblk() never fails */
+ bpn = getblk(pmp->pm_devvp, fatbn, bp->b_bcount, 0, 0);
+ bcopy(bp->b_data, bpn->b_data, bp->b_bcount);
+ if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT)
+ bwrite(bpn);
+ else
+ bdwrite(bpn);
+ }
+ }
+
+ /*
+ * Write out the first (or current) fat last.
+ */
+ if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT)
+ bwrite(bp);
+ else
+ bdwrite(bp);
+ /*
+ * Maybe update fsinfo sector here?
+ */
+}
+
+/*
+ * Updating entries in 12 bit fats is a pain in the butt.
+ *
+ * The following picture shows where nibbles go when moving from a 12 bit
+ * cluster number into the appropriate bytes in the FAT.
+ *
+ * byte m byte m+1 byte m+2
+ * +----+----+ +----+----+ +----+----+
+ * | 0 1 | | 2 3 | | 4 5 | FAT bytes
+ * +----+----+ +----+----+ +----+----+
+ *
+ * +----+----+----+ +----+----+----+
+ * | 3 0 1 | | 4 5 2 |
+ * +----+----+----+ +----+----+----+
+ * cluster n cluster n+1
+ *
+ * Where n is even. m = n + (n >> 1)
+ *
+ */
+/*
+ * Mark cluster cn as allocated in the in-core "in use" bitmap and
+ * account for it by decrementing the free cluster count.
+ *
+ * pmp - mount point whose bitmap is updated
+ * cn  - cluster number to mark in use
+ */
+static __inline void
+usemap_alloc(pmp, cn)
+	struct msdosfsmount *pmp;
+	u_long cn;
+{
+
+	/*
+	 * Shift an unsigned one: the top bit of a map word is a valid
+	 * position and shifting a signed 1 into the sign bit is undefined.
+	 */
+	pmp->pm_inusemap[cn / N_INUSEBITS] |= (u_int)1 << (cn % N_INUSEBITS);
+	pmp->pm_freeclustercount--;
+}
+
+/*
+ * Mark cluster cn as free in the in-core "in use" bitmap and account
+ * for it by incrementing the free cluster count.
+ *
+ * pmp - mount point whose bitmap is updated
+ * cn  - cluster number to mark free
+ */
+static __inline void
+usemap_free(pmp, cn)
+	struct msdosfsmount *pmp;
+	u_long cn;
+{
+
+	pmp->pm_freeclustercount++;
+	/*
+	 * Shift an unsigned one: the top bit of a map word is a valid
+	 * position and shifting a signed 1 into the sign bit is undefined.
+	 */
+	pmp->pm_inusemap[cn / N_INUSEBITS] &=
+	    ~((u_int)1 << (cn % N_INUSEBITS));
+}
+
+/*
+ * Release a single cluster.
+ *
+ * pmp     - mount point the cluster belongs to
+ * cluster - cluster number to release
+ * oldcnp  - if non-null, receives the previous contents of the
+ *           cluster's fat entry
+ *
+ * The in-core bitmap and free count are updated first; if writing
+ * MSDOSFSFREE into the fat entry then fails, that update is undone and
+ * the error from fatentry() is returned.  Returns 0 on success.
+ */
+int
+clusterfree(pmp, cluster, oldcnp)
+	struct msdosfsmount *pmp;
+	u_long cluster;
+	u_long *oldcnp;
+{
+	u_long prev;
+	int rv;
+
+	usemap_free(pmp, cluster);
+	rv = fatentry(FAT_GET_AND_SET, pmp, cluster, &prev, MSDOSFSFREE);
+	if (rv == 0) {
+		/* The fat entry is free now; report what it used to hold. */
+		if (oldcnp != NULL)
+			*oldcnp = prev;
+		return (0);
+	}
+
+	/* The fat was not changed, so put the bitmap back as it was. */
+	usemap_alloc(pmp, cluster);
+	return (rv);
+}
+
+/*
+ * Get or Set or 'Get and Set' the cluster'th entry in the fat.
+ *
+ * function	- whether to get or set a fat entry (FAT_GET, FAT_SET or
+ *		  both)
+ * pmp		- address of the msdosfsmount structure for the filesystem
+ *		  whose fat is to be manipulated.
+ * cn		- which cluster is of interest
+ * oldcontents	- address of a word that is to receive the contents of the
+ *		  cluster'th entry if this is a get function
+ * newcontents	- the new value to be written into the cluster'th element of
+ *		  the fat if this is a set function.
+ *
+ * This function can also be used to free a cluster by setting the fat entry
+ * for a cluster to 0.
+ *
+ * All copies of the fat are updated if this is a set function.  NOTE: If
+ * fatentry() marks a cluster as free it does not update the inusemap in
+ * the msdosfsmount structure. This is left to the caller.
+ *
+ * Returns 0 on success, EINVAL if cn lies outside the filesystem (or, with
+ * DIAGNOSTIC, if the arguments are inconsistent), or the error from
+ * bread().
+ */
+int
+fatentry(function, pmp, cn, oldcontents, newcontents)
+	int function;
+	struct msdosfsmount *pmp;
+	u_long cn;
+	u_long *oldcontents;
+	u_long newcontents;
+{
+	int error;
+	u_long readcn;
+	u_long bn, bo, bsize, byteoffset;
+	struct buf *bp;
+
+#ifdef	MSDOSFS_DEBUG
+	printf("fatentry(func %d, pmp %p, clust %lu, oldcon %p, newcon %lx)\n",
+	    function, pmp, cn, oldcontents, newcontents);
+#endif
+
+#ifdef DIAGNOSTIC
+	/*
+	 * Be sure they asked us to do something.
+	 */
+	if ((function & (FAT_SET | FAT_GET)) == 0) {
+		printf("fatentry(): function code doesn't specify get or set\n");
+		return (EINVAL);
+	}
+
+	/*
+	 * If they asked us to return a cluster number but didn't tell us
+	 * where to put it, give them an error.
+	 */
+	if ((function & FAT_GET) && oldcontents == NULL) {
+		printf("fatentry(): get function with no place to put result\n");
+		return (EINVAL);
+	}
+#endif
+
+	/*
+	 * Be sure the requested cluster is in the filesystem.
+	 */
+	if (cn < CLUST_FIRST || cn > pmp->pm_maxcluster)
+		return (EINVAL);
+
+	byteoffset = FATOFS(pmp, cn);
+	fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+	error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (error);
+	}
+
+	if (function & FAT_GET) {
+		if (FAT32(pmp))
+			readcn = getulong(&bp->b_data[bo]);
+		else
+			readcn = getushort(&bp->b_data[bo]);
+		/*
+		 * An odd 12 bit entry lives in the upper three nibbles of
+		 * the 16 bits just read.  This is a logical test, so use
+		 * && (as fillinusemap() does) rather than bitwise &.
+		 */
+		if (FAT12(pmp) && (cn & 1))
+			readcn >>= 4;
+		readcn &= pmp->pm_fatmask;
+		/* map reserved fat entries to same values for all fats */
+		if ((readcn | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+			readcn |= ~pmp->pm_fatmask;
+		*oldcontents = readcn;
+	}
+	if (function & FAT_SET) {
+		switch (pmp->pm_fatmask) {
+		case FAT12_MASK:
+			/*
+			 * Merge the new 12 bits with the nibble that
+			 * belongs to the neighbouring entry.
+			 */
+			readcn = getushort(&bp->b_data[bo]);
+			if (cn & 1) {
+				readcn &= 0x000f;
+				readcn |= newcontents << 4;
+			} else {
+				readcn &= 0xf000;
+				readcn |= newcontents & 0xfff;
+			}
+			putushort(&bp->b_data[bo], readcn);
+			break;
+		case FAT16_MASK:
+			putushort(&bp->b_data[bo], newcontents);
+			break;
+		case FAT32_MASK:
+			/*
+			 * According to spec we have to retain the
+			 * high order bits of the fat entry.
+			 */
+			readcn = getulong(&bp->b_data[bo]);
+			readcn &= ~FAT32_MASK;
+			readcn |= newcontents & FAT32_MASK;
+			putulong(&bp->b_data[bo], readcn);
+			break;
+		}
+		/* updatefats() disposes of bp, so it must not be released. */
+		updatefats(pmp, bp, bn);
+		bp = NULL;
+		pmp->pm_fmod = 1;
+	}
+	if (bp)
+		brelse(bp);
+	return (0);
+}
+
+/*
+ * Update a contiguous cluster chain
+ *
+ * pmp - mount point
+ * start - first cluster of chain
+ * count - number of clusters in chain
+ * fillwith - what to write into fat entry of last cluster
+ *
+ * Every fat entry in the run except the last is set to the number of the
+ * cluster that follows it; the last entry is set to fillwith. Each fat
+ * block that was modified is handed to updatefats().
+ *
+ * Returns 0 on success, EINVAL if the run does not lie inside the
+ * filesystem, or the error from bread().
+ */
+static int
+fatchain(pmp, start, count, fillwith)
+ struct msdosfsmount *pmp;
+ u_long start;
+ u_long count;
+ u_long fillwith;
+{
+ int error;
+ u_long bn, bo, bsize, byteoffset, readcn, newc;
+ struct buf *bp;
+
+#ifdef MSDOSFS_DEBUG
+ printf("fatchain(pmp %p, start %lu, count %lu, fillwith %lx)\n",
+ pmp, start, count, fillwith);
+#endif
+ /*
+ * Be sure the clusters are in the filesystem.
+ */
+ if (start < CLUST_FIRST || start + count - 1 > pmp->pm_maxcluster)
+ return (EINVAL);
+
+ while (count > 0) { /* one pass per fat block */
+ byteoffset = FATOFS(pmp, start);
+ fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+ error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ while (count > 0) { /* one pass per fat entry in this block */
+ start++; /* start now names the NEXT cluster; bo still addresses the entry being written */
+ newc = --count > 0 ? start : fillwith;
+ switch (pmp->pm_fatmask) {
+ case FAT12_MASK:
+ readcn = getushort(&bp->b_data[bo]);
+ if (start & 1) { /* start was already incremented: odd here means the entry written is even */
+ readcn &= 0xf000;
+ readcn |= newc & 0xfff;
+ } else {
+ readcn &= 0x000f;
+ readcn |= newc << 4;
+ }
+ putushort(&bp->b_data[bo], readcn);
+ bo++;
+ if (!(start & 1)) /* after an odd entry advance a second byte */
+ bo++;
+ break;
+ case FAT16_MASK:
+ putushort(&bp->b_data[bo], newc);
+ bo += 2;
+ break;
+ case FAT32_MASK:
+ readcn = getulong(&bp->b_data[bo]);
+ readcn &= ~pmp->pm_fatmask;
+ readcn |= newc & pmp->pm_fatmask;
+ putulong(&bp->b_data[bo], readcn);
+ bo += 4;
+ break;
+ }
+ if (bo >= bsize) /* ran off this fat block; write it out and map the next */
+ break;
+ }
+ updatefats(pmp, bp, bn);
+ }
+ pmp->pm_fmod = 1;
+ return (0);
+}
+
+/*
+ * Check the length of a free cluster chain starting at start.
+ *
+ * pmp   - mount point
+ * start - start of chain
+ * count - maximum interesting length
+ *
+ * Returns the number of consecutive clusters, beginning with start, whose
+ * bits are clear in the in-use bitmap, capped at count.
+ */
+static int
+chainlength(pmp, start, count)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+{
+	u_long idx, max_idx;
+	u_int map;
+	u_long len;
+
+	max_idx = pmp->pm_maxcluster / N_INUSEBITS;
+	idx = start / N_INUSEBITS;
+	start %= N_INUSEBITS;
+	map = pmp->pm_inusemap[idx];
+	/*
+	 * Ignore the bits below start in the first word.  The mask is built
+	 * from an unsigned one so that start == N_INUSEBITS - 1 does not
+	 * shift into the sign bit and overflow a signed int.
+	 */
+	map &= ~(((u_int)1 << start) - 1);
+	if (map) {
+		/* An in-use cluster in the first word ends the run. */
+		len = ffs(map) - 1 - start;
+		return (len > count ? count : len);
+	}
+	len = N_INUSEBITS - start;
+	if (len >= count)
+		return (count);
+	/* The rest of the first word is free; keep going word by word. */
+	while (++idx <= max_idx) {
+		if (len >= count)
+			break;
+		map = pmp->pm_inusemap[idx];
+		if (map) {
+			len += ffs(map) - 1;
+			break;
+		}
+		len += N_INUSEBITS;
+	}
+	return (len > count ? count : len);
+}
+
+/*
+ * Allocate contiguous free clusters.
+ *
+ * pmp	      - mount point.
+ * start      - start of cluster chain.
+ * count      - number of clusters to allocate.
+ * fillwith   - put this value into the fat entry for the
+ *		last allocated cluster.
+ * retcluster - put the first allocated cluster's number here.
+ * got	      - how many clusters were actually allocated.
+ *
+ * The clusters are marked in use in the in-core bitmap and then chained
+ * together in the fat.  Returns 0 on success or the error from fatchain();
+ * on error the bitmap and free count are restored.
+ */
+static int
+chainalloc(pmp, start, count, fillwith, retcluster, got)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+	u_long fillwith;
+	u_long *retcluster;
+	u_long *got;
+{
+	int error;
+	u_long cl, n;
+
+	for (cl = start, n = count; n-- > 0;)
+		usemap_alloc(pmp, cl++);
+
+	error = fatchain(pmp, start, count, fillwith);
+	if (error != 0) {
+		/*
+		 * The caller gets no clusters, so do not leave them marked
+		 * in use (and the free count too low) until the next mount.
+		 */
+		for (cl = start, n = count; n-- > 0;)
+			usemap_free(pmp, cl++);
+		return (error);
+	}
+#ifdef MSDOSFS_DEBUG
+	printf("clusteralloc(): allocated cluster chain at %lu (%lu clusters)\n",
+	    start, count);
+#endif
+	if (retcluster)
+		*retcluster = start;
+	if (got)
+		*got = count;
+	return (0);
+}
+
+/*
+ * Allocate contiguous free clusters.
+ *
+ * pmp	      - mount point.
+ * start      - preferred start of cluster chain.
+ * count      - number of clusters requested.
+ * fillwith   - put this value into the fat entry for the
+ *		last allocated cluster.
+ * retcluster - put the first allocated cluster's number here.
+ * got	      - how many clusters were actually allocated.
+ *
+ * If count free clusters are available at start they are taken.  Otherwise
+ * the bitmap is scanned from a pseudo-random position to the end and then
+ * from the beginning up to that position; the first run of at least count
+ * clusters is taken.  If no run is long enough, the (shorter) run at start
+ * is preferred when it is not empty, else the longest run seen is used.
+ * Returns ENOSPC when no free cluster was found, otherwise the result of
+ * chainalloc().
+ */
+int
+clusteralloc(pmp, start, count, fillwith, retcluster, got)
+	struct msdosfsmount *pmp;
+	u_long start;
+	u_long count;
+	u_long fillwith;
+	u_long *retcluster;
+	u_long *got;
+{
+	u_long idx;
+	u_long len, newst, foundl, cn, l;
+	u_long foundcn = 0; /* XXX: foundcn could be used uninitialized */
+	u_int map;
+
+#ifdef MSDOSFS_DEBUG
+	printf("clusteralloc(): find %lu clusters\n",count);
+#endif
+	if (start) {
+		if ((len = chainlength(pmp, start, count)) >= count)
+			return (chainalloc(pmp, start, count, fillwith, retcluster, got));
+	} else
+		len = 0;
+
+	/*
+	 * Start at a (pseudo) random place to maximize cluster runs
+	 * under multiple writers.
+	 */
+	newst = random() % (pmp->pm_maxcluster + 1);
+	foundl = 0;
+
+	for (cn = newst; cn <= pmp->pm_maxcluster;) {
+		idx = cn / N_INUSEBITS;
+		map = pmp->pm_inusemap[idx];
+		/*
+		 * Pretend the clusters below cn in this word are in use.
+		 * The mask is built from an unsigned one so that bit
+		 * N_INUSEBITS - 1 does not overflow a signed int.
+		 */
+		map |= ((u_int)1 << (cn % N_INUSEBITS)) - 1;
+		if (map != (u_int)-1) {
+			cn = idx * N_INUSEBITS + ffs(map^(u_int)-1) - 1;
+			if ((l = chainlength(pmp, cn, count)) >= count)
+				return (chainalloc(pmp, cn, count, fillwith, retcluster, got));
+			if (l > foundl) {
+				foundcn = cn;
+				foundl = l;
+			}
+			/* Skip the run and the in-use cluster that ended it. */
+			cn += l + 1;
+			continue;
+		}
+		/* Nothing free in this word; go to the start of the next. */
+		cn += N_INUSEBITS - cn % N_INUSEBITS;
+	}
+	for (cn = 0; cn < newst;) {
+		idx = cn / N_INUSEBITS;
+		map = pmp->pm_inusemap[idx];
+		map |= ((u_int)1 << (cn % N_INUSEBITS)) - 1;
+		if (map != (u_int)-1) {
+			cn = idx * N_INUSEBITS + ffs(map^(u_int)-1) - 1;
+			if ((l = chainlength(pmp, cn, count)) >= count)
+				return (chainalloc(pmp, cn, count, fillwith, retcluster, got));
+			if (l > foundl) {
+				foundcn = cn;
+				foundl = l;
+			}
+			cn += l + 1;
+			continue;
+		}
+		cn += N_INUSEBITS - cn % N_INUSEBITS;
+	}
+
+	if (!foundl)
+		return (ENOSPC);
+
+	if (len)
+		return (chainalloc(pmp, start, len, fillwith, retcluster, got));
+	else
+		return (chainalloc(pmp, foundcn, foundl, fillwith, retcluster, got));
+}
+
+
+/*
+ * Free a chain of clusters.
+ *
+ * pmp - address of the msdosfs mount structure for the filesystem
+ * containing the cluster chain to be freed.
+ * cluster - number of the 1st cluster in the chain of clusters to be
+ * freed.
+ *
+ * For each cluster in the chain the in-use bitmap bit is cleared, the old
+ * fat entry is read to find the next cluster, and MSDOSFSFREE is written
+ * in its place. The walk stops as soon as the next cluster number is not
+ * within CLUST_FIRST .. pm_maxcluster. A fat block is passed to
+ * updatefats() when the walk moves on to another block and once more at
+ * the end.
+ *
+ * Returns 0, or the error from bread().
+ */
+int
+freeclusterchain(pmp, cluster)
+ struct msdosfsmount *pmp;
+ u_long cluster;
+{
+ int error;
+ struct buf *bp = NULL;
+ u_long bn, bo, bsize, byteoffset;
+ u_long readcn, lbn = -1; /* lbn: block number of the fat block held in bp */
+
+ while (cluster >= CLUST_FIRST && cluster <= pmp->pm_maxcluster) {
+ byteoffset = FATOFS(pmp, cluster);
+ fatblock(pmp, byteoffset, &bn, &bsize, &bo);
+ if (lbn != bn) { /* entry lives in a different fat block than the one held */
+ if (bp)
+ updatefats(pmp, bp, lbn);
+ error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ lbn = bn;
+ }
+ usemap_free(pmp, cluster);
+ switch (pmp->pm_fatmask) {
+ case FAT12_MASK:
+ readcn = getushort(&bp->b_data[bo]);
+ if (cluster & 1) {
+ cluster = readcn >> 4; /* next cluster is in the upper 12 bits */
+ readcn &= 0x000f;
+ readcn |= MSDOSFSFREE << 4;
+ } else {
+ cluster = readcn; /* masked with pm_fatmask below */
+ readcn &= 0xf000;
+ readcn |= MSDOSFSFREE & 0xfff;
+ }
+ putushort(&bp->b_data[bo], readcn);
+ break;
+ case FAT16_MASK:
+ cluster = getushort(&bp->b_data[bo]);
+ putushort(&bp->b_data[bo], MSDOSFSFREE);
+ break;
+ case FAT32_MASK:
+ cluster = getulong(&bp->b_data[bo]);
+ putulong(&bp->b_data[bo],
+ (MSDOSFSFREE & FAT32_MASK) | (cluster & ~FAT32_MASK)); /* keep the bits outside FAT32_MASK */
+ break;
+ }
+ cluster &= pmp->pm_fatmask;
+ if ((cluster | ~pmp->pm_fatmask) >= CLUST_RSRVD)
+ cluster |= pmp->pm_fatmask; /* NOTE(review): fatentry() ORs in ~pm_fatmask for this case -- confirm the difference is intended */
+ }
+ if (bp)
+ updatefats(pmp, bp, bn);
+ return (0);
+}
+
+/*
+ * Build the in-core "in use" bitmap from the on-disk fat.
+ *
+ * Every map word is first set to all ones (everything in use).  The fat
+ * is then read block by block and each cluster whose entry is zero is
+ * marked free, which also counts it in pm_freeclustercount.
+ *
+ * Returns 0 on success or the error from bread().
+ */
+int
+fillinusemap(pmp)
+	struct msdosfsmount *pmp;
+{
+	struct buf *fatbp = NULL;
+	u_long blkno, blksize, fatoff, inblk;
+	u_long clust, entry, word, nwords;
+	int rv;
+
+	/* Start out with every cluster marked as in use. */
+	nwords = (pmp->pm_maxcluster + N_INUSEBITS) / N_INUSEBITS;
+	for (word = 0; word < nwords; word++)
+		pmp->pm_inusemap[word] = (u_int)-1;
+
+	/* Scan the fat; an entry of zero denotes a free cluster. */
+	pmp->pm_freeclustercount = 0;
+	for (clust = CLUST_FIRST; clust <= pmp->pm_maxcluster; clust++) {
+		fatoff = FATOFS(pmp, clust);
+		inblk = fatoff % pmp->pm_fatblocksize;
+		if (fatbp == NULL || inblk == 0) {
+			/* Crossed into a fat block that is not loaded yet. */
+			if (fatbp != NULL)
+				brelse(fatbp);
+			fatblock(pmp, fatoff, &blkno, &blksize, NULL);
+			rv = bread(pmp->pm_devvp, blkno, blksize, NOCRED,
+			    &fatbp);
+			if (rv != 0) {
+				brelse(fatbp);
+				return (rv);
+			}
+		}
+
+		if (FAT32(pmp))
+			entry = getulong(&fatbp->b_data[inblk]);
+		else
+			entry = getushort(&fatbp->b_data[inblk]);
+		/* Odd 12 bit entries sit in the upper three nibbles. */
+		if (FAT12(pmp) && (clust & 1))
+			entry >>= 4;
+		entry &= pmp->pm_fatmask;
+
+		if (entry == 0)
+			usemap_free(pmp, clust);
+	}
+	brelse(fatbp);
+	return (0);
+}
+
+/*
+ * Allocate a new cluster and chain it onto the end of the file.
+ *
+ * dep - the file to extend
+ * count - number of clusters to allocate
+ * bpp - where to return the address of the buf header for the first new
+ * file block
+ * ncp - where to put cluster number of the first newly allocated cluster
+ * If this pointer is 0, do not return the cluster number.
+ * flags - see fat.h
+ *
+ * NOTE: This function is not responsible for turning on the DE_UPDATE bit of
+ * the de_flag field of the denode and it does not change the de_FileSize
+ * field. This is left for the caller to do.
+ *
+ * Returns 0 on success and ENOSPC on an attempt to extend the root
+ * directory; otherwise it returns whatever pcbmap() returned when that was
+ * not E2BIG, or the error from clusteralloc() or fatentry().
+ *
+ * With DE_CLEAR set in flags every new cluster's buffer is zeroed with
+ * clrbuf(); the first such buffer is handed back through bpp (when bpp is
+ * non-null) and all others are queued with bdwrite().
+ */
+int
+extendfile(dep, count, bpp, ncp, flags)
+ struct denode *dep;
+ u_long count;
+ struct buf **bpp;
+ u_long *ncp;
+ int flags;
+{
+ int error;
+ u_long frcn;
+ u_long cn, got;
+ struct msdosfsmount *pmp = dep->de_pmp;
+ struct buf *bp;
+
+ /*
+ * Don't try to extend the root directory
+ */
+ if (dep->de_StartCluster == MSDOSFSROOT
+ && (dep->de_Attributes & ATTR_DIRECTORY)) {
+ printf("extendfile(): attempt to extend root directory\n");
+ return (ENOSPC);
+ }
+
+ /*
+ * If the "file's last cluster" cache entry is empty, and the file
+ * is not empty, then fill the cache entry by calling pcbmap().
+ */
+ fc_fileextends++;
+ if (dep->de_fc[FC_LASTFC].fc_frcn == FCE_EMPTY &&
+ dep->de_StartCluster != 0) {
+ fc_lfcempty++;
+ error = pcbmap(dep, 0xffff, 0, &cn, 0);
+ /* we expect it to return E2BIG */
+ if (error != E2BIG)
+ return (error);
+ }
+
+ while (count > 0) {
+ /*
+ * Allocate a new cluster chain and cat onto the end of the
+ * file. If the file is empty we make de_StartCluster point
+ * to the new block. Note that de_StartCluster being 0 is
+ * sufficient to be sure the file is empty since we exclude
+ * attempts to extend the root directory above, and the root
+ * dir is the only file with a startcluster of 0 that has
+ * blocks allocated (sort of).
+ */
+ if (dep->de_StartCluster == 0)
+ cn = 0;
+ else
+ cn = dep->de_fc[FC_LASTFC].fc_fsrcn + 1; /* prefer the cluster right after the current last one */
+ error = clusteralloc(pmp, cn, count, CLUST_EOFE, &cn, &got);
+ if (error)
+ return (error);
+
+ count -= got; /* clusteralloc() may hand out fewer than asked; loop for the rest */
+
+ /*
+ * Give them the filesystem relative cluster number if they want
+ * it.
+ */
+ if (ncp) {
+ *ncp = cn;
+ ncp = NULL;
+ }
+
+ if (dep->de_StartCluster == 0) {
+ dep->de_StartCluster = cn;
+ frcn = 0;
+ } else {
+ error = fatentry(FAT_SET, pmp,
+ dep->de_fc[FC_LASTFC].fc_fsrcn,
+ 0, cn);
+ if (error) {
+ clusterfree(pmp, cn, NULL); /* NOTE(review): releases only cn, not all 'got' clusters -- confirm */
+ return (error);
+ }
+ frcn = dep->de_fc[FC_LASTFC].fc_frcn + 1;
+ }
+
+ /*
+ * Update the "last cluster of the file" entry in the denode's fat
+ * cache.
+ */
+ fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1);
+
+ if (flags & DE_CLEAR) {
+ while (got-- > 0) {
+ /*
+ * Get the buf header for the new block of the file.
+ */
+ if (dep->de_Attributes & ATTR_DIRECTORY)
+ bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++),
+ pmp->pm_bpcluster, 0, 0);
+ else {
+ bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++),
+ pmp->pm_bpcluster, 0, 0);
+ /*
+ * Do the bmap now, as in msdosfs_write
+ */
+ if (pcbmap(dep,
+ de_bn2cn(pmp, bp->b_lblkno),
+ &bp->b_blkno, 0, 0))
+ bp->b_blkno = -1;
+ if (bp->b_blkno == -1)
+ panic("extendfile: pcbmap");
+ }
+ clrbuf(bp);
+ if (bpp) {
+ *bpp = bp;
+ bpp = NULL; /* only the first cleared buffer goes back to the caller */
+ } else
+ bdwrite(bp);
+ }
+ }
+ }
+
+ return (0);
+}
diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c
new file mode 100644
index 0000000..87de1f2
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_lookup.c
@@ -0,0 +1,1085 @@
+/* $Id: msdosfs_lookup.c,v 1.27 1998/12/07 21:58:35 archie Exp $ */
+/* $NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/systm.h>
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/fat.h>
+
+/*
+ * When we search a directory the blocks containing directory entries are
+ * read and examined. The directory entries contain information that would
+ * normally be in the inode of a unix filesystem. This means that some of
+ * a directory's contents may also be in memory resident denodes (sort of
+ * an inode). This can cause problems if we are searching while some other
+ * process is modifying a directory. To prevent one process from accessing
+ * incompletely modified directory information we depend upon being the
+ * sole owner of a directory block. bread/brelse provide this service.
+ * This being the case, when a process modifies a directory it must first
+ * acquire the disk block that contains the directory entry to be modified.
+ * Then update the disk block and the denode, and then write the disk block
+ * out to disk. This way disk blocks containing directory entries and in
+ * memory denode's will be in synch.
+ *
+ * msdosfs_lookup() returns 0 with *ap->a_vpp set when the name is found.
+ * When the last component of a CREATE or RENAME is absent (and the
+ * directory is still referenced and writable) it returns EJUSTRETURN
+ * after recording in de_fndoffset/de_fndcnt where the new entry may be
+ * put. Otherwise an absent name yields ENOENT. EINVAL is returned when
+ * unix2dosfn() rejects the name, EROFS when the root directory itself is
+ * the target of a DELETE or RENAME, and EISDIR when "." is the target of
+ * a RENAME. Errors from pcbmap(), bread(), VOP_ACCESS(), deget() and
+ * vn_lock() are passed up unchanged.
+ */
+int
+msdosfs_lookup(ap)
+ struct vop_cachedlookup_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ struct vnode *vdp = ap->a_dvp;
+ struct vnode **vpp = ap->a_vpp;
+ struct componentname *cnp = ap->a_cnp;
+ daddr_t bn;
+ int error;
+ int lockparent;
+ int wantparent;
+ int slotcount;
+ int slotoffset = 0;
+ int frcn;
+ u_long cluster;
+ int blkoff;
+ int diroff;
+ int blsize;
+ int isadir; /* ~0 if found direntry is a directory */
+ u_long scn; /* starting cluster number */
+ struct vnode *pdp;
+ struct denode *dp;
+ struct denode *tdp;
+ struct msdosfsmount *pmp;
+ struct buf *bp = 0;
+ struct direntry *dep = NULL;
+ u_char dosfilename[12];
+ int flags = cnp->cn_flags;
+ int nameiop = cnp->cn_nameiop;
+ struct proc *p = cnp->cn_proc;
+ int unlen;
+
+ int wincnt = 1; /* directory slots wanted for the name; set from winSlotCnt() + 1 below */
+ int chksum = -1; /* -1 while no long filename match is being tracked */
+ int olddos = 1; /* cleared when unix2dosfn() returns 3 */
+
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_lookup(): looking for %s\n", cnp->cn_nameptr);
+#endif
+ dp = VTODE(vdp);
+ pmp = dp->de_pmp;
+ *vpp = NULL;
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT | WANTPARENT);
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_lookup(): vdp %p, dp %p, Attr %02x\n",
+ vdp, dp, dp->de_Attributes);
+#endif
+
+ /*
+ * If they are going after the . or .. entry in the root directory,
+ * they won't find it. DOS filesystems don't have them in the root
+ * directory. So, we fake it. deget() is in on this scam too.
+ */
+ if ((vdp->v_flag & VROOT) && cnp->cn_nameptr[0] == '.' &&
+ (cnp->cn_namelen == 1 ||
+ (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.'))) {
+ isadir = ATTR_DIRECTORY;
+ scn = MSDOSFSROOT;
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_lookup(): looking for . or .. in root directory\n");
+#endif
+ cluster = MSDOSFSROOT;
+ blkoff = MSDOSFSROOT_OFS;
+ goto foundroot;
+ }
+
+ switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename,
+ cnp->cn_namelen, 0,
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d,
+ pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu)) {
+ case 0:
+ return (EINVAL);
+ case 1:
+ break;
+ case 2:
+ wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
+ cnp->cn_namelen) + 1;
+ break;
+ case 3:
+ olddos = 0;
+ wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
+ cnp->cn_namelen) + 1;
+ break;
+ }
+ if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) {
+ wincnt = 1;
+ olddos = 1;
+ }
+ unlen = winLenFixup(cnp->cn_nameptr, cnp->cn_namelen);
+
+ /*
+ * Suppress search for slots unless creating
+ * file and at end of pathname, in which case
+ * we watch for a place to put the new file in
+ * case it doesn't already exist.
+ */
+ slotcount = wincnt;
+ if ((nameiop == CREATE || nameiop == RENAME) &&
+ (flags & ISLASTCN))
+ slotcount = 0;
+
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_lookup(): dos version of filename %s, length %ld\n",
+ dosfilename, cnp->cn_namelen);
+#endif
+ /*
+ * Search the directory pointed at by vdp for the name pointed at
+ * by cnp->cn_nameptr.
+ */
+ tdp = NULL;
+ /*
+ * The outer loop ranges over the clusters that make up the
+ * directory. Note that the root directory is different from all
+ * other directories. It has a fixed number of blocks that are not
+ * part of the pool of allocatable clusters. So, we treat it a
+ * little differently. The root directory starts at "cluster" 0.
+ */
+ diroff = 0;
+ for (frcn = 0;; frcn++) {
+ error = pcbmap(dp, frcn, &bn, &cluster, &blsize);
+ if (error) {
+ if (error == E2BIG)
+ break;
+ return (error);
+ }
+ error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ for (blkoff = 0; blkoff < blsize;
+ blkoff += sizeof(struct direntry),
+ diroff += sizeof(struct direntry)) {
+ dep = (struct direntry *)(bp->b_data + blkoff);
+ /*
+ * If the slot is empty and we are still looking
+ * for an empty then remember this one. If the
+ * slot is not empty then check to see if it
+ * matches what we are looking for. If the slot
+ * has never been filled with anything, then the
+ * remainder of the directory has never been used,
+ * so there is no point in searching it.
+ */
+ if (dep->deName[0] == SLOT_EMPTY ||
+ dep->deName[0] == SLOT_DELETED) {
+ /*
+ * Drop memory of previous long matches
+ */
+ chksum = -1;
+
+ if (slotcount < wincnt) {
+ slotcount++;
+ slotoffset = diroff;
+ }
+ if (dep->deName[0] == SLOT_EMPTY) {
+ brelse(bp);
+ goto notfound;
+ }
+ } else {
+ /*
+ * If there wasn't enough space for our winentries,
+ * forget about the empty space
+ */
+ if (slotcount < wincnt)
+ slotcount = 0;
+
+ /*
+ * Check for Win95 long filename entry
+ */
+ if (dep->deAttributes == ATTR_WIN95) {
+ if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
+ continue;
+
+ chksum = winChkName((const u_char *)cnp->cn_nameptr,
+ unlen,
+ (struct winentry *)dep,
+ chksum,
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+ pmp->pm_u2w,
+ pmp->pm_flags & MSDOSFSMNT_ULTABLE,
+ pmp->pm_ul);
+ continue;
+ }
+
+ /*
+ * Ignore volume labels (anywhere, not just
+ * the root directory).
+ */
+ if (dep->deAttributes & ATTR_VOLUME) {
+ chksum = -1;
+ continue;
+ }
+
+ /*
+ * Check for a checksum or name match
+ */
+ if (chksum != winChksum(dep->deName)
+ && (!olddos || bcmp(dosfilename, dep->deName, 11))) {
+ chksum = -1;
+ continue;
+ }
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_lookup(): match blkoff %d, diroff %d\n",
+ blkoff, diroff);
+#endif
+ /*
+ * Remember where this directory
+ * entry came from for whoever did
+ * this lookup.
+ */
+ dp->de_fndoffset = diroff;
+ dp->de_fndcnt = wincnt - 1;
+
+ goto found;
+ }
+ } /* for (blkoff = 0; .... */
+ /*
+ * Release the buffer holding the directory cluster just
+ * searched.
+ */
+ brelse(bp);
+ } /* for (frcn = 0; ; frcn++) */
+
+notfound:
+ /*
+ * We hold no disk buffers at this point.
+ */
+
+ /*
+ * Fixup the slot description to point to the place where
+ * we might put the new DOS direntry (putting the Win95
+ * long name entries before that)
+ */
+ if (!slotcount) {
+ slotcount = 1;
+ slotoffset = diroff;
+ }
+ if (wincnt > slotcount)
+ slotoffset += sizeof(struct direntry) * (wincnt - slotcount);
+
+ /*
+ * If we get here we didn't find the entry we were looking for. But
+ * that's ok if we are creating or renaming and are at the end of
+ * the pathname and the directory hasn't been removed.
+ */
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_lookup(): op %d, refcnt %ld\n",
+ nameiop, dp->de_refcnt);
+ printf(" slotcount %d, slotoffset %d\n",
+ slotcount, slotoffset);
+#endif
+ if ((nameiop == CREATE || nameiop == RENAME) &&
+ (flags & ISLASTCN) && dp->de_refcnt != 0) {
+ /*
+ * Access for write is interpreted as allowing
+ * creation of files in the directory.
+ */
+ error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc);
+ if (error)
+ return (error);
+ /*
+ * Return an indication of where the new directory
+ * entry should be put.
+ */
+ dp->de_fndoffset = slotoffset;
+ dp->de_fndcnt = wincnt - 1;
+
+ /*
+ * We return with the directory locked, so that
+ * the parameters we set up above will still be
+ * valid if we actually decide to do a direnter().
+ * We return ni_vp == NULL to indicate that the entry
+ * does not currently exist; we leave a pointer to
+ * the (locked) directory inode in ndp->ni_dvp.
+ * The pathname buffer is saved so that the name
+ * can be obtained later.
+ *
+ * NB - if the directory is unlocked, then this
+ * information cannot be used.
+ */
+ cnp->cn_flags |= SAVENAME;
+ if (!lockparent)
+ VOP_UNLOCK(vdp, 0, p);
+ return (EJUSTRETURN);
+ }
+ /*
+ * Insert name into cache (as non-existent) if appropriate.
+ */
+ if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
+ cache_enter(vdp, *vpp, cnp); /* *vpp is still NULL here */
+ return (ENOENT);
+
+found:
+ /*
+ * NOTE: We still have the buffer with matched directory entry at
+ * this point.
+ */
+ isadir = dep->deAttributes & ATTR_DIRECTORY;
+ scn = getushort(dep->deStartCluster);
+ if (FAT32(pmp)) {
+ scn |= getushort(dep->deHighClust) << 16;
+ if (scn == pmp->pm_rootdirblk) {
+ /*
+ * There should actually be 0 here.
+ * Just ignore the error.
+ */
+ scn = MSDOSFSROOT;
+ }
+ }
+
+ if (isadir) {
+ cluster = scn;
+ if (cluster == MSDOSFSROOT)
+ blkoff = MSDOSFSROOT_OFS;
+ else
+ blkoff = 0;
+ } else if (cluster == MSDOSFSROOT)
+ blkoff = diroff;
+
+ /*
+ * Now release buf to allow deget to read the entry again.
+ * Reserving it here and giving it to deget could result
+ * in a deadlock.
+ */
+ brelse(bp);
+ bp = 0;
+
+foundroot:
+ /*
+ * If we entered at foundroot, then we are looking for the . or ..
+ * entry of the filesystems root directory. isadir and scn were
+ * setup before jumping here. And, bp is already null.
+ */
+ if (FAT32(pmp) && scn == MSDOSFSROOT)
+ scn = pmp->pm_rootdirblk;
+
+ /*
+ * If deleting, and at end of pathname, return
+ * parameters which can be used to remove file.
+ * If the wantparent flag isn't set, we return only
+ * the directory (in ndp->ni_dvp), otherwise we go
+ * on and lock the inode, being careful with ".".
+ */
+ if (nameiop == DELETE && (flags & ISLASTCN)) {
+ /*
+ * Don't allow deleting the root.
+ */
+ if (blkoff == MSDOSFSROOT_OFS)
+ return EROFS; /* really? XXX */
+
+ /*
+ * Write access to directory required to delete files.
+ */
+ error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc);
+ if (error)
+ return (error);
+
+ /*
+ * Return pointer to current entry in dp->i_offset.
+ * Save directory inode pointer in ndp->ni_dvp for dirremove().
+ */
+ if (dp->de_StartCluster == scn && isadir) { /* "." */
+ VREF(vdp);
+ *vpp = vdp;
+ return (0);
+ }
+ error = deget(pmp, cluster, blkoff, &tdp);
+ if (error)
+ return (error);
+ *vpp = DETOV(tdp);
+ if (!lockparent)
+ VOP_UNLOCK(vdp, 0, p);
+ return (0);
+ }
+
+ /*
+ * If rewriting (RENAME), return the inode and the
+ * information required to rewrite the present directory
+ * Must get inode of directory entry to verify it's a
+ * regular file, or empty directory.
+ */
+ if (nameiop == RENAME && wantparent &&
+ (flags & ISLASTCN)) {
+ if (blkoff == MSDOSFSROOT_OFS)
+ return EROFS; /* really? XXX */
+
+ error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc);
+ if (error)
+ return (error);
+
+ /*
+ * Careful about locking second inode.
+ * This can only occur if the target is ".".
+ */
+ if (dp->de_StartCluster == scn && isadir)
+ return (EISDIR);
+
+ if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0)
+ return (error);
+ *vpp = DETOV(tdp);
+ cnp->cn_flags |= SAVENAME;
+ if (!lockparent)
+ VOP_UNLOCK(vdp, 0, p);
+ return (0);
+ }
+
+ /*
+ * Step through the translation in the name. We do not `vput' the
+ * directory because we may need it again if a symbolic link
+ * is relative to the current directory. Instead we save it
+ * unlocked as "pdp". We must get the target inode before unlocking
+ * the directory to insure that the inode will not be removed
+ * before we get it. We prevent deadlock by always fetching
+ * inodes from the root, moving down the directory tree. Thus
+ * when following backward pointers ".." we must unlock the
+ * parent directory before getting the requested directory.
+ * There is a potential race condition here if both the current
+ * and parent directories are removed before the VFS_VGET for the
+ * inode associated with ".." returns. We hope that this occurs
+ * infrequently since we cannot avoid this race condition without
+ * implementing a sophisticated deadlock detection algorithm.
+ * Note also that this simple deadlock detection scheme will not
+ * work if the file system has any hard links other than ".."
+ * that point backwards in the directory structure.
+ */
+ pdp = vdp;
+ if (flags & ISDOTDOT) {
+ VOP_UNLOCK(pdp, 0, p);
+ error = deget(pmp, cluster, blkoff, &tdp);
+ if (error) {
+ vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); /* relock the directory before reporting the failure */
+ return (error);
+ }
+ if (lockparent && (flags & ISLASTCN) &&
+ (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
+ vput(DETOV(tdp));
+ return (error);
+ }
+ *vpp = DETOV(tdp);
+ } else if (dp->de_StartCluster == scn && isadir) {
+ VREF(vdp); /* we want ourself, ie "." */
+ *vpp = vdp;
+ } else {
+ if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0)
+ return (error);
+ if (!lockparent || !(flags & ISLASTCN))
+ VOP_UNLOCK(pdp, 0, p);
+ *vpp = DETOV(tdp);
+ }
+
+ /*
+ * Insert name into cache if appropriate.
+ */
+ if (cnp->cn_flags & MAKEENTRY)
+ cache_enter(vdp, *vpp, cnp);
+ return (0);
+}
+
+/*
+ * Copy the directory entry described by dep into directory ddep at byte
+ * offset ddep->de_fndoffset (presumably left there by a preceding lookup;
+ * confirm against the caller). If that offset is at or past
+ * ddep->de_FileSize the directory is first grown with extendfile(). When
+ * ddep->de_fndcnt > 0, Win95 long-name slots built from cnp are written
+ * into the entries immediately preceding the short entry.
+ *
+ * dep - directory entry to copy into the directory
+ * ddep - directory to add to
+ * depp - return the address of the denode for the created directory entry
+ * if depp != 0
+ * cnp - componentname needed for Win95 long filenames
+ *
+ * Returns 0 on success, otherwise the first error reported by extendfile(),
+ * pcbmap(), bread(), bwrite() or deget().
+ */
+int
+createde(dep, ddep, depp, cnp)
+ struct denode *dep;
+ struct denode *ddep;
+ struct denode **depp;
+ struct componentname *cnp;
+{
+ int error;
+ u_long dirclust, diroffset;
+ struct direntry *ndep;
+ struct msdosfsmount *pmp = ddep->de_pmp;
+ struct buf *bp;
+ daddr_t bn;
+ int blsize;
+
+#ifdef MSDOSFS_DEBUG
+ printf("createde(dep %p, ddep %p, depp %p, cnp %p)\n",
+ dep, ddep, depp, cnp);
+#endif
+
+ /*
+ * If no space left in the directory then allocate another cluster
+ * and chain it onto the end of the file. There is one exception
+ * to this. That is, if the root directory has no more space it
+ * can NOT be expanded. extendfile() checks for and fails attempts
+ * to extend the root directory. We just return an error in that
+ * case.
+ */
+ if (ddep->de_fndoffset >= ddep->de_FileSize) {
+ diroffset = ddep->de_fndoffset + sizeof(struct direntry)
+ - ddep->de_FileSize; /* bytes needed beyond the current size */
+ dirclust = de_clcount(pmp, diroffset); /* here a count of clusters to add, not a cluster number */
+ error = extendfile(ddep, dirclust, 0, 0, DE_CLEAR);
+ if (error) {
+ (void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED, NULL); /* truncate back to the old size; result ignored */
+ return error;
+ }
+
+ /*
+ * Update the size of the directory
+ */
+ ddep->de_FileSize += de_cn2off(pmp, dirclust);
+ }
+
+ /*
+ * We just read in the cluster with space. Copy the new directory
+ * entry in. Then write it to disk. NOTE: DOS directories
+ * do not get smaller as clusters are emptied.
+ */
+ error = pcbmap(ddep, de_cluster(pmp, ddep->de_fndoffset),
+ &bn, &dirclust, &blsize);
+ if (error)
+ return error;
+ diroffset = ddep->de_fndoffset;
+ if (dirclust != MSDOSFSROOT)
+ diroffset &= pmp->pm_crbomask; /* keep only the offset within the cluster */
+ if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) != 0) {
+ brelse(bp); /* the buffer is released on a failed read as well */
+ return error;
+ }
+ ndep = bptoep(pmp, bp, ddep->de_fndoffset);
+
+ DE_EXTERNALIZE(ndep, dep);
+
+ /*
+ * Now write the Win95 long name
+ * The slots are filled walking backwards from the short entry;
+ * chksum is computed from the short name just stored in ndep.
+ */
+ if (ddep->de_fndcnt > 0) {
+ u_int8_t chksum = winChksum(ndep->deName);
+ const u_char *un = (const u_char *)cnp->cn_nameptr;
+ int unlen = cnp->cn_namelen;
+ int cnt = 1;
+
+ while (--ddep->de_fndcnt >= 0) {
+ if (!(ddep->de_fndoffset & pmp->pm_crbomask)) { /* offset is cluster aligned: the previous slot lives in another block */
+ if ((error = bwrite(bp)) != 0)
+ return error;
+
+ ddep->de_fndoffset -= sizeof(struct direntry);
+ error = pcbmap(ddep,
+ de_cluster(pmp,
+ ddep->de_fndoffset),
+ &bn, 0, &blsize);
+ if (error)
+ return error;
+
+ error = bread(pmp->pm_devvp, bn, blsize,
+ NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return error;
+ }
+ ndep = bptoep(pmp, bp, ddep->de_fndoffset);
+ } else {
+ ndep--; /* previous slot is in the buffer already held */
+ ddep->de_fndoffset -= sizeof(struct direntry);
+ }
+ if (!unix2winfn(un, unlen, (struct winentry *)ndep,
+ cnt++, chksum,
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+ pmp->pm_u2w))
+ break; /* unix2winfn() returned 0: no further long-name slots are written */
+ }
+ }
+
+ if ((error = bwrite(bp)) != 0)
+ return error;
+
+ /*
+ * If they want us to return with the denode gotten.
+ * For a directory the denode is requested by the directory's own
+ * start cluster (mapped to MSDOSFSROOT for the FAT32 root) instead
+ * of by the location of the entry just written.
+ */
+ if (depp) {
+ if (dep->de_Attributes & ATTR_DIRECTORY) {
+ dirclust = dep->de_StartCluster;
+ if (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)
+ dirclust = MSDOSFSROOT;
+ if (dirclust == MSDOSFSROOT)
+ diroffset = MSDOSFSROOT_OFS;
+ else
+ diroffset = 0;
+ }
+ return deget(pmp, dirclust, diroffset, depp);
+ }
+
+ return 0;
+}
+
+/*
+ * Be sure a directory is empty except for "." and "..". Return 1 if empty,
+ * return 0 if not empty or error.
+ *
+ * dep - denode of the directory to examine
+ *
+ * The directory is read block by block through pcbmap()/bread().  An
+ * E2BIG result from pcbmap() is taken as end of directory (empty); any
+ * other pcbmap() or bread() failure yields 0.
+ */
+int
+dosdirempty(dep)
+	struct denode *dep;
+{
+	int blsize;
+	int error;
+	u_long cn;
+	daddr_t bn;
+	struct buf *bp;
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct direntry *dentp;
+
+	/*
+	 * Since the filesize field in directory entries for a directory is
+	 * zero, we just have to feel our way through the directory until
+	 * we hit end of file.
+	 */
+	for (cn = 0;; cn++) {
+		if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
+			if (error == E2BIG)
+				return (1);	/* it's empty */
+			return (0);
+		}
+		error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (0);
+		}
+		for (dentp = (struct direntry *)bp->b_data;
+		     (char *)dentp < bp->b_data + blsize;
+		     dentp++) {
+			if (dentp->deName[0] != SLOT_DELETED &&
+			    (dentp->deAttributes & ATTR_VOLUME) == 0) {
+				/*
+				 * In dos directories an entry whose name
+				 * starts with SLOT_EMPTY (0) starts the
+				 * beginning of the unused part of the
+				 * directory, so we can just return that it
+				 * is empty.
+				 */
+				if (dentp->deName[0] == SLOT_EMPTY) {
+					brelse(bp);
+					return (1);
+				}
+				/*
+				 * Any names other than "." and ".." in a
+				 * directory mean it is not empty.
+				 *
+				 * deName is compared over 11 bytes, so the
+				 * literals must be the full space-padded
+				 * names: "." followed by 10 blanks and ".."
+				 * followed by 9 blanks.  Shorter literals
+				 * would be read past their end and would
+				 * never match.
+				 */
+				if (bcmp(dentp->deName, ".          ", 11) &&
+				    bcmp(dentp->deName, "..         ", 11)) {
+#ifdef MSDOSFS_DEBUG
+					/* Print before brelse(): dentp points into bp. */
+					printf("dosdirempty(): entry found %02x, %02x\n",
+					    dentp->deName[0], dentp->deName[1]);
+#endif
+					brelse(bp);
+					return (0);	/* not empty */
+				}
+			}
+		}
+		brelse(bp);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Check to see if the directory described by target is in some
+ * subdirectory of source.  This prevents something like the following from
+ * succeeding and leaving a bunch of files and directories orphaned. mv
+ * /a/b/c /a/b/c/d/e/f Where c and f are directories.
+ *
+ * source - the inode for /a/b/c
+ * target - the inode for /a/b/c/d/e/f
+ *
+ * Returns 0 if target is NOT a subdirectory of source.
+ * Otherwise returns a non-zero error number.
+ * The target inode is always unlocked on return.
+ *
+ * The walk goes upwards: the ".." entry (second entry of each directory's
+ * first cluster) gives the parent's start cluster, which is compared with
+ * source and then fetched with deget() until the root is reached.
+ */
+int
+doscheckpath(source, target)
+	struct denode *source;
+	struct denode *target;
+{
+	daddr_t scn;
+	struct msdosfsmount *pmp;
+	struct direntry *ep;
+	struct denode *dep;
+	struct buf *bp = NULL;
+	int error = 0;
+
+	dep = target;
+	if ((target->de_Attributes & ATTR_DIRECTORY) == 0 ||
+	    (source->de_Attributes & ATTR_DIRECTORY) == 0) {
+		error = ENOTDIR;
+		goto out;
+	}
+	if (dep->de_StartCluster == source->de_StartCluster) {
+		error = EEXIST;
+		goto out;
+	}
+	if (dep->de_StartCluster == MSDOSFSROOT)
+		goto out;
+	pmp = dep->de_pmp;
+#ifdef DIAGNOSTIC
+	if (pmp != source->de_pmp)
+		panic("doscheckpath: source and target on different filesystems");
+#endif
+	if (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)
+		goto out;
+
+	for (;;) {
+		if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) {
+			error = ENOTDIR;
+			break;
+		}
+		scn = dep->de_StartCluster;
+		error = bread(pmp->pm_devvp, cntobn(pmp, scn),
+		    pmp->pm_bpcluster, NOCRED, &bp);
+		if (error)
+			break;
+
+		/*
+		 * The second entry must be "..".  deName is compared over
+		 * 11 bytes, so the literal has to be the full space-padded
+		 * name (".." followed by 9 blanks); a shorter literal would
+		 * be read past its end and would never match.
+		 */
+		ep = (struct direntry *) bp->b_data + 1;
+		if ((ep->deAttributes & ATTR_DIRECTORY) == 0 ||
+		    bcmp(ep->deName, "..         ", 11) != 0) {
+			error = ENOTDIR;
+			break;
+		}
+		scn = getushort(ep->deStartCluster);
+		if (FAT32(pmp))
+			scn |= getushort(ep->deHighClust) << 16;
+
+		if (scn == source->de_StartCluster) {
+			error = EINVAL;
+			break;
+		}
+		if (scn == MSDOSFSROOT)
+			break;
+		if (FAT32(pmp) && scn == pmp->pm_rootdirblk) {
+			/*
+			 * scn should be 0 in this case,
+			 * but we silently ignore the error.
+			 */
+			break;
+		}
+
+		vput(DETOV(dep));
+		brelse(bp);
+		bp = NULL;
+		/* NOTE: deget() clears dep on error */
+		if ((error = deget(pmp, scn, 0, &dep)) != 0)
+			break;
+	}
+out:;
+	/* Common exit: drop the buffer and the denode still held, if any. */
+	if (bp)
+		brelse(bp);
+	if (error == ENOTDIR)
+		printf("doscheckpath(): .. not a directory?\n");
+	if (dep != NULL)
+		vput(DETOV(dep));
+	return (error);
+}
+
+/*
+ * Fetch the disk block that holds the directory entry located at
+ * (dirclust, diroffset).
+ *
+ * pmp - mount the entry belongs to
+ * dirclust, diroffset - location of the entry
+ * bpp - receives the buf header; set to NULL when the read fails
+ * epp - if non-zero, receives the address of the entry inside the buffer
+ *
+ * Returns 0 on success or the error reported by bread().
+ */
+int
+readep(pmp, dirclust, diroffset, bpp, epp)
+	struct msdosfsmount *pmp;
+	u_long dirclust, diroffset;
+	struct buf **bpp;
+	struct direntry **epp;
+{
+	daddr_t blkno;
+	int nbytes;
+	int rv;
+
+	/*
+	 * Normally a whole cluster is read; a read that would run past the
+	 * end of the root directory area is cut down to a smaller size.
+	 */
+	nbytes = pmp->pm_bpcluster;
+	if (dirclust == MSDOSFSROOT) {
+		if (de_blk(pmp, diroffset + nbytes) > pmp->pm_rootdirsize)
+			nbytes = de_bn2off(pmp, pmp->pm_rootdirsize) &
+			    pmp->pm_crbomask;
+	}
+
+	blkno = detobn(pmp, dirclust, diroffset);
+	rv = bread(pmp->pm_devvp, blkno, nbytes, NOCRED, bpp);
+	if (rv == 0) {
+		if (epp)
+			*epp = bptoep(pmp, *bpp, diroffset);
+		return (0);
+	}
+
+	/* Read failed: give the buffer back and hand the caller nothing. */
+	brelse(*bpp);
+	*bpp = NULL;
+	return (rv);
+}
+
+/*
+ * Convenience wrapper around readep(): read the block holding the
+ * directory entry that dep was created from (dep->de_dirclust,
+ * dep->de_diroffset) and report the buf header and entry address through
+ * bpp and epp.  Returns whatever readep() returns.
+ */
+int
+readde(dep, bpp, epp)
+	struct denode *dep;
+	struct buf **bpp;
+	struct direntry **epp;
+{
+	int rv;
+
+	rv = readep(dep->de_pmp, dep->de_dirclust, dep->de_diroffset,
+	    bpp, epp);
+	return (rv);
+}
+
+/*
+ * Remove a directory entry. At this point the file represented by the
+ * directory entry to be removed is still full length until no one has it
+ * open. When the file is no longer being used msdosfs_inactive() is called
+ * and will truncate the file to 0 length. When the vnode containing the
+ * denode is needed for some other purpose by VFS it will call
+ * msdosfs_reclaim() which will remove the denode from the denode cache.
+ *
+ * The entry at pdep->de_fndoffset is marked SLOT_DELETED, and so are the
+ * ATTR_WIN95 entries directly preceding it unless the mount has
+ * MSDOSFSMNT_NOWIN95 set. dep->de_refcnt is decremented before any I/O
+ * is attempted and is not restored when an error is returned.
+ *
+ * Returns 0 on success or the error from pcbmap(), bread() or bwrite().
+ */
+int
+removede(pdep, dep)
+ struct denode *pdep; /* directory where the entry is removed */
+ struct denode *dep; /* file to be removed */
+{
+ int error;
+ struct direntry *ep;
+ struct buf *bp;
+ daddr_t bn;
+ int blsize;
+ struct msdosfsmount *pmp = pdep->de_pmp;
+ u_long offset = pdep->de_fndoffset;
+
+#ifdef MSDOSFS_DEBUG
+ printf("removede(): filename %s, dep %p, offset %08lx\n",
+ dep->de_Name, dep, offset);
+#endif
+
+ dep->de_refcnt--;
+ offset += sizeof(struct direntry); /* pre-bias: the loop body starts by stepping back one entry */
+ do {
+ offset -= sizeof(struct direntry);
+ error = pcbmap(pdep, de_cluster(pmp, offset), &bn, 0, &blsize);
+ if (error)
+ return error;
+ error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return error;
+ }
+ ep = bptoep(pmp, bp, offset);
+ /*
+ * Check whether, if we came here the second time, i.e.
+ * when underflowing into the previous block, the last
+ * entry in this block is a longfilename entry, too.
+ */
+ if (ep->deAttributes != ATTR_WIN95
+ && offset != pdep->de_fndoffset) {
+ brelse(bp);
+ break;
+ }
+ offset += sizeof(struct direntry); /* same pre-bias for the inner loop */
+ while (1) {
+ /*
+ * We are a bit aggressive here in that we delete any Win95
+ * entries preceding this entry, not just the ones we "own".
+ * Since these presumably aren't valid anyway,
+ * there should be no harm.
+ */
+ offset -= sizeof(struct direntry);
+ ep--->deName[0] = SLOT_DELETED; /* parses as (ep--)->deName[0]: mark this slot, then step ep back one entry */
+ if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+ || !(offset & pmp->pm_crbomask)
+ || ep->deAttributes != ATTR_WIN95)
+ break;
+ }
+ if ((error = bwrite(bp)) != 0)
+ return error;
+ } while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+ && !(offset & pmp->pm_crbomask)
+ && offset); /* go round again only after stopping on a cluster boundary other than offset 0 */
+ return 0;
+}
+
+/*
+ * Create a unique DOS name in the directory dep
+ *
+ * dep - directory that is searched for clashing names
+ * cnp - component name to convert
+ * cp - buffer that receives the generated DOS name; it is compared
+ * against existing entries over 11 bytes
+ *
+ * With MSDOSFSMNT_SHORTNAME set the name is converted once with generation
+ * 0 and the directory is not searched: the result is 0 if unix2dosfn()
+ * succeeds and EINVAL otherwise. Otherwise generations 1, 2, ... are
+ * tried until no entry with the generated name exists (returns 0),
+ * unix2dosfn() fails (EINVAL for the first generation, EEXIST for later
+ * ones), or pcbmap()/bread() fail (that error is returned).
+ */
+int
+uniqdosname(dep, cnp, cp)
+ struct denode *dep;
+ struct componentname *cnp;
+ u_char *cp;
+{
+ struct msdosfsmount *pmp = dep->de_pmp;
+ struct direntry *dentp;
+ int gen;
+ int blsize;
+ u_long cn;
+ daddr_t bn;
+ struct buf *bp;
+ int error;
+
+ if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
+ return (unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
+ cnp->cn_namelen, 0,
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d,
+ pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu) ?
+ 0 : EINVAL);
+
+ for (gen = 1;; gen++) {
+ /*
+ * Generate DOS name with generation number
+ */
+ if (!unix2dosfn((const u_char *)cnp->cn_nameptr, cp,
+ cnp->cn_namelen, gen,
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d,
+ pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu))
+ return gen == 1 ? EINVAL : EEXIST;
+
+ /*
+ * Now look for a dir entry with this exact name
+ */
+ for (cn = error = 0; !error; cn++) { /* ends once error becomes EEXIST, so the next generation is tried */
+ if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) {
+ if (error == E2BIG) /* EOF reached and not found */
+ return 0;
+ return error;
+ }
+ error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return error;
+ }
+ for (dentp = (struct direntry *)bp->b_data;
+ (char *)dentp < bp->b_data + blsize;
+ dentp++) {
+ if (dentp->deName[0] == SLOT_EMPTY) {
+ /*
+ * Last used entry and not found
+ */
+ brelse(bp);
+ return 0;
+ }
+ /*
+ * Ignore volume labels and Win95 entries
+ */
+ if (dentp->deAttributes & ATTR_VOLUME)
+ continue;
+ if (!bcmp(dentp->deName, cp, 11)) {
+ error = EEXIST; /* name taken: leave both scan loops */
+ break;
+ }
+ }
+ brelse(bp);
+ }
+ }
+}
+
+/*
+ * Scan directory dep for Win'95 long filename entries.
+ *
+ * Returns 1 as soon as an ATTR_WIN95 entry is seen.  If the scan ends
+ * first (SLOT_EMPTY entry, or pcbmap()/bread() failure, which is treated
+ * just like end of directory XXX) the result is 1 when no live entry was
+ * seen at all and 0 when only non-Win'95 entries were seen.
+ */
+int
+findwin95(dep)
+	struct denode *dep;
+{
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct direntry *slot;
+	struct buf *bp;
+	char *limit;
+	daddr_t blkno;
+	u_long cn;
+	int nbytes;
+	int answer;
+
+	/* Stays 1 until a live entry that is not a Win'95 slot turns up. */
+	answer = 1;
+
+	for (cn = 0; ; cn++) {
+		if (pcbmap(dep, cn, &blkno, 0, &nbytes) != 0)
+			break;
+		if (bread(pmp->pm_devvp, blkno, nbytes, NOCRED, &bp) != 0) {
+			brelse(bp);
+			break;
+		}
+
+		limit = bp->b_data + nbytes;
+		slot = (struct direntry *)bp->b_data;
+		while ((char *)slot < limit) {
+			/* First never-used slot: end of the directory. */
+			if (slot->deName[0] == SLOT_EMPTY) {
+				brelse(bp);
+				return (answer);
+			}
+			/*
+			 * Deleted files are skipped.
+			 * Note: might be an indication of Win'95 anyway XXX
+			 */
+			if (slot->deName[0] != SLOT_DELETED) {
+				if (slot->deAttributes == ATTR_WIN95) {
+					brelse(bp);
+					return 1;
+				}
+				answer = 0;
+			}
+			slot++;
+		}
+		brelse(bp);
+	}
+	return (answer);
+}
diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
new file mode 100644
index 0000000..bca552c
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -0,0 +1,1019 @@
+/* $Id: msdosfs_vfsops.c,v 1.39 1998/12/07 21:58:35 archie Exp $ */
+/* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */ /* defines v_rdev */
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/fcntl.h>
+#include <sys/malloc.h>
+#include <sys/stat.h> /* defines ALLPERMS */
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/bootsect.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/fat.h>
+
+MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); /* malloc type of the struct msdosfsmount allocated in mountmsdosfs() */
+static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); /* file-local malloc type; its users are not visible in this part of the file */
+
+static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); /* forward declarations of this file's static functions start here */
+static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp,
+ struct proc *p, struct msdosfs_args *argp));
+static int msdosfs_fhtovp __P((struct mount *, struct fid *,
+ struct sockaddr *, struct vnode **, int *,
+ struct ucred **));
+static int msdosfs_mount __P((struct mount *, char *, caddr_t,
+ struct nameidata *, struct proc *));
+static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t,
+ struct proc *));
+static int msdosfs_root __P((struct mount *, struct vnode **));
+static int msdosfs_start __P((struct mount *, int, struct proc *));
+static int msdosfs_statfs __P((struct mount *, struct statfs *,
+ struct proc *));
+static int msdosfs_sync __P((struct mount *, int, struct ucred *,
+ struct proc *));
+static int msdosfs_unmount __P((struct mount *, int, struct proc *));
+static int msdosfs_vget __P((struct mount *mp, ino_t ino,
+ struct vnode **vpp));
+static int msdosfs_vptofh __P((struct vnode *, struct fid *));
+
+/*
+ * Copy the per-mount settings from argp into the msdosfs mount behind mp
+ * and settle whether short or long (Win'95) filenames are used.
+ *
+ * Returns 0 on success, or the error from msdosfs_root() when the root
+ * directory has to be inspected and cannot be obtained.
+ */
+static int
+update_mp(mp, argp)
+	struct mount *mp;
+	struct msdosfs_args *argp;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct vnode *rvp;
+	int rv;
+
+	/* Ownership, permission mask and the user-selectable option bits. */
+	pmp->pm_gid = argp->gid;
+	pmp->pm_uid = argp->uid;
+	pmp->pm_mask = argp->mask & ALLPERMS;
+	pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT;
+
+	/* Optional translation tables supplied with the mount arguments. */
+	if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) {
+		bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w));
+		bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u));
+		bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d));
+	}
+	if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) {
+		bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul));
+		bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu));
+	}
+
+#ifndef __FreeBSD__
+	/*
+	 * GEMDOS knows nothing (yet) about win95
+	 */
+	if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS)
+		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;
+#endif
+
+	/* No Win'95 support at all implies short names. */
+	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) {
+		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+		return 0;
+	}
+
+	/* The name style was already chosen: nothing left to decide. */
+	if (pmp->pm_flags & (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))
+		return 0;
+
+	/*
+	 * Try to divine whether to support Win'95 long filenames
+	 */
+	if (FAT32(pmp)) {
+		pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
+		return 0;
+	}
+
+	rv = msdosfs_root(mp, &rvp);
+	if (rv != 0)
+		return rv;
+	if (findwin95(VTODE(rvp)))
+		pmp->pm_flags |= MSDOSFSMNT_LONGNAME;
+	else
+		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+	vput(rvp);
+	return 0;
+}
+
+#ifndef __FreeBSD__
+int
+msdosfs_mountroot() /* mount rootdev as an msdosfs; this function is compiled only when __FreeBSD__ is not defined */
+{
+ register struct mount *mp;
+ struct proc *p = curproc; /* XXX */
+ size_t size;
+ int error;
+ struct msdosfs_args args;
+
+ if (root_device->dv_class != DV_DISK)
+ return (ENODEV);
+
+ /*
+ * Get a vnode for rootdev; it is stored in rootvp, which is not
+ * declared in this function. (The original wording also mentioned
+ * swapdev, but no swapdev handling is visible here.)
+ */
+ if (bdevvp(rootdev, &rootvp))
+ panic("msdosfs_mountroot: can't setup rootvp");
+
+ mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = &msdosfs_vfsops;
+ mp->mnt_flag = 0;
+ LIST_INIT(&mp->mnt_vnodelist);
+
+ args.flags = 0; /* fixed arguments for the root mount: no option flags, uid/gid 0, mask 0777 */
+ args.uid = 0;
+ args.gid = 0;
+ args.mask = 0777;
+
+ if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) {
+ free(mp, M_MOUNT);
+ return (error);
+ }
+
+ if ((error = update_mp(mp, &args)) != 0) { /* from here on a failure also undoes mountmsdosfs() via msdosfs_unmount() */
+ (void)msdosfs_unmount(mp, 0, p);
+ free(mp, M_MOUNT);
+ return (error);
+ }
+
+ if ((error = vfs_lock(mp)) != 0) {
+ (void)msdosfs_unmount(mp, 0, p);
+ free(mp, M_MOUNT);
+ return (error);
+ }
+
+ CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_vnodecovered = NULLVP;
+ (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* zero the rest of the name buffer */
+ (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void)msdosfs_statfs(mp, &mp->mnt_stat, p);
+ vfs_unlock(mp);
+ return (0);
+}
+#endif
+
+/*
+ * Mount an msdosfs filesystem, or update an existing msdosfs mount.
+ *
+ * mp - mount structure being set up or updated
+ * path - addr in user space of mount point (ie /usr or whatever)
+ * data - addr in user space of mount params including the name of the block
+ * special file to treat as a filesystem.
+ * ndp - nameidata used to look up the device named by args.fspec
+ * p - process requesting the mount
+ *
+ * For MNT_UPDATE requests without a device name only the read-only /
+ * read-write transition and the export request are processed. Otherwise
+ * the device is looked up and checked, mountmsdosfs() is called for a new
+ * mount (an update must name the device already mounted), update_mp()
+ * applies the arguments and the mount names and statistics are recorded.
+ *
+ * Returns 0 on success or an errno value.
+ */
+static int
+msdosfs_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp; /* vnode for blk device to mount */
+ struct msdosfs_args args; /* will hold data from mount request */
+ /* msdosfs specific mount control block */
+ struct msdosfsmount *pmp = NULL;
+ size_t size;
+ int error, flags;
+ mode_t accessmode;
+
+ error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args));
+ if (error)
+ return (error);
+ if (args.magic != MSDOSFS_ARGSMAGIC)
+ args.flags = 0; /* arguments without the magic value: their flags are ignored */
+ /*
+ * If updating, check whether changing from read-only to
+ * read/write; if there is no device name, that's all we do.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ pmp = VFSTOMSDOSFS(mp);
+ error = 0;
+ if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { /* currently read/write, read-only requested */
+ flags = WRITECLOSE;
+ if (mp->mnt_flag & MNT_FORCE)
+ flags |= FORCECLOSE;
+ error = vflush(mp, NULLVP, flags);
+ }
+ if (!error && (mp->mnt_flag & MNT_RELOAD))
+ /* not yet implemented */
+ error = EOPNOTSUPP;
+ if (error)
+ return (error);
+ if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) {
+ /*
+ * If upgrade to read-write by non-root, then verify
+ * that user has necessary permissions on the device.
+ */
+ if (p->p_ucred->cr_uid != 0) {
+ devvp = pmp->pm_devvp;
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = VOP_ACCESS(devvp, VREAD | VWRITE,
+ p->p_ucred, p);
+ if (error) {
+ VOP_UNLOCK(devvp, 0, p);
+ return (error);
+ }
+ VOP_UNLOCK(devvp, 0, p);
+ }
+ pmp->pm_flags &= ~MSDOSFSMNT_RONLY;
+ }
+ if (args.fspec == 0) {
+#ifdef __notyet__ /* doesn't work correctly with current mountd XXX */
+ if (args.flags & MSDOSFSMNT_MNTOPT) {
+ pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT;
+ pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT;
+ if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+ pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+ }
+#endif
+ /*
+ * Process export requests.
+ */
+ return (vfs_export(mp, &pmp->pm_export, &args.export));
+ }
+ }
+ /*
+ * Not an update, or updating the name: look up the name
+ * and verify that it refers to a sensible block device.
+ */
+ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+ error = namei(ndp);
+ if (error)
+ return (error);
+ devvp = ndp->ni_vp;
+
+ if (devvp->v_type != VBLK) {
+ vrele(devvp);
+ return (ENOTBLK);
+ }
+ if (major(devvp->v_rdev) >= nblkdev ||
+ bdevsw[major(devvp->v_rdev)] == NULL) {
+ vrele(devvp);
+ return (ENXIO);
+ }
+ /*
+ * If mount by non-root, then verify that user has necessary
+ * permissions on the device.
+ */
+ if (p->p_ucred->cr_uid != 0) {
+ accessmode = VREAD;
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ accessmode |= VWRITE;
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p);
+ if (error) {
+ vput(devvp); /* devvp is locked here, so vput() rather than vrele() */
+ return (error);
+ }
+ VOP_UNLOCK(devvp, 0, p);
+ }
+ if ((mp->mnt_flag & MNT_UPDATE) == 0) {
+ error = mountmsdosfs(devvp, mp, p, &args);
+#ifdef MSDOSFS_DEBUG /* only needed for the printf below */
+ pmp = VFSTOMSDOSFS(mp);
+#endif
+ } else {
+ if (devvp != pmp->pm_devvp)
+ error = EINVAL; /* XXX needs translation */
+ else
+ vrele(devvp); /* same device as mounted: drop the reference from the lookup above */
+ }
+ if (error) {
+ vrele(devvp);
+ return (error);
+ }
+
+ error = update_mp(mp, &args);
+ if (error) {
+ msdosfs_unmount(mp, MNT_FORCE, p); /* NOTE(review): also reached for MNT_UPDATE requests, where this force-unmounts an existing mount; confirm intended */
+ return error;
+ }
+
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); /* result of the copy is ignored */
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) msdosfs_statfs(mp, &mp->mnt_stat, p);
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap);
+#endif
+ return (0);
+}
+
+static int
+mountmsdosfs(devvp, mp, p, argp)
+ struct vnode *devvp;
+ struct mount *mp;
+ struct proc *p;
+ struct msdosfs_args *argp;
+{
+ struct msdosfsmount *pmp;
+ struct buf *bp;
+ dev_t dev = devvp->v_rdev;
+#ifndef __FreeBSD__
+ struct partinfo dpart;
+ int bsize = 0, dtype = 0, tmp;
+#endif
+ union bootsector *bsp;
+ struct byte_bpb33 *b33;
+ struct byte_bpb50 *b50;
+ struct byte_bpb710 *b710;
+ u_int8_t SecPerClust;
+ int ronly, error;
+
+ /*
+ * Disallow multiple mounts of the same device.
+ * Disallow mounting of a device that is currently in use
+ * (except for root, which might share swap device for miniroot).
+ * Flush out any old buffers remaining from a previous use.
+ */
+ error = vfs_mountedon(devvp);
+ if (error)
+ return (error);
+ if (vcount(devvp) > 1 && devvp != rootvp)
+ return (EBUSY);
+ vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0);
+ VOP_UNLOCK(devvp, 0, p);
+ if (error)
+ return (error);
+
+ ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+ error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p);
+ if (error)
+ return (error);
+
+ bp = NULL; /* both used in error_exit */
+ pmp = NULL;
+
+#ifndef __FreeBSD__
+ if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
+ /*
+ * We need the disklabel to calculate the size of a FAT entry
+ * later on. Also make sure the partition contains a filesystem
+ * of type FS_MSDOS. This doesn't work for floppies, so we have
+ * to check for them too.
+ *
+ * At least some parts of the msdos fs driver seem to assume
+ * that the size of a disk block will always be 512 bytes.
+ * Let's check it...
+ */
+ error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart,
+ FREAD, NOCRED, p);
+ if (error)
+ goto error_exit;
+ tmp = dpart.part->p_fstype;
+ dtype = dpart.disklab->d_type;
+ bsize = dpart.disklab->d_secsize;
+ if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) {
+ error = EINVAL;
+ goto error_exit;
+ }
+ }
+#endif
+
+ /*
+ * Read the boot sector of the filesystem, and then check the
+ * boot signature. If not a dos boot sector then error out.
+ */
+#ifdef PC98
+ error = bread(devvp, 0, 1024, NOCRED, &bp);
+#else
+ error = bread(devvp, 0, 512, NOCRED, &bp);
+#endif
+ if (error)
+ goto error_exit;
+ bp->b_flags |= B_AGE;
+ bsp = (union bootsector *)bp->b_data;
+ b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB;
+ b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB;
+ b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP;
+
+#ifndef __FreeBSD__
+ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) {
+#endif
+#ifdef PC98
+ if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0
+ || bsp->bs50.bsBootSectSig1 != BOOTSIG1)
+ && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */
+ || bsp->bs50.bsBootSectSig1 != 0)
+ && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */
+ || bsp->bs50.bsBootSectSig1 != 0x3d)
+ && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */
+ || bsp->bs50.bsBootSectSig1 != 0xfa)) {
+#else
+ if (bsp->bs50.bsBootSectSig0 != BOOTSIG0
+ || bsp->bs50.bsBootSectSig1 != BOOTSIG1) {
+#endif
+ error = EINVAL;
+ printf("mountmsdosfs(): bad signature\n");
+ goto error_exit;
+ }
+#ifndef __FreeBSD__
+ }
+#endif
+
+ pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK);
+ bzero((caddr_t)pmp, sizeof *pmp);
+ pmp->pm_mountp = mp;
+
+ /*
+ * Compute several useful quantities from the bpb in the
+ * bootsector. Copy in the dos 5 variant of the bpb then fix up
+ * the fields that are different between dos 5 and dos 3.3.
+ */
+ SecPerClust = b50->bpbSecPerClust;
+ pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec);
+ pmp->pm_ResSectors = getushort(b50->bpbResSectors);
+ pmp->pm_FATs = b50->bpbFATs;
+ pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts);
+ pmp->pm_Sectors = getushort(b50->bpbSectors);
+ pmp->pm_FATsecs = getushort(b50->bpbFATsecs);
+ pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack);
+ pmp->pm_Heads = getushort(b50->bpbHeads);
+ pmp->pm_Media = b50->bpbMedia;
+
+#ifndef __FreeBSD__
+ if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) {
+#endif
+ /* XXX - We should probably check more values here */
+ if (!pmp->pm_BytesPerSec || !SecPerClust
+ || !pmp->pm_Heads || pmp->pm_Heads > 255
+#ifdef PC98
+ || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) {
+#else
+ || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) {
+#endif
+ error = EINVAL;
+ printf("mountmsdosfs(): bad bpb\n");
+ goto error_exit;
+ }
+#ifndef __FreeBSD__
+ }
+#endif
+
+ if (pmp->pm_Sectors == 0) {
+ pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs);
+ pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors);
+ } else {
+ pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs);
+ pmp->pm_HugeSectors = pmp->pm_Sectors;
+ }
+ if (pmp->pm_HugeSectors > 0xffffffff /
+ (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) {
+ /*
+ * We cannot deal currently with this size of disk
+ * due to fileid limitations (see msdosfs_getattr and
+ * msdosfs_readdir)
+ */
+ error = EINVAL;
+ printf("mountmsdosfs(): disk too big, sorry\n");
+ goto error_exit;
+ }
+
+ if (pmp->pm_RootDirEnts == 0) {
+ if (bsp->bs710.bsBootSectSig2 != BOOTSIG2
+ || bsp->bs710.bsBootSectSig3 != BOOTSIG3
+ || pmp->pm_Sectors
+ || pmp->pm_FATsecs
+ || getushort(b710->bpbFSVers)) {
+ error = EINVAL;
+ printf("mountmsdosfs(): bad FAT32 filesystem\n");
+ goto error_exit;
+ }
+ pmp->pm_fatmask = FAT32_MASK;
+ pmp->pm_fatmult = 4;
+ pmp->pm_fatdiv = 1;
+ pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs);
+ if (getushort(b710->bpbExtFlags) & FATMIRROR)
+ pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM;
+ else
+ pmp->pm_flags |= MSDOSFS_FATMIRROR;
+ } else
+ pmp->pm_flags |= MSDOSFS_FATMIRROR;
+
+#ifndef __FreeBSD__
+ if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
+ if (FAT32(pmp)) {
+ /*
+ * GEMDOS doesn't know fat32.
+ */
+ error = EINVAL;
+ goto error_exit;
+ }
+
+ /*
+ * Check a few values (could do some more):
+ * - logical sector size: power of 2, >= block size
+ * - sectors per cluster: power of 2, >= 1
+ * - number of sectors: >= 1, <= size of partition
+ */
+ if ( (SecPerClust == 0)
+ || (SecPerClust & (SecPerClust - 1))
+ || (pmp->pm_BytesPerSec < bsize)
+ || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1))
+ || (pmp->pm_HugeSectors == 0)
+ || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize)
+ > dpart.part->p_size)
+ ) {
+ error = EINVAL;
+ goto error_exit;
+ }
+ /*
+ * XXX - Many parts of the msdos fs driver seem to assume that
+ * the number of bytes per logical sector (BytesPerSec) will
+ * always be the same as the number of bytes per disk block
+ * Let's pretend it is.
+ */
+ tmp = pmp->pm_BytesPerSec / bsize;
+ pmp->pm_BytesPerSec = bsize;
+ pmp->pm_HugeSectors *= tmp;
+ pmp->pm_HiddenSects *= tmp;
+ pmp->pm_ResSectors *= tmp;
+ pmp->pm_Sectors *= tmp;
+ pmp->pm_FATsecs *= tmp;
+ SecPerClust *= tmp;
+ }
+#endif
+ pmp->pm_fatblk = pmp->pm_ResSectors;
+ if (FAT32(pmp)) {
+ pmp->pm_rootdirblk = getulong(b710->bpbRootClust);
+ pmp->pm_firstcluster = pmp->pm_fatblk
+ + (pmp->pm_FATs * pmp->pm_FATsecs);
+ pmp->pm_fsinfo = getushort(b710->bpbFSInfo);
+ } else {
+ pmp->pm_rootdirblk = pmp->pm_fatblk +
+ (pmp->pm_FATs * pmp->pm_FATsecs);
+ pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry)
+ + pmp->pm_BytesPerSec - 1)
+ / pmp->pm_BytesPerSec;/* in sectors */
+ pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize;
+ }
+
+ pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) /
+ SecPerClust;
+ pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1;
+ pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec;
+
+#ifndef __FreeBSD__
+ if (argp->flags & MSDOSFSMNT_GEMDOSFS) {
+ if ((pmp->pm_nmbrofclusters <= (0xff0 - 2))
+ && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE)
+ && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2))))
+ ) {
+ pmp->pm_fatmask = FAT12_MASK;
+ pmp->pm_fatmult = 3;
+ pmp->pm_fatdiv = 2;
+ } else {
+ pmp->pm_fatmask = FAT16_MASK;
+ pmp->pm_fatmult = 2;
+ pmp->pm_fatdiv = 1;
+ }
+ } else
+#endif
+ if (pmp->pm_fatmask == 0) {
+ if (pmp->pm_maxcluster
+ <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) {
+ /*
+ * This will usually be a floppy disk. This size makes
+ * sure that one fat entry will not be split across
+ * multiple blocks.
+ */
+ pmp->pm_fatmask = FAT12_MASK;
+ pmp->pm_fatmult = 3;
+ pmp->pm_fatdiv = 2;
+ } else {
+ pmp->pm_fatmask = FAT16_MASK;
+ pmp->pm_fatmult = 2;
+ pmp->pm_fatdiv = 1;
+ }
+ }
+ if (FAT12(pmp))
+ pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec;
+ else
+ pmp->pm_fatblocksize = DFLTBSIZE;
+
+ pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec;
+ pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1;
+
+ /*
+ * Compute mask and shift value for isolating cluster relative byte
+ * offsets and cluster numbers from a file offset.
+ */
+ pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec;
+ pmp->pm_crbomask = pmp->pm_bpcluster - 1;
+ pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1;
+
+ /*
+ * Check for valid cluster size
+ * must be a power of 2
+ */
+ if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) {
+ error = EINVAL;
+ goto error_exit;
+ }
+
+ /*
+ * Release the bootsector buffer.
+ */
+ brelse(bp);
+ bp = NULL;
+
+ /*
+ * Check FSInfo.
+ */
+ if (pmp->pm_fsinfo) {
+ struct fsinfo *fp;
+
+ if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0)
+ goto error_exit;
+ fp = (struct fsinfo *)bp->b_data;
+ if (!bcmp(fp->fsisig1, "RRaA", 4)
+ && !bcmp(fp->fsisig2, "rrAa", 4)
+ && !bcmp(fp->fsisig3, "\0\0\125\252", 4)
+ && !bcmp(fp->fsisig4, "\0\0\125\252", 4))
+ pmp->pm_nxtfree = getulong(fp->fsinxtfree);
+ else
+ pmp->pm_fsinfo = 0;
+ brelse(bp);
+ bp = NULL;
+ }
+
+ /*
+ * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX
+ */
+
+ /*
+ * Allocate memory for the bitmap of allocated clusters, and then
+ * fill it in.
+ */
+ pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1)
+ / N_INUSEBITS)
+ * sizeof(*pmp->pm_inusemap),
+ M_MSDOSFSFAT, M_WAITOK);
+
+ /*
+ * fillinusemap() needs pm_devvp.
+ */
+ pmp->pm_dev = dev;
+ pmp->pm_devvp = devvp;
+
+ /*
+ * Have the inuse map filled in.
+ */
+ if ((error = fillinusemap(pmp)) != 0)
+ goto error_exit;
+
+ /*
+ * If they want fat updates to be synchronous then let them suffer
+ * the performance degradation in exchange for the on disk copy of
+ * the fat being correct just about all the time. I suppose this
+ * would be a good thing to turn on if the kernel is still flakey.
+ */
+ if (mp->mnt_flag & MNT_SYNCHRONOUS)
+ pmp->pm_flags |= MSDOSFSMNT_WAITONFAT;
+
+ /*
+ * Finish up.
+ */
+ if (ronly)
+ pmp->pm_flags |= MSDOSFSMNT_RONLY;
+ else
+ pmp->pm_fmod = 1;
+ mp->mnt_data = (qaddr_t) pmp;
+ mp->mnt_stat.f_fsid.val[0] = (long)dev;
+ mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum;
+ mp->mnt_flag |= MNT_LOCAL;
+ devvp->v_specmountpoint = mp;
+
+ return 0;
+
+error_exit:
+ if (bp)
+ brelse(bp);
+ (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p);
+ if (pmp) {
+ if (pmp->pm_inusemap)
+ free(pmp->pm_inusemap, M_MSDOSFSFAT);
+ free(pmp, M_MSDOSFSMNT);
+ mp->mnt_data = (qaddr_t)0;
+ }
+ return (error);
+}
+
+ /*
+  * Start hook of the VFS operations vector.  This implementation does not
+  * look at any of its arguments and always reports success.
+  */
+ static int
+ msdosfs_start(mp, flags, p)
+ 	struct mount *mp;
+ 	int flags;
+ 	struct proc *p;
+ {
+ 	/* Nothing to do. */
+ 	return 0;
+ }
+
+/*
+ * Unmount the filesystem described by mp.
+ */
+static int
+msdosfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct msdosfsmount *pmp;
+ int error, flags;
+
+ flags = 0;
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+ error = vflush(mp, NULLVP, flags);
+ if (error)
+ return error;
+ pmp = VFSTOMSDOSFS(mp);
+ pmp->pm_devvp->v_specmountpoint = NULL;
+#ifdef MSDOSFS_DEBUG
+ {
+ struct vnode *vp = pmp->pm_devvp;
+
+ printf("msdosfs_umount(): just before calling VOP_CLOSE()\n");
+ printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n",
+ vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
+ printf("lastr %d, id %lu, mount %p, op %p\n",
+ vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op);
+ printf("freef %p, freeb %p, mount %p\n",
+ vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev,
+ vp->v_mount);
+ printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
+ TAILQ_FIRST(&vp->v_cleanblkhd),
+ TAILQ_FIRST(&vp->v_dirtyblkhd),
+ vp->v_numoutput, vp->v_type);
+ printf("union %p, tag %d, data[0] %08x, data[1] %08x\n",
+ vp->v_socket, vp->v_tag,
+ ((u_int *)vp->v_data)[0],
+ ((u_int *)vp->v_data)[1]);
+ }
+#endif
+ error = VOP_CLOSE(pmp->pm_devvp,
+ (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE,
+ NOCRED, p);
+ vrele(pmp->pm_devvp);
+ free(pmp->pm_inusemap, M_MSDOSFSFAT);
+ free(pmp, M_MSDOSFSMNT);
+ mp->mnt_data = (qaddr_t)0;
+ mp->mnt_flag &= ~MNT_LOCAL;
+ return (error);
+}
+
+ /*
+  * Hand back the vnode of the root directory of the mounted filesystem.
+  * The denode comes from deget(); when deget() fails its error is passed
+  * on and *vpp is left untouched.
+  */
+ static int
+ msdosfs_root(mp, vpp)
+ 	struct mount *mp;
+ 	struct vnode **vpp;
+ {
+ 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+ 	struct denode *rootdep;
+ 	int error;
+
+ #ifdef MSDOSFS_DEBUG
+ 	printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp);
+ #endif
+ 	if ((error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &rootdep)) != 0)
+ 		return (error);
+
+ 	*vpp = DETOV(rootdep);
+ 	return (0);
+ }
+
+ /*
+  * Quota control entry point.  No argument is examined; the call always
+  * fails with EOPNOTSUPP.
+  */
+ static int
+ msdosfs_quotactl(mp, cmds, uid, arg, p)
+ 	struct mount *mp;
+ 	int cmds;
+ 	uid_t uid;
+ 	caddr_t arg;
+ 	struct proc *p;
+ {
+
+ 	return (EOPNOTSUPP);
+ }
+
+ /*
+  * Fill in *sbp with statistics about the mounted filesystem.
+  *
+  * Block size and block counts are expressed in clusters.  When the
+  * caller supplies a buffer other than the mount's own mnt_stat, the
+  * type number and the mount names are copied from mnt_stat as well.
+  * Always returns 0.
+  */
+ static int
+ msdosfs_statfs(mp, sbp, p)
+ 	struct mount *mp;
+ 	struct statfs *sbp;
+ 	struct proc *p;
+ {
+ 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+
+ 	/* Geometry: one "block" is one cluster. */
+ 	sbp->f_bsize = pmp->pm_bpcluster;
+ 	sbp->f_iosize = pmp->pm_bpcluster;
+ 	sbp->f_blocks = pmp->pm_nmbrofclusters;
+
+ 	/* Free space, available and total free are the same count. */
+ 	sbp->f_bfree = pmp->pm_freeclustercount;
+ 	sbp->f_bavail = pmp->pm_freeclustercount;
+
+ 	/* File counts. */
+ 	sbp->f_files = pmp->pm_RootDirEnts;	/* XXX */
+ 	sbp->f_ffree = 0;	/* what to put in here? */
+
+ 	if (sbp != &mp->mnt_stat) {
+ 		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+ 		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ 		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname,
+ 		    MNAMELEN);
+ 	}
+ 	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
+ 	return (0);
+ }
+
+ /*
+  * Write back the modified denodes of a mount and, unless the request is
+  * MNT_LAZY, the device vnode as well.
+  *
+  * The vnode list of the mount is walked with mntvnode_slock held; the
+  * lock is dropped around vget()/VOP_FSYNC() and re-taken afterwards.
+  * The last error seen from VOP_FSYNC() is returned, 0 if there was none.
+  */
+ static int
+ msdosfs_sync(mp, waitfor, cred, p)
+ 	struct mount *mp;
+ 	int waitfor;
+ 	struct ucred *cred;
+ 	struct proc *p;
+ {
+ 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+ 	struct vnode *vp, *nvp;
+ 	struct denode *dep;
+ 	int error, allerror = 0;
+ 	int skip;
+
+ 	/*
+ 	 * If we ever switch to not updating all of the fats all the time,
+ 	 * this would be the place to update them from the first one.
+ 	 */
+ 	if (pmp->pm_fmod != 0) {
+ 		if (pmp->pm_flags & MSDOSFSMNT_RONLY)
+ 			panic("msdosfs_sync: rofs mod");
+ 		/* update fats here */
+ 	}
+
+ 	/*
+ 	 * Write back each (modified) denode.
+ 	 */
+ 	simple_lock(&mntvnode_slock);
+ loop:
+ 	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+ 		/*
+ 		 * A vnode that no longer belongs to this mount point means
+ 		 * the list changed under us: restart from the head.
+ 		 */
+ 		if (vp->v_mount != mp)
+ 			goto loop;
+
+ 		simple_lock(&vp->v_interlock);
+ 		nvp = vp->v_mntvnodes.le_next;
+
+ 		/*
+ 		 * Skip vnodes without a type, and vnodes whose denode has
+ 		 * no pending time/modify flags and that either have no
+ 		 * dirty buffers or are visited by a lazy sync.
+ 		 */
+ 		skip = (vp->v_type == VNON);
+ 		if (!skip) {
+ 			dep = VTODE(vp);
+ 			skip = (dep->de_flag &
+ 			    (DE_ACCESS | DE_CREATE | DE_UPDATE |
+ 			    DE_MODIFIED)) == 0 &&
+ 			    (TAILQ_EMPTY(&vp->v_dirtyblkhd) ||
+ 			    waitfor == MNT_LAZY);
+ 		}
+ 		if (skip) {
+ 			simple_unlock(&vp->v_interlock);
+ 			continue;
+ 		}
+
+ 		simple_unlock(&mntvnode_slock);
+ 		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+ 		if (error != 0) {
+ 			simple_lock(&mntvnode_slock);
+ 			if (error == ENOENT)
+ 				goto loop;
+ 			continue;
+ 		}
+ 		if ((error = VOP_FSYNC(vp, cred, waitfor, p)) != 0)
+ 			allerror = error;
+ 		VOP_UNLOCK(vp, 0, p);
+ 		vrele(vp);
+ 		simple_lock(&mntvnode_slock);
+ 	}
+ 	simple_unlock(&mntvnode_slock);
+
+ 	/*
+ 	 * Flush filesystem control info.
+ 	 */
+ 	if (waitfor != MNT_LAZY) {
+ 		vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+ 		if ((error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p)) != 0)
+ 			allerror = error;
+ 		VOP_UNLOCK(pmp->pm_devvp, 0, p);
+ 	}
+ 	return (allerror);
+ }
+
+ /*
+  * Translate a file handle into a vnode.
+  *
+  * The handle is interpreted as a struct defid.  The export entry for
+  * the client address is looked up first; EACCES is returned when there
+  * is none.  On a deget() failure *vpp is set to NULLVP and the error is
+  * returned.  On success *vpp, *exflagsp and *credanonp are filled in.
+  */
+ static int
+ msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+ 	struct mount *mp;
+ 	struct fid *fhp;
+ 	struct sockaddr *nam;
+ 	struct vnode **vpp;
+ 	int *exflagsp;
+ 	struct ucred **credanonp;
+ {
+ 	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+ 	struct defid *handle = (struct defid *)fhp;
+ 	struct netcred *export;
+ 	struct denode *dep;
+ 	int error;
+
+ 	if ((export = vfs_export_lookup(mp, &pmp->pm_export, nam)) == NULL)
+ 		return (EACCES);
+
+ 	error = deget(pmp, handle->defid_dirclust, handle->defid_dirofs,
+ 	    &dep);
+ 	if (error != 0) {
+ 		*vpp = NULLVP;
+ 		return (error);
+ 	}
+
+ 	*vpp = DETOV(dep);
+ 	*exflagsp = export->netc_exflags;
+ 	*credanonp = &export->netc_anon;
+ 	return (0);
+ }
+
+ /*
+  * Build a file handle for a vnode.  The handle is laid out as a struct
+  * defid and records the cluster and offset of the file's directory
+  * entry.  Always returns 0.
+  */
+ static int
+ msdosfs_vptofh(vp, fhp)
+ 	struct vnode *vp;
+ 	struct fid *fhp;
+ {
+ 	struct denode *dep = VTODE(vp);
+ 	struct defid *handle = (struct defid *)fhp;
+
+ 	handle->defid_len = sizeof(struct defid);
+ 	handle->defid_dirclust = dep->de_dirclust;
+ 	handle->defid_dirofs = dep->de_diroffset;
+ 	/* handle->defid_gen = dep->de_gen; */
+ 	return (0);
+ }
+
+ /*
+  * Lookup of a vnode by inode number.  No argument is examined; the
+  * call always fails with EOPNOTSUPP.
+  */
+ static int
+ msdosfs_vget(mp, ino, vpp)
+ 	struct mount *mp;
+ 	ino_t ino;
+ 	struct vnode **vpp;
+ {
+
+ 	return (EOPNOTSUPP);
+ }
+
+ static struct vfsops msdosfs_vfsops = { /* positional initializer: entry order must match the member order of struct vfsops (declared elsewhere) */
+ msdosfs_mount,
+ msdosfs_start,
+ msdosfs_unmount,
+ msdosfs_root,
+ msdosfs_quotactl,
+ msdosfs_statfs,
+ msdosfs_sync,
+ msdosfs_vget,
+ msdosfs_fhtovp,
+ msdosfs_vptofh,
+ msdosfs_init
+ };
+
+ VFS_SET(msdosfs_vfsops, msdos, 0); /* NOTE(review): presumably registers the vector above under the name "msdos" -- confirm against the VFS_SET definition */
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
new file mode 100644
index 0000000..36aa91d
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -0,0 +1,1983 @@
+/* $Id: msdosfs_vnops.c,v 1.80 1998/12/07 21:58:35 archie Exp $ */
+/* $NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h> /* defines plimit structure in proc struct */
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */ /* defines v_rdev */
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/signalvar.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+#include <vm/vnode_pager.h>
+
+#include <msdosfs/bpb.h>
+#include <msdosfs/direntry.h>
+#include <msdosfs/denode.h>
+#include <msdosfs/msdosfsmount.h>
+#include <msdosfs/fat.h>
+
+/*
+ * Prototypes for MSDOSFS vnode operations
+ */
+static int msdosfs_create __P((struct vop_create_args *));
+static int msdosfs_mknod __P((struct vop_mknod_args *));
+static int msdosfs_close __P((struct vop_close_args *));
+static int msdosfs_access __P((struct vop_access_args *));
+static int msdosfs_getattr __P((struct vop_getattr_args *));
+static int msdosfs_setattr __P((struct vop_setattr_args *));
+static int msdosfs_read __P((struct vop_read_args *));
+static int msdosfs_write __P((struct vop_write_args *));
+static int msdosfs_fsync __P((struct vop_fsync_args *));
+static int msdosfs_remove __P((struct vop_remove_args *));
+static int msdosfs_link __P((struct vop_link_args *));
+static int msdosfs_rename __P((struct vop_rename_args *));
+static int msdosfs_mkdir __P((struct vop_mkdir_args *));
+static int msdosfs_rmdir __P((struct vop_rmdir_args *));
+static int msdosfs_symlink __P((struct vop_symlink_args *));
+static int msdosfs_readdir __P((struct vop_readdir_args *));
+static int msdosfs_abortop __P((struct vop_abortop_args *));
+static int msdosfs_bmap __P((struct vop_bmap_args *));
+static int msdosfs_strategy __P((struct vop_strategy_args *));
+static int msdosfs_print __P((struct vop_print_args *));
+static int msdosfs_pathconf __P((struct vop_pathconf_args *ap));
+static int msdosfs_getpages __P((struct vop_getpages_args *));
+static int msdosfs_putpages __P((struct vop_putpages_args *));
+
+/*
+ * Some general notes:
+ *
+ * In the ufs filesystem the inodes, superblocks, and indirect blocks are
+ * read/written using the vnode for the filesystem. Blocks that represent
+ * the contents of a file are read/written using the vnode for the file
+ * (including directories when they are read/written as files). This
+ * presents problems for the dos filesystem because data that should be in
+ * an inode (if dos had them) resides in the directory itself. Since we
+ * must update directory entries without the benefit of having the vnode
+ * for the directory we must use the vnode for the filesystem. This means
+ * that when a directory is actually read/written (via read, write, or
+ * readdir, or seek) we must use the vnode for the filesystem instead of
+ * the vnode for the directory as would happen in ufs. This is to insure we
+ * retreive the correct block from the buffer cache since the hash value is
+ * based upon the vnode address and the desired block number.
+ */
+
+/*
+ * Create a regular file. On entry the directory to contain the file being
+ * created is locked. We must release before we return. We must also free
+ * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or
+ * only if the SAVESTART bit in cn_flags is clear on success.
+ */
+static int
+msdosfs_create(ap)
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct denode ndirent;
+ struct denode *dep;
+ struct denode *pdep = VTODE(ap->a_dvp);
+ struct timespec ts;
+ int error;
+
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
+#endif
+
+ /*
+ * If this is the root directory and there is no space left we
+ * can't do anything. This is because the root directory can not
+ * change size.
+ */
+ if (pdep->de_StartCluster == MSDOSFSROOT
+ && pdep->de_fndoffset >= pdep->de_FileSize) {
+ error = ENOSPC;
+ goto bad;
+ }
+
+ /*
+ * Create a directory entry for the file, then call createde() to
+ * have it installed. NOTE: DOS files are always executable. We
+ * use the absence of the owner write bit to make the file
+ * readonly.
+ */
+#ifdef DIAGNOSTIC
+ if ((cnp->cn_flags & HASBUF) == 0)
+ panic("msdosfs_create: no name");
+#endif
+ bzero(&ndirent, sizeof(ndirent));
+ error = uniqdosname(pdep, cnp, ndirent.de_Name);
+ if (error)
+ goto bad;
+
+ ndirent.de_Attributes = (ap->a_vap->va_mode & VWRITE) ?
+ ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY;
+ ndirent.de_LowerCase = 0;
+ ndirent.de_StartCluster = 0;
+ ndirent.de_FileSize = 0;
+ ndirent.de_dev = pdep->de_dev;
+ ndirent.de_devvp = pdep->de_devvp;
+ ndirent.de_pmp = pdep->de_pmp;
+ ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
+ getnanotime(&ts);
+ DETIMES(&ndirent, &ts, &ts, &ts);
+ error = createde(&ndirent, pdep, &dep, cnp);
+ if (error)
+ goto bad;
+ if ((cnp->cn_flags & SAVESTART) == 0)
+ zfree(namei_zone, cnp->cn_pnbuf);
+ *ap->a_vpp = DETOV(dep);
+ return (0);
+
+bad:
+ zfree(namei_zone, cnp->cn_pnbuf);
+ return (error);
+}
+
+ /*
+  * Create a node of the type requested in ap->a_vap->va_type.
+  *
+  * Directories are handed to msdosfs_mkdir() and regular files to
+  * msdosfs_create(), passing the argument structure through with a cast.
+  * Every other type is rejected with EINVAL after the pathname buffer
+  * has been freed.
+  */
+ static int
+ msdosfs_mknod(ap)
+ 	struct vop_mknod_args /* {
+ 		struct vnode *a_dvp;
+ 		struct vnode **a_vpp;
+ 		struct componentname *a_cnp;
+ 		struct vattr *a_vap;
+ 	} */ *ap;
+ {
+
+ 	if (ap->a_vap->va_type == VDIR)
+ 		return (msdosfs_mkdir((struct vop_mkdir_args *)ap));
+
+ 	if (ap->a_vap->va_type == VREG)
+ 		return (msdosfs_create((struct vop_create_args *)ap));
+
+ 	/* Anything else can not be represented. */
+ 	zfree(namei_zone, ap->a_cnp->cn_pnbuf);
+ 	return (EINVAL);
+ }
+
+ /*
+  * Close a file.  While holding the vnode interlock, and only when the
+  * vnode has more than one user, the denode's time stamps are brought up
+  * to date via DETIMES() using the current time.  Always returns 0.
+  */
+ static int
+ msdosfs_close(ap)
+ 	struct vop_close_args /* {
+ 		struct vnode *a_vp;
+ 		int a_fflag;
+ 		struct ucred *a_cred;
+ 		struct proc *a_p;
+ 	} */ *ap;
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct denode *dep = VTODE(vp);
+ 	struct timespec now;
+
+ 	simple_lock(&vp->v_interlock);
+ 	if (vp->v_usecount > 1) {
+ 		getnanotime(&now);
+ 		DETIMES(dep, &now, &now, &now);
+ 	}
+ 	simple_unlock(&vp->v_interlock);
+
+ 	return (0);
+ }
+
+ /*
+  * Check whether the credentials in ap->a_cred may access the file in
+  * the way described by ap->a_mode (VREAD, VWRITE, VEXEC).
+  *
+  * The file's permission bits are synthesized: read and execute for
+  * everybody, write for everybody unless ATTR_READONLY is set, the whole
+  * masked with the mount's pm_mask.  The owner and group are those of
+  * the mount (pm_uid, pm_gid).
+  *
+  * Returns 0 when access is granted, EROFS for a write to a directory,
+  * symlink or regular file on a read-only mount, EACCES otherwise.
+  */
+ static int
+ msdosfs_access(ap)
+ 	struct vop_access_args /* {
+ 		struct vnode *a_vp;
+ 		int a_mode;
+ 		struct ucred *a_cred;
+ 		struct proc *a_p;
+ 	} */ *ap;
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct denode *dep = VTODE(ap->a_vp);
+ 	struct msdosfsmount *pmp = dep->de_pmp;
+ 	struct ucred *cred = ap->a_cred;
+ 	mode_t mode = ap->a_mode;
+ 	mode_t granted, wanted;
+ 	mode_t rbit, wbit, xbit;
+ 	gid_t *gp;
+ 	int i, ingroup;
+
+ 	/* Permissions the file offers. */
+ 	granted = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH);
+ 	if ((dep->de_Attributes & ATTR_READONLY) == 0)
+ 		granted |= (S_IWUSR|S_IWGRP|S_IWOTH);
+ 	granted &= pmp->pm_mask;
+
+ 	/*
+ 	 * Disallow write attempts on read-only file systems;
+ 	 * unless the file is a socket, fifo, or a block or
+ 	 * character device resident on the file system.
+ 	 */
+ 	if ((mode & VWRITE) &&
+ 	    (vp->v_type == VDIR || vp->v_type == VLNK ||
+ 	    vp->v_type == VREG) &&
+ 	    (vp->v_mount->mnt_flag & MNT_RDONLY))
+ 		return (EROFS);
+
+ 	/* User id 0 always gets access. */
+ 	if (cred->cr_uid == 0)
+ 		return (0);
+
+ 	/* Pick the permission class: owner, then group, then other. */
+ 	if (cred->cr_uid == pmp->pm_uid) {
+ 		rbit = S_IRUSR;
+ 		wbit = S_IWUSR;
+ 		xbit = S_IXUSR;
+ 	} else {
+ 		ingroup = 0;
+ 		for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups;
+ 		    i++, gp++) {
+ 			if (pmp->pm_gid == *gp) {
+ 				ingroup = 1;
+ 				break;
+ 			}
+ 		}
+ 		if (ingroup) {
+ 			rbit = S_IRGRP;
+ 			wbit = S_IWGRP;
+ 			xbit = S_IXGRP;
+ 		} else {
+ 			rbit = S_IROTH;
+ 			wbit = S_IWOTH;
+ 			xbit = S_IXOTH;
+ 		}
+ 	}
+
+ 	/* Translate the request into bits of that class and compare. */
+ 	wanted = 0;
+ 	if (mode & VEXEC)
+ 		wanted |= xbit;
+ 	if (mode & VREAD)
+ 		wanted |= rbit;
+ 	if (mode & VWRITE)
+ 		wanted |= wbit;
+
+ 	if ((granted & wanted) == wanted)
+ 		return (0);
+ 	return (EACCES);
+ }
+
+ /*
+  * Report the attributes of a file in *ap->a_vap.
+  *
+  * The denode's time stamps are refreshed first.  Owner, group and the
+  * permission mask come from the mount; the mode is full access, minus
+  * the write bits when ATTR_READONLY is set.  Always returns 0.
+  */
+ static int
+ msdosfs_getattr(ap)
+ 	struct vop_getattr_args /* {
+ 		struct vnode *a_vp;
+ 		struct vattr *a_vap;
+ 		struct ucred *a_cred;
+ 		struct proc *a_p;
+ 	} */ *ap;
+ {
+ 	struct denode *dep = VTODE(ap->a_vp);
+ 	struct msdosfsmount *pmp = dep->de_pmp;
+ 	struct vattr *vap = ap->a_vap;
+ 	struct timespec now;
+ 	u_long entsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
+ 	u_long fileid;
+ 	mode_t perm;
+
+ 	getnanotime(&now);
+ 	DETIMES(dep, &now, &now, &now);
+ 	vap->va_fsid = dep->de_dev;
+
+ 	/*
+ 	 * The following computation of the fileid must be the same as that
+ 	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
+ 	 * doesn't work.
+ 	 */
+ 	if (dep->de_Attributes & ATTR_DIRECTORY) {
+ 		/* A directory is identified by where its data starts. */
+ 		if (dep->de_StartCluster == MSDOSFSROOT)
+ 			fileid = 1;
+ 		else
+ 			fileid = cntobn(pmp, dep->de_StartCluster) *
+ 			    entsperblk;
+ 	} else {
+ 		/* A file is identified by where its directory entry is. */
+ 		if (dep->de_dirclust == MSDOSFSROOT)
+ 			fileid = roottobn(pmp, 0) * entsperblk;
+ 		else
+ 			fileid = cntobn(pmp, dep->de_dirclust) * entsperblk;
+ 		fileid += dep->de_diroffset / sizeof(struct direntry);
+ 	}
+ 	vap->va_fileid = fileid;
+
+ 	if (dep->de_Attributes & ATTR_READONLY)
+ 		perm = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+ 	else
+ 		perm = S_IRWXU|S_IRWXG|S_IRWXO;
+ 	vap->va_mode = perm & pmp->pm_mask;
+ 	vap->va_uid = pmp->pm_uid;
+ 	vap->va_gid = pmp->pm_gid;
+ 	vap->va_nlink = 1;
+ 	vap->va_rdev = 0;
+ 	vap->va_size = dep->de_FileSize;
+
+ 	/*
+ 	 * Access and creation stamps are taken from the entry only on
+ 	 * MSDOSFSMNT_LONGNAME mounts; otherwise they mirror the
+ 	 * modification time.
+ 	 */
+ 	dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime);
+ 	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
+ 		dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime);
+ 		dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun,
+ 		    &vap->va_ctime);
+ 	} else {
+ 		vap->va_atime = vap->va_mtime;
+ 		vap->va_ctime = vap->va_mtime;
+ 	}
+
+ 	/* SF_ARCHIVED is the inverse of the DOS archive attribute. */
+ 	vap->va_flags = (dep->de_Attributes & ATTR_ARCHIVE) ? 0 : SF_ARCHIVED;
+ 	vap->va_gen = 0;
+ 	vap->va_blocksize = pmp->pm_bpcluster;
+ 	/* Space used: the file size rounded up to a whole cluster. */
+ 	vap->va_bytes =
+ 	    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
+ 	vap->va_type = ap->a_vp->v_type;
+ 	vap->va_filerev = dep->de_modrev;
+ 	return (0);
+ }
+
+ /*
+  * Change attributes of a file.
+  *
+  * The requests in *ap->a_vap are processed in a fixed order: flags,
+  * owner/group, size, times, mode.  Each step is skipped when its field
+  * holds VNOVAL.  The first failing step returns its error immediately;
+  * changes made to the denode by earlier steps are not undone.  When
+  * every step passes the result of deupdat(dep, 1) is returned.
+  *
+  * Errors produced here: EINVAL (unsettable attribute requested, or an
+  * owner/group different from the mount's), EROFS (read-only mount),
+  * EPERM, EOPNOTSUPP (unsupported flag bits), EISDIR (size change on a
+  * directory), plus whatever suser(), VOP_ACCESS(), detrunc() and
+  * deupdat() return.
+  */
+ static int
+ msdosfs_setattr(ap)
+ 	struct vop_setattr_args /* {
+ 		struct vnode *a_vp;
+ 		struct vattr *a_vap;
+ 		struct ucred *a_cred;
+ 		struct proc *a_p;
+ 	} */ *ap;
+ {
+ 	struct vnode *vp = ap->a_vp;
+ 	struct denode *dep = VTODE(ap->a_vp);
+ 	struct msdosfsmount *pmp = dep->de_pmp;
+ 	struct vattr *vap = ap->a_vap;
+ 	struct ucred *cred = ap->a_cred;
+ 	int error = 0;
+
+ #ifdef MSDOSFS_DEBUG
+ 	printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n",
+ 	    ap->a_vp, vap, cred, ap->a_p);
+ #endif
+
+ 	/*
+ 	 * Check for unsettable attributes.
+ 	 */
+ 	if (vap->va_type != VNON || vap->va_nlink != VNOVAL ||
+ 	    vap->va_fsid != VNOVAL || vap->va_fileid != VNOVAL ||
+ 	    vap->va_blocksize != VNOVAL || vap->va_rdev != VNOVAL ||
+ 	    vap->va_bytes != VNOVAL || vap->va_gen != VNOVAL) {
+ #ifdef MSDOSFS_DEBUG
+ 		printf("msdosfs_setattr(): returning EINVAL\n");
+ 		printf(" va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n",
+ 		    vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid);
+ 		printf(" va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n",
+ 		    vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen);
+ 		printf(" va_uid %x, va_gid %x\n",
+ 		    vap->va_uid, vap->va_gid);
+ #endif
+ 		return (EINVAL);
+ 	}
+
+ 	/*
+ 	 * File flags.
+ 	 */
+ 	if (vap->va_flags != VNOVAL) {
+ 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ 			return (EROFS);
+ 		if (cred->cr_uid != pmp->pm_uid) {
+ 			error = suser(cred, &ap->a_p->p_acflag);
+ 			if (error)
+ 				return (error);
+ 		}
+ 		/*
+ 		 * We are very inconsistent about handling unsupported
+ 		 * attributes.  We ignored the access time and the
+ 		 * read and execute bits.  We were strict for the other
+ 		 * attributes.
+ 		 *
+ 		 * Here we are strict, stricter than ufs in not allowing
+ 		 * users to attempt to set SF_SETTABLE bits or anyone to
+ 		 * set unsupported bits.  However, we ignore attempts to
+ 		 * set ATTR_ARCHIVE for directories `cp -pr' from a more
+ 		 * sensible file system attempts it a lot.
+ 		 */
+ 		if (cred->cr_uid != 0 && (vap->va_flags & SF_SETTABLE))
+ 			return EPERM;
+ 		if (vap->va_flags & ~SF_ARCHIVED)
+ 			return EOPNOTSUPP;
+ 		if (vap->va_flags & SF_ARCHIVED)
+ 			dep->de_Attributes &= ~ATTR_ARCHIVE;
+ 		else if (!(dep->de_Attributes & ATTR_DIRECTORY))
+ 			dep->de_Attributes |= ATTR_ARCHIVE;
+ 		dep->de_flag |= DE_MODIFIED;
+ 	}
+
+ 	/*
+ 	 * Owner and group.  Nothing is stored: the only values accepted
+ 	 * are the mount's own uid and gid.
+ 	 */
+ 	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+ 		uid_t uid;
+ 		gid_t gid;
+
+ 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ 			return (EROFS);
+
+ 		/* An unspecified id defaults to the mount's. */
+ 		uid = vap->va_uid;
+ 		if (uid == (uid_t)VNOVAL)
+ 			uid = pmp->pm_uid;
+ 		gid = vap->va_gid;
+ 		if (gid == (gid_t)VNOVAL)
+ 			gid = pmp->pm_gid;
+
+ 		if (cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
+ 		    (gid != pmp->pm_gid && !groupmember(gid, cred))) {
+ 			error = suser(cred, &ap->a_p->p_acflag);
+ 			if (error)
+ 				return error;
+ 		}
+ 		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
+ 			return EINVAL;
+ 	}
+
+ 	/*
+ 	 * Size.
+ 	 */
+ 	if (vap->va_size != VNOVAL) {
+ 		/*
+ 		 * Disallow write attempts on read-only file systems;
+ 		 * unless the file is a socket, fifo, or a block or
+ 		 * character device resident on the file system.
+ 		 */
+ 		if (vp->v_type == VDIR)
+ 			return (EISDIR);
+ 		if ((vp->v_type == VLNK || vp->v_type == VREG) &&
+ 		    (vp->v_mount->mnt_flag & MNT_RDONLY))
+ 			return (EROFS);
+ 		error = detrunc(dep, vap->va_size, 0, cred, ap->a_p);
+ 		if (error)
+ 			return error;
+ 	}
+
+ 	/*
+ 	 * Access and modification times.
+ 	 */
+ 	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
+ 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ 			return (EROFS);
+ 		/*
+ 		 * A caller who is neither the mount owner nor the
+ 		 * superuser is let through only when VA_UTIMES_NULL is
+ 		 * set and the write access check passes.
+ 		 */
+ 		if (cred->cr_uid != pmp->pm_uid) {
+ 			error = suser(cred, &ap->a_p->p_acflag);
+ 			if (error) {
+ 				if ((vap->va_vaflags & VA_UTIMES_NULL) == 0)
+ 					return (error);
+ 				error = VOP_ACCESS(ap->a_vp, VWRITE, cred,
+ 				    ap->a_p);
+ 				if (error)
+ 					return (error);
+ 			}
+ 		}
+ 		if (vp->v_type != VDIR) {
+ 			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
+ 			    vap->va_atime.tv_sec != VNOVAL)
+ 				unix2dostime(&vap->va_atime, &dep->de_ADate,
+ 				    NULL, NULL);
+ 			if (vap->va_mtime.tv_sec != VNOVAL)
+ 				unix2dostime(&vap->va_mtime, &dep->de_MDate,
+ 				    &dep->de_MTime, NULL);
+ 			dep->de_Attributes |= ATTR_ARCHIVE;
+ 			dep->de_flag |= DE_MODIFIED;
+ 		}
+ 	}
+
+ 	/*
+ 	 * DOS files only have the ability to have their writability
+ 	 * attribute set, so we use the owner write bit to set the readonly
+ 	 * attribute.
+ 	 */
+ 	if (vap->va_mode != (mode_t)VNOVAL) {
+ 		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ 			return (EROFS);
+ 		if (cred->cr_uid != pmp->pm_uid) {
+ 			error = suser(cred, &ap->a_p->p_acflag);
+ 			if (error)
+ 				return (error);
+ 		}
+ 		if (vp->v_type != VDIR) {
+ 			/* We ignore the read and execute bits. */
+ 			if (vap->va_mode & VWRITE)
+ 				dep->de_Attributes &= ~ATTR_READONLY;
+ 			else
+ 				dep->de_Attributes |= ATTR_READONLY;
+ 			dep->de_flag |= DE_MODIFIED;
+ 		}
+ 	}
+
+ 	/* Push the (possibly) modified directory entry out. */
+ 	return (deupdat(dep, 1));
+ }
+
+ /*
+  * Read data from a file or directory into the caller's uio.
+  *
+  * Data is transferred one cluster at a time until the request is
+  * satisfied, end of file is reached or an error occurs.  Directories
+  * are read through the vnode of the filesystem device, regular files
+  * through their own vnode (with one cluster of read-ahead when the
+  * access pattern is sequential).
+  *
+  * Returns 0 on success (including a read at or past end of file),
+  * EINVAL for a negative offset, or the error from pcbmap(), bread(),
+  * breadn() or uiomove().
+  *
+  * End of file is detected by comparing the full-width offset with the
+  * file size, and the number of bytes left in the file is kept in an
+  * unsigned long.  Squeezing that difference into an int, as was done
+  * before, made files larger than 2GB look empty and let offsets of 4GB
+  * and more slip past the end-of-file test.  Reaching end of file now
+  * leaves the loop instead of returning from inside it, so that a read
+  * which ran into end of file still marks the file as accessed.
+  */
+ static int
+ msdosfs_read(ap)
+ 	struct vop_read_args /* {
+ 		struct vnode *a_vp;
+ 		struct uio *a_uio;
+ 		int a_ioflag;
+ 		struct ucred *a_cred;
+ 	} */ *ap;
+ {
+ 	int error = 0;
+ 	u_long remain;
+ 	int blsize;
+ 	int isadir;
+ 	long n;
+ 	long on;
+ 	daddr_t lbn;
+ 	daddr_t rablock;
+ 	int rasize;
+ 	struct buf *bp;
+ 	struct vnode *vp = ap->a_vp;
+ 	struct denode *dep = VTODE(vp);
+ 	struct msdosfsmount *pmp = dep->de_pmp;
+ 	struct uio *uio = ap->a_uio;
+
+ 	/*
+ 	 * If they didn't ask for any data, then we are done.
+ 	 */
+ 	if (uio->uio_resid == 0)
+ 		return (0);
+ 	if (uio->uio_offset < 0)
+ 		return (EINVAL);
+
+ 	isadir = dep->de_Attributes & ATTR_DIRECTORY;
+ 	do {
+ 		/*
+ 		 * Stop at end of file.  The offset is known to be
+ 		 * non-negative here, so the unsigned comparison is exact.
+ 		 */
+ 		if ((u_quad_t)uio->uio_offset >= dep->de_FileSize)
+ 			break;
+
+ 		lbn = de_cluster(pmp, uio->uio_offset);
+ 		on = uio->uio_offset & pmp->pm_crbomask;
+ 		n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid);
+
+ 		/*
+ 		 * Do not read beyond end of file.  The offset is below
+ 		 * the file size, so it fits an unsigned long and the
+ 		 * subtraction can not wrap.
+ 		 */
+ 		remain = dep->de_FileSize - (u_long)uio->uio_offset;
+ 		if (remain < (u_long)n)
+ 			n = (long)remain;
+
+ 		/* convert cluster # to block # if a directory */
+ 		if (isadir) {
+ 			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
+ 			if (error)
+ 				return (error);
+ 		}
+ 		/*
+ 		 * If we are operating on a directory file then be sure to
+ 		 * do i/o with the vnode for the filesystem instead of the
+ 		 * vnode for the directory.
+ 		 */
+ 		if (isadir) {
+ 			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+ 		} else {
+ 			rablock = lbn + 1;
+ 			if (vp->v_lastr + 1 == lbn &&
+ 			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
+ 				rasize = pmp->pm_bpcluster;
+ 				error = breadn(vp, lbn, pmp->pm_bpcluster,
+ 				    &rablock, &rasize, 1, NOCRED, &bp);
+ 			} else
+ 				error = bread(vp, lbn, pmp->pm_bpcluster,
+ 				    NOCRED, &bp);
+ 			vp->v_lastr = lbn;
+ 		}
+ 		if (error) {
+ 			brelse(bp);
+ 			return (error);
+ 		}
+ 		/* Never copy more than the buffer actually holds. */
+ 		n = min(n, pmp->pm_bpcluster - bp->b_resid);
+ 		error = uiomove(bp->b_data + on, (int) n, uio);
+ 		brelse(bp);
+ 	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+
+ 	/* Note the access, unless it is a directory or atime is off. */
+ 	if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME))
+ 		dep->de_flag |= DE_ACCESS;
+ 	return (error);
+ }
+
+/*
+ * Write data to a file or directory.
+ */
+static int
+msdosfs_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ int n;
+ int croffset;
+ int resid;
+ u_long osize;
+ int error = 0;
+ u_long count;
+ daddr_t bn, lastcn;
+ struct buf *bp;
+ int ioflag = ap->a_ioflag;
+ struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct vnode *vp = ap->a_vp;
+ struct vnode *thisvp;
+ struct denode *dep = VTODE(vp);
+ struct msdosfsmount *pmp = dep->de_pmp;
+ struct ucred *cred = ap->a_cred;
+
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
+ vp, uio, ioflag, cred);
+ printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
+ dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
+#endif
+
+ switch (vp->v_type) {
+ case VREG:
+ if (ioflag & IO_APPEND)
+ uio->uio_offset = dep->de_FileSize;
+ thisvp = vp;
+ break;
+ case VDIR:
+ return EISDIR;
+ default:
+ panic("msdosfs_write(): bad file type");
+ }
+
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+
+ if (uio->uio_resid == 0)
+ return (0);
+
+ /*
+ * If they've exceeded their filesize limit, tell them about it.
+ */
+ if (p &&
+ ((uio->uio_offset + uio->uio_resid) >
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
+ psignal(p, SIGXFSZ);
+ return (EFBIG);
+ }
+
+ /*
+ * If the offset we are starting the write at is beyond the end of
+ * the file, then they've done a seek. Unix filesystems allow
+ * files with holes in them, DOS doesn't so we must fill the hole
+ * with zeroed blocks.
+ */
+ if (uio->uio_offset > dep->de_FileSize) {
+ error = deextend(dep, uio->uio_offset, cred);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * Remember some values in case the write fails.
+ */
+ resid = uio->uio_resid;
+ osize = dep->de_FileSize;
+
+ /*
+ * If we write beyond the end of the file, extend it to its ultimate
+ * size ahead of the time to hopefully get a contiguous area.
+ */
+ if (uio->uio_offset + resid > osize) {
+ count = de_clcount(pmp, uio->uio_offset + resid) -
+ de_clcount(pmp, osize);
+ error = extendfile(dep, count, NULL, NULL, 0);
+ if (error && (error != ENOSPC || (ioflag & IO_UNIT)))
+ goto errexit;
+ lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
+ } else
+ lastcn = de_clcount(pmp, osize) - 1;
+
+ do {
+ if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+ error = ENOSPC;
+ break;
+ }
+
+ croffset = uio->uio_offset & pmp->pm_crbomask;
+ n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
+ if (uio->uio_offset + n > dep->de_FileSize) {
+ dep->de_FileSize = uio->uio_offset + n;
+ /* The object size needs to be set before buffer is allocated */
+ vnode_pager_setsize(vp, dep->de_FileSize);
+ }
+
+ bn = de_cluster(pmp, uio->uio_offset);
+ if ((uio->uio_offset & pmp->pm_crbomask) == 0
+ && (de_cluster(pmp, uio->uio_offset + uio->uio_resid)
+ > de_cluster(pmp, uio->uio_offset)
+ || uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
+ /*
+ * If either the whole cluster gets written,
+ * or we write the cluster from its start beyond EOF,
+ * then no need to read data from disk.
+ */
+ bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
+ clrbuf(bp);
+ /*
+ * Do the bmap now, since pcbmap needs buffers
+ * for the fat table. (see msdosfs_strategy)
+ */
+ if (bp->b_blkno == bp->b_lblkno) {
+ error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno,
+ 0, 0);
+ if (error)
+ bp->b_blkno = -1;
+ }
+ if (bp->b_blkno == -1) {
+ brelse(bp);
+ if (!error)
+ error = EIO; /* XXX */
+ break;
+ }
+ } else {
+ /*
+ * The block we need to write into exists, so read it in.
+ */
+ error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
+ if (error) {
+ brelse(bp);
+ break;
+ }
+ }
+
+ /*
+ * Should these vnode_pager_* functions be done on dir
+ * files?
+ */
+
+ /*
+ * Copy the data from user space into the buf header.
+ */
+ error = uiomove(bp->b_data + croffset, n, uio);
+
+ /*
+ * If they want this synchronous then write it and wait for
+ * it. Otherwise, if on a cluster boundary write it
+ * asynchronously so we can move on to the next block
+ * without delay. Otherwise do a delayed write because we
+ * may want to write some more into the block later.
+ */
+ if (ioflag & IO_SYNC)
+ (void) bwrite(bp);
+ else if (n + croffset == pmp->pm_bpcluster)
+ bawrite(bp);
+ else
+ bdwrite(bp);
+ dep->de_flag |= DE_UPDATE;
+ } while (error == 0 && uio->uio_resid > 0);
+
+ /*
+ * If the write failed and they want us to, truncate the file back
+ * to the size it was before the write was attempted.
+ */
+errexit:
+ if (error) {
+ if (ioflag & IO_UNIT) {
+ detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL);
+ uio->uio_offset -= resid - uio->uio_resid;
+ uio->uio_resid = resid;
+ } else {
+ detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL);
+ if (uio->uio_resid != resid)
+ error = 0;
+ }
+ } else if (ioflag & IO_SYNC)
+ error = deupdat(dep, 1);
+ return (error);
+}
+
+/*
+ * Flush the blocks of a file to disk.
+ *
+ * This function is worthless for vnodes that represent directories. Maybe we
+ * could just do a sync if they try an fsync on a directory file.
+ *
+ * Every buffer on the vnode's dirty list that is not marked busy is written
+ * with bwrite(); the routine then sleeps on v_numoutput until it is zero and
+ * finally calls deupdat(), passing (a_waitfor == MNT_WAIT) as its second
+ * argument.
+ *
+ * Returns the result of deupdat().
+ */
+static int
+msdosfs_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ int s;
+ struct buf *bp, *nbp;
+
+ /*
+ * Flush all dirty buffers associated with a vnode.
+ */
+loop:
+ /* s holds the level returned by splbio(); it is restored by splx(s). */
+ s = splbio();
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ /* Buffers already marked busy are left alone. */
+ if ((bp->b_flags & B_BUSY))
+ continue;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("msdosfs_fsync: not dirty");
+ /*
+ * Claim the buffer, restore the saved level and write it.
+ * The dirty list is then rescanned from its head.
+ */
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ (void) bwrite(bp);
+ goto loop;
+ }
+ /* Sleep on v_numoutput until it reaches zero. */
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0);
+ }
+#ifdef DIAGNOSTIC
+ /*
+ * NOTE(review): this retry jumps back to "loop" without an splx(s),
+ * so splbio() is entered again from the raised level - confirm that
+ * this is intended.
+ */
+ if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
+ vprint("msdosfs_fsync: dirty", vp);
+ goto loop;
+ }
+#endif
+ splx(s);
+ return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT));
+}
+
+/*
+ * Remove the directory entry for the file ap->a_vp from the directory
+ * ap->a_dvp.
+ *
+ * Directories are refused with EPERM; for anything else the result of
+ * removede() is returned.
+ */
+static int
+msdosfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *victim = ap->a_vp;
+	struct denode *dep = VTODE(victim);
+	struct denode *ddep = VTODE(ap->a_dvp);
+	int error;
+
+	/* Only non-directories may be unlinked through this entry point. */
+	error = (victim->v_type == VDIR) ? EPERM : removede(ddep, dep);
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, victim->v_usecount);
+#endif
+	return (error);
+}
+
+/*
+ * Hard links do not exist on DOS filesystems, so this always fails with
+ * EOPNOTSUPP.  msdosfs_lookup() has already been called with create and
+ * lockparent by the time we get here, so the pending operation on the
+ * target directory is aborted before the error is returned.
+ */
+static int
+msdosfs_link(ap)
+	struct vop_link_args /* {
+		struct vnode *a_tdvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *tdvp = ap->a_tdvp;
+	struct componentname *cnp = ap->a_cnp;
+
+	VOP_ABORTOP(tdvp, cnp);
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Renames on files require moving the denode to a new hash queue since the
+ * denode's location is used to compute which hash queue to put the file
+ * in. Unless it is a rename in place. For example "mv a b".
+ *
+ * What follows is the basic algorithm:
+ *
+ * if (file move) {
+ * if (dest file exists) {
+ * remove dest file
+ * }
+ * if (dest and src in same directory) {
+ * rewrite name in existing directory slot
+ * } else {
+ * write new entry in dest directory
+ * update offset and dirclust in denode
+ * move denode to new hash chain
+ * clear old directory entry
+ * }
+ * } else {
+ * directory move
+ * if (dest directory exists) {
+ * if (dest is not empty) {
+ * return ENOTEMPTY
+ * }
+ * remove dest directory
+ * }
+ * if (dest and src in same directory) {
+ * rewrite name in existing entry
+ * } else {
+ * be sure dest is not a child of src directory
+ * write entry in dest directory
+ * update "." and ".." in moved directory
+ * clear old directory entry for moved directory
+ * }
+ * }
+ *
+ * On entry:
+ * source's parent directory is unlocked
+ * source file or directory is unlocked
+ * destination's parent directory is locked
+ * destination file or directory is locked if it exists
+ *
+ * On exit:
+ * all denodes should be released
+ *
+ * Notes:
+ * I'm not sure how the memory containing the pathnames pointed at by the
+ * componentname structures is freed, there may be some memory bleeding
+ * for each rename done.
+ */
+static int
+msdosfs_rename(ap)
+ struct vop_rename_args /* {
+ struct vnode *a_fdvp;
+ struct vnode *a_fvp;
+ struct componentname *a_fcnp;
+ struct vnode *a_tdvp;
+ struct vnode *a_tvp;
+ struct componentname *a_tcnp;
+ } */ *ap;
+{
+ struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *fvp = ap->a_fvp;
+ struct vnode *fdvp = ap->a_fdvp;
+ struct vnode *tvp = ap->a_tvp;
+ struct componentname *tcnp = ap->a_tcnp;
+ struct componentname *fcnp = ap->a_fcnp;
+ struct proc *p = fcnp->cn_proc;
+ struct denode *ip, *xp, *dp, *zp;
+ u_char toname[11], oldname[11];
+ u_long from_diroffset, to_diroffset;
+ u_char to_count;
+ int doingdirectory = 0, newparent = 0;
+ int error;
+ u_long cn;
+ daddr_t bn;
+ struct denode *fddep; /* from file's parent directory */
+ struct denode *fdep; /* from file or directory */
+ struct denode *tddep; /* to file's parent directory */
+ struct denode *tdep; /* to file or directory */
+ struct msdosfsmount *pmp;
+ struct direntry *dotdotp;
+ struct buf *bp;
+
+ /*
+ * NOTE(review): fdep, tddep and tdep are assigned here but not read
+ * again in this function, and fddep is only used for the first pmp
+ * assignment, which the second assignment below overwrites.
+ */
+ fddep = VTODE(ap->a_fdvp);
+ fdep = VTODE(ap->a_fvp);
+ tddep = VTODE(ap->a_tdvp);
+ tdep = tvp ? VTODE(tvp) : NULL;
+ pmp = fddep->de_pmp;
+
+ pmp = VFSTOMSDOSFS(fdvp->v_mount);
+
+#ifdef DIAGNOSTIC
+ if ((tcnp->cn_flags & HASBUF) == 0 ||
+ (fcnp->cn_flags & HASBUF) == 0)
+ panic("msdosfs_rename: no name");
+#endif
+ /*
+ * Check for cross-device rename.
+ */
+ if ((fvp->v_mount != tdvp->v_mount) ||
+ (tvp && (fvp->v_mount != tvp->v_mount))) {
+ error = EXDEV;
+ /*
+ * Common early-failure exit: abort both lookups and release
+ * tdvp, tvp (if any), fdvp and fvp before returning error.
+ */
+abortit:
+ VOP_ABORTOP(tdvp, tcnp);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fdvp, fcnp);
+ vrele(fdvp);
+ vrele(fvp);
+ return (error);
+ }
+
+ /*
+ * If source and dest are the same, do nothing.
+ */
+ if (tvp == fvp) {
+ error = 0;
+ goto abortit;
+ }
+
+ /* Lock the source while it is inspected. */
+ error = vn_lock(fvp, LK_EXCLUSIVE, p);
+ if (error)
+ goto abortit;
+ dp = VTODE(fdvp);
+ ip = VTODE(fvp);
+
+ /*
+ * Be sure we are not renaming ".", "..", or an alias of ".". This
+ * leads to a crippled directory tree. It's pretty tough to do a
+ * "ls" or "pwd" with the "." directory entry missing, and "cd .."
+ * doesn't work if the ".." entry is missing.
+ */
+ if (ip->de_Attributes & ATTR_DIRECTORY) {
+ /*
+ * Avoid ".", "..", and aliases of "." for obvious reasons.
+ */
+ if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') ||
+ dp == ip ||
+ (fcnp->cn_flags & ISDOTDOT) ||
+ (tcnp->cn_flags & ISDOTDOT) ||
+ (ip->de_flag & DE_RENAME)) {
+ VOP_UNLOCK(fvp, 0, p);
+ error = EINVAL;
+ goto abortit;
+ }
+ /* DE_RENAME stays set on the source until the "out" label. */
+ ip->de_flag |= DE_RENAME;
+ doingdirectory++;
+ }
+
+ /*
+ * When the target exists, both the directory
+ * and target vnodes are returned locked.
+ */
+ dp = VTODE(tdvp);
+ xp = tvp ? VTODE(tvp) : NULL;
+ /*
+ * Remember direntry place to use for destination
+ */
+ to_diroffset = dp->de_fndoffset;
+ to_count = dp->de_fndcnt;
+
+ /*
+ * If ".." must be changed (ie the directory gets a new
+ * parent) then the source directory must not be in the
+ * directory hierarchy above the target, as this would
+ * orphan everything below the source directory. Also
+ * the user must have write permission in the source so
+ * as to be able to change "..". We must repeat the call
+ * to namei, as the parent directory is unlocked by the
+ * call to doscheckpath().
+ */
+ error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc);
+ VOP_UNLOCK(fvp, 0, p);
+ /* A different start cluster means source and target parents differ. */
+ if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster)
+ newparent = 1;
+ vrele(fdvp);
+ if (doingdirectory && newparent) {
+ if (error) /* write access check above */
+ goto bad;
+ if (xp != NULL)
+ vput(tvp);
+ /*
+ * doscheckpath() vput()'s dp,
+ * so we have to do a relookup afterwards
+ */
+ error = doscheckpath(ip, dp);
+ if (error)
+ goto out;
+ if ((tcnp->cn_flags & SAVESTART) == 0)
+ panic("msdosfs_rename: lost to startdir");
+ error = relookup(tdvp, &tvp, tcnp);
+ if (error)
+ goto out;
+ dp = VTODE(tdvp);
+ xp = tvp ? VTODE(tvp) : NULL;
+ }
+
+ if (xp != NULL) {
+ /*
+ * Target must be empty if a directory and have no links
+ * to it. Also, ensure source and target are compatible
+ * (both directories, or both not directories).
+ */
+ if (xp->de_Attributes & ATTR_DIRECTORY) {
+ if (!dosdirempty(xp)) {
+ error = ENOTEMPTY;
+ goto bad;
+ }
+ if (!doingdirectory) {
+ error = ENOTDIR;
+ goto bad;
+ }
+ cache_purge(tdvp);
+ } else if (doingdirectory) {
+ error = EISDIR;
+ goto bad;
+ }
+ /* Remove the existing target entry and let go of its vnode. */
+ error = removede(dp, xp);
+ if (error)
+ goto bad;
+ vput(tvp);
+ xp = NULL;
+ }
+
+ /*
+ * Convert the filename in tcnp into a dos filename. We copy this
+ * into the denode and directory entry for the destination
+ * file/directory.
+ *
+ * NOTE(review): on failure this jumps to abortit, which calls
+ * vrele(fdvp) and, when tvp is non-NULL, vput(tvp). By this point
+ * fdvp has already been vrele()d above and an existing target has
+ * already been vput() without tvp being cleared, so this looks like
+ * a double release - confirm the reference counts.
+ */
+ error = uniqdosname(VTODE(tdvp), tcnp, toname);
+ if (error)
+ goto abortit;
+
+ /*
+ * Since from wasn't locked at various places above,
+ * have to do a relookup here.
+ */
+ fcnp->cn_flags &= ~MODMASK;
+ fcnp->cn_flags |= LOCKPARENT | LOCKLEAF;
+ if ((fcnp->cn_flags & SAVESTART) == 0)
+ panic("msdosfs_rename: lost from startdir");
+ if (!newparent)
+ VOP_UNLOCK(tdvp, 0, p);
+ /*
+ * The return value of relookup() is ignored; a NULL fvp afterwards
+ * is what signals that the source name is gone.
+ */
+ (void) relookup(fdvp, &fvp, fcnp);
+ if (fvp == NULL) {
+ /*
+ * From name has disappeared.
+ */
+ if (doingdirectory)
+ panic("rename: lost dir entry");
+ vrele(ap->a_fvp);
+ if (newparent)
+ VOP_UNLOCK(tdvp, 0, p);
+ vrele(tdvp);
+ return 0;
+ }
+ xp = VTODE(fvp);
+ zp = VTODE(fdvp);
+ from_diroffset = zp->de_fndoffset;
+
+ /*
+ * Ensure that the directory entry still exists and has not
+ * changed till now. If the source is a file the entry may
+ * have been unlinked or renamed. In either case there is
+ * no further work to be done. If the source is a directory
+ * then it cannot have been rmdir'ed or renamed; this is
+ * prohibited by the DE_RENAME flag.
+ */
+ if (xp != ip) {
+ if (doingdirectory)
+ panic("rename: lost dir entry");
+ vrele(ap->a_fvp);
+ VOP_UNLOCK(fvp, 0, p);
+ if (newparent)
+ VOP_UNLOCK(fdvp, 0, p);
+ xp = NULL;
+ } else {
+ vrele(fvp);
+ xp = NULL;
+
+ /*
+ * First write a new entry in the destination
+ * directory and mark the entry in the source directory
+ * as deleted. Then move the denode to the correct hash
+ * chain for its new location in the filesystem. And, if
+ * we moved a directory, then update its .. entry to point
+ * to the new parent directory.
+ */
+ /* oldname lets the denode name be restored if createde() fails. */
+ bcopy(ip->de_Name, oldname, 11);
+ bcopy(toname, ip->de_Name, 11); /* update denode */
+ dp->de_fndoffset = to_diroffset;
+ dp->de_fndcnt = to_count;
+ error = createde(ip, dp, (struct denode **)0, tcnp);
+ if (error) {
+ bcopy(oldname, ip->de_Name, 11);
+ if (newparent)
+ VOP_UNLOCK(fdvp, 0, p);
+ VOP_UNLOCK(fvp, 0, p);
+ goto bad;
+ }
+ ip->de_refcnt++;
+ zp->de_fndoffset = from_diroffset;
+ error = removede(zp, ip);
+ if (error) {
+ /* XXX should really panic here, fs is corrupt */
+ if (newparent)
+ VOP_UNLOCK(fdvp, 0, p);
+ VOP_UNLOCK(fvp, 0, p);
+ goto bad;
+ }
+ /*
+ * For a plain file, record where its directory entry now
+ * lives (cluster and offset) in the denode.
+ */
+ if (!doingdirectory) {
+ error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0,
+ &ip->de_dirclust, 0);
+ if (error) {
+ /* XXX should really panic here, fs is corrupt */
+ if (newparent)
+ VOP_UNLOCK(fdvp, 0, p);
+ VOP_UNLOCK(fvp, 0, p);
+ goto bad;
+ }
+ if (ip->de_dirclust == MSDOSFSROOT)
+ ip->de_diroffset = to_diroffset;
+ else
+ ip->de_diroffset = to_diroffset & pmp->pm_crbomask;
+ }
+ reinsert(ip);
+ if (newparent)
+ VOP_UNLOCK(fdvp, 0, p);
+ }
+
+ /*
+ * If we moved a directory to a new parent directory, then we must
+ * fixup the ".." entry in the moved directory.
+ */
+ if (doingdirectory && newparent) {
+ cn = ip->de_StartCluster;
+ if (cn == MSDOSFSROOT) {
+ /* this should never happen */
+ panic("msdosfs_rename(): updating .. in root directory?");
+ } else
+ bn = cntobn(pmp, cn);
+ error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster,
+ NOCRED, &bp);
+ if (error) {
+ /* XXX should really panic here, fs is corrupt */
+ brelse(bp);
+ VOP_UNLOCK(fvp, 0, p);
+ goto bad;
+ }
+ /* The second direntry in the directory's first cluster is "..". */
+ dotdotp = (struct direntry *)bp->b_data + 1;
+ putushort(dotdotp->deStartCluster, dp->de_StartCluster);
+ if (FAT32(pmp))
+ putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16);
+ error = bwrite(bp);
+ if (error) {
+ /* XXX should really panic here, fs is corrupt */
+ VOP_UNLOCK(fvp, 0, p);
+ goto bad;
+ }
+ }
+
+ VOP_UNLOCK(fvp, 0, p);
+ /* bad: release a still-held target and the target directory. */
+bad:
+ if (xp)
+ vput(tvp);
+ vput(tdvp);
+ /* out: clear the rename-in-progress mark and drop the source vnodes. */
+out:
+ ip->de_flag &= ~DE_RENAME;
+ vrele(fdvp);
+ vrele(fvp);
+ return (error);
+
+}
+
+/*
+ * Template for the first two entries ("." and "..") of a new directory.
+ * msdosfs_mkdir() copies it to the start of the freshly allocated cluster
+ * and then overwrites the start cluster and the create/access/modify
+ * time stamps of both entries.
+ */
+static struct {
+ struct direntry dot;
+ struct direntry dotdot;
+} dosdirtemplate = {
+ { ". ", " ", /* the . entry */
+ ATTR_DIRECTORY, /* file attribute */
+ 0, /* reserved */
+ 0, { 0, 0 }, { 0, 0 }, /* create time & date */
+ { 0, 0 }, /* access date */
+ { 0, 0 }, /* high bits of start cluster */
+ { 210, 4 }, { 210, 4 }, /* modify time & date */
+ { 0, 0 }, /* startcluster */
+ { 0, 0, 0, 0 } /* filesize */
+ },
+ { ".. ", " ", /* the .. entry */
+ ATTR_DIRECTORY, /* file attribute */
+ 0, /* reserved */
+ 0, { 0, 0 }, { 0, 0 }, /* create time & date */
+ { 0, 0 }, /* access date */
+ { 0, 0 }, /* high bits of start cluster */
+ { 210, 4 }, { 210, 4 }, /* modify time & date */
+ { 0, 0 }, /* startcluster */
+ { 0, 0, 0, 0 } /* filesize */
+ }
+};
+
+/*
+ * Create the directory named by ap->a_cnp inside the directory ap->a_dvp.
+ *
+ * A cluster is allocated for the new directory, filled with "." and ".."
+ * entries built from dosdirtemplate and written out with bwrite(); only
+ * then is an entry for the new directory added to the parent through
+ * createde().
+ *
+ * On success the new vnode is stored in *ap->a_vpp and 0 is returned.
+ * On failure an errno value is returned, a cluster allocated here is
+ * freed again and the pathname buffer is released.
+ */
+static int
+msdosfs_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct denode *dep;
+	struct denode *pdep = VTODE(ap->a_dvp);
+	struct direntry *denp;
+	struct msdosfsmount *pmp = pdep->de_pmp;
+	struct buf *bp;
+	u_long newcluster, pcl;
+	int bn;
+	int error;
+	struct denode ndirent;
+	struct timespec ts;
+
+	/*
+	 * If this is the root directory and there is no space left we
+	 * can't do anything.  This is because the root directory can not
+	 * change size.
+	 */
+	if (pdep->de_StartCluster == MSDOSFSROOT
+	    && pdep->de_fndoffset >= pdep->de_FileSize) {
+		error = ENOSPC;
+		goto bad2;
+	}
+
+	/*
+	 * Allocate a cluster to hold the about to be created directory.
+	 */
+	error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL);
+	if (error)
+		goto bad2;
+
+	/*
+	 * ndirent is a scratch denode describing the new directory; its
+	 * time stamps are filled in now so that "." and ".." can use them.
+	 */
+	bzero(&ndirent, sizeof(ndirent));
+	ndirent.de_pmp = pmp;
+	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
+	getnanotime(&ts);
+	DETIMES(&ndirent, &ts, &ts, &ts);
+
+	/*
+	 * Now fill the cluster with the "." and ".." entries. And write
+	 * the cluster to disk.  This way it is there for the parent
+	 * directory to be pointing at if there were a crash.
+	 */
+	bn = cntobn(pmp, newcluster);
+	/* always succeeds */
+	bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0);
+	bzero(bp->b_data, pmp->pm_bpcluster);
+	bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate);
+	denp = (struct direntry *)bp->b_data;
+
+	/* "." refers to the new directory itself. */
+	putushort(denp[0].deStartCluster, newcluster);
+	putushort(denp[0].deCDate, ndirent.de_CDate);
+	putushort(denp[0].deCTime, ndirent.de_CTime);
+	denp[0].deCHundredth = ndirent.de_CHun;
+	putushort(denp[0].deADate, ndirent.de_ADate);
+	putushort(denp[0].deMDate, ndirent.de_MDate);
+	putushort(denp[0].deMTime, ndirent.de_MTime);
+
+	/*
+	 * ".." refers to the parent.  When the parent is the FAT32 root
+	 * directory the entry records cluster 0 instead of the root's
+	 * real cluster number.
+	 */
+	pcl = pdep->de_StartCluster;
+	if (FAT32(pmp) && pcl == pmp->pm_rootdirblk)
+		pcl = 0;
+	putushort(denp[1].deStartCluster, pcl);
+	putushort(denp[1].deCDate, ndirent.de_CDate);
+	putushort(denp[1].deCTime, ndirent.de_CTime);
+	denp[1].deCHundredth = ndirent.de_CHun;
+	putushort(denp[1].deADate, ndirent.de_ADate);
+	putushort(denp[1].deMDate, ndirent.de_MDate);
+	putushort(denp[1].deMTime, ndirent.de_MTime);
+	if (FAT32(pmp)) {
+		putushort(denp[0].deHighClust, newcluster >> 16);
+		/*
+		 * Use pcl, not pdep->de_StartCluster, so that the high
+		 * half agrees with the low half written above (both are
+		 * zero when the parent is the root directory).
+		 */
+		putushort(denp[1].deHighClust, pcl >> 16);
+	}
+
+	error = bwrite(bp);
+	if (error)
+		goto bad;
+
+	/*
+	 * Now build up a directory entry pointing to the newly allocated
+	 * cluster.  This will be written to an empty slot in the parent
+	 * directory.
+	 */
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("msdosfs_mkdir: no name");
+#endif
+	error = uniqdosname(pdep, cnp, ndirent.de_Name);
+	if (error)
+		goto bad;
+
+	ndirent.de_Attributes = ATTR_DIRECTORY;
+	ndirent.de_LowerCase = 0;
+	ndirent.de_StartCluster = newcluster;
+	ndirent.de_FileSize = 0;
+	ndirent.de_dev = pdep->de_dev;
+	ndirent.de_devvp = pdep->de_devvp;
+	error = createde(&ndirent, pdep, &dep, cnp);
+	if (error)
+		goto bad;
+	/* The pathname buffer is kept only when SAVESTART is set. */
+	if ((cnp->cn_flags & SAVESTART) == 0)
+		zfree(namei_zone, cnp->cn_pnbuf);
+	*ap->a_vpp = DETOV(dep);
+	return (0);
+
+bad:
+	/* Give back the cluster allocated for the new directory. */
+	clusterfree(pmp, newcluster, NULL);
+bad2:
+	zfree(namei_zone, cnp->cn_pnbuf);
+	return (error);
+}
+
+/*
+ * Remove the directory ap->a_vp from its parent directory ap->a_dvp.
+ *
+ * Returns ENOTEMPTY when the directory still has entries or is marked
+ * DE_RENAME, the error from removede() when the entry cannot be
+ * removed, and otherwise the result of truncating the directory.
+ */
+static int
+msdosfs_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *dvp = ap->a_dvp;
+	struct componentname *cnp = ap->a_cnp;
+	struct denode *victim = VTODE(vp);
+	struct denode *parent = VTODE(dvp);
+	struct proc *p = cnp->cn_proc;
+	int error;
+
+	/*
+	 * The directory must be empty (and valid).  Rmdir ".." is caught
+	 * here as well: ".." contains a reference to the current
+	 * directory and is therefore never empty.
+	 */
+	if (!dosdirempty(victim) || (victim->de_flag & DE_RENAME) != 0)
+		return (ENOTEMPTY);
+
+	/*
+	 * Drop the on-disk directory entry.  The in-memory denode lives
+	 * on with de_refcnt <= 0, which keeps deget() from finding it;
+	 * msdosfs_reclaim() eventually removes it from the denode cache.
+	 */
+	error = removede(parent, victim);
+	if (error)
+		return (error);
+
+	/*
+	 * Other filesystems decrement the parent's link count at this
+	 * point; dos filesystems have none, so only the name cache is
+	 * purged.
+	 */
+	cache_purge(dvp);
+	VOP_UNLOCK(dvp, 0, p);
+
+	/* Truncate the directory that is being deleted. */
+	error = detrunc(victim, (u_long)0, IO_SYNC, cnp->cn_cred, p);
+	cache_purge(vp);
+
+	/* Take the parent's lock again before returning. */
+	vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+	return (error);
+}
+
+/*
+ * Symbolic links do not exist on DOS filesystems: release the pathname
+ * buffer and fail with EOPNOTSUPP.
+ */
+static int
+msdosfs_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	/*
+	 * XXX The original author was unsure whether this should be
+	 * VOP_ABORTOP(ap->a_dvp, ap->a_cnp) instead of a direct zfree().
+	 */
+	zfree(namei_zone, cnp->cn_pnbuf);
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Read directory entries from the directory ap->a_vp into ap->a_uio,
+ * converting each DOS directory entry into a struct dirent.
+ *
+ * Returns ENOTDIR if the denode is not a directory, EINVAL if the
+ * residual count is smaller than one DOS directory entry or the offset
+ * is not a multiple of its size, an error from pcbmap(), bread() or
+ * uiomove(), and 0 otherwise.
+ *
+ * When ap->a_ncookies is non-NULL a cookie array is allocated and handed
+ * back through *ap->a_cookies; each cookie is the offset just past the
+ * entry it belongs to, and *ap->a_ncookies is reduced by the number of
+ * unused slots at the "out" label.  When ap->a_eofflag is non-NULL it is
+ * set to 1 if the final offset is at or beyond the directory size and to
+ * 0 otherwise.
+ */
+static int
+msdosfs_readdir(ap)
+ struct vop_readdir_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ struct ucred *a_cred;
+ int *a_eofflag;
+ int *a_ncookies;
+ u_long **a_cookies;
+ } */ *ap;
+{
+ int error = 0;
+ int diff;
+ long n;
+ int blsize;
+ long on;
+ u_long cn;
+ u_long fileno;
+ u_long dirsperblk;
+ long bias = 0;
+ daddr_t bn, lbn;
+ struct buf *bp;
+ struct denode *dep = VTODE(ap->a_vp);
+ struct msdosfsmount *pmp = dep->de_pmp;
+ struct direntry *dentp;
+ struct dirent dirbuf;
+ struct uio *uio = ap->a_uio;
+ u_long *cookies = NULL;
+ int ncookies = 0;
+ off_t offset, off;
+ int chksum = -1;
+
+#ifdef MSDOSFS_DEBUG
+ printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n",
+ ap->a_vp, uio, ap->a_cred, ap->a_eofflag);
+#endif
+
+ /*
+ * msdosfs_readdir() won't operate properly on regular files since
+ * it does i/o only with the filesystem vnode, and hence can
+ * retrieve the wrong block from the buffer cache for a plain file.
+ * So, fail attempts to readdir() on a plain file.
+ */
+ if ((dep->de_Attributes & ATTR_DIRECTORY) == 0)
+ return (ENOTDIR);
+
+ /*
+ * To be safe, initialize dirbuf
+ */
+ bzero(dirbuf.d_name, sizeof(dirbuf.d_name));
+
+ /*
+ * If the user buffer is smaller than the size of one dos directory
+ * entry or the file offset is not a multiple of the size of a
+ * directory entry, then we fail the read.
+ */
+ off = offset = uio->uio_offset;
+ if (uio->uio_resid < sizeof(struct direntry) ||
+ (offset & (sizeof(struct direntry) - 1)))
+ return (EINVAL);
+
+ /*
+ * NOTE(review): the divisor 16 is presumably a lower bound on the
+ * size of one returned struct dirent record - confirm.
+ */
+ if (ap->a_ncookies) {
+ ncookies = uio->uio_resid / 16;
+ MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP,
+ M_WAITOK);
+ *ap->a_cookies = cookies;
+ *ap->a_ncookies = ncookies;
+ }
+
+ dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
+
+ /*
+ * If they are reading from the root directory then, we simulate
+ * the . and .. entries since these don't exist in the root
+ * directory. We also set the offset bias to make up for having to
+ * simulate these entries. By this I mean that at file offset 64 we
+ * read the first entry in the root directory that lives on disk.
+ */
+ if (dep->de_StartCluster == MSDOSFSROOT
+ || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) {
+#if 0
+ printf("msdosfs_readdir(): going after . or .. in root dir, offset %d\n",
+ offset);
+#endif
+ bias = 2 * sizeof(struct direntry);
+ if (offset < bias) {
+ for (n = (int)offset / sizeof(struct direntry);
+ n < 2; n++) {
+ if (FAT32(pmp))
+ dirbuf.d_fileno = cntobn(pmp,
+ pmp->pm_rootdirblk)
+ * dirsperblk;
+ else
+ dirbuf.d_fileno = 1;
+ dirbuf.d_type = DT_DIR;
+ switch (n) {
+ case 0:
+ dirbuf.d_namlen = 1;
+ strcpy(dirbuf.d_name, ".");
+ break;
+ case 1:
+ dirbuf.d_namlen = 2;
+ strcpy(dirbuf.d_name, "..");
+ break;
+ }
+ dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
+ if (uio->uio_resid < dirbuf.d_reclen)
+ goto out;
+ error = uiomove((caddr_t) &dirbuf,
+ dirbuf.d_reclen, uio);
+ if (error)
+ goto out;
+ offset += sizeof(struct direntry);
+ off = offset;
+ if (cookies) {
+ *cookies++ = offset;
+ if (--ncookies <= 0)
+ goto out;
+ }
+ }
+ }
+ }
+
+ /*
+ * Main loop: one directory cluster per iteration. "offset" walks
+ * over every on-disk slot, while "off" only advances past entries
+ * that were actually copied out and becomes the new uio_offset.
+ */
+ off = offset;
+ while (uio->uio_resid > 0) {
+ lbn = de_cluster(pmp, offset - bias);
+ on = (offset - bias) & pmp->pm_crbomask;
+ n = min(pmp->pm_bpcluster - on, uio->uio_resid);
+ diff = dep->de_FileSize - (offset - bias);
+ if (diff <= 0)
+ break;
+ n = min(n, diff);
+ error = pcbmap(dep, lbn, &bn, &cn, &blsize);
+ if (error)
+ break;
+ error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp);
+ /*
+ * NOTE(review): this failure path returns directly and so
+ * skips the cookie count, uio_offset and eofflag updates done
+ * at "out" - confirm callers cope with that.
+ */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ n = min(n, blsize - bp->b_resid);
+
+ /*
+ * Convert from dos directory entries to fs-independent
+ * directory entries.
+ */
+ for (dentp = (struct direntry *)(bp->b_data + on);
+ (char *)dentp < bp->b_data + on + n;
+ dentp++, offset += sizeof(struct direntry)) {
+#if 0
+ printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n",
+ dentp, prev, crnt, dentp->deName[0], dentp->deAttributes);
+#endif
+ /*
+ * If this is an unused entry, we can stop.
+ */
+ if (dentp->deName[0] == SLOT_EMPTY) {
+ brelse(bp);
+ goto out;
+ }
+ /*
+ * Skip deleted entries.
+ */
+ if (dentp->deName[0] == SLOT_DELETED) {
+ chksum = -1;
+ continue;
+ }
+
+ /*
+ * Handle Win95 long directory entries
+ *
+ * win2unixfn() is handed dirbuf and the running
+ * chksum and returns the new chksum; the short-name
+ * entry that follows is compared against it below.
+ */
+ if (dentp->deAttributes == ATTR_WIN95) {
+ if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
+ continue;
+ chksum = win2unixfn((struct winentry *)dentp,
+ &dirbuf, chksum,
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+ pmp->pm_u2w);
+ continue;
+ }
+
+ /*
+ * Skip volume labels
+ */
+ if (dentp->deAttributes & ATTR_VOLUME) {
+ chksum = -1;
+ continue;
+ }
+ /*
+ * This computation of d_fileno must match
+ * the computation of va_fileid in
+ * msdosfs_getattr.
+ */
+ if (dentp->deAttributes & ATTR_DIRECTORY) {
+ fileno = getushort(dentp->deStartCluster);
+ if (FAT32(pmp))
+ fileno |= getushort(dentp->deHighClust) << 16;
+ /* if this is the root directory */
+ if (fileno == MSDOSFSROOT)
+ if (FAT32(pmp))
+ fileno = cntobn(pmp,
+ pmp->pm_rootdirblk)
+ * dirsperblk;
+ else
+ fileno = 1;
+ else
+ fileno = cntobn(pmp, fileno) * dirsperblk;
+ dirbuf.d_fileno = fileno;
+ dirbuf.d_type = DT_DIR;
+ } else {
+ dirbuf.d_fileno = offset / sizeof(struct direntry);
+ dirbuf.d_type = DT_REG;
+ }
+ /*
+ * Use the short (8.3) name unless the preceding long
+ * name slots produced a checksum matching this entry,
+ * in which case the name already placed in dirbuf is
+ * kept and merely NUL-terminated.
+ */
+ if (chksum != winChksum(dentp->deName))
+ dirbuf.d_namlen = dos2unixfn(dentp->deName,
+ (u_char *)dirbuf.d_name,
+ dentp->deLowerCase |
+ ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ?
+ (LCASE_BASE | LCASE_EXT) : 0),
+ pmp->pm_flags & MSDOSFSMNT_U2WTABLE,
+ pmp->pm_d2u,
+ pmp->pm_flags & MSDOSFSMNT_ULTABLE,
+ pmp->pm_ul);
+ else
+ dirbuf.d_name[dirbuf.d_namlen] = 0;
+ chksum = -1;
+ dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf);
+ if (uio->uio_resid < dirbuf.d_reclen) {
+ brelse(bp);
+ goto out;
+ }
+ error = uiomove((caddr_t) &dirbuf,
+ dirbuf.d_reclen, uio);
+ if (error) {
+ brelse(bp);
+ goto out;
+ }
+ if (cookies) {
+ *cookies++ = offset + sizeof(struct direntry);
+ if (--ncookies <= 0) {
+ brelse(bp);
+ goto out;
+ }
+ }
+ off = offset + sizeof(struct direntry);
+ }
+ brelse(bp);
+ }
+out:
+ /* Subtract unused cookies */
+ if (ap->a_ncookies)
+ *ap->a_ncookies -= ncookies;
+
+ uio->uio_offset = off;
+
+ /*
+ * Set the eofflag (NFS uses it)
+ */
+ if (ap->a_eofflag)
+ if (dep->de_FileSize - (offset - bias) <= 0)
+ *ap->a_eofflag = 1;
+ else
+ *ap->a_eofflag = 0;
+
+ return (error);
+}
+
+/*
+ * Abort a pending name operation: the pathname buffer is released when
+ * the componentname has HASBUF set and SAVESTART clear.  Always
+ * returns 0.
+ */
+static int
+msdosfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) {
+		zfree(namei_zone, cnp->cn_pnbuf);
+	}
+	return (0);
+}
+
+/*
+ * Map a cluster of a file to a filesystem block number.
+ *
+ * a_vp   - vnode of the file
+ * a_bn   - which cluster we are interested in mapping
+ * a_vpp  - if non-NULL, receives the vnode of the device holding the
+ *          filesystem that contains the file
+ * a_bnp  - if non-NULL, receives the filesystem relative block number
+ * a_runp - if non-NULL, set to 0
+ * a_runb - if non-NULL, set to 0
+ *
+ * Returns 0 when a_bnp is NULL, otherwise the result of pcbmap().
+ */
+static int
+msdosfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct denode *dep = VTODE(ap->a_vp);
+
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = dep->de_devvp;
+
+	if (ap->a_bnp != NULL) {
+		/*
+		 * Sequential clusters should be counted here; for now no
+		 * run is reported in either direction.
+		 */
+		if (ap->a_runp)
+			*ap->a_runp = 0;
+		if (ap->a_runb)
+			*ap->a_runb = 0;
+		return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0));
+	}
+	return (0);
+}
+
+/*
+ * Start I/O on the buffer ap->a_bp.
+ *
+ * If the buffer has not been mapped yet (b_blkno still equals b_lblkno)
+ * the block number is looked up with pcbmap(). The request is then passed
+ * to the strategy routine of the device vnode dep->de_devvp.
+ *
+ * Returns the pcbmap() error after marking the buffer B_ERROR and calling
+ * biodone(), or 0 otherwise.
+ */
+static int
+msdosfs_strategy(ap)
+ struct vop_strategy_args /* {
+ struct vnode *a_vp;
+ struct buf *a_bp;
+ } */ *ap;
+{
+ struct buf *bp = ap->a_bp;
+ struct denode *dep = VTODE(bp->b_vp);
+ struct vnode *vp;
+ int error = 0;
+
+ if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR)
+ panic("msdosfs_strategy: spec");
+ /*
+ * If we don't already know the filesystem relative block number
+ * then get it using pcbmap(). If pcbmap() returns the block
+ * number as -1 then we've got a hole in the file. DOS filesystems
+ * don't allow files with holes, so we shouldn't ever see this.
+ */
+ if (bp->b_blkno == bp->b_lblkno) {
+ error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0);
+ if (error) {
+ bp->b_error = error;
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return (error);
+ }
+ if ((long)bp->b_blkno == -1)
+ vfs_bio_clrbuf(bp);
+ }
+ /* A block number of -1 completes the request without device I/O. */
+ if (bp->b_blkno == -1) {
+ biodone(bp);
+ return (0);
+ }
+ /*
+ * Read/write the block from/to the disk that contains the desired
+ * file block.
+ */
+ vp = dep->de_devvp;
+ bp->b_dev = vp->v_rdev;
+ VOP_STRATEGY(vp, bp);
+ return (0);
+}
+
+/*
+ * Print a one-line description of the denode behind ap->a_vp: start
+ * cluster, directory cluster and offset, device numbers and lock state.
+ * Always returns 0.
+ */
+static int
+msdosfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct denode *dep;
+
+	dep = VTODE(ap->a_vp);
+
+	printf("tag VT_MSDOSFS, startcluster %lu, dircluster %lu, diroffset %lu ",
+	    dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset);
+	printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev));
+	lockmgr_printinfo(&dep->de_lock);
+	printf("\n");
+
+	return (0);
+}
+
+/*
+ * Report configurable pathname limits for an msdosfs vnode.
+ *
+ * For a recognised ap->a_name the value is stored in *ap->a_retval and
+ * 0 is returned; any other name yields EINVAL and leaves *ap->a_retval
+ * untouched.
+ */
+static int
+msdosfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp;
+	int value;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		value = 1;
+		break;
+	case _PC_NAME_MAX:
+		/* Long names are available only on mounts with LONGNAME set. */
+		if (pmp->pm_flags & MSDOSFSMNT_LONGNAME)
+			value = WIN_MAXLEN;
+		else
+			value = 12;
+		break;
+	case _PC_PATH_MAX:
+		value = PATH_MAX;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		value = 1;
+		break;
+	case _PC_NO_TRUNC:
+		value = 0;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * Get pages routine.
+ *
+ * XXX Simply defers to the generic vnode pager; a_offset is not passed
+ * XXX on (and never has been).
+ */
+int
+msdosfs_getpages(ap)
+	struct vop_getpages_args *ap;
+{
+	int rv;
+
+	rv = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count,
+	    ap->a_reqpage);
+	return (rv);
+}
+
+/*
+ * Put pages routine.
+ *
+ * XXX Simply defers to the generic vnode pager; a_offset is not passed
+ * XXX on (and never has been).
+ */
+int
+msdosfs_putpages(ap)
+	struct vop_putpages_args *ap;
+{
+	int rv;
+
+	rv = vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count,
+	    ap->a_sync, ap->a_rtvals);
+	return (rv);
+}
+
+/*
+ * Global vfs data structures for msdosfs
+ *
+ * msdosfs_vnodeop_entries pairs each vnode operation descriptor with the
+ * routine msdosfs uses for it. Operations not listed fall back to
+ * vop_defaultop; locking uses the vop_std* routines, and lookups enter
+ * through vfs_cache_lookup with msdosfs_lookup registered as the
+ * cachedlookup operation. The table ends with a { NULL, NULL } entry.
+ */
+vop_t **msdosfs_vnodeop_p;
+static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_abortop_desc, (vop_t *) msdosfs_abortop },
+ { &vop_access_desc, (vop_t *) msdosfs_access },
+ { &vop_bmap_desc, (vop_t *) msdosfs_bmap },
+ { &vop_cachedlookup_desc, (vop_t *) msdosfs_lookup },
+ { &vop_close_desc, (vop_t *) msdosfs_close },
+ { &vop_create_desc, (vop_t *) msdosfs_create },
+ { &vop_fsync_desc, (vop_t *) msdosfs_fsync },
+ { &vop_getattr_desc, (vop_t *) msdosfs_getattr },
+ { &vop_inactive_desc, (vop_t *) msdosfs_inactive },
+ { &vop_islocked_desc, (vop_t *) vop_stdislocked },
+ { &vop_link_desc, (vop_t *) msdosfs_link },
+ { &vop_lock_desc, (vop_t *) vop_stdlock },
+ { &vop_lookup_desc, (vop_t *) vfs_cache_lookup },
+ { &vop_mkdir_desc, (vop_t *) msdosfs_mkdir },
+ { &vop_mknod_desc, (vop_t *) msdosfs_mknod },
+ { &vop_pathconf_desc, (vop_t *) msdosfs_pathconf },
+ { &vop_print_desc, (vop_t *) msdosfs_print },
+ { &vop_read_desc, (vop_t *) msdosfs_read },
+ { &vop_readdir_desc, (vop_t *) msdosfs_readdir },
+ { &vop_reclaim_desc, (vop_t *) msdosfs_reclaim },
+ { &vop_remove_desc, (vop_t *) msdosfs_remove },
+ { &vop_rename_desc, (vop_t *) msdosfs_rename },
+ { &vop_rmdir_desc, (vop_t *) msdosfs_rmdir },
+ { &vop_setattr_desc, (vop_t *) msdosfs_setattr },
+ { &vop_strategy_desc, (vop_t *) msdosfs_strategy },
+ { &vop_symlink_desc, (vop_t *) msdosfs_symlink },
+ { &vop_unlock_desc, (vop_t *) vop_stdunlock },
+ { &vop_write_desc, (vop_t *) msdosfs_write },
+ { &vop_getpages_desc, (vop_t *) msdosfs_getpages },
+ { &vop_putpages_desc, (vop_t *) msdosfs_putpages },
+ { NULL, NULL }
+};
+/* Ties the operations vector pointer to the entry table above. */
+static struct vnodeopv_desc msdosfs_vnodeop_opv_desc =
+ { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries };
+
+/* NOTE(review): presumably registers the descriptor with the VFS layer - confirm. */
+VNODEOP_SET(msdosfs_vnodeop_opv_desc);
diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h
new file mode 100644
index 0000000..31a2a67
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfsmount.h
@@ -0,0 +1,239 @@
+/* $Id: msdosfsmount.h,v 1.15 1998/02/23 09:39:29 ache Exp $ */
+/* $NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $ */
+
+/*-
+ * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
+ * Copyright (C) 1994, 1995, 1997 TooLs GmbH.
+ * All rights reserved.
+ * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by TooLs GmbH.
+ * 4. The name of TooLs GmbH may not be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+/*
+ * Written by Paul Popelka (paulp@uts.amdahl.com)
+ *
+ * You can do anything you want with this software, just don't say you wrote
+ * it, and don't remove this notice.
+ *
+ * This software is provided "as is".
+ *
+ * The author supplies this software to be publicly redistributed on the
+ * understanding that the author is not responsible for the correct
+ * functioning of this software in any circumstances and is not liable for
+ * any damages caused by this software.
+ *
+ * October 1992
+ */
+
+#ifndef _MSDOSFS_MSDOSFSMOUNT_H_
+#define _MSDOSFS_MSDOSFSMOUNT_H_
+
+#ifdef KERNEL
+
+#ifdef MALLOC_DECLARE
+MALLOC_DECLARE(M_MSDOSFSMNT);
+#endif
+
+/*
+ * Layout of the mount control block for a msdos file system.
+ */
+struct msdosfsmount {
+ struct mount *pm_mountp;/* vfs mount struct for this fs */
+ dev_t pm_dev; /* block special device mounted */
+ uid_t pm_uid; /* uid to set as owner of the files */
+ gid_t pm_gid; /* gid to set as owner of the files */
+ mode_t pm_mask; /* mask to AND with file protection bits */
+ struct vnode *pm_devvp; /* vnode for block device mounted */
+ struct bpb50 pm_bpb; /* BIOS parameter blk for this fs */
+ u_long pm_FATsecs; /* actual number of fat sectors */
+ u_long pm_fatblk; /* block # of first FAT */
+ u_long pm_rootdirblk; /* block # (cluster # for FAT32) of root directory */
+ u_long pm_rootdirsize; /* size in blocks (not clusters) */
+ u_long pm_firstcluster; /* block number of first cluster */
+ u_long pm_nmbrofclusters; /* # of clusters in filesystem */
+ u_long pm_maxcluster; /* maximum cluster number */
+ u_long pm_freeclustercount; /* number of free clusters */
+ u_long pm_cnshift; /* shift file offset right this amount to get a cluster number */
+ u_long pm_crbomask; /* and a file offset with this mask to get cluster rel offset */
+ u_long pm_bnshift; /* shift file offset right this amount to get a block number */
+ u_long pm_bpcluster; /* bytes per cluster */
+ u_long pm_fmod; /* ~0 if fs is modified, this can rollover to 0 */
+ u_long pm_fatblocksize; /* size of fat blocks in bytes */
+ u_long pm_fatblocksec; /* size of fat blocks in sectors */
+ u_long pm_fatsize; /* size of fat in bytes */
+ u_long pm_fatmask; /* mask to use for fat numbers */
+ u_long pm_fsinfo; /* fsinfo block number */
+ u_long pm_nxtfree; /* next free cluster in fsinfo block */
+ u_int pm_fatmult; /* these 2 values are used in fat */
+ u_int pm_fatdiv; /* offset computation (see FATOFS) */
+ u_int pm_curfat; /* current fat for FAT32 (0 otherwise) */
+ u_int *pm_inusemap; /* ptr to bitmap of in-use clusters */
+ u_int pm_flags; /* see below */
+ struct netexport pm_export; /* export information */
+ u_int16_t pm_u2w[128]; /* Local->Unicode table */
+ u_int8_t pm_ul[128]; /* Local upper->lower table */
+ u_int8_t pm_lu[128]; /* Local lower->upper table */
+ u_int8_t pm_d2u[128]; /* DOS->local table */
+ u_int8_t pm_u2d[128]; /* Local->DOS table */
+};
+/* Byte offset in FAT on filesystem pmp, cluster cn */
+#define FATOFS(pmp, cn) ((cn) * (pmp)->pm_fatmult / (pmp)->pm_fatdiv)
+
+/* Convert a struct mount pointer into its msdosfs mount control block */
+#define VFSTOMSDOSFS(mp) ((struct msdosfsmount *)(mp)->mnt_data)
+
+/* Number of bits in one pm_inusemap item: */
+#define N_INUSEBITS (8 * sizeof(u_int))
+
+/*
+ * Shorthand for fields in the bpb contained in the msdosfsmount structure.
+ */
+#define pm_BytesPerSec pm_bpb.bpbBytesPerSec
+#define pm_ResSectors pm_bpb.bpbResSectors
+#define pm_FATs pm_bpb.bpbFATs
+#define pm_RootDirEnts pm_bpb.bpbRootDirEnts
+#define pm_Sectors pm_bpb.bpbSectors
+#define pm_Media pm_bpb.bpbMedia
+#define pm_SecPerTrack pm_bpb.bpbSecPerTrack
+#define pm_Heads pm_bpb.bpbHeads
+#define pm_HiddenSects pm_bpb.bpbHiddenSecs
+#define pm_HugeSectors pm_bpb.bpbHugeSectors
+
+/*
+ * Convert pointer to buffer -> pointer to direntry
+ */
+#define bptoep(pmp, bp, dirofs) \
+ ((struct direntry *)(((bp)->b_data) \
+ + ((dirofs) & (pmp)->pm_crbomask)))
+
+/*
+ * Convert block number to cluster number
+ */
+#define de_bn2cn(pmp, bn) \
+ ((bn) >> ((pmp)->pm_cnshift - (pmp)->pm_bnshift))
+
+/*
+ * Convert cluster number to block number
+ */
+#define de_cn2bn(pmp, cn) \
+ ((cn) << ((pmp)->pm_cnshift - (pmp)->pm_bnshift))
+
+/*
+ * Convert file offset to cluster number
+ */
+#define de_cluster(pmp, off) \
+ ((off) >> (pmp)->pm_cnshift)
+
+/*
+ * Clusters required to hold size bytes
+ */
+#define de_clcount(pmp, size) \
+ (((size) + (pmp)->pm_bpcluster - 1) >> (pmp)->pm_cnshift)
+
+/*
+ * Convert file offset to block number
+ */
+#define de_blk(pmp, off) \
+ (de_cn2bn(pmp, de_cluster((pmp), (off))))
+
+/*
+ * Convert cluster number to file offset
+ */
+#define de_cn2off(pmp, cn) \
+ ((cn) << (pmp)->pm_cnshift)
+
+/*
+ * Convert block number to file offset
+ */
+#define de_bn2off(pmp, bn) \
+ ((bn) << (pmp)->pm_bnshift)
+/*
+ * Map a cluster number into a filesystem relative block number.
+ */
+#define cntobn(pmp, cn) \
+ (de_cn2bn((pmp), (cn)-CLUST_FIRST) + (pmp)->pm_firstcluster)
+
+/*
+ * Calculate block number for directory entry in root dir, offset dirofs
+ */
+#define roottobn(pmp, dirofs) \
+ (de_blk((pmp), (dirofs)) + (pmp)->pm_rootdirblk)
+
+/*
+ * Calculate block number for directory entry at cluster dirclu, offset
+ * dirofs (dirofs is only used when dirclu is MSDOSFSROOT)
+ */
+#define detobn(pmp, dirclu, dirofs) \
+ ((dirclu) == MSDOSFSROOT \
+ ? roottobn((pmp), (dirofs)) \
+ : cntobn((pmp), (dirclu)))
+
+int msdosfs_init __P((struct vfsconf *vfsp));
+int msdosfs_mountroot __P((void));
+
+#endif /* KERNEL */
+
+/*
+ * Arguments to mount MSDOS filesystems.
+ */
+struct msdosfs_args {
+ char *fspec; /* block special device holding the fs to mount */
+ struct export_args export; /* network export information */
+ uid_t uid; /* uid that owns msdosfs files */
+ gid_t gid; /* gid that owns msdosfs files */
+ mode_t mask; /* mask to be applied for msdosfs perms */
+ int flags; /* see below */
+ int magic; /* version number (see MSDOSFS_ARGSMAGIC) */
+ u_int16_t u2w[128]; /* Local->Unicode table */
+ u_int8_t ul[128]; /* Local upper->lower table */
+ u_int8_t lu[128]; /* Local lower->upper table */
+ u_int8_t d2u[128]; /* DOS->local table */
+ u_int8_t u2d[128]; /* Local->DOS table */
+};
+
+/*
+ * Msdosfs mount options:
+ */
+#define MSDOSFSMNT_SHORTNAME 1 /* Force old DOS short names only */
+#define MSDOSFSMNT_LONGNAME 2 /* Force Win'95 long names */
+#define MSDOSFSMNT_NOWIN95 4 /* Completely ignore Win95 entries */
+#ifndef __FreeBSD__
+#define MSDOSFSMNT_GEMDOSFS 8 /* This is a gemdos-flavour */
+#endif
+#define MSDOSFSMNT_U2WTABLE 0x10 /* Local->Unicode and local<->DOS */
+ /* tables loaded */
+#define MSDOSFSMNT_ULTABLE 0x20 /* Local upper<->lower table loaded */
+/* All flags above: */
+#define MSDOSFSMNT_MNTOPT \
+ (MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \
+ /*|MSDOSFSMNT_GEMDOSFS*/|MSDOSFSMNT_U2WTABLE|MSDOSFSMNT_ULTABLE)
+#define MSDOSFSMNT_RONLY 0x80000000 /* mounted read-only */
+#define MSDOSFSMNT_WAITONFAT 0x40000000 /* mounted synchronous */
+#define MSDOSFS_FATMIRROR 0x20000000 /* FAT is mirrored */
+
+#define MSDOSFS_ARGSMAGIC 0xe4eff300
+
+#endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
new file mode 100644
index 0000000..7d46a11
--- /dev/null
+++ b/sys/fs/nullfs/null.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null.h 8.3 (Berkeley) 8/20/94
+ *
+ * $Id: null.h,v 1.7 1997/05/25 04:50:02 peter Exp $
+ */
+
+struct null_args {
+ char *target; /* Target of loopback */
+};
+
+struct null_mount {
+ struct mount *nullm_vfs; /* Mount of the underlying (lower) FS */
+ struct vnode *nullm_rootvp; /* Reference to root null_node */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ */
+struct null_node {
+ LIST_ENTRY(null_node) null_hash; /* Hash list */
+ struct vnode *null_lowervp; /* VREFed once */
+ struct vnode *null_vnode; /* Back pointer */
+};
+
+extern int nullfs_init __P((struct vfsconf *vfsp));
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+
+#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
+#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define NULLTOV(xp) ((xp)->null_vnode)
+#ifdef NULLFS_DIAGNOSTIC
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
+#else
+#define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
+#endif
+
+extern int null_bypass __P((struct vop_generic_args *ap));
+
+extern vop_t **null_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
new file mode 100644
index 0000000..603f418
--- /dev/null
+++ b/sys/fs/nullfs/null_subr.c
@@ -0,0 +1,303 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_subr.c 8.7 (Berkeley) 5/14/95
+ *
+ * $Id: null_subr.c,v 1.18 1998/07/15 02:32:18 bde Exp $
+ */
+
+#include "opt_debug_nullfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+#define NNULLNODECACHE 16
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the lower vnode is VREF'd. When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+
+#define NULL_NHASH(vp) \
+ (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash])
+static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl;
+static u_long null_node_hash;
+
+static int null_node_alloc __P((struct mount *mp, struct vnode *lowervp,
+ struct vnode **vpp));
+static struct vnode *
+ null_node_find __P((struct mount *mp, struct vnode *lowervp));
+
+/*
+ * Initialise the null_node hash table (vfsp is unused); always returns 0.
+ */
+int
+nullfs_init(vfsp)
+ struct vfsconf *vfsp;
+{
+
+#ifdef NULLFS_DIAGNOSTIC
+ printf("nullfs_init\n"); /* printed during system boot */
+#endif
+ null_node_hashtbl = hashinit(NNULLNODECACHE, M_CACHE, &null_node_hash); /* null_node_hash is what NULL_NHASH() masks with */
+ return (0);
+}
+
+/*
+ * Return a vget'ed alias of lowervp on mount mp if one exists, else NULLVP.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+ struct mount *mp;
+ struct vnode *lowervp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct null_node_hashhead *hd;
+ struct null_node *a;
+ struct vnode *vp;
+
+ /*
+ * Find hash base, and then search the (two-way) linked
+ * list looking for a null_node structure which is referencing
+ * the lower vnode. If found, take a reference on the alias vnode
+ * with vget() (but do NOT touch the lower vnode's VREF counter).
+ */
+ hd = NULL_NHASH(lowervp);
+loop:
+ for (a = hd->lh_first; a != 0; a = a->null_hash.le_next) {
+ if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
+ vp = NULLTOV(a);
+ /*
+ * We need vget for the VXLOCK
+ * stuff, but we don't want to lock
+ * the lower node.
+ */
+ if (vget(vp, 0, p)) {
+ printf ("null_node_find: vget failed.\n");
+ goto loop; /* rescan the chain from its head */
+ };
+ return (vp);
+ }
+ }
+
+ return NULLVP;
+}
+
+
+/*
+ * Make a new null_node node.
+ * *vpp receives the alias vnode, lowervp is the lower vnode.
+ * Maintain a reference to (lowervp).
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+ struct mount *mp;
+ struct vnode *lowervp;
+ struct vnode **vpp;
+{
+ struct null_node_hashhead *hd;
+ struct null_node *xp;
+ struct vnode *othervp, *vp;
+ int error;
+
+ /*
+ * Do the MALLOC before the getnewvnode since doing so afterward
+ * might cause a bogus v_data pointer to get dereferenced
+ * elsewhere if MALLOC should block.
+ */
+ MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+
+ error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp);
+ if (error) {
+ FREE(xp, M_TEMP);
+ return (error);
+ }
+ vp = *vpp;
+
+ vp->v_type = lowervp->v_type;
+ xp->null_vnode = vp;
+ vp->v_data = xp;
+ xp->null_lowervp = lowervp;
+ /*
+ * Before we insert our new node onto the hash chains,
+ * check to see if someone else has beaten us to it.
+ * (We could have slept in MALLOC.)
+ */
+ othervp = null_node_find(mp, lowervp);
+ if (othervp) {
+ FREE(xp, M_TEMP); /* NOTE(review): vp->v_data still points at xp after this */
+ vp->v_type = VBAD; /* node is discarded */
+ vp->v_usecount = 0; /* XXX */
+ *vpp = othervp; /* hand back the existing alias instead */
+ return 0;
+ };
+ VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */
+ hd = NULL_NHASH(lowervp);
+ LIST_INSERT_HEAD(hd, xp, null_hash);
+ return 0;
+}
+
+
+/*
+ * Try to find an existing null_node vnode referring to lowervp,
+ * otherwise make a new one which references the lower vnode. On
+ * success the caller's reference to lowervp is vrele'd and 0 returned.
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+ struct mount *mp;
+ struct vnode *lowervp;
+ struct vnode **newvpp;
+{
+ struct vnode *aliasvp;
+
+ aliasvp = null_node_find(mp, lowervp);
+ if (aliasvp) {
+ /*
+ * null_node_find has taken another reference
+ * to the alias vnode.
+ */
+#ifdef NULLFS_DIAGNOSTIC
+ vprint("null_node_create: exists", aliasvp);
+#endif
+ /* VREF(aliasvp); --- done in null_node_find */
+ } else {
+ int error;
+
+ /*
+ * Get new vnode.
+ */
+#ifdef NULLFS_DIAGNOSTIC
+ printf("null_node_create: create new alias vnode\n");
+#endif
+
+ /*
+ * Make new vnode reference the null_node.
+ */
+ error = null_node_alloc(mp, lowervp, &aliasvp);
+ if (error)
+ return error; /* *newvpp is left untouched and lowervp is not vrele'd */
+
+ /*
+ * aliasvp is already VREF'd by getnewvnode()
+ */
+ }
+
+ vrele(lowervp);
+
+#ifdef DIAGNOSTIC
+ if (lowervp->v_usecount < 1) {
+ /* Should never happen... */
+ vprint ("null_node_create: alias ", aliasvp);
+ vprint ("null_node_create: lower ", lowervp);
+ panic ("null_node_create: lower has 0 usecount.");
+ };
+#endif
+
+#ifdef NULLFS_DIAGNOSTIC
+ vprint("null_node_create: alias", aliasvp);
+ vprint("null_node_create: lower", lowervp);
+#endif
+
+ *newvpp = aliasvp;
+ return (0);
+}
+
+#ifdef NULLFS_DIAGNOSTIC
+#include "opt_ddb.h"
+
+#ifdef DDB
+#define null_checkvp_barrier 1
+#else
+#define null_checkvp_barrier 0
+#endif
+/* Diagnostic: panic unless vp's null_node has a referenced lower vnode; returns that lower vnode. */
+struct vnode *
+null_checkvp(vp, fil, lno)
+ struct vnode *vp;
+ char *fil;
+ int lno;
+{
+ struct null_node *a = VTONULL(vp);
+#ifdef notyet
+ /*
+ * Can't do this check because vop_reclaim runs
+ * with a funny vop vector.
+ */
+ if (vp->v_op != null_vnodeop_p) {
+ printf ("null_checkvp: on non-null-node\n");
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic("null_checkvp");
+ };
+#endif
+ if (a->null_lowervp == NULLVP) {
+ /* Should never happen; dump only the null_node's own words */
+ int i; u_long *p;
+ printf("vp = %p, ZERO ptr\n", (void *)vp);
+ for (p = (u_long *) a, i = 0; i < (int)(sizeof(*a) / sizeof(*p)); i++)
+ printf(" %lx", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic("null_checkvp");
+ }
+ if (a->null_lowervp->v_usecount < 1) {
+ int i; u_long *p;
+ printf("vp = %p, unref'ed lowervp\n", (void *)vp);
+ for (p = (u_long *) a, i = 0; i < (int)(sizeof(*a) / sizeof(*p)); i++)
+ printf(" %lx", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic ("null with unref'ed lowervp");
+ };
+#ifdef notyet
+ printf("null %p/%d -> %p/%d [%s, %d]\n",
+ (void *)NULLTOV(a), NULLTOV(a)->v_usecount,
+ (void *)a->null_lowervp, a->null_lowervp->v_usecount,
+ fil, lno);
+#endif
+ return a->null_lowervp;
+}
+#endif
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
new file mode 100644
index 0000000..4ead5bd
--- /dev/null
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -0,0 +1,425 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
+ * $Id: null_vfsops.c,v 1.27 1998/07/30 17:40:45 bde Exp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include "opt_debug_nullfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <miscfs/nullfs/null.h>
+
+static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure");
+
+static int nullfs_fhtovp __P((struct mount *mp, struct fid *fidp,
+ struct sockaddr *nam, struct vnode **vpp,
+ int *exflagsp, struct ucred **credanonp));
+static int nullfs_mount __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+static int nullfs_quotactl __P((struct mount *mp, int cmd, uid_t uid,
+ caddr_t arg, struct proc *p));
+static int nullfs_root __P((struct mount *mp, struct vnode **vpp));
+static int nullfs_start __P((struct mount *mp, int flags, struct proc *p));
+static int nullfs_statfs __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+static int nullfs_sync __P((struct mount *mp, int waitfor,
+ struct ucred *cred, struct proc *p));
+static int nullfs_unmount __P((struct mount *mp, int mntflags,
+ struct proc *p));
+static int nullfs_vget __P((struct mount *mp, ino_t ino,
+ struct vnode **vpp));
+static int nullfs_vptofh __P((struct vnode *vp, struct fid *fhp));
+
+/*
+ * Mount null layer.
+ *
+ * Copies in the null_args, looks up args.target with namei() and makes the
+ * resulting lower vnode the root of this mount via null_node_create().
+ * Returns 0 on success, EOPNOTSUPP for MNT_UPDATE, EDEADLK for a multi
+ * null mount, else the error from copyin(), namei() or null_node_create().
+ */
+static int
+nullfs_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error = 0;
+ struct null_args args;
+ struct vnode *lowerrootvp, *vp;
+ struct vnode *nullm_rootvp;
+ struct null_mount *xmp;
+ u_int size;
+ int isvnunlocked = 0;
+
+#ifdef NULLFS_DIAGNOSTIC
+ printf("nullfs_mount(mp = %p)\n", (void *)mp);
+#endif
+
+ /* Update is not supported */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ return (EOPNOTSUPP);
+ /* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, path, data, ndp, p);*/
+ }
+
+ /* Get argument */
+ error = copyin(data, (caddr_t)&args, sizeof(struct null_args));
+ if (error)
+ return (error);
+
+ /*
+ * Unlock lower node to avoid deadlock.
+ * (XXX) VOP_ISLOCKED is needed?
+ */
+ if ((mp->mnt_vnodecovered->v_op == null_vnodeop_p) &&
+ VOP_ISLOCKED(mp->mnt_vnodecovered)) {
+ VOP_UNLOCK(mp->mnt_vnodecovered, 0, p);
+ isvnunlocked = 1;
+ }
+ /* Find lower node */
+ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+ UIO_USERSPACE, args.target, p);
+ error = namei(ndp);
+ /* Re-lock vnode. */
+ if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered))
+ vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, p);
+
+ if (error)
+ return (error);
+
+ /* Take the lower root from namei(); the parent vnode is not needed. */
+ lowerrootvp = ndp->ni_vp;
+
+ vrele(ndp->ni_dvp);
+ ndp->ni_dvp = NULLVP;
+
+ /*
+ * Check multi null mount to avoid `lock against myself' panic.
+ * Only a covered null vnode has a null_node behind v_data.
+ */
+ if (mp->mnt_vnodecovered->v_op == null_vnodeop_p &&
+ lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) {
+#ifdef DIAGNOSTIC
+ printf("nullfs_mount: multi null mount?\n");
+#endif
+ /*
+ * Drop the reference taken by namei(); the lock state is left
+ * unchanged. NOTE(review): confirm who unlocks on this path.
+ */
+ vrele(lowerrootvp);
+ return (EDEADLK);
+ }
+
+ xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+ M_NULLFSMNT, M_WAITOK); /* XXX */
+
+ /* Save reference to underlying FS */
+ xmp->nullm_vfs = lowerrootvp->v_mount;
+
+ /*
+ * Save reference. Each mount also holds
+ * a reference on the root vnode.
+ */
+ error = null_node_create(mp, lowerrootvp, &vp);
+ /*
+ * Make sure the node alias worked. On failure vp was never set,
+ * so unlock and release the lower vnode itself.
+ */
+ if (error) {
+ VOP_UNLOCK(lowerrootvp, 0, p);
+ vrele(lowerrootvp);
+ free(xmp, M_NULLFSMNT); /* XXX */
+ return (error);
+ }
+ /* Unlock the node (either the lower or the alias) */
+ VOP_UNLOCK(vp, 0, p);
+
+ /*
+ * Keep a held reference to the root vnode.
+ * It is vrele'd in nullfs_unmount.
+ */
+ nullm_rootvp = vp;
+ nullm_rootvp->v_flag |= VROOT;
+ xmp->nullm_rootvp = nullm_rootvp;
+ if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_data = (qaddr_t) xmp;
+ vfs_getnewfsid(mp);
+
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+ (void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void)nullfs_statfs(mp, &mp->mnt_stat, p);
+#ifdef NULLFS_DIAGNOSTIC
+ printf("nullfs_mount: lower %s, alias at %s\n",
+ mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+ return (0);
+}
+
+/*
+ * VFS start. Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.
+ */
+static int
+nullfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return (0);
+ /* return VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, p); */
+}
+
+/*
+ * Unmount: flush vnodes, release the root alias, free the null_mount.
+ */
+static int
+nullfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+ int error;
+ int flags = 0;
+
+#ifdef NULLFS_DIAGNOSTIC
+ printf("nullfs_unmount(mp = %p)\n", (void *)mp);
+#endif
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+
+ /*
+ * Clear out buffer cache. I don't think we
+ * ever get anything cached at this level at the
+ * moment, but who knows...
+ */
+#if 0
+ mntflushbuf(mp, 0);
+ if (mntinvalbuf(mp, 1))
+ return (EBUSY);
+#endif
+ if (nullm_rootvp->v_usecount > 1) /* checked even when MNT_FORCE is set */
+ return (EBUSY);
+ error = vflush(mp, nullm_rootvp, flags);
+ if (error)
+ return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+ vprint("alias root of lower", nullm_rootvp);
+#endif
+ /*
+ * Release the reference on the root alias vnode
+ */
+ vrele(nullm_rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(nullm_rootvp);
+ /*
+ * Finally, throw away the null_mount structure
+ */
+ free(mp->mnt_data, M_NULLFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return 0;
+}
+
+static int
+nullfs_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct vnode *vp;
+
+#ifdef NULLFS_DIAGNOSTIC
+ printf("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp,
+ (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+ (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+#endif
+
+ /*
+ * Return locked reference to root, or EDEADLK if it is already locked.
+ */
+ vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+ VREF(vp); /* reference handed to the caller through *vpp */
+ if (VOP_ISLOCKED(vp)) {
+ /*
+ * XXX
+ * Should we check type of node?
+ */
+#ifdef DIAGNOSTIC
+ printf("nullfs_root: multi null mount?\n");
+#endif
+ vrele(vp); /* undo the VREF above; *vpp is not set */
+ return (EDEADLK);
+ } else
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ *vpp = vp;
+ return 0;
+}
+
+static int
+nullfs_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+ return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, p); /* forwarded unchanged to the lower mount */
+}
+
+static int
+nullfs_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp;
+ struct proc *p;
+{
+ int error;
+ struct statfs mstat; /* scratch copy of the lower mount's statistics */
+
+#ifdef NULLFS_DIAGNOSTIC
+ printf("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp,
+ (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+ (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+#endif
+
+ bzero(&mstat, sizeof(mstat));
+
+ error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, p);
+ if (error)
+ return (error); /* sbp is left untouched on failure */
+
+ /* now copy across the "interesting" information and fake the rest */
+ sbp->f_type = mstat.f_type;
+ sbp->f_flags = mstat.f_flags;
+ sbp->f_bsize = mstat.f_bsize;
+ sbp->f_iosize = mstat.f_iosize;
+ sbp->f_blocks = mstat.f_blocks;
+ sbp->f_bfree = mstat.f_bfree;
+ sbp->f_bavail = mstat.f_bavail;
+ sbp->f_files = mstat.f_files;
+ sbp->f_ffree = mstat.f_ffree;
+ if (sbp != &mp->mnt_stat) { /* caller's own buffer: copy fsid and names from mnt_stat */
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ return (0);
+}
+
+static int
+nullfs_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ /*
+ * XXX - Assumes no data cached at null layer.
+ */
+ return (0);
+}
+
+static int
+nullfs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+ /* NOTE(review): *vpp is filled in by the lower mount; no null alias is created here */
+ return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp);
+}
+
+static int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+ struct mount *mp;
+ struct fid *fidp;
+ struct sockaddr *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred**credanonp;
+{
+ /* NOTE(review): forwarded to the lower mount; *vpp is not wrapped in a null alias here */
+ return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, nam,
+ vpp, exflagsp, credanonp);
+}
+
+static int
+nullfs_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+ return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp); /* delegated to the lower vnode */
+}
+
+static struct vfsops null_vfsops = {
+ nullfs_mount,
+ nullfs_start,
+ nullfs_unmount,
+ nullfs_root,
+ nullfs_quotactl,
+ nullfs_statfs,
+ nullfs_sync,
+ nullfs_vget,
+ nullfs_fhtovp,
+ nullfs_vptofh,
+ nullfs_init,
+};
+
+VFS_SET(null_vfsops, null, VFCF_LOOPBACK);
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
new file mode 100644
index 0000000..db5c341
--- /dev/null
+++ b/sys/fs/nullfs/null_vnops.c
@@ -0,0 +1,669 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95
+ *
+ * Ancestors:
+ * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
+ * $Id: null_vnops.c,v 1.31 1999/01/27 22:42:06 dillon Exp $
+ * ...and...
+ * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ *
+ * $Id: null_vnops.c,v 1.31 1999/01/27 22:42:06 dillon Exp $
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name. In this respect, it is
+ * similar to the loopback file system. It differs from
+ * the loopback fs in two respects: it is implemented using
+ * stackable-layer techniques, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes. First, it serves as a demonstration
+ * of layering by providing a layer which does nothing. (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.) Second, the null layer can serve as a prototype
+ * layer. Since it provides all necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn). After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there. The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer. It begins by examining vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents. It then invokes the operation
+ * on the lower layer. Finally, it replaces the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations, vop_getattr, vop_lock,
+ * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not
+ * bypassed. Vop_getattr must change the fsid being returned.
+ * Vop_lock and vop_unlock must handle any locking for the
+ * current vnode as well as pass the lock request down.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data. Vop_print
+ * is not bypassed to avoid excessive debugging information.
+ * Also, certain vnode operations change the locking state within
+ * the operation (create, mknod, remove, link, rename, mkdir, rmdir,
+ * and symlink). Ideally these operations should not change the
+ * lock state, but should be changed to let the caller of the
+ * function unlock them. Otherwise all intermediate vnode layers
+ * (such as union, umapfs, etc) must catch these functions to do
+ * the necessary locking at their layer.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * in effect stacking two VFSes. Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer. All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys". A vop_lookup would be
+ * done on the root null-node. This operation would bypass through
+ * to the lower layer which would return a vnode representing
+ * the UFS "sys". Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy. Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer
+ * when the operation cannot be completely bypassed. Each method
+ * is appropriate in different situations. In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer. It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer. The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include "opt_debug_nullfs.h"
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
+SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW,
+ &null_bug_bypass, 0, "");
+/* Forward declarations of the file-local vnode operations defined below. */
+static int null_access __P((struct vop_access_args *ap));
+static int null_bwrite __P((struct vop_bwrite_args *ap));
+static int null_getattr __P((struct vop_getattr_args *ap));
+static int null_inactive __P((struct vop_inactive_args *ap));
+static int null_lock __P((struct vop_lock_args *ap));
+static int null_lookup __P((struct vop_lookup_args *ap));
+static int null_print __P((struct vop_print_args *ap));
+static int null_reclaim __P((struct vop_reclaim_args *ap));
+static int null_setattr __P((struct vop_setattr_args *ap));
+static int null_strategy __P((struct vop_strategy_args *ap));
+static int null_unlock __P((struct vop_unlock_args *ap));
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * This version has been optimized for speed, throwing away some
+ * safety checks. It should still always work, but it's not as
+ * robust to programmer errors.
+ * Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments. With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here. This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ * to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ * problems on rmdir'ing mount points and renaming?)
+ */
+int
+null_bypass(ap)
+ struct vop_generic_args /* {
+ struct vnodeop_desc *a_desc;
+ <other random data follows, presumably>
+ } */ *ap;
+{
+ register struct vnode **this_vp_p; /* vnode slot inside *ap currently being mapped */
+ int error;
+ struct vnode *old_vps[VDESC_MAX_VPS]; /* original vnodes; NULLVP where slot i was left unmapped */
+ struct vnode **vps_p[VDESC_MAX_VPS]; /* addresses of the vnode slots inside *ap */
+ struct vnode ***vppp; /* address of the returned-vnode (vpp) slot inside *ap */
+ struct vnodeop_desc *descp = ap->a_desc;
+ int reles, i; /* reles: working copy of vdesc_flags, shifted right once per slot */
+
+ if (null_bug_bypass)
+ printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+ /*
+ * We require at least one vp.
+ */
+ if (descp->vdesc_vp_offsets == NULL ||
+ descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+ panic ("null_bypass: no vp's in map.");
+#endif
+
+ /*
+ * Map the vnodes going in.
+ * Later, we'll invoke the operation based on
+ * the first mapped vnode's operation vector.
+ */
+ reles = descp->vdesc_flags;
+ for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+ if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+ break; /* bail out at end of list */
+ vps_p[i] = this_vp_p =
+ VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+ /*
+ * We're not guaranteed that any but the first vnode
+ * are of our type. Check for and don't map any
+ * that aren't. (We must always map first vp or vclean fails.)
+ * Slot 0 is therefore mapped without any NULLVP or v_op check.
+ */
+ if (i && (*this_vp_p == NULLVP ||
+ (*this_vp_p)->v_op != null_vnodeop_p)) {
+ old_vps[i] = NULLVP;
+ } else {
+ old_vps[i] = *this_vp_p;
+ *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+ /*
+ * XXX - Several operations have the side effect
+ * of vrele'ing their vp's. We must account for
+ * that. (This should go away in the future.)
+ * Note that the slot now holds the lower vnode, so
+ * the VREF below is taken on the lower vnode.
+ */
+ if (reles & 1)
+ VREF(*this_vp_p);
+ }
+
+ }
+
+ /*
+ * Call the operation on the lower layer
+ * with the modified argument structure.
+ */
+ error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+ /*
+ * Maintain the illusion of call-by-value
+ * by restoring vnodes in the argument structure
+ * to their original value.
+ * For slots flagged in vdesc_flags, the restored (upper) vnode is
+ * vrele'd here, matching the VREF taken on the lower vnode above.
+ */
+ reles = descp->vdesc_flags;
+ for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+ if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+ break; /* bail out at end of list */
+ if (old_vps[i]) {
+ *(vps_p[i]) = old_vps[i];
+ if (reles & 1)
+ vrele(*(vps_p[i]));
+ }
+ }
+
+ /*
+ * Map the possible out-going vpp
+ * (Assumes that the lower layer always returns
+ * a VREF'ed vpp unless it gets an error.)
+ */
+ if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+ !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+ !error) {
+ /*
+ * XXX - even though some ops have vpp returned vp's,
+ * several ops actually vrele this before returning.
+ * We must avoid these ops.
+ * (This should go away when these ops are regularized.)
+ */
+ if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+ goto out;
+ vppp = VOPARG_OFFSETTO(struct vnode***,
+ descp->vdesc_vpp_offset,ap);
+ if (*vppp) /* skip when the caller passed no vpp pointer */
+ error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+ }
+
+ out:
+ return (error);
+}
+
+/*
+ * We have to carry on the locking protocol on the null layer vnodes
+ * as we progress through the tree. We also have to enforce read-only
+ * if this layer is mounted read-only.
+ *
+ * Returns EROFS for a DELETE or RENAME of the last component on a
+ * read-only null mount without consulting the lower layer; otherwise
+ * returns the result of null_bypass(), with EJUSTRETURN turned into
+ * EROFS for a CREATE or RENAME of the last component on a read-only mount.
+ */
+static int
+null_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+ int flags = cnp->cn_flags;
+ struct vop_lock_args lockargs;
+ struct vop_unlock_args unlockargs;
+ struct vnode *dvp, *vp;
+ int error;
+
+ if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ return (EROFS);
+ error = null_bypass((struct vop_generic_args *)ap);
+ if (error == EJUSTRETURN && (flags & ISLASTCN) &&
+ (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME))
+ error = EROFS;
+ /*
+ * We must do the same locking and unlocking at this layer as
+ * is done in the layers below us. We could figure this out
+ * based on the error return and the LASTCN, LOCKPARENT, and
+ * LOCKLEAF flags. However, it is more expedient to just find
+ * out the state of the lower level vnodes and set ours to the
+ * same state.
+ *
+ * Only the a_vp, a_flags and a_p members of lockargs/unlockargs
+ * are filled in before they are handed to vop_nolock() and
+ * vop_nounlock().
+ * NOTE(review): *ap->a_vpp is read even when error is non-zero;
+ * this presumes the lower layer leaves it in a defined state on
+ * failure -- confirm.
+ */
+ dvp = ap->a_dvp;
+ vp = *ap->a_vpp;
+ if (dvp == vp) /* parent and result are the same vnode: nothing to adjust */
+ return (error);
+ if (!VOP_ISLOCKED(dvp)) {
+ unlockargs.a_vp = dvp;
+ unlockargs.a_flags = 0;
+ unlockargs.a_p = p;
+ vop_nounlock(&unlockargs);
+ }
+ if (vp != NULLVP && VOP_ISLOCKED(vp)) {
+ lockargs.a_vp = vp;
+ lockargs.a_flags = LK_SHARED;
+ lockargs.a_p = p;
+ vop_nolock(&lockargs);
+ }
+ return (error);
+}
+
+/*
+ * Setattr call. Disallow write attempts if the layer is mounted read-only.
+ *
+ * Any request that changes flags, uid, gid, atime, mtime or mode on a
+ * read-only null mount fails with EROFS. A size change is rejected with
+ * EISDIR on a directory; on a device, socket or fifo it returns
+ * EOPNOTSUPP if flags are also being set and otherwise returns 0 without
+ * reaching the lower layer; on any other vnode type it fails with EROFS
+ * when the mount is read-only. Everything else is passed to
+ * null_bypass() and its result returned.
+ *
+ * Defined static to agree with the forward declaration earlier in this
+ * file and with the other vnode operations here.
+ */
+static int
+null_setattr(ap)
+ struct vop_setattr_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct vattr *vap = ap->a_vap;
+
+ if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+ vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+ vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) &&
+ (vp->v_mount->mnt_flag & MNT_RDONLY))
+ return (EROFS);
+ if (vap->va_size != VNOVAL) {
+ switch (vp->v_type) {
+ case VDIR:
+ return (EISDIR);
+ case VCHR:
+ case VBLK:
+ case VSOCK:
+ case VFIFO:
+ if (vap->va_flags != VNOVAL)
+ return (EOPNOTSUPP);
+ return (0);
+ case VREG:
+ case VLNK:
+ default:
+ /*
+ * Disallow write attempts if the filesystem is
+ * mounted read-only.
+ */
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ return (EROFS);
+ }
+ }
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * Getattr is intercepted for one reason only: the fsid reported to the
+ * caller must be that of the null mount. Returns the null_bypass()
+ * error unchanged on failure, 0 on success.
+ */
+static int
+null_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ int error = null_bypass((struct vop_generic_args *)ap);
+
+ /*
+ * This relies on null_bypass() having restored ap->a_vp, so that
+ * the mount consulted below is our own.
+ */
+ if (error == 0)
+ ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+ return (error);
+}
+
+static int
+null_access(ap)
+ struct vop_access_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *node = ap->a_vp;
+ mode_t wanted = ap->a_mode;
+
+ /*
+ * Write access to a directory, symlink or regular file is refused
+ * with EROFS when the null mount is read-only. Other vnode types
+ * (sockets, fifos, block and character devices) are exempt from
+ * that check. All remaining requests go to the lower layer.
+ */
+ if ((wanted & VWRITE) == 0)
+ return (null_bypass((struct vop_generic_args *)ap));
+ if ((node->v_type == VDIR || node->v_type == VLNK ||
+ node->v_type == VREG) &&
+ (node->v_mount->mnt_flag & MNT_RDONLY))
+ return (EROFS);
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * We need to process our own vnode lock and then clear the
+ * interlock flag as it applies only to our vnode, not the
+ * vnodes below us on the stack.
+ *
+ * vop_nolock() is applied to the null vnode first; its return value is
+ * not examined. An LK_DRAIN request stops there and returns 0 without
+ * reaching the lower vnode. Any other request has LK_INTERLOCK cleared
+ * in ap->a_flags (modified in place) and is passed down through
+ * null_bypass(), whose result is returned.
+ */
+static int
+null_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ vop_nolock(ap);
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
+ return (0);
+ ap->a_flags &= ~LK_INTERLOCK;
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * We need to process our own vnode unlock and then clear the
+ * interlock flag as it applies only to our vnode, not the
+ * vnodes below us on the stack.
+ *
+ * vop_nounlock() is applied to the null vnode (return value not
+ * examined), LK_INTERLOCK is cleared in ap->a_flags (modified in place),
+ * and the request is then passed down through null_bypass(), whose
+ * result is returned.
+ */
+static int
+null_unlock(ap)
+ struct vop_unlock_args /* {
+ struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
+ } */ *ap;
+{
+ vop_nounlock(ap);
+ ap->a_flags &= ~LK_INTERLOCK;
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+static int
+null_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct null_node *xp = VTONULL(vp);
+ struct vnode *lowervp = xp->null_lowervp;
+ /*
+ * Do nothing (and _don't_ bypass).
+ * Wait to vrele lowervp until reclaim,
+ * so that until then our null_node is in the
+ * cache and reusable.
+ * We still have to tell the lower layer the vnode
+ * is now inactive though.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing
+ * the lowervp and then trying to reactivate it
+ * with capabilities (v_id)
+ * like they do in the name lookup cache code.
+ * That's too much work for now.
+ *
+ * The result of VOP_INACTIVE on the lower vnode is discarded;
+ * the null vnode is then unlocked and 0 is always returned.
+ */
+ VOP_INACTIVE(lowervp, ap->a_p);
+ VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ return (0);
+}
+
+static int
+null_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct null_node *xp = VTONULL(vp);
+ struct vnode *lowervp = xp->null_lowervp; /* saved: xp is freed before the vrele below */
+
+ /*
+ * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
+ * so we can't call VOPs on ourself.
+ *
+ * Teardown order: detach the null_node from its lower vnode and
+ * from the hash list, free it, clear v_data, and finally drop the
+ * reference on the lower vnode. Always returns 0.
+ */
+ /* After this assignment, this node will not be re-used. */
+ xp->null_lowervp = NULLVP;
+ LIST_REMOVE(xp, null_hash);
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = NULL;
+ vrele (lowervp);
+ return (0);
+}
+
+static int
+null_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp)); /* one line: this vnode and the lower vnode it maps to */
+ return (0); /* always succeeds; the lower layer is not called */
+}
+
+/*
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's b_vp is temporarily replaced by the corresponding lower
+ * vnode, VOP_STRATEGY is issued on that lower vnode, and b_vp is put
+ * back before the lower layer's result is returned.
+ */
+static int
+null_strategy(ap)
+ struct vop_strategy_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+ struct buf *bp = ap->a_bp;
+ int error;
+ struct vnode *savedvp; /* the buffer's original b_vp, restored after the call */
+
+ savedvp = bp->b_vp;
+ bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
+
+ error = VOP_STRATEGY(bp->b_vp, bp);
+
+ bp->b_vp = savedvp;
+
+ return (error);
+}
+
+/*
+ * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's b_vp is temporarily replaced by the corresponding lower
+ * vnode, VOP_BWRITE is issued on the buffer, and b_vp is put back before
+ * the result is returned.
+ */
+static int
+null_bwrite(ap)
+ struct vop_bwrite_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+ struct buf *bp = ap->a_bp;
+ int error;
+ struct vnode *savedvp; /* the buffer's original b_vp, restored after the call */
+
+ savedvp = bp->b_vp;
+ bp->b_vp = NULLVPTOLOWERVP(bp->b_vp);
+
+ error = VOP_BWRITE(bp);
+
+ bp->b_vp = savedvp;
+
+ return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * null_vnodeop_entries pairs each operation descriptor with the routine
+ * that handles it in this layer. Every operation not listed explicitly
+ * falls to the vop_default_desc entry, i.e. null_bypass. The list is
+ * terminated by a { NULL, NULL } pair.
+ */
+vop_t **null_vnodeop_p;
+static struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) null_bypass },
+ { &vop_access_desc, (vop_t *) null_access },
+ { &vop_bwrite_desc, (vop_t *) null_bwrite },
+ { &vop_getattr_desc, (vop_t *) null_getattr },
+ { &vop_inactive_desc, (vop_t *) null_inactive },
+ { &vop_lock_desc, (vop_t *) null_lock },
+ { &vop_lookup_desc, (vop_t *) null_lookup },
+ { &vop_print_desc, (vop_t *) null_print },
+ { &vop_reclaim_desc, (vop_t *) null_reclaim },
+ { &vop_setattr_desc, (vop_t *) null_setattr },
+ { &vop_strategy_desc, (vop_t *) null_strategy },
+ { &vop_unlock_desc, (vop_t *) null_unlock },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc null_vnodeop_opv_desc =
+ { &null_vnodeop_p, null_vnodeop_entries };
+/* Hand the descriptor (vector pointer + entry table) to VNODEOP_SET. */
+VNODEOP_SET(null_vnodeop_opv_desc);
diff --git a/sys/fs/portalfs/portal.h b/sys/fs/portalfs/portal.h
new file mode 100644
index 0000000..d60826e
--- /dev/null
+++ b/sys/fs/portalfs/portal.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal.h 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.4 1997/02/22 09:40:24 peter Exp $
+ */
+
+struct portal_args { /* mount arguments for the portal filesystem */
+ char *pa_config; /* Config file */
+ int pa_socket; /* Socket to server */
+};
+/* Credential summary; field comments indicate the values come from a ucred. */
+struct portal_cred {
+ int pcr_flag; /* File open mode */
+ uid_t pcr_uid; /* From ucred */
+ short pcr_ngroups; /* From ucred */
+ gid_t pcr_groups[NGROUPS]; /* From ucred */
+};
+/* Everything below is visible to kernel code only. */
+#ifdef KERNEL
+struct portalmount { /* per-mount data, reached through VFSTOPORTAL(mp) */
+ struct vnode *pm_root; /* Root node */
+ struct file *pm_server; /* Held reference to server socket */
+};
+
+struct portalnode { /* per-vnode data, reached through VTOPORTAL(vp) */
+ int pt_size; /* Length of Arg */
+ char *pt_arg; /* Arg to send to server */
+ int pt_fileid; /* cookie */
+};
+/* Accessors: cast a mount's mnt_data / a vnode's v_data to the portal types. */
+#define VFSTOPORTAL(mp) ((struct portalmount *)((mp)->mnt_data))
+#define VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data)
+/* pt_fileid value given to the root vnode of a portal mount. */
+#define PORTAL_ROOTFILEID 2
+
+extern vop_t **portal_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c
new file mode 100644
index 0000000..633bf77
--- /dev/null
+++ b/sys/fs/portalfs/portal_vfsops.c
@@ -0,0 +1,292 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vfsops.c 8.11 (Berkeley) 5/14/95
+ *
+ * $Id: portal_vfsops.c,v 1.21 1998/05/06 05:29:35 msmith Exp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <miscfs/portal/portal.h>
+
+static MALLOC_DEFINE(M_PORTALFSMNT, "PORTAL mount", "PORTAL mount structure");
+/* Forward declarations of the VFS operations implemented in this file. */
+static int portal_init __P((struct vfsconf *));
+static int portal_mount __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+static int portal_start __P((struct mount *mp, int flags, struct proc *p));
+static int portal_unmount __P((struct mount *mp, int mntflags,
+ struct proc *p));
+static int portal_root __P((struct mount *mp, struct vnode **vpp));
+static int portal_statfs __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+
+static int
+portal_init(vfsp)
+ struct vfsconf *vfsp;
+{
+ /* Nothing to set up: vfsp is not examined and 0 is always returned. */
+ return (0);
+}
+
+/*
+ * Mount a portal filesystem.
+ *
+ * Copies a struct portal_args in from "data", looks up the descriptor
+ * named by pa_socket, requires it to be an AF_UNIX socket, then creates
+ * the root directory vnode and the portalmount structure stored in
+ * mp->mnt_data. Returns 0 on success or an errno value.
+ */
+static int
+portal_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct file *fp;
+ struct portal_args args;
+ struct portalmount *fmp;
+ struct socket *so;
+ struct vnode *rvp;
+ struct portalnode *pn;
+ u_int size;
+ int error;
+
+ /*
+ * Updating an existing mount is not supported.
+ */
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ error = copyin(data, (caddr_t) &args, sizeof(struct portal_args));
+ if (error)
+ return (error);
+
+ error = getsock(p->p_fd, args.pa_socket, &fp);
+ if (error)
+ return (error);
+ so = (struct socket *) fp->f_data;
+ if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+ return (ESOCKTNOSUPPORT);
+ /* Private data for the root vnode; freed below if no vnode can be had. */
+ MALLOC(pn, struct portalnode *, sizeof(struct portalnode),
+ M_TEMP, M_WAITOK);
+
+ MALLOC(fmp, struct portalmount *, sizeof(struct portalmount),
+ M_PORTALFSMNT, M_WAITOK); /* XXX */
+
+ error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+ if (error) {
+ FREE(fmp, M_PORTALFSMNT);
+ FREE(pn, M_TEMP);
+ return (error);
+ }
+ /* Set up the root: a VDIR flagged VROOT with an empty argument string. */
+ rvp->v_data = pn;
+ rvp->v_type = VDIR;
+ rvp->v_flag |= VROOT;
+ VTOPORTAL(rvp)->pt_arg = 0;
+ VTOPORTAL(rvp)->pt_size = 0;
+ VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID;
+ fmp->pm_root = rvp;
+ fmp->pm_server = fp; fp->f_count++; /* count the mount's use of the server's file */
+
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_data = (qaddr_t) fmp;
+ vfs_getnewfsid(mp);
+ /*
+ * Record the mount point and the config file name in mnt_stat,
+ * zero-filling the remainder of each name buffer.
+ * NOTE(review): the copyinstr() results are ignored; "size" is only
+ * meaningful if the copy succeeded -- confirm this is acceptable.
+ */
+ (void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+ (void)copyinstr(args.pa_config,
+ mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+ bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+ (void)portal_statfs(mp, &mp->mnt_stat, p);
+ return (0);
+}
+
+static int
+portal_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ /* No start-up work: the arguments are ignored and 0 is returned. */
+ return (0);
+}
+
+static int
+portal_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root;
+ int error, flags = 0;
+ /*
+ * Unmount: returns EBUSY while the root vnode has more than one
+ * user, otherwise flushes the mount's vnodes (skipping the root),
+ * disposes of the root, shuts down and releases the server socket,
+ * and frees the portalmount. Returns 0 or the vflush() error.
+ */
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+
+ /*
+ * Clear out buffer cache. I don't think we
+ * ever get anything cached at this level at the
+ * moment, but who knows...
+ */
+#ifdef notyet
+ mntflushbuf(mp, 0);
+ if (mntinvalbuf(mp, 1))
+ return (EBUSY);
+#endif
+ if (rootvp->v_usecount > 1) /* checked even when MNT_FORCE was given */
+ return (EBUSY);
+ error = vflush(mp, rootvp, flags);
+ if (error)
+ return (error);
+
+ /*
+ * Release reference on underlying root vnode
+ */
+ vrele(rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(rootvp);
+ /*
+ * Shutdown the socket. This will cause the select in the
+ * daemon to wake up, and then the accept will get ECONNABORTED
+ * which it interprets as a request to go and bury itself.
+ */
+ soshutdown((struct socket *) VFSTOPORTAL(mp)->pm_server->f_data, 2);
+ /*
+ * Discard reference to underlying file. Must call closef because
+ * this may be the last reference.
+ */
+ closef(VFSTOPORTAL(mp)->pm_server, (struct proc *) 0);
+ /*
+ * Finally, throw away the portalmount structure
+ */
+ free(mp->mnt_data, M_PORTALFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+static int
+portal_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct vnode *vp;
+
+ /*
+ * Return locked reference to root.
+ * The root is the vnode saved in pm_root at mount time; a new
+ * reference is taken and an exclusive lock requested with LK_RETRY
+ * before it is stored in *vpp. Always returns 0.
+ */
+ vp = VFSTOPORTAL(mp)->pm_root;
+ VREF(vp);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ *vpp = vp;
+ return (0);
+}
+
+static int
+portal_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp;
+ struct proc *p;
+{
+ /*
+ * Fill *sbp with fixed, synthetic figures. When sbp is not the
+ * mount's own mnt_stat, the type number, fsid and the two mount
+ * names are copied across as well. Always returns 0.
+ */
+ sbp->f_flags = 0;
+ sbp->f_bsize = DEV_BSIZE;
+ sbp->f_iosize = DEV_BSIZE;
+ sbp->f_blocks = 2; /* 1K to keep df happy */
+ sbp->f_bfree = 0;
+ sbp->f_bavail = 0;
+ sbp->f_files = 1; /* Allow for "." */
+ sbp->f_ffree = 0; /* no free file slots are reported */
+ if (sbp != &mp->mnt_stat) {
+ sbp->f_type = mp->mnt_vfc->vfc_typenum;
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ return (0);
+}
+/*
+ * Operations with no portal-specific implementation are mapped onto the
+ * generic eopnotsupp or nullop routines, cast to the function-pointer
+ * type of the corresponding entry point. portal_sysctl is defined here
+ * but does not appear in the portal_vfsops initializer below.
+ */
+#define portal_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+ struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp)
+#define portal_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+ struct proc *)))eopnotsupp)
+#define portal_sync ((int (*) __P((struct mount *, int, struct ucred *, \
+ struct proc *)))nullop)
+#define portal_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+ size_t, struct proc *)))eopnotsupp)
+#define portal_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+ eopnotsupp)
+#define portal_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
+
+static struct vfsops portal_vfsops = { /* positional initializer: entries follow struct vfsops member order */
+ portal_mount,
+ portal_start,
+ portal_unmount,
+ portal_root,
+ portal_quotactl,
+ portal_statfs,
+ portal_sync,
+ portal_vget,
+ portal_fhtovp,
+ portal_vptofh,
+ portal_init,
+};
+/* Hand the table to VFS_SET under the name "portal", flagged VFCF_SYNTHETIC. */
+VFS_SET(portal_vfsops, portal, VFCF_SYNTHETIC);
diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c
new file mode 100644
index 0000000..819d636
--- /dev/null
+++ b/sys/fs/portalfs/portal_vnops.c
@@ -0,0 +1,607 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vnops.c 8.14 (Berkeley) 5/21/95
+ *
+ * $Id: portal_vnops.c,v 1.34 1998/12/07 21:58:32 archie Exp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/sysproto.h>
+#include <sys/kernel.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+/*
+ * Next file id to hand out. portal_lookup() stores the current value
+ * in each new portal node and then increments it, so ids start just
+ * above PORTAL_ROOTFILEID.
+ */
+static int portal_fileid = PORTAL_ROOTFILEID+1;
+
+/*
+ * Forward declarations for the file-local helpers and vnode operations.
+ */
+static int portal_badop __P((void));
+static void portal_closefd __P((struct proc *p, int fd));
+static int portal_connect __P((struct socket *so, struct socket *so2));
+static int portal_getattr __P((struct vop_getattr_args *ap));
+static int portal_inactive __P((struct vop_inactive_args *ap));
+static int portal_lookup __P((struct vop_lookup_args *ap));
+static int portal_open __P((struct vop_open_args *ap));
+static int portal_print __P((struct vop_print_args *ap));
+static int portal_readdir __P((struct vop_readdir_args *ap));
+static int portal_reclaim __P((struct vop_reclaim_args *ap));
+static int portal_setattr __P((struct vop_setattr_args *ap));
+
+/*
+ * Close descriptor fd in process p by calling close() with a
+ * hand-built argument structure.
+ */
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct close_args args;
+	int rv;
+
+	args.fd = fd;
+	rv = close(p, &args);
+	if (rv == 0)
+		return;
+
+	/*
+	 * A failure here is not expected and there is no way to
+	 * recover from one, so it is only reported.
+	 */
+	printf("portal_closefd: error = %d\n", rv);
+}
+
+/*
+ * Look up a name below a portal mount.
+ *
+ * a_dvp is the directory being searched and a_cnp describes the name
+ * wanted. "." yields a new reference to the directory itself. Any
+ * other name produces a fresh VREG vnode whose portalnode keeps a copy
+ * of the whole remaining pathname, not just the current component;
+ * cn_consume is set to the length of everything after that component
+ * so namei moves on to the end of the string. DELETE and RENAME
+ * lookups are refused with EROFS.
+ *
+ * The vnode stored through a_vpp is referenced but not locked here.
+ */
+static int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode *dvp = ap->a_dvp;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	char *name = cnp->cn_nameptr;
+	struct vnode *nvp = 0;
+	struct portalnode *pn;
+	char *end;
+	int len;
+	int error;
+
+	*vpp = NULLVP;
+
+	switch (cnp->cn_nameiop) {
+	case DELETE:
+	case RENAME:
+		return (EROFS);
+	default:
+		break;
+	}
+
+	if (cnp->cn_namelen == 1 && name[0] == '.') {
+		VREF(dvp);
+		*vpp = dvp;
+		return (0);
+	}
+
+	/*
+	 * The portalnode is allocated ahead of getnewvnode: were the
+	 * MALLOC to block after the vnode existed, a bogus v_data
+	 * pointer could be dereferenced elsewhere in the meantime.
+	 */
+	MALLOC(pn, struct portalnode *, sizeof(struct portalnode),
+	    M_TEMP, M_WAITOK);
+
+	error = getnewvnode(VT_PORTAL, dvp->v_mount, portal_vnodeop_p, &nvp);
+	if (error) {
+		FREE(pn, M_TEMP);
+		if (nvp)
+			vrele(nvp);
+		return (error);
+	}
+	nvp->v_data = pn;
+	nvp->v_type = VREG;
+
+	/*
+	 * Measure everything from the current component to the end of
+	 * the path and tell namei how much of it lies beyond this
+	 * component.
+	 */
+	for (end = name; *end != '\0'; end++)
+		continue;
+	len = end - name;
+	cnp->cn_consume = len - cnp->cn_namelen;
+
+	/* Keep a private, NUL-terminated copy of that text. */
+	pn->pt_arg = malloc(len + 1, M_TEMP, M_WAITOK);
+	pn->pt_size = len + 1;
+	bcopy(name, pn->pt_arg, pn->pt_size);
+	pn->pt_fileid = portal_fileid++;
+
+	*vpp = nvp;
+	return (0);
+}
+
+/*
+ * Connect so to a new socket spawned from the listening socket so2.
+ *
+ * This follows unp_connect, except that the listener is supplied by
+ * the caller instead of being found through namei. Fails with
+ * ECONNREFUSED when there is no listener, when it is not accepting
+ * connections, or when no new socket can be created from it, and with
+ * EPROTOTYPE when the two socket types differ; otherwise the result of
+ * unp_connect2() is returned.
+ */
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	struct socket *nso;
+	struct unpcb *lunp;
+	struct unpcb *nunp;
+
+	if (so2 == 0)
+		return (ECONNREFUSED);
+	if (so2->so_type != so->so_type)
+		return (EPROTOTYPE);
+	if (!(so2->so_options & SO_ACCEPTCONN))
+		return (ECONNREFUSED);
+
+	nso = sonewconn(so2, 0);
+	if (nso == 0)
+		return (ECONNREFUSED);
+
+	/* Give the new socket a copy of the listener's bound address. */
+	lunp = sotounpcb(so2);
+	nunp = sotounpcb(nso);
+	if (lunp->unp_addr)
+		nunp->unp_addr = (struct sockaddr_un *)
+		    dup_sockaddr((struct sockaddr *)lunp->unp_addr, 0);
+
+	return (unp_connect2(so, nso));
+}
+
+/*
+ * Open a portal node.
+ *
+ * Opening the root (VROOT) vnode succeeds immediately. For any other
+ * node a new AF_UNIX stream socket is connected to the server socket
+ * recorded in the mount (pm_server), the caller's credentials and the
+ * pathname saved at lookup time are sent over it, and the reply is
+ * read back. The reply is expected to carry a control message holding
+ * at least one file descriptor: the first is stored in p->p_dupfd and
+ * ENXIO is returned so that vn_open can act on it; any further
+ * descriptors are closed.
+ *
+ * Returns 0 for the root, ENODEV when p->p_dupfd is already >= 0,
+ * ENXIO once a descriptor has been handed over, or the error from
+ * whichever step failed.
+ */
+static int
+portal_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct socket *so = 0;
+ struct portalnode *pt;
+ struct proc *p = ap->a_p;
+ struct vnode *vp = ap->a_vp;
+ int s;
+ struct uio auio;
+ struct iovec aiov[2];
+ int res;
+ struct mbuf *cm = 0;
+ struct cmsghdr *cmsg;
+ int newfds;
+ int *ip;
+ int fd;
+ int error;
+ int len;
+ struct portalmount *fmp;
+ struct file *fp;
+ struct portal_cred pcred;
+
+ /*
+ * Nothing to do when opening the root node.
+ */
+ if (vp->v_flag & VROOT)
+ return (0);
+
+ /*
+ * Can't be opened unless the caller is set up
+ * to deal with the side effects. Check for this
+ * by testing whether the p_dupfd has been set.
+ */
+ if (p->p_dupfd >= 0)
+ return (ENODEV);
+
+ pt = VTOPORTAL(vp);
+ fmp = VFSTOPORTAL(vp->v_mount);
+
+ /*
+ * Create a new socket.
+ */
+ error = socreate(AF_UNIX, &so, SOCK_STREAM, 0, ap->a_p);
+ if (error)
+ goto bad;
+
+ /*
+ * Reserve some buffer space
+ */
+ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */
+ error = soreserve(so, res, res);
+ if (error)
+ goto bad;
+
+ /*
+ * Kick off connection
+ */
+ error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+ if (error)
+ goto bad;
+
+ /*
+ * Wait for connection to complete
+ */
+ /*
+ * XXX: Since the mount point is holding a reference on the
+ * underlying server socket, it is not easy to find out whether
+ * the server process is still running. To handle this problem
+ * we loop waiting for the new socket to be connected (something
+ * which will only happen if the server is still running) or for
+ * the reference count on the server socket to drop to 1, which
+ * will happen if the server dies. Sleep for 5 second intervals
+ * and keep polling the reference count. XXX.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ if (fmp->pm_server->f_count == 1) {
+ error = ECONNREFUSED;
+ splx(s);
+ goto bad;
+ }
+ (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+ }
+ splx(s);
+
+ if (so->so_error) {
+ error = so->so_error;
+ goto bad;
+ }
+
+ /*
+ * Set miscellaneous flags
+ */
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+
+ /*
+ * Build the request: a portal_cred describing the caller (open
+ * mode, uid and group list) followed by the saved pathname, sent
+ * as one two-element write from kernel space.
+ */
+ pcred.pcr_flag = ap->a_mode;
+ pcred.pcr_uid = ap->a_cred->cr_uid;
+ pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+ bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+ aiov[0].iov_base = (caddr_t) &pcred;
+ aiov[0].iov_len = sizeof(pcred);
+ aiov[1].iov_base = pt->pt_arg;
+ aiov[1].iov_len = pt->pt_size;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = 2;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = p;
+ auio.uio_offset = 0;
+ auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+ error = sosend(so, (struct sockaddr *) 0, &auio,
+ (struct mbuf *) 0, (struct mbuf *) 0, 0, p);
+ if (error)
+ goto bad;
+
+ /*
+ * Read the reply: up to sizeof(int) bytes of data plus a control
+ * message. Keep receiving while no control message has arrived,
+ * no data has been consumed and no error has been seen.
+ */
+ len = auio.uio_resid = sizeof(int);
+ do {
+ struct mbuf *m = 0;
+ int flags = MSG_WAITALL;
+ error = soreceive(so, (struct sockaddr **) 0, &auio,
+ &m, &cm, &flags);
+ if (error)
+ goto bad;
+
+ /*
+ * Grab an error code from the mbuf.
+ */
+ if (m) {
+ m = m_pullup(m, sizeof(int)); /* Needed? */
+ if (m) {
+ error = *(mtod(m, int *));
+ m_freem(m);
+ } else {
+ error = EINVAL;
+ }
+ } else {
+ if (cm == 0) {
+ error = ECONNRESET; /* XXX */
+#ifdef notdef
+ break;
+#endif
+ }
+ }
+ } while (cm == 0 && auio.uio_resid == len && !error);
+
+ if (cm == 0)
+ goto bad;
+
+ /*
+ * Fewer than sizeof(int) data bytes arrived: drop any error code
+ * picked up above and carry on with the rights message.
+ */
+ if (auio.uio_resid) {
+ error = 0;
+#ifdef notdef
+ error = EMSGSIZE;
+ goto bad;
+#endif
+ }
+
+ /*
+ * XXX: Break apart the control message, and retrieve the
+ * received file descriptor. Note that more than one descriptor
+ * may have been received, or that the rights chain may have more
+ * than a single mbuf in it. What to do?
+ */
+ cmsg = mtod(cm, struct cmsghdr *);
+ newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+ if (newfds == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ /*
+ * At this point the rights message consists of a control message
+ * header, followed by a data region containing a vector of
+ * integer file descriptors. The fds were allocated by the action
+ * of receiving the control message.
+ */
+ ip = (int *) (cmsg + 1);
+ fd = *ip++;
+ if (newfds > 1) {
+ /*
+ * Close extra fds.
+ */
+ int i;
+ printf("portal_open: %d extra fds\n", newfds - 1);
+ for (i = 1; i < newfds; i++) {
+ portal_closefd(p, *ip);
+ ip++;
+ }
+ }
+
+ /*
+ * Check that the mode the file is being opened for is a subset
+ * of the mode of the existing descriptor.
+ * NOTE(review): fd is taken from the control message and used to
+ * index fd_ofiles without a range or NULL check in this function.
+ */
+ fp = p->p_fd->fd_ofiles[fd];
+ if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+ portal_closefd(p, fd);
+ error = EACCES;
+ goto bad;
+ }
+
+ /*
+ * Save the dup fd in the proc structure then return the
+ * special error code (ENXIO) which causes magic things to
+ * happen in vn_open. The whole concept is, well, hmmm.
+ */
+ p->p_dupfd = fd;
+ error = ENXIO;
+
+bad:;
+ /*
+ * This exit path is shared by the hand-over case (error == ENXIO)
+ * and by every failure above.
+ * And discard the control message.
+ */
+ if (cm) {
+ m_freem(cm);
+ }
+
+ if (so) {
+ soshutdown(so, 2);
+ soclose(so);
+ }
+ return (error);
+}
+
+/*
+ * Return synthetic attributes for a portal vnode.
+ *
+ * Apart from the file id nothing is stored per node, so the answer is
+ * made up on the spot: owner and group 0, size and block size
+ * DEV_BSIZE, and access, modification and change times all set to the
+ * current time. The root (VROOT) is reported as a directory with
+ * read/write/execute permission for everyone, two links and file id 2;
+ * any other node is a regular file with read/write permission for
+ * everyone, one link and the file id it was given by portal_lookup().
+ *
+ * a_cred and a_p are not consulted. Always returns 0.
+ */
+static int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+
+	/*
+	 * All three timestamps report "now". The change time has to be
+	 * copied from va_atime like the modification time; assigning it
+	 * to itself would leave it as vattr_null() set it.
+	 */
+	nanotime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	vap->va_ctime = vap->va_atime;
+
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	vap->va_bytes = 0;
+
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+		    S_IRGRP|S_IWGRP|S_IXGRP|
+		    S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+	} else {
+		vap->va_type = VREG;
+		vap->va_mode = S_IRUSR|S_IWUSR|
+		    S_IRGRP|S_IWGRP|
+		    S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+/*
+ * Attribute changes on portal nodes are accepted and ignored, with two
+ * exceptions: the root vnode refuses them with EACCES, and a request
+ * that sets va_flags to anything but VNOVAL gets EOPNOTSUPP.
+ */
+static int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	/* The root is off limits. */
+	if (vp->v_flag & VROOT)
+		return (EACCES);
+
+	return (vap->va_flags != VNOVAL ? EOPNOTSUPP : 0);
+}
+
+/*
+ * Directory reads on a portal mount always come back empty; "." and
+ * ".." are awkward to produce here, so they are not. a_uio and
+ * a_eofflag are left untouched.
+ */
+static int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+
+	/*
+	 * Portal mounts cannot be exported and local callers have no
+	 * use for cookies at present, so a request for them is fatal.
+	 */
+	if (ap->a_ncookies != 0)
+		panic("portal_readdir: not hungry");
+
+	return (0);
+}
+
+/*
+ * Inactive hook: the vnode is simply unlocked on behalf of a_p.
+ * Always returns 0.
+ */
+static int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+
+	VOP_UNLOCK(vp, 0, p);
+	return (0);
+}
+
+/*
+ * Release the private data attached to a portal vnode: first the saved
+ * pathname (pt_arg), if there is one, then the structure that v_data
+ * points at. v_data is cleared afterwards. Always returns 0.
+ */
+static int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct portalnode *pn = VTOPORTAL(vp);
+
+	if (pn->pt_arg != 0) {
+		free((caddr_t) pn->pt_arg, M_TEMP);
+		pn->pt_arg = 0;
+	}
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+
+/*
+ * Print a one-line description of a portal vnode. The line is fixed;
+ * the vnode passed in is not examined.
+ */
+/* ARGSUSED */
+static int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_PORTAL, portal vnode\n");
+
+	return (0);
+}
+
+
+/*
+ * Handler for vnode operations that must never be invoked on a portal
+ * vnode: reaching it is treated as fatal.
+ */
+static int
+portal_badop()
+{
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Vnode operations table for portal vnodes. bmap is routed to
+ * portal_badop(), which panics; access is handled by vop_null.
+ * NOTE(review): operations not listed here presumably fall back to the
+ * vop_default_desc entry (vop_defaultop) -- confirm against the vnode
+ * operations framework.
+ */
+vop_t **portal_vnodeop_p;
+static struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_access_desc, (vop_t *) vop_null },
+ { &vop_bmap_desc, (vop_t *) portal_badop },
+ { &vop_getattr_desc, (vop_t *) portal_getattr },
+ { &vop_inactive_desc, (vop_t *) portal_inactive },
+ { &vop_lookup_desc, (vop_t *) portal_lookup },
+ { &vop_open_desc, (vop_t *) portal_open },
+ { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
+ { &vop_print_desc, (vop_t *) portal_print },
+ { &vop_readdir_desc, (vop_t *) portal_readdir },
+ { &vop_reclaim_desc, (vop_t *) portal_reclaim },
+ { &vop_setattr_desc, (vop_t *) portal_setattr },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc portal_vnodeop_opv_desc =
+ { &portal_vnodeop_p, portal_vnodeop_entries };
+
+VNODEOP_SET(portal_vnodeop_opv_desc);
diff --git a/sys/fs/procfs/README b/sys/fs/procfs/README
new file mode 100644
index 0000000..5f1b6cc
--- /dev/null
+++ b/sys/fs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory. the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files. these files
+are used to control and interrogate processes. the files implemented
+are:
+
+ file - xxx. the exec'ed file.
+
+ status - r/o. returns process status.
+
+ ctl - w/o. sends a control message to the process.
+ for example:
+ echo hup > /proc/curproc/ctl
+ will send a SIGHUP to the shell.
+ whereas
+ echo attach > /proc/1293/ctl
+ would set up process 1293 for debugging.
+ see below for more details.
+
+ mem - r/w. virtual memory image of the process.
+ parts of the address space are readable
+ only if they exist in the target process.
+ a more reasonable alternative might be
+ to return zero pages instead of an error.
+ comments?
+
+ note - w/o. writing a string here sends the
+ equivalent note to the process.
+ [ not implemented. ]
+
+ notepg - w/o. the same as note, but sends to all
+ members of the process group.
+ [ not implemented. ]
+
+ regs - r/w. process register set. this can be read
+ or written any time even if the process
+ is not stopped. since the bsd kernel
+ is single-processor, this implementation
+ will get the "right" register values.
+ a multi-proc kernel would need to do some
+ synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+ 9 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 0
+ 17 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 1
+ 89 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 10
+ 25 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 2
+2065 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 257
+2481 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 309
+ 265 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 32
+3129 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 390
+3209 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 400
+3217 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 401
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 408
+ 393 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 48
+ 409 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 50
+ 465 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 57
+ 481 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 59
+ 537 dr-xr-xr-x 2 root kmem 0 Sep 21 15:06 66
+ 545 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 67
+ 657 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 81
+ 665 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 82
+ 673 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 83
+ 681 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 84
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w------- 1 jsp staff 0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x 1 bin bin 90112 Mar 29 04:52 file
+3339 -rw------- 1 jsp staff 118784 Sep 21 15:06 mem
+3343 --w------- 1 jsp staff 0 Sep 21 15:06 note
+3344 --w------- 1 jsp staff 0 Sep 21 15:06 notepg
+3340 -rw------- 1 jsp staff 0 Sep 21 15:06 regs
+3342 -r--r--r-- 1 jsp staff 0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem 512-blocks Used Avail Capacity Mounted on
+proc 2 2 0 100% /proc
+/dev/wd0a 16186 13548 1018 93% /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+ attach - this stops the target process and
+ arranges for the sending process
+ to become the debug control process
+ wait - wait for the target process to come to
+ a steady state ready for debugging.
+ step - single step, with no signal delivery.
+ run - continue running, with no signal delivery,
+ until next trap or breakpoint.
+ <signame> - deliver signal <signame> and continue running.
+ detach - continue execution of the target process
+ and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP). the parent should do a "wait" then an
+"attach". as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id$
diff --git a/sys/fs/procfs/procfs.h b/sys/fs/procfs/procfs.h
new file mode 100644
index 0000000..619e1b2
--- /dev/null
+++ b/sys/fs/procfs/procfs.h
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs.h 8.9 (Berkeley) 5/14/95
+ *
+ * From:
+ * $Id: procfs.h,v 1.20 1998/07/07 04:08:44 bde Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem
+ *
+ * The order matters: PROCFS_FILENO() treats every type that sorts
+ * before Pproc (Proot and Pcurproc) as a node whose file number does
+ * not depend on a pid.
+ */
+typedef enum {
+ Proot, /* the filesystem root */
+ Pcurproc, /* symbolic link for curproc */
+ Pproc, /* a process-specific sub-directory */
+ Pfile, /* the executable file */
+ Pmem, /* the process's memory image */
+ Pregs, /* the process's register set */
+ Pfpregs, /* the process's FP register set */
+ Pctl, /* process control */
+ Pstatus, /* process status */
+ Pnote, /* process notifier */
+ Pnotepg, /* process group notifier */
+ Pmap, /* memory map */
+ Ptype, /* executable type */
+ Pcmdline /* command line */
+} pfstype;
+
+/*
+ * control data for the proc file system.
+ * A procfs vnode's v_data is read as a pointer to one of these
+ * (see VTOPFS), and pfs_vnode points back at the vnode (see PFSTOV).
+ */
+struct pfsnode {
+ struct pfsnode *pfs_next; /* next on list */
+ struct vnode *pfs_vnode; /* vnode associated with this pfsnode */
+ pfstype pfs_type; /* type of procfs node */
+ pid_t pfs_pid; /* associated process */
+ u_short pfs_mode; /* mode bits for stat() */
+ u_long pfs_flags; /* open flags */
+ u_long pfs_fileno; /* unique file id */
+ pid_t pfs_lockowner; /* pfs lock owner */
+};
+
+#define PROCFS_NOTELEN 64 /* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 8 /* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+/*
+ * True when the component name held in cnp is exactly len bytes long
+ * and matches the first len bytes of s. cnp and len are each
+ * evaluated twice.
+ */
+#define CNEQ(cnp, s, len) \
+ ((cnp)->cn_namelen == (len) && \
+ (bcmp((s), (cnp)->cn_nameptr, (len)) == 0))
+
+/* NOTE(review): presumably the gid of the kmem group -- confirm with users of this macro */
+#define KMEM_GROUP 2
+
+/*
+ * Check to see whether access to target process is allowed
+ * Evaluates to 1 if access is allowed.
+ *
+ * Access is allowed when p1's cr_uid, p_ruid and p_svuid all equal
+ * p2's p_ruid and p2 does not have P_SUGID set, or failing that when
+ * suser() returns 0 for p1's credentials.
+ * Both arguments are evaluated several times.
+ */
+#define CHECKIO(p1, p2) \
+ ((((p1)->p_cred->pc_ucred->cr_uid == (p2)->p_cred->p_ruid) && \
+ ((p1)->p_cred->p_ruid == (p2)->p_cred->p_ruid) && \
+ ((p1)->p_cred->p_svuid == (p2)->p_cred->p_ruid) && \
+ ((p2)->p_flag & P_SUGID) == 0) || \
+ (suser((p1)->p_cred->pc_ucred, &(p1)->p_acflag) == 0))
+
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>)
+ * Names are limited to PROCFS_NAMELEN bytes.
+ */
+#define PROCFS_NAMELEN 8
+struct pfsdent {
+ u_int32_t d_fileno;
+ u_int16_t d_reclen;
+ u_int8_t d_type;
+ u_int8_t d_namlen;
+ char d_name[PROCFS_NAMELEN];
+};
+/* Size of one directory entry as defined above. */
+#define UIO_MX sizeof(struct pfsdent)
+/*
+ * File number for a node: a type that sorts before Pproc gets
+ * type + 2; every other type gets ((pid + 1) << 4) + type.
+ * The type argument is evaluated more than once.
+ */
+#define PROCFS_FILENO(pid, type) \
+ (((type) < Pproc) ? \
+ ((type) + 2) : \
+ ((((pid)+1) << 4) + ((int) (type))))
+
+/*
+ * Convert between pfsnode and vnode
+ */
+#define VTOPFS(vp) ((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs) ((pfs)->pfs_vnode)
+
+/*
+ * One entry of a name-to-value table: nm_name is the text and nm_val
+ * the integer it stands for.
+ * NOTE(review): vfs_findname() presumably searches such a table for a
+ * name of the given length, and vfs_getuserstr() presumably copies a
+ * string out of a uio -- neither is defined in this header.
+ */
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+ const char *nm_name;
+ int nm_val;
+};
+
+int vfs_getuserstr __P((struct uio *, char *, int *));
+vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int));
+
+/* <machine/reg.h> */
+struct reg;
+struct fpreg;
+
+/* pid 0 resolves to &proc0 directly; any other pid goes through pfind(). */
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0)
+
+void procfs_exit __P((struct proc *));
+int procfs_freevp __P((struct vnode *));
+int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+struct vnode *procfs_findtextvp __P((struct proc *));
+int procfs_sstep __P((struct proc *));
+void procfs_fix_sstep __P((struct proc *));
+int procfs_read_regs __P((struct proc *, struct reg *));
+int procfs_write_regs __P((struct proc *, struct reg *));
+int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+/*
+ * Per-file handlers. All share one signature: two processes, the
+ * pfsnode being accessed and the uio describing the transfer.
+ */
+int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_domap __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_dotype __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+int procfs_docmdline __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+/* Return 1 if process has special kernel digging privileges */
+int procfs_kmemaccess __P((struct proc *));
+
+/* functions to check whether or not files should be displayed */
+int procfs_validfile __P((struct proc *));
+int procfs_validfpregs __P((struct proc *));
+int procfs_validregs __P((struct proc *));
+int procfs_validmap __P((struct proc *));
+int procfs_validtype __P((struct proc *));
+
+/* NOTE(review): presumably bits for pfsnode.pfs_flags -- confirm with the locking code */
+#define PROCFS_LOCKED 0x01
+#define PROCFS_WANT 0x02
+
+extern vop_t **procfs_vnodeop_p;
+
+int procfs_root __P((struct mount *, struct vnode **));
+int procfs_rw __P((struct vop_read_args *));
+#endif /* KERNEL */
diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
new file mode 100644
index 0000000..21724e5
--- /dev/null
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_ctl.c 8.4 (Berkeley) 6/15/94
+ *
+ * From:
+ * $Id: procfs_ctl.c,v 1.16 1997/04/27 21:32:21 alex Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ptrace.h>
+#include <sys/signalvar.h>
+#include <miscfs/procfs/procfs.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Fallback: when nothing has defined FIX_SSTEP before this point it
+ * expands to nothing. Either way the macro is defined from here on.
+ */
+#ifndef FIX_SSTEP
+#define FIX_SSTEP(p)
+#endif
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp)
+ * That is: p is stopped (SSTOP), its parent is curp, and it has
+ * P_TRACED set. p is evaluated three times.
+ */
+#define TRACE_WAIT_P(curp, p) \
+ ((p)->p_stat == SSTOP && \
+ (p)->p_pptr == (curp) && \
+ ((p)->p_flag & P_TRACED))
+
+/* Operation codes understood by procfs_control(). */
+#define PROCFS_CTL_ATTACH 1
+#define PROCFS_CTL_DETACH 2
+#define PROCFS_CTL_STEP 3
+#define PROCFS_CTL_RUN 4
+#define PROCFS_CTL_WAIT 5
+
+/*
+ * Command words accepted by procfs_doctl() and the PROCFS_CTL_* code
+ * each one maps to. The table ends with an all-zero entry.
+ */
+static vfs_namemap_t ctlnames[] = {
+ /* special /proc commands */
+ { "attach", PROCFS_CTL_ATTACH },
+ { "detach", PROCFS_CTL_DETACH },
+ { "step", PROCFS_CTL_STEP },
+ { "run", PROCFS_CTL_RUN },
+ { "wait", PROCFS_CTL_WAIT },
+ { 0 },
+};
+
+/*
+ * Signal names (lower case, without the "SIG" prefix) and the signal
+ * number each one maps to. procfs_doctl() consults this table when
+ * the text is not one of the commands in ctlnames. The table ends
+ * with an all-zero entry.
+ */
+static vfs_namemap_t signames[] = {
+ /* regular signal names */
+ { "hup", SIGHUP }, { "int", SIGINT },
+ { "quit", SIGQUIT }, { "ill", SIGILL },
+ { "trap", SIGTRAP }, { "abrt", SIGABRT },
+ { "iot", SIGIOT }, { "emt", SIGEMT },
+ { "fpe", SIGFPE }, { "kill", SIGKILL },
+ { "bus", SIGBUS }, { "segv", SIGSEGV },
+ { "sys", SIGSYS }, { "pipe", SIGPIPE },
+ { "alrm", SIGALRM }, { "term", SIGTERM },
+ { "urg", SIGURG }, { "stop", SIGSTOP },
+ { "tstp", SIGTSTP }, { "cont", SIGCONT },
+ { "chld", SIGCHLD }, { "ttin", SIGTTIN },
+ { "ttou", SIGTTOU }, { "io", SIGIO },
+ { "xcpu", SIGXCPU }, { "xfsz", SIGXFSZ },
+ { "vtalrm", SIGVTALRM }, { "prof", SIGPROF },
+ { "winch", SIGWINCH }, { "info", SIGINFO },
+ { "usr1", SIGUSR1 }, { "usr2", SIGUSR2 },
+ { 0 },
+};
+
+static int procfs_control __P((struct proc *curp, struct proc *p, int op));
+
+/*
+ * Carry out one debugger control operation (a PROCFS_CTL_* code) on
+ * process p on behalf of process curp.
+ *
+ * ATTACH marks p as traced, reparents it to curp (remembering the old
+ * parent's pid in p_oppid) and sends it SIGSTOP. DETACH and WAIT are
+ * accepted at any time; every other operation requires
+ * TRACE_WAIT_P(curp, p) and fails with EBUSY otherwise. DETACH, STEP
+ * and RUN finish by making p runnable again if it is stopped; WAIT
+ * sleeps until p stops. An unknown op panics.
+ *
+ * Returns 0 on success, or EBUSY, EINVAL, EPERM, or whatever
+ * procfs_sstep() or tsleep() reported.
+ */
+static int
+procfs_control(curp, p, op)
+ struct proc *curp;
+ struct proc *p;
+ int op;
+{
+ int error;
+
+ /*
+ * Attach - attaches the target process for debugging
+ * by the calling process.
+ */
+ if (op == PROCFS_CTL_ATTACH) {
+ /* check whether already being traced */
+ if (p->p_flag & P_TRACED)
+ return (EBUSY);
+
+ /* can't trace yourself! */
+ if (p->p_pid == curp->p_pid)
+ return (EINVAL);
+
+ /* can't trace init when securelevel > 0 */
+ if (securelevel > 0 && p->p_pid == 1)
+ return (EPERM);
+
+ /*
+ * Go ahead and set the trace flag.
+ * Save the old parent (it's reset in
+ * _DETACH, and also in kern_exit.c:wait4()).
+ * Reparent the process so that the tracing
+ * proc gets to see all the action.
+ * Stop the target.
+ */
+ p->p_flag |= P_TRACED;
+ faultin(p);
+ p->p_xstat = 0; /* XXX ? */
+ if (p->p_pptr != curp) {
+ p->p_oppid = p->p_pptr->p_pid;
+ proc_reparent(p, curp);
+ }
+ psignal(p, SIGSTOP);
+ return (0);
+ }
+
+ /*
+ * Target process must be stopped, owned by (curp) and
+ * be set up for tracing (P_TRACED flag set).
+ * Allow DETACH to take place at any time for sanity.
+ * Allow WAIT any time, of course.
+ */
+ switch (op) {
+ case PROCFS_CTL_DETACH:
+ case PROCFS_CTL_WAIT:
+ break;
+
+ default:
+ if (!TRACE_WAIT_P(curp, p))
+ return (EBUSY);
+ }
+
+
+#ifdef FIX_SSTEP
+ /*
+ * do single-step fixup if needed
+ * (FIX_SSTEP is always defined by this point: the #ifndef
+ * fallback near the top of the file supplies an empty one.)
+ */
+ FIX_SSTEP(p);
+#endif
+
+ /*
+ * Don't deliver any signal by default.
+ * To continue with a signal, just send
+ * the signal name to the ctl file
+ */
+ p->p_xstat = 0;
+
+ switch (op) {
+ /*
+ * Detach. Cleans up the target process, reparent it if possible
+ * and set it running once more.
+ */
+ case PROCFS_CTL_DETACH:
+ /* if not being traced, then this is a painless no-op */
+ if ((p->p_flag & P_TRACED) == 0)
+ return (0);
+
+ /* not being traced any more */
+ p->p_flag &= ~P_TRACED;
+
+ /* remove pending SIGTRAP, else the process will die */
+ p->p_siglist &= ~sigmask (SIGTRAP);
+
+ /* give process back to original parent */
+ if (p->p_oppid != p->p_pptr->p_pid) {
+ struct proc *pp;
+
+ pp = pfind(p->p_oppid);
+ if (pp)
+ proc_reparent(p, pp);
+ }
+
+ p->p_oppid = 0;
+ p->p_flag &= ~P_WAITED; /* XXX ? */
+ wakeup((caddr_t) curp); /* XXX for CTL_WAIT below ? */
+
+ break;
+
+ /*
+ * Step. Let the target process execute a single instruction.
+ */
+ case PROCFS_CTL_STEP:
+ PHOLD(p);
+ error = procfs_sstep(p);
+ PRELE(p);
+ if (error)
+ return (error);
+ break;
+
+ /*
+ * Run. Let the target process continue running until a breakpoint
+ * or some other trap.
+ */
+ case PROCFS_CTL_RUN:
+ break;
+
+ /*
+ * Wait for the target process to stop.
+ * If the target is not being traced then just wait
+ * to enter
+ */
+ case PROCFS_CTL_WAIT:
+ error = 0;
+ if (p->p_flag & P_TRACED) {
+ while (error == 0 &&
+ (p->p_stat != SSTOP) &&
+ (p->p_flag & P_TRACED) &&
+ (p->p_pptr == curp)) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfsx", 0);
+ }
+ if (error == 0 && !TRACE_WAIT_P(curp, p))
+ error = EBUSY;
+ } else {
+ while (error == 0 && p->p_stat != SSTOP) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfs", 0);
+ }
+ }
+ return (error);
+
+ default:
+ panic("procfs_control");
+ }
+
+ /* DETACH, STEP and RUN end up here: restart p if it is stopped. */
+ if (p->p_stat == SSTOP)
+ setrunnable(p);
+ return (0);
+}
+
+/*
+ * Handle a write to a process's ctl file.
+ *
+ * At most PROCFS_CTLLEN bytes of text are fetched from uio and looked
+ * up, first among the debugger commands (ctlnames) and then among the
+ * signal names (signames). A command is handed to procfs_control().
+ * A signal name either restarts p with that signal recorded in
+ * p_xstat, when p is in trace wait state relative to curp, or is
+ * posted to p with psignal() otherwise. Letting a traced target
+ * continue is thus a side effect of sending it a signal; a signal
+ * delivery cannot be single-stepped.
+ *
+ * Returns EOPNOTSUPP for anything but a write and for text that
+ * matches neither table, the error from vfs_getuserstr() or
+ * procfs_control() when one of those fails, and 0 otherwise. The pfs
+ * argument is unused.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char msg[PROCFS_CTLLEN+1];
+	vfs_namemap_t *entry;
+	int msglen;
+	int error;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	msglen = PROCFS_CTLLEN;
+	error = vfs_getuserstr(uio, msg, &msglen);
+	if (error)
+		return (error);
+
+	/* Debugger commands take precedence over signal names. */
+	entry = vfs_findname(ctlnames, msg, msglen);
+	if (entry != 0)
+		return (procfs_control(curp, p, entry->nm_val));
+
+	entry = vfs_findname(signames, msg, msglen);
+	if (entry == 0)
+		return (EOPNOTSUPP);
+
+	if (!TRACE_WAIT_P(curp, p)) {
+		psignal(p, entry->nm_val);
+		return (0);
+	}
+
+	/* Stopped under our control: continue it with this signal. */
+	p->p_xstat = entry->nm_val;
+#ifdef FIX_SSTEP
+	FIX_SSTEP(p);
+#endif
+	setrunnable(p);
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c
new file mode 100644
index 0000000..14c3fd3
--- /dev/null
+++ b/sys/fs/procfs/procfs_fpregs.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_fpregs.c 8.2 (Berkeley) 6/15/94
+ *
+ * From:
+ * $Id: procfs_fpregs.c,v 1.7 1997/08/02 14:32:11 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Read or write the floating point register set of process p through
+ * its fpregs file.
+ *
+ * The current registers are always fetched into a local struct fpreg
+ * first; uiomove() then copies between that structure (starting at
+ * uio_offset) and the caller.  For a write, the modified structure is
+ * stored back with procfs_write_fpregs(), but only while the target is
+ * stopped (p_stat == SSTOP).
+ *
+ * Returns EPERM if CHECKIO() refuses the caller, EINVAL if the offset
+ * does not lie within the structure, EBUSY for a write to a process
+ * that is not stopped, and otherwise the result of the register
+ * access or of uiomove().  Except on the EPERM path the uio offset is
+ * left at 0 on return.
+ */
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	if (!CHECKIO(curp, p))
+		return EPERM;
+
+	/*
+	 * Validate the offset in its own (off_t) width before it is
+	 * folded into an int length and a pointer.  A negative offset
+	 * would otherwise point kv in front of r with a positive
+	 * length, and a large one could be truncated into range,
+	 * letting uiomove() touch kernel stack outside the structure.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kv = (char *)&r + (int)uio->uio_offset;
+	kl = (int)sizeof(r) - (int)uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	PHOLD(p);
+
+	error = procfs_read_fpregs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be replaced while the target is stopped */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_fpregs(p, &r);
+	}
+	PRELE(p);
+
+	uio->uio_offset = 0;
+	return (error);
+}
+
+/*
+ * Returns 0 if process p has P_SYSTEM set in its flags and 1
+ * otherwise.
+ */
+int
+procfs_validfpregs(p)
+	struct proc *p;
+{
+	if (p->p_flag & P_SYSTEM)
+		return (0);
+	return (1);
+}
diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c
new file mode 100644
index 0000000..c6b8966
--- /dev/null
+++ b/sys/fs/procfs/procfs_map.c
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94
+ *
+ * $Id: procfs_map.c,v 1.18 1998/12/04 22:54:51 archie Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+#include <vm/vm.h>
+#include <vm/vm_prot.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_page.h>
+#include <vm/vm_object.h>
+
+
+#define MEBUFFERSIZE 256
+
+/*
+ * Produce a text description of the address space of process p, one
+ * line per map entry.
+ *
+ * The map entries can *almost* be read with programs like cat.  However,
+ * large maps need special programs to read.  It is not easy to implement
+ * a program that can sense the required size of the buffer, and then
+ * subsequently do a read with the appropriate size.  This operation cannot
+ * be atomic.  The best that we can do is to allow the program to do a read
+ * with an arbitrarily large buffer, and return as much as we can.  We
+ * return an error code if the buffer is too small (EFBIG), then the program
+ * can try a bigger buffer.
+ *
+ * Only reads are supported (EOPNOTSUPP otherwise), and only a read at
+ * offset 0 produces data; a read at any other offset returns 0 with
+ * nothing transferred.  Entries that are maps or submaps are skipped.
+ */
+int
+procfs_domap(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int len;
+	int error;
+	vm_map_t map = &p->p_vmspace->vm_map;
+	pmap_t pmap = &p->p_vmspace->vm_pmap;
+	vm_map_entry_t entry;
+	char mebuffer[MEBUFFERSIZE];
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	if (uio->uio_offset != 0)
+		return (0);
+
+	error = 0;
+	/*
+	 * The read lock is not taken when inspecting our own map.
+	 * NOTE(review): presumably because uiomove() below may fault on
+	 * that same map -- confirm.
+	 */
+	if (map != &curproc->p_vmspace->vm_map)
+		vm_map_lock_read(map);
+	for (entry = map->header.next;
+	    ((uio->uio_resid > 0) && (entry != &map->header));
+	    entry = entry->next) {
+		vm_object_t obj, tobj, lobj;
+		int ref_count, shadow_count, flags;
+		vm_offset_t addr;
+		int resident, privateresident;
+		char *type;
+
+		if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP))
+			continue;
+
+		obj = entry->object.vm_object;
+		if (obj && (obj->shadow_count == 1))
+			privateresident = obj->resident_page_count;
+		else
+			privateresident = 0;
+
+		/* count the pages of this entry the pmap has a mapping for */
+		resident = 0;
+		addr = entry->start;
+		while (addr < entry->end) {
+			if (pmap_extract(pmap, addr))
+				resident++;
+			addr += PAGE_SIZE;
+		}
+
+		/* walk to the last object in the backing chain */
+		for (lobj = tobj = obj; tobj; tobj = tobj->backing_object)
+			lobj = tobj;
+
+		if (lobj) {
+			/* the type comes from the bottom of the chain ... */
+			switch (lobj->type) {
+			default:
+			case OBJT_DEFAULT:
+				type = "default";
+				break;
+			case OBJT_VNODE:
+				type = "vnode";
+				break;
+			case OBJT_SWAP:
+				type = "swap";
+				break;
+			case OBJT_DEVICE:
+				type = "device";
+				break;
+			}
+
+			/* ... the counts and flags from the entry's own object */
+			flags = obj->flags;
+			ref_count = obj->ref_count;
+			shadow_count = obj->shadow_count;
+		} else {
+			type = "none";
+			flags = 0;
+			ref_count = 0;
+			shadow_count = 0;
+		}
+
+		/*
+		 * format:
+		 *  start, end, resident, private resident, object, access,
+		 *  ref_count, shadow_count, flags, COW/NCOW, NC/NNC, type.
+		 *
+		 * The addresses are vm_offset_t, which need not have the
+		 * width of an int, so they are printed as u_long.
+		 */
+		snprintf(mebuffer, sizeof(mebuffer),
+		    "0x%lx 0x%lx %d %d %p %s%s%s %d %d 0x%x %s %s %s\n",
+		    (u_long)entry->start, (u_long)entry->end,
+		    resident, privateresident, obj,
+		    (entry->protection & VM_PROT_READ)?"r":"-",
+		    (entry->protection & VM_PROT_WRITE)?"w":"-",
+		    (entry->protection & VM_PROT_EXECUTE)?"x":"-",
+		    ref_count, shadow_count, flags,
+		    (entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW",
+		    (entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC",
+		    type);
+
+		/* never hand out a partial line: fail the read instead */
+		len = strlen(mebuffer);
+		if (len > uio->uio_resid) {
+			error = EFBIG;
+			break;
+		}
+		error = uiomove(mebuffer, len, uio);
+		if (error)
+			break;
+	}
+	if (map != &curproc->p_vmspace->vm_map)
+		vm_map_unlock_read(map);
+	return error;
+}
+
+/*
+ * Returns 0 if process p has P_SYSTEM set in its flags and 1
+ * otherwise.
+ */
+int
+procfs_validmap(p)
+	struct proc *p;
+{
+	if (p->p_flag & P_SYSTEM)
+		return (0);
+	return (1);
+}
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
new file mode 100644
index 0000000..22d8f74
--- /dev/null
+++ b/sys/fs/procfs/procfs_mem.c
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_mem.c 8.5 (Berkeley) 6/15/94
+ *
+ * $Id: procfs_mem.c,v 1.34 1998/07/15 02:32:19 bde Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_prot.h>
+#include <sys/lock.h>
+#include <vm/pmap.h>
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_extern.h>
+#include <sys/user.h>
+#include <sys/ptrace.h>
+
+static int procfs_rwmem __P((struct proc *curp,
+ struct proc *p, struct uio *uio));
+/*
+ * Transfer data between the uio and the address space of process p,
+ * at most one page per loop iteration, starting at uio_offset.
+ *
+ * For ordinary user addresses the page is faulted in on the target's
+ * map, looked up, wired, entered at a temporary kernel address and
+ * copied with uiomove().  Offsets at or above VM_MAXUSER_ADDRESS are
+ * served from p->p_addr (the "U area") instead and are read-only.
+ *
+ * Returns 0 on success, EFAULT if the process is exiting, its vmspace
+ * has no references, or a page cannot be faulted in or found, and
+ * otherwise the error from uiomove().  A U-area request that is
+ * refused ends the transfer early with 0.
+ */
+static int
+procfs_rwmem(curp, p, uio)
+ struct proc *curp;
+ struct proc *p;
+ struct uio *uio;
+{
+ int error;
+ int writing;
+ struct vmspace *vm;
+ vm_map_t map;
+ vm_object_t object = NULL;
+ vm_offset_t pageno = 0; /* page number */
+ vm_prot_t reqprot;
+ vm_offset_t kva;
+
+ /*
+ * if the vmspace is in the midst of being deallocated or the
+ * process is exiting, don't try to grab anything. The page table
+ * usage in that process can be messed up.
+ *
+ * Otherwise take a reference on the vmspace; it is dropped again
+ * by the vmspace_free() call at the end of this function.
+ */
+ vm = p->p_vmspace;
+ if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
+ return EFAULT;
+ ++vm->vm_refcnt;
+ /*
+ * The map we want...
+ */
+ map = &vm->vm_map;
+
+ writing = uio->uio_rw == UIO_WRITE;
+ reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ;
+ /* one page of kernel virtual address space to map each target page at */
+ kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
+
+ /*
+ * Only map in one page at a time. We don't have to, but it
+ * makes things easier. This way is trivial - right?
+ */
+ do {
+ vm_map_t tmap;
+ vm_offset_t uva;
+ int page_offset; /* offset into page */
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ boolean_t wired;
+ vm_pindex_t pindex;
+ u_int len;
+ vm_page_t m;
+
+ object = NULL;
+
+ uva = (vm_offset_t) uio->uio_offset;
+
+ /*
+ * Get the page number of this segment.
+ */
+ pageno = trunc_page(uva);
+ page_offset = uva - pageno;
+
+ /*
+ * How many bytes to copy
+ */
+ len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+ /*
+ * Offsets at or above VM_MAXUSER_ADDRESS address the U area.
+ * Writes, offsets beyond UPAGES pages, and reads for which
+ * ptrace_read_u_check() returns nonzero while the caller lacks
+ * procfs_kmemaccess() end the transfer without an error.
+ */
+ if (uva >= VM_MAXUSER_ADDRESS) {
+ vm_offset_t tkva;
+
+ if (writing ||
+ uva >= VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE ||
+ (ptrace_read_u_check(p,
+ uva - (vm_offset_t) VM_MAXUSER_ADDRESS,
+ (size_t) len) &&
+ !procfs_kmemaccess(curp))) {
+ error = 0;
+ break;
+ }
+
+ /* we are reading the "U area", force it into core */
+ PHOLD(p);
+
+ /* sanity check */
+ if (!(p->p_flag & P_INMEM)) {
+ /* aiee! */
+ PRELE(p);
+ error = EFAULT;
+ break;
+ }
+
+ /* populate the ptrace/procfs area */
+ p->p_addr->u_kproc.kp_proc = *p;
+ fill_eproc (p, &p->p_addr->u_kproc.kp_eproc);
+
+ /* locate the in-core address */
+ tkva = (uintptr_t)p->p_addr + uva - VM_MAXUSER_ADDRESS;
+
+ /* transfer it */
+ error = uiomove((caddr_t)tkva, len, uio);
+
+ /* let the pages go */
+ PRELE(p);
+
+ continue;
+ }
+
+ /*
+ * Fault the page on behalf of the process
+ */
+ error = vm_fault(map, pageno, reqprot, FALSE);
+ if (error) {
+ error = EFAULT;
+ break;
+ }
+
+ /*
+ * Now we need to get the page. out_prot and wired are not
+ * used afterwards, and out_entry is only handed back to
+ * vm_map_lookup_done(). One would think the vm code
+ * would be a *bit* nicer... We use tmap because
+ * vm_map_lookup() can change the map argument.
+ */
+ tmap = map;
+ error = vm_map_lookup(&tmap, pageno, reqprot,
+ &out_entry, &object, &pindex, &out_prot,
+ &wired);
+
+ if (error) {
+ error = EFAULT;
+
+ /*
+ * Make sure that there is no residue in 'object' from
+ * an error return on vm_map_lookup.
+ */
+ object = NULL;
+
+ break;
+ }
+
+ m = vm_page_lookup(object, pindex);
+
+ /* Allow fallback to backing objects if we are reading */
+
+ while (m == NULL && !writing && object->backing_object) {
+
+ pindex += OFF_TO_IDX(object->backing_object_offset);
+ object = object->backing_object;
+
+ m = vm_page_lookup(object, pindex);
+ }
+
+ if (m == NULL) {
+ error = EFAULT;
+
+ /*
+ * No reference has been taken on 'object' yet, so clear
+ * it to keep the cleanup code after the loop from
+ * deallocating it.
+ */
+ object = NULL;
+
+ vm_map_lookup_done(tmap, out_entry);
+
+ break;
+ }
+
+ /*
+ * Wire the page into memory
+ */
+ vm_page_wire(m);
+
+ /*
+ * We're done with tmap now.
+ * But reference the object first, so that we won't lose
+ * it.
+ */
+ vm_object_reference(object);
+ vm_map_lookup_done(tmap, out_entry);
+
+ pmap_kenter(kva, VM_PAGE_TO_PHYS(m));
+
+ /*
+ * Now do the i/o move.
+ */
+ error = uiomove((caddr_t)(kva + page_offset), len, uio);
+
+ pmap_kremove(kva);
+
+ /*
+ * release the page and the object
+ */
+ vm_page_unwire(m, 1);
+ vm_object_deallocate(object);
+
+ object = NULL;
+
+ } while (error == 0 && uio->uio_resid > 0);
+
+ if (object)
+ vm_object_deallocate(object);
+
+ kmem_free(kernel_map, kva, PAGE_SIZE);
+ vmspace_free(vm);
+ return (error);
+}
+
+/*
+ * Copy data in and out of the target process p on behalf of curp.
+ * The work is done by procfs_rwmem(), which maps the process's pages
+ * into the kernel and does a uiomove direct from the kernel address
+ * space.
+ *
+ * XXX
+ * ps is sgid kmem and does not work properly when it is refused here,
+ * so a caller that fails CHECKIO() is still let through for *reading*
+ * if procfs_kmemaccess() accepts it.  Arguably, this is a bug with
+ * what ps does.  It is only needed for Pmem nodes, and it is still not
+ * good: illicit data may be grabbed by any process that somehow gets
+ * to be KMEM_GROUP.  Note that this also means that KMEM_GROUP can't
+ * change without editing procfs.h!  All in all, quite yucky.
+ *
+ * Returns 0 for an empty request, EPERM when access is refused, and
+ * otherwise whatever procfs_rwmem() returns.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+
+	/* nothing to transfer */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	if (!CHECKIO(curp, p)) {
+		/* the kmem escape hatch applies to reads only */
+		if (uio->uio_rw != UIO_READ || !procfs_kmemaccess(curp))
+			return EPERM;
+	}
+
+	return (procfs_rwmem(curp, p, uio));
+}
+
+/*
+ * Return the vnode from which the text segment of process (p)
+ * is being executed.
+ *
+ * Asking the VM system would be nicer, but there is no sure-fire
+ * way of doing that.  Instead the p_textvp field of the proc
+ * structure is used; fork(), exec() and wait() all maintain it,
+ * and it contains a held reference to the exec'ed vnode.
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = p->p_textvp;
+	return (textvp);
+}
+
+/*
+ * Decide whether curp is privileged enough for the kmem-style read
+ * access that procfs grants in addition to CHECKIO().
+ *
+ * Returns 1 if curp's credentials pass suser() or include KMEM_GROUP
+ * among their groups, and 0 otherwise.
+ */
+int
+procfs_kmemaccess(curp)
+	struct proc *curp;
+{
+	int i;
+	struct ucred *cred;
+
+	cred = curp->p_cred->pc_ucred;
+	/*
+	 * suser() returns 0 when the credentials belong to the
+	 * superuser and an errno value otherwise, so access is granted
+	 * on a zero return, not on a nonzero one.
+	 */
+	if (suser(cred, &curp->p_acflag) == 0)
+		return 1;
+
+	for (i = 0; i < cred->cr_ngroups; i++)
+		if (cred->cr_groups[i] == KMEM_GROUP)
+			return 1;
+
+	return 0;
+}
diff --git a/sys/fs/procfs/procfs_note.c b/sys/fs/procfs/procfs_note.c
new file mode 100644
index 0000000..8bfde33
--- /dev/null
+++ b/sys/fs/procfs/procfs_note.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_note.c 8.2 (Berkeley) 1/21/94
+ *
+ * $Id: procfs_note.c,v 1.4 1997/02/22 09:40:28 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Handle a write to the note file of process p.
+ *
+ * The note string is fetched from the uio, but nothing is done with
+ * it yet: delivery to the process's notify function is not
+ * implemented.
+ *
+ * Returns EINVAL for anything other than a write, the error from
+ * vfs_getuserstr() if the string cannot be fetched, and EOPNOTSUPP
+ * otherwise.
+ */
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char text[PROCFS_NOTELEN+1];
+	int textlen = PROCFS_NOTELEN;
+	int rv;
+
+	if (uio->uio_rw != UIO_WRITE)
+		return (EINVAL);
+
+	rv = vfs_getuserstr(uio, text, &textlen);
+
+	/* send to process's notify function */
+	return (rv ? rv : EOPNOTSUPP);
+}
diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c
new file mode 100644
index 0000000..d215d44
--- /dev/null
+++ b/sys/fs/procfs/procfs_regs.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_regs.c 8.4 (Berkeley) 6/15/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 1.7 1997/08/02 14:32:16 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
+/*
+ * Read or write the general register set of process p through its
+ * regs file.
+ *
+ * The current registers are always fetched into a local struct reg
+ * first; uiomove() then copies between that structure (starting at
+ * uio_offset) and the caller.  For a write, the modified structure is
+ * stored back with procfs_write_regs(), but only while the target is
+ * stopped (p_stat == SSTOP).
+ *
+ * Returns EPERM if CHECKIO() refuses the caller, EINVAL if the offset
+ * does not lie within the structure, EBUSY for a write to a process
+ * that is not stopped, and otherwise the result of the register
+ * access or of uiomove().  Except on the EPERM path the uio offset is
+ * left at 0 on return.
+ */
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv;
+	int kl;
+
+	if (!CHECKIO(curp, p))
+		return EPERM;
+
+	/*
+	 * Validate the offset in its own (off_t) width before it is
+	 * folded into an int length and a pointer.  A negative offset
+	 * would otherwise point kv in front of r with a positive
+	 * length, and a large one could be truncated into range,
+	 * letting uiomove() touch kernel stack outside the structure.
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > (off_t)sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kv = (char *)&r + (int)uio->uio_offset;
+	kl = (int)sizeof(r) - (int)uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	PHOLD(p);
+
+	error = procfs_read_regs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be replaced while the target is stopped */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+	PRELE(p);
+
+	uio->uio_offset = 0;
+	return (error);
+}
+
+/*
+ * Returns 0 if process p has P_SYSTEM set in its flags and 1
+ * otherwise.
+ */
+int
+procfs_validregs(p)
+	struct proc *p;
+{
+	if (p->p_flag & P_SYSTEM)
+		return (0);
+	return (1);
+}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
new file mode 100644
index 0000000..3176a64
--- /dev/null
+++ b/sys/fs/procfs/procfs_status.c
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_status.c 8.4 (Berkeley) 6/15/94
+ *
+ * From:
+ * $Id: procfs_status.c,v 1.11 1998/07/11 07:45:45 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/tty.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+int
+procfs_dostatus(curp, p, pfs, uio)
+ struct proc *curp;
+ struct proc *p;
+ struct pfsnode *pfs;
+ struct uio *uio;
+{
+ struct session *sess;
+ struct tty *tp;
+ struct ucred *cr;
+ char *ps;
+ char *sep;
+ int pid, ppid, pgid, sid;
+ int i;
+ int xlen;
+ int error;
+ char psbuf[256]; /* XXX - conservative */
+
+ if (uio->uio_rw != UIO_READ)
+ return (EOPNOTSUPP);
+
+ pid = p->p_pid;
+ ppid = p->p_pptr ? p->p_pptr->p_pid : 0,
+ pgid = p->p_pgrp->pg_id;
+ sess = p->p_pgrp->pg_session;
+ sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+/* comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg
+ euid ruid rgid,egid,groups[1 .. NGROUPS]
+*/
+ ps = psbuf;
+ bcopy(p->p_comm, ps, MAXCOMLEN);
+ ps[MAXCOMLEN] = '\0';
+ ps += strlen(ps);
+ ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+ if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+ ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+ else
+ ps += sprintf(ps, "%d,%d ", -1, -1);
+
+ sep = "";
+ if (sess->s_ttyvp) {
+ ps += sprintf(ps, "%sctty", sep);
+ sep = ",";
+ }
+ if (SESS_LEADER(p)) {
+ ps += sprintf(ps, "%ssldr", sep);
+ sep = ",";
+ }
+ if (*sep != ',')
+ ps += sprintf(ps, "noflags");
+
+ if (p->p_flag & P_INMEM)
+ ps += sprintf(ps, " %ld,%ld",
+ p->p_stats->p_start.tv_sec,
+ p->p_stats->p_start.tv_usec);
+ else
+ ps += sprintf(ps, " -1,-1");
+
+ {
+ struct timeval ut, st;
+
+ calcru(p, &ut, &st, (void *) 0);
+ ps += sprintf(ps, " %ld,%ld %ld,%ld",
+ ut.tv_sec,
+ ut.tv_usec,
+ st.tv_sec,
+ st.tv_usec);
+ }
+
+ ps += sprintf(ps, " %s",
+ (p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+ cr = p->p_ucred;
+
+ ps += sprintf(ps, " %lu %lu %lu",
+ (u_long)cr->cr_uid,
+ (u_long)p->p_cred->p_ruid,
+ (u_long)p->p_cred->p_rgid);
+
+ /* egid (p->p_cred->p_svgid) is equal to cr_ngroups[0]
+ see also getegid(2) in /sys/kern/kern_prot.c */
+
+ for (i = 0; i < cr->cr_ngroups; i++)
+ ps += sprintf(ps, ",%lu", (u_long)cr->cr_groups[i]);
+ ps += sprintf(ps, "\n");
+
+ xlen = ps - psbuf;
+ xlen -= uio->uio_offset;
+ ps = psbuf + uio->uio_offset;
+ xlen = imin(xlen, uio->uio_resid);
+ if (xlen <= 0)
+ error = 0;
+ else
+ error = uiomove(ps, xlen, uio);
+
+ return (error);
+}
+
+/*
+ * Produce the contents of the cmdline file of process p.
+ *
+ * For now, this is a hack: only the command name (p_comm) followed by
+ * a newline is returned.  To implement this fully would require
+ * groping around in the process address space to follow argv etc.
+ *
+ * Returns EOPNOTSUPP for anything other than a read, EINVAL for a
+ * negative offset, 0 when the offset is at or beyond the end of the
+ * text, and otherwise the result of uiomove().
+ */
+int
+procfs_docmdline(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char *ps;
+	int xlen;
+	int error;
+	char psbuf[256];
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+
+	ps += sprintf(ps, "\n");
+
+	/*
+	 * Compare the offset in its own width before narrowing it, and
+	 * use the signed imin(): with the unsigned min() a negative
+	 * remaining length turned into uio_resid and the copy ran past
+	 * the end of psbuf.
+	 */
+	xlen = ps - psbuf;
+	if (uio->uio_offset >= xlen)
+		return (0);
+	ps = psbuf + (int)uio->uio_offset;
+	xlen = imin(xlen - (int)uio->uio_offset, uio->uio_resid);
+	if (xlen <= 0)
+		error = 0;
+	else
+		error = uiomove(ps, xlen, uio);
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_subr.c b/sys/fs/procfs/procfs_subr.c
new file mode 100644
index 0000000..98e3687
--- /dev/null
+++ b/sys/fs/procfs/procfs_subr.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95
+ *
+ * $Id: procfs_subr.c,v 1.22 1999/01/05 03:53:06 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+/* head of the list of pfsnodes, linked through pfs_next */
+static struct pfsnode *pfshead;
+/* PROCFS_LOCKED / PROCFS_WANT flag word used by procfs_allocvp() */
+static int pfsvplock;
+
+/*
+ * allocate a pfsnode/vnode pair. the vnode is
+ * referenced, but not locked.
+ *
+ * the pid, pfs_type, and mount point uniquely
+ * identify a pfsnode. the mount point is needed
+ * because someone might mount this filesystem
+ * twice.
+ *
+ * all pfsnodes are maintained on a singly-linked
+ * list. new nodes are only allocated when they cannot
+ * be found on this list. entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list. this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode. this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ *
+ * (mp) is the procfs mount, (pid) and (pfs_type) select
+ * the node, and the resulting vnode is stored in (*vpp).
+ * returns 0 on success, or the error from getnewvnode().
+ * an unknown pfs_type panics.
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+ struct mount *mp;
+ struct vnode **vpp;
+ long pid;
+ pfstype pfs_type;
+{
+ struct proc *p = curproc; /* XXX */
+ struct pfsnode *pfs;
+ struct vnode *vp;
+ struct pfsnode **pp;
+ int error;
+
+loop:
+ /* first look for an existing node with the same identity */
+ for (pfs = pfshead; pfs != 0; pfs = pfs->pfs_next) {
+ vp = PFSTOV(pfs);
+ if (pfs->pfs_pid == pid &&
+ pfs->pfs_type == pfs_type &&
+ vp->v_mount == mp) {
+ /* vget failed: rescan the list from the top */
+ if (vget(vp, 0, p))
+ goto loop;
+ *vpp = vp;
+ return (0);
+ }
+ }
+
+ /*
+ * otherwise lock the vp list while we call getnewvnode
+ * since that can block.
+ */
+ if (pfsvplock & PROCFS_LOCKED) {
+ /* someone else is allocating: wait, then search again */
+ pfsvplock |= PROCFS_WANT;
+ (void) tsleep((caddr_t) &pfsvplock, PINOD, "pfsavp", 0);
+ goto loop;
+ }
+ pfsvplock |= PROCFS_LOCKED;
+
+ /*
+ * Do the MALLOC before the getnewvnode since doing so afterward
+ * might cause a bogus v_data pointer to get dereferenced
+ * elsewhere if MALLOC should block.
+ */
+ MALLOC(pfs, struct pfsnode *, sizeof(struct pfsnode), M_TEMP, M_WAITOK);
+
+ if ((error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp)) != 0) {
+ FREE(pfs, M_TEMP);
+ goto out;
+ }
+ vp = *vpp;
+
+ vp->v_data = pfs;
+
+ /* initialise the private node */
+ pfs->pfs_next = 0;
+ pfs->pfs_pid = (pid_t) pid;
+ pfs->pfs_type = pfs_type;
+ pfs->pfs_vnode = vp;
+ pfs->pfs_flags = 0;
+ pfs->pfs_lockowner = 0;
+ pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+ /* pick the permission bits and vnode type for this kind of node */
+ switch (pfs_type) {
+ case Proot: /* /proc = dr-xr-xr-x */
+ pfs->pfs_mode = (VREAD|VEXEC) |
+ (VREAD|VEXEC) >> 3 |
+ (VREAD|VEXEC) >> 6;
+ vp->v_type = VDIR;
+ vp->v_flag = VROOT;
+ break;
+
+ case Pcurproc: /* /proc/curproc = lr--r--r-- */
+ pfs->pfs_mode = (VREAD) |
+ (VREAD >> 3) |
+ (VREAD >> 6);
+ vp->v_type = VLNK;
+ break;
+
+ case Pproc:
+ pfs->pfs_mode = (VREAD|VEXEC) |
+ (VREAD|VEXEC) >> 3 |
+ (VREAD|VEXEC) >> 6;
+ vp->v_type = VDIR;
+ break;
+
+ case Pfile:
+ case Pmem:
+ pfs->pfs_mode = (VREAD|VWRITE) |
+ (VREAD) >> 3;;
+ vp->v_type = VREG;
+ break;
+
+ case Pregs:
+ case Pfpregs:
+ pfs->pfs_mode = (VREAD|VWRITE);
+ vp->v_type = VREG;
+ break;
+
+ case Pctl:
+ case Pnote:
+ case Pnotepg:
+ pfs->pfs_mode = (VWRITE);
+ vp->v_type = VREG;
+ break;
+
+ case Ptype:
+ case Pmap:
+ case Pstatus:
+ case Pcmdline:
+ pfs->pfs_mode = (VREAD) |
+ (VREAD >> 3) |
+ (VREAD >> 6);
+ vp->v_type = VREG;
+ break;
+
+ default:
+ panic("procfs_allocvp");
+ }
+
+ /* add to procfs vnode list */
+ for (pp = &pfshead; *pp; pp = &(*pp)->pfs_next)
+ continue;
+ *pp = pfs;
+
+out:
+ /* drop the list lock and wake anyone who slept waiting for it */
+ pfsvplock &= ~PROCFS_LOCKED;
+
+ if (pfsvplock & PROCFS_WANT) {
+ pfsvplock &= ~PROCFS_WANT;
+ wakeup((caddr_t) &pfsvplock);
+ }
+
+ return (error);
+}
+
+/*
+ * detach the pfsnode belonging to (vp) from the pfshead
+ * list (if it is on it), free it, and clear vp->v_data.
+ * always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *victim = VTOPFS(vp);
+	struct pfsnode **link = &pfshead;
+
+	/* advance over the link fields until one points at the victim */
+	while (*link != 0 && *link != victim)
+		link = &(*link)->pfs_next;
+
+	/* found it: splice it out of the chain */
+	if (*link != 0)
+		*link = victim->pfs_next;
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * common i/o entry point for the per-process files.
+ *
+ * looks up the target process, waits until nobody else
+ * holds pfs_lockowner on this node, then dispatches on
+ * pfs_type to the matching procfs_do*() handler and
+ * returns its result.
+ *
+ * returns EINVAL if the process cannot be found, EACCES
+ * for a write aimed at pid 1 while securelevel > 0, and
+ * EOPNOTSUPP for node types that have no handler here.
+ *
+ * NOTE(review): (p) is looked up before the tsleep() loop;
+ * confirm the process cannot go away while we sleep.
+ */
+int
+procfs_rw(ap)
+ struct vop_read_args *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct uio *uio = ap->a_uio;
+ struct proc *curp = uio->uio_procp;
+ struct pfsnode *pfs = VTOPFS(vp);
+ struct proc *p;
+ int rtval;
+
+ p = PFIND(pfs->pfs_pid);
+ if (p == 0)
+ return (EINVAL);
+ if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE)
+ return (EACCES);
+
+ /* one i/o at a time per node: wait for the current owner */
+ while (pfs->pfs_lockowner) {
+ tsleep(&pfs->pfs_lockowner, PRIBIO, "pfslck", 0);
+ }
+ pfs->pfs_lockowner = curproc->p_pid;
+
+ switch (pfs->pfs_type) {
+ case Pnote:
+ case Pnotepg:
+ rtval = procfs_donote(curp, p, pfs, uio);
+ break;
+
+ case Pregs:
+ rtval = procfs_doregs(curp, p, pfs, uio);
+ break;
+
+ case Pfpregs:
+ rtval = procfs_dofpregs(curp, p, pfs, uio);
+ break;
+
+ case Pctl:
+ rtval = procfs_doctl(curp, p, pfs, uio);
+ break;
+
+ case Pstatus:
+ rtval = procfs_dostatus(curp, p, pfs, uio);
+ break;
+
+ case Pmap:
+ rtval = procfs_domap(curp, p, pfs, uio);
+ break;
+
+ case Pmem:
+ rtval = procfs_domem(curp, p, pfs, uio);
+ break;
+
+ case Ptype:
+ rtval = procfs_dotype(curp, p, pfs, uio);
+ break;
+
+ case Pcmdline:
+ rtval = procfs_docmdline(curp, p, pfs, uio);
+ break;
+
+ default:
+ rtval = EOPNOTSUPP;
+ break;
+ }
+ /* release the node and let any waiter in the loop above retry */
+ pfs->pfs_lockowner = 0;
+ wakeup(&pfs->pfs_lockowner);
+ return rtval;
+}
+
+/*
+ * Copy a string written by userland into (buf) and tidy it up:
+ * the result is nul-terminated and one trailing newline, if any,
+ * is dropped (so "echo foo > file" works from the shell).
+ *
+ * On entry *buflenp is the capacity of (buf) not counting the
+ * terminating nul, i.e. (buf) must hold *buflenp + 1 chars.
+ * On success *buflenp is updated to the length of the string.
+ *
+ * Returns 0 on success or one of
+ *
+ * EINVAL: file offset is non-zero.
+ * EMSGSIZE: message is longer than kernel buffer
+ * EFAULT: user i/o buffer is not addressable
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int nbytes;
+	int error;
+
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* the whole write has to fit in the buffer in one go */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	nbytes = uio->uio_resid;
+
+	error = uiomove(buf, nbytes, uio);
+	if (error != 0)
+		return (error);
+
+	/* rewind so that repeated writes need no seek in between */
+	uio->uio_offset = 0;
+
+	/* terminate, then trim a single trailing newline */
+	buf[nbytes] = '\0';
+	nbytes = strlen(buf);
+	if (nbytes > 0 && buf[nbytes - 1] == '\n')
+		buf[--nbytes] = '\0';
+	*buflenp = nbytes;
+
+	return (0);
+}
+
+/*
+ * Look up (buf) in the name table (nm), which is terminated
+ * by an entry whose nm_name is null. (buf) is expected to
+ * be nul-terminated at buf[buflen].
+ *
+ * Returns the matching entry, or 0 if there is none.
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+
+	/* a negative length can never name anything */
+	if (buflen < 0)
+		return (0);
+
+	for (; nm->nm_name; nm++) {
+		/*
+		 * check the length first so that bcmp() never
+		 * reads beyond the end of a shorter table name.
+		 */
+		if (strlen(nm->nm_name) == (size_t)buflen &&
+		    bcmp(buf, nm->nm_name, buflen + 1) == 0)
+			return (nm);
+	}
+
+	return (0);
+}
+
+/*
+ * get rid of every procfs vnode that refers to the
+ * process (p), by calling vgone() on each of them.
+ */
+void
+procfs_exit(struct proc *p)
+{
+	struct pfsnode *pfs;
+	pid_t pid = p->p_pid;
+
+	/*
+	 * Why the scan restarts after every vgone(): vgone()
+	 * (eventually) reaches procfs_freevp(), which unlinks the
+	 * node from the pfshead list and frees it. Once that has
+	 * happened our pfs pointer is no longer usable for
+	 * stepping to the next entry, and the list itself may have
+	 * changed underneath us (the removed node may even have been
+	 * the last one). So after each removal we simply go back to
+	 * pfshead and look again.
+	 *
+	 * The step is done inside the body rather than in the for()
+	 * header, because an unconditional "pfs = pfs->pfs_next"
+	 * after resetting to pfshead would skip over pfshead.
+	 */
+	for (pfs = pfshead; pfs != 0; ) {
+		if (pfs->pfs_pid != pid) {
+			pfs = pfs->pfs_next;
+			continue;
+		}
+		vgone(PFSTOV(pfs));
+		pfs = pfshead;
+	}
+}
diff --git a/sys/fs/procfs/procfs_type.c b/sys/fs/procfs/procfs_type.c
new file mode 100644
index 0000000..8f85c54
--- /dev/null
+++ b/sys/fs/procfs/procfs_type.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: procfs_type.c,v 1.4 1997/03/24 11:24:42 bde Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/sysent.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * read handler for the "etype" file: reports the name of
+ * the process's system call vector (p_sysent->sv_name)
+ * followed by a newline, or "Not Available" if there is
+ * no such name.
+ *
+ * only reads are supported (EOPNOTSUPP otherwise), and a
+ * read at a non-zero offset transfers nothing.
+ * returns 0 or the error from uiomove().
+ */
+int
+procfs_dotype(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int len;
+	int error;
+	/*
+	 * buffer for emulation type
+	 */
+	char mebuffer[256];
+	const char *name = "Not Available";
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	if (uio->uio_offset != 0)
+		return (0);
+
+	if (p && p->p_sysent && p->p_sysent->sv_name)
+		name = p->p_sysent->sv_name;
+
+	/*
+	 * never copy more than the buffer can hold, keeping
+	 * one byte free for the trailing newline.
+	 */
+	len = strlen(name);
+	if (len > (int)sizeof(mebuffer) - 1)
+		len = sizeof(mebuffer) - 1;
+	bcopy(name, mebuffer, len);
+	mebuffer[len++] = '\n';
+	error = uiomove(mebuffer, len, uio);
+	return error;
+}
+
+/*
+ * validity check used for the "etype" entry: returns 1
+ * unless the process has P_SYSTEM set, in which case 0.
+ */
+int
+procfs_validtype(p)
+	struct proc *p;
+{
+	if (p->p_flag & P_SYSTEM)
+		return (0);
+	return (1);
+}
diff --git a/sys/fs/procfs/procfs_vfsops.c b/sys/fs/procfs/procfs_vfsops.c
new file mode 100644
index 0000000..ac1ab53
--- /dev/null
+++ b/sys/fs/procfs/procfs_vfsops.c
@@ -0,0 +1,215 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vfsops.c 8.7 (Berkeley) 5/10/95
+ *
+ * $Id: procfs_vfsops.c,v 1.25 1998/07/27 22:47:17 alex Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+
+/* forward declarations of the file-local VFS operations */
+static int procfs_init __P((struct vfsconf *vfsp));
+static int procfs_mount __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+static int procfs_start __P((struct mount *mp, int flags, struct proc *p));
+static int procfs_statfs __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+static int procfs_unmount __P((struct mount *mp, int mntflags,
+ struct proc *p));
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * rejects the mount with EINVAL if UIO_MX is not a power
+ * of two and with EOPNOTSUPP for MNT_UPDATE requests.
+ * registers procfs_exit with at_exit() when vfc_refcount
+ * is 1, then fills in the mount point's flags, fsid, names
+ * and statfs data. returns 0 on success.
+ */
+/* ARGSUSED */
+static int
+procfs_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ size_t size;
+ int error;
+
+ /* x & (x-1) is non-zero unless x is a power of two (or zero) */
+ if (UIO_MX & (UIO_MX-1)) {
+ log(LOG_ERR, "procfs: invalid directory entry size\n");
+ return (EINVAL);
+ }
+
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ /* presumably refcount 1 means this is the first procfs mount -- verify */
+ if (mp->mnt_vfc->vfc_refcount == 1 && (error = at_exit(procfs_exit))) {
+ printf("procfs: cannot register procfs_exit with at_exit\n");
+ return(error);
+ }
+
+ mp->mnt_flag |= MNT_LOCAL;
+ mp->mnt_data = 0;
+ vfs_getnewfsid(mp);
+
+ /* record the mount point name, zero-filling the rest of the field */
+ /* NOTE(review): the copyinstr() result is ignored here -- confirm that is intended */
+ (void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+ /* there is no backing device; the "from" name is just "procfs" */
+ size = sizeof("procfs") - 1;
+ bcopy("procfs", mp->mnt_stat.f_mntfromname, size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void)procfs_statfs(mp, &mp->mnt_stat, p);
+
+ return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * flushes the vnodes of the mount (forcibly when MNT_FORCE
+ * is given) and, when vfc_refcount is 1, removes the
+ * procfs_exit hook again. returns 0 or the vflush() error.
+ */
+static int
+procfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	int error;
+	int flags;
+
+	flags = (mntflags & MNT_FORCE) ? FORCECLOSE : 0;
+
+	if ((error = vflush(mp, 0, flags)) != 0)
+		return (error);
+
+	if (mp->mnt_vfc->vfc_refcount == 1)
+		rm_at_exit(procfs_exit);
+
+	return (0);
+}
+
+/*
+ * return the root vnode of the mount in (*vpp): the
+ * Proot node with pid 0, obtained from procfs_allocvp().
+ */
+int
+procfs_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+
+ return (procfs_allocvp(mp, vpp, 0, Proot));
+}
+
+/*
+ * vfs start entry: nothing to do for procfs, always returns 0.
+ */
+/* ARGSUSED */
+static int
+procfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * the numbers are synthetic: page-sized blocks, one block
+ * in total with none free, and file counts derived from
+ * maxproc and nprocs. when (sbp) is not the mount's own
+ * statfs buffer, the type, fsid and names are copied over
+ * from it as well. always returns 0.
+ */
+static int
+procfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *msp = &mp->mnt_stat;
+
+	sbp->f_bsize = PAGE_SIZE;
+	sbp->f_iosize = PAGE_SIZE;
+	sbp->f_blocks = 1;	/* avoid divide by zero in some df's */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = maxproc;			/* approx */
+	sbp->f_ffree = maxproc - nprocs;	/* approx */
+
+	/* filling in the mount's own buffer: the rest is already there */
+	if (sbp == msp)
+		return (0);
+
+	sbp->f_type = mp->mnt_vfc->vfc_typenum;
+	bcopy(&msp->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(msp->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(msp->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+	return (0);
+}
+
+/*
+ * vfs init entry: procfs needs no setup, always returns 0.
+ */
+static int
+procfs_init(vfsp)
+ struct vfsconf *vfsp;
+{
+
+ return (0);
+}
+
+/*
+ * operations procfs does not implement are mapped onto the
+ * generic einval / eopnotsupp / nullop routines, each cast
+ * to the prototype of the vfsops slot it fills.
+ */
+#define procfs_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+ struct sockaddr *, struct vnode **, int *, struct ucred **)))einval)
+#define procfs_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+ struct proc *)))eopnotsupp)
+#define procfs_sync ((int (*) __P((struct mount *, int, struct ucred *, \
+ struct proc *)))nullop)
+#define procfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+ size_t, struct proc *)))eopnotsupp)
+#define procfs_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+ eopnotsupp)
+#define procfs_vptofh ((int (*) __P((struct vnode *, struct fid *)))einval)
+
+/* the procfs VFS operations vector; entries are positional */
+static struct vfsops procfs_vfsops = {
+ procfs_mount,
+ procfs_start,
+ procfs_unmount,
+ procfs_root,
+ procfs_quotactl,
+ procfs_statfs,
+ procfs_sync,
+ procfs_vget,
+ procfs_fhtovp,
+ procfs_vptofh,
+ procfs_init,
+};
+
+/* hook the vector up under the name "procfs", flagged VFCF_SYNTHETIC */
+VFS_SET(procfs_vfsops, procfs, VFCF_SYNTHETIC);
diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c
new file mode 100644
index 0000000..1aa5453
--- /dev/null
+++ b/sys/fs/procfs/procfs_vnops.c
@@ -0,0 +1,1013 @@
+/*
+ * Copyright (c) 1993, 1995 Jan-Simon Pendry
+ * Copyright (c) 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95
+ *
+ * $Id: procfs_vnops.c,v 1.63 1999/01/05 03:53:06 peter Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/fcntl.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/dirent.h>
+#include <machine/reg.h>
+#include <vm/vm_zone.h>
+#include <miscfs/procfs/procfs.h>
+#include <sys/pioctl.h>
+
+/* forward declarations of the file-local vnode operations */
+static int procfs_abortop __P((struct vop_abortop_args *));
+static int procfs_access __P((struct vop_access_args *));
+static int procfs_badop __P((void));
+static int procfs_bmap __P((struct vop_bmap_args *));
+static int procfs_close __P((struct vop_close_args *));
+static int procfs_getattr __P((struct vop_getattr_args *));
+static int procfs_inactive __P((struct vop_inactive_args *));
+static int procfs_ioctl __P((struct vop_ioctl_args *));
+static int procfs_lookup __P((struct vop_lookup_args *));
+static int procfs_open __P((struct vop_open_args *));
+static int procfs_print __P((struct vop_print_args *));
+static int procfs_readdir __P((struct vop_readdir_args *));
+static int procfs_readlink __P((struct vop_readlink_args *));
+static int procfs_reclaim __P((struct vop_reclaim_args *));
+static int procfs_setattr __P((struct vop_setattr_args *));
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories. It is
+ * used in procfs_lookup and procfs_readdir
+ *
+ * each entry gives the directory entry type (DT_*), the
+ * name and its length, the pfstype of the node, and an
+ * optional predicate on the process (NULL if there is none).
+ */
+static struct proc_target {
+ u_char pt_type; /* DT_DIR or DT_REG */
+ u_char pt_namlen; /* length of pt_name, without the nul */
+ char *pt_name; /* entry name */
+ pfstype pt_pfstype; /* node type to allocate for it */
+ int (*pt_valid) __P((struct proc *p)); /* optional validity check */
+} proc_targets[] = {
+/* N(s) expands to the two initializers "length of s, s" */
+#define N(s) sizeof(s)-1, s
+ /* name type validp */
+ { DT_DIR, N("."), Pproc, NULL },
+ { DT_DIR, N(".."), Proot, NULL },
+ { DT_REG, N("file"), Pfile, procfs_validfile },
+ { DT_REG, N("mem"), Pmem, NULL },
+ { DT_REG, N("regs"), Pregs, procfs_validregs },
+ { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs },
+ { DT_REG, N("ctl"), Pctl, NULL },
+ { DT_REG, N("status"), Pstatus, NULL },
+ { DT_REG, N("note"), Pnote, NULL },
+ { DT_REG, N("notepg"), Pnotepg, NULL },
+ { DT_REG, N("map"), Pmap, procfs_validmap },
+ { DT_REG, N("etype"), Ptype, procfs_validtype },
+ { DT_REG, N("cmdline"), Pcmdline, NULL },
+#undef N
+};
+/* number of entries in proc_targets[] */
+static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]);
+
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * prepare the pfsnode (vp) for i/o. (vp) is locked
+ * on entry, and should be left locked on exit.
+ *
+ * the target process must still exist (ENOENT otherwise).
+ * beyond that only the process memory image needs any
+ * work: exclusive opens are enforced (EBUSY), the opener
+ * must pass CHECKIO or procfs_kmemaccess (EPERM), and the
+ * write/exclusive bits of a write open are remembered in
+ * the node.
+ */
+static int
+procfs_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *opener, *target;
+
+	target = PFIND(pfs->pfs_pid);
+	if (target == NULL)
+		return (ENOENT);
+
+	/* every other node type opens without further ado */
+	if (pfs->pfs_type != Pmem)
+		return (0);
+
+	/* an exclusive open and a writer cannot coexist */
+	if ((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL))
+		return (EBUSY);
+	if ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))
+		return (EBUSY);
+
+	opener = ap->a_p;
+	if (!(CHECKIO(opener, target) || procfs_kmemaccess(opener)))
+		return (EPERM);
+
+	if (ap->a_mode & FWRITE)
+		pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * close the pfsnode (vp) after doing i/o.
+ * (vp) is not locked on entry or exit.
+ *
+ * nothing to do for procfs other than undo
+ * any exclusive open flag (see _open above)
+ * and, on what looks like the last close of a
+ * memory image, release a process left stopped.
+ * always returns 0.
+ */
+static int
+procfs_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+ struct proc *p;
+
+ switch (pfs->pfs_type) {
+ case Pmem:
+ if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL))
+ pfs->pfs_flags &= ~(FWRITE|O_EXCL);
+ /*
+ * This rather complicated-looking code is trying to
+ * determine if this was the last close on this particular
+ * vnode. While one would expect v_usecount to be 1 at
+ * that point, it seems that (according to John Dyson)
+ * the VM system will bump up the usecount. So: if the
+ * usecount is 2, and VOBJBUF is set, then this is really
+ * the last close. Otherwise, if the usecount is < 2
+ * then it is definitely the last close.
+ * If this is the last close, then it checks to see if
+ * the target process has PF_LINGER set in p_pfsflags,
+ * if this is *not* the case, then the process' stop flags
+ * are cleared, and the process is woken up. This is
+ * to help prevent the case where a process has been
+ * told to stop on an event, but then the requesting process
+ * has gone away or forgotten about it.
+ *
+ * NOTE(review): the test below only checks v_usecount < 2;
+ * the "usecount is 2 and VOBJBUF is set" case described
+ * above is not tested here -- confirm which is intended.
+ */
+ if ((ap->a_vp->v_usecount < 2)
+ && (p = pfind(pfs->pfs_pid))
+ && !(p->p_pfsflags & PF_LINGER)) {
+ p->p_stops = 0;
+ p->p_step = 0;
+ wakeup(&p->p_step);
+ }
+ break;
+ default:
+ break;
+ }
+
+ return (0);
+}
+
+/*
+ * do an ioctl operation on a pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ *
+ * the commands operate on the process the node refers to:
+ *	PIOCBIS / PIOCBIC	set / clear bits in p_stops
+ *	PIOCSFL / PIOCGFL	set / get p_pfsflags
+ *	PIOCSTATUS		fill in a struct procfs_status
+ *	PIOCWAIT		sleep until the process has
+ *				stopped, then report its status
+ *	PIOCCONT		resume a stopped process,
+ *				optionally posting a signal
+ *
+ * returns 0 on success, ENOTTY if the process is gone or
+ * the command is unknown, EPERM if the caller fails
+ * CHECKIO, EINVAL for a bad PIOCCONT request, or the
+ * error from suser() / tsleep().
+ */
+static int
+procfs_ioctl(ap)
+	struct vop_ioctl_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *procp, *p;
+	int error;
+	int signo;
+	struct procfs_status *psp;
+	unsigned char flags;
+
+	p = ap->a_p;
+	procp = pfind(pfs->pfs_pid);
+	if (procp == NULL) {
+		return ENOTTY;
+	}
+
+	if (!CHECKIO(p, procp))
+		return EPERM;
+
+	switch (ap->a_command) {
+	case PIOCBIS:
+		procp->p_stops |= *(unsigned int*)ap->a_data;
+		break;
+	case PIOCBIC:
+		procp->p_stops &= ~*(unsigned int*)ap->a_data;
+		break;
+	case PIOCSFL:
+		/*
+		 * NFLAGS is "non-suser flags" -- currently, only
+		 * PF_ISUGID ("ignore set u/g id");
+		 */
+#define NFLAGS	(PF_ISUGID)
+		flags = (unsigned char)*(unsigned int*)ap->a_data;
+		if (flags & NFLAGS && (error = suser(p->p_ucred, &p->p_acflag)))
+			return error;
+		procp->p_pfsflags = flags;
+		break;
+	case PIOCGFL:
+		*(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags;
+		/*
+		 * must not fall through: PIOCSTATUS treats a_data as
+		 * a struct procfs_status and would overwrite the
+		 * flags word we have just stored.
+		 */
+		break;
+	case PIOCSTATUS:
+		psp = (struct procfs_status *)ap->a_data;
+		psp->state = (procp->p_step == 0);
+		psp->flags = procp->p_pfsflags;
+		psp->events = procp->p_stops;
+		if (procp->p_step) {
+			psp->why = procp->p_stype;
+			psp->val = procp->p_xstat;
+		} else {
+			psp->why = psp->val = 0;	/* Not defined values */
+		}
+		break;
+	case PIOCWAIT:
+		psp = (struct procfs_status *)ap->a_data;
+		if (procp->p_step == 0) {
+			error = tsleep(&procp->p_stype, PWAIT | PCATCH, "piocwait", 0);
+			if (error)
+				return error;
+		}
+		psp->state = 1;	/* It stopped */
+		psp->flags = procp->p_pfsflags;
+		psp->events = procp->p_stops;
+		psp->why = procp->p_stype;	/* why it stopped */
+		psp->val = procp->p_xstat;	/* any extra info */
+		break;
+	case PIOCCONT:	/* Restart a proc */
+		if (procp->p_step == 0)
+			return EINVAL;	/* Can only start a stopped process */
+		if ((signo = *(int*)ap->a_data) != 0) {
+			if (signo >= NSIG || signo <= 0)
+				return EINVAL;
+			psignal(procp, signo);
+		}
+		procp->p_step = 0;
+		wakeup(&procp->p_step);
+		break;
+	default:
+		return (ENOTTY);
+	}
+	return 0;
+}
+
+/*
+ * block mapping for pfsnode (vp).
+ * procfs does not go through the buffer cache, so
+ * this should never be called, and it is unclear
+ * what part of the kernel would ever want it. for
+ * sanity it is the usual identity bmap: the vnode
+ * and block number are handed back unchanged and
+ * the run length is zero. returning (EIO) would be
+ * a reasonable alternative.
+ */
+static int
+procfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+	int *runp = ap->a_runp;
+
+	/* each output is optional; fill in only those requested */
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	if (runp != NULL)
+		*runp = 0;
+	return (0);
+}
+
+/*
+ * procfs_inactive is called when the pfsnode
+ * is vrele'd and the reference count goes
+ * to zero. (vp) will be on the vnode free
+ * list, so to get it back vget() must be
+ * used.
+ *
+ * (vp) is locked on entry, but must be unlocked on exit.
+ * all that is done here is that unlock; always returns 0.
+ */
+static int
+procfs_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+
+ VOP_UNLOCK(vp, 0, ap->a_p);
+
+ return (0);
+}
+
+/*
+ * _reclaim is called when getnewvnode()
+ * wants to make use of an entry on the vnode
+ * free list. at this time the filesystem needs
+ * to free any private data and remove the node
+ * from any private lists.
+ *
+ * all of that is done by procfs_freevp(), whose
+ * result is returned.
+ */
+static int
+procfs_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+
+ return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp): the node's type, pid, mode
+ * and flags. always returns 0.
+ */
+static int
+procfs_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+ printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n",
+ pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags);
+ return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail, and has to undo whatever
+ * side-effects the lookup left behind. here that
+ * means giving the pathname buffer back to namei_zone
+ * when the component name has HASBUF set but not
+ * SAVESTART. always returns 0.
+ */
+static int
+procfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		zfree(namei_zone, cnp->cn_pnbuf);
+	return (0);
+}
+
+/*
+ * generic entry point for unsupported operations;
+ * always fails with EIO.
+ */
+static int
+procfs_badop()
+{
+
+ return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ *
+ * Directory sizes are reported as a nominal DEV_BSIZE since
+ * any real length would require the genuine size to be
+ * computed, and nothing cares anyway.  All timestamps are
+ * set to the current time of day.
+ *
+ * Arguments (struct vop_getattr_args):
+ *	a_vp		procfs vnode being queried
+ *	a_vap		attribute structure to fill in
+ *	a_cred, a_p	not used by this implementation
+ *
+ * Returns 0 on success, ENOENT when the process (or its
+ * credentials) behind a per-process node no longer exists,
+ * and EOPNOTSUPP for a Pfile node.  An unknown node type
+ * panics.
+ *
+ * this is relatively minimal for procfs.
+ */
+static int
+procfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct vattr *vap = ap->a_vap;
+	struct proc *procp;
+	int error;
+
+	/*
+	 * First make sure that the process and its credentials
+	 * still exist.  The root and curproc nodes are not tied
+	 * to a particular process, so procp stays NULL for them
+	 * and must not be dereferenced in their cases below.
+	 */
+	switch (pfs->pfs_type) {
+	case Proot:
+	case Pcurproc:
+		procp = NULL;
+		break;
+
+	default:
+		procp = PFIND(pfs->pfs_pid);
+		if (procp == NULL || procp->p_cred == NULL ||
+		    procp->p_ucred == NULL)
+			return (ENOENT);
+		break;
+	}
+
+	error = 0;
+
+	/* start by zeroing out the attributes */
+	VATTR_NULL(vap);
+
+	/* next do all the common fields */
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_mode = pfs->pfs_mode;
+	vap->va_fileid = pfs->pfs_fileno;
+	vap->va_flags = 0;
+	vap->va_blocksize = PAGE_SIZE;
+	vap->va_bytes = vap->va_size = 0;
+
+	/*
+	 * Make all times be current TOD.
+	 * It would be possible to get the process start
+	 * time from the p_stat structure, but there's
+	 * no "file creation" time stamp anyway, and the
+	 * p_stat structure is not addressable if u. gets
+	 * swapped out for that process.
+	 */
+	nanotime(&vap->va_ctime);
+	vap->va_atime = vap->va_mtime = vap->va_ctime;
+
+	/*
+	 * If the process has exercised some setuid or setgid
+	 * privilege, then rip away read/write permission so
+	 * that only root can gain access.
+	 */
+	switch (pfs->pfs_type) {
+	case Pctl:
+	case Pregs:
+	case Pfpregs:
+		/* Strip read/write for owner, group and other. */
+		if (procp->p_flag & P_SUGID)
+			vap->va_mode &= ~((VREAD|VWRITE)|
+			    ((VREAD|VWRITE)>>3)|
+			    ((VREAD|VWRITE)>>6));
+		break;
+	case Pmem:
+		/* Retain group kmem readability: strip the owner bits only. */
+		if (procp->p_flag & P_SUGID)
+			vap->va_mode &= ~(VREAD|VWRITE);
+		break;
+	default:
+		break;
+	}
+
+	/*
+	 * now do the object specific fields
+	 *
+	 * The size could be set from struct reg, but it's hardly
+	 * worth the trouble, and it puts some (potentially) machine
+	 * dependent data into this machine-independent code.  If it
+	 * becomes important then this function should break out into
+	 * a per-file stat function in the corresponding .c file.
+	 */
+
+	switch (pfs->pfs_type) {
+	case Proot:
+		/*
+		 * Set nlink to 1 to tell fts(3) we don't actually know.
+		 */
+		vap->va_nlink = 1;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_size = vap->va_bytes = DEV_BSIZE;
+		break;
+
+	case Pcurproc: {
+		char buf[16];		/* should be enough */
+
+		/* The size is the length of the caller's pid in decimal. */
+		vap->va_nlink = 1;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_size = vap->va_bytes =
+		    snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid);
+		break;
+	}
+
+	case Pproc:
+		vap->va_nlink = nproc_targets;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		vap->va_size = vap->va_bytes = DEV_BSIZE;
+		break;
+
+	case Pfile:
+		error = EOPNOTSUPP;
+		break;
+
+	case Pmem:
+		vap->va_nlink = 1;
+		/*
+		 * If we denied owner access earlier, then we have to
+		 * change the owner to root - otherwise 'ps' and friends
+		 * will break even though they are setgid kmem. *SIGH*
+		 */
+		if (procp->p_flag & P_SUGID)
+			vap->va_uid = 0;
+		else
+			vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = KMEM_GROUP;
+		break;
+
+	case Pregs:
+		vap->va_bytes = vap->va_size = sizeof(struct reg);
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	case Pfpregs:
+		vap->va_bytes = vap->va_size = sizeof(struct fpreg);
+		/* FALLTHROUGH */
+
+	/*
+	 * Ptype and Pmap are not register files, so they must not
+	 * advertise sizeof(struct reg); they keep the zero size set
+	 * with the common fields above.
+	 */
+	case Ptype:
+	case Pmap:
+	case Pctl:
+	case Pstatus:
+	case Pnote:
+	case Pnotepg:
+	case Pcmdline:
+		vap->va_nlink = 1;
+		vap->va_uid = procp->p_ucred->cr_uid;
+		vap->va_gid = procp->p_ucred->cr_gid;
+		break;
+
+	default:
+		panic("procfs_getattr");
+	}
+
+	return (error);
+}
+
+/*
+ * Attribute setting is faked: every request is accepted and
+ * ignored, except an attempt to change the file flags, which
+ * is refused with EOPNOTSUPP.
+ */
+static int
+procfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+
+	/*
+	 * Reporting success for everything else is deliberate.
+	 * Returning an error would make creat() fail when it
+	 * tries to truncate the file to length 0, and would
+	 * therefore break echo $note > /proc/$pid/note.
+	 */
+	error = (ap->a_vap->va_flags != VNOVAL) ? EOPNOTSUPP : 0;
+
+	return (error);
+}
+
+/*
+ * Access check for procfs nodes.
+ *
+ * Code very much like this is repeated all over the 4bsd
+ * kernel and ought to live in kern/vfs_subr.c one day.
+ *
+ * The super-user shortcut is slightly broken: it grants read
+ * access even to write-only objects.  That is harmless here,
+ * but it means the i/o entry points have to verify that the
+ * requested operation makes sense.
+ *
+ * Returns 0 when access is granted, EACCES when it is not,
+ * or the error from VOP_GETATTR().  Note that ap->a_mode is
+ * shifted in place while selecting the permission class.
+ */
+static int
+procfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vattr attrs;
+	int error, idx, ingroup;
+
+	/* The super-user is never refused. */
+	if (ap->a_cred->cr_uid == 0)
+		return (0);
+
+	error = VOP_GETATTR(ap->a_vp, &attrs, ap->a_cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * Exactly one permission class applies: owner, else group
+	 * (if the caller belongs to the file's group), else other.
+	 * Each step down shifts the wanted bits by one class.
+	 */
+	if (ap->a_cred->cr_uid != attrs.va_uid) {
+		ap->a_mode >>= 3;
+
+		ingroup = 0;
+		for (idx = 0; idx < ap->a_cred->cr_ngroups; idx++) {
+			if (attrs.va_gid == ap->a_cred->cr_groups[idx]) {
+				ingroup = 1;
+				break;
+			}
+		}
+		if (!ingroup)
+			ap->a_mode >>= 3;
+	}
+
+	/* Every requested bit must be present in the file mode. */
+	if ((attrs.va_mode & ap->a_mode) != ap->a_mode)
+		return (EACCES);
+
+	return (0);
+}
+
+/*
+ * lookup. this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own. otherwise
+ * read and inwardly digest ufs_lookup().
+ *
+ * Resolves the component named by a_cnp inside directory
+ * a_dvp and stores the resulting vnode in *a_vpp (which is
+ * set to NULL first, so it stays NULL on failure).
+ *
+ * - DELETE and RENAME lookups are refused with EROFS.
+ * - "." yields a_dvp itself with an extra reference.
+ * - In the root (Proot): ".." is EIO, "curproc" yields the
+ *   Pcurproc node, and a decimal name that PFIND() resolves
+ *   to a process yields that process' Pproc node.
+ * - In a process directory (Pproc): ".." yields the procfs
+ *   root; any other name is matched against proc_targets[],
+ *   honouring each entry's optional pt_valid predicate.  A
+ *   Pfile match returns the vnode from procfs_findtextvp(),
+ *   referenced and locked, rather than a procfs node.
+ * - Any other node type is not a directory: ENOTDIR.
+ *
+ * A name that matches nothing gives ENOENT for a LOOKUP
+ * operation and EROFS for every other name operation.
+ */
+static int
+procfs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname = cnp->cn_nameptr;
+ struct proc *curp = cnp->cn_proc;
+ struct proc_target *pt;
+ struct vnode *fvp;
+ pid_t pid;
+ struct pfsnode *pfs;
+ struct proc *p;
+ int i;
+
+ *vpp = NULL;
+
+ if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
+ return (EROFS);
+
+ if (cnp->cn_namelen == 1 && *pname == '.') { /* "." is the directory itself */
+ *vpp = dvp;
+ VREF(dvp);
+ /* vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curp); */
+ return (0);
+ }
+
+ pfs = VTOPFS(dvp);
+ switch (pfs->pfs_type) {
+ case Proot:
+ if (cnp->cn_flags & ISDOTDOT)
+ return (EIO);
+
+ if (CNEQ(cnp, "curproc", 7))
+ return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc));
+
+ pid = atopid(pname, cnp->cn_namelen); /* NO_PID unless the name is a valid decimal pid */
+ if (pid == NO_PID)
+ break;
+
+ p = PFIND(pid);
+ if (p == 0) /* no such process: fall out to the not-found return */
+ break;
+
+ return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc));
+
+ case Pproc:
+ if (cnp->cn_flags & ISDOTDOT)
+ return (procfs_root(dvp->v_mount, vpp));
+
+ p = PFIND(pfs->pfs_pid);
+ if (p == 0) /* the process behind this directory is gone */
+ break;
+
+ for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) {
+ if (cnp->cn_namelen == pt->pt_namlen &&
+ bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 &&
+ (pt->pt_valid == NULL || (*pt->pt_valid)(p)))
+ goto found;
+ }
+ break;
+
+ found: /* pt points at the matching proc_targets[] entry */
+ if (pt->pt_pfstype == Pfile) {
+ fvp = procfs_findtextvp(p);
+ /* We already checked that it exists. */
+ VREF(fvp);
+ vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, curp);
+ *vpp = fvp;
+ return (0);
+ }
+
+ return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid,
+ pt->pt_pfstype));
+
+ default:
+ return (ENOTDIR);
+ }
+
+ return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS);
+}
+
+/*
+ * Tell whether process p has a text file.  Returns 1 when
+ * procfs_findtextvp() finds a vnode for it and 0 when that
+ * lookup comes back with NULLVP.
+ */
+int
+procfs_validfile(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = procfs_findtextvp(p);
+	if (textvp == NULLVP)
+		return (0);
+
+	return (1);
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove. a more efficient,
+ * though more complex, implementation would try to minimize
+ * the number of calls to uiomove(). for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * Every entry is exactly UIO_MX bytes, so the directory
+ * offset is simply (entry index * UIO_MX).  On return
+ * uio_offset is advanced to the next entry to be read.
+ *
+ * Returns 0 on success, EINVAL when the offset is negative,
+ * not a multiple of UIO_MX or does not fit an int, or when
+ * the buffer cannot hold one entry, ENOTDIR when the node is
+ * not a directory, or the error reported by uiomove().
+ *
+ * this should just be done through read()
+ */
+static int
+procfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		int *a_ncookies;
+		u_long **a_cookies;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsdent d;
+	struct pfsdent *dp = &d;
+	struct pfsnode *pfs;
+	int error, i, off;
+
+	pfs = VTOPFS(ap->a_vp);
+
+	off = (int)uio->uio_offset;
+	if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 ||
+	    uio->uio_resid < UIO_MX)
+		return (EINVAL);
+
+	error = 0;
+	i = (u_int)off / UIO_MX;
+
+	switch (pfs->pfs_type) {
+	/*
+	 * this is for the process-specific sub-directories.
+	 * all that is needed is to copy out all the entries
+	 * from the proc_targets[] table.
+	 */
+	case Pproc: {
+		struct proc *p;
+		struct proc_target *pt;
+
+		p = PFIND(pfs->pfs_pid);
+		if (p == NULL)
+			break;
+
+		for (pt = &proc_targets[i];
+		    uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) {
+			/* Skip targets that do not apply to this process. */
+			if (pt->pt_valid && (*pt->pt_valid)(p) == 0)
+				continue;
+
+			/*
+			 * Clear the whole record first, as the root
+			 * case does: all UIO_MX bytes are copied out,
+			 * and the bytes after the name would otherwise
+			 * hand stale kernel stack contents to the caller.
+			 */
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+			dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype);
+			dp->d_namlen = pt->pt_namlen;
+			bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1);
+			dp->d_type = pt->pt_type;
+
+			if ((error = uiomove((caddr_t)dp, UIO_MX, uio)) != 0)
+				break;
+		}
+
+		break;
+	}
+
+	/*
+	 * this is for the root of the procfs filesystem
+	 * what is needed is a special entry for "curproc"
+	 * followed by an entry for each process on allproc
+	 * (and on zombproc when PROCFS_ZOMBIE is defined).
+	 */
+
+	case Proot: {
+#ifdef PROCFS_ZOMBIE
+		int doingzomb = 0;
+#endif
+		int pcnt = 0;
+		volatile struct proc *p = allproc.lh_first;
+
+#ifdef PROCFS_ZOMBIE
+	again:
+#endif
+		for (; p && uio->uio_resid >= UIO_MX; i++, pcnt++) {
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+
+			switch (i) {
+			case 0:		/* `.' */
+			case 1:		/* `..' */
+				/* i + 1 dots: one for `.', two for `..'. */
+				dp->d_fileno = PROCFS_FILENO(0, Proot);
+				dp->d_namlen = i + 1;
+				bcopy("..", dp->d_name, dp->d_namlen);
+				dp->d_name[i + 1] = '\0';
+				dp->d_type = DT_DIR;
+				break;
+
+			case 2:
+				dp->d_fileno = PROCFS_FILENO(0, Pcurproc);
+				dp->d_namlen = 7;
+				bcopy("curproc", dp->d_name, 8);
+				dp->d_type = DT_LNK;
+				break;
+
+			default:
+				/*
+				 * Walk the process list forward until it
+				 * has caught up with the entry index.
+				 */
+				while (pcnt < i) {
+					pcnt++;
+					p = p->p_list.le_next;
+					if (!p)
+						goto done;
+				}
+				dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+				dp->d_namlen = sprintf(dp->d_name, "%ld",
+				    (long)p->p_pid);
+				/* A per-process node is itself a directory. */
+				dp->d_type = DT_DIR;
+				p = p->p_list.le_next;
+				break;
+			}
+
+			if ((error = uiomove((caddr_t)dp, UIO_MX, uio)) != 0)
+				break;
+		}
+	done:
+
+#ifdef PROCFS_ZOMBIE
+		if (p == 0 && doingzomb == 0) {
+			doingzomb = 1;
+			p = zombproc.lh_first;
+			goto again;
+		}
+#endif
+
+		break;
+
+	}
+
+	default:
+		error = ENOTDIR;
+		break;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * readlink for the `curproc' symbolic link: the link target
+ * is the calling process' pid in decimal.  Any node other
+ * than curproc is rejected with EINVAL.
+ */
+static int
+procfs_readlink(ap)
+	struct vop_readlink_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	char target[16];		/* should be enough */
+	int n;
+
+	if (pfs->pfs_fileno == PROCFS_FILENO(0, Pcurproc)) {
+		n = snprintf(target, sizeof(target), "%ld",
+		    (long)curproc->p_pid);
+		return (uiomove((caddr_t)target, n, ap->a_uio));
+	}
+
+	return (EINVAL);
+}
+
+/*
+ * Parse the first len bytes of b as an unsigned decimal pid.
+ * Returns the pid, or NO_PID if a non-digit is met or the
+ * value grows beyond PID_MAX.  A zero-length name yields 0.
+ */
+static pid_t
+atopid(b, len)
+	const char *b;
+	u_int len;
+{
+	const char *end = b + len;
+	pid_t value;
+
+	for (value = 0; b < end; b++) {
+		if (*b < '0' || *b > '9')
+			return (NO_PID);
+
+		/* Range-check after every digit so value cannot run away. */
+		value = 10 * value + (*b - '0');
+		if (value > PID_MAX)
+			return (NO_PID);
+	}
+
+	return (value);
+}
+
+/*
+ * procfs vnode operations.
+ *
+ * Maps each vnode operation descriptor to its procfs handler.
+ * Operations procfs does not support are routed to
+ * procfs_badop(), which returns EIO; read and write share
+ * procfs_rw().  The table ends with a { NULL, NULL } entry
+ * and is registered through VNODEOP_SET(), together with the
+ * address of procfs_vnodeop_p.
+ */
+vop_t **procfs_vnodeop_p;
+static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_abortop_desc, (vop_t *) procfs_abortop },
+ { &vop_access_desc, (vop_t *) procfs_access },
+ { &vop_advlock_desc, (vop_t *) procfs_badop },
+ { &vop_bmap_desc, (vop_t *) procfs_bmap },
+ { &vop_close_desc, (vop_t *) procfs_close },
+ { &vop_create_desc, (vop_t *) procfs_badop },
+ { &vop_getattr_desc, (vop_t *) procfs_getattr },
+ { &vop_inactive_desc, (vop_t *) procfs_inactive },
+ { &vop_link_desc, (vop_t *) procfs_badop },
+ { &vop_lookup_desc, (vop_t *) procfs_lookup },
+ { &vop_mkdir_desc, (vop_t *) procfs_badop },
+ { &vop_mknod_desc, (vop_t *) procfs_badop },
+ { &vop_open_desc, (vop_t *) procfs_open },
+ { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
+ { &vop_print_desc, (vop_t *) procfs_print },
+ { &vop_read_desc, (vop_t *) procfs_rw },
+ { &vop_readdir_desc, (vop_t *) procfs_readdir },
+ { &vop_readlink_desc, (vop_t *) procfs_readlink },
+ { &vop_reclaim_desc, (vop_t *) procfs_reclaim },
+ { &vop_remove_desc, (vop_t *) procfs_badop },
+ { &vop_rename_desc, (vop_t *) procfs_badop },
+ { &vop_rmdir_desc, (vop_t *) procfs_badop },
+ { &vop_setattr_desc, (vop_t *) procfs_setattr },
+ { &vop_symlink_desc, (vop_t *) procfs_badop },
+ { &vop_write_desc, (vop_t *) procfs_rw },
+ { &vop_ioctl_desc, (vop_t *) procfs_ioctl },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc procfs_vnodeop_opv_desc =
+ { &procfs_vnodeop_p, procfs_vnodeop_entries };
+
+VNODEOP_SET(procfs_vnodeop_opv_desc);
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
new file mode 100644
index 0000000..88290e4
--- /dev/null
+++ b/sys/fs/specfs/spec_vnops.c
@@ -0,0 +1,938 @@
+/*
+ * Copyright (c) 1989, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95
+ * $Id: spec_vnops.c,v 1.79 1999/01/21 08:29:07 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/fcntl.h>
+#include <sys/disklabel.h>
+#include <sys/vmmeter.h>
+
+#include <vm/vm.h>
+#include <vm/vm_prot.h>
+#include <vm/vm_object.h>
+#include <vm/vm_page.h>
+#include <vm/vm_pager.h>
+#include <vm/vnode_pager.h>
+#include <vm/vm_extern.h>
+
+#include <miscfs/specfs/specdev.h>
+
+static int spec_advlock __P((struct vop_advlock_args *));
+static int spec_badop __P((void)); /* panics: for ops that must never reach a special file */
+static int spec_bmap __P((struct vop_bmap_args *));
+static int spec_close __P((struct vop_close_args *));
+static int spec_freeblks __P((struct vop_freeblks_args *));
+static int spec_fsync __P((struct vop_fsync_args *));
+static int spec_getattr __P((struct vop_getattr_args *));
+static int spec_getpages __P((struct vop_getpages_args *));
+static int spec_inactive __P((struct vop_inactive_args *));
+static int spec_ioctl __P((struct vop_ioctl_args *));
+static int spec_lookup __P((struct vop_lookup_args *));
+static int spec_open __P((struct vop_open_args *));
+static int spec_poll __P((struct vop_poll_args *));
+static int spec_print __P((struct vop_print_args *));
+static int spec_read __P((struct vop_read_args *));
+static int spec_strategy __P((struct vop_strategy_args *));
+static int spec_write __P((struct vop_write_args *));
+
+struct vnode *speclisth[SPECHSZ]; /* NOTE(review): presumably hash chains of special-file vnodes; not used in this file */
+vop_t **spec_vnodeop_p; /* operations vector; its address is registered via spec_vnodeop_opv_desc below */
+static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { /* descriptor -> handler map for special files */
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_access_desc, (vop_t *) vop_ebadf },
+ { &vop_advlock_desc, (vop_t *) spec_advlock },
+ { &vop_bmap_desc, (vop_t *) spec_bmap },
+ { &vop_close_desc, (vop_t *) spec_close },
+ { &vop_create_desc, (vop_t *) spec_badop },
+ { &vop_freeblks_desc, (vop_t *) spec_freeblks },
+ { &vop_fsync_desc, (vop_t *) spec_fsync },
+ { &vop_getattr_desc, (vop_t *) spec_getattr },
+ { &vop_getpages_desc, (vop_t *) spec_getpages },
+ { &vop_inactive_desc, (vop_t *) spec_inactive },
+ { &vop_ioctl_desc, (vop_t *) spec_ioctl },
+ { &vop_lease_desc, (vop_t *) vop_null },
+ { &vop_link_desc, (vop_t *) spec_badop },
+ { &vop_lookup_desc, (vop_t *) spec_lookup },
+ { &vop_mkdir_desc, (vop_t *) spec_badop },
+ { &vop_mknod_desc, (vop_t *) spec_badop },
+ { &vop_open_desc, (vop_t *) spec_open },
+ { &vop_pathconf_desc, (vop_t *) vop_stdpathconf },
+ { &vop_poll_desc, (vop_t *) spec_poll },
+ { &vop_print_desc, (vop_t *) spec_print },
+ { &vop_read_desc, (vop_t *) spec_read },
+ { &vop_readdir_desc, (vop_t *) spec_badop },
+ { &vop_readlink_desc, (vop_t *) spec_badop },
+ { &vop_reallocblks_desc, (vop_t *) spec_badop },
+ { &vop_reclaim_desc, (vop_t *) vop_null },
+ { &vop_remove_desc, (vop_t *) spec_badop },
+ { &vop_rename_desc, (vop_t *) spec_badop },
+ { &vop_rmdir_desc, (vop_t *) spec_badop },
+ { &vop_setattr_desc, (vop_t *) vop_ebadf },
+ { &vop_strategy_desc, (vop_t *) spec_strategy },
+ { &vop_symlink_desc, (vop_t *) spec_badop },
+ { &vop_write_desc, (vop_t *) spec_write },
+ { NULL, NULL } /* end-of-table marker */
+};
+static struct vnodeopv_desc spec_vnodeop_opv_desc =
+ { &spec_vnodeop_p, spec_vnodeop_entries };
+
+VNODEOP_SET(spec_vnodeop_opv_desc);
+
+
+/*
+ * Generic entry point: run the operation described by
+ * ap->a_desc through the special-file operations vector and
+ * hand back whatever that operation returns.
+ */
+int
+spec_vnoperate(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	struct vnodeop_desc *desc = ap->a_desc;
+
+	return (VOCALL(spec_vnodeop_p, desc->vdesc_offset, ap));
+}
+
+static void spec_getpages_iodone __P((struct buf *bp));
+
+/*
+ * Lookup on a special file can never succeed, because a
+ * device is not a directory: clear the result vnode and
+ * fail with ENOTDIR.
+ */
+static int
+spec_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode **result = ap->a_vpp;
+
+	*result = NULL;
+
+	return (ENOTDIR);
+}
+
+/*
+ * Open a special file.
+ *
+ * Refuses the open with ENXIO when the vnode's filesystem is
+ * mounted MNT_NODEV, when the major number is out of range,
+ * or when the driver has no switch entry or no d_open.
+ * Write opens by anyone but FSCRED are further restricted
+ * by securelevel (see the comments in each case).  Otherwise
+ * the driver's d_open routine is called and its result
+ * returned.  For character devices the vnode lock is dropped
+ * around the driver call and re-acquired afterwards; for
+ * block devices the call is made with the lock state as-is.
+ * Vnodes that are neither VCHR nor VBLK open successfully
+ * with no further action.
+ */
+/* ARGSUSED */
+static int
+spec_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct proc *p = ap->a_p;
+ struct vnode *bvp, *vp = ap->a_vp;
+ dev_t bdev, dev = (dev_t)vp->v_rdev;
+ int maj = major(dev);
+ int error;
+
+ /*
+ * Don't allow open if fs is mounted -nodev.
+ */
+ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+ return (ENXIO);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ if ((u_int)maj >= nchrdev)
+ return (ENXIO);
+ if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL))
+ return ENXIO;
+ if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk character devices.
+ */
+ if (securelevel >= 2
+ && cdevsw[maj]->d_bmaj != -1
+ && (cdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
+ return (EPERM);
+ /*
+ * When running in secure mode, do not allow opens
+ * for writing of /dev/mem, /dev/kmem, or character
+ * devices whose corresponding block devices are
+ * currently mounted.
+ */
+ if (securelevel >= 1) {
+ if ((bdev = chrtoblk(dev)) != NODEV &&
+ vfinddev(bdev, VBLK, &bvp) &&
+ bvp->v_usecount > 0 &&
+ (error = vfs_mountedon(bvp)))
+ return (error);
+ if (iskmemdev(dev))
+ return (EPERM);
+ }
+ }
+ if ((cdevsw[maj]->d_flags & D_TYPEMASK) == D_TTY)
+ vp->v_flag |= VISTTY; /* remember on the vnode that this device is a tty */
+ VOP_UNLOCK(vp, 0, p); /* the driver open runs without the vnode lock held */
+ error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ return (error);
+ /* NOT REACHED */
+ case VBLK:
+ if ((u_int)maj >= nblkdev)
+ return (ENXIO);
+ if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL))
+ return ENXIO;
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk block devices.
+ */
+ if (securelevel >= 2 && ap->a_cred != FSCRED &&
+ (ap->a_mode & FWRITE) &&
+ (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK)
+ return (EPERM);
+
+ /*
+ * Do not allow opens of block devices that are
+ * currently mounted.
+ */
+ error = vfs_mountedon(vp);
+ if (error)
+ return (error);
+ return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p));
+ /* NOT REACHED */
+ default:
+ break;
+ }
+ return (0);
+}
+
+/*
+ * Vnode op for read
+ *
+ * A zero-length request returns 0 at once.
+ *
+ * Character devices: the vnode lock is dropped, the driver's
+ * d_read routine does the whole transfer, and the lock is
+ * taken again before returning the driver's result.
+ *
+ * Block devices: the data is read through the buffer cache
+ * one block at a time.  The block size defaults to
+ * BLKDEV_IOSIZE; if the driver answers DIOCGPART and the
+ * partition is FS_BSDFFS with non-zero fragment settings,
+ * p_frag * p_fsize is used instead.  When the block being
+ * read directly follows the previously read one (v_lastr),
+ * the following block is requested as read-ahead via
+ * breadn().  A negative offset is EINVAL.  The loop stops on
+ * error, when the request is satisfied, or when a block
+ * yields no data.
+ *
+ * Any other vnode type panics.
+ */
+/* ARGSUSED */
+static int
+spec_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn, nextbn;
+ long bsize, bscale;
+ struct partinfo dpart;
+ int n, on;
+ d_ioctl_t *ioctl;
+ int error = 0;
+ dev_t dev;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("spec_read mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_read proc");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ VOP_UNLOCK(vp, 0, p);
+ error = (*cdevsw[major(vp->v_rdev)]->d_read)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ bsize = BLKDEV_IOSIZE;
+ dev = vp->v_rdev;
+ if ((ioctl = bdevsw[major(dev)]->d_ioctl) != NULL &&
+ (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+ dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag * dpart.part->p_fsize;
+ bscale = btodb(bsize); /* block size expressed in device blocks */
+ do {
+ bn = btodb(uio->uio_offset) & ~(bscale - 1); /* start of the block containing the offset */
+ on = uio->uio_offset % bsize; /* byte offset within that block */
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ if (vp->v_lastr + bscale == bn) { /* sequential access: read ahead one block */
+ nextbn = bn + bscale;
+ error = breadn(vp, bn, (int)bsize, &nextbn,
+ (int *)&bsize, 1, NOCRED, &bp); /* NOTE(review): passes a long through an int pointer */
+ } else
+ error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+ vp->v_lastr = bn;
+ n = min(n, bsize - bp->b_resid); /* never copy more than was actually read */
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_read type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Vnode op for write
+ *
+ * Character devices: the vnode lock is dropped, the driver's
+ * d_write routine does the whole transfer, and the lock is
+ * taken again before returning the driver's result.
+ *
+ * Block devices: the data goes through the buffer cache one
+ * block at a time.  The block size defaults to BLKDEV_IOSIZE
+ * and becomes p_frag * p_fsize for an FS_BSDFFS partition
+ * reported by DIOCGPART.  A block that is overwritten
+ * completely is obtained with getblk(); a partial block is
+ * read first with bread().  A buffer filled up to its end is
+ * written asynchronously (bawrite), otherwise it is left as
+ * a delayed write (bdwrite).  A zero-length request returns
+ * 0 and a negative offset is EINVAL.
+ *
+ * Any other vnode type panics.
+ */
+/* ARGSUSED */
+static int
+spec_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn;
+ int bsize, blkmask;
+ struct partinfo dpart;
+ register int n, on;
+ int error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("spec_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_write proc");
+#endif
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ VOP_UNLOCK(vp, 0, p);
+ error = (*cdevsw[major(vp->v_rdev)]->d_write)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ bsize = BLKDEV_IOSIZE;
+ if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
+ (caddr_t)&dpart, FREAD, p) == 0) { /* NOTE(review): unlike spec_read, d_ioctl is not checked for NULL here */
+ if (dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag *
+ dpart.part->p_fsize;
+ }
+ blkmask = btodb(bsize) - 1;
+ do {
+ bn = btodb(uio->uio_offset) & ~blkmask; /* start of the block containing the offset */
+ on = uio->uio_offset % bsize; /* byte offset within that block */
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ if (n == bsize)
+ bp = getblk(vp, bn, bsize, 0, 0); /* whole block overwritten: no need to read it */
+ else
+ error = bread(vp, bn, bsize, NOCRED, &bp);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ n = min(n, bsize - bp->b_resid);
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ if (n + on == bsize)
+ bawrite(bp);
+ else
+ bdwrite(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_write type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Device ioctl: pass the request straight to the d_ioctl
+ * routine of the character or block driver that owns the
+ * device and return its result.  A vnode that is neither
+ * VCHR nor VBLK panics.
+ */
+/* ARGSUSED */
+static int
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+	d_ioctl_t *handler = NULL;
+
+	/* Pick the driver entry point from the matching device switch. */
+	switch (vp->v_type) {
+	case VCHR:
+		handler = cdevsw[major(dev)]->d_ioctl;
+		break;
+	case VBLK:
+		handler = bdevsw[major(dev)]->d_ioctl;
+		break;
+	default:
+		panic("spec_ioctl");
+		/* NOTREACHED */
+	}
+
+	return ((*handler)(dev, ap->a_command, ap->a_data, ap->a_fflag,
+	    ap->a_p));
+}
+
+/*
+ * Poll a special file.  Character devices are polled by
+ * their driver's d_poll routine; every other vnode type is
+ * handed to vop_defaultop().
+ */
+/* ARGSUSED */
+static int
+spec_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int  a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	if (vp->v_type == VCHR) {
+		dev_t dev = vp->v_rdev;
+
+		return ((*cdevsw[major(dev)]->d_poll)(dev, ap->a_events,
+		    ap->a_p));
+	}
+
+	return (vop_defaultop((struct vop_generic_args *)ap));
+}
+/*
+ * Synch buffers associated with a block device
+ *
+ * Character devices have nothing to flush and return 0 at
+ * once.  Otherwise the vnode's dirty buffer list is scanned
+ * at splbio: busy buffers are skipped, a buffer that is not
+ * marked B_DELWRI panics, and each remaining buffer is
+ * started as an asynchronous write, after which the scan
+ * restarts from the head of the list.  With a_waitfor ==
+ * MNT_WAIT the routine then sleeps until v_numoutput reaches
+ * zero.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+spec_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct buf *bp;
+ struct buf *nbp;
+ int s;
+
+ if (vp->v_type == VCHR)
+ return (0);
+ /*
+ * Flush all dirty buffers associated with a block device.
+ */
+loop:
+ s = splbio();
+ for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) {
+ nbp = TAILQ_NEXT(bp, b_vnbufs);
+ if ((bp->b_flags & B_BUSY))
+ continue;
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("spec_fsync: not dirty");
+ if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) {
+ vfs_bio_awrite(bp);
+ splx(s);
+ } else {
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ bawrite(bp);
+ }
+ goto loop; /* spl was dropped above, so rescan the list from its head */
+ }
+ if (ap->a_waitfor == MNT_WAIT) {
+ while (vp->v_numoutput) { /* wait for the writes in progress to drain */
+ vp->v_flag |= VBWAIT;
+ (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0);
+ }
+#ifdef DIAGNOSTIC
+ if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) {
+ vprint("spec_fsync: dirty", vp);
+ splx(s);
+ goto loop;
+ }
+#endif
+ }
+ splx(s);
+ return (0);
+}
+
+/*
+ * Inactive handler for special files: there is nothing to
+ * tear down, so just unlock the vnode and report success.
+ */
+static int
+spec_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+
+	VOP_UNLOCK(vp, 0, p);
+
+	return (0);
+}
+
+/*
+ * Hand the buffer to the block driver's strategy routine.
+ * For a write whose buffer carries dependencies, the
+ * bioops.io_start hook (when one is installed) is invoked
+ * first.  Always returns 0.
+ */
+static int
+spec_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	int iswrite = ((bp->b_flags & B_READ) == 0);
+
+	if (iswrite && LIST_FIRST(&bp->b_dep) != NULL && bioops.io_start)
+		(*bioops.io_start)(bp);
+
+	(*bdevsw[major(bp->b_dev)]->d_strategy)(bp);
+
+	return (0);
+}
+
+/*
+ * Tell the block driver that a range of blocks is no longer
+ * in use.  Drivers without D_CANFREE are skipped; for the
+ * others a B_FREEBUF buffer describing the range is built
+ * and passed to the driver's strategy routine.  Always
+ * returns 0.
+ */
+static int
+spec_freeblks(ap)
+	struct vop_freeblks_args /* {
+		struct vnode *a_vp;
+		daddr_t a_addr;
+		daddr_t a_length;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct cdevsw *bsw = bdevsw[major(vp->v_rdev)];
+	struct buf *bp;
+
+	if (bsw->d_flags & D_CANFREE) {
+		bp = geteblk(ap->a_length);
+		bp->b_flags |= B_FREEBUF | B_BUSY;
+		bp->b_dev = vp->v_rdev;
+		bp->b_blkno = ap->a_addr;
+		bp->b_offset = dbtob(ap->a_addr);
+		bp->b_bcount = ap->a_length;
+		(*bsw->d_strategy)(bp);
+	}
+
+	return (0);
+}
+
+/*
+ * Block mapping for a device is the identity: the vnode and
+ * block number are passed back unchanged, and no contiguous
+ * run is reported in either direction.  Every output pointer
+ * is optional and skipped when NULL.
+ */
+static int
+spec_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+	int *runp = ap->a_runp;
+	int *runb = ap->a_runb;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	if (runp != NULL)
+		*runp = 0;
+	if (runb != NULL)
+		*runb = 0;
+
+	return (0);
+}
+
+/*
+ * Device close routine
+ *
+ * Calls the driver's d_close routine only on the last close
+ * (vcount() of 1) or when the vnode is being closed forcibly
+ * (VXLOCK set); otherwise it returns 0 without touching the
+ * driver.  For a block device the cached buffers are
+ * flushed and invalidated first, and a failure there is
+ * returned to the caller.  A vnode that is neither VCHR nor
+ * VBLK panics.
+ */
+/* ARGSUSED */
+static int
+spec_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ dev_t dev = vp->v_rdev;
+ d_close_t *devclose;
+ int mode, error;
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /*
+ * Hack: a tty device that is a controlling terminal
+ * has a reference from the session structure.
+ * We cannot easily tell that a character device is
+ * a controlling terminal, unless it is the closing
+ * process' controlling terminal. In that case,
+ * if the reference count is 2 (this last descriptor
+ * plus the session), release the reference from the session.
+ */
+ if (vcount(vp) == 2 && ap->a_p &&
+ (vp->v_flag & VXLOCK) == 0 &&
+ vp == ap->a_p->p_session->s_ttyvp) {
+ vrele(vp);
+ ap->a_p->p_session->s_ttyvp = NULL;
+ }
+ /*
+ * If the vnode is locked, then we are in the midst
+ * of forcibly closing the device, otherwise we only
+ * close on last reference.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = cdevsw[major(dev)]->d_close;
+ mode = S_IFCHR;
+ break;
+
+ case VBLK:
+ /*
+ * On last close of a block device (that isn't mounted)
+ * we must invalidate any in core blocks, so that
+ * we can, for instance, change floppy disks.
+ */
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
+ error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0);
+ VOP_UNLOCK(vp, 0, ap->a_p);
+ if (error)
+ return (error);
+
+ /*
+ * We do not want to really close the device if it
+ * is still in use unless we are trying to close it
+ * forcibly. Since every use (buffer, vnode, swap, cmap)
+ * holds a reference to the vnode, and because we mark
+ * any other vnodes that alias this device, when the
+ * sum of the reference counts on all the aliased
+ * vnodes descends to one, we are on last close.
+ */
+ if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+
+ devclose = bdevsw[major(dev)]->d_close;
+ mode = S_IFBLK;
+ break;
+
+ default:
+ panic("spec_close: not special");
+ }
+
+ return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); /* devclose and mode were chosen per device type above */
+}
+
+/*
+ * Print a one-line description of a special device vnode:
+ * its major and minor device numbers.  Always returns 0.
+ */
+static int
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	dev_t dev = ap->a_vp->v_rdev;
+
+	printf("tag VT_NON, dev %d, %d\n", major(dev), minor(dev));
+
+	return (0);
+}
+
+/*
+ * Advisory byte-range locks are not available on special
+ * devices.  flock()-style requests (F_FLOCK set in a_flags)
+ * fail with EOPNOTSUPP, everything else with EINVAL.
+ */
+/* ARGSUSED */
+static int
+spec_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	if (ap->a_flags & F_FLOCK)
+		return (EOPNOTSUPP);
+
+	return (EINVAL);
+}
+
+/*
+ * Handler for operations that must never be issued against
+ * a special device.  Getting here is a kernel bug, so the
+ * only response is to panic.
+ */
+static int
+spec_badop()
+{
+
+	/* panic() is expected not to come back. */
+	panic("spec_badop called");
+	/* NOTREACHED */
+}
+
+/*
+ * I/O completion callback installed by spec_getpages():
+ * mark the buffer done and wake whoever sleeps on it.
+ */
+static void
+spec_getpages_iodone(bp)
+	struct buf *bp;
+{
+
+	bp->b_flags = bp->b_flags | B_DONE;
+	wakeup(bp);
+}
+/*
+ * Pager input for a special file: read the pages in ap->a_m
+ * directly from the device with a single transfer through a
+ * physical buffer.
+ *
+ * The byte offset is the first page's pindex converted to
+ * bytes plus ap->a_offset; it is rejected with
+ * VM_PAGER_ERROR when negative or larger than OFFSET_MAX.
+ * The transfer length is ap->a_count rounded up to the
+ * mount's f_bsize (but at least DEV_BSIZE), or to DEV_BSIZE
+ * when the vnode has no mount.  Once the I/O has completed,
+ * whatever part of the request was not read is zeroed and
+ * each page's valid/dirty state is set from the amount of
+ * data that arrived.  Pages other than ap->a_reqpage are
+ * activated, deactivated or freed here; the requested page
+ * itself is not released by this routine.
+ *
+ * Returns VM_PAGER_OK when the requested page ended up with
+ * valid data and VM_PAGER_ERROR otherwise.
+ */
+static int
+spec_getpages(ap)
+ struct vop_getpages_args *ap;
+{
+ vm_offset_t kva;
+ int error;
+ int i, pcount, size, s;
+ daddr_t blkno;
+ struct buf *bp;
+ vm_page_t m;
+ vm_ooffset_t offset;
+ int toff, nextoff, nread;
+ struct vnode *vp = ap->a_vp;
+ int blksiz;
+ int gotreqpage;
+
+ error = 0;
+ pcount = round_page(ap->a_count) / PAGE_SIZE; /* number of pages covered by the request */
+
+ /*
+ * Calculate the offset of the transfer.
+ */
+ offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset;
+
+ /* XXX sanity check before we go into details. */
+ /* XXX limits should be defined elsewhere. */
+#define DADDR_T_BIT 32
+#define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1)
+ if (offset < 0 || offset > OFFSET_MAX) {
+ /* XXX still no %q in kernel. */
+ printf("spec_getpages: preposterous offset 0x%x%08x\n",
+ (u_int)((u_quad_t)offset >> 32),
+ (u_int)(offset & 0xffffffff));
+ return (VM_PAGER_ERROR);
+ }
+
+ blkno = btodb(offset);
+
+ /*
+ * Round up physical size for real devices, use the
+ * fundamental blocksize of the fs if possible.
+ */
+ if (vp && vp->v_mount) {
+ if (vp->v_type != VBLK) {
+ vprint("Non VBLK", vp);
+ }
+ blksiz = vp->v_mount->mnt_stat.f_bsize;
+ if (blksiz < DEV_BSIZE) {
+ blksiz = DEV_BSIZE;
+ }
+ }
+ else
+ blksiz = DEV_BSIZE;
+ size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); /* assumes blksiz is a power of two - TODO confirm */
+
+ bp = getpbuf(NULL);
+ kva = (vm_offset_t)bp->b_data;
+
+ /*
+ * Map the pages to be read into the kva.
+ */
+ pmap_qenter(kva, ap->a_m, pcount);
+
+ /* Build a minimal buffer header. */
+ bp->b_flags = B_BUSY | B_READ | B_CALL;
+ bp->b_iodone = spec_getpages_iodone;
+
+ /* B_PHYS is not set, but it is nice to fill this in. */
+ bp->b_proc = curproc;
+ bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
+ if (bp->b_rcred != NOCRED)
+ crhold(bp->b_rcred);
+ if (bp->b_wcred != NOCRED)
+ crhold(bp->b_wcred);
+ bp->b_blkno = blkno;
+ bp->b_lblkno = blkno;
+ pbgetvp(ap->a_vp, bp);
+ bp->b_bcount = size;
+ bp->b_bufsize = size;
+ bp->b_resid = 0;
+
+ cnt.v_vnodein++;
+ cnt.v_vnodepgsin += pcount;
+
+ /* Do the input. */
+ VOP_STRATEGY(bp->b_vp, bp);
+
+ s = splbio();
+
+ /* We definitely need to be at splbio here. */
+ while ((bp->b_flags & B_DONE) == 0) /* B_DONE is set by spec_getpages_iodone() */
+ tsleep(bp, PVM, "spread", 0);
+
+ splx(s);
+
+ if ((bp->b_flags & B_ERROR) != 0) {
+ if (bp->b_error)
+ error = bp->b_error;
+ else
+ error = EIO;
+ }
+
+ nread = size - bp->b_resid; /* bytes actually transferred */
+
+ if (nread < ap->a_count) { /* short read: zero the rest of the request */
+ bzero((caddr_t)kva + nread,
+ ap->a_count - nread);
+ }
+ pmap_qremove(kva, pcount);
+
+
+ gotreqpage = 0;
+ for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) {
+ nextoff = toff + PAGE_SIZE;
+ m = ap->a_m[i];
+
+ m->flags &= ~PG_ZERO;
+
+ if (nextoff <= nread) { /* page lies entirely within the data read */
+ m->valid = VM_PAGE_BITS_ALL;
+ m->dirty = 0;
+ } else if (toff < nread) { /* page only partly covered by the data read */
+ int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1);
+ vm_page_set_validclean(m, 0, nvalid);
+ } else { /* nothing was read into this page */
+ m->valid = 0;
+ m->dirty = 0;
+ }
+
+ if (i != ap->a_reqpage) {
+ /*
+ * Just in case someone was asking for this page we
+ * now tell them that it is ok to use.
+ */
+ if (!error || (m->valid == VM_PAGE_BITS_ALL)) {
+ if (m->valid) {
+ if (m->flags & PG_WANTED) {
+ vm_page_activate(m);
+ } else {
+ vm_page_deactivate(m);
+ }
+ vm_page_wakeup(m);
+ } else {
+ vm_page_free(m);
+ }
+ } else {
+ vm_page_free(m);
+ }
+ } else if (m->valid) {
+ gotreqpage = 1;
+ }
+ }
+ if (!gotreqpage) {
+ m = ap->a_m[ap->a_reqpage];
+#ifndef MAX_PERF
+ printf(
+ "spec_getpages: I/O read failure: (error code=%d)\n",
+ error);
+ printf(
+ " size: %d, resid: %ld, a_count: %d, valid: 0x%x\n",
+ size, bp->b_resid, ap->a_count, m->valid);
+ printf(
+ " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n",
+ nread, ap->a_reqpage, (u_long)m->pindex, pcount);
+#endif
+ /*
+ * Free the buffer header back to the swap buffer pool.
+ */
+ relpbuf(bp, NULL);
+ return VM_PAGER_ERROR;
+ }
+ /*
+ * Free the buffer header back to the swap buffer pool.
+ */
+ relpbuf(bp, NULL);
+ return VM_PAGER_OK;
+}
+
+/*
+ * Attributes of a special file.  The structure is zeroed,
+ * a preferred block size is filled in according to the
+ * device type, and - if the driver answers DIOCGPART - the
+ * size of the partition is reported as both va_bytes and
+ * va_size.  Always returns 0.
+ */
+/* ARGSUSED */
+static int
+spec_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct partinfo dpart;
+	int rc;
+
+	bzero(vap, sizeof (*vap));
+
+	switch (vp->v_type) {
+	case VBLK:
+		vap->va_blocksize = BLKDEV_IOSIZE;
+		break;
+	case VCHR:
+		vap->va_blocksize = MAXBSIZE;
+		break;
+	default:
+		break;
+	}
+
+	/* Ask the driver for the partition to learn the device size. */
+	rc = (*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART,
+	    (caddr_t)&dpart, FREAD, ap->a_p);
+	if (rc == 0) {
+		vap->va_bytes = dbtob(dpart.disklab->d_partitions
+		    [minor(vp->v_rdev)].p_size);
+		vap->va_size = vap->va_bytes;
+	}
+
+	return (0);
+}
diff --git a/sys/fs/umapfs/umap.h b/sys/fs/umapfs/umap.h
new file mode 100644
index 0000000..0c6ca34
--- /dev/null
+++ b/sys/fs/umapfs/umap.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap.h 8.4 (Berkeley) 8/20/94
+ *
+ * $Id: umap.h,v 1.8 1997/02/22 09:40:37 peter Exp $
+ */
+
+/* Number of rows in the per-mount uid map (umap_mount.info_mapdata). */
+#define MAPFILEENTRIES 64
+/* Number of rows in the per-mount gid map (umap_mount.info_gmapdata). */
+#define GMAPFILEENTRIES 16
+/* uid that umap_mapids() substitutes when a uid has no map entry. */
+#define NOBODY 32767
+/* gid that umap_mapids() substitutes when a group has no map entry. */
+#define NULLGROUP 65534
+
+/*
+ * Mount arguments.  umapfs_mount() copies this structure in from its
+ * `data' pointer and then copies in the two arrays it points to.
+ * Each map row is a pair of ids: umap_findid() matches column 0 and
+ * returns column 1, umap_reverse_findid() does the opposite.
+ */
+struct umap_args {
+ char *target; /* Target of loopback */
+ int nentries; /* # of entries in user map array */
+ int gnentries; /* # of entries in group map array */
+ u_long (*mapdata)[2]; /* pointer to array of user mappings */
+ u_long (*gmapdata)[2]; /* pointer to array of group mappings */
+};
+
+/*
+ * Per-mount state of a umap layer.  Allocated in umapfs_mount(), hung
+ * off mp->mnt_data (see MOUNTTOUMAPMOUNT) and freed in umapfs_unmount().
+ * The map arrays have fixed capacity: MAPFILEENTRIES uid rows and
+ * GMAPFILEENTRIES gid rows.
+ */
+struct umap_mount {
+ struct mount *umapm_vfs; /* mount of the lower root vnode */
+ struct vnode *umapm_rootvp; /* Reference to root umap_node */
+ int info_nentries; /* number of uid mappings */
+ int info_gnentries; /* number of gid mappings */
+ u_long info_mapdata[MAPFILEENTRIES][2]; /* mapping data for
+ user mapping in ficus */
+ u_long info_gmapdata[GMAPFILEENTRIES][2]; /*mapping data for
+ group mapping in ficus */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * One umap_node exists per alias vnode.  It is stored in the alias
+ * vnode's v_data (see VTOUMAP) and linked on a hash chain selected by
+ * the address of the lower vnode.
+ */
+struct umap_node {
+ LIST_ENTRY(umap_node) umap_hash; /* Hash list */
+ struct vnode *umap_lowervp; /* Aliased vnode - VREFed once */
+ struct vnode *umap_vnode; /* Back pointer to vnode/umap_node */
+};
+
+extern int umapfs_init __P((struct vfsconf *vfsp));
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+/* mount -> its umap_mount (stored in mnt_data) */
+#define MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))
+/* alias vnode -> its umap_node (stored in v_data) */
+#define VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)
+/* umap_node -> the alias vnode that owns it */
+#define UMAPTOV(xp) ((xp)->umap_vnode)
+/*
+ * alias vnode -> lower vnode.  With UMAPFS_DIAGNOSTIC the lookup goes
+ * through umap_checkvp(), which is passed the call site's file and line.
+ */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)
+#else
+#define UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)
+#endif
+
+extern vop_t **umap_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/umapfs/umap_subr.c b/sys/fs/umapfs/umap_subr.c
new file mode 100644
index 0000000..4974f03
--- /dev/null
+++ b/sys/fs/umapfs/umap_subr.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_subr.c 8.9 (Berkeley) 5/14/95
+ *
+ * $Id: umap_subr.c,v 1.15 1998/11/09 09:21:25 peter Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+#define NUMAPNODECACHE 16 /* element count passed to hashinit() */
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the target vnode is VREF'd. When the
+ * alias is removed the target vnode is vrele'd.
+ */
+
+/*
+ * Select the hash chain for a lower vnode: the pointer value is shifted
+ * right by LOG2_SIZEVNODE and ANDed with umap_node_hash.
+ */
+#define UMAP_NHASH(vp) \
+ (&umap_node_hashtbl \
+ [((uintptr_t)(void *)(vp) >> LOG2_SIZEVNODE) & umap_node_hash])
+/* Both are set up by hashinit() in umapfs_init(). */
+static LIST_HEAD(umap_node_hashhead, umap_node) *umap_node_hashtbl;
+static u_long umap_node_hash;
+
+static u_long umap_findid __P((u_long id, u_long map[][2], int nentries));
+static int umap_node_alloc __P((struct mount *mp, struct vnode *lowervp,
+ struct vnode **vpp));
+static struct vnode *
+ umap_node_find __P((struct mount *mp, struct vnode *targetvp));
+
+/*
+ * Initialise cache headers
+ *
+ * Allocates the alias-node hash table with hashinit(), sized from
+ * NUMAPNODECACHE, and lets hashinit() fill in umap_node_hash, which
+ * UMAP_NHASH uses as its mask.  vfsp is unused.  Always returns 0.
+ */
+int
+umapfs_init(vfsp)
+ struct vfsconf *vfsp;
+{
+
+#ifdef UMAPFS_DIAGNOSTIC
+ printf("umapfs_init\n"); /* printed during system boot */
+#endif
+ umap_node_hashtbl = hashinit(NUMAPNODECACHE, M_CACHE, &umap_node_hash);
+ return (0);
+}
+
+/*
+ * umap_findid: forward lookup of a user or group id in a map.
+ *
+ * Scans the first `nentries' rows of `map' in order and returns column 1
+ * of the first row whose column 0 equals `id'.  When no row matches
+ * (including nentries <= 0) the result is -1 converted to u_long.
+ * Used by umap_mapids().
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+
+	/* No mapping for this id. */
+	return (-1);
+}
+
+/*
+ * umap_reverse_findid: reverse lookup of a user or group id in a map.
+ *
+ * Mirror image of umap_findid(): scans the first `nentries' rows of
+ * `map' in order and returns column 0 of the first row whose column 1
+ * equals `id'.  When no row matches (including nentries <= 0) the result
+ * is -1 converted to u_long.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+
+	/* No mapping yields this id. */
+	return (-1);
+}
+
+/*
+ * Return alias for target vnode if already exists, else 0.
+ *
+ * mp is the umap mount the alias must belong to; targetvp is the lower
+ * vnode.  A returned alias has been acquired with vget(), so the caller
+ * owns a reference on it.
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+ struct mount *mp;
+ struct vnode *targetvp;
+{
+ struct proc *p = curproc; /* XXX */
+ struct umap_node_hashhead *hd;
+ struct umap_node *a;
+ struct vnode *vp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+ printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+ /*
+ * Find hash base, and then search the (two-way) linked
+ * list looking for a umap_node structure which is referencing
+ * the target vnode. If found, then take a reference on the
+ * alias vnode with vget() (but NOT on the target vnode).
+ */
+ hd = UMAP_NHASH(targetvp);
+loop:
+ for (a = hd->lh_first; a != 0; a = a->umap_hash.le_next) {
+ if (a->umap_lowervp == targetvp &&
+ a->umap_vnode->v_mount == mp) {
+ vp = UMAPTOV(a);
+ /*
+ * We need vget for the VXLOCK
+ * stuff, but we don't want to lock
+ * the lower node.
+ */
+ if (vget(vp, 0, p)) {
+#ifdef UMAPFS_DIAGNOSTIC
+ printf ("umap_node_find: vget failed.\n");
+#endif
+ /* Rescan the chain from its head after a failed vget. */
+ goto loop;
+ }
+ return (vp);
+ }
+ }
+
+#ifdef UMAPFS_DIAGNOSTIC
+ printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+ return (0);
+}
+
+/*
+ * Make a new umap_node node.
+ * *vpp receives the alias vnode, lowervp is the target vnode.
+ * Maintain a reference to (targetvp).
+ *
+ * Returns 0 on success or the error from getnewvnode().  If another
+ * alias for lowervp turns up on the hash chain after the new vnode was
+ * obtained, the new vnode is discarded and *vpp is set to the existing
+ * alias instead; no extra reference is taken on lowervp in that case.
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+ struct mount *mp;
+ struct vnode *lowervp;
+ struct vnode **vpp;
+{
+ struct umap_node_hashhead *hd;
+ struct umap_node *xp;
+ struct vnode *othervp, *vp;
+ int error;
+
+ /* XXX This routine probably needs a node_alloc lock */
+
+ /*
+ * Do the MALLOC before the getnewvnode since doing so afterward
+ * might cause a bogus v_data pointer to get dereferenced
+ * elsewhere if MALLOC should block.
+ */
+ MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+ M_TEMP, M_WAITOK);
+
+ error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp);
+ if (error) {
+ FREE(xp, M_TEMP);
+ return (error);
+ }
+ vp = *vpp;
+
+ /* Tie the alias vnode and its umap_node together. */
+ vp->v_type = lowervp->v_type;
+ xp->umap_vnode = vp;
+ vp->v_data = xp;
+ xp->umap_lowervp = lowervp;
+ /*
+ * Before we insert our new node onto the hash chains,
+ * check to see if someone else has beaten us to it.
+ * (We could have slept in MALLOC.)
+ */
+ othervp = umap_node_find(mp, lowervp);
+ if (othervp) {
+ /*
+ * NOTE(review): vp->v_data still points at the umap_node
+ * freed here -- confirm nothing dereferences it when the
+ * discarded vnode is later recycled.
+ */
+ FREE(xp, M_TEMP);
+ vp->v_type = VBAD; /* node is discarded */
+ vp->v_usecount = 0; /* XXX */
+ *vpp = othervp;
+ return (0);
+ }
+ VREF(lowervp); /* Extra VREF will be vrele'd in umap_node_create */
+ hd = UMAP_NHASH(lowervp);
+ LIST_INSERT_HEAD(hd, xp, umap_hash);
+ return (0);
+}
+
+
+/*
+ * umap_node_create: obtain the alias vnode for a lower (target) vnode.
+ *
+ * An existing alias on this mount is reused when umap_node_find() locates
+ * one (it comes back already referenced by vget()); otherwise a fresh
+ * alias is built by umap_node_alloc(), which returns it referenced by
+ * getnewvnode().  On success one reference on targetvp is dropped with
+ * vrele() and the alias is stored in *newvpp.
+ *
+ * Returns 0, or the error from umap_node_alloc(); on error neither
+ * *newvpp nor the reference count of targetvp is touched.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+	int error;
+
+	aliasvp = umap_node_find(mp, targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	if (aliasvp != NULL)
+		vprint("umap_node_create: exists", aliasvp);
+	else
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+
+	if (aliasvp == NULL) {
+		/* No alias yet: make a new vnode that references targetvp. */
+		error = umap_node_alloc(mp, targetvp, &aliasvp);
+		if (error != 0)
+			return (error);
+	}
+
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+/*
+ * While non-zero, umap_checkvp() spins forever after reporting a problem
+ * instead of going on to panic; it is never cleared in this function.
+ */
+int umap_checkvp_barrier = 1;
+/*
+ * Diagnostic replacement for the plain UMAPVPTOLOWERVP accessor.
+ * Returns the lower vnode of alias vnode vp after checking that it is
+ * non-NULL and has a use count of at least 1.  On failure the first
+ * eight u_longs at the umap_node address are printed, the function waits
+ * on umap_checkvp_barrier and then panics.  fil/lno identify the call
+ * site and are only used by the disabled trace printf.
+ */
+struct vnode *
+umap_checkvp(vp, fil, lno)
+ struct vnode *vp;
+ char *fil;
+ int lno;
+{
+ struct umap_node *a = VTOUMAP(vp);
+#if 0
+ /*
+ * Can't do this check because vop_reclaim runs
+ * with funny vop vector.
+ */
+ if (vp->v_op != umap_vnodeop_p) {
+ printf ("umap_checkvp: on non-umap-node\n");
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic("umap_checkvp");
+ }
+#endif
+ if (a->umap_lowervp == NULL) {
+ /* Should never happen */
+ int i; u_long *p;
+ printf("vp = %x, ZERO ptr\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic("umap_checkvp");
+ }
+ if (a->umap_lowervp->v_usecount < 1) {
+ /* The alias is supposed to hold a reference on its lower vnode. */
+ int i; u_long *p;
+ printf("vp = %x, unref'ed lowervp\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic ("umap with unref'ed lowervp");
+ }
+#if 0
+ printf("umap %x/%d -> %x/%d [%s, %d]\n",
+ a->umap_vnode, a->umap_vnode->v_usecount,
+ a->umap_lowervp, a->umap_lowervp->v_usecount,
+ fil, lno);
+#endif
+ return (a->umap_lowervp);
+}
+#endif
+
+/*
+ * umap_mapids maps all of the ids in a credential, both user and group.
+ *
+ * v_mount is the umap mount whose maps are used; credp is rewritten in
+ * place.  NOCRED is left untouched.  cr_uid is replaced by its mapping,
+ * or by NOBODY when the uid map has no entry for it.  Each entry of
+ * cr_groups[] is replaced by its mapping, or by NULLGROUP when the gid
+ * map has no entry for it.
+ *
+ * The group scan is bounded by cr_ngroups so it can never walk off the
+ * end of cr_groups[].
+ * NOTE(review): the scan also stops at the first group id equal to 0,
+ * so that entry and any after it are not remapped -- confirm intended.
+ */
+
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	int i;
+	uid_t uid;
+	gid_t gid;
+
+	if (credp == NOCRED)
+		return;
+
+	/* Find uid entry in map */
+
+	uid = (uid_t) umap_findid(credp->cr_uid,
+	    MOUNTTOUMAPMOUNT(v_mount)->info_mapdata,
+	    MOUNTTOUMAPMOUNT(v_mount)->info_nentries);
+
+	/* umap_findid() reports "no entry" as -1. */
+	if (uid != -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+
+	/* Find gid entry in map */
+
+	gid = (gid_t) umap_findid(credp->cr_gid,
+	    MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata,
+	    MOUNTTOUMAPMOUNT(v_mount)->info_gnentries);
+
+	if (gid != -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/* Now we must map each of the set of groups in the cr_groups
+	   structure. */
+
+	i = 0;
+	while (i < credp->cr_ngroups && credp->cr_groups[i] != 0) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+		    MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata,
+		    MOUNTTOUMAPMOUNT(v_mount)->info_gnentries);
+
+		if (gid != -1)
+			credp->cr_groups[i++] = gid;
+		else
+			credp->cr_groups[i++] = NULLGROUP;
+	}
+}
diff --git a/sys/fs/umapfs/umap_vfsops.c b/sys/fs/umapfs/umap_vfsops.c
new file mode 100644
index 0000000..03b4cb4
--- /dev/null
+++ b/sys/fs/umapfs/umap_vfsops.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 1992, 1993, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vfsops.c 8.8 (Berkeley) 5/14/95
+ *
+ * $Id: umap_vfsops.c,v 1.22 1998/05/06 05:29:36 msmith Exp $
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+static MALLOC_DEFINE(M_UMAPFSMNT, "UMAP mount", "UMAP mount structure");
+
+static int umapfs_fhtovp __P((struct mount *mp, struct fid *fidp,
+ struct sockaddr *nam, struct vnode **vpp,
+ int *exflagsp, struct ucred **credanonp));
+static int umapfs_mount __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+static int umapfs_quotactl __P((struct mount *mp, int cmd, uid_t uid,
+ caddr_t arg, struct proc *p));
+static int umapfs_root __P((struct mount *mp, struct vnode **vpp));
+static int umapfs_start __P((struct mount *mp, int flags, struct proc *p));
+static int umapfs_statfs __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+static int umapfs_sync __P((struct mount *mp, int waitfor,
+ struct ucred *cred, struct proc *p));
+static int umapfs_unmount __P((struct mount *mp, int mntflags,
+ struct proc *p));
+static int umapfs_vget __P((struct mount *mp, ino_t ino,
+ struct vnode **vpp));
+static int umapfs_vptofh __P((struct vnode *vp, struct fid *fhp));
+
+/*
+ * Mount umap layer
+ *
+ * mp is the mount being set up, path the user-space mount point name,
+ * data a user-space pointer to a struct umap_args, ndp a nameidata used
+ * to look up the target directory, and p the calling process.
+ *
+ * Returns 0 on success.  Returns EOPNOTSUPP for MNT_UPDATE, EINVAL when
+ * the target is not a directory or when a map count is negative or
+ * larger than the fixed tables in struct umap_mount, and otherwise the
+ * error from copyin(), namei() or umap_node_create().  Every failure
+ * after the lookup releases the locked lower root vnode, and the
+ * umap_mount structure once it has been allocated.
+ */
+static int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+#ifdef UMAP_DIAGNOSTIC
+	int i;
+#endif
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Get argument
+	 */
+	error = copyin(data, (caddr_t)&args, sizeof(struct umap_args));
+	if (error)
+		return (error);
+
+	/*
+	 * The counts come from user space and size the copyin()s into the
+	 * fixed-size map tables below, so reject anything out of range
+	 * before any resource is acquired.
+	 */
+	if (args.nentries < 0 || args.nentries > MAPFILEENTRIES ||
+	    args.gnentries < 0 || args.gnentries > GMAPFILEENTRIES)
+		return (EINVAL);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+	    UIO_USERSPACE, args.target, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	if (lowerrootvp->v_type != VDIR) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+	    M_UMAPFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Now copy in the number of entries and maps for umap mapping.
+	 */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf(" %d maps to %d\n", amp->info_mapdata[i][0],
+		    amp->info_mapdata[i][1]);
+#endif
+
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error)
+		goto bad;
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf(" group %d maps to %d\n",
+		    amp->info_gmapdata[i][0],
+		    amp->info_gmapdata[i][1]);
+#endif
+
+	/*
+	 * Save reference. Each mount also holds
+	 * a reference on the root vnode.
+	 * Make sure the node alias worked before touching vp: it is
+	 * only assigned on success.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	if (error)
+		goto bad;
+
+	/*
+	 * Unlock the node (either the lower or the alias)
+	 */
+	VOP_UNLOCK(vp, 0, p);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	vfs_getnewfsid(mp);
+
+	/* Record both names, zero-filling the rest of each buffer. */
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+	(void)umapfs_statfs(mp, &mp->mnt_stat, p);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/*
+	 * The lower root is still locked and referenced from namei()
+	 * (LOCKLEAF), so it needs vput() rather than a bare vrele().
+	 */
+	vput(lowerrootvp);
+	free(amp, M_UMAPFSMNT);	/* XXX */
+	return (error);
+}
+
+/*
+ * VFS start. Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.
+ *
+ * All three arguments are ignored; always returns 0.
+ */
+static int
+umapfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return (0);
+ /* return (VFS_START(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, flags, p)); */
+}
+
+/*
+ * Free reference to umap layer
+ *
+ * Returns EBUSY if the alias root vnode has more than one user, the
+ * error from vflush() if flushing the mount's other vnodes fails, and 0
+ * once the root vnode has been released and the umap_mount freed.
+ * MNT_FORCE in mntflags is translated to FORCECLOSE for vflush().
+ */
+static int
+umapfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct vnode *umapm_rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+ int error;
+ int flags = 0;
+
+#ifdef UMAPFS_DIAGNOSTIC
+ printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+
+ /*
+ * Clear out buffer cache. I don't think we
+ * ever get anything cached at this level at the
+ * moment, but who knows...
+ */
+#ifdef notyet
+ mntflushbuf(mp, 0);
+ if (mntinvalbuf(mp, 1))
+ return (EBUSY);
+#endif
+ /* Only the mount's own reference may remain on the root. */
+ if (umapm_rootvp->v_usecount > 1)
+ return (EBUSY);
+ /* Flush every vnode of this mount except the root. */
+ error = vflush(mp, umapm_rootvp, flags);
+ if (error)
+ return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+ vprint("alias root of lower", umapm_rootvp);
+#endif
+ /*
+ * Release the reference the mount held on the alias root vnode
+ */
+ vrele(umapm_rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(umapm_rootvp);
+ /*
+ * Finally, throw away the umap_mount structure
+ */
+ free(mp->mnt_data, M_UMAPFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+/*
+ * umapfs_root: hand back the root vnode of a umap mount.
+ *
+ * Takes a new reference on the alias root vnode saved in the umap_mount,
+ * locks it exclusively (LK_RETRY) on behalf of the current process and
+ * stores it in *vpp.  Always returns 0.
+ */
+static int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct vnode *rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+	    rootvp,
+	    UMAPVPTOLOWERVP(rootvp)
+	    );
+#endif
+
+	/* Return a locked reference to the root. */
+	VREF(rootvp);
+	vn_lock(rootvp, LK_EXCLUSIVE | LK_RETRY, p);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quota control: passed straight through to the lower filesystem's
+ * VFS_QUOTACTL with the arguments unchanged; its result is returned.
+ */
+static int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+ return (VFS_QUOTACTL(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, cmd, uid, arg, p));
+}
+
+/*
+ * umapfs_statfs: report filesystem statistics for a umap mount.
+ *
+ * The numbers come from VFS_STATFS on the lower filesystem, collected in
+ * a zeroed scratch structure; type, flags, sizes and block/file counts
+ * are copied into *sbp.  The fsid and the two mount names always come
+ * from this mount's own mnt_stat, so they are copied only when sbp is
+ * not mnt_stat itself.
+ *
+ * Returns 0, or the error from the lower VFS_STATFS (in which case *sbp
+ * is left unmodified).
+ */
+static int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	bzero(&lower, sizeof(lower));
+
+	error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &lower, p);
+	if (error != 0)
+		return (error);
+
+	/* Take the "interesting" numbers from the lower layer. */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* The caller passed our own mnt_stat: identity fields are in place. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync: nothing to do at this layer.  All arguments are ignored and 0
+ * is always returned.
+ */
+static int
+umapfs_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ /*
+ * XXX - Assumes no data cached at umap layer.
+ */
+ return (0);
+}
+
+/*
+ * Vget: forwarded unchanged to VFS_VGET on the lower filesystem's mount.
+ * NOTE(review): *vpp is whatever the lower filesystem returns; no alias
+ * vnode is created here -- confirm callers expect a lower-layer vnode.
+ */
+static int
+umapfs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (VFS_VGET(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, ino, vpp));
+}
+
+/*
+ * File handle to vnode: forwarded unchanged to VFS_FHTOVP on the lower
+ * filesystem's mount; its result is returned as is.
+ */
+static int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+ struct mount *mp;
+ struct fid *fidp;
+ struct sockaddr *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred**credanonp;
+{
+
+ return (VFS_FHTOVP(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, fidp, nam, vpp, exflagsp,credanonp));
+}
+
+/*
+ * Vnode to file handle: the handle is produced by VFS_VPTOFH on the
+ * lower vnode behind alias vnode vp.
+ */
+static int
+umapfs_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+ return (VFS_VPTOFH(UMAPVPTOLOWERVP(vp), fhp));
+}
+
+/*
+ * VFS operations vector for the umap layer.  The initializer is
+ * positional, so the order of the entries must match the member order
+ * of struct vfsops.  umapfs_init is defined in umap_subr.c.
+ */
+static struct vfsops umap_vfsops = {
+ umapfs_mount,
+ umapfs_start,
+ umapfs_unmount,
+ umapfs_root,
+ umapfs_quotactl,
+ umapfs_statfs,
+ umapfs_sync,
+ umapfs_vget,
+ umapfs_fhtovp,
+ umapfs_vptofh,
+ umapfs_init,
+};
+
+/* Register the filesystem under the name "umap" with VFCF_LOOPBACK. */
+VFS_SET(umap_vfsops, umap, VFCF_LOOPBACK);
diff --git a/sys/fs/umapfs/umap_vnops.c b/sys/fs/umapfs/umap_vnops.c
new file mode 100644
index 0000000..893e1e5
--- /dev/null
+++ b/sys/fs/umapfs/umap_vnops.c
@@ -0,0 +1,566 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vnops.c 8.6 (Berkeley) 5/22/95
+ * $Id: umap_vnops.c,v 1.25 1998/07/30 17:40:45 bde Exp $
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Debug switch consulted throughout this file: when non-zero, the
+ * bypass, getattr and rename paths printf() the ids they see.
+ * It is declared to sysctl below the _debug parent as
+ * "umapfs_bug_bypass" with CTLFLAG_RW.
+ */
+static int umap_bug_bypass = 0; /* for debugging: enables bypass printf'ing */
+SYSCTL_INT(_debug, OID_AUTO, umapfs_bug_bypass, CTLFLAG_RW,
+ &umap_bug_bypass, 0, "");
+
+static int umap_bwrite __P((struct vop_bwrite_args *ap));
+static int umap_bypass __P((struct vop_generic_args *ap));
+static int umap_getattr __P((struct vop_getattr_args *ap));
+static int umap_inactive __P((struct vop_inactive_args *ap));
+static int umap_lock __P((struct vop_lock_args *ap));
+static int umap_print __P((struct vop_print_args *ap));
+static int umap_reclaim __P((struct vop_reclaim_args *ap));
+static int umap_rename __P((struct vop_rename_args *ap));
+static int umap_strategy __P((struct vop_strategy_args *ap));
+static int umap_unlock __P((struct vop_unlock_args *ap));
+
+/*
+ * umap_bypass: generic pass-through for every vnode operation that the
+ * umap layer does not implement itself.
+ *
+ * This is the 10-Apr-92 bypass routine.
+ * See null_vnops.c:null_bypass for more details.
+ *
+ * The routine works directly on the caller's argument structure:
+ *   1. every umap vnode named by the operation descriptor is swapped
+ *      for its lower vnode (the originals are kept in old_vps[]);
+ *   2. the credential argument and the credential stored in the
+ *      componentname argument are replaced by crdup()'ed copies which
+ *      are run through umap_mapids();
+ *   3. the operation is invoked on the first lower vnode;
+ *   4. the original vnodes are put back, a returned *vpp is wrapped by
+ *      umap_node_create(), and the duplicated credentials are freed and
+ *      the caller's credentials restored.
+ *
+ * Returns the error of the lower operation, or of umap_node_create().
+ */
+static int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp = 0, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode *vp1 = 0;
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+		    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		if (i == 0) {
+			vp1 = *vps_p[0];
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (Must map first vp or vclean fails.)
+		 *
+		 * A secondary vnode slot may also be empty, so test the
+		 * pointer before looking at its v_op; an empty slot is
+		 * left untouched just like a foreign vnode.
+		 */
+		if (i && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != umap_vnodeop_p)) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			/*
+			 * *this_vp_p now names the lower vnode.  If the
+			 * operation releases this argument, take an extra
+			 * reference on the lower vnode here; the matching
+			 * vrele of our own vnode happens after the call.
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Fix the credentials.  (That's the purpose of this layer.)
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**,
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values */
+
+		savecredp = (*credpp);
+		if (savecredp != NOCRED)
+			(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		/*
+		 * When the caller passed NOCRED nothing was duplicated and
+		 * credp is still NOCRED: there is nothing to print or map.
+		 */
+		if (umap_bug_bypass && credp != NOCRED && credp->cr_uid != 0)
+			printf("umap_bypass: user was %lu, group %lu\n",
+			    (u_long)credp->cr_uid, (u_long)credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		if (credp != NOCRED)
+			umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp != NOCRED && credp->cr_uid != 0)
+			printf("umap_bypass: user now %lu, group %lu\n",
+			    (u_long)credp->cr_uid, (u_long)credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed.  If there is one, it better get mapped, too.
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**,
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		if (savecompcredp != NOCRED)
+			(*compnamepp)->cn_cred = crdup(savecompcredp);
+		compcredp = (*compnamepp)->cn_cred;
+
+		/* Same NOCRED handling as for the plain credential above. */
+		if (umap_bug_bypass && compcredp != NOCRED &&
+		    compcredp->cr_uid != 0)
+			printf(
+		"umap_bypass: component credit user was %lu, group %lu\n",
+			    (u_long)compcredp->cr_uid,
+			    (u_long)compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		if (compcredp != NOCRED)
+			umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp != NOCRED &&
+		    compcredp->cr_uid != 0)
+			printf(
+		"umap_bypass: component credit user now %lu, group %lu\n",
+			    (u_long)compcredp->cr_uid,
+			    (u_long)compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+		    descp->vdesc_vpp_offset, ap);
+		if (*vppp)
+			error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	/*
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %lu\n",
+			    (u_long)credp->cr_uid);
+
+		if (savecredp != NOCRED) {
+			crfree(credp);
+			(*credpp) = savecredp;
+			if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+				printf(
+				    "umap_bypass: returning-user now %lu\n\n",
+				    (u_long)(*credpp)->cr_uid);
+		}
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf(
+			    "umap_bypass: returning-component-user was %lu\n",
+			    (u_long)compcredp->cr_uid);
+
+		if (savecompcredp != NOCRED) {
+			crfree(compcredp);
+			(*compnamepp)->cn_cred = savecompcredp;
+			/*
+			 * Report the credential that was just restored.
+			 * compcredp has been handed to crfree() and must
+			 * not be read any more, and the plain credential
+			 * argument (credpp) is unrelated to this branch.
+			 */
+			if (umap_bug_bypass && savecompcredp->cr_uid != 0)
+				printf(
+				    "umap_bypass: returning-component-user now %lu\n",
+				    (u_long)savecompcredp->cr_uid);
+		}
+	}
+
+	return (error);
+}
+
+
+/*
+ * umap_getattr: getattr for the umap layer.
+ *
+ * The attributes are fetched from the lower layer through umap_bypass().
+ * On success three fields of *ap->a_vap are rewritten:
+ *   va_fsid  - set to f_fsid.val[0] of the umap mount;
+ *   va_uid   - reverse-mapped through the mount's uid table, or NOBODY
+ *              when the table has no entry for it;
+ *   va_gid   - reverse-mapped through the mount's gid table, or
+ *              NULLGROUP when the table has no entry for it.
+ *
+ * Returns the error from umap_bypass(), otherwise 0.
+ */
+static int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/*
+	 * Keep the ids in their native types.  Holding them in a short
+	 * would truncate (and sign-extend) large ids before the reverse
+	 * lookup, so such ids could never match a table entry.
+	 */
+	uid_t uid;
+	gid_t gid;
+	int error, tmpid, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+	struct vnodeop_desc *descp = ap->a_desc;
+
+	error = umap_bypass((struct vop_generic_args *)ap);
+	if (error)
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site.  This involves
+	 * finding the returned id in the mapping information,
+	 * translating it into the id on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap.  Ids without a
+	 * table entry are reported as NOBODY / NULLGROUP.
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %lu, mapped gid = %lu\n",
+		    (u_long)uid, (u_long)gid);
+
+	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
+	nentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries =  MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata =  (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode.  Since it's a reverse
+	   map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid(uid, mapdata, nentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %lu\n",
+			    (u_long)uid);
+	} else
+		ap->a_vap->va_uid = (uid_t) NOBODY;
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %lu\n",
+			    (u_long)gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;
+
+	return (0);
+}
+
+/*
+ * We need to process our own vnode lock and then clear the
+ * interlock flag as it applies only to our vnode, not the
+ * vnodes below us on the stack.
+ */
+static int
+umap_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ int a_flags;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /* Handle the request for our own vnode first. */
+ vop_nolock(ap);
+ /* A drain request stops here and is reported as successful. */
+ if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN)
+ return (0);
+ /* The interlock belongs to our vnode only; strip it before descending. */
+ ap->a_flags &= ~LK_INTERLOCK;
+ /*
+ * NOTE(review): the request is passed down through null_bypass(),
+ * not umap_bypass(); presumably no credential mapping is wanted on
+ * the lock path -- confirm that null_bypass() copes with umap vnodes.
+ */
+ return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * We need to process our own vnode unlock and then clear the
+ * interlock flag as it applies only to our vnode, not the
+ * vnodes below us on the stack.
+ *
+ * The definition carries "static" to agree with its forward
+ * declaration and with every other operation in this file.
+ *
+ * Returns whatever null_bypass() returns for the lower layer.
+ */
+static int
+umap_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/* Handle the request for our own vnode first. */
+	vop_nounlock(ap);
+	/* The interlock belongs to our vnode only; strip it before descending. */
+	ap->a_flags &= ~LK_INTERLOCK;
+	/*
+	 * NOTE(review): like umap_lock(), this descends through
+	 * null_bypass() rather than umap_bypass() -- confirm intended.
+	 */
+	return (null_bypass((struct vop_generic_args *)ap));
+}
+
+/*
+ * Inactive handler for a umap vnode.
+ * Always returns 0; the result of VOP_INACTIVE on the lower vnode is
+ * not examined.
+ */
+static int
+umap_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct umap_node *xp = VTOUMAP(vp);
+ struct vnode *lowervp = xp->umap_lowervp;
+ /*
+ * Do not go through umap_bypass here.  The inactive call is
+ * forwarded directly to the lower vnode and our own vnode is
+ * unlocked.  The reference on lowervp is deliberately kept:
+ * it is vrele'd in umap_reclaim, so that until then our
+ * umap_node is in the cache and reusable.
+ */
+ VOP_INACTIVE(lowervp, ap->a_p);
+ VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+ return (0);
+}
+
+/*
+ * Reclaim handler for a umap vnode: detaches and frees the umap_node
+ * hanging off the vnode and drops the reference on the lower vnode.
+ * Always returns 0.
+ */
+static int
+umap_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct umap_node *xp = VTOUMAP(vp);
+ /* Remember the lower vnode now; xp is freed before it is released. */
+ struct vnode *lowervp = xp->umap_lowervp;
+
+ /* After this assignment, this node will not be re-used. */
+ xp->umap_lowervp = NULL;
+ /* Unlink from the umap_hash list before the memory goes away. */
+ LIST_REMOVE(xp, umap_hash);
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = NULL;
+ /* Drop the lower-vnode reference that was held until reclaim. */
+ vrele(lowervp);
+ return (0);
+}
+
+/*
+ * Strategy for the umap layer.
+ * The buffer's b_vp is pointed at the lower vnode for the duration of
+ * the lower VOP_STRATEGY call and is put back afterwards.
+ * Returns the result of the lower VOP_STRATEGY.
+ */
+static int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *uppervp = bufp->b_vp;	/* restored before returning */
+	int rv;
+
+	bufp->b_vp = UMAPVPTOLOWERVP(uppervp);
+	rv = VOP_STRATEGY(bufp->b_vp, bufp);
+	bufp->b_vp = uppervp;
+
+	return (rv);
+}
+
+/*
+ * Bwrite for the umap layer.
+ * The buffer's b_vp is pointed at the lower vnode while VOP_BWRITE
+ * runs and is put back afterwards.
+ * Returns the result of VOP_BWRITE.
+ */
+static int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bufp = ap->a_bp;
+	struct vnode *uppervp = bufp->b_vp;	/* restored before returning */
+	int rv;
+
+	bufp->b_vp = UMAPVPTOLOWERVP(uppervp);
+	rv = VOP_BWRITE(bufp);
+	bufp->b_vp = uppervp;
+
+	return (rv);
+}
+
+
+/*
+ * Print handler: emits one line with the VT_UMAPFS tag, the address of
+ * the umap vnode and the address of its lower vnode.  Always returns 0.
+ */
+static int
+umap_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ printf("\ttag VT_UMAPFS, vp=%p, lowervp=%p\n", vp, UMAPVPTOLOWERVP(vp));
+ return (0);
+}
+
+/*
+ * umap_rename: rename for the umap layer.
+ *
+ * Rename is irregular, having two componentname structures.
+ * umap_bypass() maps only the credential of the componentname named by
+ * the operation descriptor, so the credential in a_tcnp is mapped here
+ * first; the bypass takes care of the rest.  The caller's a_tcnp
+ * credential is restored before returning.
+ *
+ * Returns the error from umap_bypass().
+ */
+static int
+umap_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	int error;
+	struct componentname *compnamep;
+	struct ucred *compcredp, *savecompcredp;
+	struct vnode *vp;
+
+	vp = ap->a_fdvp;
+	compnamep = ap->a_tcnp;
+
+	/*
+	 * Work on a private copy so the caller's credential is never
+	 * modified.  As in umap_bypass(), NOCRED is neither duplicated,
+	 * mapped nor freed.
+	 */
+	savecompcredp = compnamep->cn_cred;
+	if (savecompcredp != NOCRED)
+		compnamep->cn_cred = crdup(savecompcredp);
+	compcredp = compnamep->cn_cred;
+
+	if (umap_bug_bypass && compcredp != NOCRED && compcredp->cr_uid != 0)
+		printf(
+	"umap_rename: rename component credit user was %lu, group %lu\n",
+		    (u_long)compcredp->cr_uid, (u_long)compcredp->cr_gid);
+
+	/* Map all ids in the credential structure. */
+
+	if (compcredp != NOCRED)
+		umap_mapids(vp->v_mount, compcredp);
+
+	if (umap_bug_bypass && compcredp != NOCRED && compcredp->cr_uid != 0)
+		printf(
+	"umap_rename: rename component credit user now %lu, group %lu\n",
+		    (u_long)compcredp->cr_uid, (u_long)compcredp->cr_gid);
+
+	error = umap_bypass((struct vop_generic_args *)ap);
+
+	/* Restore the additional mapped componentname cred structure. */
+
+	if (savecompcredp != NOCRED) {
+		crfree(compcredp);
+		compnamep->cn_cred = savecompcredp;
+	}
+
+	return error;
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently. They should
+ * go away with a merged buffer/block cache.
+ *
+ */
+/* Operations vector of the umap layer; compared against v_op in umap_bypass. */
+vop_t **umap_vnodeop_p;
+/*
+ * Operation table.  The first entry binds vop_default_desc to
+ * umap_bypass, so operations without an entry of their own presumably
+ * end up there -- confirm against the vnodeop table setup code.
+ * The table is terminated by a { NULL, NULL } entry.
+ */
+static struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) umap_bypass },
+ { &vop_bwrite_desc, (vop_t *) umap_bwrite },
+ { &vop_getattr_desc, (vop_t *) umap_getattr },
+ { &vop_inactive_desc, (vop_t *) umap_inactive },
+ { &vop_lock_desc, (vop_t *) umap_lock },
+ { &vop_print_desc, (vop_t *) umap_print },
+ { &vop_reclaim_desc, (vop_t *) umap_reclaim },
+ { &vop_rename_desc, (vop_t *) umap_rename },
+ { &vop_strategy_desc, (vop_t *) umap_strategy },
+ { &vop_unlock_desc, (vop_t *) umap_unlock },
+ { NULL, NULL }
+};
+/* Ties the vector pointer above to the entry table. */
+static struct vnodeopv_desc umap_vnodeop_opv_desc =
+ { &umap_vnodeop_p, umap_vnodeop_entries };
+
+VNODEOP_SET(umap_vnodeop_opv_desc);
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
new file mode 100644
index 0000000..6a4aa22
--- /dev/null
+++ b/sys/fs/unionfs/union.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union.h 8.9 (Berkeley) 12/10/94
+ * $Id: union.h,v 1.12 1998/02/26 03:23:51 kato Exp $
+ */
+
+/*
+ * Mount arguments for a union mount.
+ * This structure is declared outside the KERNEL-only section of this
+ * header.
+ */
+struct union_args {
+ char *target; /* Target of loopback */
+ int mntflags; /* Options on the mount */
+};
+
+/* Operation modes of a union mount; UNMNT_OPMASK covers all of them. */
+#define UNMNT_ABOVE 0x0001 /* Target appears above mount point */
+#define UNMNT_BELOW 0x0002 /* Target appears below mount point */
+#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */
+#define UNMNT_OPMASK 0x0003
+
+/*
+ * Per-mount data of a union mount; reached from a struct mount through
+ * MOUNTTOUNIONMOUNT().
+ */
+struct union_mount {
+ struct vnode *um_uppervp; /* upper vnode of the mount */
+ struct vnode *um_lowervp; /* lower vnode of the mount */
+ struct ucred *um_cred; /* Credentials of user calling mount */
+ int um_cmode; /* cmask from mount process */
+ int um_op; /* Operation mode */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is the mode bits used to create a shadow directory:
+ * VRWXMODE replicated at shifts 0, 3 and 6.
+ * UN_FILEMODE is built the same way from VRWMODE (no VEXEC).
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+
+/*
+ * A cache of vnode references.
+ * One union_node hangs off each union vnode (see VTOUNION()) and records
+ * the upper and lower vnodes it stands for.  Nodes are chained through
+ * un_cache; un_flags holds the UN_* bits.
+ */
+struct union_node {
+ LIST_ENTRY(union_node) un_cache; /* Hash chain */
+ struct vnode *un_vnode; /* Back pointer */
+ struct vnode *un_uppervp; /* overlaying object */
+ struct vnode *un_lowervp; /* underlying object */
+ struct vnode *un_dirvp; /* Parent dir of uppervp */
+ struct vnode *un_pvp; /* Parent vnode */
+ char *un_path; /* saved component name */
+ int un_hash; /* saved un_path hash value */
+ int un_openl; /* # of opens on lowervp */
+ unsigned int un_flags; /* UN_* flag bits */
+ struct vnode **un_dircache; /* cached union stack */
+ off_t un_uppersz; /* size of upper object */
+ off_t un_lowersz; /* size of lower object */
+#ifdef DIAGNOSTIC
+ pid_t un_pid; /* only present in DIAGNOSTIC builds */
+#endif
+};
+
+/*
+ * Flag bits.  They are stored in union_node.un_flags; UN_WANT and
+ * UN_LOCKED are used in the same way for the per-bucket lock words of
+ * the union node hash table.
+ */
+#define UN_WANT 0x01 /* Someone is sleeping on the lock */
+#define UN_LOCKED 0x02 /* Lock is held */
+#define UN_ULOCK 0x04 /* Upper node is locked */
+#define UN_KLOCK 0x08 /* Keep upper node locked on vput */
+#define UN_CACHED 0x10 /* In union cache */
+
+extern int union_allocvp __P((struct vnode **, struct mount *,
+ struct vnode *, struct vnode *,
+ struct componentname *, struct vnode *,
+ struct vnode *, int));
+extern int union_freevp __P((struct vnode *));
+extern struct vnode *union_dircache __P((struct vnode *, struct proc *));
+extern int union_copyup __P((struct union_node *, int, struct ucred *,
+ struct proc *));
+extern int union_dowhiteout __P((struct union_node *, struct ucred *,
+ struct proc *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+ struct componentname *, struct vnode **));
+extern int union_mkwhiteout __P((struct union_mount *, struct vnode *,
+ struct componentname *, char *));
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+ struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newsize __P((struct vnode *, off_t, off_t));
+
+extern int (*union_dircheckp) __P((struct proc *, struct vnode **,
+ struct file *));
+
+/*
+ * Accessors.
+ * MOUNTTOUNIONMOUNT - union_mount stored in mp->mnt_data.
+ * VTOUNION          - union_node stored in vp->v_data.
+ * UNIONTOV          - back pointer from a union_node to its vnode.
+ * LOWERVP/UPPERVP   - lower / upper vnode behind a union vnode.
+ * OTHERVP           - the upper vnode if there is one, else the lower
+ *                     one; note that vp is evaluated more than once.
+ */
+#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data))
+#define VTOUNION(vp) ((struct union_node *)(vp)->v_data)
+#define UNIONTOV(un) ((un)->un_vnode)
+#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp)
+#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp)
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp))
+
+extern vop_t **union_vnodeop_p;
+extern struct vfsops union_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
new file mode 100644
index 0000000..7559b6e
--- /dev/null
+++ b/sys/fs/unionfs/union_subr.c
@@ -0,0 +1,1218 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_subr.c 8.20 (Berkeley) 5/20/95
+ * $Id: union_subr.c,v 1.35 1998/12/07 21:58:34 archie Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/fcntl.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/module.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h> /* for vnode_pager_setsize */
+#include <vm/vm_zone.h>
+#include <miscfs/union/union.h>
+
+#include <sys/proc.h>
+
+extern int union_init __P((void));
+
+/* must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/*
+ * Bucket index for an (upper, lower) vnode pair: the two pointer values
+ * are added, shifted right by 8 and masked to NHASH buckets.
+ * NOTE(review): the argument l is not parenthesised inside the cast, so
+ * callers should pass a plain identifier or an already parenthesised
+ * expression.
+ */
+#define UNION_HASH(u, l) \
+ (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1))
+
+/* Hash chains of union nodes, one list head per bucket. */
+static LIST_HEAD(unhead, union_node) unhead[NHASH];
+/* Per-bucket lock word holding UN_LOCKED / UN_WANT. */
+static int unvplock[NHASH];
+
+static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp,
+ int *cntp));
+static int union_list_lock __P((int ix));
+static void union_list_unlock __P((int ix));
+static int union_relookup __P((struct union_mount *um, struct vnode *dvp,
+ struct vnode **vpp,
+ struct componentname *cnp,
+ struct componentname *cn, char *path,
+ int pathlen));
+static void union_updatevp __P((struct union_node *un,
+ struct vnode *uppervp,
+ struct vnode *lowervp));
+static void union_newlower __P((struct union_node *, struct vnode *));
+static void union_newupper __P((struct union_node *, struct vnode *));
+static int union_copyfile __P((struct vnode *, struct vnode *,
+ struct ucred *, struct proc *));
+static int union_vn_create __P((struct vnode **, struct union_node *,
+ struct proc *));
+static int union_vn_close __P((struct vnode *, int, struct ucred *,
+ struct proc *));
+
+/*
+ * Set up the union node hash table: every bucket list is initialised
+ * empty and every bucket lock word is cleared.  Always returns 0.
+ */
+int
+union_init()
+{
+	struct unhead *bucket;
+
+	for (bucket = &unhead[0]; bucket < &unhead[NHASH]; bucket++)
+		LIST_INIT(bucket);
+
+	bzero((caddr_t) unvplock, sizeof(unvplock));
+
+	return (0);
+}
+
+/*
+ * Try to take the lock of hash bucket ix.
+ * Returns 0 when the lock was free and is now held by the caller.
+ * Returns 1 when the lock was busy: the caller has slept on the lock
+ * word but does NOT hold the lock and has to try again.
+ */
+static int
+union_list_lock(ix)
+	int ix;
+{
+	int *lockword = &unvplock[ix];
+
+	if ((*lockword & UN_LOCKED) == 0) {
+		/* Free: claim it. */
+		*lockword |= UN_LOCKED;
+		return (0);
+	}
+
+	/* Busy: ask for a wakeup and wait for the holder to release. */
+	*lockword |= UN_WANT;
+	(void) tsleep((caddr_t) lockword, PINOD, "unllck", 0);
+	return (1);
+}
+
+/*
+ * Release the lock of hash bucket ix and wake up anybody who
+ * registered interest in it through UN_WANT.
+ */
+static void
+union_list_unlock(ix)
+	int ix;
+{
+	int *lockword = &unvplock[ix];
+
+	*lockword &= ~UN_LOCKED;
+
+	if ((*lockword & UN_WANT) == 0)
+		return;
+
+	*lockword &= ~UN_WANT;
+	wakeup((caddr_t) lockword);
+}
+
+/*
+ * Replace the upper and/or lower vnode recorded in a union_node and
+ * move the node to the hash bucket that matches the new pair.
+ *
+ * un      - node to update
+ * uppervp - new upper vnode (may be NULLVP)
+ * lowervp - new lower vnode (may be NULLVP)
+ *
+ * A vnode that is being replaced is vrele'd.  Replacing the lower vnode
+ * also discards the saved path and the reference on un_dirvp.  The
+ * cached size of a replaced layer is reset to VNOVAL.
+ */
+static void
+union_updatevp(un, uppervp, lowervp)
+ struct union_node *un;
+ struct vnode *uppervp;
+ struct vnode *lowervp;
+{
+ /* Bucket of the pair currently stored in the node ... */
+ int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+ /* ... and bucket of the pair it is about to get. */
+ int nhash = UNION_HASH(uppervp, lowervp);
+ /* The node stays in the cache only if at least one vnode remains. */
+ int docache = (lowervp != NULLVP || uppervp != NULLVP);
+ int lhash, uhash;
+
+ /*
+ * Ensure locking is ordered from lower to higher
+ * to avoid deadlocks.
+ */
+ if (nhash < ohash) {
+ lhash = nhash;
+ uhash = ohash;
+ } else {
+ lhash = ohash;
+ uhash = nhash;
+ }
+
+ /* union_list_lock() returns non-zero after sleeping: retry until held. */
+ if (lhash != uhash)
+ while (union_list_lock(lhash))
+ continue;
+
+ while (union_list_lock(uhash))
+ continue;
+
+ /* Leave the old chain if the bucket changes or caching stops. */
+ if (ohash != nhash || !docache) {
+ if (un->un_flags & UN_CACHED) {
+ un->un_flags &= ~UN_CACHED;
+ LIST_REMOVE(un, un_cache);
+ }
+ }
+
+ /* The old bucket is done with; the new one stays locked to the end. */
+ if (ohash != nhash)
+ union_list_unlock(ohash);
+
+ if (un->un_lowervp != lowervp) {
+ if (un->un_lowervp) {
+ vrele(un->un_lowervp);
+ /* Saved path and parent dir go away with the old lower vnode. */
+ if (un->un_path) {
+ free(un->un_path, M_TEMP);
+ un->un_path = 0;
+ }
+ if (un->un_dirvp) {
+ vrele(un->un_dirvp);
+ un->un_dirvp = NULLVP;
+ }
+ }
+ un->un_lowervp = lowervp;
+ un->un_lowersz = VNOVAL;
+ }
+
+ if (un->un_uppervp != uppervp) {
+ if (un->un_uppervp)
+ vrele(un->un_uppervp);
+
+ un->un_uppervp = uppervp;
+ un->un_uppersz = VNOVAL;
+ }
+
+ /* Re-insert only when the node actually changed bucket. */
+ if (docache && (ohash != nhash)) {
+ LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+ un->un_flags |= UN_CACHED;
+ }
+
+ union_list_unlock(nhash);
+}
+
+/*
+ * Install lowervp as the lower vnode of un, keeping the current upper
+ * vnode.  All the work is done by union_updatevp().
+ */
+static void
+union_newlower(un, lowervp)
+ struct union_node *un;
+ struct vnode *lowervp;
+{
+
+ union_updatevp(un, un->un_uppervp, lowervp);
+}
+
+/*
+ * Install uppervp as the upper vnode of un, keeping the current lower
+ * vnode.  All the work is done by union_updatevp().
+ */
+static void
+union_newupper(un, uppervp)
+ struct union_node *un;
+ struct vnode *uppervp;
+{
+
+ union_updatevp(un, uppervp, un->un_lowervp);
+}
+
+/*
+ * Record new sizes for the layers behind a union vnode.
+ *
+ * vp      - union vnode; anything but a regular file is ignored
+ * uppersz - new size of the upper object, or VNOVAL for "no news"
+ * lowersz - new size of the lower object, or VNOVAL for "no news"
+ *
+ * A size that differs from the cached one is stored in the union_node.
+ * If at least one cached size changed, the vm layer is told the new
+ * size, the upper layer's size taking priority over the lower one.
+ */
+void
+union_newsize(vp, uppersz, lowersz)
+	struct vnode *vp;
+	off_t uppersz, lowersz;
+{
+	struct union_node *un;
+	off_t sz;
+	int upper_changed, lower_changed;
+
+	/* only interested in regular files */
+	if (vp->v_type != VREG)
+		return;
+
+	un = VTOUNION(vp);
+
+	upper_changed = (uppersz != VNOVAL) && (un->un_uppersz != uppersz);
+	lower_changed = (lowersz != VNOVAL) && (un->un_lowersz != lowersz);
+
+	if (upper_changed)
+		un->un_uppersz = uppersz;
+	if (lower_changed)
+		un->un_lowersz = lowersz;
+
+	/* Nothing new to report to the pager. */
+	if (!upper_changed && !lower_changed)
+		return;
+
+	/* The upper layer wins when both changed. */
+	sz = upper_changed ? un->un_uppersz : un->un_lowersz;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union: %s size now %ld\n",
+	    uppersz != VNOVAL ? "upper" : "lower", (long) sz);
+#endif
+	vnode_pager_setsize(vp, sz);
+}
+
+/*
+ * allocate a union_node/vnode pair. the vnode is
+ * referenced and locked. the new vnode is returned
+ * via (vpp). (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time. (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped. either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ *
+ * all union_nodes are maintained on a singly-linked
+ * list. new nodes are only allocated when they cannot
+ * be found on this list. entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list. this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode. this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache)
+ struct vnode **vpp;
+ struct mount *mp;
+ struct vnode *undvp; /* parent union vnode */
+ struct vnode *dvp; /* may be null */
+ struct componentname *cnp; /* may be null */
+ struct vnode *uppervp; /* may be null */
+ struct vnode *lowervp; /* may be null */
+ int docache;
+{
+ int error;
+ struct union_node *un = 0;
+ struct vnode *xlowervp = NULLVP;
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ int hash = 0;
+ int vflag;
+ int try;
+
+ if (uppervp == NULLVP && lowervp == NULLVP)
+ panic("union: unidentifiable allocation");
+
+ if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+ xlowervp = lowervp;
+ lowervp = NULLVP;
+ }
+
+ /* detect the root vnode (and aliases) */
+ vflag = 0;
+ if ((uppervp == um->um_uppervp) &&
+ ((lowervp == NULLVP) || lowervp == um->um_lowervp)) {
+ if (lowervp == NULLVP) {
+ lowervp = um->um_lowervp;
+ if (lowervp != NULLVP)
+ VREF(lowervp);
+ }
+ vflag = VROOT;
+ }
+
+loop:
+ if (!docache) {
+ un = 0;
+ } else for (try = 0; try < 3; try++) {
+ switch (try) {
+ case 0:
+ if (lowervp == NULLVP)
+ continue;
+ hash = UNION_HASH(uppervp, lowervp);
+ break;
+
+ case 1:
+ if (uppervp == NULLVP)
+ continue;
+ hash = UNION_HASH(uppervp, NULLVP);
+ break;
+
+ case 2:
+ if (lowervp == NULLVP)
+ continue;
+ hash = UNION_HASH(NULLVP, lowervp);
+ break;
+ }
+
+ while (union_list_lock(hash))
+ continue;
+
+ for (un = unhead[hash].lh_first; un != 0;
+ un = un->un_cache.le_next) {
+ if ((un->un_lowervp == lowervp ||
+ un->un_lowervp == NULLVP) &&
+ (un->un_uppervp == uppervp ||
+ un->un_uppervp == NULLVP) &&
+ (UNIONTOV(un)->v_mount == mp)) {
+ if (vget(UNIONTOV(un), 0,
+ cnp ? cnp->cn_proc : NULL)) {
+ union_list_unlock(hash);
+ goto loop;
+ }
+ break;
+ }
+ }
+
+ union_list_unlock(hash);
+
+ if (un)
+ break;
+ }
+
+ if (un) {
+ /*
+ * Obtain a lock on the union_node.
+ * uppervp is locked, though un->un_uppervp
+ * may not be. this doesn't break the locking
+ * hierarchy since in the case that un->un_uppervp
+ * is not yet locked it will be vrele'd and replaced
+ * with uppervp.
+ */
+
+ if ((dvp != NULLVP) && (uppervp == dvp)) {
+ /*
+ * Access ``.'', so (un) will already
+ * be locked. Since this process has
+ * the lock on (uppervp) no other
+ * process can hold the lock on (un).
+ */
+#ifdef DIAGNOSTIC
+ if ((un->un_flags & UN_LOCKED) == 0)
+ panic("union: . not locked");
+ else if (curproc && un->un_pid != curproc->p_pid &&
+ un->un_pid > -1 && curproc->p_pid > -1)
+ panic("union: allocvp not lock owner");
+#endif
+ } else {
+ if (un->un_flags & UN_LOCKED) {
+ vrele(UNIONTOV(un));
+ un->un_flags |= UN_WANT;
+ (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0);
+ goto loop;
+ }
+ un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+ }
+
+ /*
+ * At this point, the union_node is locked,
+ * un->un_uppervp may not be locked, and uppervp
+ * is locked or nil.
+ */
+
+ /*
+ * Save information about the upper layer.
+ */
+ if (uppervp != un->un_uppervp) {
+ union_newupper(un, uppervp);
+ } else if (uppervp) {
+ vrele(uppervp);
+ }
+
+ if (un->un_uppervp) {
+ un->un_flags |= UN_ULOCK;
+ un->un_flags &= ~UN_KLOCK;
+ }
+
+ /*
+ * Save information about the lower layer.
+ * This needs to keep track of pathname
+ * and directory information which union_vn_create
+ * might need.
+ */
+ if (lowervp != un->un_lowervp) {
+ union_newlower(un, lowervp);
+ if (cnp && (lowervp != NULLVP)) {
+ un->un_hash = cnp->cn_hash;
+ un->un_path = malloc(cnp->cn_namelen+1,
+ M_TEMP, M_WAITOK);
+ bcopy(cnp->cn_nameptr, un->un_path,
+ cnp->cn_namelen);
+ un->un_path[cnp->cn_namelen] = '\0';
+ VREF(dvp);
+ un->un_dirvp = dvp;
+ }
+ } else if (lowervp) {
+ vrele(lowervp);
+ }
+ *vpp = UNIONTOV(un);
+ return (0);
+ }
+
+ if (docache) {
+ /*
+ * otherwise lock the vp list while we call getnewvnode
+ * since that can block.
+ */
+ hash = UNION_HASH(uppervp, lowervp);
+
+ if (union_list_lock(hash))
+ goto loop;
+ }
+
+ error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+ if (error) {
+ if (uppervp) {
+ if (dvp == uppervp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ }
+ if (lowervp)
+ vrele(lowervp);
+
+ goto out;
+ }
+
+ MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+ M_TEMP, M_WAITOK);
+
+ (*vpp)->v_flag |= vflag;
+ if (uppervp)
+ (*vpp)->v_type = uppervp->v_type;
+ else
+ (*vpp)->v_type = lowervp->v_type;
+ un = VTOUNION(*vpp);
+ un->un_vnode = *vpp;
+ un->un_uppervp = uppervp;
+ un->un_uppersz = VNOVAL;
+ un->un_lowervp = lowervp;
+ un->un_lowersz = VNOVAL;
+ un->un_pvp = undvp;
+ if (undvp != NULLVP)
+ VREF(undvp);
+ un->un_dircache = 0;
+ un->un_openl = 0;
+ un->un_flags = UN_LOCKED;
+ if (un->un_uppervp)
+ un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+ if (cnp && (lowervp != NULLVP)) {
+ un->un_hash = cnp->cn_hash;
+ un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+ bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+ un->un_path[cnp->cn_namelen] = '\0';
+ VREF(dvp);
+ un->un_dirvp = dvp;
+ } else {
+ un->un_hash = 0;
+ un->un_path = 0;
+ un->un_dirvp = 0;
+ }
+
+ if (docache) {
+ LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+ un->un_flags |= UN_CACHED;
+ }
+
+ if (xlowervp)
+ vrele(xlowervp);
+
+out:
+ if (docache)
+ union_list_unlock(hash);
+
+ return (error);
+}
+
+/*
+ * union_freevp: tear down the union_node hanging off (vp).
+ *
+ * The node is unhooked from the hash cache if it is marked
+ * UN_CACHED, every vnode reference it still holds (parent, upper,
+ * lower, directory) is released, the saved pathname is freed and
+ * finally the node itself is freed and vp->v_data cleared.
+ * Always returns 0.
+ */
+int
+union_freevp(vp)
+	struct vnode *vp;
+{
+	struct union_node *node = VTOUNION(vp);
+
+	/* take the node off its hash chain first */
+	if ((node->un_flags & UN_CACHED) != 0) {
+		node->un_flags &= ~UN_CACHED;
+		LIST_REMOVE(node, un_cache);
+	}
+
+	/* drop whatever vnode references are still held */
+	if (node->un_pvp != NULLVP)
+		vrele(node->un_pvp);
+	if (node->un_uppervp != NULLVP)
+		vrele(node->un_uppervp);
+	if (node->un_lowervp != NULLVP)
+		vrele(node->un_lowervp);
+	if (node->un_dirvp != NULLVP)
+		vrele(node->un_dirvp);
+
+	if (node->un_path != 0)
+		free(node->un_path, M_TEMP);
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * copyfile. copy the vnode (fvp) to the vnode (tvp)
+ * using a sequence of reads and writes. both (fvp)
+ * and (tvp) are locked on entry and exit.
+ *
+ * (cred) is handed to every VOP_LEASE, VOP_READ and VOP_WRITE
+ * call below; (p) is used for the lock operations and stored
+ * in the uio.
+ *
+ * returns 0 once a read transfers no data, otherwise the
+ * first error reported by VOP_READ or VOP_WRITE.
+ */
+static int
+union_copyfile(fvp, tvp, cred, p)
+ struct vnode *fvp;
+ struct vnode *tvp;
+ struct ucred *cred;
+ struct proc *p;
+{
+ char *buf;
+ struct uio uio;
+ struct iovec iov;
+ int error = 0;
+
+ /*
+ * strategy:
+ * allocate a buffer of size MAXBSIZE.
+ * loop doing reads and writes, keeping track
+ * of the current uio offset.
+ * give up at the first sign of trouble.
+ */
+
+ uio.uio_procp = p;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_offset = 0;
+
+ VOP_UNLOCK(fvp, 0, p); /* XXX */
+ VOP_LEASE(fvp, p, cred, LEASE_READ);
+ vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
+ VOP_UNLOCK(tvp, 0, p); /* XXX */
+ VOP_LEASE(tvp, p, cred, LEASE_WRITE);
+ vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */
+
+ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+ /* ugly loop follows... */
+ do {
+ off_t offset = uio.uio_offset; /* where this chunk starts */
+
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE;
+ uio.uio_resid = iov.iov_len;
+ uio.uio_rw = UIO_READ;
+ error = VOP_READ(fvp, &uio, 0, cred);
+
+ if (error == 0) {
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE - uio.uio_resid; /* bytes just read */
+ uio.uio_offset = offset; /* write at the offset the read began at */
+ uio.uio_rw = UIO_WRITE;
+ uio.uio_resid = iov.iov_len;
+
+ if (uio.uio_resid == 0) /* the read transferred nothing: done */
+ break;
+
+ do {
+ error = VOP_WRITE(tvp, &uio, 0, cred);
+ } while ((uio.uio_resid > 0) && (error == 0)); /* until the chunk is fully written */
+ }
+
+ } while (error == 0);
+
+ free(buf, M_TEMP);
+ return (error);
+}
+
+/*
+ * Copy (un) up to the upper layer.
+ *
+ * (un) is assumed to be locked on entry and remains
+ * locked on exit.
+ *
+ * The caller must be able to read the lower vnode (checked with
+ * VOP_ACCESS using (cred)).  A new upper vnode is created with
+ * union_vn_create() and installed with union_newupper(); when
+ * (docopy) is non-zero the contents of the lower vnode are copied
+ * into it with union_copyfile().
+ *
+ * Returns 0 on success, otherwise the first error reported by
+ * VOP_ACCESS, union_vn_create, VOP_OPEN or union_copyfile.
+ */
+int
+union_copyup(un, docopy, cred, p)
+	struct union_node *un;
+	int docopy;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int error;
+	struct vnode *lvp, *uvp;
+
+	/*
+	 * If the user does not have read permission, the vnode should not
+	 * be copied to upper layer.
+	 */
+	vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+	error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p);
+	VOP_UNLOCK(un->un_lowervp, 0, p);
+	if (error)
+		return (error);
+
+	error = union_vn_create(&uvp, un, p);
+	if (error)
+		return (error);
+
+	/* at this point, uppervp is locked */
+	union_newupper(un, uvp);
+	un->un_flags |= UN_ULOCK;
+
+	lvp = un->un_lowervp;
+
+	if (docopy) {
+		/*
+		 * XX - should not ignore errors
+		 * from VOP_CLOSE
+		 */
+		vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_OPEN(lvp, FREAD, cred, p);
+		if (error == 0) {
+			error = union_copyfile(lvp, uvp, cred, p);
+			VOP_UNLOCK(lvp, 0, p);
+			(void) VOP_CLOSE(lvp, FREAD, cred, p);
+		} else {
+			/*
+			 * The open failed, so there is nothing to
+			 * close, but the lock taken just above must
+			 * still be dropped: the lower vnode is
+			 * unlocked on every other exit from here.
+			 */
+			VOP_UNLOCK(lvp, 0, p);
+		}
+#ifdef UNION_DIAGNOSTIC
+		if (error == 0)
+			uprintf("union: copied up %s\n", un->un_path);
+#endif
+
+	}
+
+	/*
+	 * Close the write-mode open made by union_vn_create.
+	 * The upper vnode is unlocked around the close and the
+	 * UN_ULOCK flag is kept in step with the real lock state.
+	 */
+	un->un_flags &= ~UN_ULOCK;
+	VOP_UNLOCK(uvp, 0, p);
+	union_vn_close(uvp, FWRITE, cred, p);
+	vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p);
+	un->un_flags |= UN_ULOCK;
+
+	/*
+	 * Subsequent IOs will go to the top layer, so
+	 * call close on the lower vnode and open on the
+	 * upper vnode to ensure that the filesystem keeps
+	 * its reference counts right.  This doesn't do
+	 * the right thing with (cred) and (FREAD) though.
+	 * Ignoring error returns is not right, either.
+	 */
+	if (error == 0) {
+		int i;
+
+		for (i = 0; i < un->un_openl; i++) {
+			(void) VOP_CLOSE(lvp, FREAD, cred, p);
+			(void) VOP_OPEN(uvp, FREAD, cred, p);
+		}
+		un->un_openl = 0;
+	}
+
+	return (error);
+
+}
+
+static int
+union_relookup(um, dvp, vpp, cnp, cn, path, pathlen)
+ struct union_mount *um; /* supplies um_op and um_cred */
+ struct vnode *dvp; /* directory handed to relookup() */
+ struct vnode **vpp; /* out: result vnode from relookup() */
+ struct componentname *cnp; /* caller's name: source of proc, cred, hash, consume */
+ struct componentname *cn; /* out: filled in here, then passed to relookup() */
+ char *path; /* name bytes copied into the new buffer */
+ int pathlen; /* number of bytes of (path) to copy */
+{
+ int error;
+
+ /*
+ * A new componentname structure must be faked up because
+ * there is no way to know where the upper level cnp came
+ * from or what it is being used for. This must duplicate
+ * some of the work done by NDINIT, some of the work done
+ * by namei, some of the work done by lookup and some of
+ * the work done by VOP_LOOKUP when given a CREATE flag.
+ * Conclusion: Horrible.
+ *
+ * The pathname buffer will be FREEed by VOP_MKDIR.
+ *
+ * If relookup() fails, the buffer is instead returned to
+ * namei_zone here and cn_pnbuf is cleared; if it succeeds,
+ * the extra reference taken on (dvp) below is dropped.
+ * The return value is that of relookup().
+ */
+ cn->cn_namelen = pathlen;
+ cn->cn_pnbuf = zalloc(namei_zone);
+ bcopy(path, cn->cn_pnbuf, cn->cn_namelen);
+ cn->cn_pnbuf[cn->cn_namelen] = '\0';
+
+ cn->cn_nameiop = CREATE;
+ cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn->cn_proc = cnp->cn_proc;
+ if (um->um_op == UNMNT_ABOVE) /* caller's credentials for ABOVE mounts, */
+ cn->cn_cred = cnp->cn_cred;
+ else /* the credentials saved in the mount otherwise */
+ cn->cn_cred = um->um_cred;
+ cn->cn_nameptr = cn->cn_pnbuf;
+ cn->cn_hash = cnp->cn_hash;
+ cn->cn_consume = cnp->cn_consume;
+
+ VREF(dvp);
+ error = relookup(dvp, vpp, cn);
+ if (!error)
+ vrele(dvp);
+ else {
+ zfree(namei_zone, cn->cn_pnbuf);
+ cn->cn_pnbuf = NULL;
+ }
+
+ return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * The new vnode is returned locked.
+ *
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ *
+ * returns the error from union_relookup() if the lookup fails,
+ * EEXIST (with *vpp reset to NULLVP) if the lookup finds an
+ * existing entry, and otherwise the result of VOP_MKDIR.
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+ struct union_mount *um;
+ struct vnode *dvp;
+ struct componentname *cnp;
+ struct vnode **vpp;
+{
+ int error;
+ struct vattr va;
+ struct proc *p = cnp->cn_proc;
+ struct componentname cn; /* private copy built by union_relookup() */
+
+ error = union_relookup(um, dvp, vpp, cnp, &cn,
+ cnp->cn_nameptr, cnp->cn_namelen);
+ if (error)
+ return (error);
+
+ if (*vpp) { /* the name already exists in the upper layer */
+ VOP_ABORTOP(dvp, &cn);
+ VOP_UNLOCK(dvp, 0, p);
+ vrele(*vpp);
+ *vpp = NULLVP;
+ return (EEXIST);
+ }
+
+ /*
+ * policy: when creating the shadow directory in the
+ * upper layer, create it owned by the user who did
+ * the mount, group from parent directory, and mode
+ * 777 modified by umask (ie mostly identical to the
+ * mkdir syscall). (jsp, kb)
+ */
+
+ VATTR_NULL(&va);
+ va.va_type = VDIR;
+ va.va_mode = um->um_cmode;
+
+ /* VOP_LEASE: dvp is locked */
+ VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE);
+
+ error = VOP_MKDIR(dvp, vpp, &cn, &va);
+ vput(dvp);
+ return (error);
+}
+
+/*
+ * Create a whiteout entry in the upper layer.
+ *
+ * (um) points to the union mount structure for access to
+ * the mounting process's credentials.
+ * (dvp) is the directory in which to create the whiteout.
+ * it is locked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (path) is the NUL-terminated name looked up and whited out;
+ * its length is taken with strlen().
+ *
+ * returns the error from union_relookup() if the lookup fails,
+ * EEXIST if the lookup finds an existing entry, and otherwise
+ * the result of VOP_WHITEOUT.
+ *
+ * NOTE(review): the lease below is taken with p->p_ucred, whereas
+ * union_mkshadow() uses the credential chosen by union_relookup()
+ * (cn.cn_cred) -- confirm that the difference is intended.
+ */
+int
+union_mkwhiteout(um, dvp, cnp, path)
+ struct union_mount *um;
+ struct vnode *dvp;
+ struct componentname *cnp;
+ char *path;
+{
+ int error;
+ struct proc *p = cnp->cn_proc;
+ struct vnode *wvp; /* existing entry found by the lookup, if any */
+ struct componentname cn; /* private copy built by union_relookup() */
+
+ VOP_UNLOCK(dvp, 0, p);
+ error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path));
+ if (error) {
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); /* restore the caller's lock */
+ return (error);
+ }
+
+ if (wvp) {
+ VOP_ABORTOP(dvp, &cn);
+ vrele(dvp);
+ vrele(wvp);
+ return (EEXIST);
+ }
+
+ /* VOP_LEASE: dvp is locked */
+ VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE);
+
+ error = VOP_WHITEOUT(dvp, &cn, CREATE);
+ if (error)
+ VOP_ABORTOP(dvp, &cn);
+
+ vrele(dvp);
+
+ return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer.  this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new vnode on success and is set to
+ *	NULLVP on entry, so it is NULLVP on every error return.
+ * (un) supplies the name (un_path, un_hash) and the directory
+ *	(un_dirvp) in which the file is created.
+ * (p) supplies the credentials and the umask.
+ *
+ * returns 0 on success, EEXIST if the name already exists,
+ * or the error from relookup, VOP_CREATE or VOP_OPEN.
+ */
+static int
+union_vn_create(vpp, un, p)
+	struct vnode **vpp;
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *vp;
+	struct ucred *cred = p->p_ucred;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+	int error;
+	int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+	struct componentname cn;
+
+	*vpp = NULLVP;
+
+	/*
+	 * Build a new componentname structure (for the same
+	 * reasons outlined in union_mkshadow).
+	 * The difference here is that the file is owned by
+	 * the current user, rather than by the person who
+	 * did the mount, since the current user needs to be
+	 * able to write the file (that's why it is being
+	 * copied in the first place).
+	 */
+	cn.cn_namelen = strlen(un->un_path);
+	cn.cn_pnbuf = zalloc(namei_zone);
+	bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+	cn.cn_nameiop = CREATE;
+	cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+	cn.cn_proc = p;
+	cn.cn_cred = p->p_ucred;
+	cn.cn_nameptr = cn.cn_pnbuf;
+	cn.cn_hash = un->un_hash;
+	cn.cn_consume = 0;
+
+	VREF(un->un_dirvp);
+	error = relookup(un->un_dirvp, &vp, &cn);
+	if (error) {
+		/*
+		 * Nobody else will release the pathname buffer on
+		 * this path, so give it back here, exactly as
+		 * union_relookup() does when its lookup fails.
+		 */
+		zfree(namei_zone, cn.cn_pnbuf);
+		return (error);
+	}
+	vrele(un->un_dirvp);
+
+	if (vp) {
+		/* somebody created the file first */
+		VOP_ABORTOP(un->un_dirvp, &cn);
+		if (un->un_dirvp == vp)
+			vrele(un->un_dirvp);
+		else
+			vput(un->un_dirvp);
+		vrele(vp);
+		return (EEXIST);
+	}
+
+	/*
+	 * Good - there was no race to create the file
+	 * so go ahead and create it.  The permissions
+	 * on the file will be 0666 modified by the
+	 * current user's umask.  Access to the file, while
+	 * it is unioned, will require access to the top *and*
+	 * bottom files.  Access when not unioned will simply
+	 * require access to the top-level file.
+	 * TODO: confirm choice of access permissions.
+	 */
+	VATTR_NULL(vap);
+	vap->va_type = VREG;
+	vap->va_mode = cmode;
+	VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE);
+	error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap);
+	vput(un->un_dirvp);
+	if (error)
+		return (error);
+
+	error = VOP_OPEN(vp, fmode, cred, p);
+	if (error) {
+		vput(vp);
+		return (error);
+	}
+
+	/* account for the write-mode open; union_vn_close undoes this */
+	vp->v_writecount++;
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * union_vn_close: close (vp) with VOP_CLOSE, first giving back
+ * the v_writecount that union_vn_create took if the file is
+ * being closed for writing.  Returns the result of VOP_CLOSE.
+ */
+static int
+union_vn_close(vp, fmode, cred, p)
+	struct vnode *vp;
+	int fmode;
+	struct ucred *cred;
+	struct proc *p;
+{
+	int writing = (fmode & FWRITE) != 0;
+
+	if (writing)
+		vp->v_writecount--;
+
+	return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+void
+union_removed_upper(un)
+ struct union_node *un;
+{
+ struct proc *p = curproc; /* XXX */
+ struct vnode **vpp; /* cursor over the NULLVP-terminated dircache */
+
+ /*
+ * Do not set the uppervp to NULLVP. If lowervp is NULLVP,
+ * union node will have neither uppervp nor lowervp. We remove
+ * the union node from cache, so that it will not be referenced.
+ *
+ * What is done instead: the directory cache is released
+ * and freed, the node is taken off the hash list, and the
+ * upper vnode is unlocked if UN_ULOCK says it is locked.
+ */
+#if 0
+ union_newupper(un, NULLVP);
+#endif
+ if (un->un_dircache != 0) {
+ for (vpp = un->un_dircache; *vpp != NULLVP; vpp++)
+ vrele(*vpp);
+ free(un->un_dircache, M_TEMP);
+ un->un_dircache = 0;
+ }
+
+ if (un->un_flags & UN_CACHED) {
+ un->un_flags &= ~UN_CACHED;
+ LIST_REMOVE(un, un_cache);
+ }
+
+ if (un->un_flags & UN_ULOCK) {
+ un->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(un->un_uppervp, 0, p);
+ }
+}
+
+#if 0 /* union_lowervp is compiled out.  NOTE(review): it calls
+ * vget() with two arguments while union_allocvp() passes three,
+ * so it would need updating before being re-enabled. */
+struct vnode *
+union_lowervp(vp)
+ struct vnode *vp;
+{
+ struct union_node *un = VTOUNION(vp);
+
+ if ((un->un_lowervp != NULLVP) &&
+ (vp->v_type == un->un_lowervp->v_type)) { /* lower exists and has the same type */
+ if (vget(un->un_lowervp, 0) == 0)
+ return (un->un_lowervp);
+ }
+
+ return (NULLVP);
+}
+#endif /* 0 */
+
+/*
+ * Decide whether a remove/rmdir on (un) has to leave a
+ * whiteout behind.
+ *
+ * The answer is yes (1) when the node has a lower vnode, or
+ * when the attributes of the upper vnode can be fetched with
+ * (cred)/(p) and carry the OPAQUE flag; otherwise no (0).
+ */
+int
+union_dowhiteout(un, cred, p)
+	struct union_node *un;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct vattr va;
+	int needed = 0;
+
+	if (un->un_lowervp != NULLVP) {
+		needed = 1;
+	} else if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0) {
+		if ((va.va_flags & OPAQUE) != 0)
+			needed = 1;
+	}
+
+	return (needed);
+}
+
+/*
+ * Recursive worker for union_dircache().
+ *
+ * Walks the upper and then the lower vnode of every union vnode
+ * reachable from (vp) and visits each non-union vnode found.
+ * When (vppp) is nil the visit only counts: *cntp is incremented.
+ * Otherwise the vnode is referenced and stored through *vppp,
+ * which is advanced, and *cntp is decremented; running the count
+ * down to zero means the table was sized too small and panics.
+ */
+static void
+union_dircache_r(vp, vppp, cntp)
+	struct vnode *vp;
+	struct vnode ***vppp;
+	int *cntp;
+{
+	struct union_node *un;
+
+	if (vp->v_op == union_vnodeop_p) {
+		/* interior node: descend, upper layer first */
+		un = VTOUNION(vp);
+		if (un->un_uppervp != NULLVP)
+			union_dircache_r(un->un_uppervp, vppp, cntp);
+		if (un->un_lowervp != NULLVP)
+			union_dircache_r(un->un_lowervp, vppp, cntp);
+		return;
+	}
+
+	if (vppp == NULL) {
+		/* counting pass */
+		(*cntp)++;
+		return;
+	}
+
+	/* filling pass */
+	VREF(vp);
+	**vppp = vp;
+	(*vppp)++;
+	*cntp -= 1;
+	if (*cntp == 0)
+		panic("union: dircache table too small");
+}
+
+/*
+ * union_dircache: return a union vnode for the next directory
+ * layer to be read after (vp), or NULLVP if there is none (or
+ * if allocating the new union vnode fails).
+ *
+ * The layers are kept in a NULLVP-terminated table of referenced
+ * vnodes.  If (vp) has no table yet one is built with
+ * union_dircache_r() and the second entry is used; otherwise the
+ * entry following (vp)'s upper vnode is used.  On success the
+ * table is handed over from (vp) to the new node.
+ *
+ * (vp) is locked here on entry and unlocked again before return;
+ * (p) is used for the lock operations.
+ */
+struct vnode *
+union_dircache(vp, p)
+	struct vnode *vp;
+	struct proc *p;
+{
+	int cnt;
+	int fresh = 0;		/* the table was built by this call */
+	struct vnode *nvp;
+	struct vnode **vpp;
+	struct vnode **dircache;
+	struct union_node *un;
+	int error;
+
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+	dircache = VTOUNION(vp)->un_dircache;
+
+	nvp = NULLVP;
+
+	if (dircache == 0) {
+		/* first pass counts the entries, second pass fills them in */
+		cnt = 0;
+		union_dircache_r(vp, 0, &cnt);
+		cnt++;
+		dircache = (struct vnode **)
+				malloc(cnt * sizeof(struct vnode *),
+					M_TEMP, M_WAITOK);
+		vpp = dircache;
+		union_dircache_r(vp, &vpp, &cnt);
+		*vpp = NULLVP;
+		vpp = dircache + 1;
+		fresh = 1;
+	} else {
+		vpp = dircache;
+		do {
+			if (*vpp++ == VTOUNION(vp)->un_uppervp)
+				break;
+		} while (*vpp != NULLVP);
+	}
+
+	if (*vpp == NULLVP)
+		goto out;
+
+	vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p);
+	VREF(*vpp);
+	error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0);
+	if (error)
+		goto out;
+
+	VTOUNION(vp)->un_dircache = 0;
+	un = VTOUNION(nvp);
+	un->un_dircache = dircache;
+
+out:
+	/*
+	 * A table built by this call that was not handed to a new
+	 * node is owned by nobody: it was never stored in (vp).
+	 * Release its vnode references and free it here, the same
+	 * way union_removed_upper() disposes of a table, instead of
+	 * leaking both.  A table that already belonged to (vp) is
+	 * left where it is.
+	 */
+	if (fresh && nvp == NULLVP) {
+		for (vpp = dircache; *vpp != NULLVP; vpp++)
+			vrele(*vpp);
+		free(dircache, M_TEMP);
+	}
+
+	VOP_UNLOCK(vp, 0, p);
+	return (nvp);
+}
+
+/*
+ * Module glue to remove #ifdef UNION from vfs_syscalls.c
+ *
+ * Installed as union_dircheckp by union_modevent().  Decides
+ * whether the directory read on (*vp) should continue on another
+ * vnode: the next layer of a union vnode (unless the directory
+ * is opaque), or the vnode covered by the root of an MNT_UNION
+ * mount.
+ *
+ * If so, (*vp) and (fp)->f_data are switched to that vnode,
+ * (fp)->f_offset is reset to 0 and -1 is returned.  Otherwise
+ * the return value is 0 or an error from VOP_GETATTR, VOP_OPEN
+ * or vn_close.
+ */
+static int
+union_dircheck(struct proc *p, struct vnode **vp, struct file *fp)
+{
+	int error = 0;
+
+	if ((*vp)->v_op == union_vnodeop_p) {
+		struct vnode *lvp;
+
+		lvp = union_dircache(*vp, p);
+		if (lvp != NULLVP) {
+			struct vattr va;
+
+			/*
+			 * If the directory is opaque,
+			 * then don't show lower entries.
+			 * (va) is only meaningful when the getattr
+			 * succeeded, so do not look at it otherwise.
+			 */
+			error = VOP_GETATTR(*vp, &va, fp->f_cred, p);
+			if (error == 0 && (va.va_flags & OPAQUE)) {
+				vput(lvp);
+				lvp = NULL;
+			}
+		}
+
+		if (lvp != NULLVP) {
+			error = VOP_OPEN(lvp, FREAD, fp->f_cred, p);
+			if (error) {
+				vput(lvp);
+				return (error);
+			}
+			VOP_UNLOCK(lvp, 0, p);
+			fp->f_data = (caddr_t) lvp;
+			fp->f_offset = 0;
+			error = vn_close(*vp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			*vp = lvp;
+			return -1;	/* goto unionread */
+		}
+	}
+	if (((*vp)->v_flag & VROOT) && ((*vp)->v_mount->mnt_flag & MNT_UNION)) {
+		/* root of a -o union mount: carry on in the covered vnode */
+		struct vnode *tvp = *vp;
+		*vp = (*vp)->v_mount->mnt_vnodecovered;
+		VREF(*vp);
+		fp->f_data = (caddr_t) *vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		return -1;	/* goto unionread */
+	}
+	return error;
+}
+
+/*
+ * Module event handler: install union_dircheck() as the
+ * union_dircheckp hook on MOD_LOAD and clear the hook again on
+ * MOD_UNLOAD.  Every other event is ignored.  Always returns 0.
+ */
+static int
+union_modevent(module_t mod, int type, void *data)
+{
+	if (type == MOD_LOAD)
+		union_dircheckp = union_dircheck;
+	else if (type == MOD_UNLOAD)
+		union_dircheckp = NULL;
+
+	return 0;
+}
+static moduledata_t union_mod = { /* positional initializer: order must match moduledata_t */
+ "union_dircheck", /* presumably the module name -- confirm against moduledata_t */
+ union_modevent, /* handler that sets and clears union_dircheckp */
+ NULL
+};
+DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY);
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
new file mode 100644
index 0000000..db4d4d3
--- /dev/null
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 1994, 1995 The Regents of the University of California.
+ * Copyright (c) 1994, 1995 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95
+ * $Id: union_vfsops.c,v 1.30 1998/09/07 13:17:02 bde Exp $
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <miscfs/union/union.h>
+
+static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure"); /* type used for struct union_mount */
+
+extern int union_init __P((struct vfsconf *)); /* not defined in this file */
+
+extern int union_fhtovp __P((struct mount *mp, struct fid *fidp,
+ struct mbuf *nam, struct vnode **vpp,
+ int *exflagsp, struct ucred **credanonp)); /* NOTE(review): declared
+ * with (struct mbuf *nam) here, but the union_fhtovp macro further
+ * down casts eopnotsupp to a signature taking (struct sockaddr *)
+ * -- confirm which one is current. */
+static int union_mount __P((struct mount *mp, char *path, caddr_t data,
+ struct nameidata *ndp, struct proc *p));
+extern int union_quotactl __P((struct mount *mp, int cmd, uid_t uid,
+ caddr_t arg, struct proc *p)); /* replaced by a macro further down */
+static int union_root __P((struct mount *mp, struct vnode **vpp));
+static int union_start __P((struct mount *mp, int flags, struct proc *p));
+static int union_statfs __P((struct mount *mp, struct statfs *sbp,
+ struct proc *p));
+extern int union_sync __P((struct mount *mp, int waitfor,
+ struct ucred *cred, struct proc *p)); /* replaced by a macro further down */
+static int union_unmount __P((struct mount *mp, int mntflags,
+ struct proc *p));
+extern int union_vget __P((struct mount *mp, ino_t ino,
+ struct vnode **vpp)); /* replaced by a macro further down */
+extern int union_vptofh __P((struct vnode *vp, struct fid *fhp)); /* replaced by a macro further down */
+
+/*
+ * Mount union filesystem
+ *
+ * (mp) is the mount being set up; the vnode it covers supplies
+ *	one of the two layers.
+ * (path) is the user-space mount point name, copied into
+ *	f_mntonname.
+ * (data) is the user-space address of a struct union_args; its
+ *	target names the other layer and its mntflags select
+ *	UNMNT_ABOVE, UNMNT_BELOW or UNMNT_REPLACE.
+ * (ndp) is used for the namei() lookup of the target.
+ * (p) is the mounting process; its credentials and umask are
+ *	saved in the union_mount.
+ *
+ * returns 0 on success.  Fails with EOPNOTSUPP for an update
+ * mount, EDEADLK if the target is already the upper vnode of the
+ * covered union vnode, EINVAL if the target is not a directory
+ * or the operation is unknown, or the error from copyin, namei
+ * or VOP_WHITEOUT.
+ */
+static int
+union_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct union_args args;
+	struct vnode *lowerrootvp = NULLVP;
+	struct vnode *upperrootvp = NULLVP;
+	struct union_mount *um = 0;
+	struct ucred *cred = 0;
+	char *cp = 0;
+	int len;
+	u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Disable clustered write, otherwise system becomes unstable.
+	 */
+	mp->mnt_flag |= MNT_NOCLUSTERW;
+
+	/*
+	 * Update is a no-op
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		/*
+		 * Need to provide.
+		 * 1. a way to convert between rdonly and rdwr mounts.
+		 * 2. support for nfs exports.
+		 */
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+
+	/*
+	 * Get argument
+	 */
+	error = copyin(data, (caddr_t)&args, sizeof(struct union_args));
+	if (error)
+		goto bad;
+
+	lowerrootvp = mp->mnt_vnodecovered;
+	VREF(lowerrootvp);
+
+	/*
+	 * Unlock lower node to avoid deadlock.
+	 */
+	if (lowerrootvp->v_op == union_vnodeop_p)
+		VOP_UNLOCK(lowerrootvp, 0, p);
+
+	/*
+	 * Find upper node.
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+	       UIO_USERSPACE, args.target, p);
+
+	error = namei(ndp);
+	if (lowerrootvp->v_op == union_vnodeop_p)
+		vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p);
+	if (error)
+		goto bad;
+
+	upperrootvp = ndp->ni_vp;
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	/*
+	 * Check multi union mount to avoid `lock myself again' panic.
+	 * The covered vnode's private data is only a union_node when
+	 * the covered vnode is itself a union vnode, so test that
+	 * before looking inside it with VTOUNION().
+	 */
+	if (lowerrootvp->v_op == union_vnodeop_p &&
+	    upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) {
+#ifdef DIAGNOSTIC
+		printf("union_mount: multi union mount?\n");
+#endif
+		error = EDEADLK;
+		goto bad;
+	}
+
+	if (upperrootvp->v_type != VDIR) {
+		error = EINVAL;
+		goto bad;
+	}
+
+	um = (struct union_mount *) malloc(sizeof(struct union_mount),
+				M_UNIONFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Keep a held reference to the target vnodes.
+	 * They are vrele'd in union_unmount.
+	 *
+	 * Depending on the _BELOW flag, the filesystems are
+	 * viewed in a different order.  In effect, this is the
+	 * same as providing a mount under option to the mount syscall.
+	 */
+
+	um->um_op = args.mntflags & UNMNT_OPMASK;
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		um->um_lowervp = lowerrootvp;
+		um->um_uppervp = upperrootvp;
+		break;
+
+	case UNMNT_BELOW:
+		um->um_lowervp = upperrootvp;
+		um->um_uppervp = lowerrootvp;
+		break;
+
+	case UNMNT_REPLACE:
+		vrele(lowerrootvp);
+		lowerrootvp = NULLVP;
+		um->um_uppervp = upperrootvp;
+		um->um_lowervp = lowerrootvp;
+		break;
+
+	default:
+		error = EINVAL;
+		goto bad;
+	}
+
+	/*
+	 * Unless the mount is readonly, ensure that the top layer
+	 * supports whiteout operations
+	 */
+	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
+		error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP);
+		if (error)
+			goto bad;
+	}
+
+	um->um_cred = p->p_ucred;
+	crhold(um->um_cred);
+	um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+	/*
+	 * Depending on what you think the MNT_LOCAL flag might mean,
+	 * you may want the && to be || on the conditional below.
+	 * At the moment it has been defined that the filesystem is
+	 * only local if it is all local, ie the MNT_LOCAL flag implies
+	 * that the entire namespace is local.  If you think the MNT_LOCAL
+	 * flag implies that some of the files might be stored locally
+	 * then you will want to change the conditional.
+	 */
+	if (um->um_op == UNMNT_ABOVE) {
+		if (((um->um_lowervp == NULLVP) ||
+		     (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+		    (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+			mp->mnt_flag |= MNT_LOCAL;
+	}
+
+	/*
+	 * Copy in the upper layer's RDONLY flag.  This is for the benefit
+	 * of lookup() which explicitly checks the flag, rather than asking
+	 * the filesystem for its own opinion.  This means, that an update
+	 * mount of the underlying filesystem to go from rdonly to rdwr
+	 * will leave the unioned view as read-only.
+	 */
+	mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+	mp->mnt_data = (qaddr_t) um;
+	vfs_getnewfsid(mp);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* f_mntfromname is a prefix naming the operation, then the target */
+	switch (um->um_op) {
+	case UNMNT_ABOVE:
+		cp = "<above>:";
+		break;
+	case UNMNT_BELOW:
+		cp = "<below>:";
+		break;
+	case UNMNT_REPLACE:
+		cp = "";
+		break;
+	}
+	len = strlen(cp);
+	bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+	cp = mp->mnt_stat.f_mntfromname + len;
+	len = MNAMELEN - len;
+
+	(void) copyinstr(args.target, cp, len - 1, &size);
+	bzero(cp + size, len - size);
+
+	(void)union_statfs(mp, &mp->mnt_stat, p);
+
+#ifdef UNION_DIAGNOSTIC
+	printf("union_mount: from %s, on %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+
+bad:
+	/* undo whatever was acquired before the failure */
+	if (um)
+		free(um, M_UNIONFSMNT);
+	if (cred)
+		crfree(cred);
+	if (upperrootvp)
+		vrele(upperrootvp);
+	if (lowerrootvp)
+		vrele(lowerrootvp);
+	return (error);
+}
+
+/*
+ * VFS start. Nothing needed here - the start routine
+ * on the underlying filesystem(s) will have been called
+ * when that filesystem was mounted.
+ *
+ * None of the arguments are used; always returns 0.
+ */
+static int
+union_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Free reference to union layer
+ *
+ * (mntflags) containing MNT_FORCE makes the vnode flush use
+ * FORCECLOSE.  returns 0 on success, the error from union_root()
+ * if the root vnode cannot be obtained, or EBUSY if the root
+ * vnode still has more than one reference after flushing.
+ */
+static int
+union_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct vnode *um_rootvp;
+ int error;
+ int freeing; /* vnode count seen on the previous pass */
+ int flags = 0;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+ if (mntflags & MNT_FORCE)
+ flags |= FORCECLOSE;
+
+ if ((error = union_root(mp, &um_rootvp)) != 0)
+ return (error);
+
+ /*
+ * Keep flushing vnodes from the mount list.
+ * This is needed because of the un_pvp held
+ * reference to the parent vnode.
+ * If more vnodes have been freed on a given pass,
+ * then try again. The loop will iterate at most
+ * (d) times, where (d) is the maximum tree depth
+ * in the filesystem.
+ */
+ for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) {
+ struct vnode *vp;
+ int n;
+
+ /* count #vnodes held on mount list */
+ for (n = 0, vp = mp->mnt_vnodelist.lh_first;
+ vp != NULLVP;
+ vp = vp->v_mntvnodes.le_next)
+ n++;
+
+ /* if this is unchanged then stop */
+ if (n == freeing)
+ break;
+
+ /* otherwise try one more time */
+ freeing = n;
+ }
+
+ /* At this point the root vnode should have a single reference */
+ if (um_rootvp->v_usecount > 1) {
+ vput(um_rootvp);
+ return (EBUSY);
+ }
+
+#ifdef UNION_DIAGNOSTIC
+ vprint("union root", um_rootvp);
+#endif
+ /*
+ * Discard references to upper and lower target vnodes.
+ */
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ vrele(um->um_uppervp);
+ crfree(um->um_cred);
+ /*
+ * Release reference on underlying root vnode
+ */
+ vput(um_rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(um_rootvp);
+ /*
+ * Finally, throw away the union_mount structure
+ */
+ free(mp->mnt_data, M_UNIONFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+static int
+union_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp; /* out: root union vnode produced by union_allocvp() */
+{
+ struct proc *p = curproc; /* XXX */
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ int error;
+ int loselock; /* um_uppervp was found already locked, so it is not locked here */
+ int lockadj = 0; /* um_lowervp was unlocked here and must be relocked before return */
+
+ if (um->um_lowervp && um->um_op != UNMNT_BELOW &&
+ VOP_ISLOCKED(um->um_lowervp)) {
+ VREF(um->um_lowervp);
+ VOP_UNLOCK(um->um_lowervp, 0, p);
+ lockadj = 1;
+ }
+
+ /*
+ * Return locked reference to root.
+ *
+ * union_allocvp() is handed one reference on each of
+ * um_uppervp and um_lowervp.  If it fails, those
+ * references (and the upper lock, when it was taken
+ * here) are released again below.  The return value
+ * is that of union_allocvp().
+ */
+ VREF(um->um_uppervp);
+ if ((um->um_op == UNMNT_BELOW) &&
+ VOP_ISLOCKED(um->um_uppervp)) {
+ loselock = 1;
+ } else {
+ vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p);
+ loselock = 0;
+ }
+ if (um->um_lowervp)
+ VREF(um->um_lowervp);
+ error = union_allocvp(vpp, mp,
+ (struct vnode *) 0,
+ (struct vnode *) 0,
+ (struct componentname *) 0,
+ um->um_uppervp,
+ um->um_lowervp,
+ 1);
+
+ if (error) {
+ if (loselock)
+ vrele(um->um_uppervp);
+ else
+ vput(um->um_uppervp);
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ } else {
+ if (loselock) /* the upper lock is not ours: don't record it in the node */
+ VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+ }
+ if (lockadj) { /* undo the temporary unlock and reference from the top */
+ vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+ vrele(um->um_lowervp);
+ }
+
+ return (error);
+}
+
+static int
+union_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp; /* out: combined statistics for both layers */
+ struct proc *p;
+{
+ int error;
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct statfs mstat; /* scratch: lower layer first, then upper layer */
+ int lbsize; /* f_bsize of the lower layer (0 when there is none) */
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+ um->um_lowervp,
+ um->um_uppervp);
+#endif
+
+ bzero(&mstat, sizeof(mstat));
+
+ if (um->um_lowervp) {
+ error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+ if (error)
+ return (error);
+ }
+
+ /* now copy across the "interesting" information and fake the rest */
+#if 0
+ sbp->f_type = mstat.f_type;
+ sbp->f_flags = mstat.f_flags;
+ sbp->f_bsize = mstat.f_bsize;
+ sbp->f_iosize = mstat.f_iosize;
+#endif
+ lbsize = mstat.f_bsize;
+ sbp->f_blocks = mstat.f_blocks;
+ sbp->f_bfree = mstat.f_bfree;
+ sbp->f_bavail = mstat.f_bavail;
+ sbp->f_files = mstat.f_files;
+ sbp->f_ffree = mstat.f_ffree;
+
+ error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p); /* mstat now describes the upper layer */
+ if (error)
+ return (error);
+
+ sbp->f_flags = mstat.f_flags;
+ sbp->f_bsize = mstat.f_bsize;
+ sbp->f_iosize = mstat.f_iosize;
+
+ /*
+ * if the lower and upper blocksizes differ, then frig the
+ * block counts so that the sizes reported by df make some
+ * kind of sense. none of this makes sense though.
+ */
+
+ if (mstat.f_bsize != lbsize) /* rescale the lower block count to upper-layer blocks */
+ sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize;
+
+ /*
+ * The "total" fields count total resources in all layers,
+ * the "free" fields count only those resources which are
+ * free in the upper layer (since only the upper layer
+ * is writeable).
+ */
+ sbp->f_blocks += mstat.f_blocks;
+ sbp->f_bfree = mstat.f_bfree;
+ sbp->f_bavail = mstat.f_bavail;
+ sbp->f_files += mstat.f_files;
+ sbp->f_ffree = mstat.f_ffree;
+
+ if (sbp != &mp->mnt_stat) { /* caller's own buffer: fill in the identity fields from the mount */
+ sbp->f_type = mp->mnt_vfc->vfc_typenum;
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ return (0);
+}
+
+/*
+ * XXX - Assumes no data cached at union layer.
+ *
+ * The names below are not real functions: union_sync is nullop
+ * cast to the sync signature, and the others are eopnotsupp cast
+ * to their respective signatures.  union_sysctl is defined here
+ * but does not appear in the union_vfsops initializer that
+ * follows.
+ */
+#define union_sync ((int (*) __P((struct mount *, int, struct ucred *, \
+ struct proc *)))nullop)
+
+#define union_fhtovp ((int (*) __P((struct mount *, struct fid *, \
+ struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp)
+#define union_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \
+ struct proc *)))eopnotsupp)
+#define union_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \
+ size_t, struct proc *)))eopnotsupp)
+#define union_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \
+ eopnotsupp)
+#define union_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp)
+
+static struct vfsops union_vfsops = { /* positional initializer: order must match struct vfsops */
+ union_mount,
+ union_start,
+ union_unmount,
+ union_root,
+ union_quotactl, /* macro: eopnotsupp */
+ union_statfs,
+ union_sync, /* macro: nullop */
+ union_vget, /* macro: eopnotsupp */
+ union_fhtovp, /* macro: eopnotsupp */
+ union_vptofh, /* macro: eopnotsupp */
+ union_init, /* extern, not defined in this file */
+};
+
+VFS_SET(union_vfsops, union, VFCF_LOOPBACK);
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
new file mode 100644
index 0000000..ba9b2a3
--- /dev/null
+++ b/sys/fs/unionfs/union_vnops.c
@@ -0,0 +1,1804 @@
+/*
+ * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry.
+ * Copyright (c) 1992, 1993, 1994, 1995
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95
+ * $Id: union_vnops.c,v 1.59 1998/12/14 05:00:59 dillon Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/fcntl.h>
+#include <sys/stat.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/lock.h>
+#include <miscfs/union/union.h>
+
+#define FIXUP(un, p) { \
+ if (((un)->un_flags & UN_ULOCK) == 0) { \
+ union_fixup(un, p); \
+ } \
+}
+
+/*
+ * Forward declarations for the file-local vnode operations and helpers.
+ * All of them are static; __P() wraps the parameter lists.
+ */
+static int union_abortop __P((struct vop_abortop_args *ap));
+static int union_access __P((struct vop_access_args *ap));
+static int union_advlock __P((struct vop_advlock_args *ap));
+static int union_bmap __P((struct vop_bmap_args *ap));
+static int union_close __P((struct vop_close_args *ap));
+static int union_create __P((struct vop_create_args *ap));
+static void union_fixup __P((struct union_node *un, struct proc *p));
+static int union_fsync __P((struct vop_fsync_args *ap));
+static int union_getattr __P((struct vop_getattr_args *ap));
+static int union_inactive __P((struct vop_inactive_args *ap));
+static int union_ioctl __P((struct vop_ioctl_args *ap));
+static int union_islocked __P((struct vop_islocked_args *ap));
+static int union_lease __P((struct vop_lease_args *ap));
+static int union_link __P((struct vop_link_args *ap));
+static int union_lock __P((struct vop_lock_args *ap));
+static int union_lookup __P((struct vop_lookup_args *ap));
+static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp,
+ struct vnode **vpp,
+ struct componentname *cnp));
+static int union_mkdir __P((struct vop_mkdir_args *ap));
+static int union_mknod __P((struct vop_mknod_args *ap));
+static int union_mmap __P((struct vop_mmap_args *ap));
+static int union_open __P((struct vop_open_args *ap));
+static int union_pathconf __P((struct vop_pathconf_args *ap));
+static int union_print __P((struct vop_print_args *ap));
+static int union_read __P((struct vop_read_args *ap));
+static int union_readdir __P((struct vop_readdir_args *ap));
+static int union_readlink __P((struct vop_readlink_args *ap));
+static int union_reclaim __P((struct vop_reclaim_args *ap));
+static int union_remove __P((struct vop_remove_args *ap));
+static int union_rename __P((struct vop_rename_args *ap));
+static int union_revoke __P((struct vop_revoke_args *ap));
+static int union_rmdir __P((struct vop_rmdir_args *ap));
+static int union_poll __P((struct vop_poll_args *ap));
+static int union_setattr __P((struct vop_setattr_args *ap));
+static int union_strategy __P((struct vop_strategy_args *ap));
+static int union_symlink __P((struct vop_symlink_args *ap));
+static int union_unlock __P((struct vop_unlock_args *ap));
+static int union_whiteout __P((struct vop_whiteout_args *ap));
+/*
+ * NOTE(review): union_write is declared (and defined) with vop_read_args;
+ * presumably vop_write_args was intended -- confirm and change both
+ * together.
+ */
+static int union_write __P((struct vop_read_args *ap));
+
+/*
+ * Take the exclusive lock on the upper vnode of `un' for process `p'
+ * and record in the union node's flags that the upper layer is locked.
+ */
+static void
+union_fixup(un, p)
+	struct union_node *un;
+	struct proc *p;
+{
+	struct vnode *uppervp = un->un_uppervp;
+
+	vn_lock(uppervp, LK_EXCLUSIVE | LK_RETRY, p);
+	un->un_flags = un->un_flags | UN_ULOCK;
+}
+
+/*
+ * union_lookup1: look up one pathname component within a single layer.
+ *
+ * udvp - vnode that bounds the walk; mount-point crossing stops when
+ * the current directory equals udvp (callers pass the layer's
+ * um_uppervp or um_lowervp).
+ * dvpp - in/out: directory to search. It is replaced (old one vput,
+ * new one referenced and locked) when `..' steps back over a
+ * mount point.
+ * vpp - out: vnode found, written only on success.
+ * cnp - component being looked up.
+ *
+ * Returns 0 on success, otherwise the error from VOP_LOOKUP or VFS_ROOT.
+ */
+static int
+union_lookup1(udvp, dvpp, vpp, cnp)
+ struct vnode *udvp;
+ struct vnode **dvpp;
+ struct vnode **vpp;
+ struct componentname *cnp;
+{
+ int error;
+ struct proc *p = cnp->cn_proc;
+ struct vnode *tdvp;
+ struct vnode *dvp;
+ struct mount *mp;
+
+ dvp = *dvpp;
+
+ /*
+ * If stepping up the directory tree, check for going
+ * back across the mount point, in which case do what
+ * lookup would do by stepping back down the mount
+ * hierarchy.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ while ((dvp != udvp) && (dvp->v_flag & VROOT)) {
+ /*
+ * Don't do the NOCROSSMOUNT check
+ * at this level. By definition,
+ * union fs deals with namespaces, not
+ * filesystems.
+ */
+ tdvp = dvp;
+ /* The caller's *dvpp is updated together with the local copy. */
+ *dvpp = dvp = dvp->v_mount->mnt_vnodecovered;
+ vput(tdvp);
+ VREF(dvp);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+ }
+ }
+
+ error = VOP_LOOKUP(dvp, &tdvp, cnp);
+ if (error)
+ return (error);
+
+ /*
+ * The parent directory will have been unlocked, unless lookup
+ * found the last component. In which case, re-lock the node
+ * here to allow it to be unlocked again (phew) in union_lookup.
+ */
+ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+
+ /* From here on dvp names the vnode that was found, not the parent. */
+ dvp = tdvp;
+
+ /*
+ * Lastly check if the current node is a mount point in
+ * which case walk up the mount hierarchy making sure not to
+ * bump into the root of the mount tree (ie. dvp != udvp).
+ */
+ while (dvp != udvp && (dvp->v_type == VDIR) &&
+ (mp = dvp->v_mountedhere)) {
+
+ /* A non-zero vfs_busy result re-evaluates the loop condition. */
+ if (vfs_busy(mp, 0, 0, p))
+ continue;
+
+ error = VFS_ROOT(mp, &tdvp);
+ vfs_unbusy(mp, p);
+ if (error) {
+ vput(dvp);
+ return (error);
+ }
+
+ vput(dvp);
+ dvp = tdvp;
+ }
+
+ *vpp = dvp;
+ return (0);
+}
+
+/*
+ * union_lookup: VOP_LOOKUP for a union directory.
+ *
+ * Looks the component up in the upper layer and, unless a whiteout or an
+ * opaque upper directory hides it, in the lower layer, using
+ * union_lookup1() for each. The results are combined into a union vnode
+ * through union_allocvp(); a lower-only directory first gets a shadow
+ * directory in the upper layer via union_mkshadow().
+ *
+ * Returns 0 with *ap->a_vpp set, or an error. EROFS is returned for
+ * DELETE/RENAME (and CREATE/RENAME that would create) of the last
+ * component on a read-only mount.
+ *
+ * LOCKPARENT is forced on for the duration of the layer lookups and
+ * restored to the caller's setting before returning.
+ */
+static int
+union_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ int uerror, lerror;
+ struct vnode *uppervp, *lowervp;
+ struct vnode *upperdvp, *lowerdvp;
+ struct vnode *dvp = ap->a_dvp;
+ struct union_node *dun = VTOUNION(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+ int lockparent = cnp->cn_flags & LOCKPARENT;
+ struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+ struct ucred *saved_cred = NULL;
+ int iswhiteout;
+ struct vattr va;
+
+
+ /*
+ * Disallow write attempts to the filesystem mounted read-only.
+ */
+ if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME))
+ return (EROFS);
+
+#ifdef notyet
+ if (cnp->cn_namelen == 3 &&
+ cnp->cn_nameptr[2] == '.' &&
+ cnp->cn_nameptr[1] == '.' &&
+ cnp->cn_nameptr[0] == '.') {
+ dvp = *ap->a_vpp = LOWERVP(ap->a_dvp);
+ if (dvp == NULLVP)
+ return (ENOENT);
+ VREF(dvp);
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+ if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+ VOP_UNLOCK(ap->a_dvp, 0, p);
+ return (0);
+ }
+#endif
+
+ cnp->cn_flags |= LOCKPARENT;
+
+ upperdvp = dun->un_uppervp;
+ lowerdvp = dun->un_lowervp;
+ uppervp = NULLVP;
+ lowervp = NULLVP;
+ iswhiteout = 0;
+
+ /*
+ * For `..' take an extra reference on both layer directories;
+ * the references are dropped again at the `out' label.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ if (upperdvp != NULL)
+ VREF(upperdvp);
+ if (lowerdvp != NULL)
+ VREF(lowerdvp);
+ }
+
+ /*
+ * do the lookup in the upper level.
+ * if that level consumes additional pathnames,
+ * then assume that something special is going
+ * on and just return that vnode.
+ */
+ if (upperdvp != NULLVP) {
+ FIXUP(dun, p);
+ /*
+ * If we're doing `..' in the underlying filesystem,
+ * we must drop our lock on the union node before
+ * going up the tree in the lower file system--if we block
+ * on the lowervp lock, and that's held by someone else
+ * coming down the tree and who's waiting for our lock,
+ * we would be hosed.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ /* retain lock on underlying VP: */
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(dvp, 0, p);
+ }
+ uerror = union_lookup1(um->um_uppervp, &upperdvp,
+ &uppervp, cnp);
+ /*
+ * Disallow write attempts to the filesystem mounted read-only.
+ *
+ * NOTE(review): this return bypasses the `out' label, so in the
+ * ISDOTDOT case the references taken above would not be
+ * released -- confirm whether that combination can occur.
+ */
+ if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) &&
+ (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) {
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (EROFS);
+ }
+
+ if (cnp->cn_flags & ISDOTDOT) {
+ if (dun->un_uppervp == upperdvp) {
+ /*
+ * We got the underlying bugger back locked...
+ * now take back the union node lock. Since we
+ * hold the uppervp lock, we can diddle union
+ * locking flags at will. :)
+ */
+ dun->un_flags |= UN_ULOCK;
+ }
+ /*
+ * If upperdvp got swapped out, it means we did
+ * some mount point magic, and we do not have
+ * dun->un_uppervp locked currently--so we get it
+ * locked here (don't set the UN_ULOCK flag).
+ */
+ vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p);
+ }
+
+ /*if (uppervp == upperdvp)
+ dun->un_flags |= UN_KLOCK;*/
+
+ if (cnp->cn_consume != 0) {
+ *ap->a_vpp = uppervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ error = uerror;
+ goto out;
+ }
+ /*
+ * Nothing in the upper layer: decide whether the lower layer
+ * is hidden, either by an explicit whiteout or because the
+ * upper directory carries the OPAQUE flag.
+ */
+ if (uerror == ENOENT || uerror == EJUSTRETURN) {
+ if (cnp->cn_flags & ISWHITEOUT) {
+ iswhiteout = 1;
+ } else if (lowerdvp != NULLVP) {
+ lerror = VOP_GETATTR(upperdvp, &va,
+ cnp->cn_cred, cnp->cn_proc);
+ if (lerror == 0 && (va.va_flags & OPAQUE))
+ iswhiteout = 1;
+ }
+ }
+ } else {
+ uerror = ENOENT;
+ }
+
+ /*
+ * in a similar way to the upper layer, do the lookup
+ * in the lower layer. this time, if there is some
+ * component magic going on, then vput whatever we got
+ * back from the upper layer and return the lower vnode
+ * instead.
+ */
+ if (lowerdvp != NULLVP && !iswhiteout) {
+ int nameiop;
+
+ vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p);
+
+ /*
+ * Only do a LOOKUP on the bottom node, since
+ * we won't be making changes to it anyway.
+ */
+ nameiop = cnp->cn_nameiop;
+ cnp->cn_nameiop = LOOKUP;
+ /* For UNMNT_BELOW mounts the lower lookup uses um_cred. */
+ if (um->um_op == UNMNT_BELOW) {
+ saved_cred = cnp->cn_cred;
+ cnp->cn_cred = um->um_cred;
+ }
+ /*
+ * We shouldn't have to worry about locking interactions
+ * between the lower layer and our union layer (w.r.t.
+ * `..' processing) because we don't futz with lowervp
+ * locks in the union-node instantiation code path.
+ */
+ lerror = union_lookup1(um->um_lowervp, &lowerdvp,
+ &lowervp, cnp);
+ if (um->um_op == UNMNT_BELOW)
+ cnp->cn_cred = saved_cred;
+ cnp->cn_nameiop = nameiop;
+
+ if (lowervp != lowerdvp)
+ VOP_UNLOCK(lowerdvp, 0, p);
+
+ if (cnp->cn_consume != 0 || lerror == EACCES) {
+ if (lerror == EACCES)
+ lowervp = NULLVP;
+ if (uppervp != NULLVP) {
+ if (uppervp == upperdvp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ uppervp = NULLVP;
+ }
+ *ap->a_vpp = lowervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ error = lerror;
+ goto out;
+ }
+ } else {
+ lerror = ENOENT;
+ /*
+ * No lower lookup was done; for `..' fall back to the lower
+ * vnode of the recorded parent union node, if there is one.
+ */
+ if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) {
+ lowervp = LOWERVP(dun->un_pvp);
+ if (lowervp != NULLVP) {
+ VREF(lowervp);
+ vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p);
+ lerror = 0;
+ }
+ }
+ }
+
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+
+ /*
+ * at this point, we have uerror and lerror indicating
+ * possible errors with the lookups in the upper and lower
+ * layers. additionally, uppervp and lowervp are (locked)
+ * references to existing vnodes in the upper and lower layers.
+ *
+ * there are now three cases to consider.
+ * 1. if both layers returned an error, then return whatever
+ * error the upper layer generated.
+ *
+ * 2. if the top layer failed and the bottom layer succeeded
+ * then two subcases occur.
+ * a. the bottom vnode is not a directory, in which
+ * case just return a new union vnode referencing
+ * an empty top layer and the existing bottom layer.
+ * b. the bottom vnode is a directory, in which case
+ * create a new directory in the top-level and
+ * continue as in case 3.
+ *
+ * 3. if the top layer succeeded then return a new union
+ * vnode referencing whatever the new top layer and
+ * whatever the bottom layer returned.
+ */
+
+ *ap->a_vpp = NULLVP;
+
+ /* case 1. */
+ if ((uerror != 0) && (lerror != 0)) {
+ error = uerror;
+ goto out;
+ }
+
+ /* case 2. */
+ if (uerror != 0 /* && (lerror == 0) */ ) {
+ if (lowervp->v_type == VDIR) { /* case 2b. */
+ /* The upper directory is unlocked around union_mkshadow(). */
+ dun->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(upperdvp, 0, p);
+ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+ vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p);
+ dun->un_flags |= UN_ULOCK;
+
+ if (uerror) {
+ if (lowervp != NULLVP) {
+ vput(lowervp);
+ lowervp = NULLVP;
+ }
+ error = uerror;
+ goto out;
+ }
+ }
+ }
+
+ if (lowervp != NULLVP)
+ VOP_UNLOCK(lowervp, 0, p);
+
+ error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+ uppervp, lowervp, 1);
+
+ if (error) {
+ if (uppervp != NULLVP)
+ vput(uppervp);
+ if (lowervp != NULLVP)
+ vrele(lowervp);
+ } else {
+ if (*ap->a_vpp != dvp)
+ if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+ VOP_UNLOCK(dvp, 0, p);
+#ifdef DIAGNOSTIC
+ if (cnp->cn_namelen == 1 &&
+ cnp->cn_nameptr[0] == '.' &&
+ *ap->a_vpp != dvp) {
+ /*
+ * Report the vnode that was returned (the value that was
+ * just compared with dvp), not the address of the
+ * caller's result slot.
+ */
+ panic("union_lookup returning . (%p) not same as startdir (%p)",
+ *ap->a_vpp, dvp);
+ }
+#endif
+ }
+
+out:
+ if (cnp->cn_flags & ISDOTDOT) {
+ if (upperdvp != NULL)
+ vrele(upperdvp);
+ if (lowerdvp != NULL)
+ vrele(lowerdvp);
+ }
+
+ return (error);
+}
+
+/*
+ * union_create: create a file in a union directory.
+ *
+ * When the directory has an upper vnode, the file is created there with
+ * VOP_CREATE and the new upper vnode is wrapped in a union vnode by
+ * union_allocvp(), returned through *ap->a_vpp. Without an upper vnode
+ * the call fails with EROFS.
+ */
+static int
+union_create(ap)
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct vnode *dvp = dun->un_uppervp;
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+
+ if (dvp != NULLVP) {
+ struct vnode *vp;
+ struct mount *mp;
+ int error;
+
+ FIXUP(dun, p);
+
+ /*
+ * UN_KLOCK is set before unlocking the union node;
+ * presumably union_unlock then keeps the upper vnode
+ * locked -- confirm against union_unlock.
+ */
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_dvp, 0, p);
+ error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap);
+ if (error) {
+ dun->un_flags |= UN_ULOCK;
+ return (error);
+ }
+
+ /* Save the mount before working on the new vnode. */
+ mp = ap->a_dvp->v_mount;
+ VOP_UNLOCK(dvp, 0, p);
+ error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp,
+ NULLVP, 1);
+ if (error)
+ vput(vp);
+ /* The union directory is re-locked on both outcomes. */
+ vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p);
+ return (error);
+ }
+
+ return (EROFS);
+}
+
+/*
+ * union_whiteout: forward a whiteout request to the upper layer.
+ *
+ * Returns EOPNOTSUPP when the directory has no upper vnode; otherwise
+ * makes sure the upper vnode is locked and returns the result of
+ * VOP_WHITEOUT on it.
+ */
+static int
+union_whiteout(ap)
+	struct vop_whiteout_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+		int a_flags;
+	} */ *ap;
+{
+	struct union_node *dirnode = VTOUNION(ap->a_dvp);
+	struct componentname *name = ap->a_cnp;
+	int result;
+
+	if (dirnode->un_uppervp != NULLVP) {
+		FIXUP(dirnode, name->cn_proc);
+		result = VOP_WHITEOUT(dirnode->un_uppervp, name, ap->a_flags);
+	} else {
+		result = EOPNOTSUPP;
+	}
+
+	return (result);
+}
+
+/*
+ * union_mknod: create a special file in a union directory.
+ *
+ * Works like union_create but calls VOP_MKNOD on the upper directory.
+ * If VOP_MKNOD succeeds without handing back a vnode, no union vnode is
+ * allocated. Returns EROFS when the directory has no upper vnode.
+ */
+static int
+union_mknod(ap)
+ struct vop_mknod_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct vnode *dvp = dun->un_uppervp;
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+
+ if (dvp != NULLVP) {
+ struct vnode *vp;
+ struct mount *mp;
+ int error;
+
+ FIXUP(dun, p);
+
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_dvp, 0, p);
+ error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap);
+ if (error) {
+ dun->un_flags |= UN_ULOCK;
+ return (error);
+ }
+
+ if (vp != NULLVP) {
+ /* A vnode came back: wrap it in a union vnode. */
+ mp = ap->a_dvp->v_mount;
+ VOP_UNLOCK(dvp, 0, p);
+ error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP,
+ cnp, vp, NULLVP, 1);
+ if (error)
+ vput(vp);
+ vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p);
+ } else {
+ /* No vnode returned: only restore the lock flag. */
+ dun->un_flags |= UN_ULOCK;
+ }
+ return (error);
+ }
+
+ return (EROFS);
+}
+
+/*
+ * union_open: open a union vnode.
+ *
+ * With an upper vnode the open goes straight to it. Otherwise a regular
+ * lower file opened with FWRITE is first copied up (contents included
+ * unless O_TRUNC is set) and the new upper vnode is opened; anything
+ * else opens the lower vnode and counts the open in un_openl.
+ */
+static int
+union_open(ap)
+ struct vop_open_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct union_node *un = VTOUNION(ap->a_vp);
+ struct vnode *tvp;
+ int mode = ap->a_mode;
+ struct ucred *cred = ap->a_cred;
+ struct proc *p = ap->a_p;
+ int error;
+
+ /*
+ * If there is an existing upper vp then simply open that.
+ */
+ tvp = un->un_uppervp;
+ if (tvp == NULLVP) {
+ /*
+ * If the lower vnode is being opened for writing, then
+ * copy the file contents to the upper vnode and open that,
+ * otherwise can simply open the lower vnode.
+ */
+ tvp = un->un_lowervp;
+ if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+ error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p);
+ if (error == 0)
+ error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+ return (error);
+ }
+
+ /*
+ * Just open the lower vnode
+ *
+ * NOTE(review): un_openl is incremented before VOP_OPEN is
+ * attempted and is not decremented here if the open fails --
+ * confirm that this is intended.
+ */
+ un->un_openl++;
+ vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p);
+ error = VOP_OPEN(tvp, mode, cred, p);
+ VOP_UNLOCK(tvp, 0, p);
+
+ return (error);
+ }
+
+ FIXUP(un, p);
+
+ error = VOP_OPEN(tvp, mode, cred, p);
+
+ return (error);
+}
+
+/*
+ * union_close: pass a close down to the layer that is in use.
+ *
+ * The upper vnode is used when it exists.  Otherwise the lower-layer
+ * open count is decremented and the lower vnode is used.  The argument
+ * block is re-targeted at the chosen vnode and dispatched to its
+ * vop_close.
+ */
+static int
+union_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *target = un->un_uppervp;
+
+	if (target == NULLVP) {
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		un->un_openl -= 1;
+		target = un->un_lowervp;
+	}
+
+	ap->a_vp = target;
+	return (VCALL(target, VOFFSET(vop_close), ap));
+}
+
+/*
+ * union_access: permission check for a union vnode.
+ *
+ * Write access to regular files, directories and symlinks is refused
+ * with EROFS on a read-only mount.  If an upper vnode exists, the check
+ * is delegated to it alone.  Otherwise the lower vnode is checked with
+ * the caller's credentials and, on UNMNT_BELOW mounts, a second time
+ * with the mount's own credentials, so that an implicit copy-up never
+ * grants more than the lower file allows.  With neither layer present
+ * the result is EACCES.
+ */
+static int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+	struct union_node *un = VTOUNION(unionvp);
+	struct proc *p = ap->a_p;
+	struct vnode *layervp;
+	struct union_mount *um;
+	int error;
+
+	/* Refuse writes to file-like objects on a read-only mount. */
+	if ((ap->a_mode & VWRITE) &&
+	    (unionvp->v_mount->mnt_flag & MNT_RDONLY)) {
+		if (unionvp->v_type == VREG || unionvp->v_type == VDIR ||
+		    unionvp->v_type == VLNK)
+			return (EROFS);
+	}
+
+	/* Upper layer present: it alone decides. */
+	layervp = un->un_uppervp;
+	if (layervp != NULLVP) {
+		FIXUP(un, p);
+		ap->a_vp = layervp;
+		return (VCALL(layervp, VOFFSET(vop_access), ap));
+	}
+
+	/* Neither layer present. */
+	layervp = un->un_lowervp;
+	if (layervp == NULLVP)
+		return (EACCES);
+
+	vn_lock(layervp, LK_EXCLUSIVE | LK_RETRY, p);
+	ap->a_vp = layervp;
+	error = VCALL(layervp, VOFFSET(vop_access), ap);
+	if (error == 0) {
+		um = MOUNTTOUNIONMOUNT(unionvp->v_mount);
+		if (um->um_op == UNMNT_BELOW) {
+			/* Repeat the check with the mount's credentials. */
+			ap->a_cred = um->um_cred;
+			error = VCALL(layervp, VOFFSET(vop_access), ap);
+		}
+	}
+	VOP_UNLOCK(layervp, 0, p);
+
+	return (error);
+}
+
+/*
+ * We handle getattr only to change the fsid and
+ * track object sizes
+ *
+ * Attributes come from the upper vnode when it exists, otherwise from
+ * the lower vnode. For a directory present in both layers the lower
+ * attributes are fetched into a scratch vattr and only its link count
+ * is added to the result. The sizes seen are passed to union_newsize(),
+ * and va_fsid is overwritten with the union mount's fsid.
+ */
+static int
+union_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ int error;
+ struct union_node *un = VTOUNION(ap->a_vp);
+ struct vnode *vp = un->un_uppervp;
+ struct proc *p = ap->a_p;
+ struct vattr *vap;
+ struct vattr va;
+
+
+ /*
+ * Some programs walk the filesystem hierarchy by counting
+ * links to directories to avoid stat'ing all the time.
+ * This means the link count on directories needs to be "correct".
+ * The only way to do that is to call getattr on both layers
+ * and fix up the link count. The link count will not necessarily
+ * be accurate but will be large enough to defeat the tree walkers.
+ */
+
+ vap = ap->a_vap;
+
+ vp = un->un_uppervp;
+ if (vp != NULLVP) {
+ /*
+ * It's not clear whether VOP_GETATTR is to be
+ * called with the vnode locked or not. stat() calls
+ * it with (vp) locked, and fstat calls it with
+ * (vp) unlocked.
+ * In the mean time, compensate here by checking
+ * the union_node's lock flag.
+ */
+ if (un->un_flags & UN_LOCKED)
+ FIXUP(un, p);
+
+ error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ union_newsize(ap->a_vp, vap->va_size, VNOVAL);
+ }
+
+ /*
+ * Choose the second vnode to query: the lower vnode when there is
+ * no upper one (result goes straight to the caller's vattr), or
+ * when the upper one is a directory with a lower counterpart
+ * (result goes to the scratch `va'). Otherwise none.
+ */
+ if (vp == NULLVP) {
+ vp = un->un_lowervp;
+ } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) {
+ vp = un->un_lowervp;
+ vap = &va;
+ } else {
+ vp = NULLVP;
+ }
+
+ if (vp != NULLVP) {
+ error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ union_newsize(ap->a_vp, VNOVAL, vap->va_size);
+ }
+
+ /* vap differs from a_vap only in the two-layer directory case. */
+ if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+ ap->a_vap->va_nlink += vap->va_nlink;
+
+ ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+ return (0);
+}
+
+/*
+ * union_setattr: set attributes on a union vnode.
+ *
+ * Attribute changes are only ever applied to the upper vnode. A regular
+ * file that exists only in the lower layer is copied up first (contents
+ * are copied unless the requested size is 0). Returns EROFS when the
+ * mount is read-only and one of the checked attributes is being set, or
+ * when no upper vnode exists after the copy-up step.
+ */
+static int
+union_setattr(ap)
+ struct vop_setattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct union_node *un = VTOUNION(ap->a_vp);
+ struct proc *p = ap->a_p;
+ struct vattr *vap = ap->a_vap;
+ int error;
+
+ /*
+ * Disallow write attempts on filesystems mounted read-only.
+ */
+ if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) &&
+ (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL ||
+ vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL ||
+ vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL))
+ return (EROFS);
+
+ /*
+ * Handle case of truncating lower object to zero size,
+ * by creating a zero length upper object. This is to
+ * handle the case of open with O_TRUNC and O_CREAT.
+ */
+ if ((un->un_uppervp == NULLVP) &&
+ /* assert(un->un_lowervp != NULLVP) */
+ (un->un_lowervp->v_type == VREG)) {
+ error = union_copyup(un, (ap->a_vap->va_size != 0),
+ ap->a_cred, ap->a_p);
+ if (error)
+ return (error);
+ }
+
+ /*
+ * Try to set attributes in upper layer,
+ * otherwise return read-only filesystem error.
+ */
+ if (un->un_uppervp != NULLVP) {
+ FIXUP(un, p);
+ error = VOP_SETATTR(un->un_uppervp, ap->a_vap,
+ ap->a_cred, ap->a_p);
+ /* Record the new upper size when a size was set. */
+ if ((error == 0) && (ap->a_vap->va_size != VNOVAL))
+ union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL);
+ } else {
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+/*
+ * union_read: read from the vnode selected by OTHERVP().
+ *
+ * When that vnode is the lower one it is locked around the read;
+ * otherwise FIXUP makes sure the upper vnode is locked. After a
+ * successful read the cached size of the layer that was read is grown
+ * if the uio offset has moved past it.
+ */
+static int
+union_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ int error;
+ struct proc *p = ap->a_uio->uio_procp;
+ struct vnode *vp = OTHERVP(ap->a_vp);
+ /* True when the read goes to the lower layer. */
+ int dolock = (vp == LOWERVP(ap->a_vp));
+
+ if (dolock)
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
+ else
+ FIXUP(VTOUNION(ap->a_vp), p);
+ error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+ if (dolock)
+ VOP_UNLOCK(vp, 0, p);
+
+ /*
+ * XXX
+ * perhaps the size of the underlying object has changed under
+ * our feet. take advantage of the offset information present
+ * in the uio structure.
+ */
+ if (error == 0) {
+ struct union_node *un = VTOUNION(ap->a_vp);
+ off_t cur = ap->a_uio->uio_offset;
+
+ if (vp == un->un_uppervp) {
+ if (cur > un->un_uppersz)
+ union_newsize(ap->a_vp, cur, VNOVAL);
+ } else {
+ if (cur > un->un_lowersz)
+ union_newsize(ap->a_vp, VNOVAL, cur);
+ }
+ }
+
+ return (error);
+}
+
+/*
+ * union_write: write to the upper vnode.
+ *
+ * Panics if the union vnode has no upper vnode. After a successful write
+ * the cached upper size is grown if the uio offset has moved past it.
+ *
+ * NOTE(review): the argument block is declared as vop_read_args here and
+ * in the forward declaration; presumably vop_write_args was intended --
+ * confirm and change both together.
+ */
+static int
+union_write(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ int error;
+ struct vnode *vp;
+ struct union_node *un = VTOUNION(ap->a_vp);
+ struct proc *p = ap->a_uio->uio_procp;
+
+ vp = UPPERVP(ap->a_vp);
+ if (vp == NULLVP)
+ panic("union: missing upper layer in write");
+
+ FIXUP(un, p);
+ error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+
+ /*
+ * the size of the underlying object may be changed by the
+ * write.
+ */
+ if (error == 0) {
+ off_t cur = ap->a_uio->uio_offset;
+
+ if (cur > un->un_uppersz)
+ union_newsize(ap->a_vp, cur, VNOVAL);
+ }
+
+ return (error);
+}
+
+/*
+ * union_lease: re-target the argument block at the vnode chosen by
+ * OTHERVP() and dispatch to that vnode's vop_lease.
+ */
+static int
+union_lease(ap)
+	struct vop_lease_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+		struct ucred *a_cred;
+		int a_flag;
+	} */ *ap;
+{
+	struct vnode *target;
+
+	target = OTHERVP(ap->a_vp);
+	ap->a_vp = target;
+
+	return (VCALL(target, VOFFSET(vop_lease), ap));
+}
+
+/*
+ * union_ioctl: re-target the argument block at the vnode chosen by
+ * OTHERVP() and dispatch to that vnode's vop_ioctl.
+ */
+static int
+union_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target;
+
+	target = OTHERVP(ap->a_vp);
+	ap->a_vp = target;
+
+	return (VCALL(target, VOFFSET(vop_ioctl), ap));
+}
+
+/*
+ * union_poll: re-target the argument block at the vnode chosen by
+ * OTHERVP() and dispatch to that vnode's vop_poll.
+ */
+static int
+union_poll(ap)
+	struct vop_poll_args /* {
+		struct vnode *a_vp;
+		int a_events;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target;
+
+	target = OTHERVP(ap->a_vp);
+	ap->a_vp = target;
+
+	return (VCALL(target, VOFFSET(vop_poll), ap));
+}
+
+/*
+ * union_revoke: revoke whichever layer vnodes exist (upper first, then
+ * lower) with the caller's flags, then vgone() the union vnode itself.
+ * Always returns 0.
+ */
+static int
+union_revoke(ap)
+	struct vop_revoke_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+
+	if (UPPERVP(unionvp) != NULL) {
+		VOP_REVOKE(UPPERVP(unionvp), ap->a_flags);
+	}
+	if (LOWERVP(unionvp) != NULL) {
+		VOP_REVOKE(LOWERVP(unionvp), ap->a_flags);
+	}
+
+	vgone(unionvp);
+	return (0);
+}
+
+/*
+ * union_mmap: re-target the argument block at the vnode chosen by
+ * OTHERVP() and dispatch to that vnode's vop_mmap.
+ */
+static int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target;
+
+	target = OTHERVP(ap->a_vp);
+	ap->a_vp = target;
+
+	return (VCALL(target, VOFFSET(vop_mmap), ap));
+}
+
+/*
+ * union_fsync: fsync the vnode selected by OTHERVP(), if any.
+ *
+ * A lower vnode is locked around the VOP_FSYNC call. For an upper vnode
+ * FIXUP is used, except in the special case marked XXX below where the
+ * function returns 0 without syncing.
+ */
+static int
+union_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnode *a_vp;
+ struct ucred *a_cred;
+ int a_waitfor;
+ struct proc *a_p;
+ } */ *ap;
+{
+ int error = 0;
+ struct proc *p = ap->a_p;
+ struct vnode *targetvp = OTHERVP(ap->a_vp);
+ struct union_node *un;
+
+ if (targetvp != NULLVP) {
+ int dolock = (targetvp == LOWERVP(ap->a_vp));
+
+ un = VTOUNION(ap->a_vp);
+ if (dolock)
+ vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p);
+ else {
+ /* Same value as assigned above; the assignment is repeated. */
+ un = VTOUNION(ap->a_vp);
+ /*
+ * The union node is not flagged UN_ULOCK, yet the target
+ * vnode reports being locked and the lock stored in its
+ * v_data names the current process (curproc, not `p') as
+ * holder: return without syncing rather than taking the
+ * lock again.
+ * NOTE(review): this treats v_data as a struct lock, which
+ * presumably only holds for some filesystems -- confirm.
+ */
+ if ((un->un_flags & UN_ULOCK) == 0 &&
+ targetvp->v_data != NULL &&
+ ((struct lock *)targetvp->v_data)->lk_lockholder
+ == curproc->p_pid &&
+ VOP_ISLOCKED(targetvp) != 0)
+ return 0; /* XXX */
+
+ FIXUP(un, p);
+ }
+
+ error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p);
+ if (dolock)
+ VOP_UNLOCK(targetvp, 0, p);
+ }
+
+ return (error);
+}
+
+/*
+ * union_remove: remove a file from a union directory.
+ *
+ * Panics if the directory has no upper vnode. If the file has an upper
+ * vnode it is removed with VOP_REMOVE, with DOWHITEOUT set when
+ * union_dowhiteout() says so. If the file exists only in the lower
+ * layer, a whiteout is created in the upper directory instead.
+ */
+static int
+union_remove(ap)
+ struct vop_remove_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct union_node *un = VTOUNION(ap->a_vp);
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+ int error;
+
+ if (dun->un_uppervp == NULLVP)
+ panic("union remove: null upper vnode");
+
+ if (un->un_uppervp != NULLVP) {
+ struct vnode *dvp = dun->un_uppervp;
+ struct vnode *vp = un->un_uppervp;
+
+ /*
+ * For both the directory and the file: make sure the upper
+ * vnode is locked, set UN_KLOCK, then unlock the union vnode.
+ */
+ FIXUP(dun, p);
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_dvp, 0, p);
+ FIXUP(un, p);
+ un->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_vp, 0, p);
+
+ if (union_dowhiteout(un, cnp->cn_cred, p))
+ cnp->cn_flags |= DOWHITEOUT;
+ error = VOP_REMOVE(dvp, vp, cnp);
+#if 0
+ /* XXX */
+ if (!error)
+ union_removed_upper(un);
+#endif
+ dun->un_flags |= UN_ULOCK;
+ un->un_flags |= UN_ULOCK;
+ } else {
+ /* Lower-only file: hide it with a whiteout in the upper dir. */
+ FIXUP(dun, p);
+ error = union_mkwhiteout(
+ MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
+ dun->un_uppervp, ap->a_cnp, un->un_path);
+ }
+
+ return (error);
+}
+
+/*
+ * union_link: create a hard link in a union directory.
+ *
+ * If the link target is itself a union vnode without an upper vnode, it
+ * is copied up first; the link is then made in the upper layer with
+ * VOP_LINK. A target whose v_op differs from the directory's is passed
+ * through unchanged. Returns EROFS when the directory has no upper
+ * vnode, or the copy-up error.
+ */
+static int
+union_link(ap)
+ struct vop_link_args /* {
+ struct vnode *a_tdvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+ struct union_node *dun = VTOUNION(ap->a_tdvp);
+ struct vnode *vp;
+ struct vnode *tdvp;
+ int error = 0;
+
+
+ if (ap->a_tdvp->v_op != ap->a_vp->v_op) {
+ /* Target is not a union vnode: link it as it is. */
+ vp = ap->a_vp;
+ } else {
+ struct union_node *tun = VTOUNION(ap->a_vp);
+ if (tun->un_uppervp == NULLVP) {
+ vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p);
+ /*
+ * When the target's un_dirvp is this directory's upper
+ * vnode, that vnode is unlocked around the copy-up and
+ * re-locked afterwards.
+ */
+ if (dun->un_uppervp == tun->un_dirvp) {
+ dun->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(dun->un_uppervp, 0, p);
+ }
+ error = union_copyup(tun, 1, cnp->cn_cred, p);
+ if (dun->un_uppervp == tun->un_dirvp) {
+ vn_lock(dun->un_uppervp,
+ LK_EXCLUSIVE | LK_RETRY, p);
+ dun->un_flags |= UN_ULOCK;
+ }
+ VOP_UNLOCK(ap->a_vp, 0, p);
+ }
+ vp = tun->un_uppervp;
+ }
+
+ tdvp = dun->un_uppervp;
+ if (tdvp == NULLVP)
+ error = EROFS;
+
+ /* Covers both a failed copy-up and a missing upper directory. */
+ if (error)
+ return (error);
+
+ FIXUP(dun, p);
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_tdvp, 0, p);
+
+ error = VOP_LINK(tdvp, vp, cnp);
+
+ dun->un_flags |= UN_ULOCK;
+
+ return (error);
+}
+
+/*
+ * union_rename: rename within the upper layer.
+ *
+ * Each of the four vnodes that is a union vnode is replaced by its upper
+ * vnode (a new reference is taken and the union vnode's reference is
+ * dropped), then the operation is handed to VOP_RENAME. If a source or
+ * directory has no upper vnode the rename fails with EXDEV and all
+ * vnodes still held are released at the `bad' label.
+ */
+static int
+union_rename(ap)
+ struct vop_rename_args /* {
+ struct vnode *a_fdvp;
+ struct vnode *a_fvp;
+ struct componentname *a_fcnp;
+ struct vnode *a_tdvp;
+ struct vnode *a_tvp;
+ struct componentname *a_tcnp;
+ } */ *ap;
+{
+ int error;
+
+ struct vnode *fdvp = ap->a_fdvp;
+ struct vnode *fvp = ap->a_fvp;
+ struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *tvp = ap->a_tvp;
+
+ if (fdvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fdvp);
+ if (un->un_uppervp == NULLVP) {
+ /*
+ * this should never happen in normal
+ * operation but might if there was
+ * a problem creating the top-level shadow
+ * directory.
+ */
+ error = EXDEV;
+ goto bad;
+ }
+
+ fdvp = un->un_uppervp;
+ VREF(fdvp);
+ vrele(ap->a_fdvp);
+ }
+
+ if (fvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fvp);
+ if (un->un_uppervp == NULLVP) {
+ /* XXX: should do a copyup */
+ error = EXDEV;
+ goto bad;
+ }
+
+ /* A lower copy exists too, so ask for a whiteout at the source. */
+ if (un->un_lowervp != NULLVP)
+ ap->a_fcnp->cn_flags |= DOWHITEOUT;
+
+ fvp = un->un_uppervp;
+ VREF(fvp);
+ vrele(ap->a_fvp);
+ }
+
+ if (tdvp->v_op == union_vnodeop_p) {
+ struct union_node *un = VTOUNION(tdvp);
+ if (un->un_uppervp == NULLVP) {
+ /*
+ * this should never happen in normal
+ * operation but might if there was
+ * a problem creating the top-level shadow
+ * directory.
+ */
+ error = EXDEV;
+ goto bad;
+ }
+
+ tdvp = un->un_uppervp;
+ VREF(tdvp);
+ /* UN_KLOCK is set before the union vnode is vput. */
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_tdvp);
+ }
+
+ if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) {
+ struct union_node *un = VTOUNION(tvp);
+
+ /* tvp becomes NULLVP when the target has no upper vnode. */
+ tvp = un->un_uppervp;
+ if (tvp != NULLVP) {
+ VREF(tvp);
+ un->un_flags |= UN_KLOCK;
+ }
+ vput(ap->a_tvp);
+ }
+
+ return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad:
+ vrele(fdvp);
+ vrele(fvp);
+ vput(tdvp);
+ if (tvp != NULLVP)
+ vput(tvp);
+
+ return (error);
+}
+
+/*
+ * union_mkdir: create a directory in a union directory.
+ *
+ * The directory is made in the upper layer with VOP_MKDIR and wrapped in
+ * a union vnode by union_allocvp(), with the parent union vnode passed
+ * as the directory argument. Returns EROFS when the parent has no upper
+ * vnode.
+ */
+static int
+union_mkdir(ap)
+ struct vop_mkdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct vnode *dvp = dun->un_uppervp;
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+
+ if (dvp != NULLVP) {
+ struct vnode *vp;
+ int error;
+
+ FIXUP(dun, p);
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_dvp, 0, p);
+ error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap);
+ if (error) {
+ dun->un_flags |= UN_ULOCK;
+ return (error);
+ }
+
+ VOP_UNLOCK(dvp, 0, p);
+ error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp,
+ NULLVP, cnp, vp, NULLVP, 1);
+ if (error)
+ vput(vp);
+ /* The parent union vnode is re-locked on both outcomes. */
+ vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p);
+
+ return (error);
+ }
+
+ return (EROFS);
+}
+
+/*
+ * union_rmdir: remove a directory from a union directory.
+ *
+ * Same structure as union_remove, using VOP_RMDIR: panics if the parent
+ * has no upper vnode, removes the upper directory when there is one
+ * (with DOWHITEOUT set when union_dowhiteout() says so), and otherwise
+ * creates a whiteout in the upper parent.
+ */
+static int
+union_rmdir(ap)
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ struct union_node *dun = VTOUNION(ap->a_dvp);
+ struct union_node *un = VTOUNION(ap->a_vp);
+ struct componentname *cnp = ap->a_cnp;
+ struct proc *p = cnp->cn_proc;
+ int error;
+
+ if (dun->un_uppervp == NULLVP)
+ panic("union rmdir: null upper vnode");
+
+ if (un->un_uppervp != NULLVP) {
+ struct vnode *dvp = dun->un_uppervp;
+ struct vnode *vp = un->un_uppervp;
+
+ /*
+ * For both the parent and the victim: make sure the upper
+ * vnode is locked, set UN_KLOCK, then unlock the union vnode.
+ */
+ FIXUP(dun, p);
+ dun->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_dvp, 0, p);
+ FIXUP(un, p);
+ un->un_flags |= UN_KLOCK;
+ VOP_UNLOCK(ap->a_vp, 0, p);
+
+ if (union_dowhiteout(un, cnp->cn_cred, p))
+ cnp->cn_flags |= DOWHITEOUT;
+ error = VOP_RMDIR(dvp, vp, ap->a_cnp);
+#if 0
+ /* XXX */
+ if (!error)
+ union_removed_upper(un);
+#endif
+ dun->un_flags |= UN_ULOCK;
+ un->un_flags |= UN_ULOCK;
+ } else {
+ /* Lower-only directory: hide it with a whiteout. */
+ FIXUP(dun, p);
+ error = union_mkwhiteout(
+ MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount),
+ dun->un_uppervp, ap->a_cnp, un->un_path);
+ }
+
+ return (error);
+}
+
+/*
+ * union_symlink: create a symbolic link in the upper layer.
+ *
+ * Fails with EROFS when the parent has no upper vnode.  Otherwise the
+ * parent union node is unlocked with UN_KLOCK set, VOP_SYMLINK() is
+ * issued on the upper directory, and *ap->a_vpp is set to NULLVP
+ * regardless of the outcome.  Returns the VOP_SYMLINK() result.
+ */
+static int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct vnode *upperdvp = dun->un_uppervp;
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *linkvp;
+	int error;
+
+	if (upperdvp == NULLVP)
+		return (EROFS);
+
+	FIXUP(dun, p);
+	dun->un_flags |= UN_KLOCK;
+	VOP_UNLOCK(ap->a_dvp, 0, p);
+	error = VOP_SYMLINK(upperdvp, &linkvp, cnp, ap->a_vap, ap->a_target);
+	dun->un_flags |= UN_ULOCK;
+	/* The vnode produced by the upper layer is not handed back. */
+	*ap->a_vpp = NULLVP;
+
+	return (error);
+}
+
+/*
+ * union_readdir cooperates with getdirentries and readdir(3) to
+ * present the entries of the unioned directories.  Walking down the
+ * union stack is the job of getdirentries; removing duplicate names
+ * from the resulting stream is the job of readdir(3).
+ *
+ * Here only the upper vnode is read: the call is redirected to it by
+ * rewriting ap->a_vp.  With no upper vnode, 0 is returned and nothing
+ * is read.
+ */
+static int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *uppervp = un->un_uppervp;
+	struct proc *p = ap->a_uio->uio_procp;
+
+	if (uppervp != NULLVP) {
+		FIXUP(un, p);
+		ap->a_vp = uppervp;
+		return (VCALL(uppervp, VOFFSET(vop_readdir), ap));
+	}
+
+	return (0);
+}
+
+/*
+ * union_readlink: forward VOP_READLINK to the vnode chosen by OTHERVP().
+ *
+ * When that vnode is the lower one it is locked exclusively around the
+ * call and unlocked afterwards; otherwise FIXUP() is applied to the
+ * union node first.  Returns the result of the forwarded call.
+ */
+static int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int on_lower = (targetvp == LOWERVP(ap->a_vp));
+	int error;
+
+	if (on_lower) {
+		vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp), p);
+	}
+
+	/* Redirect the argument block and bypass to the chosen layer. */
+	ap->a_vp = targetvp;
+	error = VCALL(targetvp, VOFFSET(vop_readlink), ap);
+
+	if (on_lower) {
+		VOP_UNLOCK(targetvp, 0, p);
+	}
+
+	return (error);
+}
+
+/*
+ * union_abortop: forward VOP_ABORTOP to the vnode chosen by OTHERVP().
+ *
+ * Locking is only adjusted when the union node was marked UN_LOCKED on
+ * entry: a lower target is locked exclusively around the call, while
+ * any other target gets FIXUP() applied to the union node.  Returns the
+ * result of the forwarded call.
+ */
+static int
+union_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct proc *p = cnp->cn_proc;
+	struct vnode *targetvp = OTHERVP(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	int islocked = un->un_flags & UN_LOCKED;
+	int on_lower = (targetvp == LOWERVP(ap->a_dvp));
+	int lock_lower = (islocked && on_lower);
+	int error;
+
+	if (lock_lower) {
+		vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p);
+	} else if (islocked) {
+		FIXUP(un, p);
+	}
+
+	ap->a_dvp = targetvp;
+	error = VCALL(targetvp, VOFFSET(vop_abortop), ap);
+
+	if (lock_lower) {
+		VOP_UNLOCK(targetvp, 0, p);
+	}
+
+	return (error);
+}
+
+/*
+ * union_inactive: called when the union vnode goes inactive.
+ *
+ * Deliberately does nothing to the layered vnodes (and does _not_
+ * bypass): releasing lowervp is postponed until reclaim so that the
+ * union_node stays in the cache and can be reused in the meantime.
+ *
+ * NEEDSWORK: one day, consider inactive'ing lowervp and then trying to
+ * reactivate it with capabilities (v_id), the way the name lookup
+ * cache code does.  That is too much work for now.
+ *
+ * What is done here: the directory cache, if any, is released and
+ * freed, the vnode is unlocked, and vgone() is called on it unless the
+ * node is flagged UN_CACHED.  Always returns 0.
+ */
+static int
+union_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	struct union_node *un = VTOUNION(vp);
+	struct vnode **slot;
+
+	if (un->un_dircache != 0) {
+		/* The cache is an array of vnodes terminated by NULLVP. */
+		slot = un->un_dircache;
+		while (*slot != NULLVP) {
+			vrele(*slot);
+			slot++;
+		}
+		free(un->un_dircache, M_TEMP);
+		un->un_dircache = 0;
+	}
+
+	VOP_UNLOCK(vp, 0, p);
+
+	if ((un->un_flags & UN_CACHED) == 0)
+		vgone(vp);
+
+	return (0);
+}
+
+/*
+ * union_reclaim: hand the vnode to union_freevp() and report success.
+ */
+static int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);
+	return (0);
+}
+
+/*
+ * union_lock: lock a union node.
+ *
+ * On each attempt the upper vnode (if present, not yet flagged
+ * UN_ULOCK, and the union vnode has a non-zero use count) is locked
+ * with vn_lock() and UN_ULOCK is recorded.  If the union node itself
+ * is already UN_LOCKED, UN_WANT is set and the caller sleeps on
+ * &un->un_flags before retrying from the top.  Once the node is free
+ * it is marked UN_LOCKED.
+ *
+ * Returns 0 on success or the error from vn_lock() on the upper vnode.
+ */
+static int
+union_lock(ap)
+	struct vop_lock_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct proc *p = ap->a_p;
+	int lkflags = ap->a_flags;
+	struct union_node *un;
+	int error;
+
+	vop_nolock(ap);
+	/*
+	 * Real lockmgr-style locking is needed here.  Until then,
+	 * draining won't work quite right, which could lead to a few
+	 * race conditions.  A test of the form
+	 *	if ((flags & LK_TYPE_MASK) == LK_DRAIN)
+	 *		return (0);
+	 * used to live here, but it is not quite right: the lock still
+	 * has to be taken.
+	 */
+	lkflags &= ~LK_INTERLOCK;
+
+	for (;;) {
+		/* Re-read the union node on every pass, as after a sleep. */
+		un = VTOUNION(vp);
+
+		if (un->un_uppervp != NULLVP) {
+			if ((un->un_flags & UN_ULOCK) == 0 &&
+			    vp->v_usecount != 0) {
+				error = vn_lock(un->un_uppervp, lkflags, p);
+				if (error)
+					return (error);
+				un->un_flags |= UN_ULOCK;
+			}
+#ifdef DIAGNOSTIC
+			if (un->un_flags & UN_KLOCK) {
+				vprint("dangling upper lock", vp);
+				panic("union: dangling upper lock");
+			}
+#endif
+		}
+
+		if ((un->un_flags & UN_LOCKED) == 0)
+			break;
+
+#ifdef DIAGNOSTIC
+		if (curproc && un->un_pid == curproc->p_pid &&
+		    un->un_pid > -1 && curproc->p_pid > -1)
+			panic("union: locking against myself");
+#endif
+		/* Somebody else holds the node: ask for a wakeup and wait. */
+		un->un_flags |= UN_WANT;
+		tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0);
+	}
+
+#ifdef DIAGNOSTIC
+	if (curproc)
+		un->un_pid = curproc->p_pid;
+	else
+		un->un_pid = -1;
+#endif
+
+	un->un_flags |= UN_LOCKED;
+	return (0);
+}
+
+/*
+ * union_unlock: unlock a union node.
+ *
+ * Some operations want to vput() a union node while keeping the upper
+ * vnode locked (to carry on with link(), mkdir(), ...).  They set
+ * UN_KLOCK on the union node and then call vput(), which calls
+ * VOP_UNLOCK() and ends up here.  In that case only the union node is
+ * unlocked, the upper vnode is left alone, UN_KLOCK is cleared, and
+ * control returns to vput(); the caller finishes its work on the upper
+ * vnode and is responsible for unlocking it.
+ *
+ * Without UN_KLOCK, an upper vnode flagged UN_ULOCK is unlocked here.
+ * Any sleeper that set UN_WANT is woken up.  Always returns 0.
+ */
+static int
+union_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+		int a_flags;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct proc *p = ap->a_p;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int upper_state;
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+	    curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+
+	/* Drop the upper lock only if it is held and not being kept. */
+	upper_state = un->un_flags & (UN_ULOCK | UN_KLOCK);
+	if (upper_state == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp, 0, p);
+
+	un->un_flags &= ~(UN_ULOCK | UN_KLOCK);
+
+	if ((un->un_flags & UN_WANT) != 0) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+	vop_nounlock(ap);
+
+	return (0);
+}
+
+/*
+ * union_bmap: forward VOP_BMAP to the vnode chosen by OTHERVP().
+ *
+ * A lower target is locked exclusively around the call; for any other
+ * target FIXUP() is applied to the union node first.  curproc is used
+ * as the locking process (XXX).  Returns the forwarded call's result.
+ */
+static int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+		int *a_runb;
+	} */ *ap;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int on_lower = (targetvp == LOWERVP(ap->a_vp));
+	int error;
+
+	if (on_lower) {
+		vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp), p);
+	}
+
+	ap->a_vp = targetvp;
+	error = VCALL(targetvp, VOFFSET(vop_bmap), ap);
+
+	if (on_lower) {
+		VOP_UNLOCK(targetvp, 0, p);
+	}
+
+	return (error);
+}
+
+/*
+ * union_print: print the union vnode together with its upper and lower
+ * vnode pointers, then vprint() each layer that is present.
+ * Always returns 0.
+ */
+static int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vnode *upper = UPPERVP(vp);
+	struct vnode *lower = LOWERVP(vp);
+
+	printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n",
+	    vp, upper, lower);
+	if (upper != NULLVP)
+		vprint("union: upper", upper);
+	if (lower != NULLVP)
+		vprint("union: lower", lower);
+
+	return (0);
+}
+
+/*
+ * union_islocked: report whether the union node carries UN_LOCKED.
+ * Returns 1 when the flag is set and 0 otherwise.
+ */
+static int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	return ((un->un_flags & UN_LOCKED) != 0);
+}
+
+/*
+ * union_pathconf: forward VOP_PATHCONF to the vnode chosen by OTHERVP().
+ *
+ * A lower target is locked exclusively around the call; for any other
+ * target FIXUP() is applied to the union node first.  curproc is used
+ * as the locking process (XXX).  Returns the forwarded call's result.
+ */
+static int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct proc *p = curproc;	/* XXX */
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int on_lower = (targetvp == LOWERVP(ap->a_vp));
+	int error;
+
+	if (on_lower) {
+		vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p);
+	} else {
+		FIXUP(VTOUNION(ap->a_vp), p);
+	}
+
+	ap->a_vp = targetvp;
+	error = VCALL(targetvp, VOFFSET(vop_pathconf), ap);
+
+	if (on_lower) {
+		VOP_UNLOCK(targetvp, 0, p);
+	}
+
+	return (error);
+}
+
+/*
+ * union_advlock: forward VOP_ADVLOCK to the vnode chosen by OTHERVP(),
+ * rewriting ap->a_vp to point at it.  No vnode locking is done here.
+ */
+static int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+
+	ap->a_vp = targetvp;
+
+	return (VCALL(targetvp, VOFFSET(vop_advlock), ap));
+}
+
+
+/*
+ * XXX - vop_strategy has to be coded by hand because it carries no
+ * vnode in its arguments.  This goes away with a merged VM/buffer
+ * cache.
+ *
+ * The buffer's vnode is temporarily replaced by the one OTHERVP()
+ * selects, VOP_STRATEGY() is issued on it, and the original vnode is
+ * put back before returning the result.
+ */
+static int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct vnode *a_vp;
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *unionvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = OTHERVP(unionvp);
+
+#ifdef DIAGNOSTIC
+	if (bp->b_vp == NULLVP)
+		panic("union_strategy: nil vp");
+	/* A write must never be directed at the lower layer. */
+	if ((bp->b_flags & B_READ) == 0 &&
+	    bp->b_vp == LOWERVP(unionvp))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	error = VOP_STRATEGY(bp->b_vp, bp);
+	bp->b_vp = unionvp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_entries pairs each vnode operation descriptor with the
+ * union_* handler that implements it for this filesystem.  The
+ * vop_default_desc entry names vop_defaultop as the handler for that
+ * descriptor, and the table ends with a { NULL, NULL } sentinel.
+ * union_vnodeop_opv_desc ties the table to union_vnodeop_p, the vector
+ * pointer that code in this file compares against v_op to recognize
+ * union vnodes.
+ */
+vop_t **union_vnodeop_p;
+static struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+ { &vop_default_desc, (vop_t *) vop_defaultop },
+ { &vop_abortop_desc, (vop_t *) union_abortop },
+ { &vop_access_desc, (vop_t *) union_access },
+ { &vop_advlock_desc, (vop_t *) union_advlock },
+ { &vop_bmap_desc, (vop_t *) union_bmap },
+ { &vop_close_desc, (vop_t *) union_close },
+ { &vop_create_desc, (vop_t *) union_create },
+ { &vop_fsync_desc, (vop_t *) union_fsync },
+ { &vop_getattr_desc, (vop_t *) union_getattr },
+ { &vop_inactive_desc, (vop_t *) union_inactive },
+ { &vop_ioctl_desc, (vop_t *) union_ioctl },
+ { &vop_islocked_desc, (vop_t *) union_islocked },
+ { &vop_lease_desc, (vop_t *) union_lease },
+ { &vop_link_desc, (vop_t *) union_link },
+ { &vop_lock_desc, (vop_t *) union_lock },
+ { &vop_lookup_desc, (vop_t *) union_lookup },
+ { &vop_mkdir_desc, (vop_t *) union_mkdir },
+ { &vop_mknod_desc, (vop_t *) union_mknod },
+ { &vop_mmap_desc, (vop_t *) union_mmap },
+ { &vop_open_desc, (vop_t *) union_open },
+ { &vop_pathconf_desc, (vop_t *) union_pathconf },
+ { &vop_poll_desc, (vop_t *) union_poll },
+ { &vop_print_desc, (vop_t *) union_print },
+ { &vop_read_desc, (vop_t *) union_read },
+ { &vop_readdir_desc, (vop_t *) union_readdir },
+ { &vop_readlink_desc, (vop_t *) union_readlink },
+ { &vop_reclaim_desc, (vop_t *) union_reclaim },
+ { &vop_remove_desc, (vop_t *) union_remove },
+ { &vop_rename_desc, (vop_t *) union_rename },
+ { &vop_revoke_desc, (vop_t *) union_revoke },
+ { &vop_rmdir_desc, (vop_t *) union_rmdir },
+ { &vop_setattr_desc, (vop_t *) union_setattr },
+ { &vop_strategy_desc, (vop_t *) union_strategy },
+ { &vop_symlink_desc, (vop_t *) union_symlink },
+ { &vop_unlock_desc, (vop_t *) union_unlock },
+ { &vop_whiteout_desc, (vop_t *) union_whiteout },
+ { &vop_write_desc, (vop_t *) union_write },
+ { NULL, NULL }
+};
+static struct vnodeopv_desc union_vnodeop_opv_desc =
+ { &union_vnodeop_p, union_vnodeop_entries };
+
+VNODEOP_SET(union_vnodeop_opv_desc); /* NOTE(review): presumably registers the descriptor with the VFS so union_vnodeop_p gets filled in -- confirm */
OpenPOWER on IntegriCloud