summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrgrimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
committerrgrimes <rgrimes@FreeBSD.org>1994-05-24 10:09:53 +0000
commit27464aaa8e6ad0a90df705f3dd8ea4c48ffefd04 (patch)
tree498662170037963c8b911613f2622e38efea4434
parent41dbbe7dea1bb3e50798efb3ea01e967bf27364a (diff)
downloadFreeBSD-src-27464aaa8e6ad0a90df705f3dd8ea4c48ffefd04.zip
FreeBSD-src-27464aaa8e6ad0a90df705f3dd8ea4c48ffefd04.tar.gz
BSD 4.4 Lite Kernel Sources
-rw-r--r--sys/fs/cd9660/TODO77
-rw-r--r--sys/fs/cd9660/TODO.hibler22
-rw-r--r--sys/fs/cd9660/cd9660_bmap.c102
-rw-r--r--sys/fs/cd9660/cd9660_lookup.c465
-rw-r--r--sys/fs/cd9660/cd9660_node.c648
-rw-r--r--sys/fs/cd9660/cd9660_node.h143
-rw-r--r--sys/fs/cd9660/cd9660_rrip.c685
-rw-r--r--sys/fs/cd9660/cd9660_rrip.h146
-rw-r--r--sys/fs/cd9660/cd9660_util.c236
-rw-r--r--sys/fs/cd9660/cd9660_vfsops.c681
-rw-r--r--sys/fs/cd9660/cd9660_vnops.c1038
-rw-r--r--sys/fs/cd9660/iso.h256
-rw-r--r--sys/fs/cd9660/iso_rrip.h83
-rw-r--r--sys/fs/deadfs/dead_vnops.c354
-rw-r--r--sys/fs/fdescfs/fdesc.h82
-rw-r--r--sys/fs/fdescfs/fdesc_vfsops.c288
-rw-r--r--sys/fs/fdescfs/fdesc_vnops.c974
-rw-r--r--sys/fs/fifofs/fifo.h85
-rw-r--r--sys/fs/fifofs/fifo_vnops.c494
-rw-r--r--sys/fs/nullfs/null.h75
-rw-r--r--sys/fs/nullfs/null_subr.c293
-rw-r--r--sys/fs/nullfs/null_vfsops.c366
-rw-r--r--sys/fs/nullfs/null_vnops.c462
-rw-r--r--sys/fs/portalfs/portal.h72
-rw-r--r--sys/fs/portalfs/portal_vfsops.c313
-rw-r--r--sys/fs/portalfs/portal_vnops.c707
-rw-r--r--sys/fs/procfs/README113
-rw-r--r--sys/fs/procfs/procfs.h186
-rw-r--r--sys/fs/procfs/procfs_ctl.c302
-rw-r--r--sys/fs/procfs/procfs_fpregs.c87
-rw-r--r--sys/fs/procfs/procfs_mem.c302
-rw-r--r--sys/fs/procfs/procfs_note.c73
-rw-r--r--sys/fs/procfs/procfs_regs.c87
-rw-r--r--sys/fs/procfs/procfs_status.c145
-rw-r--r--sys/fs/procfs/procfs_subr.c314
-rw-r--r--sys/fs/procfs/procfs_vfsops.c243
-rw-r--r--sys/fs/procfs/procfs_vnops.c814
-rw-r--r--sys/fs/specfs/spec_vnops.c689
-rw-r--r--sys/fs/umapfs/umap.h92
-rw-r--r--sys/fs/umapfs/umap_subr.c397
-rw-r--r--sys/fs/umapfs/umap_vfsops.c407
-rw-r--r--sys/fs/umapfs/umap_vnops.c488
-rw-r--r--sys/fs/unionfs/union.h117
-rw-r--r--sys/fs/unionfs/union_subr.c744
-rw-r--r--sys/fs/unionfs/union_vfsops.c550
-rw-r--r--sys/fs/unionfs/union_vnops.c1495
-rw-r--r--sys/gnu/ext2fs/ext2_bmap.c294
-rw-r--r--sys/gnu/ext2fs/ext2_ihash.c154
-rw-r--r--sys/gnu/ext2fs/ext2_mount.h83
-rw-r--r--sys/gnu/ext2fs/inode.h162
-rw-r--r--sys/gnu/fs/ext2fs/ext2_bmap.c294
-rw-r--r--sys/gnu/fs/ext2fs/ext2_mount.h83
-rw-r--r--sys/gnu/fs/ext2fs/inode.h162
-rw-r--r--sys/kern/kern_tc.c528
-rw-r--r--sys/kern/kern_timeout.c528
-rw-r--r--sys/kern/subr_clist.c159
-rw-r--r--sys/kern/subr_disklabel.c364
-rw-r--r--sys/kern/subr_param.c145
-rw-r--r--sys/kern/uipc_sockbuf.c755
-rw-r--r--sys/kern/vfs_export.c1322
-rw-r--r--sys/kern/vfs_extattr.c2107
-rw-r--r--sys/kern/vfs_mount.c260
-rw-r--r--sys/kern/vnode_if.pl433
-rw-r--r--sys/netinet/tcp_reass.c1647
-rw-r--r--sys/netinet/tcp_timewait.c445
-rw-r--r--sys/nfs/nfs_common.c1130
-rw-r--r--sys/nfs/nfs_common.h269
-rw-r--r--sys/nfsclient/nfs.h297
-rw-r--r--sys/nfsclient/nfs_bio.c799
-rw-r--r--sys/nfsclient/nfs_nfsiod.c874
-rw-r--r--sys/nfsclient/nfs_node.c294
-rw-r--r--sys/nfsclient/nfs_socket.c1990
-rw-r--r--sys/nfsclient/nfs_subs.c1130
-rw-r--r--sys/nfsclient/nfs_vfsops.c740
-rw-r--r--sys/nfsclient/nfs_vnops.c2539
-rw-r--r--sys/nfsclient/nfsargs.h297
-rw-r--r--sys/nfsclient/nfsdiskless.h66
-rw-r--r--sys/nfsclient/nfsm_subs.h269
-rw-r--r--sys/nfsclient/nfsmount.h127
-rw-r--r--sys/nfsclient/nfsnode.h166
-rw-r--r--sys/nfsclient/nfsstats.h297
-rw-r--r--sys/nfsserver/nfs.h297
-rw-r--r--sys/nfsserver/nfs_serv.c1908
-rw-r--r--sys/nfsserver/nfs_srvcache.c348
-rw-r--r--sys/nfsserver/nfs_srvsock.c1990
-rw-r--r--sys/nfsserver/nfs_srvsubs.c1130
-rw-r--r--sys/nfsserver/nfs_syscalls.c874
-rw-r--r--sys/nfsserver/nfsm_subs.h269
-rw-r--r--sys/nfsserver/nfsrvcache.h84
-rw-r--r--sys/nfsserver/nfsrvstats.h297
-rw-r--r--sys/sys/_sigset.h194
-rw-r--r--sys/sys/bio.h178
-rw-r--r--sys/sys/diskmbr.h332
-rw-r--r--sys/sys/diskpc98.h332
-rw-r--r--sys/sys/linedisc.h123
-rw-r--r--sys/sys/selinfo.h56
-rw-r--r--sys/sys/timetc.h126
-rw-r--r--sys/tools/vnode_if.awk433
98 files changed, 46972 insertions, 0 deletions
diff --git a/sys/fs/cd9660/TODO b/sys/fs/cd9660/TODO
new file mode 100644
index 0000000..555d26a
--- /dev/null
+++ b/sys/fs/cd9660/TODO
@@ -0,0 +1,77 @@
+# $Id: TODO,v 1.4 1993/09/07 15:40:51 ws Exp $
+
+ 1) should understand "older", original High Sierra ("CDROM001") type
+
+ Not yet. ( I don't have this technical information, yet. )
+
+ 2) should understand Rock Ridge
+
+ Yes, we support the following features.
+
+ o Symbolic Link
+ o Real Name(long name)
+ o File Attribute
+ o Time stamp
+ o uid, gid
+ o Devices
+ o Relocated directories
+
+ Except for the following:
+
+ o POSIX device number mapping
+
+ There is some preliminary stuff in there that (ab-)uses the mknod
+ system call, but this needs a writable filesystem
+
+ 3) should be called cdfs, as other ISO file systems may soon be possible
+
+ Not yet. Probably we should make another file system when the ECMA draft
+ is valid and do it. For doing Rock Ridge Support, I can use almost same
+ code. So I just use the same file system interface...
+
+ 4) should have file handles implemented for use with NFS, etc
+
+ Yes. we have already this one, and I based it for this release.
+
+ 5) should have name translation enabled by mount flag
+
+ Yes. We can disable the Rock Ridge Extension with the following option:
+
+ "mount -t isofs -o -norrip /dev/cd0d /cdrom"
+
+ 6) should run as a user process, and not take up kernel space (cdroms
+ are slow)
+
+ Not yet.
+
+ 7) ECMA support.
+
+ Not yet. we need not only a technical spec but also ECMA format
+ cd-rom itself!
+
+ 8) Character set change by SVD ( multi SVD support )
+
+ Not yet. We should also hack the other part of system as 8 bit
+ clean. As far as I know, if you export the cdrom by NFS, the client
+ can access the 8 bit clean (ie. Solaris Japanese with EUC code )
+
+ 9) Access checks in isofs_access
+
+ Not yet.
+
+ 10) Support for generation numbers
+
+ Yes. Default is to list only the last file (the one with the highest
+ generation number). If you mount with -gen, all files are shown with
+ their generation numbers. In both cases you can specify the generation
+ number on opening files (if you happen to know it) or leave it off,
+ when it will again find the last file.
+
+ 11) Support for extended attributes
+
+ Yes. Since this requires an extra block buffer for the attributes
+ this must be enabled on mounting with the option -extattr.
+
+----------
+Last update July 19, '93 by Atsushi Murai. (amurai@spec.co.jp)
+Last update August 19, '93 by Wolfgang Solfrank. (ws@tools.de)
diff --git a/sys/fs/cd9660/TODO.hibler b/sys/fs/cd9660/TODO.hibler
new file mode 100644
index 0000000..3501aa2
--- /dev/null
+++ b/sys/fs/cd9660/TODO.hibler
@@ -0,0 +1,22 @@
+1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS).
+ Since it was modelled after the inode code, we might be able to merge
+ them back. It looks like a separate (but very similar) lookup routine
+ will be needed due to the associated file stuff.
+
+2. Make filesystem exportable. This comes for free if stacked with UFS.
+ Otherwise, the ufs_export routines need to be elevated to vfs_* routines.
+ [ DONE - hibler ]
+
+3. If it can't be merged with UFS, at least get them in sync. For example,
+ it could use the same style hashing routines as in ufs/ufs_ihash.c
+
+4. It would be nice to be able to use the vfs_cluster code.
+ Unfortunately, if the logical block size is smaller than the page size,
+ it won't work. Also, if throughput is relatively constant for any
+ block size (as it is for the HP drive--150kbs) then clustering may not
+ buy much (or may even hurt when vfs_cluster comes up with a large sync
+ cluster).
+
+5. Seems like there should be a "notrans" or some such mount option to show
+ filenames as they really are without lower-casing, stripping of version
+ numbers, etc. Does this make sense?
diff --git a/sys/fs/cd9660/cd9660_bmap.c b/sys/fs/cd9660/cd9660_bmap.c
new file mode 100644
index 0000000..911eedf
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_bmap.c
@@ -0,0 +1,102 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_bmap.c 8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+/*
+ * Map a logical block number within a file to the corresponding block
+ * number on the underlying device.  The file is treated as a single
+ * extent beginning at iso_start, so the logical block number is just an
+ * index into that extent.  The device vnode and a read-ahead hint are
+ * reported as well when the caller supplies storage for them.
+ */
+int
+cd9660_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct iso_node *node = VTOI(ap->a_vp);
+	daddr_t lbn = ap->a_bn;
+	long blksz;
+	int ahead;
+
+	/* Hand back the device vnode when the caller asks for it. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = node->i_devvp;
+
+	/* No place to store the mapping: nothing further to compute. */
+	if (ap->a_bnp == NULL)
+		return (0);
+
+	/* Extent start plus block index, scaled by btodb(blksz). */
+	blksz = node->i_mnt->logical_block_size;
+	*ap->a_bnp = (node->iso_start + lbn) * btodb(blksz);
+
+	/* The read-ahead hint is optional. */
+	if (ap->a_runp == NULL)
+		return (0);
+
+	/*
+	 * Count the whole logical blocks that lie between the end of the
+	 * requested block and the end of the file, then limit the result
+	 * to the range 0 .. MAXBSIZE/blksz - 1.
+	 */
+	ahead = (node->i_size - (lbn + 1) * blksz) / blksz;
+	if (ahead <= 0)
+		ahead = 0;
+	else if (ahead >= MAXBSIZE/blksz)
+		ahead = MAXBSIZE/blksz - 1;
+	*ap->a_runp = ahead;
+
+	return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c
new file mode 100644
index 0000000..62d1d3f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_lookup.c
@@ -0,0 +1,465 @@
+/*-
+ * Copyright (c) 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)ufs_lookup.c 7.33 (Berkeley) 5/19/91
+ *
+ * @(#)cd9660_lookup.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+
+struct nchstats iso_nchstats;
+
+/*
+ * Convert a component of a pathname into a pointer to a locked inode.
+ * This routine, and much of its commentary, is derived from ufs_lookup.
+ * If the file system is not maintained in a strict tree hierarchy,
+ * this can result in a deadlock situation (see comments in code below).
+ *
+ * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
+ * whether the name is to be looked up, created, renamed, or deleted.
+ * If flag has LOCKPARENT or'ed into it and the target of the pathname
+ * exists, lookup returns both the target and its parent directory locked.
+ * When deleting and LOCKPARENT is specified, the target may be ".", but
+ * the caller must then do a vrele and an iput instead of two iputs.
+ *
+ * Returns 0 with *a_vpp set on success, ENOTDIR if a_dvp is not a
+ * directory, EJUSTRETURN if the name is absent and the operation is
+ * CREATE or RENAME, ENOENT if it is absent otherwise, or the error
+ * reported by VOP_ACCESS, iso_blkatoff or iso_iget.
+ *
+ * Overall outline:
+ *
+ *	check accessibility of directory
+ *	look for name in cache; return a hit whose vnode id is unchanged
+ *	search for name in directory, to found or notfound
+ * notfound:
+ *	add a negative cache entry if MAKEENTRY is set, then return error
+ * found:
+ *	save the offset for later LOOKUPs of a last component, get the
+ *	  target inode, and add the name to the cache if MAKEENTRY is set
+ *
+ * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
+ */
+int
+cd9660_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vdp;	/* vnode for directory being searched */
+	register struct iso_node *dp;	/* inode for directory being searched */
+	register struct iso_mnt *imp;	/* file system that directory is in */
+	struct buf *bp;			/* a buffer of directory entries */
+	struct iso_directory_record *ep;/* the current directory entry */
+	int entryoffsetinblock;		/* offset of ep in bp's buffer */
+	int saveoffset;			/* offset of last directory entry in dir */
+	int numdirpasses;		/* strategy for directory search */
+	doff_t endsearch;		/* offset to end directory search */
+	struct iso_node *pdp;		/* saved dp during symlink work */
+	struct iso_node *tdp;		/* returned by iget */
+	int lockparent;			/* 1 => lockparent flag is set */
+	int wantparent;			/* 1 => wantparent or lockparent flag */
+	int error;
+	ino_t ino = 0;
+	int reclen;
+	u_short namelen;
+	char altname[NAME_MAX];
+	int res;
+	int assoc, len;
+	char *name;
+	struct vnode **vpp = ap->a_vpp;
+	struct componentname *cnp = ap->a_cnp;
+	struct ucred *cred = cnp->cn_cred;
+	int flags = cnp->cn_flags;
+	int nameiop = cnp->cn_nameiop;
+
+	bp = NULL;
+	*vpp = NULL;
+	vdp = ap->a_dvp;
+	dp = VTOI(vdp);
+	imp = dp->i_mnt;
+	lockparent = flags & LOCKPARENT;
+	wantparent = flags & (LOCKPARENT|WANTPARENT);
+
+	/*
+	 * Check accessibility of directory.
+	 */
+	if (vdp->v_type != VDIR)
+		return (ENOTDIR);
+	if (error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc))
+		return (error);
+
+	/*
+	 * We now have a segment name to search for, and a directory to search.
+	 *
+	 * Before tediously performing a linear scan of the directory,
+	 * check the name cache to see if the directory/name pair
+	 * we are looking for is known already.
+	 */
+	if (error = cache_lookup(vdp, vpp, cnp)) {
+		int vpid;	/* capability number of vnode */
+
+		if (error == ENOENT)
+			return (error);
+#ifdef PARANOID
+		if ((vdp->v_flag & VROOT) && (flags & ISDOTDOT))
+			panic("cd9660_lookup: .. through root");
+#endif
+		/*
+		 * Get the next vnode in the path.
+		 * See comment below starting `Step through' for
+		 * an explanation of the locking protocol.
+		 */
+		pdp = dp;
+		dp = VTOI(*vpp);
+		vdp = *vpp;
+		vpid = vdp->v_id;
+		if (pdp == dp) {
+			VREF(vdp);
+			error = 0;
+		} else if (flags & ISDOTDOT) {
+			ISO_IUNLOCK(pdp);
+			error = vget(vdp, 1);
+			if (!error && lockparent && (flags & ISLASTCN))
+				ISO_ILOCK(pdp);
+		} else {
+			error = vget(vdp, 1);
+			if (!lockparent || error || !(flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		/*
+		 * Check that the capability number did not change
+		 * while we were waiting for the lock.
+		 */
+		if (!error) {
+			if (vpid == vdp->v_id)
+				return (0);
+			iso_iput(dp);
+			if (lockparent && pdp != dp && (flags & ISLASTCN))
+				ISO_IUNLOCK(pdp);
+		}
+		ISO_ILOCK(pdp);
+		dp = pdp;
+		vdp = ITOV(dp);
+		*vpp = NULL;
+	}
+
+	len = cnp->cn_namelen;
+	name = cnp->cn_nameptr;
+	/*
+	 * A leading ASSOCCHAR means we want an associated file (non-RRIP only)
+	 */
+	if (assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)) {
+		len--;
+		name++;
+	}
+
+	/*
+	 * If there is cached information on a previous search of
+	 * this directory, pick up where we last left off.
+	 * We cache only lookups as these are the most common
+	 * and have the greatest payoff. Caching CREATE has little
+	 * benefit as it usually must search the entire directory
+	 * to determine that the entry does not exist. Caching the
+	 * location of the last DELETE or RENAME has not reduced
+	 * profiling time and hence has been removed in the interest
+	 * of simplicity.
+	 */
+	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
+	    dp->i_diroff > dp->i_size) {
+		entryoffsetinblock = 0;
+		dp->i_offset = 0;
+		numdirpasses = 1;
+	} else {
+		dp->i_offset = dp->i_diroff;
+		entryoffsetinblock = iso_blkoff(imp, dp->i_offset);
+		if (entryoffsetinblock != 0) {
+			if (error = iso_blkatoff(dp, dp->i_offset, &bp))
+				return (error);
+		}
+		numdirpasses = 2;
+		iso_nchstats.ncs_2passes++;
+	}
+	endsearch = roundup(dp->i_size, imp->logical_block_size);
+
+searchloop:
+	while (dp->i_offset < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+		if (iso_blkoff(imp, dp->i_offset) == 0) {
+			if (bp != NULL)
+				brelse(bp);
+			if (error = iso_blkatoff(dp, dp->i_offset, &bp))
+				return (error);
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * Skip to next block, if any.  Round up from offset + 1 so
+			 * an offset already on a block boundary still advances.
+			 */
+			dp->i_offset =
+			    roundup(dp->i_offset + 1, imp->logical_block_size);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
+			/* illegal entry, stop */
+			break;
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size)
+			/* entries are not allowed to cross boundaries */
+			break;
+
+		/*
+		 * Check for a name match.
+		 */
+		namelen = isonum_711(ep->name_len);
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
+			/* illegal entry, stop */
+			break;
+
+		switch (imp->iso_ftype) {
+		default:
+			if ((!(isonum_711(ep->flags)&4)) == !assoc) {	/* associated-file bit must match */
+				if ((len == 1
+				     && *name == '.')
+				    || (flags & ISDOTDOT)) {
+					if (namelen == 1
+					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
+						/*
+						 * Record the entry's inode number in
+						 * dp->i_ino; the directory buffer is
+						 * released at `found'.
+						 */
+						isodirino(&dp->i_ino,ep,imp);
+						goto found;
+					}
+					if (namelen != 1
+					    || ep->name[0] != 0)
+						goto notfound;
+				} else if (!(res = isofncmp(name,len,
+							    ep->name,namelen))) {
+					if (isonum_711(ep->flags)&2)
+						isodirino(&ino,ep,imp);
+					else
+						ino = dbtob(bp->b_blkno)
+							+ entryoffsetinblock;
+					saveoffset = dp->i_offset;
+				} else if (ino)
+					goto foundino;
+#ifdef NOSORTBUG /* On some CDs directory entries are not sorted correctly */
+				else if (res < 0)
+					goto notfound;
+				else if (res > 0 && numdirpasses == 2)
+					numdirpasses++;
+#endif
+			}
+			break;
+		case ISO_FTYPE_RRIP:
+			if (isonum_711(ep->flags)&2)
+				isodirino(&ino,ep,imp);
+			else
+				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
+			dp->i_ino = ino;
+			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
+			if (namelen == cnp->cn_namelen
+			    && !bcmp(name,altname,namelen))
+				goto found;
+			ino = 0;
+			break;
+		}
+		dp->i_offset += reclen;
+		entryoffsetinblock += reclen;
+	}
+	if (ino) {
+foundino:
+		dp->i_ino = ino;
+		if (saveoffset != dp->i_offset) {
+			if (iso_lblkno(imp,dp->i_offset)
+			    != iso_lblkno(imp,saveoffset)) {
+				if (bp != NULL)
+					brelse(bp);
+				if (error = iso_blkatoff(dp, saveoffset, &bp))
+					return (error);
+			}
+			ep = (struct iso_directory_record *)(bp->b_un.b_addr
+							     + iso_blkoff(imp,saveoffset));
+			dp->i_offset = saveoffset;
+		}
+		goto found;
+	}
+notfound:
+	/*
+	 * If we started in the middle of the directory and failed
+	 * to find our target, we must check the beginning as well.
+	 */
+	if (numdirpasses == 2) {
+		numdirpasses--;
+		dp->i_offset = 0;
+		endsearch = dp->i_diroff;
+		goto searchloop;
+	}
+	if (bp != NULL)
+		brelse(bp);
+	/*
+	 * Insert name into cache (as non-existent) if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	if (nameiop == CREATE || nameiop == RENAME)
+		return (EJUSTRETURN);
+	return (ENOENT);
+
+found:
+	if (numdirpasses == 2)
+		iso_nchstats.ncs_pass2++;
+	if (bp != NULL)
+		brelse(bp);
+
+	/*
+	 * Found component in pathname.
+	 * If the final component of path name, save information
+	 * in the cache as to where the entry was found.
+	 */
+	if ((flags & ISLASTCN) && nameiop == LOOKUP)
+		dp->i_diroff = dp->i_offset;
+
+	/*
+	 * Step through the translation in the name. We do not `iput' the
+	 * directory because we may need it again if a symbolic link
+	 * is relative to the current directory. Instead we save it
+	 * unlocked as "pdp". We must get the target inode before unlocking
+	 * the directory to insure that the inode will not be removed
+	 * before we get it. We prevent deadlock by always fetching
+	 * inodes from the root, moving down the directory tree. Thus
+	 * when following backward pointers ".." we must unlock the
+	 * parent directory before getting the requested directory.
+	 * There is a potential race condition here if both the current
+	 * and parent directories are removed before the `iget' for the
+	 * inode associated with ".." returns. We hope that this occurs
+	 * infrequently since we cannot avoid this race condition without
+	 * implementing a sophisticated deadlock detection algorithm.
+	 * Note also that this simple deadlock detection scheme will not
+	 * work if the file system has any hard links other than ".."
+	 * that point backwards in the directory structure.
+	 */
+	pdp = dp;
+	/*
+	 * If ino is different from dp->i_ino,
+	 * it's a relocated directory.
+	 */
+	if (flags & ISDOTDOT) {
+		ISO_IUNLOCK(pdp);	/* race to get the inode */
+		if (error = iso_iget(dp,dp->i_ino,
+				     dp->i_ino != ino,
+				     &tdp,ep)) {
+			ISO_ILOCK(pdp);
+			return (error);
+		}
+		if (lockparent && (flags & ISLASTCN))
+			ISO_ILOCK(pdp);
+		*vpp = ITOV(tdp);
+	} else if (dp->i_number == dp->i_ino) {
+		VREF(vdp);	/* we want ourself, ie "." */
+		*vpp = vdp;
+	} else {
+		if (error = iso_iget(dp,dp->i_ino,dp->i_ino!=ino,&tdp,ep))
+			return (error);
+		if (!lockparent || !(flags & ISLASTCN))
+			ISO_IUNLOCK(pdp);
+		*vpp = ITOV(tdp);
+	}
+
+	/*
+	 * Insert name into cache if appropriate.
+	 */
+	if (cnp->cn_flags & MAKEENTRY)
+		cache_enter(vdp, *vpp, cnp);
+	return (0);
+}
+
+/*
+ * Read the block of directory "ip" at byte position "offset" (turned
+ * into a logical block number with iso_lblkno) and hand the buffer back
+ * through "bpp".  Returns 0 on success.  On a read error the buffer is
+ * released, *bpp is cleared and the error from bread() is returned.
+ */
+int
+iso_blkatoff(ip, offset, bpp)
+	struct iso_node *ip;
+	doff_t offset;
+	struct buf **bpp;
+{
+	register struct iso_mnt *imp = ip->i_mnt;
+	daddr_t lbn = iso_lblkno(imp,offset);
+	int bsize = iso_blksize(imp,ip,lbn);
+	struct buf *bp;
+	int error;
+
+	if (error = bread(ITOV(ip),lbn,bsize,NOCRED,&bp)) {
+		brelse(bp);	/* NOTE(review): assumes bread() sets bp even on error */
+		*bpp = 0;
+		return (error);
+	}
+	*bpp = bp;
+	return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_node.c b/sys/fs/cd9660/cd9660_node.c
new file mode 100644
index 0000000..d83a7a6
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.c
@@ -0,0 +1,648 @@
+/*-
+ * Copyright (c) 1982, 1986, 1989, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/stat.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#define INOHSZ 512 /* number of inode hash buckets */
+#if ((INOHSZ&(INOHSZ-1)) == 0) /* power of two: reduce with a mask */
+#define INOHASH(dev,ino) (((dev)+((ino)>>12))&(INOHSZ-1))
+#else /* otherwise reduce with a modulo */
+#define INOHASH(dev,ino) (((unsigned)((dev)+((ino)>>12)))%INOHSZ)
+#endif
+
+union iso_ihead { /* one bucket of the inode hash table */
+ union iso_ihead *ih_head[2]; /* links seen as bucket heads (set up by cd9660_init) */
+ struct iso_node *ih_chain[2]; /* the same two links seen as iso_nodes (walked by iso_iget) */
+} iso_ihead[INOHSZ];
+
+#ifdef ISODEVMAP
+#define DNOHSZ 64 /* number of device-map hash buckets */
+#if ((DNOHSZ&(DNOHSZ-1)) == 0) /* power of two: reduce with a mask */
+#define DNOHASH(dev,ino) (((dev)+((ino)>>12))&(DNOHSZ-1))
+#else /* otherwise reduce with a modulo */
+#define DNOHASH(dev,ino) (((unsigned)((dev)+((ino)>>12)))%DNOHSZ)
+#endif
+
+union iso_dhead { /* one bucket of the device-map hash table */
+ union iso_dhead *dh_head[2]; /* links seen as bucket heads (set up by cd9660_init) */
+ struct iso_dnode *dh_chain[2]; /* the same two links seen as iso_dnodes (walked by iso_dmap) */
+} iso_dhead[DNOHSZ];
+#endif
+
+int prtactive; /* 1 => print out reclaim of active vnodes */
+
+/*
+ * Set up the hash tables: every inode bucket (and, with ISODEVMAP,
+ * every device-map bucket) becomes an empty circular list whose two
+ * links point back at the bucket itself.
+ */
+cd9660_init()
+{
+	register union iso_ihead *ibucket;
+#ifdef ISODEVMAP
+	register union iso_dhead *dbucket;
+#endif
+
+	for (ibucket = iso_ihead; ibucket < iso_ihead + INOHSZ; ibucket++) {
+		ibucket->ih_head[0] = ibucket;
+		ibucket->ih_head[1] = ibucket;
+	}
+#ifdef ISODEVMAP
+	for (dbucket = iso_dhead; dbucket < iso_dhead + DNOHSZ; dbucket++) {
+		dbucket->dh_head[0] = dbucket;
+		dbucket->dh_head[1] = dbucket;
+	}
+#endif
+}
+
+#ifdef ISODEVMAP
+/*
+ * Find the device-map node recorded for (dev, ino).
+ *
+ * When the pair is not on its hash chain a null pointer is returned
+ * if "create" is zero; otherwise a node is allocated, tagged with
+ * dev and ino, linked onto the chain and returned.  The d_dev member
+ * of a freshly created node is not initialized here.
+ */
+struct iso_dnode *
+iso_dmap(dev,ino,create)
+	dev_t dev;
+	ino_t ino;
+	int create;
+{
+	union iso_dhead *bucket = &iso_dhead[DNOHASH(dev, ino)];
+	struct iso_dnode *node;
+
+	node = bucket->dh_chain[0];
+	while (node != (struct iso_dnode *)bucket) {
+		if (ino == node->i_number && dev == node->i_dev)
+			return node;
+		node = node->d_forw;
+	}
+
+	if (create == 0)
+		return (struct iso_dnode *)0;
+
+	MALLOC(node,struct iso_dnode *,sizeof(struct iso_dnode),M_CACHE,M_WAITOK);
+	node->i_dev = dev;
+	node->i_number = ino;
+	insque(node,bucket);
+	return node;
+}
+
+/*
+ * Forget every device-map node that belongs to "dev": each hash
+ * bucket is walked and matching nodes are unlinked and freed.
+ */
+void
+iso_dunmap(dev)
+	dev_t dev;
+{
+	struct iso_dnode *node, *next;
+	int b;
+
+	for (b = 0; b < DNOHSZ; b++) {
+		union iso_dhead *bucket = &iso_dhead[b];
+
+		node = bucket->dh_chain[0];
+		while (node != (struct iso_dnode *)bucket) {
+			/* pick up the successor first: node may be freed */
+			next = node->d_forw;
+			if (node->i_dev == dev) {
+				remque(node);
+				FREE(node,M_CACHE);
+			}
+			node = next;
+		}
+	}
+}
+#endif
+
+/*
+ * Look up a ISOFS dinode number to find its incore vnode.
+ * If it is not in core, read it in from the specified device.
+ * If it is in core, wait for the lock bit to clear, then
+ * return the inode locked. Detection and handling of mount
+ * points must be done by the calling routine.
+ *
+ * xp		an inode on the same mount; supplies the device and mount
+ * ino		inode number wanted
+ * relocated	non-zero when ino names a relocated directory; the
+ *		directory record is then read from the `.' entry on
+ *		disk and the isodir argument is not used
+ * ipp		receives the locked inode, or 0 on failure
+ * isodir	directory record describing the inode (when !relocated)
+ *
+ * Returns 0 on success or an errno value.
+ */
+iso_iget(xp, ino, relocated, ipp, isodir)
+	struct iso_node *xp;
+	ino_t ino;
+	int relocated;
+	struct iso_node **ipp;
+	struct iso_directory_record *isodir;
+{
+	dev_t dev = xp->i_dev;
+	struct mount *mntp = ITOV(xp)->v_mount;
+	register struct iso_node *ip, *iq;
+	register struct vnode *vp;
+#ifdef ISODEVMAP
+	register struct iso_dnode *dp;
+#endif
+	struct vnode *nvp;
+	struct buf *bp = NULL, *bp2 = NULL;
+	union iso_ihead *ih;
+	int error, off;
+	struct iso_mnt *imp;
+
+	ih = &iso_ihead[INOHASH(dev, ino)];
+loop:
+	for (ip = ih->ih_chain[0];
+	    ip != (struct iso_node *)ih;
+	    ip = ip->i_forw) {
+		if (ino != ip->i_number || dev != ip->i_dev)
+			continue;
+		if ((ip->i_flag&ILOCKED) != 0) {
+			/* the chain may change while we sleep: rescan */
+			ip->i_flag |= IWANT;
+			sleep((caddr_t)ip, PINOD);
+			goto loop;
+		}
+		if (vget(ITOV(ip), 1))
+			goto loop;
+		*ipp = ip;
+		return 0;
+	}
+	/*
+	 * Allocate a new vnode/iso_node.
+	 */
+	if (error = getnewvnode(VT_ISOFS, mntp, cd9660_vnodeop_p, &nvp)) {
+		*ipp = 0;
+		return error;
+	}
+	MALLOC(ip, struct iso_node *, sizeof(struct iso_node),
+	    M_ISOFSNODE, M_WAITOK);
+	bzero((caddr_t)ip, sizeof(struct iso_node));
+	nvp->v_data = ip;
+	ip->i_vnode = nvp;
+	ip->i_flag = 0;
+	ip->i_devvp = 0;
+	ip->i_diroff = 0;
+	ip->i_lockf = 0;
+
+	/*
+	 * Put it onto its hash chain and lock it so that other requests for
+	 * this inode will block if they arrive while we are sleeping waiting
+	 * for old data structures to be purged or for the contents of the
+	 * disk portion of this inode to be read.
+	 */
+	ip->i_dev = dev;
+	ip->i_number = ino;
+	insque(ip, ih);
+	ISO_ILOCK(ip);
+
+	imp = VFSTOISOFS (mntp);
+	ip->i_mnt = imp;
+	ip->i_devvp = imp->im_devvp;
+	VREF(ip->i_devvp);
+
+	if (relocated) {
+		/*
+		 * On relocated directories we must
+		 * read the `.' entry out of a dir.
+		 */
+		ip->iso_start = ino >> imp->im_bshift;
+		if (error = iso_blkatoff(ip,0,&bp)) {
+			/*
+			 * Drop the device reference and forget the pointer:
+			 * iso_iput() below leads to cd9660_reclaim(), which
+			 * would otherwise release i_devvp a second time.
+			 */
+			vrele(ip->i_devvp);
+			ip->i_devvp = 0;
+			remque(ip);
+			ip->i_forw = ip;
+			ip->i_back = ip;
+			iso_iput(ip);
+			*ipp = 0;
+			return error;
+		}
+		isodir = (struct iso_directory_record *)bp->b_un.b_addr;
+	}
+
+	ip->iso_extent = isonum_733(isodir->extent);
+	ip->i_size = isonum_733(isodir->size);
+	ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent;
+
+	vp = ITOV(ip);
+
+	/*
+	 * Setup time stamp, attribute
+	 */
+	vp->v_type = VNON;
+	switch (imp->iso_ftype) {
+	default:	/* ISO_FTYPE_9660 */
+		/*
+		 * The extended attribute record sits in the blocks just
+		 * before iso_start.  iso_blkatoff() takes a byte offset,
+		 * so the block count is scaled by the logical block size,
+		 * exactly as cd9660_defattr() does.
+		 */
+		if ((imp->im_flags&ISOFSMNT_EXTATT)
+		    && (off = isonum_711(isodir->ext_attr_length)))
+			iso_blkatoff(ip,
+			    (doff_t)(-off * imp->logical_block_size),&bp2);
+		cd9660_defattr(isodir,ip,bp2 );
+		cd9660_deftstamp(isodir,ip,bp2 );
+		break;
+	case ISO_FTYPE_RRIP:
+		(void) cd9660_rrip_analyze(isodir,ip,imp);
+		break;
+	}
+	if (bp2)
+		brelse(bp2);
+	if (bp)
+		brelse(bp);
+
+	/*
+	 * Initialize the associated vnode
+	 */
+	vp->v_type = IFTOVT(ip->inode.iso_mode);
+
+	if ( vp->v_type == VFIFO ) {
+#ifdef	FIFO
+		extern int (**cd9660_fifoop_p)();
+		vp->v_op = cd9660_fifoop_p;
+#else
+		/*
+		 * Clear the mode so that cd9660_inactive() reclaims the
+		 * node at once.  Left on the hash chain it would be found
+		 * by the next lookup and returned as if it were usable.
+		 */
+		ip->inode.iso_mode = 0;
+		iso_iput(ip);
+		*ipp = 0;
+		return EOPNOTSUPP;
+#endif	/* FIFO */
+	} else if ( vp->v_type == VCHR || vp->v_type == VBLK ) {
+		extern int (**cd9660_specop_p)();
+
+		/*
+		 * if device, look at device number table for translation
+		 */
+#ifdef	ISODEVMAP
+		if (dp = iso_dmap(dev,ino,0))
+			ip->inode.iso_rdev = dp->d_dev;
+#endif
+		vp->v_op = cd9660_specop_p;
+		if (nvp = checkalias(vp, ip->inode.iso_rdev, mntp)) {
+			/*
+			 * Reinitialize aliased inode.
+			 */
+			vp = nvp;
+			iq = VTOI(vp);
+			iq->i_vnode = vp;
+			iq->i_flag = 0;
+			ISO_ILOCK(iq);
+			iq->i_dev = dev;
+			iq->i_number = ino;
+			iq->i_mnt = ip->i_mnt;
+			/* copy everything from iso_extent to the end of the node */
+			bcopy(&ip->iso_extent,&iq->iso_extent,
+			    (char *)(ip + 1) - (char *)&ip->iso_extent);
+			insque(iq, ih);
+			/*
+			 * Discard unneeded vnode
+			 * (This introduces the need of INACTIVE modification)
+			 */
+			ip->inode.iso_mode = 0;
+			iso_iput(ip);
+			ip = iq;
+		}
+	}
+
+	if (ip->iso_extent == imp->root_extent)
+		vp->v_flag |= VROOT;
+
+	*ipp = ip;
+	return 0;
+}
+
+/*
+ * Release a locked inode: the inode lock is dropped and then one
+ * reference on its vnode is given up.  Panics if the inode is not
+ * locked on entry.
+ */
+iso_iput(ip)
+	register struct iso_node *ip;
+{
+
+	if (!(ip->i_flag & ILOCKED))
+		panic("iso_iput");
+
+	ISO_IUNLOCK(ip);
+	vrele(ITOV(ip));
+}
+
+/*
+ * Inactive vnode operation.  The inode flags are cleared and, when
+ * the vnode is unreferenced and the node has been marked dead with
+ * a zero iso_mode, the vnode is reclaimed at once through vgone().
+ * Always returns 0.
+ */
+int
+cd9660_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_inactive: pushing active", vp);
+
+	ip->i_flag = 0;
+
+	/*
+	 * If we are done with the inode, reclaim it
+	 * so that it can be reused immediately.
+	 */
+	if (ip->inode.iso_mode != 0 || vp->v_usecount != 0)
+		return 0;
+	vgone(vp);
+	return 0;
+}
+
+/*
+ * Reclaim vnode operation: detach the iso_node from the vnode so
+ * that the vnode can be put to another use.  The node leaves its
+ * hash chain, name cache entries are purged, the reference on the
+ * device vnode is dropped and the iso_node is freed.  Returns 0.
+ */
+int
+cd9660_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct iso_node *ip = VTOI(vp);
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("cd9660_reclaim: pushing active", vp);
+
+	/* off the hash chain; leave the node linked to itself */
+	remque(ip);
+	ip->i_back = ip;
+	ip->i_forw = ip;
+
+	/* throw away what is still attached to the inode */
+	cache_purge(vp);
+	if (ip->i_devvp != 0) {
+		vrele(ip->i_devvp);
+		ip->i_devvp = 0;
+	}
+
+	FREE(vp->v_data, M_ISOFSNODE);
+	vp->v_data = NULL;
+	return 0;
+}
+
+/*
+ * Take the inode lock, sleeping for as long as someone else holds
+ * it.  i_spare0 records the pid of the holder and i_spare1 the pid
+ * of the most recent waiter; a process that finds its own pid in
+ * i_spare0 while waiting panics.
+ */
+iso_ilock(ip)
+	register struct iso_node *ip;
+{
+
+	for (;;) {
+		if ((ip->i_flag & ILOCKED) == 0)
+			break;
+		ip->i_flag |= IWANT;
+		if (ip->i_spare0 == curproc->p_pid)
+			panic("locking against myself");
+		ip->i_spare1 = curproc->p_pid;
+		(void) sleep((caddr_t)ip, PINOD);
+	}
+	ip->i_flag |= ILOCKED;
+	ip->i_spare0 = curproc->p_pid;
+	ip->i_spare1 = 0;
+}
+
+/*
+ * Give up the inode lock and wake any process that asked for it.
+ * Unlocking an inode that is not locked is reported with vprint()
+ * but otherwise goes ahead.
+ */
+iso_iunlock(ip)
+	register struct iso_node *ip;
+{
+	int wanted;
+
+	if (!(ip->i_flag & ILOCKED))
+		vprint("iso_iunlock: unlocked inode", ITOV(ip));
+
+	ip->i_spare0 = 0;
+	wanted = (ip->i_flag & IWANT) != 0;
+	ip->i_flag &= ~(ILOCKED|IWANT);
+	if (wanted)
+		wakeup((caddr_t)ip);
+}
+
+/*
+ * File attributes
+ *
+ * Fill in mode, link count, uid and gid of "inop" from the directory
+ * record "isodir".  "bp", when non-null, is a buffer that already
+ * holds the extended attribute record; when it is null and the mount
+ * has ISOFSMNT_EXTATT set, the record is read here (and released
+ * again before returning).  Without a version 1 extended attribute
+ * record the node gets read/execute permission for everyone and
+ * uid/gid 0.
+ */
+void
+cd9660_defattr(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct iso_extended_attributes *xattr = NULL;
+	struct buf *mybuf = NULL;
+	struct iso_mnt *mnt;
+	int xlen;
+
+	/*
+	 * Flag bit 2 marks a directory.
+	 * If we return 2 links for a directory, fts() will assume there
+	 * are no subdirectories (just links for the path and .), so
+	 * instead we return 1 for directories and files alike.
+	 */
+	if (isonum_711(isodir->flags)&2)
+		inop->inode.iso_mode = S_IFDIR;
+	else
+		inop->inode.iso_mode = S_IFREG;
+	inop->inode.iso_links = 1;
+
+	if (bp == NULL) {
+		mnt = inop->i_mnt;
+		if (mnt->im_flags&ISOFSMNT_EXTATT) {
+			xlen = isonum_711(isodir->ext_attr_length);
+			if (xlen != 0) {
+				iso_blkatoff(inop,-xlen * mnt->logical_block_size,&mybuf);
+				bp = mybuf;
+			}
+		}
+	}
+	if (bp != NULL) {
+		xattr = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		/* only version 1 records are understood */
+		if (isonum_711(xattr->version) != 1)
+			xattr = NULL;
+	}
+
+	if (xattr != NULL) {
+		/* a clear bit in perm[] grants the permission */
+		if (!(xattr->perm[0]&0x40))
+			inop->inode.iso_mode |= VEXEC >> 6;
+		if (!(xattr->perm[0]&0x10))
+			inop->inode.iso_mode |= VREAD >> 6;
+		if (!(xattr->perm[0]&4))
+			inop->inode.iso_mode |= VEXEC >> 3;
+		if (!(xattr->perm[0]&1))
+			inop->inode.iso_mode |= VREAD >> 3;
+		if (!(xattr->perm[1]&0x40))
+			inop->inode.iso_mode |= VEXEC;
+		if (!(xattr->perm[1]&0x10))
+			inop->inode.iso_mode |= VREAD;
+		inop->inode.iso_uid = isonum_723(xattr->owner); /* what about 0? */
+		inop->inode.iso_gid = isonum_723(xattr->group); /* what about 0? */
+	} else {
+		inop->inode.iso_mode |= (VREAD|VEXEC) | ((VREAD|VEXEC)>>3) | ((VREAD|VEXEC)>>6);
+		inop->inode.iso_uid = (uid_t)0;
+		inop->inode.iso_gid = (gid_t)0;
+	}
+
+	if (mybuf != NULL)
+		brelse(mybuf);
+}
+
+/*
+ * Time stamps
+ *
+ * Fill in the access, change and modification times of "inop".
+ * "bp" is handled as in cd9660_defattr(): a non-null buffer holds
+ * the extended attribute record, otherwise the record is read here
+ * when the mount has ISOFSMNT_EXTATT set.  With a version 1 record
+ * its 17 byte stamps are used, each falling back to a neighbour
+ * when it does not convert; without one all three times come from
+ * the date in the directory record.
+ */
+void
+cd9660_deftstamp(isodir,inop,bp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct buf *bp;
+{
+	struct iso_extended_attributes *xattr = NULL;
+	struct buf *mybuf = NULL;
+	struct iso_mnt *mnt;
+	int xlen;
+
+	if (bp == NULL) {
+		mnt = inop->i_mnt;
+		if (mnt->im_flags&ISOFSMNT_EXTATT) {
+			xlen = isonum_711(isodir->ext_attr_length);
+			if (xlen != 0) {
+				iso_blkatoff(inop,-xlen * mnt->logical_block_size,&mybuf);
+				bp = mybuf;
+			}
+		}
+	}
+	if (bp != NULL) {
+		xattr = (struct iso_extended_attributes *)bp->b_un.b_addr;
+		/* only version 1 records are understood */
+		if (isonum_711(xattr->version) != 1)
+			xattr = NULL;
+	}
+
+	if (xattr != NULL) {
+		if (!cd9660_tstamp_conv17(xattr->ftime,&inop->inode.iso_atime))
+			cd9660_tstamp_conv17(xattr->ctime,&inop->inode.iso_atime);
+		if (!cd9660_tstamp_conv17(xattr->ctime,&inop->inode.iso_ctime))
+			inop->inode.iso_ctime = inop->inode.iso_atime;
+		if (!cd9660_tstamp_conv17(xattr->mtime,&inop->inode.iso_mtime))
+			inop->inode.iso_mtime = inop->inode.iso_ctime;
+	} else {
+		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime);
+		inop->inode.iso_atime = inop->inode.iso_ctime;
+		inop->inode.iso_mtime = inop->inode.iso_ctime;
+	}
+
+	if (mybuf != NULL)
+		brelse(mybuf);
+}
+
+/*
+ * Convert a 7 byte binary time stamp to seconds since Jan. 1st, 1970.
+ *
+ * pi	the 7 bytes: years since 1900, month, day, hour, minute,
+ *	second and the offset from GMT in 15 minute units
+ * pu	receives the result; tv_usec is always cleared
+ *
+ * Returns 1 on success.  Returns 0 and stores a zero time when the
+ * year lies before 1970 or is too late to be held in tv_sec.
+ */
+int
+cd9660_tstamp_conv7(pi,pu)
+char *pi;
+struct timeval *pu;
+{
+	register unsigned char *p = (unsigned char *)pi;
+	int y, m, d, hour, minute, second, tz;
+	long days, crtime;
+
+	/*
+	 * Read the bytes as unsigned.  Where plain char is signed a
+	 * year byte above 127 (2028 and later) would otherwise come
+	 * out negative and be rejected as lying before 1970.
+	 */
+	y = p[0] + 1900;
+	m = p[1];
+	d = p[2];
+	hour = p[3];
+	minute = p[4];
+	second = p[5];
+	/* the GMT offset is the one field that is signed */
+	tz = p[6];
+	if (tz > 127)
+		tz -= 256;
+
+	/* a 4 byte tv_sec cannot hold anything from 2038 on */
+	if (y < 1970 || (sizeof(pu->tv_sec) <= 4 && y > 2037)) {
+		pu->tv_sec = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+
+#ifdef	ORIGINAL
+	/* computes day number relative to Sept. 19th,1989 */
+	/* don't even *THINK* about changing formula. It works! */
+	days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
+#else
+	/*
+	 * Changed :-) to make it relative to Jan. 1st, 1970
+	 * and to disambiguate negative division
+	 */
+	days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
+#endif
+	crtime = ((((days * 24) + hour) * 60 + minute) * 60) + second;
+
+	/*
+	 * The stamp is local time and tz tells how far local time is
+	 * ahead of GMT, so the offset is taken off to arrive at GMT.
+	 * The timezone offset is unreliable on some disks, hence the
+	 * range check.
+	 */
+	if (-48 <= tz && tz <= 52)
+		crtime -= tz * 15 * 60;
+
+	pu->tv_sec = crtime;
+	pu->tv_usec = 0;
+	return 1;
+}
+
+/*
+ * Convert "len" decimal digit characters starting at "begin" to a
+ * number.  No check is made that the characters really are digits.
+ */
+static unsigned
+cd9660_chars2ui(begin,len)
+	unsigned char *begin;
+	int len;
+{
+	unsigned rc;
+
+	for (rc = 0; --len >= 0;) {
+		rc *= 10;
+		rc += *begin++ - '0';
+	}
+	return rc;
+}
+
+/*
+ * Convert a 17 byte time stamp (decimal digits plus a GMT offset
+ * byte) by repacking it into the 7 byte form and handing that to
+ * cd9660_tstamp_conv7().
+ *
+ * Returns what cd9660_tstamp_conv7() returns.  A year that the one
+ * byte "years since 1900" field cannot represent, or that lies before
+ * 1970, yields 0 with a zero time instead of being truncated into
+ * some unrelated year.
+ */
+int
+cd9660_tstamp_conv17(pi,pu)
+	unsigned char *pi;
+	struct timeval *pu;
+{
+	unsigned char buf[7];
+	unsigned year;
+
+	/* year:"0001"-"9999" -> -1900 */
+	year = cd9660_chars2ui(pi,4);
+	if (year < 1970 || year > 1900 + 255) {
+		pu->tv_sec = 0;
+		pu->tv_usec = 0;
+		return 0;
+	}
+	buf[0] = year - 1900;
+
+	/* month: " 1"-"12" -> 1 - 12 */
+	buf[1] = cd9660_chars2ui(pi + 4,2);
+
+	/* day: " 1"-"31" -> 1 - 31 */
+	buf[2] = cd9660_chars2ui(pi + 6,2);
+
+	/* hour: " 0"-"23" -> 0 - 23 */
+	buf[3] = cd9660_chars2ui(pi + 8,2);
+
+	/* minute:" 0"-"59" -> 0 - 59 */
+	buf[4] = cd9660_chars2ui(pi + 10,2);
+
+	/* second:" 0"-"59" -> 0 - 59 */
+	buf[5] = cd9660_chars2ui(pi + 12,2);
+
+	/* difference of GMT */
+	buf[6] = pi[16];
+
+	return cd9660_tstamp_conv7((char *)buf,pu);
+}
+
+/*
+ * Compute the inode number that stands for the directory record
+ * "isodir" and store it in *inump: the first block after the
+ * extended attribute record (extent plus ext_attr_length) times
+ * the logical block size of the mount.
+ */
+void
+isodirino(inump,isodir,imp)
+	ino_t *inump;
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	*inump = imp->logical_block_size
+	    * (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length));
+}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h
new file mode 100644
index 0000000..45de67f
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_node.h
@@ -0,0 +1,143 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_node.h 8.2 (Berkeley) 1/23/94
+ */
+
+/*
+ * Theoretically, directories can be more than 2Gb in length,
+ * however, in practice this seems unlikely. So, we define
+ * the type doff_t as a long to keep down the cost of doing
+ * lookup on a 32-bit machine. If you are porting to a 64-bit
+ * architecture, you should make doff_t the same as off_t.
+ *
+ * Note that doff_t is a preprocessor macro here, not a typedef.
+ */
+#define doff_t long
+
+typedef struct { /* per-file attributes; embedded in struct iso_node as `inode' */
+ struct timespec iso_atime; /* time of last access */
+ struct timespec iso_mtime; /* time of last modification */
+ struct timespec iso_ctime; /* time file changed */
+ u_short iso_mode; /* files access mode and type */
+ uid_t iso_uid; /* owner user id */
+ gid_t iso_gid; /* owner group id */
+ short iso_links; /* links of file */
+ dev_t iso_rdev; /* Major/Minor number for special */
+} ISO_RRIP_INODE;
+
+#ifdef ISODEVMAP
+/*
+ * For the device number (major,minor) translation table.
+ * One node maps the (i_dev, i_number) pair of a device file on
+ * the disc to the device number d_dev to use in its place.
+ */
+struct iso_dnode {
+ struct iso_dnode *d_chain[2]; /* hash chain, MUST be first */
+ dev_t i_dev; /* device where dnode resides */
+ ino_t i_number; /* the identity of the inode */
+ dev_t d_dev; /* device # for translation */
+};
+#define d_forw d_chain[0] /* link followed when a hash chain is walked */
+#define d_back d_chain[1] /* the other link of the chain */
+#endif
+
+struct iso_node { /* in-core inode of the ISO 9660 file system */
+ struct iso_node *i_chain[2]; /* hash chain, MUST be first */
+ struct vnode *i_vnode; /* vnode associated with this inode */
+ struct vnode *i_devvp; /* vnode for block I/O */
+ u_long i_flag; /* see below */
+ dev_t i_dev; /* device where inode resides */
+ ino_t i_number; /* the identity of the inode */
+ /* we use the actual starting block of the file */
+ struct iso_mnt *i_mnt; /* filesystem associated with this inode */
+ struct lockf *i_lockf; /* head of byte-level lock list */
+ doff_t i_endoff; /* end of useful stuff in directory */
+ doff_t i_diroff; /* offset in dir, where we found last entry */
+ doff_t i_offset; /* offset of free space in directory */
+ ino_t i_ino; /* inode number of found directory */
+ long i_spare0; /* pid of the process holding the inode lock (set by iso_ilock) */
+ long i_spare1; /* pid of the last process that slept waiting for the lock */
+
+ long iso_extent; /* extent of file */
+ long i_size; /* size field of the directory record */
+ long iso_start; /* actual start of data of file (may be different */
+ /* from iso_extent, if file has extended attributes) */
+ ISO_RRIP_INODE inode; /* mode, owner, link count, times, device number */
+};
+
+#define i_forw i_chain[0] /* link followed when a hash chain is walked */
+#define i_back i_chain[1] /* the other link of the chain */
+
+/* flags (bits of i_flag) */
+#define ILOCKED 0x0001 /* inode is locked */
+#define IWANT 0x0002 /* some process waiting on lock */
+#define IACC 0x0020 /* inode access time to be updated */
+
+#define VTOI(vp) ((struct iso_node *)(vp)->v_data) /* vnode -> its iso_node */
+#define ITOV(ip) ((ip)->i_vnode) /* iso_node -> its vnode */
+
+#define ISO_ILOCK(ip) iso_ilock(ip) /* take the inode lock, sleeping if needed */
+#define ISO_IUNLOCK(ip) iso_iunlock(ip) /* drop the inode lock and wake waiters */
+
+/*
+ * Prototypes for ISOFS vnode operations, followed by the default
+ * attribute and time stamp helpers and, when ISODEVMAP is defined,
+ * the device number translation routines.
+ */
+int cd9660_lookup __P((struct vop_lookup_args *));
+int cd9660_open __P((struct vop_open_args *));
+int cd9660_close __P((struct vop_close_args *));
+int cd9660_access __P((struct vop_access_args *));
+int cd9660_getattr __P((struct vop_getattr_args *));
+int cd9660_read __P((struct vop_read_args *));
+int cd9660_ioctl __P((struct vop_ioctl_args *));
+int cd9660_select __P((struct vop_select_args *));
+int cd9660_mmap __P((struct vop_mmap_args *));
+int cd9660_seek __P((struct vop_seek_args *));
+int cd9660_readdir __P((struct vop_readdir_args *));
+int cd9660_abortop __P((struct vop_abortop_args *));
+int cd9660_inactive __P((struct vop_inactive_args *));
+int cd9660_reclaim __P((struct vop_reclaim_args *));
+int cd9660_bmap __P((struct vop_bmap_args *));
+int cd9660_lock __P((struct vop_lock_args *));
+int cd9660_unlock __P((struct vop_unlock_args *));
+int cd9660_strategy __P((struct vop_strategy_args *));
+int cd9660_print __P((struct vop_print_args *));
+int cd9660_islocked __P((struct vop_islocked_args *));
+void cd9660_defattr __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *)); /* buf may be null */
+void cd9660_deftstamp __P((struct iso_directory_record *,
+ struct iso_node *, struct buf *)); /* buf may be null */
+#ifdef ISODEVMAP
+struct iso_dnode *iso_dmap __P((dev_t, ino_t, int)); /* last argument: create if missing */
+void iso_dunmap __P((dev_t));
+#endif
diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c
new file mode 100644
index 0000000..0923fa0
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.c
@@ -0,0 +1,685 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.c 8.2 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include <sys/time.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/cd9660_rrip.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+/*
+ * POSIX file attribute
+ *
+ * Copy mode, uid, gid and link count from the attribute record into
+ * the inode being analyzed, mark the field as handled in ana->fields
+ * and return ISO_SUSP_ATTR.
+ */
+static int
+cd9660_rrip_attr(p,ana)
+	ISO_RRIP_ATTR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	register struct iso_node *node = ana->inop;
+
+	node->inode.iso_mode = isonum_731(p->mode_l);
+	node->inode.iso_uid = (uid_t)isonum_731(p->uid_l);
+	node->inode.iso_gid = (gid_t)isonum_731(p->gid_l);
+	node->inode.iso_links = isonum_731(p->links_l);
+
+	ana->fields &= ~ISO_SUSP_ATTR;
+	return ISO_SUSP_ATTR;
+}
+
+/*
+ * Default attribute handler: warn that the PX field is missing and
+ * fall back on cd9660_defattr(), which is given no buffer and so
+ * looks for an extended attribute record by itself.
+ */
+static void
+cd9660_rrip_defattr(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *node = ana->inop;
+
+	/* But this is a required field! */
+	printf("RRIP without PX field?\n");
+	cd9660_defattr(isodir,node,NULL);
+}
+
+/*
+ * Symbolic Links
+ *
+ * Append the path components of one symbolic link record to the
+ * output buffer described by ana (outbuf, *outlen, maxlen).
+ * Components are separated by '/', except after a component that
+ * was flagged as continued. ana->cont carries that state from one
+ * record to the next.
+ *
+ * Returns ISO_SUSP_SLINK and clears that bit in ana->fields when
+ * the record's flags byte is zero. Returns 0 otherwise, including
+ * the overflow case, in which the output is reset to empty and
+ * ana->fields is cleared altogether.
+ */
+static int
+cd9660_rrip_slink(p,ana)
+ ISO_RRIP_SLINK *p;
+ ISO_RRIP_ANALYZE *ana;
+{
+ register ISO_RRIP_SLINK_COMPONENT *pcomp;
+ register ISO_RRIP_SLINK_COMPONENT *pcompe;
+ int len, wlen, cont;
+ char *outbuf, *inbuf;
+
+ pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component;
+ pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length)); /* first byte past this record */
+ len = *ana->outlen; /* work on copies; stored back only on success */
+ outbuf = ana->outbuf;
+ cont = ana->cont;
+
+ /*
+ * Gathering a Symbolic name from each component with path
+ */
+ for (;
+ pcomp < pcompe;
+ pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ
+ + isonum_711(pcomp->clen))) {
+
+ if (!cont) {
+ if (len < ana->maxlen) {
+ len++;
+ *outbuf++ = '/';
+ }
+ }
+ cont = 0;
+
+ inbuf = ".."; /* source for the "." (1 byte) and ".." (2 bytes) cases */
+ wlen = 0;
+
+ switch (*pcomp->cflag) {
+
+ case ISO_SUSP_CFLAG_CURRENT:
+ /* Inserting Current */
+ wlen = 1;
+ break;
+
+ case ISO_SUSP_CFLAG_PARENT:
+ /* Inserting Parent */
+ wlen = 2;
+ break;
+
+ case ISO_SUSP_CFLAG_ROOT:
+ /* Inserting slash for ROOT */
+ /* start over from beginning(?) */
+ outbuf -= len;
+ len = 0;
+ break;
+
+ case ISO_SUSP_CFLAG_VOLROOT:
+ /* Inserting a mount point i.e. "/cdrom" */
+ /* same as above */
+ outbuf -= len;
+ len = 0;
+ inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname;
+ wlen = strlen(inbuf);
+ break;
+
+ case ISO_SUSP_CFLAG_HOST:
+ /* Inserting hostname i.e. "kurt.tools.de" */
+ inbuf = hostname;
+ wlen = hostnamelen;
+ break;
+
+ case ISO_SUSP_CFLAG_CONTINUE:
+ cont = 1;
+ /* fall thru */
+ case 0:
+ /* Inserting component */
+ wlen = isonum_711(pcomp->clen);
+ inbuf = pcomp->name;
+ break;
+ default:
+ printf("RRIP with incorrect flags?");
+ wlen = ana->maxlen + 1; /* guarantees the overflow check below fails */
+ break;
+ }
+
+ if (len + wlen > ana->maxlen) {
+ /* indicate error to caller */
+ ana->cont = 1;
+ ana->fields = 0;
+ ana->outbuf -= *ana->outlen; /* rewind to the start of the buffer */
+ *ana->outlen = 0;
+ return 0;
+ }
+
+ bcopy(inbuf,outbuf,wlen);
+ outbuf += wlen;
+ len += wlen;
+
+ }
+ ana->outbuf = outbuf;
+ *ana->outlen = len;
+ ana->cont = cont;
+
+ if (!isonum_711(p->flags)) {
+ ana->fields &= ~ISO_SUSP_SLINK;
+ return ISO_SUSP_SLINK;
+ }
+ return 0;
+}
+
+/*
+ * Alternate name
+ *
+ * Append the name piece carried by one alternate name record to the
+ * output buffer described by ana (outbuf, *outlen, maxlen).
+ *
+ * Returns ISO_SUSP_ALTNAME, with that bit cleared in ana->fields,
+ * once a piece not flagged as continued has been stored.  Returns 0
+ * when further pieces are expected, or when the name does not fit or
+ * the record is malformed; in the latter cases the output is reset
+ * to empty and the record is treated as if there were no name field.
+ */
+static int
+cd9660_rrip_altname(p,ana)
+	ISO_RRIP_ALTNAME *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	char *inbuf;
+	int wlen;
+	int cont;
+
+	/* source for the "." (1 byte) and ".." (2 bytes) cases */
+	inbuf = "..";
+	wlen = 0;
+	cont = 0;
+
+	switch (*p->flags) {
+	case ISO_SUSP_CFLAG_CURRENT:
+		/* Inserting Current */
+		wlen = 1;
+		break;
+
+	case ISO_SUSP_CFLAG_PARENT:
+		/* Inserting Parent */
+		wlen = 2;
+		break;
+
+	case ISO_SUSP_CFLAG_HOST:
+		/* Inserting hostname i.e. "kurt.tools.de" */
+		inbuf = hostname;
+		wlen = hostnamelen;
+		break;
+
+	case ISO_SUSP_CFLAG_CONTINUE:
+		cont = 1;
+		/* fall thru */
+	case 0:
+		/* Inserting component */
+		wlen = isonum_711(p->h.length) - 5;
+		inbuf = (char *)p + 5;
+		if (wlen < 0) {
+			/*
+			 * The record claims to be shorter than its own
+			 * 5 byte header.  A negative length must never
+			 * reach bcopy(); force the overflow path below.
+			 */
+			wlen = ana->maxlen + 1;
+		}
+		break;
+
+	default:
+		printf("RRIP with incorrect NM flags?\n");
+		/* guarantees that the overflow check below fails */
+		wlen = ana->maxlen + 1;
+		break;
+	}
+
+	if ((*ana->outlen += wlen) > ana->maxlen) {
+		/* treat as no name field */
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		/* *outlen already includes wlen: rewind by the old length */
+		ana->outbuf -= *ana->outlen - wlen;
+		*ana->outlen = 0;
+		return 0;
+	}
+
+	bcopy(inbuf,ana->outbuf,wlen);
+	ana->outbuf += wlen;
+
+	if (!cont) {
+		ana->fields &= ~ISO_SUSP_ALTNAME;
+		return ISO_SUSP_ALTNAME;
+	}
+	return 0;
+}
+
+/*
+ * Default name handler: derive the name from the plain ISO 9660
+ * directory record.  A first name byte of 0 yields "." and 1 yields
+ * ".."; every other name is translated by isofntrans().
+ */
+static void
+cd9660_rrip_defname(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* preload ".." so that the two special cases only set a length */
+	strcpy(ana->outbuf,"..");
+
+	if (*isodir->name == 0) {
+		*ana->outlen = 1;
+	} else if (*isodir->name == 1) {
+		*ana->outlen = 2;
+	} else {
+		isofntrans(isodir->name,isonum_711(isodir->name_len),
+		    ana->outbuf,ana->outlen,
+		    1,isonum_711(isodir->flags)&4);
+	}
+}
+
+/*
+ * Parent or Child Link
+ *
+ * Store the inode number of the linked directory (its block address
+ * shifted by im_bshift) in *ana->inump, clear both link bits in
+ * ana->fields and report which kind of link the record was.
+ */
+static int
+cd9660_rrip_pclink(p,ana)
+	ISO_RRIP_CLINK *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	*ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift;
+	ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK);
+
+	/* a record type starting with 'C' is a child link */
+	if (*p->h.type == 'C')
+		return ISO_SUSP_CLINK;
+	return ISO_SUSP_PLINK;
+}
+
+/*
+ * Relocated directory
+ *
+ * Empties the output length and stops any further analysis by
+ * clearing ana->fields; the returned bit set lets the caller see
+ * that an RE field was met.  The record itself is not looked at.
+ */
+static int
+cd9660_rrip_reldir(p,ana)
+	ISO_RRIP_RELDIR *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	/* special hack to make caller aware of RE field */
+	ana->fields = 0;
+	*ana->outlen = 0;
+	return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+}
+
+/*
+ * Time stamps
+ *
+ * The record holds up to four stamps in the order creation,
+ * modification, access, attribute change; the flag bits say which
+ * are present and whether they use the 7 or the 17 byte format.
+ * The creation stamp is skipped.  A missing modification time is
+ * zeroed; missing access and change times copy the modification
+ * time.  Clears ISO_SUSP_TSTAMP in ana->fields and returns it.
+ */
+static int
+cd9660_rrip_tstamp(p,ana)
+	ISO_RRIP_TSTAMP *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	extern int cd9660_tstamp_conv7(), cd9660_tstamp_conv17();
+	int (*conv)();
+	int step;
+	unsigned char *ptime;
+
+	ptime = p->time;
+
+	/* Check a format of time stamp (7bytes/17bytes) */
+	if (*p->flags&ISO_SUSP_TSTAMP_FORM17) {
+		conv = cd9660_tstamp_conv17;
+		step = 17;
+	} else {
+		conv = cd9660_tstamp_conv7;
+		step = 7;
+	}
+
+	if (*p->flags&ISO_SUSP_TSTAMP_CREAT)
+		ptime += step;
+
+	if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) {
+		(*conv)(ptime,&ana->inop->inode.iso_mtime);
+		ptime += step;
+	} else
+		bzero(&ana->inop->inode.iso_mtime,sizeof(struct timeval));
+
+	if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) {
+		(*conv)(ptime,&ana->inop->inode.iso_atime);
+		ptime += step;
+	} else
+		ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime;
+
+	if (*p->flags&ISO_SUSP_TSTAMP_ATTR)
+		(*conv)(ptime,&ana->inop->inode.iso_ctime);
+	else
+		ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime;
+
+	ana->fields &= ~ISO_SUSP_TSTAMP;
+	return ISO_SUSP_TSTAMP;
+}
+
+/*
+ * Default time stamp handler: take the times from the plain ISO 9660
+ * data through cd9660_deftstamp(), which is given no buffer and so
+ * looks for an extended attribute record by itself.
+ */
+static void
+cd9660_rrip_deftstamp(isodir,ana)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+{
+	struct iso_node *node = ana->inop;
+
+	cd9660_deftstamp(isodir,node,NULL);
+}
+
+/*
+ * POSIX device modes
+ *
+ * Build the device number of a special file from the two halves in
+ * the record.  A non-zero high half is used as the major number;
+ * when it is zero, major and minor are both taken from the low half.
+ * Clears ISO_SUSP_DEVICE in ana->fields and returns it.
+ */
+static int
+cd9660_rrip_device(p,ana)
+	ISO_RRIP_DEVICE *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	unsigned high = isonum_733(p->dev_t_high_l);
+	unsigned low = isonum_733(p->dev_t_low_l);
+
+	if ( high != 0 )
+		ana->inop->inode.iso_rdev = makedev( high, minor(low) );
+	else
+		ana->inop->inode.iso_rdev = makedev( major(low), minor(low) );
+
+	ana->fields &= ~ISO_SUSP_DEVICE;
+	return ISO_SUSP_DEVICE;
+}
+
+/*
+ * Flag indicating which fields are recorded
+ *
+ * The low eight bits of ana->fields are limited to the fields the
+ * record announces.  If a relocated directory is still wanted after
+ * that, the record is handed on to cd9660_rrip_reldir(); otherwise
+ * ISO_SUSP_IDFLAG is returned.
+ */
+static int
+cd9660_rrip_idflag(p,ana)
+	ISO_RRIP_IDFLAG *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */
+
+	/* special handling of RE field */
+	return (ana->fields&ISO_SUSP_RELDIR)
+	    ? cd9660_rrip_reldir(p,ana)
+	    : ISO_SUSP_IDFLAG;
+}
+
+/*
+ * Continuation pointer
+ *
+ * Record block, offset and length of the continuation area in ana
+ * and return ISO_SUSP_CONT.
+ */
+static int
+cd9660_rrip_cont(p,ana)
+	ISO_RRIP_CONT *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->iso_ce_len = isonum_733(p->length);
+	ana->iso_ce_off = isonum_733(p->offset);
+	ana->iso_ce_blk = isonum_733(p->location);
+
+	return ISO_SUSP_CONT;
+}
+
+/*
+ * "ST" entry: end of the System Use entries.
+ *
+ * Clears the whole wanted-field mask, which makes cd9660_rrip_loop()
+ * leave its scan, and returns ISO_SUSP_STOP.
+ */
+static int
+cd9660_rrip_stop(p,ana)
+	ISO_SUSP_HEADER *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	ana->fields = 0;	/* nothing left to look for */
+	return (ISO_SUSP_STOP);
+}
+
+/*
+ * "ER" entry: extension reference.
+ *
+ * Accepts the entry only if the identifier is 10 bytes long, the
+ * 10 bytes at offset 8 of the entry read "RRIP_1991A" and the extension
+ * version is 1.  On a match ISO_SUSP_EXTREF is removed from the wanted
+ * fields and returned; anything else yields 0.
+ */
+static int
+cd9660_rrip_extref(p,ana)
+	ISO_RRIP_EXTREF *p;
+	ISO_RRIP_ANALYZE *ana;
+{
+	if (isonum_711(p->len_id) == 10
+	    && bcmp((char *)p + 8,"RRIP_1991A",10) == 0
+	    && isonum_711(p->version) == 1) {
+		ana->fields &= ~ISO_SUSP_EXTREF;
+		return (ISO_SUSP_EXTREF);
+	}
+	return (0);
+}
+
+typedef struct { /* one row of a handler table walked by cd9660_rrip_loop() */
+ char type[2]; /* two-character entry signature compared with the entry header */
+ int (*func)(); /* handler, called as func(entry, ana); its result is OR-ed into the scan result; null ends the table */
+ void (*func2)(); /* optional default, called as func2(isodir, ana) when no `result' bit was produced */
+ int result; /* ISO_SUSP_* bit(s) tested before func2 is called */
+} RRIP_TABLE;
+
+/*
+ * Scan the System Use entries attached to a directory record and hand
+ * each one to its handler.
+ *
+ *	isodir	directory record whose System Use area is scanned
+ *	ana	analysis state; ana->fields is the set of entries still
+ *		wanted, and scanning ends as soon as it becomes zero
+ *	table	handler table, ended by a row whose func is null
+ *
+ * After the record itself has been scanned, a continuation area
+ * announced through ana->iso_ce_blk/off/len is read and scanned the
+ * same way.  Finally every func2 default whose result bit was never
+ * produced is called.  Returns the OR of all handler results.
+ */
+static int
+cd9660_rrip_loop(isodir,ana,table)
+	struct iso_directory_record *isodir;
+	ISO_RRIP_ANALYZE *ana;
+	RRIP_TABLE *table;
+{
+	register RRIP_TABLE *ptable;
+	register ISO_SUSP_HEADER *phead;
+	register ISO_SUSP_HEADER *pend;
+	struct buf *bp = NULL;
+	char *pwhead;
+	int entlen;
+	int result;
+
+	/*
+	 * Note: If the name length is even,
+	 * one padding byte follows the name
+	 */
+	pwhead = isodir->name + isonum_711(isodir->name_len);
+	if (!(isonum_711(isodir->name_len)&1))
+		pwhead++;
+
+	/* If it's not the '.' entry of the root dir obey SP field */
+	if (*isodir->name != 0
+	    || isonum_733(isodir->extent) != ana->imp->root_extent)
+		pwhead += ana->imp->rr_skip;
+	else
+		pwhead += ana->imp->rr_skip0;
+
+	phead = (ISO_SUSP_HEADER *)pwhead;
+	pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length));
+
+	result = 0;
+	while (1) {
+		ana->iso_ce_len = 0;
+		/*
+		 * Note: "pend" should be more than one SUSP header
+		 */
+		while (pend >= phead + 1) {
+			/*
+			 * Plausibility check: an entry shorter than its own
+			 * header (in particular length 0) would never advance
+			 * the scan, and one running past the end of the area
+			 * would make the handlers read foreign data.
+			 */
+			entlen = isonum_711(phead->length);
+			if (entlen < (int)sizeof(*phead)
+			    || (char *)phead + entlen > (char *)pend)
+				break;
+
+			if (isonum_711(phead->version) == 1) {
+				for (ptable = table; ptable->func; ptable++) {
+					if (*phead->type == *ptable->type
+					    && phead->type[1] == ptable->type[1]) {
+						result |= ptable->func(phead,ana);
+						break;
+					}
+				}
+				if (!ana->fields)
+					break;
+			}
+			/*
+			 * move to next SUSP
+			 * Hopefully this works with newer versions, too
+			 */
+			phead = (ISO_SUSP_HEADER *)((char *)phead + entlen);
+		}
+
+		if (!ana->fields || !ana->iso_ce_len)
+			break;
+
+		/*
+		 * The area just scanned is finished; give back the buffer of
+		 * a previous continuation before reading the next one, so
+		 * chained continuations do not leak buffers.
+		 */
+		if (bp) {
+			brelse(bp);
+			bp = NULL;
+		}
+		if (ana->iso_ce_blk >= ana->imp->volume_space_size
+		    || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size
+		    || bread(ana->imp->im_devvp,
+			     ana->iso_ce_blk * ana->imp->logical_block_size / DEV_BSIZE,
+			     ana->imp->logical_block_size,NOCRED,&bp))
+			/* what to do now? */
+			break;
+		phead = (ISO_SUSP_HEADER *)(bp->b_un.b_addr + ana->iso_ce_off);
+		pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len);
+	}
+	if (bp)
+		brelse(bp);
+	/*
+	 * If the basic entries were not found, just set default values
+	 * ( attribute/time stamp )
+	 */
+	for (ptable = table; ptable->func2; ptable++)
+		if (!(ptable->result&result))
+			ptable->func2(isodir,ana);
+
+	return result;
+}
+
+static RRIP_TABLE rrip_table_analyze[] = { /* used by cd9660_rrip_analyze(); columns: signature, handler, default handler, result bit */
+ { "PX", cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR }, /* cd9660_rrip_defattr runs if no ISO_SUSP_ATTR result was produced */
+ { "TF", cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP }, /* cd9660_rrip_deftstamp runs if no ISO_SUSP_TSTAMP result was produced */
+ { "PN", cd9660_rrip_device, 0, ISO_SUSP_DEVICE },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 } /* terminator: null func ends the handler scan */
+};
+
+/*
+ * Run the attribute/time stamp/device handlers of rrip_table_analyze
+ * over the System Use entries of `isodir', filling in `inop'.
+ * Returns the OR of the handler results from cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_analyze(isodir,inop,imp)
+	struct iso_directory_record *isodir;
+	struct iso_node *inop;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+
+	/* entries we are interested in */
+	ana.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE;
+	ana.imp = imp;
+	ana.inop = inop;
+
+	return (cd9660_rrip_loop(isodir,&ana,rrip_table_analyze));
+}
+
+/*
+ * Get Alternate Name from 'NM' record
+ * If there is either no NM record or its length is 0,
+ * the translated ISO9660 name is returned instead
+ * (cd9660_rrip_defname is the default handler of the NM row).
+ */
+static RRIP_TABLE rrip_table_getname[] = {
+ { "NM", cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME }, /* must stay the first row: cd9660_rrip_getname() skips it with tab++ */
+ { "CL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "PL", cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK },
+ { "RE", cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 } /* terminator */
+};
+
+/*
+ * Produce the name of a directory entry.
+ *
+ *	isodir	directory record to examine
+ *	outbuf	receives the name (at most NAME_MAX bytes are allowed)
+ *	outlen	receives the name length; reset to 0 on entry
+ *	inump	made available to the handlers through the analysis state
+ *	imp	mount the record belongs to
+ *
+ * Records whose first name byte is 0 or 1 get the default name through
+ * cd9660_rrip_defname() right away and the scan then starts at the
+ * second table row, i.e. without the "NM" handler.  Returns the OR of
+ * the handler results from cd9660_rrip_loop().
+ */
+int
+cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	ino_t *inump;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+	RRIP_TABLE *tab = rrip_table_getname;
+	char first;
+
+	*outlen = 0;
+	ana.outbuf = outbuf;
+	ana.outlen = outlen;
+	ana.maxlen = NAME_MAX;
+	ana.inump = inump;
+	ana.imp = imp;
+	ana.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK;
+
+	first = *isodir->name;
+	if (first == 0 || first == 1) {
+		cd9660_rrip_defname(isodir,&ana);
+
+		/* the name is settled; skip the "NM" row */
+		ana.fields &= ~ISO_SUSP_ALTNAME;
+		tab++;
+	}
+
+	return (cd9660_rrip_loop(isodir,&ana,tab));
+}
+
+/*
+ * Get Symbolic Name from 'SL' record
+ *
+ * Note: isodir should contain an SL record!  No row of this table has a
+ * default handler, so nothing is written if the record is missing.
+ */
+static RRIP_TABLE rrip_table_getsymname[] = {
+ { "SL", cd9660_rrip_slink, 0, ISO_SUSP_SLINK },
+ { "RR", cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 } /* terminator */
+};
+
+/*
+ * Collect the target of a symbolic link from the "SL" entries of
+ * `isodir' into `outbuf' (limit MAXPATHLEN); `*outlen' is reset to 0
+ * on entry.  Returns ISO_SUSP_SLINK if the scan reported that bit,
+ * 0 otherwise.
+ */
+int
+cd9660_rrip_getsymname(isodir,outbuf,outlen,imp)
+	struct iso_directory_record *isodir;
+	char *outbuf;
+	u_short *outlen;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+	int found;
+
+	*outlen = 0;
+	ana.outbuf = outbuf;
+	ana.outlen = outlen;
+	ana.maxlen = MAXPATHLEN;
+	ana.cont = 1;	/* don't start with a slash */
+	ana.imp = imp;
+	ana.fields = ISO_SUSP_SLINK;
+
+	found = cd9660_rrip_loop(isodir,&ana,rrip_table_getsymname);
+	return (found & ISO_SUSP_SLINK);
+}
+
+static RRIP_TABLE rrip_table_extref[] = { /* used by cd9660_rrip_offset() to look for the "ER" entry */
+ { "ER", cd9660_rrip_extref, 0, ISO_SUSP_EXTREF },
+ { "CE", cd9660_rrip_cont, 0, ISO_SUSP_CONT },
+ { "ST", cd9660_rrip_stop, 0, ISO_SUSP_STOP },
+ { "", 0, 0, 0 } /* terminator */
+};
+
+/*
+ * Check for Rock Ridge Extension and return offset of its fields.
+ * Note: We require the ER field.
+ *
+ * The "SP" entry is expected one byte after the start of the name
+ * field, or 15 bytes further on (CDROM XA); imp->rr_skip0 is set to
+ * 0 or 15 accordingly.  Returns the skip value recorded in the SP
+ * entry, or -1 if either the SP entry or a matching ER entry is
+ * missing.
+ */
+int
+cd9660_rrip_offset(isodir,imp)
+	struct iso_directory_record *isodir;
+	struct iso_mnt *imp;
+{
+	ISO_RRIP_ANALYZE ana;
+	ISO_RRIP_OFFSET *sp;
+	char *cp = isodir->name + 1;
+
+	imp->rr_skip0 = 0;
+	if (bcmp(cp,"SP\7\1\276\357",6) != 0) {
+		/* Maybe, it's a CDROM XA disc? */
+		imp->rr_skip0 = 15;
+		cp += 15;
+		if (bcmp(cp,"SP\7\1\276\357",6) != 0)
+			return (-1);
+	}
+	sp = (ISO_RRIP_OFFSET *)cp;
+
+	ana.imp = imp;
+	ana.fields = ISO_SUSP_EXTREF;
+	if ((cd9660_rrip_loop(isodir,&ana,rrip_table_extref) & ISO_SUSP_EXTREF) == 0)
+		return (-1);
+
+	return (isonum_711(sp->skip));
+}
diff --git a/sys/fs/cd9660/cd9660_rrip.h b/sys/fs/cd9660/cd9660_rrip.h
new file mode 100644
index 0000000..b401728
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_rrip.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_rrip.h 8.1 (Berkeley) 1/21/94
+ */
+
+typedef struct { /* common header at the start of every System Use entry */
+ char type [ISODCL ( 0, 1)]; /* two-character signature, matched against RRIP_TABLE.type */
+ unsigned char length [ISODCL ( 2, 2)]; /* 711; entry length, added to the entry address to reach the next entry */
+ unsigned char version [ISODCL ( 3, 3)]; /* cd9660_rrip_loop() only dispatches entries whose version is 1 */
+} ISO_SUSP_HEADER;
+
+typedef struct { /* presumably the body of the "PX" entry; its handler is outside this chunk */
+ ISO_SUSP_HEADER h;
+ char mode_l [ISODCL ( 4, 7)]; /* 731 */
+ char mode_m [ISODCL ( 8, 11)]; /* 732 */
+ char links_l [ISODCL ( 12, 15)]; /* 731 */
+ char links_m [ISODCL ( 16, 19)]; /* 732 */
+ char uid_l [ISODCL ( 20, 23)]; /* 731 */
+ char uid_m [ISODCL ( 24, 27)]; /* 732 */
+ char gid_l [ISODCL ( 28, 31)]; /* 731 */
+ char gid_m [ISODCL ( 32, 35)]; /* 732 */
+} ISO_RRIP_ATTR;
+
+typedef struct { /* entry decoded by cd9660_rrip_device() (the "PN" table row) */
+ ISO_SUSP_HEADER h;
+ char dev_t_high_l [ISODCL ( 4, 7)]; /* 731; cd9660_rrip_device() reads the 8 bytes starting here with isonum_733() */
+ char dev_t_high_m [ISODCL ( 8, 11)]; /* 732 */
+ char dev_t_low_l [ISODCL ( 12, 15)]; /* 731; likewise read with isonum_733() */
+ char dev_t_low_m [ISODCL ( 16, 19)]; /* 732 */
+} ISO_RRIP_DEVICE;
+
+#define ISO_SUSP_CFLAG_CONTINUE 0x01 /* ISO_SUSP_CFLAG_*: single-bit flag values; presumably for the cflag/flags bytes of the SL and NM entries -- users are outside this chunk */
+#define ISO_SUSP_CFLAG_CURRENT 0x02
+#define ISO_SUSP_CFLAG_PARENT 0x04
+#define ISO_SUSP_CFLAG_ROOT 0x08
+#define ISO_SUSP_CFLAG_VOLROOT 0x10
+#define ISO_SUSP_CFLAG_HOST 0x20
+
+typedef struct { /* one path component inside a symbolic link entry (see ISO_RRIP_SLINK.component) */
+ u_char cflag [ISODCL ( 1, 1)]; /* presumably a combination of ISO_SUSP_CFLAG_* bits -- TODO confirm against cd9660_rrip_slink */
+ u_char clen [ISODCL ( 2, 2)]; /* presumably the number of name bytes that follow -- TODO confirm */
+ u_char name [0]; /* zero-length array: marks where the variable-length name starts */
+} ISO_RRIP_SLINK_COMPONENT;
+#define ISO_RRIP_SLSIZ 2 /* matches the two one-byte fields ahead of name[] */
+
+typedef struct { /* presumably the "SL" entry handled by cd9660_rrip_slink (handler not shown here) */
+ ISO_SUSP_HEADER h;
+ u_char flags [ISODCL ( 4, 4)];
+ u_char component [ISODCL ( 5, 5)]; /* presumably the first byte of the component area -- TODO confirm */
+} ISO_RRIP_SLINK;
+
+typedef struct { /* presumably the "NM" entry handled by cd9660_rrip_altname (handler not shown here) */
+ ISO_SUSP_HEADER h;
+ char flags [ISODCL ( 4, 4)];
+} ISO_RRIP_ALTNAME;
+
+typedef struct { /* presumably the "CL" entry; the table routes "CL" to cd9660_rrip_pclink */
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_CLINK;
+
+typedef struct { /* presumably the "PL" entry; the table routes "PL" to cd9660_rrip_pclink */
+ ISO_SUSP_HEADER h;
+ char dir_loc [ISODCL ( 4, 11)]; /* 733 */
+} ISO_RRIP_PLINK;
+
+typedef struct { /* header-only entry; presumably "RE", which the table routes to cd9660_rrip_reldir */
+ ISO_SUSP_HEADER h;
+} ISO_RRIP_RELDIR;
+
+#define ISO_SUSP_TSTAMP_FORM17 0x80 /* presumably selects the 17-byte stamp form over the 7-byte one; the test itself is outside this chunk */
+#define ISO_SUSP_TSTAMP_FORM7 0x00
+#define ISO_SUSP_TSTAMP_CREAT 0x01 /* a creation stamp is recorded; the time stamp handler only skips over it */
+#define ISO_SUSP_TSTAMP_MODIFY 0x02 /* stamp converted into iso_mtime (zeroed when absent) */
+#define ISO_SUSP_TSTAMP_ACCESS 0x04 /* stamp converted into iso_atime (copy of iso_mtime when absent) */
+#define ISO_SUSP_TSTAMP_ATTR 0x08 /* stamp converted into iso_ctime (copy of iso_mtime when absent) */
+#define ISO_SUSP_TSTAMP_BACKUP 0x10
+#define ISO_SUSP_TSTAMP_EXPIRE 0x20
+#define ISO_SUSP_TSTAMP_EFFECT 0x40
+
+typedef struct { /* time stamp entry */
+ ISO_SUSP_HEADER h;
+ unsigned char flags [ISODCL ( 4, 4)]; /* ISO_SUSP_TSTAMP_* bits naming the stamps that are recorded */
+ unsigned char time [ISODCL ( 5, 5)]; /* presumably the first byte of the packed stamps (7 or 17 bytes each) -- TODO confirm */
+} ISO_RRIP_TSTAMP;
+
+typedef struct { /* entry decoded by cd9660_rrip_idflag() (the "RR" table rows) */
+ ISO_SUSP_HEADER h;
+ unsigned char flags [ISODCL ( 4, 4)]; /* read with isonum_711() and ANDed into the low 8 bits of the wanted-field mask */
+} ISO_RRIP_IDFLAG;
+
+typedef struct { /* entry checked by cd9660_rrip_extref() (the "ER" table row) */
+ ISO_SUSP_HEADER h;
+ char len_id [ISODCL ( 4, 4)]; /* 711; must be 10 for the entry to be accepted */
+ char len_des [ISODCL ( 5, 5)];
+ char len_src [ISODCL ( 6, 6)];
+ char version [ISODCL ( 7, 7)]; /* 711; must be 1; the identifier text compared with "RRIP_1991A" starts at entry offset 8 */
+} ISO_RRIP_EXTREF;
+
+typedef struct { /* "SP" entry probed by cd9660_rrip_offset() */
+ ISO_SUSP_HEADER h;
+ char check [ISODCL ( 4, 5)]; /* cd9660_rrip_offset() expects the bytes \276\357 here */
+ char skip [ISODCL ( 6, 6)]; /* 711; returned by cd9660_rrip_offset() and stored as the mount's rr_skip */
+} ISO_RRIP_OFFSET;
+
+typedef struct { /* entry decoded by cd9660_rrip_cont() (the "CE" table rows) */
+ ISO_SUSP_HEADER h;
+ char location [ISODCL ( 4, 11)]; /* 733; block of the continuation area */
+ char offset [ISODCL ( 12, 19)]; /* 733; byte offset of the area inside that block */
+ char length [ISODCL ( 20, 27)]; /* 733; length of the area in bytes */
+} ISO_RRIP_CONT;
diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c
new file mode 100644
index 0000000..f74f051
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_util.c
@@ -0,0 +1,236 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_util.c 8.1 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h> /* XXX */
+#include <miscfs/fifofs/fifo.h> /* XXX */
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+
+#ifdef __notanymore__ /* out-of-line isonum_*() decoders; compiled only when __notanymore__ is defined, which nothing visible here does */
+int
+isonum_711 (p)
+unsigned char *p;
+{
+ return (*p); /* single unsigned byte */
+}
+
+int
+isonum_712 (p)
+signed char *p;
+{
+ return (*p); /* single signed byte */
+}
+
+int
+isonum_721 (p)
+unsigned char *p;
+{
+ /* little endian short */
+#if BYTE_ORDER != LITTLE_ENDIAN
+ printf ("isonum_721 called on non little-endian machine!\n");
+#endif
+
+ return *(short *)p; /* NOTE(review): loads through a short pointer -- signed result, and p may be unaligned */
+}
+
+int
+isonum_722 (p)
+unsigned char *p;
+{
+ /* big endian short */
+#if BYTE_ORDER != BIG_ENDIAN
+ printf ("isonum_722 called on non big-endian machine!\n");
+#endif
+
+ return *(short *)p; /* NOTE(review): same signedness/alignment caveat as isonum_721 */
+}
+
+int
+isonum_723 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+ return isonum_722 (p + 2); /* both-byte-order field: the big-endian copy is the second half */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ return isonum_721 (p); /* the little-endian copy comes first */
+#else
+ printf ("isonum_723 unsupported byte order!\n");
+ return 0;
+#endif
+}
+
+int
+isonum_731 (p)
+unsigned char *p;
+{
+ /* little endian long */
+#if BYTE_ORDER != LITTLE_ENDIAN
+ printf ("isonum_731 called on non little-endian machine!\n");
+#endif
+
+ return *(long *)p; /* NOTE(review): assumes long is 4 bytes and p is suitably aligned */
+}
+
+int
+isonum_732 (p)
+unsigned char *p;
+{
+ /* big endian long */
+#if BYTE_ORDER != BIG_ENDIAN
+ printf ("isonum_732 called on non big-endian machine!\n");
+#endif
+
+ return *(long *)p; /* NOTE(review): same size/alignment assumption as isonum_731 */
+}
+
+int
+isonum_733 (p)
+unsigned char *p;
+{
+#if BYTE_ORDER == BIG_ENDIAN
+ return isonum_732 (p + 4); /* both-byte-order field: the big-endian copy is the second half */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+ return isonum_731 (p); /* the little-endian copy comes first */
+#else
+ printf ("isonum_733 unsupported byte order!\n");
+ return 0;
+#endif
+}
+#endif /* __notanymore__ */
+
+/*
+ * translate and compare a filename
+ * Note: Version number plus ';' may be omitted.
+ *
+ *	fn, fnlen	name being looked up
+ *	isofn, isolen	name as recorded on the disc
+ *
+ * An upper-case letter in the recorded name also matches its lower-case
+ * form in `fn'.  When both names carry a ";version" suffix the versions
+ * are compared numerically.  A recorded name that merely continues with
+ * ";..." or ".;..." after `fn' is exhausted still matches.
+ * Returns 0 on a match and a non-zero value otherwise.
+ */
+int
+isofncmp(unsigned char *fn,int fnlen,unsigned char *isofn,int isolen)
+{
+	int i, j;
+	unsigned char c;	/* unsigned, so bytes >= 0x80 compare equal to themselves */
+
+	while (--fnlen >= 0) {
+		if (--isolen < 0)
+			return *fn;
+		if ((c = *isofn++) == ';') {
+			switch (*fn++) {
+			default:
+				return *--fn;
+			case 0:
+				return 0;
+			case ';':
+				break;
+			}
+			/* both names have a version: compare them as numbers */
+			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {
+				if (*fn < '0' || *fn > '9') {
+					return -1;
+				}
+			}
+			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0')
+				;
+			return i - j;
+		}
+		if (c != *fn) {
+			if (c >= 'A' && c <= 'Z') {
+				if (c + ('a' - 'A') != *fn) {
+					if (*fn >= 'a' && *fn <= 'z')
+						return *fn - ('a' - 'A') - c;
+					else
+						return *fn - c;
+				}
+			} else
+				return *fn - c;
+		}
+		fn++;
+	}
+	if (isolen > 0) {
+		switch (*isofn) {
+		default:
+			return -1;
+		case '.':
+			/* only look at the next byte if it belongs to the name */
+			if (isolen < 2 || isofn[1] != ';')
+				return -1;
+			/* FALLTHROUGH */
+		case ';':
+			return 0;
+		}
+	}
+	return 0;
+}
+
+/*
+ * translate a filename
+ *
+ *	infn, infnlen	name as recorded on the disc
+ *	outfn		receives the translated name (not NUL-terminated)
+ *	outfnlen	receives the number of bytes written to outfn
+ *	original	non-zero: copy the name unchanged
+ *	assoc		non-zero: prefix the result with ASSOCCHAR
+ *
+ * Unless `original' is set, upper-case letters are folded to lower case
+ * and the name is cut at the first ';' (or at a '.' directly followed
+ * by ';').
+ */
+void
+isofntrans(unsigned char *infn,int infnlen,
+	   unsigned char *outfn,unsigned short *outfnlen,
+	   int original,int assoc)
+{
+	int inidx;
+	unsigned short outlen = 0;
+
+	if (assoc) {
+		*outfn++ = ASSOCCHAR;
+		outlen++;
+	}
+	/*
+	 * Input position and output length are counted separately, so the
+	 * ASSOCCHAR prefix does not shorten the part of the name that is
+	 * translated.
+	 */
+	for (inidx = 0; inidx < infnlen; inidx++) {
+		unsigned char c = infn[inidx];
+
+		if (!original) {
+			if (c == ';')
+				break;
+			/* look ahead only while still inside the name */
+			if (c == '.' && inidx + 1 < infnlen
+			    && infn[inidx + 1] == ';')
+				break;
+			if (c >= 'A' && c <= 'Z')
+				c += 'a' - 'A';
+		}
+		*outfn++ = c;
+		outlen++;
+	}
+	*outfnlen = outlen;
+}
diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c
new file mode 100644
index 0000000..02dd92a
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vfsops.c
@@ -0,0 +1,681 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vfsops.c 8.3 (Berkeley) 1/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/file.h>
+#include <sys/dkbad.h>
+#include <sys/disklabel.h>
+#include <sys/ioctl.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+
+extern int enodev ();
+
+struct vfsops cd9660_vfsops = { /* operations vector; installed as mp->mnt_op by cd9660_mountroot() */
+ cd9660_mount, /* mount system call */
+ cd9660_start, /* nothing to do, returns 0 */
+ cd9660_unmount, /* unmount system call */
+ cd9660_root, /* return root vnode of the filesystem */
+ cd9660_quotactl, /* quotas: EOPNOTSUPP */
+ cd9660_statfs, /* file system statistics */
+ cd9660_sync, /* returns 0 */
+ cd9660_vget, /* flat namespace lookup: EOPNOTSUPP */
+ cd9660_fhtovp, /* file handle to vnode */
+ cd9660_vptofh, /* vnode to file handle */
+ cd9660_init, /* defined outside this file section */
+};
+
+/*
+ * Called by vfs_mountroot when iso is going to be mounted as root.
+ *
+ * Name is updated by mount(8) after booting.
+ */
+#define ROOTNAME "root_device"
+
+static iso_mountfs();
+
+cd9660_mountroot() /* mounts rootdev read-only as "/"; returns 0 or an errno value (implicit int) */
+{
+ register struct mount *mp;
+ extern struct vnode *rootvp;
+ struct proc *p = curproc; /* XXX */
+ struct iso_mnt *imp;
+ register struct fs *fs; /* unused */
+ u_int size;
+ int error;
+ struct iso_args args; /* only .flags is filled in; iso_mountfs() reads no other member */
+
+ /*
+ * Get vnodes for swapdev and rootdev.
+ */
+ if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp))
+ panic("cd9660_mountroot: can't setup bdevvp's");
+
+ mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK);
+ bzero((char *)mp, (u_long)sizeof(struct mount));
+ mp->mnt_op = &cd9660_vfsops;
+ mp->mnt_flag = MNT_RDONLY; /* iso_mountfs() refuses anything but a read-only mount */
+ args.flags = ISOFSMNT_ROOT;
+ if (error = iso_mountfs(rootvp, mp, p, &args)) {
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ if (error = vfs_lock(mp)) {
+ (void)cd9660_unmount(mp, 0, p); /* undo iso_mountfs() before giving up */
+ free(mp, M_MOUNT);
+ return (error);
+ }
+ TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+ mp->mnt_flag |= MNT_ROOTFS;
+ mp->mnt_vnodecovered = NULLVP;
+ imp = VFSTOISOFS(mp);
+ bzero(imp->im_fsmnt, sizeof(imp->im_fsmnt));
+ imp->im_fsmnt[0] = '/'; /* mounted-on name is "/" */
+ bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); /* zero the rest of the name buffer */
+ (void) cd9660_statfs(mp, &mp->mnt_stat, p);
+ vfs_unlock(mp);
+ return (0);
+}
+
+/*
+ * Flag to allow forcible unmounting.
+ * When it is zero, cd9660_unmount() answers MNT_FORCE requests
+ * with EINVAL.
+ */
+int iso_doforce = 1;
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * Copies the iso_args structure in from user space, insists on a
+ * read-only mount (EROFS otherwise), resolves args.fspec to a block
+ * device and either mounts it through iso_mountfs() or, for MNT_UPDATE,
+ * checks that it is the device already mounted.  On success the
+ * mounted-on and mounted-from names are recorded and the statfs data
+ * is refreshed.  Returns 0 or an errno value.
+ */
+cd9660_mount(mp, path, data, ndp, p)
+ register struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ struct vnode *devvp;
+ struct iso_args args;
+ u_int size;
+ int error;
+ struct iso_mnt *imp;
+
+ if (error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))
+ return (error);
+
+ if ((mp->mnt_flag & MNT_RDONLY) == 0)
+ return (EROFS);
+
+ /*
+ * If updating and no device name was given, only the export
+ * information is updated through vfs_export(); that's all we do.
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ imp = VFSTOISOFS(mp);
+ if (args.fspec == 0)
+ return (vfs_export(mp, &imp->im_export, &args.export));
+ }
+ /*
+ * Not an update, or updating the name: look up the name
+ * and verify that it refers to a sensible block device.
+ */
+ NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p);
+ if (error = namei(ndp))
+ return (error);
+ devvp = ndp->ni_vp;
+
+ if (devvp->v_type != VBLK) {
+ vrele(devvp);
+ return ENOTBLK;
+ }
+ if (major(devvp->v_rdev) >= nblkdev) {
+ vrele(devvp);
+ return ENXIO;
+ }
+ if ((mp->mnt_flag & MNT_UPDATE) == 0)
+ error = iso_mountfs(devvp, mp, p, &args);
+ else {
+ if (devvp != imp->im_devvp)
+ error = EINVAL; /* needs translation */
+ else
+ vrele(devvp); /* same device: drop the reference taken by namei() */
+ }
+ if (error) {
+ vrele(devvp); /* failure: drop the namei() reference */
+ return error;
+ }
+ imp = VFSTOISOFS(mp);
+ (void) copyinstr(path, imp->im_fsmnt, sizeof(imp->im_fsmnt)-1, &size);
+ bzero(imp->im_fsmnt + size, sizeof(imp->im_fsmnt) - size); /* zero the rest of the name buffer */
+ bcopy((caddr_t)imp->im_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname,
+ MNAMELEN);
+ (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+ &size);
+ bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+ (void) cd9660_statfs(mp, &mp->mnt_stat, p);
+ return 0;
+}
+
+/*
+ * Common code for mount and mountroot
+ *
+ *	devvp	vnode of the block device holding the volume
+ *	mp	mount structure being set up; must be flagged MNT_RDONLY
+ *	p	calling process
+ *	argp	mount arguments; only argp->flags is used, and it is
+ *		updated to reflect whether Rock Ridge was found
+ *
+ * Opens the device, locates the primary volume descriptor, allocates
+ * and fills the iso_mnt structure and, unless ISOFSMNT_NORRIP is set,
+ * probes the root directory for the Rock Ridge extension.
+ * Returns 0 on success or an errno value; on failure the device is
+ * closed again and nothing stays attached to `mp'.
+ */
+static iso_mountfs(devvp, mp, p, argp)
+	register struct vnode *devvp;
+	struct mount *mp;
+	struct proc *p;
+	struct iso_args *argp;
+{
+	register struct iso_mnt *isomp = (struct iso_mnt *)0;
+	struct buf *bp = NULL;
+	dev_t dev = devvp->v_rdev;
+	int error = EINVAL;
+	int needclose = 0;
+	int ronly = (mp->mnt_flag & MNT_RDONLY) != 0;
+	extern struct vnode *rootvp;
+	int iso_bsize;
+	int iso_blknum;
+	struct iso_volume_descriptor *vdp;
+	struct iso_primary_descriptor *pri;
+	struct iso_directory_record *rootp;
+	int logical_block_size;
+
+	if (!ronly)
+		return EROFS;
+
+	/*
+	 * Disallow multiple mounts of the same device.
+	 * Disallow mounting of a device that is currently in use
+	 * (except for root, which might share swap device for miniroot).
+	 * Flush out any old buffers remaining from a previous use.
+	 */
+	if (error = vfs_mountedon(devvp))
+		return error;
+	if (vcount(devvp) > 1 && devvp != rootvp)
+		return EBUSY;
+	if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))
+		return (error);
+
+	if (error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p))
+		return error;
+	needclose = 1;
+
+	/* This is the "logical sector size".  The standard says this
+	 * should be 2048 or the physical sector size on the device,
+	 * whichever is greater.  For now, we'll just use a constant.
+	 */
+	iso_bsize = ISO_DEFAULT_BLOCK_SIZE;
+
+	for (iso_blknum = 16; iso_blknum < 100; iso_blknum++) {
+		if (error = bread (devvp, btodb(iso_blknum * iso_bsize),
+				   iso_bsize, NOCRED, &bp))
+			goto out;
+
+		vdp = (struct iso_volume_descriptor *)bp->b_un.b_addr;
+		if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (vdp->type) == ISO_VD_END) {
+			error = EINVAL;
+			goto out;
+		}
+
+		if (isonum_711 (vdp->type) == ISO_VD_PRIMARY)
+			break;
+		brelse(bp);
+		bp = NULL;	/* released: must not be touched or released again */
+	}
+
+	/*
+	 * Leaving the loop without a buffer means that none of the
+	 * descriptors examined was the primary one.
+	 */
+	if (bp == NULL) {
+		error = EINVAL;
+		goto out;
+	}
+
+	pri = (struct iso_primary_descriptor *)vdp;
+
+	logical_block_size = isonum_723 (pri->logical_block_size);
+
+	if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE
+	    || (logical_block_size & (logical_block_size - 1)) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+
+	rootp = (struct iso_directory_record *)pri->root_directory_record;
+
+	isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK);
+	bzero((caddr_t)isomp, sizeof *isomp);
+	isomp->logical_block_size = logical_block_size;
+	isomp->volume_space_size = isonum_733 (pri->volume_space_size);
+	bcopy (rootp, isomp->root, sizeof isomp->root);
+	isomp->root_extent = isonum_733 (rootp->extent);
+	isomp->root_size = isonum_733 (rootp->size);
+
+	/*
+	 * From here on use the private copy of the root record; the
+	 * descriptor buffer it came from is about to be released.
+	 */
+	rootp = (struct iso_directory_record *)isomp->root;
+
+	isomp->im_bmask = logical_block_size - 1;
+	isomp->im_bshift = 0;
+	while ((1 << isomp->im_bshift) < isomp->logical_block_size)
+		isomp->im_bshift++;
+
+	bp->b_flags |= B_AGE;
+	brelse(bp);
+	bp = NULL;
+
+	mp->mnt_data = (qaddr_t)isomp;
+	mp->mnt_stat.f_fsid.val[0] = (long)dev;
+	mp->mnt_stat.f_fsid.val[1] = MOUNT_CD9660;
+	mp->mnt_maxsymlinklen = 0;
+	mp->mnt_flag |= MNT_LOCAL;
+	isomp->im_mountp = mp;
+	isomp->im_dev = dev;
+	isomp->im_devvp = devvp;
+
+	devvp->v_specflags |= SI_MOUNTEDON;
+
+	/* Check the Rock Ridge Extension support */
+	if (!(argp->flags & ISOFSMNT_NORRIP)) {
+		if (error = bread (isomp->im_devvp,
+				   (isomp->root_extent + isonum_711(rootp->ext_attr_length))
+				   * isomp->logical_block_size / DEV_BSIZE,
+				   isomp->logical_block_size,NOCRED,&bp))
+			goto out;
+
+		rootp = (struct iso_directory_record *)bp->b_un.b_addr;
+
+		if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) {
+			argp->flags |= ISOFSMNT_NORRIP;
+		} else {
+			argp->flags &= ~ISOFSMNT_GENS;
+		}
+
+		/*
+		 * The contents are valid,
+		 * but they will get reread as part of another vnode, so...
+		 */
+		bp->b_flags |= B_AGE;
+		brelse(bp);
+		bp = NULL;
+	}
+	isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT);
+	switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) {
+	default:
+		isomp->iso_ftype = ISO_FTYPE_DEFAULT;
+		break;
+	case ISOFSMNT_GENS|ISOFSMNT_NORRIP:
+		isomp->iso_ftype = ISO_FTYPE_9660;
+		break;
+	case 0:
+		isomp->iso_ftype = ISO_FTYPE_RRIP;
+		break;
+	}
+
+	return 0;
+out:
+	if (bp)
+		brelse(bp);
+	if (needclose)
+		(void)VOP_CLOSE(devvp, ronly ? FREAD : FREAD|FWRITE, NOCRED, p);
+	if (isomp) {
+		/*
+		 * vfs_mountedon() found the device unmounted on entry, so
+		 * the mark set above is ours to take back.
+		 */
+		devvp->v_specflags &= ~SI_MOUNTEDON;
+		free((caddr_t)isomp, M_ISOFSMNT);
+		mp->mnt_data = (qaddr_t)0;
+	}
+	return error;
+}
+
+/*
+ * Make a filesystem operational.
+ * There is nothing to prepare, so success is reported right away.
+ */
+/* ARGSUSED */
+cd9660_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * MNT_FORCE is honoured only while iso_doforce is set and never for
+ * the root filesystem (EINVAL otherwise).  After the vnodes have been
+ * flushed, the device is unmarked, closed and released and the
+ * per-mount data is freed.  Returns the vflush() error if flushing
+ * failed, otherwise the result of closing the device.
+ */
+int
+cd9660_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	register struct iso_mnt *imp;
+	int error;
+	int vflags = 0;
+
+	if (mntflags & MNT_FORCE) {
+		if (iso_doforce == 0 || (mp->mnt_flag & MNT_ROOTFS) != 0)
+			return (EINVAL);
+		vflags |= FORCECLOSE;
+	}
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp))
+		return EBUSY;
+#endif
+	error = vflush(mp, NULLVP, vflags);
+	if (error)
+		return (error);
+
+	imp = VFSTOISOFS(mp);
+
+#ifdef	ISODEVMAP
+	if (imp->iso_ftype == ISO_FTYPE_RRIP)
+		iso_dunmap(imp->im_dev);
+#endif
+
+	/* give the device back */
+	imp->im_devvp->v_specflags &= ~SI_MOUNTEDON;
+	error = VOP_CLOSE(imp->im_devvp, FREAD, NOCRED, p);
+	vrele(imp->im_devvp);
+
+	/* and detach the per-mount data */
+	free((caddr_t)imp, M_ISOFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Return root of a filesystem
+ *
+ * A temporary vnode/inode pair on the stack describes the mount to
+ * iso_iget(); the node it hands back is returned through `vpp'.
+ * Returns 0, or the error from iso_iget() (in which case `*vpp' is
+ * left untouched).
+ */
+cd9660_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct iso_mnt *imp = VFSTOISOFS (mp);
+	struct iso_directory_record *rootrec;
+	struct iso_node scratch, *ip, *found;
+	struct vnode fakevp;
+	int error;
+
+	fakevp.v_mount = mp;
+	fakevp.v_data = &scratch;
+	ip = VTOI(&fakevp);
+	ip->i_vnode = &fakevp;
+	ip->i_dev = imp->im_dev;
+	ip->i_diroff = 0;
+	rootrec = (struct iso_directory_record *)imp->root;
+	isodirino(&ip->i_number,rootrec,imp);
+
+	/*
+	 * With RRIP we must use the `.' entry of the root directory.
+	 * Simply tell iget, that it's a relocated directory.
+	 */
+	error = iso_iget(ip,ip->i_number,
+			 imp->iso_ftype == ISO_FTYPE_RRIP,
+			 &found,rootrec);
+	if (error == 0)
+		*vpp = ITOV(found);
+	return error;
+}
+
+/*
+ * Quota operations are not supported:
+ * every request is answered with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Get file system statistics.
+ *
+ * Block size and block count come from the mount data; all free and
+ * file counts are reported as zero.  The mount names are copied over
+ * unless `sbp' already is the mount's own statfs structure.
+ * Always returns 0.
+ */
+cd9660_statfs(mp, sbp, p)
+	struct mount *mp;
+	register struct statfs *sbp;
+	struct proc *p;
+{
+	register struct iso_mnt *imp = VFSTOISOFS(mp);
+
+	sbp->f_type = MOUNT_CD9660;
+	sbp->f_bsize = imp->logical_block_size;
+	sbp->f_iosize = sbp->f_bsize;	/* XXX */
+	sbp->f_blocks = imp->volume_space_size;
+	sbp->f_bfree = 0;	/* total free blocks */
+	sbp->f_bavail = 0;	/* blocks free for non superuser */
+	sbp->f_files = 0;	/* total files */
+	sbp->f_ffree = 0;	/* free file nodes */
+
+	if (sbp != &mp->mnt_stat) {
+		bcopy((caddr_t)mp->mnt_stat.f_mntonname,
+		      (caddr_t)&sbp->f_mntonname[0], MNAMELEN);
+		bcopy((caddr_t)mp->mnt_stat.f_mntfromname,
+		      (caddr_t)&sbp->f_mntfromname[0], MNAMELEN);
+	}
+
+	/* Use the first spare for flags: */
+	sbp->f_spare[0] = imp->im_flags;
+	return (0);
+}
+
+/*
+ * Sync request: this implementation has nothing to do
+ * and reports success.
+ */
+/* ARGSUSED */
+int
+cd9660_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return 0;
+}
+
+/*
+ * Flat namespace lookup.
+ * Not implemented: always fails with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+cd9660_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * File handle to vnode
+ *
+ * Have to be really careful about stale file handles:
+ * - check that the inode number is in range
+ * - call iget() to get the locked inode
+ * - check for an unallocated inode (i_mode == 0)
+ * - check that the generation number matches
+ *
+ * Note: struct ifid carries no generation number; cd9660_fhtovp()
+ * compares the recorded start block (ifid_start) instead.
+ */
+
+struct ifid {
+ ushort ifid_len; /* set to sizeof(struct ifid) by cd9660_vptofh() */
+ ushort ifid_pad; /* not written by cd9660_vptofh() */
+ int ifid_ino; /* node number (i_number); cd9660_fhtovp() splits it into block and offset */
+ long ifid_start; /* iso_start of the node; compared with extent + ext_attr_length of the record */
+};
+
+/*
+ * Convert a file handle into a vnode.
+ *
+ *	mp		mount the handle refers to
+ *	fhp		file handle, interpreted as a struct ifid
+ *	nam		client address, used for the export lookup
+ *	vpp		receives the vnode; NULLVP on every failure
+ *	exflagsp	receives the export flags on success
+ *	credanonp	receives the anonymous credentials on success
+ *
+ * Returns EACCES if the client is not in the export list, ESTALE if
+ * the handle does not describe a plausible directory record (block or
+ * offset out of range, start block mismatch, mode 0), the error of
+ * bread() or iso_iget() if either fails, and 0 on success.
+ */
+/* ARGSUSED */
+int
+cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	register struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct vnode tvp;
+	int error;
+	int lbn, off;
+	struct ifid *ifhp;
+	struct iso_mnt *imp;
+	struct buf *bp;
+	struct iso_directory_record *dirp;
+	struct iso_node tip, *ip, *nip;
+	struct netcred *np;
+
+	/* no failure path may leave the caller without a defined result */
+	*vpp = NULLVP;
+
+	imp = VFSTOISOFS (mp);
+	ifhp = (struct ifid *)fhp;
+
+#ifdef	ISOFS_DBG
+	printf("fhtovp: ino %d, start %ld\n",
+	       ifhp->ifid_ino, ifhp->ifid_start);
+#endif
+
+	np = vfs_export_lookup(mp, &imp->im_export, nam);
+	if (np == NULL)
+		return (EACCES);
+
+	/* the node number encodes block and offset of the directory record */
+	lbn = iso_lblkno(imp, ifhp->ifid_ino);
+	if (lbn >= imp->volume_space_size) {
+		printf("fhtovp: lbn exceed volume space %d\n", lbn);
+		return (ESTALE);
+	}
+
+	off = iso_blkoff(imp, ifhp->ifid_ino);
+	if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) {
+		printf("fhtovp: crosses block boundary %d\n",
+		       off + ISO_DIRECTORY_RECORD_SIZE);
+		return (ESTALE);
+	}
+
+	error = bread(imp->im_devvp, btodb(lbn * imp->logical_block_size),
+		      imp->logical_block_size, NOCRED, &bp);
+	if (error) {
+		printf("fhtovp: bread error %d\n",error);
+		brelse(bp);
+		return (error);
+	}
+
+	dirp = (struct iso_directory_record *)(bp->b_un.b_addr + off);
+	if (off + isonum_711(dirp->length) > imp->logical_block_size) {
+		brelse(bp);
+		printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n",
+		       off+isonum_711(dirp->length), off,
+		       isonum_711(dirp->length));
+		return (ESTALE);
+	}
+
+	if (isonum_733(dirp->extent) + isonum_711(dirp->ext_attr_length) !=
+	    ifhp->ifid_start) {
+		brelse(bp);
+		/* ifid_start is a long: print it as one */
+		printf("fhtovp: file start miss %d vs %ld\n",
+		       isonum_733(dirp->extent)+isonum_711(dirp->ext_attr_length),
+		       ifhp->ifid_start);
+		return (ESTALE);
+	}
+	brelse(bp);
+
+	/*
+	 * NOTE(review): dirp still points into the buffer released just
+	 * above when it is handed to iso_iget() -- confirm that iso_iget()
+	 * tolerates this, or copy the record before the release.
+	 */
+	ip = &tip;
+	tvp.v_mount = mp;
+	tvp.v_data = ip;
+	ip->i_vnode = &tvp;
+	ip->i_dev = imp->im_dev;
+	if (error = iso_iget(ip, ifhp->ifid_ino, 0, &nip, dirp)) {
+		printf("fhtovp: failed to get inode\n");
+		return (error);
+	}
+	ip = nip;
+	/*
+	 * XXX need generation number?
+	 */
+	if (ip->inode.iso_mode == 0) {
+		iso_iput(ip);
+		printf("fhtovp: inode mode == 0\n");
+		return (ESTALE);
+	}
+	*vpp = ITOV(ip);
+	*exflagsp = np->netc_exflags;
+	*credanonp = &np->netc_anon;
+	return 0;
+}
+
+/*
+ * Vnode pointer to File handle
+ *
+ * Stores the handle length, the node number and the node's start
+ * block in `fhp', viewed as a struct ifid.  Always returns 0.
+ */
+/* ARGSUSED */
+cd9660_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	register struct iso_node *ip = VTOI(vp);
+	register struct ifid *ifhp = (struct ifid *)fhp;
+
+	ifhp->ifid_len = sizeof(struct ifid);
+	ifhp->ifid_start = ip->iso_start;
+	ifhp->ifid_ino = ip->i_number;
+
+#ifdef	ISOFS_DBG
+	printf("vptofh: ino %d, start %ld\n",
+	       ifhp->ifid_ino,ifhp->ifid_start);
+#endif
+	return (0);
+}
diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c
new file mode 100644
index 0000000..59f5a73
--- /dev/null
+++ b/sys/fs/cd9660/cd9660_vnops.c
@@ -0,0 +1,1038 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)cd9660_vnops.c 8.3 (Berkeley) 1/23/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/resourcevar.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/conf.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+#include <sys/malloc.h>
+#include <sys/dir.h>
+
+#include <isofs/cd9660/iso.h>
+#include <isofs/cd9660/cd9660_node.h>
+#include <isofs/cd9660/iso_rrip.h>
+
+#if 0
+/*
+ * Mknod vnode call
+ * Actually remap the device number
+ *
+ * This whole definition is compiled out by the surrounding "#if 0";
+ * further down in this file cd9660_mknod is instead defined as a macro
+ * that expands to cd9660_enotsupp.
+ *
+ * Without ISODEVMAP the routine only frees the pathname buffer, releases
+ * the two vnodes held in the nameidata and returns EINVAL.  With
+ * ISODEVMAP it accepts only ISO_FTYPE_RRIP mounts and VCHR/VBLK nodes
+ * whose type matches `vap'; it then looks up the iso_dmap() entry for the
+ * node and either removes it (requested rdev equals the on-disc rdev, or
+ * is VNOVAL) or stores the requested rdev in it.
+ *
+ * NOTE(review): the arguments follow the old nameidata convention rather
+ * than the vop_*_args convention used by the other routines in this
+ * file, and the tail of the ISODEVMAP branch writes to `vp' after
+ * vput(vp) -- presumably both need attention before re-enabling this.
+ */
+cd9660_mknod(ndp, vap, cred, p)
+ struct nameidata *ndp;
+ struct ucred *cred;
+ struct vattr *vap;
+ struct proc *p;
+{
+#ifndef ISODEVMAP
+ free(ndp->ni_pnbuf, M_NAMEI);
+ vput(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ return EINVAL;
+#else
+ register struct vnode *vp;
+ struct iso_node *ip;
+ struct iso_dnode *dp;
+ int error;
+
+ vp = ndp->ni_vp;
+ ip = VTOI(vp);
+
+ if (ip->i_mnt->iso_ftype != ISO_FTYPE_RRIP
+ || vap->va_type != vp->v_type
+ || (vap->va_type != VCHR && vap->va_type != VBLK)) {
+ free(ndp->ni_pnbuf, M_NAMEI);
+ vput(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ return EINVAL;
+ }
+
+ dp = iso_dmap(ip->i_dev,ip->i_number,1);
+ if (ip->inode.iso_rdev == vap->va_rdev || vap->va_rdev == VNOVAL) {
+ /* same as the unmapped one, delete the mapping */
+ remque(dp);
+ FREE(dp,M_CACHE);
+ } else
+ /* enter new mapping */
+ dp->d_dev = vap->va_rdev;
+
+ /*
+ * Remove inode so that it will be reloaded by iget and
+ * checked to see if it is an alias of an existing entry
+ * in the inode cache.
+ */
+ vput(vp);
+ vp->v_type = VNON;
+ vgone(vp);
+ return (0);
+#endif
+}
+#endif
+
+/*
+ * Open vnode op.
+ *
+ * There is no per-open work to do, so every request simply
+ * succeeds: the arguments are ignored and 0 is returned.
+ */
+/* ARGSUSED */
+int
+cd9660_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return 0;
+}
+
+/*
+ * Close vnode op.
+ *
+ * Nothing is recorded or updated when a file is closed here; the
+ * arguments are ignored and 0 is returned.
+ */
+/* ARGSUSED */
+int
+cd9660_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return 0;
+}
+
+/*
+ * Access vnode op.
+ *
+ * No permission checking is performed: whatever mode and credentials
+ * are passed in, the request is granted and 0 is returned.
+ */
+/* ARGSUSED */
+int
+cd9660_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (0);
+}
+
+/*
+ * Getattr vnode op.
+ *
+ * Fills *a_vap from the attributes held in the iso_node behind a_vp:
+ * device and inode number, mode, link count, owner, group, the three
+ * timestamps, rdev and size.  va_bytes is reported equal to the file
+ * size, va_blocksize is the mount's logical block size, va_flags is 0
+ * and va_gen is fixed at 1.  The credentials and process arguments are
+ * not consulted.  Always returns 0.
+ */
+int
+cd9660_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+
+{
+	struct vnode *vp = ap->a_vp;
+	register struct vattr *vap = ap->a_vap;
+	register struct iso_node *ip = VTOI(vp);
+
+	vap->va_fsid	= ip->i_dev;
+	vap->va_fileid	= ip->i_number;
+
+	vap->va_mode	= ip->inode.iso_mode;
+	vap->va_nlink	= ip->inode.iso_links;
+	vap->va_uid	= ip->inode.iso_uid;
+	vap->va_gid	= ip->inode.iso_gid;
+	vap->va_atime	= ip->inode.iso_atime;
+	vap->va_mtime	= ip->inode.iso_mtime;
+	vap->va_ctime	= ip->inode.iso_ctime;
+	vap->va_rdev	= ip->inode.iso_rdev;
+
+	vap->va_size	= (u_quad_t) ip->i_size;
+	vap->va_flags	= 0;
+	vap->va_gen = 1;
+	vap->va_blocksize = ip->i_mnt->logical_block_size;
+	vap->va_bytes	= (u_quad_t) ip->i_size;
+	vap->va_type	= vp->v_type;
+	return (0);
+}
+
+#if ISO_DEFAULT_BLOCK_SIZE >= NBPG
+#ifdef DEBUG
+extern int doclusterread;
+#else
+#define doclusterread 1
+#endif
+#else
+/*
+ * XXX until cluster routines can handle block sizes less than one page
+ *
+ * doclusterread is tested in cd9660_read() to choose between the
+ * cluster_read() path (non-zero) and the bread()/breadn() path (zero).
+ * When ISO_DEFAULT_BLOCK_SIZE is at least NBPG it is the constant 1, or
+ * an external int variable in DEBUG builds; otherwise it is forced to 0
+ * here.
+ */
+#define doclusterread 0
+#endif
+
+/*
+ * Vnode op for reading.
+ *
+ * Copies file data into the caller's uio one logical block at a time,
+ * starting at uio_offset.  The loop ends when the request is satisfied,
+ * when the offset reaches the file size, when a pass moves no bytes, or
+ * on the first error.  Depending on doclusterread a block is fetched
+ * with cluster_read()/bread(), or with breadn() (one block of
+ * read-ahead) when the previous read was of the preceding block.
+ *
+ * Returns 0 on success (including a zero-length request or a read at or
+ * past end of file), EINVAL for a negative offset, or the error from the
+ * buffer read or from uiomove().
+ */
+int
+cd9660_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	register struct uio *uio = ap->a_uio;
+	register struct iso_node *ip = VTOI(vp);
+	register struct iso_mnt *imp;
+	struct buf *bp;
+	daddr_t lbn, rablock;
+	off_t diff;
+	int rasize, error = 0;
+	long size, n, on;
+
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	ip->i_flag |= IACC;
+	imp = ip->i_mnt;
+	do {
+		/* Split the file offset into block number and offset in block. */
+		lbn = iso_lblkno(imp, uio->uio_offset);
+		on = iso_blkoff(imp, uio->uio_offset);
+
+		/*
+		 * Limit this pass to the rest of the block, the rest of
+		 * the request and the rest of the file, in that order.
+		 */
+		n = min((unsigned)(imp->logical_block_size - on),
+		    uio->uio_resid);
+		diff = (off_t)ip->i_size - uio->uio_offset;
+		if (diff <= 0)
+			return (0);
+		if (diff < n)
+			n = diff;
+
+		size = iso_blksize(imp, ip, lbn);
+		rablock = lbn + 1;
+		if (doclusterread) {
+			if (iso_lblktosize(imp, rablock) <= ip->i_size)
+				error = cluster_read(vp, (off_t)ip->i_size,
+				    lbn, size, NOCRED, &bp);
+			else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		} else {
+			/* Read ahead only when continuing from the previous block. */
+			if (vp->v_lastr + 1 == lbn &&
+			    iso_lblktosize(imp, rablock) < ip->i_size) {
+				rasize = iso_blksize(imp, ip, rablock);
+				error = breadn(vp, lbn, size, &rablock,
+				    &rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, lbn, size, NOCRED, &bp);
+		}
+		vp->v_lastr = lbn;
+		/*
+		 * NOTE(review): bp is examined before the error test and
+		 * released on the error path, so this code relies on the
+		 * read routines handing back a buffer even on failure.
+		 */
+		n = min(n, size - bp->b_resid);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+
+		error = uiomove(bp->b_un.b_addr + on, (int)n, uio);
+		/* Flag B_AGE once the block end or the end of file is reached. */
+		if (n + on == imp->logical_block_size ||
+		    uio->uio_offset == (off_t)ip->i_size)
+			bp->b_flags |= B_AGE;
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+	return (error);
+}
+
+/*
+ * Ioctl vnode op.
+ *
+ * No ioctls are implemented: a console message is printed and ENOTTY
+ * is returned for every request.
+ */
+/* ARGSUSED */
+int
+cd9660_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	printf("You did ioctl for isofs !!\n");
+	return ENOTTY;
+}
+
+/*
+ * Select vnode op.
+ *
+ * Always answers 1 without looking at the arguments.
+ * XXX should really check whether I/O is possible.
+ */
+/* ARGSUSED */
+int
+cd9660_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return 1;
+}
+
+/*
+ * Mmap vnode op.
+ *
+ * Mapping files is currently unsupported; every request is refused
+ * with EINVAL.
+ */
+/* ARGSUSED */
+int
+cd9660_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return EINVAL;
+}
+
+/*
+ * Seek vnode op.
+ *
+ * Neither the old nor the new offset is examined; the call always
+ * succeeds with 0.
+ */
+/* ARGSUSED */
+int
+cd9660_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t  a_oldoff;
+		off_t  a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+
+	return 0;
+}
+
+/*
+ * Structure for reading directories
+ *
+ * Working state shared by cd9660_readdir(), iso_shipdir() and
+ * iso_uiodir() for the duration of one readdir call.
+ */
+struct isoreaddir {
+ struct dirent saveent; /* held-back ordinary entry; d_namlen 0 when empty */
+ struct dirent assocent; /* held-back entry whose name starts with ASSOCCHAR; d_namlen 0 when empty */
+ struct dirent current; /* entry being assembled from the current record */
+ off_t saveoff; /* curroff value recorded when saveent was stored */
+ off_t assocoff; /* curroff value recorded when assocent was stored */
+ off_t curroff; /* current offset within the directory */
+ struct uio *uio; /* destination of the copied-out entries */
+ off_t uio_off; /* offset passed with the last entry successfully copied out */
+ u_int *cookiep; /* next cookie slot to fill, or 0 when no cookies are wanted */
+ int ncookies; /* cookie slots still available */
+ int eof; /* cleared by iso_uiodir() when it stops for lack of room */
+};
+
+/*
+ * Copy one finished directory entry out to the caller.
+ *
+ * The name in `dp' is NUL-terminated and d_reclen is computed before the
+ * entry is moved through idp->uio.  When cookies are being collected,
+ * `off' is stored in the next cookie slot first.  On success `off' is
+ * also remembered in idp->uio_off.
+ *
+ * Returns 0 on success, the uiomove() error on failure, or -1 (after
+ * clearing idp->eof) when the uio or the cookie array has no room left.
+ */
+static int
+iso_uiodir(idp,dp,off)
+	struct isoreaddir *idp;
+	struct dirent *dp;
+	off_t off;
+{
+	int error;
+
+	dp->d_name[dp->d_namlen] = 0;
+	dp->d_reclen = DIRSIZ(dp);
+
+	/* Not enough space left in the caller's buffer. */
+	if (idp->uio->uio_resid < dp->d_reclen)
+		goto noroom;
+
+	if (idp->cookiep) {
+		/* Cookie array exhausted. */
+		if (idp->ncookies <= 0)
+			goto noroom;
+
+		*idp->cookiep = off;
+		idp->cookiep++;
+		idp->ncookies--;
+	}
+
+	error = uiomove(dp,dp->d_reclen,idp->uio);
+	if (error)
+		return error;
+	idp->uio_off = off;
+	return 0;
+
+noroom:
+	idp->eof = 0;
+	return -1;
+}
+
+/*
+ * Queue idp->current, flushing the held-back entries when it starts a
+ * new name.
+ *
+ * An entry whose name is longer than one character and begins with
+ * ASSOCCHAR is treated as an "associated" entry and compared without
+ * that leading character.  If a held-back entry exists (saveent, or
+ * else assocent) and its name differs from the current one, assocent
+ * and then saveent are copied out through iso_uiodir() and marked
+ * empty.  Finally the current entry is stored in assocent or saveent,
+ * together with the current directory offset.
+ *
+ * Returns 0, or whatever non-zero value iso_uiodir() returned.
+ */
+static int
+iso_shipdir(idp)
+	struct isoreaddir *idp;
+{
+	struct dirent *held;
+	char *curname, *heldname;
+	int curlen, heldlen, is_assoc;
+	int error;
+
+	curlen = idp->current.d_namlen;
+	curname = idp->current.d_name;
+	is_assoc = (curlen > 1 && *curname == ASSOCCHAR);
+	if (is_assoc) {
+		/* Compare without the leading ASSOCCHAR. */
+		curlen--;
+		curname++;
+	}
+
+	/* Prefer the held ordinary entry; fall back to the associated one. */
+	held = &idp->saveent;
+	heldname = held->d_name;
+	heldlen = held->d_namlen;
+	if (heldlen == 0) {
+		held = &idp->assocent;
+		heldname = held->d_name + 1;
+		heldlen = held->d_namlen - 1;
+	}
+
+	if (heldlen > 0 &&
+	    (heldlen != curlen || bcmp(heldname,curname,heldlen))) {
+		if (idp->assocent.d_namlen) {
+			error = iso_uiodir(idp,&idp->assocent,idp->assocoff);
+			if (error)
+				return error;
+			idp->assocent.d_namlen = 0;
+		}
+		if (idp->saveent.d_namlen) {
+			error = iso_uiodir(idp,&idp->saveent,idp->saveoff);
+			if (error)
+				return error;
+			idp->saveent.d_namlen = 0;
+		}
+	}
+
+	idp->current.d_reclen = DIRSIZ(&idp->current);
+	if (is_assoc) {
+		idp->assocoff = idp->curroff;
+		bcopy(&idp->current,&idp->assocent,idp->current.d_reclen);
+	} else {
+		idp->saveoff = idp->curroff;
+		bcopy(&idp->current,&idp->saveent,idp->current.d_reclen);
+	}
+	return 0;
+}
+
+/*
+ * Vnode op for readdir
+ *
+ * Walks the directory records from uio_offset up to the directory size,
+ * converts each into a struct dirent and copies it out, either directly
+ * through iso_uiodir() or, for ISO_FTYPE_DEFAULT mounts, through
+ * iso_shipdir().  On return uio_offset holds the offset
+ * recorded with the last entry actually copied out, or is left
+ * unchanged when nothing was copied.
+ *
+ * Returns 0 on success (running out of room in the caller's buffer is
+ * not an error), EINVAL for a malformed record, or the error from
+ * reading a directory block or from uiomove().
+ *
+ * XXX make sure everything still works now that eofflagp and cookiep
+ * are no longer args.
+ */
+int
+cd9660_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct uio *uio = ap->a_uio;
+	struct isoreaddir *idp;
+	int entryoffsetinblock;
+	int error = 0;
+	int endsearch;
+	struct iso_directory_record *ep;
+	u_short elen;
+	u_short namelen;
+	int reclen;
+	struct iso_mnt *imp;
+	struct iso_node *ip;
+	struct buf *bp = NULL;
+
+	ip = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	MALLOC(idp,struct isoreaddir *,sizeof(*idp),M_TEMP,M_WAITOK);
+	idp->saveent.d_namlen = 0;
+	idp->assocent.d_namlen = 0;
+	idp->uio = uio;
+	/*
+	 * No cookie array or EOF flag is passed in through this
+	 * interface, so cookies stay disabled; the related fields are
+	 * still given defined values.
+	 */
+	idp->cookiep = 0;
+	idp->ncookies = 0;
+	idp->eof = 1;
+	idp->curroff = uio->uio_offset;
+	/*
+	 * uio_off is copied back into uio_offset at the end; start it at
+	 * the incoming offset so that a call which copies out no entry
+	 * does not hand back an uninitialized value.
+	 */
+	idp->uio_off = uio->uio_offset;
+
+	entryoffsetinblock = iso_blkoff(imp, idp->curroff);
+	if (entryoffsetinblock != 0) {
+		if (error = iso_blkatoff(ip, idp->curroff, &bp)) {
+			FREE(idp,M_TEMP);
+			return (error);
+		}
+	}
+
+	endsearch = ip->i_size;
+
+	while (idp->curroff < endsearch) {
+		/*
+		 * If offset is on a block boundary,
+		 * read the next directory block.
+		 * Release previous if it exists.
+		 */
+
+		if (iso_blkoff(imp, idp->curroff) == 0) {
+			if (bp != NULL) {
+				brelse(bp);
+				/* never release the same buffer twice */
+				bp = NULL;
+			}
+			if (error = iso_blkatoff(ip, idp->curroff, &bp))
+				break;
+			entryoffsetinblock = 0;
+		}
+		/*
+		 * Get pointer to next entry.
+		 */
+
+		ep = (struct iso_directory_record *)
+			(bp->b_un.b_addr + entryoffsetinblock);
+
+		reclen = isonum_711 (ep->length);
+		if (reclen == 0) {
+			/*
+			 * skip to next block, if any.  Always advance to
+			 * the start of the following block: rounding up
+			 * would not move an offset that already sits on a
+			 * block boundary and the loop would never end.
+			 */
+			idp->curroff += imp->logical_block_size -
+			    iso_blkoff(imp, idp->curroff);
+			continue;
+		}
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		if (entryoffsetinblock + reclen > imp->logical_block_size) {
+			error = EINVAL;
+			/* illegal directory, so stop looking */
+			break;
+		}
+
+		idp->current.d_namlen = isonum_711 (ep->name_len);
+		if (isonum_711(ep->flags)&2)
+			isodirino(&idp->current.d_fileno,ep,imp);
+		else
+			/*
+			 * File number is the device byte address of the
+			 * record: start of this block plus the offset of
+			 * the record inside the block.
+			 */
+			idp->current.d_fileno = dbtob(bp->b_blkno) +
+			    entryoffsetinblock;
+
+		if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) {
+			error = EINVAL;
+			/* illegal entry, stop */
+			break;
+		}
+
+		idp->curroff += reclen;
+		/*
+		 * Build the name according to the mount's format and
+		 * ship the entry.
+		 */
+		switch (imp->iso_ftype) {
+		case ISO_FTYPE_RRIP:
+			/*
+			 * The length comes back through a u_short, so
+			 * collect it in a local of that type instead of
+			 * aliasing the d_namlen field.
+			 */
+			namelen = 0;
+			cd9660_rrip_getname(ep,idp->current.d_name,
+			    &namelen,
+			    &idp->current.d_fileno,imp);
+			idp->current.d_namlen = (u_char)namelen;
+			if (idp->current.d_namlen)
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+			break;
+		default:	/* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 */
+			strcpy(idp->current.d_name,"..");
+			switch (ep->name[0]) {
+			case 0:
+				/* name byte 0: ship "." */
+				idp->current.d_namlen = 1;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			case 1:
+				/* name byte 1: ship ".." */
+				idp->current.d_namlen = 2;
+				error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			default:
+				isofntrans(ep->name,idp->current.d_namlen,
+				    idp->current.d_name, &elen,
+				    imp->iso_ftype == ISO_FTYPE_9660,
+				    isonum_711(ep->flags)&4);
+				idp->current.d_namlen = (u_char)elen;
+				if (imp->iso_ftype == ISO_FTYPE_DEFAULT)
+					error = iso_shipdir(idp);
+				else
+					error = iso_uiodir(idp,&idp->current,idp->curroff);
+				break;
+			}
+		}
+		if (error)
+			break;
+
+		entryoffsetinblock += reclen;
+	}
+
+	/* Flush whatever iso_shipdir() is still holding back. */
+	if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) {
+		idp->current.d_namlen = 0;
+		error = iso_shipdir(idp);
+	}
+	/* A negative value only means "caller's buffer is full". */
+	if (error < 0)
+		error = 0;
+
+	if (bp)
+		brelse (bp);
+
+	uio->uio_offset = idp->uio_off;
+
+	FREE(idp,M_TEMP);
+
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link
+ * Shouldn't we get the parent vnode and read the data from there?
+ * This could eventually result in deadlocks in cd9660_lookup.
+ * But otherwise the block read here is in the block buffer two times.
+ *
+ * Only ISO_FTYPE_RRIP mounts are handled.  The inode number is used as
+ * the device byte address of the link's directory record: the logical
+ * block containing it is read from the device vnode, the link text is
+ * gathered by cd9660_rrip_getsymname() into a temporary MAXPATHLEN
+ * buffer and then copied out through a_uio.
+ *
+ * Returns 0 on success, EINVAL when the mount is not RRIP, the block
+ * cannot be read, the record would cross the block boundary or no link
+ * name is found, or the error from uiomove().
+ */
+typedef struct iso_directory_record ISODIR;
+typedef struct iso_node ISONODE;
+typedef struct iso_mnt ISOMNT;
+int
+cd9660_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	ISONODE	*ip;
+	ISODIR	*dirp;
+	ISOMNT	*imp;
+	struct	buf *bp;
+	u_short	symlen;
+	int	error;
+	char	*symname;
+	ino_t	off;
+
+	ip = VTOI(ap->a_vp);
+	imp = ip->i_mnt;
+
+	if (imp->iso_ftype != ISO_FTYPE_RRIP)
+		return EINVAL;
+
+	/* Offset of the directory record inside its logical block. */
+	off = ip->i_number & imp->im_bmask;
+
+	/*
+	 * Get parents directory record block that this inode included.
+	 * The read must start at the beginning of the logical block,
+	 * because `off' is relative to that point; starting at the
+	 * DEV_BSIZE block holding the record itself would make `off'
+	 * index the wrong bytes whenever off >= DEV_BSIZE.
+	 */
+	error = bread(imp->im_devvp,
+		      (daddr_t)btodb(ip->i_number - off),
+		      imp->logical_block_size,
+		      NOCRED,
+		      &bp);
+	if (error) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Setup the directory pointer for this inode
+	 */
+	dirp = (ISODIR *)(bp->b_un.b_addr + off);
+#ifdef DEBUG
+	printf("lbn=%d,off=%d,bsize=%d,DEV_BSIZE=%d, dirp= %08x, b_addr=%08x, offset=%08x(%08x)\n",
+	       (daddr_t)(ip->i_number >> imp->im_bshift),
+	       ip->i_number & imp->im_bmask,
+	       imp->logical_block_size,
+	       DEV_BSIZE,
+	       dirp,
+	       bp->b_un.b_addr,
+	       ip->i_number,
+	       ip->i_number & imp->im_bmask );
+#endif
+
+	/*
+	 * Just make sure, we have a right one....
+	 *   1: Check not cross boundary on block
+	 */
+	if (off + isonum_711(dirp->length)
+	    > imp->logical_block_size) {
+		brelse(bp);
+		return EINVAL;
+	}
+
+	/*
+	 * Now get a buffer
+	 * Abuse a namei buffer for now.
+	 */
+	MALLOC(symname,char *,MAXPATHLEN,M_NAMEI,M_WAITOK);
+
+	/*
+	 * Ok, we just gathering a symbolic name in SL record.
+	 */
+	if (cd9660_rrip_getsymname(dirp,symname,&symlen,imp) == 0) {
+		FREE(symname,M_NAMEI);
+		brelse(bp);
+		return EINVAL;
+	}
+	/*
+	 * Don't forget before you leave from home ;-)
+	 */
+	brelse(bp);
+
+	/*
+	 * return with the symbolic name to caller's.
+	 */
+	error = uiomove(symname,symlen,ap->a_uio);
+
+	FREE(symname,M_NAMEI);
+
+	return error;
+}
+
+/*
+ * Abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done.  A pathname buffer is freed only when the component name has
+ * HASBUF set and SAVESTART clear.  Always returns 0.
+ */
+int
+cd9660_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return 0;
+}
+
+/*
+ * Lock vnode op: take the lock of the iso_node behind a_vp via
+ * ISO_ILOCK().  Always returns 0.
+ */
+int
+cd9660_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *node;
+
+	node = VTOI(ap->a_vp);
+	ISO_ILOCK(node);
+	return 0;
+}
+
+/*
+ * Unlock vnode op: release the lock of the iso_node behind a_vp via
+ * ISO_IUNLOCK().  Panics if the node does not have ILOCKED set.
+ * Always returns 0.
+ */
+int
+cd9660_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct iso_node *node;
+
+	node = VTOI(ap->a_vp);
+	if ((node->i_flag & ILOCKED) == 0)
+		panic("cd9660_unlock NOT LOCKED");
+	ISO_IUNLOCK(node);
+	return 0;
+}
+
+/*
+ * Islocked vnode op: report 1 when the iso_node behind a_vp has
+ * ILOCKED set in i_flag, 0 otherwise.
+ */
+int
+cd9660_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	return (VTOI(ap->a_vp)->i_flag & ILOCKED) ? 1 : 0;
+}
+
+/*
+ * Strategy vnode op.
+ *
+ * If the buffer's block number is still equal to its logical block
+ * number, VOP_BMAP() is asked for the mapping; a mapping failure is
+ * recorded in the buffer, the buffer is completed and the error is
+ * returned.  A resulting block number of -1 gets the buffer cleared
+ * and completed without any I/O.  Otherwise the request is handed to
+ * the strategy routine of the node's device vnode.
+ * Panics when called for a VBLK or VCHR vnode.
+ */
+int
+cd9660_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	register struct vnode *vp = bp->b_vp;
+	register struct iso_node *ip = VTOI(vp);
+	struct vnode *devvp;
+	int error;
+
+	if (vp->v_type == VBLK || vp->v_type == VCHR)
+		panic("cd9660_strategy: spec");
+
+	if (bp->b_blkno == bp->b_lblkno) {
+		/* Not mapped yet. */
+		error = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL);
+		if (error) {
+			bp->b_error = error;
+			bp->b_flags |= B_ERROR;
+			biodone(bp);
+			return (error);
+		}
+		if ((long)bp->b_blkno == -1)
+			clrbuf(bp);
+	}
+	if ((long)bp->b_blkno == -1) {
+		biodone(bp);
+		return (0);
+	}
+
+	/* Pass the request on to the underlying device. */
+	devvp = ip->i_devvp;
+	bp->b_dev = devvp->v_rdev;
+	VOCALL (devvp->v_op, VOFFSET(vop_strategy), ap);
+	return (0);
+}
+
+/*
+ * Print vnode op: emits one fixed line identifying the vnode as an
+ * isofs vnode; nothing from the node itself is shown.  Returns 0.
+ */
+int
+cd9660_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_ISOFS, isofs vnode\n");
+	return (0);
+}
+
+/*
+ * Common handler for operations this file system does not support:
+ * takes no arguments and always answers EOPNOTSUPP.
+ */
+int
+cd9660_enotsupp()
+{
+
+	return EOPNOTSUPP;
+}
+
+/*
+ * Global vfs data structures for isofs
+ *
+ * The macros below give a cd9660_* name to each vnode operation that
+ * has no implementation of its own in this file.  Each one expands to
+ * cd9660_enotsupp cast to a function-pointer type taking that
+ * operation's argument structure, except cd9660_fsync, which expands to
+ * nullop, and cd9660_valloc, whose cast spells out an explicit
+ * parameter list.  cd9660_bwrite is defined here but the operation
+ * tables that follow list vn_bwrite in the bwrite slot.
+ */
+#define cd9660_create \
+ ((int (*) __P((struct vop_create_args *)))cd9660_enotsupp)
+#define cd9660_mknod ((int (*) __P((struct vop_mknod_args *)))cd9660_enotsupp)
+#define cd9660_setattr \
+ ((int (*) __P((struct vop_setattr_args *)))cd9660_enotsupp)
+#define cd9660_write ((int (*) __P((struct vop_write_args *)))cd9660_enotsupp)
+#define cd9660_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define cd9660_remove \
+ ((int (*) __P((struct vop_remove_args *)))cd9660_enotsupp)
+#define cd9660_link ((int (*) __P((struct vop_link_args *)))cd9660_enotsupp)
+#define cd9660_rename \
+ ((int (*) __P((struct vop_rename_args *)))cd9660_enotsupp)
+#define cd9660_mkdir ((int (*) __P((struct vop_mkdir_args *)))cd9660_enotsupp)
+#define cd9660_rmdir ((int (*) __P((struct vop_rmdir_args *)))cd9660_enotsupp)
+#define cd9660_symlink \
+ ((int (*) __P((struct vop_symlink_args *)))cd9660_enotsupp)
+#define cd9660_pathconf \
+ ((int (*) __P((struct vop_pathconf_args *)))cd9660_enotsupp)
+#define cd9660_advlock \
+ ((int (*) __P((struct vop_advlock_args *)))cd9660_enotsupp)
+#define cd9660_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))cd9660_enotsupp)
+#define cd9660_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) cd9660_enotsupp)
+#define cd9660_vfree ((int (*) __P((struct vop_vfree_args *)))cd9660_enotsupp)
+#define cd9660_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))cd9660_enotsupp)
+#define cd9660_update \
+ ((int (*) __P((struct vop_update_args *)))cd9660_enotsupp)
+#define cd9660_bwrite \
+ ((int (*) __P((struct vop_bwrite_args *)))cd9660_enotsupp)
+
+/*
+ * Global vfs data structures for isofs (not nfs): the general cd9660
+ * vnode operation table.  The special-device and FIFO variants follow.
+ *
+ * Each entry pairs an operation descriptor with the routine that
+ * handles it; operations without an entry of their own fall to
+ * vn_default_error through the vop_default_desc slot.  Several of the
+ * cd9660_* names listed here are the macros defined above, and the
+ * bwrite slot uses vn_bwrite.  cd9660_vnodeop_opv_desc ties the table
+ * to the cd9660_vnodeop_p vector pointer.
+ */
+int (**cd9660_vnodeop_p)();
+struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, cd9660_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, cd9660_open }, /* open */
+ { &vop_close_desc, cd9660_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, cd9660_read }, /* read */
+ { &vop_write_desc, cd9660_write }, /* write */
+ { &vop_ioctl_desc, cd9660_ioctl }, /* ioctl */
+ { &vop_select_desc, cd9660_select }, /* select */
+ { &vop_mmap_desc, cd9660_mmap }, /* mmap */
+ { &vop_fsync_desc, cd9660_fsync }, /* fsync */
+ { &vop_seek_desc, cd9660_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, cd9660_readdir }, /* readdir */
+ { &vop_readlink_desc, cd9660_readlink },/* readlink */
+ { &vop_abortop_desc, cd9660_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, cd9660_bmap }, /* bmap */
+ { &vop_strategy_desc, cd9660_strategy },/* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, cd9660_pathconf },/* pathconf */
+ { &vop_advlock_desc, cd9660_advlock }, /* advlock */
+ { &vop_blkatoff_desc, cd9660_blkatoff },/* blkatoff */
+ { &vop_valloc_desc, cd9660_valloc }, /* valloc */
+ { &vop_vfree_desc, cd9660_vfree }, /* vfree */
+ { &vop_truncate_desc, cd9660_truncate },/* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_vnodeop_opv_desc =
+ { &cd9660_vnodeop_p, cd9660_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ *
+ * Same layout as the general table, but lookup, open, close, read,
+ * write, ioctl, select, mmap, fsync, seek, readdir, readlink, abortop,
+ * bmap, pathconf, advlock, blkatoff, valloc, vfree and truncate are
+ * routed to the spec_* routines.  The remaining slots keep the cd9660_*
+ * entries, including cd9660_strategy, which panics for VBLK/VCHR vnodes
+ * (see the XXX note in the table).  cd9660_specop_opv_desc ties the
+ * table to the cd9660_specop_p vector pointer.
+ */
+int (**cd9660_specop_p)();
+struct vnodeopv_entry_desc cd9660_specop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, spec_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, spec_read }, /* read */
+ { &vop_write_desc, spec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ /* XXX strategy: panics, should be notsupp instead? */
+ { &vop_strategy_desc, cd9660_strategy },/* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_specop_opv_desc =
+ { &cd9660_specop_p, cd9660_specop_entries };
+
+#ifdef FIFO
+int (**cd9660_fifoop_p)(); /* FIFO vnode ops vector; tied to the table below by cd9660_fifoop_opv_desc.  Only built when FIFO is defined. */
+struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { /* fifo_* routines replace the file-data operations; strategy is fifo_badop */
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, cd9660_create }, /* create */
+ { &vop_mknod_desc, cd9660_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, fifo_close }, /* close */
+ { &vop_access_desc, cd9660_access }, /* access */
+ { &vop_getattr_desc, cd9660_getattr }, /* getattr */
+ { &vop_setattr_desc, cd9660_setattr }, /* setattr */
+ { &vop_read_desc, fifo_read }, /* read */
+ { &vop_write_desc, fifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, cd9660_remove }, /* remove */
+ { &vop_link_desc, cd9660_link }, /* link */
+ { &vop_rename_desc, cd9660_rename }, /* rename */
+ { &vop_mkdir_desc, cd9660_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, cd9660_rmdir }, /* rmdir */
+ { &vop_symlink_desc, cd9660_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, cd9660_inactive },/* inactive */
+ { &vop_reclaim_desc, cd9660_reclaim }, /* reclaim */
+ { &vop_lock_desc, cd9660_lock }, /* lock */
+ { &vop_unlock_desc, cd9660_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_badop }, /* strategy */
+ { &vop_print_desc, cd9660_print }, /* print */
+ { &vop_islocked_desc, cd9660_islocked },/* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, cd9660_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc cd9660_fifoop_opv_desc =
+ { &cd9660_fifoop_p, cd9660_fifoop_entries };
+#endif /* FIFO */
diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h
new file mode 100644
index 0000000..e356706
--- /dev/null
+++ b/sys/fs/cd9660/iso.h
@@ -0,0 +1,256 @@
+/*-
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso.h 8.2 (Berkeley) 1/23/94
+ */
+
+#define ISODCL(from, to) ((to) - (from) + 1) /* length of the inclusive 1-based byte range from..to */
+
+struct iso_volume_descriptor { /* generic view: bytes 1..2048 of one descriptor */
+ char type[ISODCL(1,1)]; /* 711; cf. ISO_VD_PRIMARY, ISO_VD_END */
+ char id[ISODCL(2,6)]; /* 5 bytes; cf. ISO_STANDARD_ID, ISO_ECMA_ID */
+ char version[ISODCL(7,7)];
+ char data[ISODCL(8,2048)]; /* everything after the 7-byte header */
+};
+
+/* volume descriptor types */
+#define ISO_VD_PRIMARY 1
+#define ISO_VD_END 255
+
+#define ISO_STANDARD_ID "CD001"
+#define ISO_ECMA_ID "CDW01"
+
+struct iso_primary_descriptor { /* byte positions are 1-based; numeric tags name the isonum_NNN() decoder */
+ char type [ISODCL ( 1, 1)]; /* 711 */
+ char id [ISODCL ( 2, 6)]; /* cf. ISO_STANDARD_ID, ISO_ECMA_ID */
+ char version [ISODCL ( 7, 7)]; /* 711 */
+ char unused1 [ISODCL ( 8, 8)];
+ char system_id [ISODCL ( 9, 40)]; /* achars */
+ char volume_id [ISODCL ( 41, 72)]; /* dchars */
+ char unused2 [ISODCL ( 73, 80)];
+ char volume_space_size [ISODCL ( 81, 88)]; /* 733 */
+ char unused3 [ISODCL ( 89, 120)];
+ char volume_set_size [ISODCL (121, 124)]; /* 723 */
+ char volume_sequence_number [ISODCL (125, 128)]; /* 723 */
+ char logical_block_size [ISODCL (129, 132)]; /* 723 */
+ char path_table_size [ISODCL (133, 140)]; /* 733 */
+ char type_l_path_table [ISODCL (141, 144)]; /* 731 */
+ char opt_type_l_path_table [ISODCL (145, 148)]; /* 731 */
+ char type_m_path_table [ISODCL (149, 152)]; /* 732 */
+ char opt_type_m_path_table [ISODCL (153, 156)]; /* 732 */
+ char root_directory_record [ISODCL (157, 190)]; /* 9.1; same byte range as iso_mnt.root */
+ char volume_set_id [ISODCL (191, 318)]; /* dchars */
+ char publisher_id [ISODCL (319, 446)]; /* achars */
+ char preparer_id [ISODCL (447, 574)]; /* achars */
+ char application_id [ISODCL (575, 702)]; /* achars */
+ char copyright_file_id [ISODCL (703, 739)]; /* 7.5 dchars */
+ char abstract_file_id [ISODCL (740, 776)]; /* 7.5 dchars */
+ char bibliographic_file_id [ISODCL (777, 813)]; /* 7.5 dchars */
+ char creation_date [ISODCL (814, 830)]; /* 8.4.26.1 */
+ char modification_date [ISODCL (831, 847)]; /* 8.4.26.1 */
+ char expiration_date [ISODCL (848, 864)]; /* 8.4.26.1 */
+ char effective_date [ISODCL (865, 881)]; /* 8.4.26.1 */
+ char file_structure_version [ISODCL (882, 882)]; /* 711 */
+ char unused4 [ISODCL (883, 883)];
+ char application_data [ISODCL (884, 1395)];
+ char unused5 [ISODCL (1396, 2048)]; /* pads the structure out to byte 2048 */
+};
+#define ISO_DEFAULT_BLOCK_SIZE 2048
+
+struct iso_directory_record { /* fixed 33-byte head of a directory record */
+ char length [ISODCL (1, 1)]; /* 711 */
+ char ext_attr_length [ISODCL (2, 2)]; /* 711 */
+ unsigned char extent [ISODCL (3, 10)]; /* 733 */
+ unsigned char size [ISODCL (11, 18)]; /* 733 */
+ char date [ISODCL (19, 25)]; /* 7 by 711 */
+ char flags [ISODCL (26, 26)];
+ char file_unit_size [ISODCL (27, 27)]; /* 711 */
+ char interleave [ISODCL (28, 28)]; /* 711 */
+ char volume_sequence_number [ISODCL (29, 32)]; /* 723 */
+ char name_len [ISODCL (33, 33)]; /* 711 */
+ char name [0]; /* zero-length marker for the bytes after name_len; presumably name_len of them */
+};
+/* can't take sizeof(iso_directory_record): the compiler may pad the struct
+ (34 instead of 33), while the fixed fields end at byte 33 (name_len) */
+#define ISO_DIRECTORY_RECORD_SIZE 33
+
+struct iso_extended_attributes { /* byte positions are 1-based; numeric tags name the isonum_NNN() decoder */
+ unsigned char owner [ISODCL (1, 4)]; /* 723 */
+ unsigned char group [ISODCL (5, 8)]; /* 723 */
+ unsigned char perm [ISODCL (9, 10)]; /* 9.5.3 */
+ char ctime [ISODCL (11, 27)]; /* 8.4.26.1 */
+ char mtime [ISODCL (28, 44)]; /* 8.4.26.1 */
+ char xtime [ISODCL (45, 61)]; /* 8.4.26.1 */
+ char ftime [ISODCL (62, 78)]; /* 8.4.26.1 */
+ char recfmt [ISODCL (79, 79)]; /* 711 */
+ char recattr [ISODCL (80, 80)]; /* 711 */
+ unsigned char reclen [ISODCL (81, 84)]; /* 723 */
+ char system_id [ISODCL (85, 116)]; /* achars */
+ char system_use [ISODCL (117, 180)];
+ char version [ISODCL (181, 181)]; /* 711 */
+ char len_esc [ISODCL (182, 182)]; /* 711 */
+ char reserved [ISODCL (183, 246)];
+ unsigned char len_au [ISODCL (247, 250)]; /* 723; last fixed field, ends at byte 250 */
+};
+
+/* CD-ROM format type; recorded per mount in iso_mnt.iso_ftype */
+enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, ISO_FTYPE_ECMA };
+
+#ifndef ISOFSMNT_ROOT
+#define ISOFSMNT_ROOT 0
+#endif
+
+struct iso_mnt { /* per-mount state; obtained from a struct mount via VFSTOISOFS() */
+ int im_flags;
+
+ struct mount *im_mountp;
+ dev_t im_dev;
+ struct vnode *im_devvp;
+
+ int logical_block_size; /* value yielded by iso_blksize() */
+ int im_bshift; /* shift count used by iso_lblkno() and iso_lblktosize() */
+ int im_bmask; /* mask applied by iso_blkoff() */
+
+ int volume_space_size;
+ char im_fsmnt[50];
+ struct netexport im_export;
+
+ char root[ISODCL (157, 190)]; /* sized like iso_primary_descriptor.root_directory_record */
+ int root_extent;
+ int root_size;
+ enum ISO_FTYPE iso_ftype; /* one of the ISO_FTYPE_* values */
+
+ int rr_skip; /* NOTE(review): presumably Rock Ridge skip offsets -- confirm in cd9660_rrip.c */
+ int rr_skip0;
+};
+
+#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data)) /* mount's mnt_data viewed as its iso_mnt */
+
+#define iso_blkoff(imp, loc) ((loc) & (imp)->im_bmask) /* loc masked with im_bmask */
+#define iso_lblkno(imp, loc) ((loc) >> (imp)->im_bshift) /* loc shifted right by im_bshift */
+#define iso_blksize(imp, ip, lbn) ((imp)->logical_block_size) /* ip and lbn are ignored */
+#define iso_lblktosize(imp, blk) ((blk) << (imp)->im_bshift) /* blk shifted left by im_bshift */
+
+int cd9660_mount __P((struct mount *,
+ char *, caddr_t, struct nameidata *, struct proc *));
+int cd9660_start __P((struct mount *, int, struct proc *));
+int cd9660_unmount __P((struct mount *, int, struct proc *));
+int cd9660_root __P((struct mount *, struct vnode **));
+int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, struct proc *));
+int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *));
+int cd9660_sync __P((struct mount *, int, struct ucred *, struct proc *));
+int cd9660_vget __P((struct mount *, ino_t, struct vnode **));
+int cd9660_fhtovp __P((struct mount *, struct fid *, struct mbuf *,
+ struct vnode **, int *, struct ucred **));
+int cd9660_vptofh __P((struct vnode *, struct fid *));
+int cd9660_init __P((void)); /* takes no arguments; (void) gives a real prototype, like cd9660_mountroot */
+
+struct iso_node;
+int iso_blkatoff __P((struct iso_node *ip, long offset, struct buf **bpp));
+int iso_iget __P((struct iso_node *xp, ino_t ino, int relocated,
+ struct iso_node **ipp, struct iso_directory_record *isodir));
+int iso_iput __P((struct iso_node *ip));
+int iso_ilock __P((struct iso_node *ip));
+int iso_iunlock __P((struct iso_node *ip));
+int cd9660_mountroot __P((void));
+
+extern int (**cd9660_vnodeop_p)();
+
+extern inline int /* "711" fields: one unsigned byte, result 0..255 */
+isonum_711(p)
+ unsigned char *p;
+{
+ return p[0];
+}
+
+extern inline int /* "712" fields: one signed byte, result -128..127 */
+isonum_712(p)
+ char *p;
+{
+ return (signed char)*p; /* explicit: plain char may be unsigned on some machines */
+}
+
+extern inline int /* "721" fields: 16 bits, low byte first, result 0..65535 */
+isonum_721(p)
+ unsigned char *p;
+{
+ return p[0] | (p[1] << 8); /* no (char) cast: it sign-extended a high byte >= 0x80 */
+}
+
+extern inline int /* "722" fields: 16 bits, high byte first, result 0..65535 */
+isonum_722(p)
+ unsigned char *p;
+{
+ return (p[0] << 8) | p[1]; /* no (char) cast: it sign-extended a high byte >= 0x80 */
+}
+
+extern inline int /* "723" fields are 4 bytes wide here; only the leading 721 half is decoded */
+isonum_723(p)
+ unsigned char *p;
+{
+ return isonum_721(p);
+}
+
+extern inline int /* "731" fields: 32 bits, low byte first */
+isonum_731(p)
+ unsigned char *p;
+{
+ return p[0] | (p[1] << 8) | (p[2] << 16) | ((unsigned)p[3] << 24); /* unsigned: do not shift into int's sign bit */
+}
+
+extern inline int /* "732" fields: 32 bits, high byte first */
+isonum_732(p)
+ unsigned char *p;
+{
+ return ((unsigned)p[0] << 24) | (p[1] << 16) | (p[2] << 8) | p[3]; /* unsigned: do not shift into int's sign bit */
+}
+
+extern inline int /* "733" fields are 8 bytes wide here; only the leading 731 half is decoded */
+isonum_733(p)
+ unsigned char *p;
+{
+ return isonum_731(p);
+}
+
+int isofncmp __P((unsigned char *, int, unsigned char *, int));
+void isofntrans __P((unsigned char *, int, unsigned char *, unsigned short *,
+ int, int));
+
+/*
+ * Associated files have a leading '='.
+ */
+#define ASSOCCHAR '='
diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h
new file mode 100644
index 0000000..78e4a77
--- /dev/null
+++ b/sys/fs/cd9660/iso_rrip.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley
+ * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension
+ * Support code is derived from software contributed to Berkeley
+ * by Atsushi Murai (amurai@spec.co.jp).
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)iso_rrip.h 8.2 (Berkeley) 1/23/94
+ */
+
+
+/*
+ * Analyze function flag (similar to RR field bits)
+ */
+#define ISO_SUSP_ATTR 0x0001
+#define ISO_SUSP_DEVICE 0x0002
+#define ISO_SUSP_SLINK 0x0004
+#define ISO_SUSP_ALTNAME 0x0008
+#define ISO_SUSP_CLINK 0x0010
+#define ISO_SUSP_PLINK 0x0020
+#define ISO_SUSP_RELDIR 0x0040
+#define ISO_SUSP_TSTAMP 0x0080
+#define ISO_SUSP_IDFLAG 0x0100
+#define ISO_SUSP_EXTREF 0x0200
+#define ISO_SUSP_CONT 0x0400
+#define ISO_SUSP_OFFSET 0x0800
+#define ISO_SUSP_STOP 0x1000
+#define ISO_SUSP_UNKNOWN 0x8000
+
+typedef struct { /* working state for one analysis pass; `fields' holds ISO_SUSP_* bits */
+ struct iso_node *inop; /* NOTE(review): presumably the node being filled in -- confirm in cd9660_rrip.c */
+ int fields; /* interesting fields in this analysis */
+ daddr_t iso_ce_blk; /* block of continuation area */
+ off_t iso_ce_off; /* offset of continuation area */
+ int iso_ce_len; /* length of continuation area */
+ struct iso_mnt *imp; /* mount structure */
+ ino_t *inump; /* inode number pointer */
+ char *outbuf; /* name/symbolic link output area */
+ u_short *outlen; /* length of above */
+ u_short maxlen; /* maximum length of above */
+ int cont; /* continuation of above */
+} ISO_RRIP_ANALYZE;
+
+int cd9660_rrip_analyze __P((struct iso_directory_record *isodir,
+ struct iso_node *inop, struct iso_mnt *imp));
+int cd9660_rrip_getname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ ino_t *inump, struct iso_mnt *imp));
+int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir,
+ char *outbuf, u_short *outlen,
+ struct iso_mnt *imp));
+int cd9660_rrip_offset __P((struct iso_directory_record *isodir,
+ struct iso_mnt *imp));
diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c
new file mode 100644
index 0000000..9d04652
--- /dev/null
+++ b/sys/fs/deadfs/dead_vnops.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/errno.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+
+/*
+ * Prototypes for dead operations on vnodes.
+ */
+int dead_badop(), dead_ebadf(),
+ chkvnlock(); /* defined at the end of this file but called earlier */
+int dead_lookup __P((struct vop_lookup_args *));
+#define dead_create ((int (*) __P((struct vop_create_args *)))dead_badop)
+#define dead_mknod ((int (*) __P((struct vop_mknod_args *)))dead_badop)
+int dead_open __P((struct vop_open_args *));
+#define dead_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define dead_access ((int (*) __P((struct vop_access_args *)))dead_ebadf)
+#define dead_getattr ((int (*) __P((struct vop_getattr_args *)))dead_ebadf)
+#define dead_setattr ((int (*) __P((struct vop_setattr_args *)))dead_ebadf)
+int dead_read __P((struct vop_read_args *));
+int dead_write __P((struct vop_write_args *));
+int dead_ioctl __P((struct vop_ioctl_args *));
+int dead_select __P((struct vop_select_args *));
+#define dead_mmap ((int (*) __P((struct vop_mmap_args *)))dead_badop)
+#define dead_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define dead_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define dead_remove ((int (*) __P((struct vop_remove_args *)))dead_badop)
+#define dead_link ((int (*) __P((struct vop_link_args *)))dead_badop)
+#define dead_rename ((int (*) __P((struct vop_rename_args *)))dead_badop)
+#define dead_mkdir ((int (*) __P((struct vop_mkdir_args *)))dead_badop)
+#define dead_rmdir ((int (*) __P((struct vop_rmdir_args *)))dead_badop)
+#define dead_symlink ((int (*) __P((struct vop_symlink_args *)))dead_badop)
+#define dead_readdir ((int (*) __P((struct vop_readdir_args *)))dead_ebadf)
+#define dead_readlink ((int (*) __P((struct vop_readlink_args *)))dead_ebadf)
+#define dead_abortop ((int (*) __P((struct vop_abortop_args *)))dead_badop)
+#define dead_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define dead_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int dead_lock __P((struct vop_lock_args *));
+#define dead_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+int dead_bmap __P((struct vop_bmap_args *));
+int dead_strategy __P((struct vop_strategy_args *));
+int dead_print __P((struct vop_print_args *));
+#define dead_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define dead_pathconf ((int (*) __P((struct vop_pathconf_args *)))dead_ebadf)
+#define dead_advlock ((int (*) __P((struct vop_advlock_args *)))dead_ebadf)
+#define dead_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))dead_badop)
+#define dead_valloc ((int (*) __P((struct vop_valloc_args *)))dead_badop)
+#define dead_vfree ((int (*) __P((struct vop_vfree_args *)))dead_badop)
+#define dead_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define dead_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define dead_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
+
+int (**dead_vnodeop_p)(); /* op vector pointer; referenced by dead_vnodeop_opv_desc below */
+struct vnodeopv_entry_desc dead_vnodeop_entries[] = { /* (operation descriptor, handler) pairs */
+ { &vop_default_desc, vn_default_error }, /* default */
+ { &vop_lookup_desc, dead_lookup }, /* lookup */
+ { &vop_create_desc, dead_create }, /* create */
+ { &vop_mknod_desc, dead_mknod }, /* mknod */
+ { &vop_open_desc, dead_open }, /* open */
+ { &vop_close_desc, dead_close }, /* close */
+ { &vop_access_desc, dead_access }, /* access */
+ { &vop_getattr_desc, dead_getattr }, /* getattr */
+ { &vop_setattr_desc, dead_setattr }, /* setattr */
+ { &vop_read_desc, dead_read }, /* read */
+ { &vop_write_desc, dead_write }, /* write */
+ { &vop_ioctl_desc, dead_ioctl }, /* ioctl */
+ { &vop_select_desc, dead_select }, /* select */
+ { &vop_mmap_desc, dead_mmap }, /* mmap */
+ { &vop_fsync_desc, dead_fsync }, /* fsync */
+ { &vop_seek_desc, dead_seek }, /* seek */
+ { &vop_remove_desc, dead_remove }, /* remove */
+ { &vop_link_desc, dead_link }, /* link */
+ { &vop_rename_desc, dead_rename }, /* rename */
+ { &vop_mkdir_desc, dead_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, dead_rmdir }, /* rmdir */
+ { &vop_symlink_desc, dead_symlink }, /* symlink */
+ { &vop_readdir_desc, dead_readdir }, /* readdir */
+ { &vop_readlink_desc, dead_readlink }, /* readlink */
+ { &vop_abortop_desc, dead_abortop }, /* abortop */
+ { &vop_inactive_desc, dead_inactive }, /* inactive */
+ { &vop_reclaim_desc, dead_reclaim }, /* reclaim */
+ { &vop_lock_desc, dead_lock }, /* lock */
+ { &vop_unlock_desc, dead_unlock }, /* unlock */
+ { &vop_bmap_desc, dead_bmap }, /* bmap */
+ { &vop_strategy_desc, dead_strategy }, /* strategy */
+ { &vop_print_desc, dead_print }, /* print */
+ { &vop_islocked_desc, dead_islocked }, /* islocked */
+ { &vop_pathconf_desc, dead_pathconf }, /* pathconf */
+ { &vop_advlock_desc, dead_advlock }, /* advlock */
+ { &vop_blkatoff_desc, dead_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, dead_valloc }, /* valloc */
+ { &vop_vfree_desc, dead_vfree }, /* vfree */
+ { &vop_truncate_desc, dead_truncate }, /* truncate */
+ { &vop_update_desc, dead_update }, /* update */
+ { &vop_bwrite_desc, dead_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* NULL pair terminates the table */
+};
+struct vnodeopv_desc dead_vnodeop_opv_desc = /* ties the vector pointer to the table above */
+ { &dead_vnodeop_p, dead_vnodeop_entries };
+
+/*
+ * Trivial lookup routine that always fails: clears *a_vpp, returns ENOTDIR.
+ */
+/* ARGSUSED */
+int
+dead_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+
+ *ap->a_vpp = NULL;
+ return (ENOTDIR);
+}
+
+/*
+ * Open always fails as if device did not exist: returns ENXIO, ap is unused.
+ */
+/* ARGSUSED */
+dead_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ return (ENXIO);
+}
+
+/*
+ * Vnode op for read: 0 for character devices, EIO for everything else.
+ */
+/* ARGSUSED */
+dead_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+
+ if (chkvnlock(ap->a_vp) != 0)
+ panic("dead_read: lock");
+ /*
+ * Returning 0 without transferring anything is how EOF is
+ * reported; character devices get that, every other vnode
+ * type gets EIO.
+ */
+ return (ap->a_vp->v_type == VCHR ? 0 : EIO);
+}
+
+/*
+ * Vnode op for write: always EIO; panics if the vnode was still changing.
+ */
+/* ARGSUSED */
+dead_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ /* chkvnlock() is nonzero only when it had to sleep on VXLOCK */
+ if (chkvnlock(ap->a_vp) != 0)
+ panic("dead_write: lock");
+ return (EIO);
+}
+
+/*
+ * Device ioctl operation: EBADF, unless we had to wait for the vnode.
+ */
+/* ARGSUSED */
+dead_ioctl(ap)
+ struct vop_ioctl_args /* {
+ struct vnode *a_vp;
+ int a_command;
+ caddr_t a_data;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ /* slept while the vnode changed state: hand the same args to VCALL again */
+ if (chkvnlock(ap->a_vp))
+ return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap));
+ return (EBADF);
+}
+
+/* ARGSUSED */
+dead_select(ap)
+ struct vop_select_args /* {
+ struct vnode *a_vp;
+ int a_which;
+ int a_fflags;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+
+ /*
+ * Always answer 1 so the user finds out that the descriptor is gone.
+ */
+ return (1);
+}
+
+/*
+ * Strategy: retry via VOP_STRATEGY if we had to wait, else fail with EIO.
+ */
+dead_strategy(ap)
+ struct vop_strategy_args /* {
+ struct buf *a_bp;
+ } */ *ap;
+{
+ struct buf *bp = ap->a_bp;
+
+ if (bp->b_vp != NULL && chkvnlock(bp->b_vp))
+ return (VOP_STRATEGY(bp));
+ bp->b_flags |= B_ERROR;
+ biodone(bp);
+ return (EIO);
+}
+
+/*
+ * Wait until the vnode has finished changing state, then retry the lock.
+ */
+dead_lock(ap)
+ struct vop_lock_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ /* no wait was needed: succeed without doing anything */
+ if (chkvnlock(ap->a_vp))
+ return (VCALL(ap->a_vp, VOFFSET(vop_lock), ap));
+ return (0);
+}
+
+/*
+ * Block map: retry via VOP_BMAP if we had to wait for the vnode, else EIO.
+ */
+dead_bmap(ap)
+ struct vop_bmap_args /* {
+ struct vnode *a_vp;
+ daddr_t a_bn;
+ struct vnode **a_vpp;
+ daddr_t *a_bnp;
+ int *a_runp;
+ } */ *ap;
+{
+ /* chkvnlock() is nonzero only when it slept on VXLOCK */
+ if (chkvnlock(ap->a_vp))
+ return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp));
+ return (EIO);
+}
+
+/*
+ * Print out the contents of a dead vnode; ap is unused.  Always returns 0.
+ */
+/* ARGSUSED */
+dead_print(ap)
+ struct vop_print_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ printf("tag VT_NON, dead vnode\n");
+ return (0); /* declared int and installed in the op table: return a defined value */
+}
+
+/*
+ * Empty vnode failed operation: takes no arguments, always returns EBADF.
+ */
+dead_ebadf()
+{
+
+ return (EBADF);
+}
+
+/*
+ * Empty vnode bad operation: panics; the ops mapped to it must never be called.
+ */
+dead_badop()
+{
+
+ panic("dead_badop called");
+ /* NOTREACHED */
+}
+
+/*
+ * Empty vnode null operation: returns 0 (the op macros above name nullop, not this).
+ */
+dead_nullop()
+{
+
+ return (0);
+}
+
+/*
+ * Sleep for as long as the vnode has VXLOCK set, flagging VXWANT first.
+ * Returns 1 if it slept at least once, 0 if VXLOCK was already clear.
+ */
+chkvnlock(vp)
+ register struct vnode *vp;
+{
+ int slept;
+
+ for (slept = 0; (vp->v_flag & VXLOCK) != 0; slept = 1) {
+ vp->v_flag |= VXWANT;
+ /* the vnode pointer itself is the sleep channel */
+ sleep((caddr_t)vp, PINOD);
+ }
+ return (slept);
+}
diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h
new file mode 100644
index 0000000..4c682e7
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc.h
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc.h 8.5 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc.h,v 1.8 1993/04/06 15:28:33 jsp Exp $
+ */
+
+#ifdef KERNEL
+struct fdescmount {
+ struct vnode *f_root; /* Root node */
+};
+
+#define FD_ROOT 2
+#define FD_DEVFD 3
+#define FD_STDIN 4
+#define FD_STDOUT 5
+#define FD_STDERR 6
+#define FD_CTTY 7
+#define FD_DESC 8
+#define FD_MAX 12
+
+typedef enum { /* node kind; stored in fdescnode.fd_type, first argument of fdesc_allocvp() */
+ Froot, /* passed together with FD_ROOT for the mount's root vnode */
+ Fdevfd,
+ Fdesc,
+ Flink,
+ Fctty
+} fdntype;
+
+struct fdescnode { /* per-vnode private data; reached through VTOFDESC(vp) */
+ struct fdescnode *fd_forw; /* Hash chain */
+ struct fdescnode *fd_back; /* presumably the chain's back link -- confirm in fdesc_vnops.c */
+ struct vnode *fd_vnode; /* Back ptr to vnode */
+ fdntype fd_type; /* Type of this node */
+ unsigned fd_fd; /* Fd to be dup'ed */
+ char *fd_link; /* Link to fd/n */
+ int fd_ix; /* filesystem index */
+};
+
+#define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data))
+#define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data)
+
+extern dev_t devctty;
+extern int fdesc_init __P((void));
+extern int fdesc_root __P((struct mount *, struct vnode **));
+extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **));
+extern int (**fdesc_vnodeop_p)();
+extern struct vfsops fdesc_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c
new file mode 100644
index 0000000..80c543d
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vfsops.c
@@ -0,0 +1,288 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vfsops.c,v 1.9 1993/04/06 15:28:33 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/fdesc/fdesc.h>
+
+/*
+ * Mount the per-process file descriptors (/dev/fd); data, ndp and p are unused.
+ */
+int
+fdesc_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error = 0;
+ u_int size;
+ struct fdescmount *fmp;
+ struct vnode *rvp;
+
+ /*
+ * Updating an existing mount is not supported
+ */
+ if (mp->mnt_flag & MNT_UPDATE)
+ return (EOPNOTSUPP);
+
+ error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp); /* root vnode first */
+ if (error)
+ return (error);
+
+ MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount),
+ M_UFSMNT, M_WAITOK); /* XXX */
+ rvp->v_type = VDIR;
+ rvp->v_flag |= VROOT;
+ fmp->f_root = rvp; /* read back by VFSTOFDESC(mp)->f_root in fdesc_root/fdesc_unmount */
+ /* XXX -- don't mark as local to work around fts() problems */
+ /*mp->mnt_flag |= MNT_LOCAL;*/
+ mp->mnt_data = (qaddr_t) fmp;
+ getnewfsid(mp, MOUNT_FDESC);
+
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); /* result deliberately ignored */
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); /* zero the rest of the name buffer */
+ bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+ bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc")); /* sizeof includes the NUL */
+ return (0);
+}
+
+int
+fdesc_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return (0); /* nothing to do; all three arguments are unused */
+}
+
+int
+fdesc_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ extern int doforce;
+ struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+ int closeflags = 0;
+ int error;
+
+ if ((mntflags & MNT_FORCE) != 0) {
+ /* no rootfs check needed: fdesc can never be the root fs */
+ if (doforce == 0)
+ return (EINVAL);
+ closeflags = FORCECLOSE;
+ }
+
+ /*
+ * Refuse while the root vnode has more than one reference,
+ * then flush the mount's vnodes (rootvp goes to vflush as a
+ * separate argument -- presumably the vnode to leave alone).
+ */
+ if (rootvp->v_usecount > 1)
+ return (EBUSY);
+ if ((error = vflush(mp, rootvp, closeflags)) != 0)
+ return (error);
+
+ /*
+ * Drop the reference held on the root vnode ...
+ */
+ vrele(rootvp);
+ /*
+ * ... get rid of the vnode so it can be reused ...
+ */
+ vgone(rootvp);
+ /*
+ * ... and free the per-mount fdescmount structure.
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+
+ return (0);
+}
+
+int
+fdesc_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct vnode *rootvp = VFSTOFDESC(mp)->f_root;
+
+ /*
+ * Hand back the mount's root vnode with
+ * an extra reference, locked.
+ */
+ VREF(rootvp);
+ VOP_LOCK(rootvp);
+ *vpp = rootvp;
+ return (0);
+}
+
+int
+fdesc_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+ /* quota control is not supported here; every argument is ignored */
+ return (EOPNOTSUPP);
+}
+
+int
+fdesc_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp;
+ struct proc *p;
+{
+ struct filedesc *fdp;
+ int maxfd;
+ int fd;
+ int scanend;
+ int nfree;
+
+ /*
+ * Count the unused descriptor slots of process p, scanning
+ * from fd_freefile up to the smaller of the table size and
+ * the RLIMIT_NOFILE soft limit.  [ The figures get strange if
+ * that limit is ever set below the number of open files. ]
+ */
+ fdp = p->p_fd;
+ maxfd = p->p_rlimit[RLIMIT_NOFILE].rlim_cur;
+ scanend = min(fdp->fd_nfiles, maxfd);
+ nfree = 0;
+ for (fd = fdp->fd_freefile; fd < scanend; fd++)
+ if (fdp->fd_ofiles[fd] == NULL)
+ nfree++;
+
+ /*
+ * Slots between the current table size and the limit have
+ * not been allocated yet; count them as free as well.
+ */
+ if (maxfd > fdp->fd_nfiles)
+ nfree += maxfd - fdp->fd_nfiles;
+
+ sbp->f_type = MOUNT_FDESC;
+ sbp->f_flags = 0;
+ sbp->f_iosize = DEV_BSIZE;
+ sbp->f_bsize = DEV_BSIZE;
+ sbp->f_blocks = 2; /* 1K to keep df happy */
+ sbp->f_bavail = 0;
+ sbp->f_bfree = 0;
+ sbp->f_files = maxfd + 1; /* one extra for "." */
+ sbp->f_ffree = nfree; /* computed above */
+ if (sbp != &mp->mnt_stat) {
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ return (0);
+}
+
+int
+fdesc_sync(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+ /* NOTE(review): cd9660_sync in this commit is declared with four parameters; two here -- confirm against struct vfsops */
+ return (0);
+}
+
+/*
+ * Fdesc flat namespace lookup.
+ * Currently unsupported: returns EOPNOTSUPP and leaves *vpp untouched.
+ */
+int
+fdesc_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+int
+fdesc_fhtovp(mp, fhp, setgen, vpp)
+ struct mount *mp;
+ struct fid *fhp;
+ int setgen;
+ struct vnode **vpp;
+{
+ return (EOPNOTSUPP); /* unsupported; NOTE(review): cd9660_fhtovp in this commit is declared with six parameters, four here */
+}
+
+int
+fdesc_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+ /* unsupported: nothing is written through fhp */
+ return (EOPNOTSUPP);
+}
+
+struct vfsops fdesc_vfsops = { /* positional initializer: order must match struct vfsops */
+ fdesc_mount,
+ fdesc_start,
+ fdesc_unmount,
+ fdesc_root,
+ fdesc_quotactl,
+ fdesc_statfs,
+ fdesc_sync,
+ fdesc_vget,
+ fdesc_fhtovp,
+ fdesc_vptofh,
+ fdesc_init, /* declared in fdesc.h; not defined in this file */
+};
diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c
new file mode 100644
index 0000000..00d8675
--- /dev/null
+++ b/sys/fs/fdescfs/fdesc_vnops.c
@@ -0,0 +1,974 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94
+ *
+ * $Id: fdesc_vnops.c,v 1.12 1993/04/06 16:17:17 jsp Exp $
+ */
+
+/*
+ * /dev/fd Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/kernel.h> /* boottime */
+#include <sys/resourcevar.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/buf.h>
+#include <sys/dirent.h>
+#include <miscfs/fdesc/fdesc.h>
+
+#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL) /* session tty vnode of p, NULL unless P_CONTROLT is set */
+
+#define FDL_WANT 0x01 /* a process is sleeping on fdcache_lock */
+#define FDL_LOCKED 0x02 /* fdcache_lock is currently held */
+static int fdcache_lock; /* FDL_* flags; serialises node creation in fdesc_allocvp() */
+
+dev_t devctty; /* assigned in fdesc_init(); handed to the ctty*() routines */
+/*
+ * Build-time check: the line inside the #if is deliberately not valid
+ * C, so compilation fails when the three standard-descriptor indices
+ * are not consecutive.
+ */
+#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1)
+FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2
+#endif
+
+/*
+ * Number of hash chains in the fdesc node cache.  It must be a power
+ * of two: FD_NHASH() reduces an index with a bit mask, and the mask
+ * has to be NFDCACHE-1 so that the result always lies in the range
+ * 0..NFDCACHE-1 and is a valid subscript for an array of NFDCACHE
+ * chain heads.  (Masking with NFDCACHE itself, as was done with a
+ * value of 3, can yield 3 and index one element past a 3-entry array.)
+ */
+#define NFDCACHE 4
+#define FD_NHASH(ix) ((ix) & (NFDCACHE - 1))
+
+/*
+ * Cache head
+ *
+ * One of these anchors each hash chain of fdesc nodes.  fdesc_init()
+ * and fdesc_allocvp() cast a head to (struct fdescnode *) so that it
+ * can be linked into the same circular list as the nodes themselves;
+ * that presumably relies on struct fdescnode beginning with matching
+ * forward/backward links -- confirm against fdesc.h.
+ */
+struct fdcache {
+ struct fdescnode *fc_forw; /* forward link of the chain */
+ struct fdescnode *fc_back; /* backward link of the chain */
+};
+
+static struct fdcache fdcache[NFDCACHE]; /* chain heads, selected via FD_NHASH() */
+
+/*
+ * Initialise cache headers.
+ *
+ * Records the device number used for the controlling-tty node in
+ * devctty and makes every hash chain head point at itself, i.e. each
+ * chain starts out as an empty circular list.
+ *
+ * The return type is spelled out and a value is returned explicitly:
+ * the function is stored in the vfsops vector alongside int-returning
+ * entry points, so it must not fall off the end without a value.
+ * Always returns 0.
+ */
+int
+fdesc_init()
+{
+	struct fdcache *fc;
+
+	devctty = makedev(nchrdev, 0);
+
+	for (fc = fdcache; fc < fdcache + NFDCACHE; fc++)
+		fc->fc_forw = fc->fc_back = (struct fdescnode *) fc;
+
+	return (0);
+}
+
+/*
+ * Map a node index to the head of the hash chain it belongs on.
+ */
+static struct fdcache *
+fdesc_hash(ix)
+	int ix;
+{
+	int bucket = FD_NHASH(ix);
+
+	return fdcache + bucket;
+}
+
+/*
+ * Return, in *vpp, the vnode for node index ix on mount mp.
+ *
+ * A node already present on the hash chain is re-used after vget()
+ * succeeds on its vnode.  Otherwise a new vnode and fdescnode are
+ * created (type ftype, fd_fd -1, no link text) and put on the chain.
+ * Creation is serialised with fdcache_lock because getnewvnode() can
+ * block; a process that finds the lock held sleeps and then starts
+ * over with a fresh scan of the chain.
+ *
+ * Returns 0 on success or the error reported by getnewvnode().
+ */
+int
+fdesc_allocvp(ftype, ix, mp, vpp)
+	fdntype ftype;
+	int ix;
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct fdcache *head;
+	struct fdescnode *node;
+	int error;
+
+	for (;;) {
+		/* Scan the chain for a node with this index on this mount. */
+		head = fdesc_hash(ix);
+		node = head->fc_forw;
+		while (node != (struct fdescnode *) head) {
+			if (node->fd_ix == ix && node->fd_vnode->v_mount == mp)
+				break;
+			node = node->fd_forw;
+		}
+
+		if (node != (struct fdescnode *) head) {
+			/* Found one; if vget() fails, scan again. */
+			if (vget(node->fd_vnode, 0))
+				continue;
+			*vpp = node->fd_vnode;
+			return (0);
+		}
+
+		/* Nothing cached: take the creation lock or wait for it. */
+		if ((fdcache_lock & FDL_LOCKED) == 0)
+			break;
+		fdcache_lock |= FDL_WANT;
+		sleep((caddr_t) &fdcache_lock, PINOD);
+	}
+	fdcache_lock |= FDL_LOCKED;
+
+	error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
+	if (error == 0) {
+		MALLOC(node, void *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);
+		(*vpp)->v_data = node;
+		node->fd_vnode = *vpp;
+		node->fd_type = ftype;
+		node->fd_fd = -1;
+		node->fd_link = 0;
+		node->fd_ix = ix;
+		head = fdesc_hash(ix);
+		insque(node, head);
+	}
+
+	/* Drop the lock and wake anybody who was waiting for it. */
+	fdcache_lock &= ~FDL_LOCKED;
+	if (fdcache_lock & FDL_WANT) {
+		fdcache_lock &= ~FDL_WANT;
+		wakeup((caddr_t) &fdcache_lock);
+	}
+
+	return (error);
+}
+
+/*
+ * Look up one pathname component in an fdesc directory.
+ *
+ * ap->a_dvp is the directory being searched, ap->a_cnp describes the
+ * name to locate in it, and the resulting vnode is stored through
+ * ap->a_vpp (NULL on failure).
+ *
+ * "." in any directory yields the directory itself, referenced and
+ * locked.
+ *
+ * In the root directory (Froot):
+ *   "fd"                 -> the Fdevfd directory node
+ *   "tty"                -> the Fctty node; ENXIO if the process has
+ *                           no controlling tty vnode
+ *   "stdin/stdout/stderr"-> Flink nodes whose link text is "fd/0",
+ *                           "fd/1" and "fd/2"
+ *   anything else        -> ENOENT
+ *
+ * In the fd directory (Fdevfd):
+ *   ".."                 -> whatever fdesc_root() returns
+ *   a decimal number     -> the Fdesc node of that descriptor; EBADF
+ *                           if the number is out of range or the slot
+ *                           is not open
+ *   anything else        -> ENOENT
+ *
+ * Any other node type is not a directory: ENOTDIR.
+ *
+ * Unlike the other successful paths, the Fdesc path returns without
+ * calling VOP_LOCK() or setting v_type; fdesc_getattr() assigns v_type
+ * when it is called on the node.
+ */
+int
+fdesc_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnode * a_dvp;
+ struct vnode ** a_vpp;
+ struct componentname * a_cnp;
+ } */ *ap;
+{
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname;
+ struct proc *p;
+ int nfiles;
+ unsigned fd;
+ int error;
+ struct vnode *fvp;
+ char *ln;
+
+ pname = ap->a_cnp->cn_nameptr;
+ if (ap->a_cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ VOP_LOCK(dvp);
+ return (0);
+ }
+
+ p = ap->a_cnp->cn_proc;
+ nfiles = p->p_fd->fd_nfiles;
+
+ switch (VTOFDESC(dvp)->fd_type) {
+ default:
+ case Flink:
+ case Fdesc:
+ case Fctty:
+ error = ENOTDIR;
+ goto bad;
+
+ case Froot:
+ if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
+ error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VDIR;
+ VOP_LOCK(fvp);
+ return (0);
+ }
+
+ if (ap->a_cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
+ struct vnode *ttyvp = cttyvp(p);
+ if (ttyvp == NULL) {
+ error = ENXIO;
+ goto bad;
+ }
+ error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ *vpp = fvp;
+ fvp->v_type = VFIFO;
+ VOP_LOCK(fvp);
+ return (0);
+ }
+
+ ln = 0; /* stays 0 unless the name is one of the three std links */
+ switch (ap->a_cnp->cn_namelen) {
+ case 5:
+ if (bcmp(pname, "stdin", 5) == 0) {
+ ln = "fd/0";
+ fd = FD_STDIN;
+ }
+ break;
+ case 6:
+ if (bcmp(pname, "stdout", 6) == 0) {
+ ln = "fd/1";
+ fd = FD_STDOUT;
+ } else
+ if (bcmp(pname, "stderr", 6) == 0) {
+ ln = "fd/2";
+ fd = FD_STDERR;
+ }
+ break;
+ }
+
+ if (ln) {
+ error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_link = ln;
+ *vpp = fvp;
+ fvp->v_type = VLNK;
+ VOP_LOCK(fvp);
+ return (0);
+ } else {
+ error = ENOENT;
+ goto bad;
+ }
+
+ /* NOTREACHED: both branches above leave this case */
+
+ case Fdevfd:
+ if (ap->a_cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
+ error = fdesc_root(dvp->v_mount, vpp);
+ return (error);
+ }
+
+ fd = 0;
+ while (*pname >= '0' && *pname <= '9') { /* parse a decimal descriptor number */
+ fd = 10 * fd + *pname++ - '0';
+ if (fd >= nfiles) /* already out of range; stop parsing */
+ break;
+ }
+
+ if (*pname != '\0') { /* characters left over after the digits consumed */
+ error = ENOENT;
+ goto bad;
+ }
+
+ if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
+ error = EBADF;
+ goto bad;
+ }
+
+ error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
+ if (error)
+ goto bad;
+ VTOFDESC(fvp)->fd_fd = fd;
+ *vpp = fvp;
+ return (0);
+ }
+
+bad:;
+ *vpp = NULL;
+ return (error);
+}
+
+/*
+ * Open an fdesc node.
+ *
+ * Fdesc (per-descriptor) nodes: XXX kludge -- the descriptor number
+ * being sought for duplication is stored in p->p_dupfd and ENODEV is
+ * returned.  The error return ensures that the vnode for this device
+ * will be released by vn_open.  Open will detect this special error
+ * and take the actions in dupfdopen.  Other callers of vn_open or
+ * VOP_OPEN will simply report the error.
+ *
+ * Fctty nodes: the open is handed to cttyopen().
+ *
+ * All other node types: nothing to do, returns 0.
+ */
+int
+fdesc_open(ap)
+	struct vop_open_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fdescnode *node = VTOFDESC(ap->a_vp);
+
+	if (node->fd_type == Fdesc) {
+		ap->a_p->p_dupfd = node->fd_fd;	/* XXX */
+		return (ENODEV);
+	}
+
+	if (node->fd_type == Fctty)
+		return (cttyopen(devctty, ap->a_mode, 0, ap->a_p));
+
+	return (0);
+}
+
+/*
+ * Fill in *vap with the attributes of the object behind descriptor fd
+ * of process p.
+ *
+ * For a vnode-backed descriptor the request is passed to VOP_GETATTR()
+ * on the underlying vnode; a directory is reported as VFIFO so that it
+ * cannot create loops in the namespace.  For a socket the result of
+ * soo_stat() is converted field by field into a vattr.  Any other file
+ * type panics.
+ *
+ * Returns EBADF when fd does not name an open descriptor, otherwise
+ * the status of the underlying attribute call.
+ */
+static int
+fdesc_attr(fd, vap, cred, p)
+	int fd;
+	struct vattr *vap;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct filedesc *fdp = p->p_fd;
+	struct file *fp;
+	struct stat stb;
+	int error;
+
+	/*
+	 * fd is signed, so reject negative values as well as values past
+	 * the end of the table before it is used as a subscript.
+	 */
+	if (fd < 0 || fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL)
+		return (EBADF);
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p);
+		if (error == 0 && vap->va_type == VDIR) {
+			/*
+			 * don't allow directories to show up because
+			 * that causes loops in the namespace.
+			 */
+			vap->va_type = VFIFO;
+		}
+		break;
+
+	case DTYPE_SOCKET:
+		error = soo_stat((struct socket *)fp->f_data, &stb);
+		if (error == 0) {
+			vattr_null(vap);
+			vap->va_type = VSOCK;
+			vap->va_mode = stb.st_mode;
+			vap->va_nlink = stb.st_nlink;
+			vap->va_uid = stb.st_uid;
+			vap->va_gid = stb.st_gid;
+			vap->va_fsid = stb.st_dev;
+			vap->va_fileid = stb.st_ino;
+			vap->va_size = stb.st_size;
+			vap->va_blocksize = stb.st_blksize;
+			vap->va_atime = stb.st_atimespec;
+			vap->va_mtime = stb.st_mtimespec;
+			vap->va_ctime = stb.st_ctimespec;
+			vap->va_gen = stb.st_gen;
+			vap->va_flags = stb.st_flags;
+			vap->va_rdev = stb.st_rdev;
+			vap->va_bytes = stb.st_blocks * stb.st_blksize;
+		}
+		break;
+
+	default:
+		panic("fdesc attr");
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Get the attributes of an fdesc node.
+ *
+ * Per-descriptor (Fdesc) nodes report the attributes of the object
+ * behind the descriptor, obtained through fdesc_attr().  The synthetic
+ * nodes (root, fd directory, std* links, tty) get attributes made up
+ * here: owner 0/0, all times set to boot time, and a type, mode, link
+ * count and size that depend on the kind of node.  An unknown node
+ * type panics.
+ *
+ * On success the vnode's v_type is updated to the reported type.
+ */
+int
+fdesc_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+	struct fdescnode *node = VTOFDESC(vp);
+	unsigned fdnum;
+	int error = 0;
+
+	switch (node->fd_type) {
+	case Fdesc:
+		fdnum = node->fd_fd;
+		error = fdesc_attr(fdnum, vap, ap->a_cred, ap->a_p);
+		break;
+
+	case Froot:
+	case Fdevfd:
+	case Flink:
+	case Fctty:
+		bzero((caddr_t) vap, sizeof(*vap));
+		vattr_null(vap);
+
+		/* Fields that depend on the kind of synthetic node. */
+		if (node->fd_type == Flink) {
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VLNK;
+			vap->va_nlink = 1;
+			vap->va_size = strlen(node->fd_link);
+		} else if (node->fd_type == Fctty) {
+			vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH;
+			vap->va_type = VFIFO;
+			vap->va_nlink = 1;
+			vap->va_size = 0;
+		} else {
+			/* the two directories */
+			vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+			vap->va_type = VDIR;
+			vap->va_nlink = 2;
+			vap->va_size = DEV_BSIZE;
+		}
+
+		/* Fields shared by every synthetic node. */
+		vap->va_fileid = node->fd_ix;
+		vap->va_uid = 0;
+		vap->va_gid = 0;
+		vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+		vap->va_blocksize = DEV_BSIZE;
+		vap->va_atime.ts_sec = boottime.tv_sec;
+		vap->va_atime.ts_nsec = 0;
+		vap->va_mtime = vap->va_atime;
+		vap->va_ctime = vap->va_mtime;
+		vap->va_gen = 0;
+		vap->va_flags = 0;
+		vap->va_rdev = 0;
+		vap->va_bytes = 0;
+		break;
+
+	default:
+		panic("fdesc_getattr");
+		break;
+	}
+
+	if (error == 0)
+		vp->v_type = vap->va_type;
+
+	return (error);
+}
+
+/*
+ * Set attributes on an fdesc node.
+ *
+ * Only per-descriptor (Fdesc) nodes can be changed: the tty node
+ * quietly accepts the request (returns 0) and every other node type
+ * refuses it with EACCES.  For an Fdesc node whose descriptor is a
+ * vnode the request is forwarded to VOP_SETATTR() on that vnode; for
+ * a socket it is ignored and 0 is returned.  Any other file type
+ * panics.  EBADF is returned when the descriptor is not open.
+ */
+int
+fdesc_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct filedesc *fdp = ap->a_p->p_fd;
+	struct fdescnode *node = VTOFDESC(ap->a_vp);
+	struct file *fp;
+	unsigned fd;
+	int error;
+
+	/* Can't mess with the root vnode (or anything but Fdesc nodes). */
+	if (node->fd_type == Fctty)
+		return (0);
+	if (node->fd_type != Fdesc)
+		return (EACCES);
+
+	fd = node->fd_fd;
+	if (fd >= fdp->fd_nfiles)
+		return (EBADF);
+	fp = fdp->fd_ofiles[fd];
+	if (fp == NULL)
+		return (EBADF);
+
+	/* Can setattr the underlying vnode, but not sockets! */
+	if (fp->f_type == DTYPE_VNODE) {
+		error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap,
+		    ap->a_cred, ap->a_p);
+	} else if (fp->f_type == DTYPE_SOCKET) {
+		error = 0;
+	} else {
+		panic("fdesc setattr");
+	}
+
+	return (error);
+}
+
+#define UIO_MX 16 /* size in bytes of every directory record passed to uiomove() */
+
+static struct dirtmp { /* template for the fixed entries of the root directory */
+ u_long d_fileno; /* node index; 0 marks the end of the table */
+ u_short d_reclen;
+ u_short d_namlen; /* length of d_name, not counting the NUL */
+ char d_name[8];
+} rootent[] = {
+ { FD_DEVFD, UIO_MX, 2, "fd" },
+ { FD_STDIN, UIO_MX, 5, "stdin" },
+ { FD_STDOUT, UIO_MX, 6, "stdout" },
+ { FD_STDERR, UIO_MX, 6, "stderr" },
+ { FD_CTTY, UIO_MX, 3, "tty" },
+ { 0 } /* terminator checked by fdesc_readdir() */
+};
+
+/*
+ * Read directory entries from the root (Froot) or fd (Fdevfd)
+ * directory.
+ *
+ * Every record is UIO_MX bytes long, so the directory offset is simply
+ * (entry index * UIO_MX).  The root directory is produced from the
+ * rootent[] table: "tty" is skipped when the process has no
+ * controlling tty vnode, and stdin/stdout/stderr are skipped when the
+ * matching descriptor is not open.  The fd directory contains one
+ * entry, named by its decimal number, for every open descriptor.
+ *
+ * The offset is supplied by the caller and ends up as an array
+ * subscript, so it is validated first: a negative offset is rejected
+ * with EINVAL, and an offset at or beyond the end of the directory
+ * reads nothing and returns 0 (end of directory) instead of indexing
+ * past rootent[] or the descriptor table.
+ *
+ * Returns 0 for the tty node, ENOTDIR for a per-descriptor node, and
+ * otherwise 0 or the error from uiomove().
+ */
+int
+fdesc_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct filedesc *fdp;
+	int i;
+	int error;
+
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		return (0);
+
+	case Fdesc:
+		return (ENOTDIR);
+
+	default:
+		break;
+	}
+
+	/* A negative offset would turn into a negative subscript below. */
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	fdp = uio->uio_procp->p_fd;
+
+	if (VTOFDESC(ap->a_vp)->fd_type == Froot) {
+		struct dirent d;
+		struct dirent *dp = &d;
+		struct dirtmp *dt;
+		int nrootent = sizeof(rootent) / sizeof(rootent[0]);
+
+		/*
+		 * Compare before narrowing to int so that a huge offset
+		 * cannot wrap around into a small or negative index.
+		 */
+		if (uio->uio_offset / UIO_MX >= nrootent)
+			return (0);
+		i = uio->uio_offset / UIO_MX;
+		error = 0;
+
+		while (uio->uio_resid > 0 && i < nrootent) {
+			dt = &rootent[i];
+			if (dt->d_fileno == 0) {
+				/* end of table */
+				break;
+			}
+			i++;
+
+			switch (dt->d_fileno) {
+			case FD_CTTY:
+				if (cttyvp(uio->uio_procp) == NULL)
+					continue;
+				break;
+
+			case FD_STDIN:
+			case FD_STDOUT:
+			case FD_STDERR:
+				if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles)
+					continue;
+				if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL)
+					continue;
+				break;
+			}
+			bzero((caddr_t) dp, UIO_MX);
+			dp->d_fileno = dt->d_fileno;
+			dp->d_namlen = dt->d_namlen;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_reclen = dt->d_reclen;
+			bcopy(dt->d_name, dp->d_name, dp->d_namlen+1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		uio->uio_offset = i * UIO_MX;
+		return (error);
+	}
+
+	/* Same range check as above, against the descriptor table size. */
+	if (uio->uio_offset / UIO_MX >= fdp->fd_nfiles)
+		return (0);
+	i = uio->uio_offset / UIO_MX;
+	error = 0;
+	while (uio->uio_resid > 0) {
+		if (i >= fdp->fd_nfiles)
+			break;
+
+		if (fdp->fd_ofiles[i] != NULL) {
+			struct dirent d;
+			struct dirent *dp = &d;
+
+			bzero((caddr_t) dp, UIO_MX);
+
+			dp->d_namlen = sprintf(dp->d_name, "%d", i);
+			dp->d_reclen = UIO_MX;
+			dp->d_type = DT_UNKNOWN;
+			dp->d_fileno = i + FD_STDIN;
+			/*
+			 * And ship to userland
+			 */
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+		}
+		i++;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+	return (error);
+}
+
+/*
+ * Read the target of an fdesc symbolic link.
+ *
+ * Fails with EPERM when the vnode is not of type VLNK and with
+ * EOPNOTSUPP when the node is not an Flink node.  Otherwise the link
+ * text stored in the node (without its terminating NUL) is copied out
+ * with uiomove() and its status returned.
+ */
+int
+fdesc_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	char *target;
+
+	if (vp->v_type != VLNK)
+		return (EPERM);
+
+	if (VTOFDESC(vp)->fd_type != Flink)
+		return (EOPNOTSUPP);
+
+	target = VTOFDESC(vp)->fd_link;
+	return (uiomove(target, strlen(target), ap->a_uio));
+}
+
+/*
+ * Read from an fdesc node.  Only the controlling-tty node (Fctty)
+ * supports this, by passing the request to cttyread(); every other
+ * node type yields EOPNOTSUPP.
+ */
+int
+fdesc_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyread(devctty, ap->a_uio, ap->a_ioflag));
+}
+
+/*
+ * Write to an fdesc node.  Only the controlling-tty node (Fctty)
+ * supports this, by passing the request to cttywrite(); every other
+ * node type yields EOPNOTSUPP.
+ */
+int
+fdesc_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttywrite(devctty, ap->a_uio, ap->a_ioflag));
+}
+
+/*
+ * ioctl on an fdesc node.  Only the controlling-tty node (Fctty)
+ * supports this, by passing the request to cttyioctl(); every other
+ * node type yields EOPNOTSUPP.
+ */
+int
+fdesc_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	if (VTOFDESC(ap->a_vp)->fd_type != Fctty)
+		return (EOPNOTSUPP);
+
+	return (cttyioctl(devctty, ap->a_command, ap->a_data,
+	    ap->a_fflag, ap->a_p));
+}
+
+/*
+ * select on an fdesc node.  Only the controlling-tty node (Fctty)
+ * supports this, by passing the request to cttyselect(); every other
+ * node type yields EOPNOTSUPP.
+ *
+ * The argument forwarded to cttyselect() is a_which, the direction
+ * being selected on.  The open-file flags in a_fflags are a different
+ * quantity (both are separate members of vop_select_args) and were
+ * previously passed by mistake, so the tty layer never saw a
+ * direction it recognised.
+ * NOTE(review): cttyselect() is defined outside this file; its second
+ * parameter is taken to be the select direction -- confirm against
+ * kern/tty_tty.c.
+ */
+int
+fdesc_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error = EOPNOTSUPP;
+
+	switch (VTOFDESC(ap->a_vp)->fd_type) {
+	case Fctty:
+		error = cttyselect(devctty, ap->a_which, ap->a_p);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Called when the vnode becomes inactive.  The v_type field is reset
+ * to VNON to avoid nasty things happening in vgone().  Always
+ * returns 0.
+ */
+int
+fdesc_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	ap->a_vp->v_type = VNON;
+
+	return 0;
+}
+
+/*
+ * Reclaim an fdesc vnode: take its fdescnode off the hash chain,
+ * free the node and clear the vnode's private data pointer.
+ * Always returns 0.
+ */
+int
+fdesc_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *victim = ap->a_vp;
+
+	remque(VTOFDESC(victim));
+	FREE(victim->v_data, M_TEMP);
+	victim->v_data = 0;
+
+	return 0;
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ *
+ * The value for ap->a_name is stored through ap->a_retval and 0 is
+ * returned; a name that is not handled here returns EINVAL and leaves
+ * *a_retval untouched.
+ *
+ * The return type is written out as int, matching the other vnode
+ * operations in this file, instead of relying on implicit int.
+ */
+int
+fdesc_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	default:
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Print a description of a /dev/fd vnode.  The argument is not
+ * examined; a fixed line is printed and 0 is returned.
+ */
+/* ARGSUSED */
+int
+fdesc_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_NON, fdesc vnode\n");
+
+	return 0;
+}
+
+/*
+ * vfree operation: the arguments are ignored and 0 is returned.
+ */
+/*void*/
+int
+fdesc_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	return 0;
+}
+
+/*
+ * /dev/fd vnode unsupported operation: always fails with EOPNOTSUPP.
+ */
+int
+fdesc_enotsupp()
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * /dev/fd "should never get here" operation: panics when called.
+ */
+int
+fdesc_badop()
+{
+	panic("fdesc: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * /dev/fd vnode null operation: does nothing and reports success.
+ */
+int
+fdesc_nullop()
+{
+	return 0;
+}
+/*
+ * Vnode operations that have no dedicated routine.  Each macro casts a
+ * shared stub to the function-pointer type of the operation it stands
+ * in for: fdesc_enotsupp for operations that are refused, fdesc_badop
+ * for operations that must never be reached, and nullop for
+ * operations that silently succeed.  Note that these macros use
+ * nullop, not the fdesc_nullop defined above.
+ * NOTE(review): fdesc_valloc is cast to a signature with an explicit
+ * parameter list rather than a struct vop_valloc_args pointer, unlike
+ * every other macro here -- confirm that this is intended.
+ */
+#define fdesc_create ((int (*) __P((struct vop_create_args *)))fdesc_enotsupp)
+#define fdesc_mknod ((int (*) __P((struct vop_mknod_args *)))fdesc_enotsupp)
+#define fdesc_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define fdesc_access ((int (*) __P((struct vop_access_args *)))nullop)
+#define fdesc_mmap ((int (*) __P((struct vop_mmap_args *)))fdesc_enotsupp)
+#define fdesc_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define fdesc_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define fdesc_remove ((int (*) __P((struct vop_remove_args *)))fdesc_enotsupp)
+#define fdesc_link ((int (*) __P((struct vop_link_args *)))fdesc_enotsupp)
+#define fdesc_rename ((int (*) __P((struct vop_rename_args *)))fdesc_enotsupp)
+#define fdesc_mkdir ((int (*) __P((struct vop_mkdir_args *)))fdesc_enotsupp)
+#define fdesc_rmdir ((int (*) __P((struct vop_rmdir_args *)))fdesc_enotsupp)
+#define fdesc_symlink ((int (*) __P((struct vop_symlink_args *)))fdesc_enotsupp)
+#define fdesc_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define fdesc_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define fdesc_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define fdesc_bmap ((int (*) __P((struct vop_bmap_args *)))fdesc_badop)
+#define fdesc_strategy ((int (*) __P((struct vop_strategy_args *)))fdesc_badop)
+#define fdesc_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define fdesc_advlock ((int (*) __P((struct vop_advlock_args *)))fdesc_enotsupp)
+#define fdesc_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))fdesc_enotsupp)
+#define fdesc_vget ((int (*) __P((struct vop_vget_args *)))fdesc_enotsupp)
+#define fdesc_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) fdesc_enotsupp)
+#define fdesc_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))fdesc_enotsupp)
+#define fdesc_update ((int (*) __P((struct vop_update_args *)))fdesc_enotsupp)
+#define fdesc_bwrite ((int (*) __P((struct vop_bwrite_args *)))fdesc_enotsupp)
+/*
+ * Vnode operations table for fdesc.  Each entry pairs a vnode
+ * operation descriptor with the fdesc routine (or cast stub) that
+ * implements it; the first entry maps vop_default_desc to
+ * vn_default_error and a NULL pair ends the list.
+ * fdesc_vnodeop_p is the vector handed to getnewvnode() in
+ * fdesc_allocvp().  How it is filled in from this table is not visible
+ * in this file -- presumably VFS initialisation does so through
+ * fdesc_vnodeop_opv_desc; confirm.
+ */
+int (**fdesc_vnodeop_p)();
+struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fdesc_lookup }, /* lookup */
+ { &vop_create_desc, fdesc_create }, /* create */
+ { &vop_mknod_desc, fdesc_mknod }, /* mknod */
+ { &vop_open_desc, fdesc_open }, /* open */
+ { &vop_close_desc, fdesc_close }, /* close */
+ { &vop_access_desc, fdesc_access }, /* access */
+ { &vop_getattr_desc, fdesc_getattr }, /* getattr */
+ { &vop_setattr_desc, fdesc_setattr }, /* setattr */
+ { &vop_read_desc, fdesc_read }, /* read */
+ { &vop_write_desc, fdesc_write }, /* write */
+ { &vop_ioctl_desc, fdesc_ioctl }, /* ioctl */
+ { &vop_select_desc, fdesc_select }, /* select */
+ { &vop_mmap_desc, fdesc_mmap }, /* mmap */
+ { &vop_fsync_desc, fdesc_fsync }, /* fsync */
+ { &vop_seek_desc, fdesc_seek }, /* seek */
+ { &vop_remove_desc, fdesc_remove }, /* remove */
+ { &vop_link_desc, fdesc_link }, /* link */
+ { &vop_rename_desc, fdesc_rename }, /* rename */
+ { &vop_mkdir_desc, fdesc_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fdesc_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fdesc_symlink }, /* symlink */
+ { &vop_readdir_desc, fdesc_readdir }, /* readdir */
+ { &vop_readlink_desc, fdesc_readlink }, /* readlink */
+ { &vop_abortop_desc, fdesc_abortop }, /* abortop */
+ { &vop_inactive_desc, fdesc_inactive }, /* inactive */
+ { &vop_reclaim_desc, fdesc_reclaim }, /* reclaim */
+ { &vop_lock_desc, fdesc_lock }, /* lock */
+ { &vop_unlock_desc, fdesc_unlock }, /* unlock */
+ { &vop_bmap_desc, fdesc_bmap }, /* bmap */
+ { &vop_strategy_desc, fdesc_strategy }, /* strategy */
+ { &vop_print_desc, fdesc_print }, /* print */
+ { &vop_islocked_desc, fdesc_islocked }, /* islocked */
+ { &vop_pathconf_desc, fdesc_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fdesc_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fdesc_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fdesc_valloc }, /* valloc */
+ { &vop_vfree_desc, fdesc_vfree }, /* vfree */
+ { &vop_truncate_desc, fdesc_truncate }, /* truncate */
+ { &vop_update_desc, fdesc_update }, /* update */
+ { &vop_bwrite_desc, fdesc_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fdesc_vnodeop_opv_desc =
+ { &fdesc_vnodeop_p, fdesc_vnodeop_entries };
diff --git a/sys/fs/fifofs/fifo.h b/sys/fs/fifofs/fifo.h
new file mode 100644
index 0000000..e89186d
--- /dev/null
+++ b/sys/fs/fifofs/fifo.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo.h 8.2 (Berkeley) 2/2/94
+ */
+
+#ifdef FIFO
+/*
+ * Prototypes for fifo operations on vnodes.
+ *
+ * Everything below is visible only when FIFO is defined.  Operations
+ * with a dedicated routine are declared as functions taking a pointer
+ * to the matching vop_*_args structure.  The remaining operations are
+ * macros that cast one of three shared routines -- fifo_badop,
+ * fifo_ebadf or nullop -- to the function-pointer type of the
+ * operation they stand in for.
+ */
+int fifo_badop(),
+ fifo_ebadf();
+
+int fifo_lookup __P((struct vop_lookup_args *));
+#define fifo_create ((int (*) __P((struct vop_create_args *)))fifo_badop)
+#define fifo_mknod ((int (*) __P((struct vop_mknod_args *)))fifo_badop)
+int fifo_open __P((struct vop_open_args *));
+int fifo_close __P((struct vop_close_args *));
+#define fifo_access ((int (*) __P((struct vop_access_args *)))fifo_ebadf)
+#define fifo_getattr ((int (*) __P((struct vop_getattr_args *)))fifo_ebadf)
+#define fifo_setattr ((int (*) __P((struct vop_setattr_args *)))fifo_ebadf)
+int fifo_read __P((struct vop_read_args *));
+int fifo_write __P((struct vop_write_args *));
+int fifo_ioctl __P((struct vop_ioctl_args *));
+int fifo_select __P((struct vop_select_args *));
+#define fifo_mmap ((int (*) __P((struct vop_mmap_args *)))fifo_badop)
+#define fifo_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define fifo_seek ((int (*) __P((struct vop_seek_args *)))fifo_badop)
+#define fifo_remove ((int (*) __P((struct vop_remove_args *)))fifo_badop)
+#define fifo_link ((int (*) __P((struct vop_link_args *)))fifo_badop)
+#define fifo_rename ((int (*) __P((struct vop_rename_args *)))fifo_badop)
+#define fifo_mkdir ((int (*) __P((struct vop_mkdir_args *)))fifo_badop)
+#define fifo_rmdir ((int (*) __P((struct vop_rmdir_args *)))fifo_badop)
+#define fifo_symlink ((int (*) __P((struct vop_symlink_args *)))fifo_badop)
+#define fifo_readdir ((int (*) __P((struct vop_readdir_args *)))fifo_badop)
+#define fifo_readlink ((int (*) __P((struct vop_readlink_args *)))fifo_badop)
+#define fifo_abortop ((int (*) __P((struct vop_abortop_args *)))fifo_badop)
+#define fifo_inactive ((int (*) __P((struct vop_inactive_args *)))nullop)
+#define fifo_reclaim ((int (*) __P((struct vop_reclaim_args *)))nullop)
+int fifo_lock __P((struct vop_lock_args *));
+int fifo_unlock __P((struct vop_unlock_args *));
+int fifo_bmap __P((struct vop_bmap_args *));
+#define fifo_strategy ((int (*) __P((struct vop_strategy_args *)))fifo_badop)
+int fifo_print __P((struct vop_print_args *));
+#define fifo_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+int fifo_pathconf __P((struct vop_pathconf_args *));
+int fifo_advlock __P((struct vop_advlock_args *));
+#define fifo_blkatoff ((int (*) __P((struct vop_blkatoff_args *)))fifo_badop)
+#define fifo_valloc ((int (*) __P((struct vop_valloc_args *)))fifo_badop)
+#define fifo_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))fifo_badop)
+#define fifo_vfree ((int (*) __P((struct vop_vfree_args *)))fifo_badop)
+#define fifo_truncate ((int (*) __P((struct vop_truncate_args *)))nullop)
+#define fifo_update ((int (*) __P((struct vop_update_args *)))nullop)
+#define fifo_bwrite ((int (*) __P((struct vop_bwrite_args *)))nullop)
+#endif /* FIFO */
diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c
new file mode 100644
index 0000000..bad33a4
--- /dev/null
+++ b/sys/fs/fifofs/fifo_vnops.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)fifo_vnops.c 8.2 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/stat.h>
+#include <sys/systm.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <miscfs/fifofs/fifo.h>
+
+/*
+ * This structure is associated with the FIFO vnode and stores
+ * the state associated with the FIFO.
+ */
+struct fifoinfo {
+ struct socket *fi_readsock; /* socket fifo_read() receives from */
+ struct socket *fi_writesock; /* socket fifo_write() sends on */
+ long fi_readers; /* opens for reading not yet closed */
+ long fi_writers; /* opens for writing not yet closed */
+};
+
+/*
+ * Operations vector for fifo vnodes. Each entry pairs a vnode
+ * operation descriptor with the fifo routine (or fifo.h macro) that
+ * services it; the list is terminated by a NULL/NULL entry.
+ * fifo_vnodeop_opv_desc bundles the table with the address of
+ * fifo_vnodeop_p -- presumably so that the vnode-op setup code can
+ * build the dispatch vector and store it there (not visible here).
+ */
+int (**fifo_vnodeop_p)();
+struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, fifo_close }, /* close */
+ { &vop_access_desc, fifo_access }, /* access */
+ { &vop_getattr_desc, fifo_getattr }, /* getattr */
+ { &vop_setattr_desc, fifo_setattr }, /* setattr */
+ { &vop_read_desc, fifo_read }, /* read */
+ { &vop_write_desc, fifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, fifo_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, fifo_inactive }, /* inactive */
+ { &vop_reclaim_desc, fifo_reclaim }, /* reclaim */
+ { &vop_lock_desc, fifo_lock }, /* lock */
+ { &vop_unlock_desc, fifo_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_strategy }, /* strategy */
+ { &vop_print_desc, fifo_print }, /* print */
+ { &vop_islocked_desc, fifo_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, fifo_update }, /* update */
+ { &vop_bwrite_desc, fifo_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc fifo_vnodeop_opv_desc =
+ { &fifo_vnodeop_p, fifo_vnodeop_entries };
+
+/*
+ * Name lookup through a fifo can never succeed: hand back no vnode
+ * and report ENOTDIR.
+ */
+/* ARGSUSED */
+int
+fifo_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct vnode **resultp = ap->a_vpp;
+
+	*resultp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open called to set up a new instance of a fifo or
+ * to find an active instance of a fifo.
+ *
+ * The first open allocates the fifoinfo and a connected pair of
+ * AF_UNIX stream sockets; later opens only adjust the reader/writer
+ * counts. A request for both FREAD and FWRITE is rejected with
+ * EINVAL. A blocking open sleeps until the opposite side is present;
+ * a non-blocking read open returns at once, and a non-blocking write
+ * open with no reader fails with ENXIO. Any error raised after the
+ * counts were bumped is undone by calling VOP_CLOSE.
+ */
+/* ARGSUSED */
+fifo_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct fifoinfo *fip;
+ struct socket *rso, *wso;
+ int error;
+ static char openstr[] = "fifo";
+
+ /* Read/write opens of a fifo are not supported. */
+ if ((ap->a_mode & (FREAD|FWRITE)) == (FREAD|FWRITE))
+ return (EINVAL);
+ if ((fip = vp->v_fifoinfo) == NULL) {
+ /*
+ * First open: build the per-fifo state. Every failure
+ * below tears down whatever was created so far and
+ * clears v_fifoinfo again.
+ */
+ MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK);
+ vp->v_fifoinfo = fip;
+ if (error = socreate(AF_UNIX, &rso, SOCK_STREAM, 0)) {
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readsock = rso;
+ if (error = socreate(AF_UNIX, &wso, SOCK_STREAM, 0)) {
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_writesock = wso;
+ if (error = unp_connect2(wso, rso)) {
+ (void)soclose(wso);
+ (void)soclose(rso);
+ free(fip, M_VNODE);
+ vp->v_fifoinfo = NULL;
+ return (error);
+ }
+ fip->fi_readers = fip->fi_writers = 0;
+ /* No reader or writer yet: mark both directions shut. */
+ wso->so_state |= SS_CANTRCVMORE;
+ rso->so_state |= SS_CANTSENDMORE;
+ }
+ error = 0;
+ if (ap->a_mode & FREAD) {
+ fip->fi_readers++;
+ if (fip->fi_readers == 1) {
+ /* First reader: let writers send, wake any that wait. */
+ fip->fi_writesock->so_state &= ~SS_CANTSENDMORE;
+ if (fip->fi_writers > 0)
+ wakeup((caddr_t)&fip->fi_writers);
+ }
+ if (ap->a_mode & O_NONBLOCK)
+ return (0);
+ /* Sleep (vnode unlocked) until a writer shows up. */
+ while (fip->fi_writers == 0) {
+ VOP_UNLOCK(vp);
+ error = tsleep((caddr_t)&fip->fi_readers,
+ PCATCH | PSOCK, openstr, 0);
+ VOP_LOCK(vp);
+ if (error)
+ break;
+ }
+ } else {
+ fip->fi_writers++;
+ if (fip->fi_readers == 0 && (ap->a_mode & O_NONBLOCK)) {
+ error = ENXIO;
+ } else {
+ if (fip->fi_writers == 1) {
+ /* First writer: clear EOF on the read side, wake readers. */
+ fip->fi_readsock->so_state &= ~SS_CANTRCVMORE;
+ if (fip->fi_readers > 0)
+ wakeup((caddr_t)&fip->fi_readers);
+ }
+ /* Sleep (vnode unlocked) until a reader shows up. */
+ while (fip->fi_readers == 0) {
+ VOP_UNLOCK(vp);
+ error = tsleep((caddr_t)&fip->fi_writers,
+ PCATCH | PSOCK, openstr, 0);
+ VOP_LOCK(vp);
+ if (error)
+ break;
+ }
+ }
+ }
+ /* Undo the count taken above if the open did not complete. */
+ if (error)
+ VOP_CLOSE(vp, ap->a_mode, ap->a_cred, ap->a_p);
+ return (error);
+}
+
+/*
+ * Vnode op for read: receive from the fifo's read socket with the
+ * vnode unlocked, honouring IO_NDELAY for the duration of the call.
+ */
+/* ARGSUSED */
+int
+fifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct uio *uiop = ap->a_uio;
+	struct socket *so = vp->v_fifoinfo->fi_readsock;
+	int nodelay = ap->a_ioflag & IO_NDELAY;
+	int resid_before, rv;
+
+#ifdef DIAGNOSTIC
+	if (uiop->uio_rw != UIO_READ)
+		panic("fifo_read mode");
+#endif
+	if (uiop->uio_resid == 0)
+		return (0);
+	if (nodelay)
+		so->so_state |= SS_NBIO;
+	resid_before = uiop->uio_resid;
+
+	VOP_UNLOCK(vp);
+	rv = soreceive(so, (struct mbuf **)0, uiop, (int *)0,
+	    (struct mbuf **)0, (struct mbuf **)0);
+	VOP_LOCK(vp);
+
+	/*
+	 * Nothing transferred: drop the EOF indication so that it is
+	 * reported only once.
+	 */
+	if (resid_before == uiop->uio_resid)
+		so->so_state &= ~SS_CANTRCVMORE;
+	if (nodelay)
+		so->so_state &= ~SS_NBIO;
+	return (rv);
+}
+
+/*
+ * Vnode op for write: send on the fifo's write socket with the
+ * vnode unlocked, honouring IO_NDELAY for the duration of the call.
+ */
+/* ARGSUSED */
+int
+fifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct socket *so = vp->v_fifoinfo->fi_writesock;
+	int nodelay = ap->a_ioflag & IO_NDELAY;
+	int rv;
+
+#ifdef DIAGNOSTIC
+	if (ap->a_uio->uio_rw != UIO_WRITE)
+		panic("fifo_write mode");
+#endif
+	if (nodelay)
+		so->so_state |= SS_NBIO;
+
+	VOP_UNLOCK(vp);
+	rv = sosend(so, (struct mbuf *)0, ap->a_uio, 0, (struct mbuf *)0, 0);
+	VOP_LOCK(vp);
+
+	if (nodelay)
+		so->so_state &= ~SS_NBIO;
+	return (rv);
+}
+
+/*
+ * Device ioctl operation. FIONBIO is accepted and ignored; anything
+ * else is passed to the socket layer through a scratch struct file
+ * that points at the read or write socket, chosen by a_fflag.
+ */
+/* ARGSUSED */
+int
+fifo_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int  a_command;
+		caddr_t  a_data;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fifoinfo *fip;
+	struct file filetmp;
+
+	if (ap->a_command == FIONBIO)
+		return (0);
+	fip = ap->a_vp->v_fifoinfo;
+	filetmp.f_data = (caddr_t)((ap->a_fflag & FREAD) ?
+	    fip->fi_readsock : fip->fi_writesock);
+	return (soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p));
+}
+
+/*
+ * Select on a fifo: hand the request to the socket layer through a
+ * scratch struct file that points at the read or write socket,
+ * chosen by a_fflags.
+ */
+/* ARGSUSED */
+int
+fifo_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int  a_which;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct fifoinfo *fip = ap->a_vp->v_fifoinfo;
+	struct file filetmp;
+
+	filetmp.f_data = (caddr_t)((ap->a_fflags & FREAD) ?
+	    fip->fi_readsock : fip->fi_writesock);
+	return (soo_select(&filetmp, ap->a_which, ap->a_p));
+}
+
+/*
+ * Identity block map: report the vnode and block number that were
+ * passed in, for whichever output pointers the caller supplied.
+ */
+int
+fifo_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Fifo vnodes are not locked at present; locking always succeeds.
+ */
+/* ARGSUSED */
+int
+fifo_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Counterpart of fifo_lock: nothing to release, always succeeds.
+ */
+/* ARGSUSED */
+int
+fifo_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Device close routine. Drops the reader or writer count taken at
+ * open time and, when a side becomes empty, shuts that direction of
+ * the socket pair. Once the vnode has no other users the sockets
+ * and the fifoinfo are released; the first soclose error, if any,
+ * is returned.
+ */
+/* ARGSUSED */
+int
+fifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct fifoinfo *fip = vp->v_fifoinfo;
+	int rderr, wrerr;
+
+	if (ap->a_fflag & FWRITE) {
+		if (--fip->fi_writers == 0)
+			socantrcvmore(fip->fi_readsock);
+	} else {
+		if (--fip->fi_readers == 0)
+			socantsendmore(fip->fi_writesock);
+	}
+
+	/* Still in use elsewhere: keep the fifo state alive. */
+	if (vp->v_usecount > 1)
+		return (0);
+
+	rderr = soclose(fip->fi_readsock);
+	wrerr = soclose(fip->fi_writesock);
+	FREE(fip, M_VNODE);
+	vp->v_fifoinfo = NULL;
+	return (rderr ? rderr : wrerr);
+}
+
+/*
+ * Print out the contents of a fifo vnode.
+ *
+ * Emits the tag, the reader/writer summary from fifo_printinfo()
+ * and a newline. Returns 0: the operation is prototyped as
+ * returning int, so it must not fall off the end without a value.
+ */
+int
+fifo_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	printf("tag VT_NON");
+	fifo_printinfo(ap->a_vp);
+	printf("\n");
+	return (0);
+}
+
+/*
+ * Print out internal contents of a fifo vnode: the number of
+ * readers and writers recorded in its fifoinfo.
+ *
+ * fi_readers and fi_writers are longs, so they are printed with
+ * %ld; %d is wrong wherever long is wider than int. Returns 0.
+ */
+int
+fifo_printinfo(vp)
+	struct vnode *vp;
+{
+	register struct fifoinfo *fip = vp->v_fifoinfo;
+
+	printf(", fifo with %ld readers and %ld writers",
+	    fip->fi_readers, fip->fi_writers);
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to fifo's.
+ * Known names store their value through a_retval and yield 0;
+ * any other name yields EINVAL and leaves *a_retval untouched.
+ */
+int
+fifo_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int value;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		value = LINK_MAX;
+		break;
+	case _PC_PIPE_BUF:
+		value = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		value = 1;
+		break;
+	default:
+		return (EINVAL);
+	}
+	*ap->a_retval = value;
+	return (0);
+}
+
+/*
+ * Fifo failed operation: unconditionally reports EBADF.
+ */
+int
+fifo_ebadf()
+{
+	return (EBADF);
+}
+
+/*
+ * Fifo advisory byte-level locks are not supported; every request
+ * is answered with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+fifo_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Fifo bad operation: stands in for vnode operations that must
+ * never be invoked on a fifo, and panics if one is.
+ */
+int
+fifo_badop()
+{
+	panic("fifo_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h
new file mode 100644
index 0000000..14286ff
--- /dev/null
+++ b/sys/fs/nullfs/null.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null.h 8.2 (Berkeley) 1/21/94
+ *
+ * $Id: lofs.h,v 1.8 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+/*
+ * Mount arguments for the null layer; nullfs_mount() copies this
+ * structure in from the caller-supplied data pointer.
+ */
+struct null_args {
+ char *target; /* Target of loopback */
+};
+
+/*
+ * Per-mount private data, reached from a struct mount through
+ * MOUNTTONULLMOUNT().
+ */
+struct null_mount {
+ struct mount *nullm_vfs; /* mount of the lower (target) vnode */
+ struct vnode *nullm_rootvp; /* Reference to root null_node */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * One null_node exists per alias vnode; it is stored in the alias
+ * vnode's v_data and linked onto a hash chain keyed by the lower
+ * vnode. The two chain pointers must stay first: the hash heads in
+ * null_subr.c are cast to struct null_node * when walking a chain.
+ */
+struct null_node {
+ struct null_node *null_forw; /* Hash chain */
+ struct null_node *null_back;
+ struct vnode *null_lowervp; /* VREFed once */
+ struct vnode *null_vnode; /* Back pointer */
+};
+
+/* Find or create the null-layer alias vnode for `target' on mount mp. */
+extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+
+/* Conversions between a mount/vnode and its null-layer private data. */
+#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data))
+#define VTONULL(vp) ((struct null_node *)(vp)->v_data)
+#define NULLTOV(xp) ((xp)->null_vnode)
+/*
+ * NULLVPTOLOWERVP yields the lower vnode behind a null vnode. With
+ * NULLFS_DIAGNOSTIC it is routed through null_checkvp(), which is
+ * also handed the file and line of the call site.
+ */
+#ifdef NULLFS_DIAGNOSTIC
+extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__)
+#else
+#define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp)
+#endif
+
+/* Vnode operations vector and VFS operations table of the null layer. */
+extern int (**null_vnodeop_p)();
+extern struct vfsops null_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c
new file mode 100644
index 0000000..a31723f
--- /dev/null
+++ b/sys/fs/nullfs/null_subr.c
@@ -0,0 +1,293 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_subr.c 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: lofs_subr.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Hash a lower vnode pointer into one of NNULLNODECACHE buckets:
+ * shift the address right by LOG2_SIZEVNODE bits and mask with
+ * NNULLNODECACHE-1, which requires NNULLNODECACHE to be a power
+ * of two.
+ */
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+#define NNULLNODECACHE 16
+#define NULL_NHASH(vp) ((((u_long)vp)>>LOG2_SIZEVNODE) & (NNULLNODECACHE-1))
+
+/*
+ * Null layer cache:
+ * Each cache entry holds a reference to the lower vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the lower vnode is VREF'd. When the
+ * alias is removed the lower vnode is vrele'd.
+ */
+
+/*
+ * Cache head
+ *
+ * Each head is the anchor of a circular, doubly linked chain of
+ * null_nodes. Its two pointers mirror the first two members of
+ * struct null_node because heads are cast to struct null_node *
+ * when a chain is initialised and walked.
+ */
+struct null_node_cache {
+ struct null_node *ac_forw; /* first node on the chain */
+ struct null_node *ac_back; /* last node on the chain */
+};
+
+/* One chain head per hash bucket, indexed by NULL_NHASH(). */
+static struct null_node_cache null_node_cache[NNULLNODECACHE];
+
+/*
+ * Initialise cache headers: make every hash chain an empty circular
+ * list whose head points at itself in both directions.
+ *
+ * Returns 0. The routine is declared "int nullfs_init __P((void))"
+ * in null_vfsops.c, so it must return a defined value instead of
+ * falling off the end.
+ */
+int
+nullfs_init()
+{
+	struct null_node_cache *ac;
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_init\n");		/* printed during system boot */
+#endif
+
+	for (ac = null_node_cache; ac < null_node_cache + NNULLNODECACHE; ac++)
+		ac->ac_forw = ac->ac_back = (struct null_node *) ac;
+	return (0);
+}
+
+/*
+ * Map a lower vnode to the head of the hash chain it belongs on.
+ */
+static struct null_node_cache *
+null_node_hash(lowervp)
+	struct vnode *lowervp;
+{
+	u_long bucket = NULL_NHASH(lowervp);
+
+	return (null_node_cache + bucket);
+}
+
+/*
+ * Return a VREF'ed alias for lower vnode if already exists, else 0.
+ * Only aliases that belong to mount mp are considered.
+ */
+static struct vnode *
+null_node_find(mp, lowervp)
+ struct mount *mp;
+ struct vnode *lowervp;
+{
+ struct null_node_cache *hd;
+ struct null_node *a;
+ struct vnode *vp;
+
+ /*
+ * Find hash base, and then search the (two-way) linked
+ * list looking for a null_node structure which is referencing
+ * the lower vnode. If found, then take a reference on the
+ * alias vnode (but NOT on the lower vnode).
+ */
+ hd = null_node_hash(lowervp);
+loop:
+ /* The chain is circular; reaching the head again ends the walk. */
+ for (a = hd->ac_forw; a != (struct null_node *) hd; a = a->null_forw) {
+ if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) {
+ vp = NULLTOV(a);
+ /*
+ * We need vget for the VXLOCK
+ * stuff, but we don't want to lock
+ * the lower node.
+ */
+ if (vget(vp, 0)) {
+ /* vget failed: rescan the chain from the start. */
+ printf ("null_node_find: vget failed.\n");
+ goto loop;
+ };
+ return (vp);
+ }
+ }
+
+ return NULL;
+}
+
+
+/*
+ * Make a new null_node node.
+ * On success *vpp is the alias vnode for lowervp on mount mp.
+ * A new alias takes its own reference on lowervp and is entered
+ * on the hash chain. If an alias for the same mount and lower
+ * vnode appeared while this routine was allocating, that existing
+ * alias is returned instead and the fresh vnode is discarded.
+ * Returns 0, or the error from getnewvnode().
+ */
+static int
+null_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct null_node_cache *hd;
+	struct null_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp))
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->null_vnode = vp;
+	vp->v_data = xp;
+	xp->null_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 *
+	 * null_node_find() takes (mp, lowervp); passing lowervp
+	 * alone would make it search with the wrong arguments.
+	 */
+	if (othervp = null_node_find(mp, lowervp)) {
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;		/* node is discarded */
+		vp->v_usecount = 0;		/* XXX */
+		*vpp = othervp;
+		return 0;
+	};
+	VREF(lowervp);   /* Extra VREF will be vrele'd in null_node_create */
+	hd = null_node_hash(lowervp);
+	insque(xp, hd);
+	return 0;
+}
+
+
+/*
+ * Try to find an existing null_node vnode referring
+ * to the lower vnode, otherwise make a new null_node vnode which
+ * contains a reference to the lower vnode.
+ *
+ * On success the alias is stored in *newvpp, one reference on
+ * lowervp is released and 0 is returned. On failure the error from
+ * null_node_alloc() is returned; *newvpp is not written and lowervp
+ * keeps the caller's reference.
+ */
+int
+null_node_create(mp, lowervp, newvpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = null_node_find(mp, lowervp)) {
+		/*
+		 * null_node_find has taken another reference
+		 * to the alias vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		vprint("null_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); --- done in null_node_find */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef NULLFS_DIAGNOSTIC
+		printf("null_node_create: create new alias vnode\n");
+#endif
+
+		/*
+		 * Make new vnode reference the null_node.
+		 */
+		if (error = null_node_alloc(mp, lowervp, &aliasvp))
+			return error;
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(lowervp);
+
+#ifdef DIAGNOSTIC
+	if (lowervp->v_usecount < 1) {
+		/* Should never happen... */
+		vprint ("null_node_create: alias ", aliasvp);
+		vprint ("null_node_create: lower ", lowervp);
+		printf ("null_node_create: lower has 0 usecount.\n");
+		panic ("null_node_create: lower has 0 usecount.");
+	};
+#endif
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("null_node_create: alias", aliasvp);
+	vprint("null_node_create: lower", lowervp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+#ifdef NULLFS_DIAGNOSTIC
+/*
+ * Diagnostic replacement for the plain NULLVPTOLOWERVP() lookup:
+ * return the lower vnode of null vnode vp after checking that it is
+ * non-NULL and still referenced. On a failed check the first eight
+ * words of the null_node are dumped, the code spins while
+ * null_checkvp_barrier is non-zero, and then panics. fil and lno
+ * identify the caller and are only used by the disabled trace.
+ * NOTE(review): null_checkvp_barrier is not declared in the part of
+ * the file visible here -- confirm where it is defined.
+ */
+struct vnode *
+null_checkvp(vp, fil, lno)
+ struct vnode *vp;
+ char *fil;
+ int lno;
+{
+ struct null_node *a = VTONULL(vp);
+#ifdef notyet
+ /*
+ * Can't do this check because vop_reclaim runs
+ * with a funny vop vector.
+ */
+ if (vp->v_op != null_vnodeop_p) {
+ printf ("null_checkvp: on non-null-node\n");
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic("null_checkvp");
+ };
+#endif
+ if (a->null_lowervp == NULL) {
+ /* Should never happen */
+ int i; u_long *p;
+ printf("vp = %x, ZERO ptr\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic("null_checkvp");
+ }
+ /* The lower vnode must still hold at least one reference. */
+ if (a->null_lowervp->v_usecount < 1) {
+ int i; u_long *p;
+ printf("vp = %x, unref'ed lowervp\n", vp);
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (null_checkvp_barrier) /*WAIT*/ ;
+ panic ("null with unref'ed lowervp");
+ };
+#ifdef notyet
+ printf("null %x/%d -> %x/%d [%s, %d]\n",
+ NULLTOV(a), NULLTOV(a)->v_usecount,
+ a->null_lowervp, a->null_lowervp->v_usecount,
+ fil, lno);
+#endif
+ return a->null_lowervp;
+}
+#endif
diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c
new file mode 100644
index 0000000..b0d2df7
--- /dev/null
+++ b/sys/fs/nullfs/null_vfsops.c
@@ -0,0 +1,366 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94
+ *
+ * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vfsops.c,v 1.9 1992/05/30 10:26:24 jsp Exp jsp $
+ */
+
+/*
+ * Null Layer
+ * (See null_vnops.c for a description of what this does.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/nullfs/null.h>
+
+/*
+ * Mount null layer
+ *
+ * Copies in the null_args, looks up the target (lower) vnode,
+ * creates the alias vnode that becomes the root of the new mount
+ * and records both in a freshly allocated null_mount.
+ * Returns 0 on success, EOPNOTSUPP for an update request, or the
+ * error from copyin(), namei() or null_node_create().
+ */
+int
+nullfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	int error = 0;
+	struct null_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *nullm_rootvp;
+	struct null_mount *xmp;
+	u_int size;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Updating an existing null mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	/*
+	 * Get argument
+	 */
+	if (error = copyin(data, (caddr_t)&args, sizeof(struct null_args)))
+		return (error);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+		UIO_USERSPACE, args.target, p);
+	if (error = namei(ndp))
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = NULL;
+
+	xmp = (struct null_mount *) malloc(sizeof(struct null_mount),
+				M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	xmp->nullm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Save reference.  Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = null_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  On failure vp was never
+	 * written, so it must not be touched: unlock and release the
+	 * lower vnode, which is still locked (LOCKLEAF) and referenced
+	 * from the lookup above.
+	 */
+	if (error) {
+		VOP_UNLOCK(lowerrootvp);
+		vrele(lowerrootvp);
+		free(xmp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+	/*
+	 * Unlock the alias node.
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in nullfs_unmount.
+	 */
+	nullm_rootvp = vp;
+	nullm_rootvp->v_flag |= VROOT;
+	xmp->nullm_rootvp = nullm_rootvp;
+	if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) xmp;
+	/*
+	 * NOTE(review): the fsid is allocated under the lofs type,
+	 * apparently left over from the lofs code this file derives
+	 * from -- confirm whether a null-specific type is intended.
+	 */
+	getnewfsid(mp, MOUNT_LOFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_mount: lower %s, alias at %s\n",
+		mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start.  There is nothing to do at this layer: the underlying
+ * filesystem had its own start routine run when it was mounted, so
+ * the call is not forwarded and success is reported.
+ */
+int
+nullfs_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Free reference to null layer.
+ * Refuses with EINVAL when a forced unmount is requested but
+ * doforce is off, and with EBUSY when the root alias is referenced
+ * by anyone besides the mount itself.  Otherwise flushes the
+ * mount's vnodes, drops and destroys the root alias and frees the
+ * null_mount.
+ */
+int
+nullfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct vnode *rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	int vflags = 0;
+	int error;
+	extern int doforce;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_unmount(mp = %x)\n", mp);
+#endif
+
+	if (mntflags & MNT_FORCE) {
+		/* lofs can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		vflags |= FORCECLOSE;
+	}
+
+	/*
+	 * Buffer cache flushing is compiled out: nothing is believed
+	 * to be cached at this level for now.
+	 */
+#if 0
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, vflags);
+	if (error)
+		return (error);
+
+#ifdef NULLFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/* Give up the mount's reference on the root alias ... */
+	vrele(rootvp);
+	/* ... make the vnode available for re-use ... */
+	vgone(rootvp);
+	/* ... and discard the per-mount data. */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the root alias vnode of the mount, referenced and
+ * locked, through *vpp.  Always returns 0.
+ */
+int
+nullfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_root(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+	    NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+#endif
+
+	rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp;
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quota control is passed straight to the underlying filesystem.
+ */
+int
+nullfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return (VFS_QUOTACTL(lowermp, cmd, uid, arg, p));
+}
+
+/*
+ * Report filesystem statistics: the size and usage figures come
+ * from the underlying filesystem, while the fsid and the mount
+ * names stay those of the null mount itself.
+ */
+int
+nullfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lower;
+	int error;
+
+#ifdef NULLFS_DIAGNOSTIC
+	printf("nullfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTONULLMOUNT(mp)->nullm_rootvp,
+	    NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp));
+#endif
+
+	bzero(&lower, sizeof(lower));
+
+	error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &lower, p);
+	if (error)
+		return (error);
+
+	/* Take the "interesting" figures from below ... */
+	sbp->f_type = lower.f_type;
+	sbp->f_flags = lower.f_flags;
+	sbp->f_bsize = lower.f_bsize;
+	sbp->f_iosize = lower.f_iosize;
+	sbp->f_blocks = lower.f_blocks;
+	sbp->f_bfree = lower.f_bfree;
+	sbp->f_bavail = lower.f_bavail;
+	sbp->f_files = lower.f_files;
+	sbp->f_ffree = lower.f_ffree;
+
+	/* ... and the identity from this mount, unless sbp already is it. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync is a no-op for the null layer.
+ * XXX - Assumes no data cached at null layer.
+ */
+int
+nullfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Inode-number lookup is forwarded to the underlying filesystem.
+ */
+int
+nullfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return (VFS_VGET(lowermp, ino, vpp));
+}
+
+/*
+ * File-handle to vnode translation is forwarded to the underlying
+ * filesystem with all arguments unchanged.
+ */
+int
+nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	struct mount *lowermp = MOUNTTONULLMOUNT(mp)->nullm_vfs;
+
+	return (VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp, credanonp));
+}
+
+/*
+ * Vnode to file-handle translation is done on the lower vnode
+ * that stands behind the null vnode.
+ */
+int
+nullfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp));
+}
+
+/* Defined in null_subr.c; sets up the null_node hash chains. */
+int nullfs_init __P((void));
+
+/*
+ * VFS operations table of the null layer. The initializer is
+ * positional, so the order of the entries must match the member
+ * order of struct vfsops.
+ */
+struct vfsops null_vfsops = {
+ nullfs_mount,
+ nullfs_start,
+ nullfs_unmount,
+ nullfs_root,
+ nullfs_quotactl,
+ nullfs_statfs,
+ nullfs_sync,
+ nullfs_vget,
+ nullfs_fhtovp,
+ nullfs_vptofh,
+ nullfs_init,
+};
diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c
new file mode 100644
index 0000000..115ff6f
--- /dev/null
+++ b/sys/fs/nullfs/null_vnops.c
@@ -0,0 +1,462 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * John Heidemann of the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)null_vnops.c 8.1 (Berkeley) 6/10/93
+ *
+ * Ancestors:
+ * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92
+ * $Id: lofs_vnops.c,v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ * ...and...
+ * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project
+ */
+
+/*
+ * Null Layer
+ *
+ * (See mount_null(8) for more information.)
+ *
+ * The null layer duplicates a portion of the file system
+ * name space under a new name. In this respect, it is
+ * similar to the loopback file system. It differs from
+ * the loopback fs in two respects: it is implemented using
+ * a stackable layers technique, and its "null-node"s stack above
+ * all lower-layer vnodes, not just over directory vnodes.
+ *
+ * The null layer has two purposes. First, it serves as a demonstration
+ * of layering by providing a layer which does nothing. (It actually
+ * does everything the loopback file system does, which is slightly
+ * more than nothing.) Second, the null layer can serve as a prototype
+ * layer. Since it provides all necessary layer framework,
+ * new file system layers can be created very easily by starting
+ * with a null layer.
+ *
+ * The remainder of this man page examines the null layer as a basis
+ * for constructing new layers.
+ *
+ *
+ * INSTANTIATING NEW NULL LAYERS
+ *
+ * New null layers are created with mount_null(8).
+ * Mount_null(8) takes two arguments, the pathname
+ * of the lower vfs (target-pn) and the pathname where the null
+ * layer will appear in the namespace (alias-pn). After
+ * the null layer is put into place, the contents
+ * of target-pn subtree will be aliased under alias-pn.
+ *
+ *
+ * OPERATION OF A NULL LAYER
+ *
+ * The null layer is the minimum file system layer,
+ * simply bypassing all possible operations to the lower layer
+ * for processing there. The majority of its activity centers
+ * on the bypass routine, through which nearly all vnode operations
+ * pass.
+ *
+ * The bypass routine accepts arbitrary vnode operations for
+ * handling by the lower layer. It begins by examining vnode
+ * operation arguments and replacing any null-nodes by their
+ * lower-layer equivalents. It then invokes the operation
+ * on the lower layer. Finally, it replaces the null-nodes
+ * in the arguments and, if a vnode is returned by the operation,
+ * stacks a null-node on top of the returned vnode.
+ *
+ * Although bypass handles most operations,
+ * vop_getattr, _inactive, _reclaim, and _print are not bypassed.
+ * Vop_getattr must change the fsid being returned.
+ * Vop_inactive and vop_reclaim are not bypassed so that
+ * they can handle freeing null-layer specific data.
+ * Vop_print is not bypassed to avoid excessive debugging
+ * information.
+ *
+ *
+ * INSTANTIATING VNODE STACKS
+ *
+ * Mounting associates the null layer with a lower layer,
+ * in effect stacking two VFSes. Vnode stacks are instead
+ * created on demand as files are accessed.
+ *
+ * The initial mount creates a single vnode stack for the
+ * root of the new null layer. All other vnode stacks
+ * are created as a result of vnode operations on
+ * this or other null vnode stacks.
+ *
+ * New vnode stacks come into existence as a result of
+ * an operation which returns a vnode.
+ * The bypass routine stacks a null-node above the new
+ * vnode before returning it to the caller.
+ *
+ * For example, imagine mounting a null layer with
+ * "mount_null /usr/include /dev/layer/null".
+ * Changing directory to /dev/layer/null will assign
+ * the root null-node (which was created when the null layer was mounted).
+ * Now consider opening "sys". A vop_lookup would be
+ * done on the root null-node. This operation would bypass through
+ * to the lower layer which would return a vnode representing
+ * the UFS "sys". Null_bypass then builds a null-node
+ * aliasing the UFS "sys" and returns this to the caller.
+ * Later operations on the null-node "sys" will repeat this
+ * process when constructing other vnode stacks.
+ *
+ *
+ * CREATING OTHER FILE SYSTEM LAYERS
+ *
+ * One of the easiest ways to construct new file system layers is to make
+ * a copy of the null layer, rename all files and variables, and
+ * then begin modifying the copy. Sed can be used to easily rename
+ * all variables.
+ *
+ * The umap layer is an example of a layer descended from the
+ * null layer.
+ *
+ *
+ * INVOKING OPERATIONS ON LOWER LAYERS
+ *
+ * There are two techniques to invoke operations on a lower layer
+ * when the operation cannot be completely bypassed. Each method
+ * is appropriate in different situations. In both cases,
+ * it is the responsibility of the aliasing layer to make
+ * the operation arguments "correct" for the lower layer
+ * by mapping any vnode arguments to the lower layer.
+ *
+ * The first approach is to call the aliasing layer's bypass routine.
+ * This method is most suitable when you wish to invoke the operation
+ * currently being handled on the lower layer. It has the advantage
+ * that the bypass routine already must do argument mapping.
+ * An example of this is null_getattr in the null layer.
+ *
+ * A second approach is to directly invoke vnode operations on
+ * the lower layer with the VOP_OPERATIONNAME interface.
+ * The advantage of this method is that it is easy to invoke
+ * arbitrary operations on the lower layer. The disadvantage
+ * is that vnode arguments must be manually mapped.
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/nullfs/null.h>
+
+
+int null_bug_bypass = 0; /* debugging switch: when nonzero, null_bypass printf's the name of every operation it handles */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * This version has been optimized for speed, throwing away some
+ * safety checks.  It should still always work, but it's not as
+ * robust to programmer errors.
+ * Define SAFETY to include some error checking code.
+ *
+ * In general, we map all vnodes going down and unmap them on the way back.
+ * As an exception to this, vnodes can be marked "unmapped" by setting
+ * the Nth bit in operation's vdesc_flags.
+ *
+ * Also, some BSD vnode operations have the side effect of vrele'ing
+ * their arguments.  With stacking, the reference counts are held
+ * by the upper node, not the lower one, so we must handle these
+ * side-effects here.  This is not of concern in Sun-derived systems
+ * since there are no such side-effects.
+ *
+ * This makes the following assumptions:
+ * - only one returned vpp
+ * - no INOUT vpp's (Sun's vop_open has one of these)
+ * - the vnode operation vector of the first vnode should be used
+ *   to determine what implementation of the op should be invoked
+ * - all mapped vnodes are of our vnode-type (NEEDSWORK:
+ *   problems on rmdir'ing mount points and renaming?)
+ * - the first vnode argument is never NULL; any later vnode argument
+ *   may be NULL, in which case it is handed down untouched
+ *
+ * Returns the error from the lower layer's operation or, when a
+ * returned vnode had to be wrapped, the error from null_node_create().
+ */
+int
+null_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**null_vnodeop_p)();  /* not extern, really "forward" */
+	register struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+
+	if (null_bug_bypass)
+		printf ("null_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("null_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+			VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap);
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (We must always map first vp or vclean fails.)
+		 *
+		 * A vnode argument after the first may also be absent
+		 * altogether (NULL); it must not be dereferenced, so it
+		 * is treated like a foreign vnode and left unmapped.
+		 */
+		if (i && (*this_vp_p == NULL ||
+		    (*this_vp_p)->v_op != null_vnodeop_p)) {
+			/* NULL in old_vps[] marks "nothing to restore". */
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p);
+			/*
+			 * XXX - Several operations have the side effect
+			 * of vrele'ing their vp's.  We must account for
+			 * that.  (This should go away in the future.)
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			/*
+			 * The lower op dropped the reference taken above
+			 * on the lower vnode; drop the caller's reference
+			 * on the upper vnode to match.
+			 */
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		/*
+		 * XXX - even though some ops have vpp returned vp's,
+		 * several ops actually vrele this before returning.
+		 * We must avoid these ops.
+		 * (This should go away when these ops are regularized.)
+		 */
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+				 descp->vdesc_vpp_offset,ap);
+		error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	return (error);
+}
+
+
+/*
+ * getattr is intercepted for one reason only: the fsid handed back
+ * to the caller is replaced with the fsid of the null mount.
+ * Everything else comes from the lower layer through null_bypass.
+ */
+int
+null_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+
+	error = null_bypass(ap);
+	if (error == 0) {
+		/*
+		 * This relies on null_bypass having restored a_vp in the
+		 * argument block, so that v_mount is the null mount.
+		 */
+		ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	}
+	return (error);
+}
+
+
+int
+null_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/*
+	 * Intentionally empty, and intentionally _not_ bypassed.
+	 * The reference on the lower vnode is kept until reclaim so
+	 * that, in the meantime, our null_node stays in the cache
+	 * and can be reused.
+	 *
+	 * NEEDSWORK: Someday, consider inactive'ing the lowervp and
+	 * then trying to reactivate it with capabilities (v_id), the
+	 * way the name lookup cache code does.  That's too much work
+	 * for now.
+	 */
+	return (0);
+}
+
+/*
+ * Tear down the null-layer state attached to a vnode being reclaimed:
+ * unlink and free its null_node, then drop the reference that node
+ * held on the lower vnode.
+ */
+int
+null_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *nullvp;
+	struct null_node *node;
+	struct vnode *lower;
+
+	nullvp = ap->a_vp;
+	node = VTONULL(nullvp);
+	/* Remember the lower vnode before the node that names it goes away. */
+	lower = node->null_lowervp;
+
+	/*
+	 * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p,
+	 * so we can't call VOPs on ourself.
+	 */
+
+	/* Once the lower pointer is cleared this node will not be re-used. */
+	node->null_lowervp = NULL;
+	remque(node);
+	FREE(nullvp->v_data, M_TEMP);
+	nullvp->v_data = NULL;
+	vrele (lower);
+	return (0);
+}
+
+
+/*
+ * Print a one-line description of a null-layer vnode: its own address
+ * and the address of the lower vnode it is stacked on.
+ */
+int
+null_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct vnode *lowervp;
+
+	vp = ap->a_vp;
+	lowervp = NULLVPTOLOWERVP(vp);
+	printf ("\ttag VT_NULLFS, vp=%x, lowervp=%x\n", vp, lowervp);
+	return (0);
+}
+
+
+/*
+ * XXX - vop_strategy cannot go through the generic bypass because its
+ * argument block carries a buf, not a vnode.  Do the mapping by hand:
+ * point the buf at the lower vnode for the duration of the call and
+ * put the original vnode back afterwards.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp;
+	struct vnode *uppervp;
+	int error;
+
+	bp = ap->a_bp;
+	uppervp = bp->b_vp;
+
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+
+/*
+ * XXX - like vop_strategy, vop_bwrite cannot go through the generic
+ * bypass because its argument block carries a buf, not a vnode.
+ * Point the buf at the lower vnode for the duration of the call and
+ * put the original vnode back afterwards.
+ * This goes away with a merged VM/buffer cache.
+ */
+int
+null_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp;
+	struct vnode *uppervp;
+	int error;
+
+	bp = ap->a_bp;
+	uppervp = bp->b_vp;
+
+	bp->b_vp = NULLVPTOLOWERVP(uppervp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = uppervp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ */
+int (**null_vnodeop_p)(); /* op vector pointer; null_bypass compares vnodes' v_op against it to recognize null-layer vnodes */
+struct vnodeopv_entry_desc null_vnodeop_entries[] = {
+ { &vop_default_desc, null_bypass }, /* presumably the fallback for every op not listed below - confirm against the opv setup code */
+
+ { &vop_getattr_desc, null_getattr }, /* bypass, then substitute this mount's fsid */
+ { &vop_inactive_desc, null_inactive }, /* deliberately does nothing */
+ { &vop_reclaim_desc, null_reclaim }, /* frees the null_node and vrele's the lower vnode */
+ { &vop_print_desc, null_print }, /* prints vp and lowervp only */
+
+ { &vop_strategy_desc, null_strategy }, /* hand-coded: temporarily points bp->b_vp at the lower vnode */
+ { &vop_bwrite_desc, null_bwrite }, /* hand-coded: temporarily points bp->b_vp at the lower vnode */
+
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* all-NULL terminator entry */
+};
+struct vnodeopv_desc null_vnodeop_opv_desc =
+ { &null_vnodeop_p, null_vnodeop_entries }; /* pairs the vector pointer with the entry table above */
diff --git a/sys/fs/portalfs/portal.h b/sys/fs/portalfs/portal.h
new file mode 100644
index 0000000..38d7ee0
--- /dev/null
+++ b/sys/fs/portalfs/portal.h
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal.h 8.4 (Berkeley) 1/21/94
+ *
+ * $Id: portal.h,v 1.3 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+struct portal_args { /* mount arguments; portal_mount copies one of these in from the caller's data pointer */
+ char *pa_config; /* Config file name; portal_mount copies it into the mount's f_mntfromname */
+ int pa_socket; /* Descriptor of the socket to the server; resolved with getsock() at mount time */
+};
+
+struct portal_cred { /* sent to the server by portal_open, immediately ahead of the node's pt_arg string */
+ int pcr_flag; /* File open mode (a_mode of the open) */
+ uid_t pcr_uid; /* From ucred (cr_uid) */
+ short pcr_ngroups; /* From ucred (cr_ngroups) */
+ gid_t pcr_groups[NGROUPS]; /* From ucred (cr_groups; all NGROUPS slots are copied) */
+};
+
+#ifdef KERNEL
+struct portalmount { /* per-mount state, hung off mp->mnt_data */
+ struct vnode *pm_root; /* Root node, created by portal_mount */
+ struct file *pm_server; /* Held reference to server socket; f_count is bumped at mount and dropped with closef at unmount */
+};
+
+struct portalnode { /* per-vnode state, hung off vp->v_data */
+ int pt_size; /* Length of Arg, counting its terminating NUL (0 for the root) */
+ char *pt_arg; /* Arg to send to server: the pathname saved by portal_lookup (0 for the root) */
+ int pt_fileid; /* cookie: PORTAL_ROOTFILEID for the root, otherwise taken from a counter in portal_lookup */
+};
+
+#define VFSTOPORTAL(mp) ((struct portalmount *)((mp)->mnt_data)) /* mount -> its portalmount */
+#define VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data) /* vnode -> its portalnode */
+
+#define PORTAL_ROOTFILEID 2 /* pt_fileid given to the root node */
+
+extern int (**portal_vnodeop_p)();
+extern struct vfsops portal_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c
new file mode 100644
index 0000000..39e8563
--- /dev/null
+++ b/sys/fs/portalfs/portal_vfsops.c
@@ -0,0 +1,313 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vfsops.c 8.6 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vfsops.c,v 1.5 1992/05/30 10:25:27 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/domain.h>
+#include <sys/un.h>
+#include <miscfs/portal/portal.h>
+
+/*
+ * Filesystem-type initialization hook.  The portal filesystem has
+ * nothing to set up, so this just reports success.
+ */
+int
+portal_init()
+{
+
+	return (0);
+}
+
+/*
+ * Mount a portal filesystem.
+ *
+ * The caller's portal_args are copied in, the server socket named by
+ * pa_socket is looked up and checked to be an AF_UNIX socket, and a
+ * root directory vnode plus a portalmount structure are created.
+ * The mount takes its own reference on the socket's file structure.
+ * Returns 0 on success or an errno value; MNT_UPDATE requests are
+ * rejected with EOPNOTSUPP.
+ */
+int
+portal_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct portal_args args;
+	struct portalmount *fmp;
+	struct portalnode *rootpt;
+	struct socket *so;
+	struct vnode *rvp;
+	struct file *fp;
+	u_int size;
+	int error;
+
+	/*
+	 * Updating an existing portal mount is not supported.
+	 */
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	error = copyin(data, (caddr_t) &args, sizeof(struct portal_args));
+	if (error)
+		return (error);
+
+	/*
+	 * The server must be reachable through a UNIX-domain socket.
+	 */
+	error = getsock(p->p_fd, args.pa_socket, &fp);
+	if (error)
+		return (error);
+	so = (struct socket *) fp->f_data;
+	if (so->so_proto->pr_domain->dom_family != AF_UNIX)
+		return (ESOCKTNOSUPPORT);
+
+	/*
+	 * Allocate the root vnode with its portalnode, and the
+	 * per-mount structure.
+	 */
+	error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */
+	if (error)
+		return (error);
+	MALLOC(rvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	fmp = (struct portalmount *) malloc(sizeof(struct portalmount),
+				 M_UFSMNT, M_WAITOK);	/* XXX */
+
+	rvp->v_type = VDIR;
+	rvp->v_flag |= VROOT;
+	rootpt = VTOPORTAL(rvp);
+	rootpt->pt_arg = 0;
+	rootpt->pt_size = 0;
+	rootpt->pt_fileid = PORTAL_ROOTFILEID;
+
+	fmp->pm_root = rvp;
+	fmp->pm_server = fp;
+	/* The mount holds its own reference on the server's file. */
+	fp->f_count++;
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) fmp;
+	getnewfsid(mp, MOUNT_PORTAL);
+
+	/*
+	 * Record where we are mounted and, as the "from" name, the
+	 * config file; zero-fill whatever is left of each buffer.
+	 */
+	(void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void)copyinstr(args.pa_config,
+	    mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+#ifdef notdef
+	bzero(mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal"));
+#endif
+
+	return (0);
+}
+
+/*
+ * Start hook: there is nothing to start for a portal mount, so this
+ * ignores its arguments and reports success.
+ */
+int
+portal_start(mp, flags, p)
+	struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Unmount a portal filesystem.
+ *
+ * Fails with EBUSY while anything but the mount itself references the
+ * root vnode, and with EINVAL for a forced unmount when doforce is
+ * clear.  Otherwise all vnodes are flushed, the root vnode is released
+ * and destroyed, the server socket is shut down, the mount's file
+ * reference is closed and the portalmount structure is freed.
+ */
+int
+portal_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct portalmount *fmp = VFSTOPORTAL(mp);
+	struct vnode *rootvp = fmp->pm_root;
+	int flags = 0;
+	int error;
+
+	if (mntflags & MNT_FORCE) {
+		/* portal can never be rootfs so don't check for it */
+		if (!doforce)
+			return (EINVAL);
+		flags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache.  I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+	error = vflush(mp, rootvp, flags);
+	if (error)
+		return (error);
+
+	/*
+	 * Drop the mount's reference on the root vnode, then
+	 * blow the vnode away for future re-use.
+	 */
+	vrele(rootvp);
+	vgone(rootvp);
+
+	/*
+	 * Shutdown the socket.  This will cause the select in the
+	 * daemon to wake up, and then the accept will get ECONNABORTED
+	 * which it interprets as a request to go and bury itself.
+	 */
+	soshutdown((struct socket *) fmp->pm_server->f_data, 2);
+
+	/*
+	 * Discard reference to underlying file.  Must call closef because
+	 * this may be the last reference.
+	 */
+	closef(fmp->pm_server, (struct proc *) 0);
+
+	/*
+	 * Finally, throw away the portalmount structure
+	 */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the mount's root vnode through *vpp, with a new
+ * reference taken and the vnode locked.  Always returns 0.
+ */
+int
+portal_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+	rootvp = VFSTOPORTAL(mp)->pm_root;
+
+	/* Reference first, then lock, then publish. */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+	return (0);
+}
+
+/*
+ * Quotas are not supported on a portal mount: every request is
+ * answered with EOPNOTSUPP.
+ */
+int
+portal_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Fill *sbp with the fixed, synthetic statistics of a portal mount.
+ * When the caller passed a buffer other than the mount's own mnt_stat,
+ * the fsid and the mount names are copied over from mnt_stat as well.
+ * Always returns 0.
+ */
+int
+portal_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+
+	sbp->f_type = MOUNT_PORTAL;
+	sbp->f_flags = 0;
+	sbp->f_bsize = DEV_BSIZE;
+	sbp->f_iosize = DEV_BSIZE;
+	sbp->f_blocks = 2;		/* 1K to keep df happy */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = 1;		/* Allow for "." */
+	sbp->f_ffree = 0;
+
+	/* Filling in the mount's own buffer: the rest is already there. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+/*
+ * Sync hook: a portal mount has nothing to write back, so this
+ * ignores its arguments and reports success.
+ *
+ * The parameter list is the full (mp, waitfor, cred, p) form that the
+ * sync entry of a vfsops vector is called with, so the definition
+ * agrees with its callers instead of declaring only the first two.
+ */
+int
+portal_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+
+	return (0);
+}
+
+/*
+ * Lookup by inode number is not supported on a portal mount:
+ * always returns EOPNOTSUPP and leaves *vpp untouched.
+ */
+int
+portal_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handles are not supported on a portal mount: always returns
+ * EOPNOTSUPP and touches none of its arguments.
+ *
+ * The parameter list is the full six-argument form that the fhtovp
+ * entry of a vfsops vector is called with.  Declaring only
+ * (mp, fhp, vpp) would bind "vpp" to the caller's mbuf argument.
+ */
+int
+portal_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * File handles are not supported on a portal mount: always returns
+ * EOPNOTSUPP and leaves *fhp untouched.
+ */
+int
+portal_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+
+	return (EOPNOTSUPP);
+}
+
+struct vfsops portal_vfsops = { /* portal filesystem's VFS entry points, listed positionally */
+ portal_mount, /* binds the mount to the server socket and builds the root vnode */
+ portal_start, /* returns 0 */
+ portal_unmount, /* flushes vnodes, shuts down and releases the server socket */
+ portal_root, /* returns the root vnode referenced and locked */
+ portal_quotactl, /* returns EOPNOTSUPP */
+ portal_statfs, /* fills in fixed, synthetic statistics */
+ portal_sync, /* returns 0 */
+ portal_vget, /* returns EOPNOTSUPP */
+ portal_fhtovp, /* returns EOPNOTSUPP */
+ portal_vptofh, /* returns EOPNOTSUPP */
+ portal_init, /* returns 0 */
+};
diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c
new file mode 100644
index 0000000..5e17026
--- /dev/null
+++ b/sys/fs/portalfs/portal_vnops.c
@@ -0,0 +1,707 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)portal_vnops.c 8.8 (Berkeley) 1/21/94
+ *
+ * $Id: portal_vnops.c,v 1.4 1992/05/30 10:05:24 jsp Exp jsp $
+ */
+
+/*
+ * Portal Filesystem
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/types.h>
+#include <sys/time.h>
+#include <sys/proc.h>
+#include <sys/filedesc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <sys/malloc.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
+#include <miscfs/portal/portal.h>
+
+static int portal_fileid = PORTAL_ROOTFILEID+1; /* next pt_fileid to hand out; portal_lookup post-increments it for each node it creates */
+
+/*
+ * Close descriptor fd of process p by calling close() with a
+ * hand-built argument block.  A failure is only reported with a
+ * printf; nothing is returned to the caller.
+ */
+static void
+portal_closefd(p, fd)
+	struct proc *p;
+	int fd;
+{
+	struct {
+		int fd;
+	} ua;
+	int rc;
+	int error;
+
+	ua.fd = fd;
+	error = close(p, &ua, &rc);
+	if (error == 0)
+		return;
+
+	/*
+	 * We should never get an error, and there isn't anything
+	 * we could do if we got one, so just print a message.
+	 */
+	printf("portal_closefd: error = %d\n", error);
+}
+
+/*
+ * Look a name up in the portal directory.
+ *
+ * a_dvp is the current namei directory and a_cnp the name to locate
+ * in it.  "." yields the directory itself with an extra reference.
+ * Anything else creates a fresh VREG vnode whose portalnode stores the
+ * whole remaining pathname (not just this component); cn_consume is
+ * set so that the rest of the path counts as consumed.
+ * Returns 0 with the vnode in *a_vpp, or the getnewvnode() error with
+ * *a_vpp set to NULL.
+ */
+int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname = cnp->cn_nameptr;
+	struct vnode *fvp = 0;
+	struct portalnode *pt;
+	int error;
+	int size;
+
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*ap->a_vpp = dvp;
+		VREF(dvp);
+		/*VOP_LOCK(ap->a_dvp);*/
+		return (0);
+	}
+
+	error = getnewvnode(VT_PORTAL, dvp->v_mount, portal_vnodeop_p, &fvp);
+	if (error) {
+		if (fvp) {
+			vrele(fvp);
+		}
+		*ap->a_vpp = NULL;
+		return (error);
+	}
+	fvp->v_type = VREG;
+	MALLOC(fvp->v_data, void *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+	pt = VTOPORTAL(fvp);
+
+	/*
+	 * Measure all of the remaining pathname, up to its NUL, and
+	 * tell namei how much of it lies beyond the current component.
+	 */
+	size = 0;
+	while (pname[size] != '\0')
+		size++;
+	cnp->cn_consume = size - cnp->cn_namelen;
+
+	/* Keep a private copy of the pathname, terminating NUL included. */
+	pt->pt_arg = malloc(size+1, M_TEMP, M_WAITOK);
+	pt->pt_size = size+1;
+	bcopy(pname, pt->pt_arg, pt->pt_size);
+	pt->pt_fileid = portal_fileid++;
+
+	*ap->a_vpp = fvp;
+	/*VOP_LOCK(fvp);*/
+	return (0);
+}
+
+/*
+ * Connect socket so to the listening server socket so2.
+ * This is taken from unp_connect, bypassing the namei stuff: a new
+ * socket is spawned off the listener, given a copy of the listener's
+ * bound address if it has one, and joined to so with unp_connect2().
+ *
+ * Returns ECONNREFUSED when so2 is missing, is not accepting
+ * connections, or cannot spawn a new socket; EPROTOTYPE when the two
+ * socket types differ; otherwise whatever unp_connect2() returns.
+ */
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	struct socket *newso;
+	struct unpcb *listenpcb;
+	struct unpcb *newpcb;
+
+	if (so2 == 0)
+		return (ECONNREFUSED);
+	if (so->so_type != so2->so_type)
+		return (EPROTOTYPE);
+	if (!(so2->so_options & SO_ACCEPTCONN))
+		return (ECONNREFUSED);
+
+	newso = sonewconn(so2, 0);
+	if (newso == 0)
+		return (ECONNREFUSED);
+
+	listenpcb = sotounpcb(so2);
+	newpcb = sotounpcb(newso);
+	if (listenpcb->unp_addr)
+		newpcb->unp_addr = m_copy(listenpcb->unp_addr, 0, (int)M_COPYALL);
+
+	/* The connection is made to the spawned socket, not the listener. */
+	return (unp_connect2(so, newso));
+}
+
+/*
+ * Open a portal node.
+ *
+ * Opening the root (VROOT) vnode is a no-op.  For any other vnode a
+ * temporary AF_UNIX stream socket is created and connected to the
+ * server socket held by the mount point (fmp->pm_server).  The caller's
+ * credentials (struct portal_cred) followed by the node's argument
+ * bytes (pt_arg, pt_size) are sent, and the reply is read back: an int
+ * error code in the data and/or a control message carrying file
+ * descriptors.  The first descriptor is stored in p->p_dupfd and ENXIO
+ * is returned; any further descriptors are closed with portal_closefd().
+ *
+ * Every exit after the socket is created passes through the "bad"
+ * label, which frees the control mbuf and shuts down and closes the
+ * temporary socket -- this includes the ENXIO path.
+ *
+ * Returns 0 for the root vnode, ENODEV when p->p_dupfd is already
+ * >= 0, ENXIO when a descriptor was obtained, and otherwise an error
+ * from the socket layer, the error word sent by the server, or one of
+ * ECONNREFUSED, ECONNRESET, EINVAL and EACCES as set below.
+ */
+int
+portal_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct socket *so = 0;
+ struct portalnode *pt;
+ struct proc *p = ap->a_p;
+ struct vnode *vp = ap->a_vp;
+ int s;
+ struct uio auio;
+ struct iovec aiov[2];
+ int res;
+ struct mbuf *cm = 0;
+ struct cmsghdr *cmsg;
+ int newfds;
+ int *ip;
+ int fd;
+ int error;
+ int len;
+ struct portalmount *fmp;
+ struct file *fp;
+ struct portal_cred pcred;
+
+ /*
+ * Nothing to do when opening the root node.
+ */
+ if (vp->v_flag & VROOT)
+ return (0);
+
+ /*
+ * Can't be opened unless the caller is set up
+ * to deal with the side effects. Check for this
+ * by testing whether the p_dupfd has been set.
+ */
+ if (p->p_dupfd >= 0)
+ return (ENODEV);
+
+ pt = VTOPORTAL(vp);
+ fmp = VFSTOPORTAL(vp->v_mount);
+
+ /*
+ * Create a new socket.
+ */
+ error = socreate(AF_UNIX, &so, SOCK_STREAM, 0);
+ if (error)
+ goto bad;
+
+ /*
+ * Reserve some buffer space
+ */
+ res = pt->pt_size + sizeof(pcred) + 512; /* XXX */
+ error = soreserve(so, res, res);
+ if (error)
+ goto bad;
+
+ /*
+ * Kick off connection
+ */
+ error = portal_connect(so, (struct socket *)fmp->pm_server->f_data);
+ if (error)
+ goto bad;
+
+ /*
+ * Wait for connection to complete
+ */
+ /*
+ * XXX: Since the mount point is holding a reference on the
+ * underlying server socket, it is not easy to find out whether
+ * the server process is still running. To handle this problem
+ * we loop waiting for the new socket to be connected (something
+ * which will only happen if the server is still running) or for
+ * the reference count on the server socket to drop to 1, which
+ * will happen if the server dies. Sleep for 5 second intervals
+ * and keep polling the reference count. XXX.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ if (fmp->pm_server->f_count == 1) {
+ error = ECONNREFUSED;
+ splx(s);
+ goto bad;
+ }
+ (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz);
+ }
+ splx(s);
+
+ if (so->so_error) {
+ error = so->so_error;
+ goto bad;
+ }
+
+ /*
+ * Set miscellaneous flags
+ */
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+
+ /*
+ * Build the request: the caller's open mode and credentials,
+ * followed by the argument bytes stored in the portal node,
+ * sent as a single two-element gather write from kernel space.
+ */
+ pcred.pcr_flag = ap->a_mode;
+ pcred.pcr_uid = ap->a_cred->cr_uid;
+ pcred.pcr_ngroups = ap->a_cred->cr_ngroups;
+ bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t));
+ aiov[0].iov_base = (caddr_t) &pcred;
+ aiov[0].iov_len = sizeof(pcred);
+ aiov[1].iov_base = pt->pt_arg;
+ aiov[1].iov_len = pt->pt_size;
+ auio.uio_iov = aiov;
+ auio.uio_iovcnt = 2;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_procp = p;
+ auio.uio_offset = 0;
+ auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len;
+
+ error = sosend(so, (struct mbuf *) 0, &auio,
+ (struct mbuf *) 0, (struct mbuf *) 0, 0);
+ if (error)
+ goto bad;
+
+ /*
+ * Read the reply.  Keep receiving until a control message has
+ * arrived, some of the sizeof(int) data bytes have been consumed,
+ * or an error (local or sent by the server) has been seen.
+ */
+ len = auio.uio_resid = sizeof(int);
+ do {
+ struct mbuf *m = 0;
+ int flags = MSG_WAITALL;
+ error = soreceive(so, (struct mbuf **) 0, &auio,
+ &m, &cm, &flags);
+ if (error)
+ goto bad;
+
+ /*
+ * Grab an error code from the mbuf.
+ */
+ if (m) {
+ m = m_pullup(m, sizeof(int)); /* Needed? */
+ if (m) {
+ error = *(mtod(m, int *));
+ m_freem(m);
+ } else {
+ error = EINVAL;
+ }
+ } else {
+ if (cm == 0) {
+ error = ECONNRESET; /* XXX */
+#ifdef notdef
+ break;
+#endif
+ }
+ }
+ } while (cm == 0 && auio.uio_resid == len && !error);
+
+ /*
+ * No control message means no descriptor was passed.
+ * NOTE(review): if the server replied with a zero error word and
+ * no control message, error is 0 here and the open returns 0
+ * without p_dupfd being set -- confirm that this is intended.
+ */
+ if (cm == 0)
+ goto bad;
+
+ /*
+ * An incomplete read of the error word is tolerated: the error is
+ * cleared rather than reported (the EMSGSIZE path is compiled out).
+ */
+ if (auio.uio_resid) {
+ error = 0;
+#ifdef notdef
+ error = EMSGSIZE;
+ goto bad;
+#endif
+ }
+
+ /*
+ * XXX: Break apart the control message, and retrieve the
+ * received file descriptor. Note that more than one descriptor
+ * may have been received, or that the rights chain may have more
+ * than a single mbuf in it. What to do?
+ */
+ cmsg = mtod(cm, struct cmsghdr *);
+ newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int);
+ if (newfds == 0) {
+ error = ECONNREFUSED;
+ goto bad;
+ }
+ /*
+ * At this point the rights message consists of a control message
+ * header, followed by a data region containing a vector of
+ * integer file descriptors. The fds were allocated by the action
+ * of receiving the control message.
+ */
+ ip = (int *) (cmsg + 1);
+ fd = *ip++;
+ if (newfds > 1) {
+ /*
+ * Close extra fds.
+ */
+ int i;
+ printf("portal_open: %d extra fds\n", newfds - 1);
+ for (i = 1; i < newfds; i++) {
+ portal_closefd(p, *ip);
+ ip++;
+ }
+ }
+
+ /*
+ * Check that the mode the file is being opened for is a subset
+ * of the mode of the existing descriptor.
+ */
+ fp = p->p_fd->fd_ofiles[fd];
+ if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) {
+ portal_closefd(p, fd);
+ error = EACCES;
+ goto bad;
+ }
+
+ /*
+ * Save the dup fd in the proc structure then return the
+ * special error code (ENXIO) which causes magic things to
+ * happen in vn_open. The whole concept is, well, hmmm.
+ */
+ p->p_dupfd = fd;
+ error = ENXIO;
+
+ /*
+ * Common exit: reached on failure and, with error == ENXIO, on
+ * success as well.
+ */
+bad:;
+ /*
+ * And discard the control message.
+ */
+ if (cm) {
+ m_freem(cm);
+ }
+
+ /*
+ * The temporary socket is only needed for the exchange above.
+ */
+ if (so) {
+ soshutdown(so, 2);
+ soclose(so);
+ }
+ return (error);
+}
+
+/*
+ * Fabricate the attributes of a portal vnode.
+ *
+ * Nothing is stored per node, so every field is synthesised: owner and
+ * group are 0, size and block size are DEV_BSIZE, and all three
+ * timestamps are the current time.  The root vnode is reported as a
+ * directory (rwx for everybody, 2 links, file id 2); every other vnode
+ * is reported as a regular file (rw for everybody, 1 link) whose file
+ * id comes from its portalnode.
+ *
+ * Always returns 0.
+ */
+int
+portal_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct vattr *vap = ap->a_vap;
+
+	bzero(vap, sizeof(*vap));
+	vattr_null(vap);
+	vap->va_uid = 0;
+	vap->va_gid = 0;
+	vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+	vap->va_size = DEV_BSIZE;
+	vap->va_blocksize = DEV_BSIZE;
+
+	/* All three timestamps report "now". */
+	microtime(&vap->va_atime);
+	vap->va_mtime = vap->va_atime;
+	/* Copy from atime; assigning va_ctime to itself leaves it unset. */
+	vap->va_ctime = vap->va_atime;
+
+	vap->va_gen = 0;
+	vap->va_flags = 0;
+	vap->va_rdev = 0;
+	/* vap->va_qbytes = 0; */
+	vap->va_bytes = 0;
+	/* vap->va_qsize = 0; */
+	if (vp->v_flag & VROOT) {
+		vap->va_type = VDIR;
+		vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR|
+				S_IRGRP|S_IWGRP|S_IXGRP|
+				S_IROTH|S_IWOTH|S_IXOTH;
+		vap->va_nlink = 2;
+		vap->va_fileid = 2;
+	} else {
+		vap->va_type = VREG;
+		vap->va_mode = S_IRUSR|S_IWUSR|
+				S_IRGRP|S_IWGRP|
+				S_IROTH|S_IWOTH;
+		vap->va_nlink = 1;
+		vap->va_fileid = VTOPORTAL(vp)->pt_fileid;
+	}
+	return (0);
+}
+
+/*
+ * Set attributes on a portal vnode.  No attribute is actually stored:
+ * the root vnode refuses the request with EACCES and every other vnode
+ * accepts it without doing anything.
+ */
+int
+portal_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* The root of the portal tree may not be modified. */
+	return ((vp->v_flag & VROOT) ? EACCES : 0);
+}
+
+/*
+ * Directory read stub: reports success without producing any entries,
+ * so the directory appears empty.  Synthesising '.' and '..' is
+ * awkward, so it is not attempted.
+ */
+int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	/* ap->a_uio is left untouched. */
+	return (0);
+}
+
+/*
+ * Inactive hook: there is nothing to do here, so simply report
+ * success.
+ */
+int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Reclaim a portal vnode: release the argument buffer hanging off the
+ * portalnode (if there is one), then the portalnode itself, and clear
+ * the vnode's private data pointer.  Always returns 0.
+ */
+int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct portalnode *pt = VTOPORTAL(vp);
+
+	/* The argument buffer is optional. */
+	if (pt->pt_arg != 0) {
+		free((caddr_t) pt->pt_arg, M_TEMP);
+		pt->pt_arg = 0;
+	}
+
+	/* Drop the per-vnode data so nothing dangles afterwards. */
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information for a portal vnode.
+ *
+ * The value for the requested name (ap->a_name) is stored through
+ * ap->a_retval.  Returns 0 when the name is recognised and EINVAL
+ * otherwise.
+ */
+int
+portal_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		return (0);
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		return (0);
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		return (0);
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		return (0);
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		return (0);
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		return (0);
+	}
+	/* Unknown name: nothing is written through a_retval. */
+	return (EINVAL);
+}
+
+/*
+ * Print a one-line description of a portal vnode.  The vnode itself is
+ * not inspected; the same fixed line is printed for every portal vnode.
+ */
+/* ARGSUSED */
+int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	printf("tag VT_PORTAL, portal vnode\n");
+
+	return (0);
+}
+
+/*
+ * vfree hook: there is nothing to release, so this is a successful
+ * no-op.  It is declared int like the other vnode operations here.
+ */
+int
+portal_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	return (0);
+}
+
+
+/*
+ * Generic handler for vnode operations that the portal filesystem
+ * does not support: always fails with EOPNOTSUPP.
+ */
+int
+portal_enotsupp()
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Generic handler for vnode operations that must never be invoked on a
+ * portal vnode: panics instead of returning.
+ */
+int
+portal_badop()
+{
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Generic do-nothing vnode operation: always succeeds.
+ */
+int
+portal_nullop()
+{
+	return (0);
+}
+
+/*
+ * Vnode operations without a dedicated implementation are aliased,
+ * through a cast to the matching vnode-op signature, to one of three
+ * generic handlers: portal_enotsupp (returns EOPNOTSUPP), portal_badop
+ * (panics) or nullop.
+ *
+ * NOTE(review): nullop is not defined in this part of the file --
+ * presumably it is the kernel-wide no-op; none of the aliases below
+ * refers to portal_nullop.
+ *
+ * portal_valloc is cast to a four-argument signature instead of one
+ * taking a struct vop_valloc_args pointer, unlike the other aliases.
+ */
+#define portal_create ((int (*) __P((struct vop_create_args *)))portal_enotsupp)
+#define portal_mknod ((int (*) __P((struct vop_mknod_args *)))portal_enotsupp)
+#define portal_close ((int (*) __P((struct vop_close_args *)))nullop)
+#define portal_access ((int (*) __P((struct vop_access_args *)))nullop)
+#define portal_read ((int (*) __P((struct vop_read_args *)))portal_enotsupp)
+#define portal_write ((int (*) __P((struct vop_write_args *)))portal_enotsupp)
+#define portal_ioctl ((int (*) __P((struct vop_ioctl_args *)))portal_enotsupp)
+#define portal_select ((int (*) __P((struct vop_select_args *)))portal_enotsupp)
+#define portal_mmap ((int (*) __P((struct vop_mmap_args *)))portal_enotsupp)
+#define portal_fsync ((int (*) __P((struct vop_fsync_args *)))nullop)
+#define portal_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+#define portal_remove ((int (*) __P((struct vop_remove_args *)))portal_enotsupp)
+#define portal_link ((int (*) __P((struct vop_link_args *)))portal_enotsupp)
+#define portal_rename ((int (*) __P((struct vop_rename_args *)))portal_enotsupp)
+#define portal_mkdir ((int (*) __P((struct vop_mkdir_args *)))portal_enotsupp)
+#define portal_rmdir ((int (*) __P((struct vop_rmdir_args *)))portal_enotsupp)
+#define portal_symlink \
+ ((int (*) __P((struct vop_symlink_args *)))portal_enotsupp)
+#define portal_readlink \
+ ((int (*) __P((struct vop_readlink_args *)))portal_enotsupp)
+#define portal_abortop ((int (*) __P((struct vop_abortop_args *)))nullop)
+#define portal_lock ((int (*) __P((struct vop_lock_args *)))nullop)
+#define portal_unlock ((int (*) __P((struct vop_unlock_args *)))nullop)
+#define portal_bmap ((int (*) __P((struct vop_bmap_args *)))portal_badop)
+#define portal_strategy \
+ ((int (*) __P((struct vop_strategy_args *)))portal_badop)
+#define portal_islocked ((int (*) __P((struct vop_islocked_args *)))nullop)
+#define portal_advlock \
+ ((int (*) __P((struct vop_advlock_args *)))portal_enotsupp)
+#define portal_blkatoff \
+ ((int (*) __P((struct vop_blkatoff_args *)))portal_enotsupp)
+#define portal_valloc ((int(*) __P(( \
+ struct vnode *pvp, \
+ int mode, \
+ struct ucred *cred, \
+ struct vnode **vpp))) portal_enotsupp)
+#define portal_truncate \
+ ((int (*) __P((struct vop_truncate_args *)))portal_enotsupp)
+#define portal_update ((int (*) __P((struct vop_update_args *)))portal_enotsupp)
+#define portal_bwrite ((int (*) __P((struct vop_bwrite_args *)))portal_enotsupp)
+
+/*
+ * Vnode operations vector for the portal filesystem.
+ *
+ * portal_vnodeop_entries pairs each operation descriptor with the
+ * portal handler for it (a real function or one of the portal_* alias
+ * macros); vn_default_error is listed against vop_default_desc.  The
+ * list is terminated by a NULL/NULL pair.  portal_vnodeop_opv_desc
+ * ties the entry list to portal_vnodeop_p.
+ * NOTE(review): presumably portal_vnodeop_p is filled in from this
+ * list at VFS initialisation -- confirm against the vfs init code.
+ */
+int (**portal_vnodeop_p)();
+struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, portal_lookup }, /* lookup */
+ { &vop_create_desc, portal_create }, /* create */
+ { &vop_mknod_desc, portal_mknod }, /* mknod */
+ { &vop_open_desc, portal_open }, /* open */
+ { &vop_close_desc, portal_close }, /* close */
+ { &vop_access_desc, portal_access }, /* access */
+ { &vop_getattr_desc, portal_getattr }, /* getattr */
+ { &vop_setattr_desc, portal_setattr }, /* setattr */
+ { &vop_read_desc, portal_read }, /* read */
+ { &vop_write_desc, portal_write }, /* write */
+ { &vop_ioctl_desc, portal_ioctl }, /* ioctl */
+ { &vop_select_desc, portal_select }, /* select */
+ { &vop_mmap_desc, portal_mmap }, /* mmap */
+ { &vop_fsync_desc, portal_fsync }, /* fsync */
+ { &vop_seek_desc, portal_seek }, /* seek */
+ { &vop_remove_desc, portal_remove }, /* remove */
+ { &vop_link_desc, portal_link }, /* link */
+ { &vop_rename_desc, portal_rename }, /* rename */
+ { &vop_mkdir_desc, portal_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, portal_rmdir }, /* rmdir */
+ { &vop_symlink_desc, portal_symlink }, /* symlink */
+ { &vop_readdir_desc, portal_readdir }, /* readdir */
+ { &vop_readlink_desc, portal_readlink }, /* readlink */
+ { &vop_abortop_desc, portal_abortop }, /* abortop */
+ { &vop_inactive_desc, portal_inactive }, /* inactive */
+ { &vop_reclaim_desc, portal_reclaim }, /* reclaim */
+ { &vop_lock_desc, portal_lock }, /* lock */
+ { &vop_unlock_desc, portal_unlock }, /* unlock */
+ { &vop_bmap_desc, portal_bmap }, /* bmap */
+ { &vop_strategy_desc, portal_strategy }, /* strategy */
+ { &vop_print_desc, portal_print }, /* print */
+ { &vop_islocked_desc, portal_islocked }, /* islocked */
+ { &vop_pathconf_desc, portal_pathconf }, /* pathconf */
+ { &vop_advlock_desc, portal_advlock }, /* advlock */
+ { &vop_blkatoff_desc, portal_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, portal_valloc }, /* valloc */
+ { &vop_vfree_desc, portal_vfree }, /* vfree */
+ { &vop_truncate_desc, portal_truncate }, /* truncate */
+ { &vop_update_desc, portal_update }, /* update */
+ { &vop_bwrite_desc, portal_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc portal_vnodeop_opv_desc =
+ { &portal_vnodeop_p, portal_vnodeop_entries };
diff --git a/sys/fs/procfs/README b/sys/fs/procfs/README
new file mode 100644
index 0000000..38811b3
--- /dev/null
+++ b/sys/fs/procfs/README
@@ -0,0 +1,113 @@
+saute procfs lyonnais
+
+procfs supports two levels of directory. the filesystem root
+directory contains a representation of the system process table.
+this consists of an entry for each active and zombie process, and
+an additional entry "curproc" which always represents the process
+making the lookup request.
+
+each of the sub-directories contains several files. these files
+are used to control and interrogate processes. the files implemented
+are:
+
+ file - xxx. the exec'ed file.
+
+ status - r/o. returns process status.
+
+ ctl - w/o. sends a control message to the process.
+ for example:
+ echo hup > /proc/curproc/ctl
+ will send a SIGHUP to the shell.
+ whereas
+ echo attach > /proc/1293/ctl
+ would set up process 1293 for debugging.
+ see below for more details.
+
+ mem - r/w. virtual memory image of the process.
+ parts of the address space are readable
+ only if they exist in the target process.
+ a more reasonable alternative might be
+ to return zero pages instead of an error.
+ comments?
+
+ note - w/o. writing a string here sends the
+ equivalent note to the process.
+ [ not implemented. ]
+
+ notepg - w/o. the same as note, but sends to all
+ members of the process group.
+ [ not implemented. ]
+
+ regs - r/w. process register set. this can be read
+ or written any time even if the process
+ is not stopped. since the bsd kernel
+ is single-processor, this implementation
+ will get the "right" register values.
+ a multi-proc kernel would need to do some
+ synchronisation.
+
+this then looks like:
+
+% ls -li /proc
+total 0
+ 9 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 0
+ 17 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 1
+ 89 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 10
+ 25 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 2
+2065 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 257
+2481 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 309
+ 265 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 32
+3129 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 390
+3209 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 400
+3217 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 401
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 408
+ 393 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 48
+ 409 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 50
+ 465 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 57
+ 481 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 59
+ 537 dr-xr-xr-x 2 root kmem 0 Sep 21 15:06 66
+ 545 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 67
+ 657 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 81
+ 665 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 82
+ 673 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 83
+ 681 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 84
+3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 curproc
+% ls -li /proc/curproc
+total 408
+3341 --w------- 1 jsp staff 0 Sep 21 15:06 ctl
+1554 -r-xr-xr-x 1 bin bin 90112 Mar 29 04:52 file
+3339 -rw------- 1 jsp staff 118784 Sep 21 15:06 mem
+3343 --w------- 1 jsp staff 0 Sep 21 15:06 note
+3344 --w------- 1 jsp staff 0 Sep 21 15:06 notepg
+3340 -rw------- 1 jsp staff 0 Sep 21 15:06 regs
+3342 -r--r--r-- 1 jsp staff 0 Sep 21 15:06 status
+% df /proc/curproc /proc/curproc/file
+Filesystem 512-blocks Used Avail Capacity Mounted on
+proc 2 2 0 100% /proc
+/dev/wd0a 16186 13548 1018 93% /
+% cat /proc/curproc/status
+cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117
+
+
+
+the basic sequence of commands written to "ctl" would be
+
+ attach - this stops the target process and
+ arranges for the sending process
+ to become the debug control process
+ wait - wait for the target process to come to
+ a steady state ready for debugging.
+ step - single step, with no signal delivery.
+ run - continue running, with no signal delivery,
+ until next trap or breakpoint.
+ <signame> - deliver signal <signame> and continue running.
+ detach - continue execution of the target process
+ and remove it from control by the debug process
+
+in a normal debugging environment, where the target is fork/exec'd by
+the debugger, the debugger should fork and the child should stop itself
+(with a self-inflicted SIGSTOP). the parent should do a "wait" then an
+"attach". as before, the child will hit a breakpoint on the first
+instruction in any newly exec'd image.
+
+$Id: README,v 3.1 1993/12/15 09:40:17 jsp Exp $
diff --git a/sys/fs/procfs/procfs.h b/sys/fs/procfs/procfs.h
new file mode 100644
index 0000000..f7b8fa3
--- /dev/null
+++ b/sys/fs/procfs/procfs.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs.h 8.6 (Berkeley) 2/3/94
+ *
+ * From:
+ * $Id: procfs.h,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * The different types of node in a procfs filesystem.
+ *
+ * The numeric value of each enumerator is added into the file number
+ * computed by PROCFS_FILENO(), so the order of the enumerators is
+ * significant and must not be changed casually.
+ */
+typedef enum {
+ Proot, /* the filesystem root */
+ Pproc, /* a process-specific sub-directory */
+ Pfile, /* the executable file */
+ Pmem, /* the process's memory image */
+ Pregs, /* the process's register set */
+ Pfpregs, /* the process's FP register set */
+ Pctl, /* process control */
+ Pstatus, /* process status */
+ Pnote, /* process notifier */
+ Pnotepg /* process group notifier */
+} pfstype;
+
+/*
+ * Control data for the proc file system: the per-vnode private data
+ * of a procfs node.  VTOPFS() and PFSTOV() convert between a vnode and
+ * its pfsnode.
+ */
+struct pfsnode {
+ struct pfsnode *pfs_next; /* next on list */
+ struct vnode *pfs_vnode; /* vnode associated with this pfsnode */
+ pfstype pfs_type; /* type of procfs node */
+ pid_t pfs_pid; /* associated process */
+ u_short pfs_mode; /* mode bits for stat() */
+ u_long pfs_flags; /* open flags */
+ u_long pfs_fileno; /* unique file id */
+};
+
+#define PROCFS_NOTELEN 64 /* max length of a note (/proc/$pid/note) */
+#define PROCFS_CTLLEN 8 /* max length of a ctl msg (/proc/$pid/ctl) */
+
+/*
+ * Kernel stuff follows
+ */
+#ifdef KERNEL
+/*
+ * True when the name held in (cnp) is exactly (len) bytes long and
+ * those bytes equal the first (len) bytes of (s).
+ */
+#define CNEQ(cnp, s, len) \
+ ((cnp)->cn_namelen == (len) && \
+ (bcmp((s), (cnp)->cn_nameptr, (len)) == 0))
+
+/*
+ * Format of a directory entry in /proc, ...
+ * This must map onto struct dirent (see <dirent.h>), so the order and
+ * types of the fields may not be changed independently of it.
+ * Names are limited to PROCFS_NAMELEN bytes.
+ */
+#define PROCFS_NAMELEN 8
+struct pfsdent {
+ u_long d_fileno;
+ u_short d_reclen;
+ u_char d_type;
+ u_char d_namlen;
+ char d_name[PROCFS_NAMELEN];
+};
+/* Size of one procfs directory entry. */
+#define UIO_MX sizeof(struct pfsdent)
+/*
+ * Compute the file number of the node of the given type that belongs
+ * to process (pid).  The filesystem root is always file number 2.
+ *
+ * Four bits are reserved for the node type: pfstype has ten values, so
+ * a three-bit field would make (pid, Pnotepg) collide with
+ * (pid + 1, Pproc).
+ */
+#define PROCFS_FILENO(pid, type) \
+	(((type) == Proot) ? \
+		2 : \
+		((((pid)+1) << 4) + ((int) (type))))
+
+/*
+ * Convert between a pfsnode and its vnode:
+ * VTOPFS yields the pfsnode stored in a vnode's v_data,
+ * PFSTOV yields the vnode recorded in a pfsnode.
+ */
+#define VTOPFS(vp) ((struct pfsnode *)(vp)->v_data)
+#define PFSTOV(pfs) ((pfs)->pfs_vnode)
+
+/*
+ * One entry of a name -> integer lookup table (nm_name maps to
+ * nm_val).
+ */
+typedef struct vfs_namemap vfs_namemap_t;
+struct vfs_namemap {
+ const char *nm_name;
+ int nm_val;
+};
+
+/*
+ * Helpers operating on user-supplied strings and name tables.
+ * NOTE(review): their definitions are not in this header; presumably
+ * vfs_getuserstr copies a string out of a uio and vfs_findname looks
+ * it up in a vfs_namemap table -- confirm against the implementation.
+ */
+extern int vfs_getuserstr __P((struct uio *, char *, int *));
+extern vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int));
+
+/*
+ * Forward declarations only; the full types come from <machine/reg.h>.
+ */
+struct reg;
+struct fpreg;
+
+/* Look up a process by pid; pid 0 yields &proc0 without calling pfind(). */
+#define PFIND(pid) ((pid) ? pfind(pid) : &proc0)
+
+/* procfs vnode allocation and release */
+extern int procfs_freevp __P((struct vnode *));
+extern int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype));
+extern struct vnode *procfs_findtextvp __P((struct proc *));
+
+/* single-stepping and register access */
+extern int procfs_sstep __P((struct proc *));
+extern void procfs_fix_sstep __P((struct proc *));
+extern int procfs_read_regs __P((struct proc *, struct reg *));
+extern int procfs_write_regs __P((struct proc *, struct reg *));
+extern int procfs_read_fpregs __P((struct proc *, struct fpreg *));
+extern int procfs_write_fpregs __P((struct proc *, struct fpreg *));
+
+/*
+ * Per-file I/O handlers; all share one signature (two procs, the
+ * pfsnode and the uio describing the transfer).
+ */
+extern int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+extern int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio));
+
+/* NOTE(review): presumably bits for pfs_flags -- confirm at the users. */
+#define PROCFS_LOCKED 0x01
+#define PROCFS_WANT 0x02
+
+extern int (**procfs_vnodeop_p)();
+extern struct vfsops procfs_vfsops;
+
+/*
+ * Prototypes for procfs vnode ops.
+ *
+ * Operations with a real implementation are declared as functions.
+ * The rest are macros that cast either procfs_badop or nullop to the
+ * matching vnode-op signature; read and write both map to procfs_rw.
+ */
+int procfs_badop(); /* varargs */
+int procfs_rw __P((struct vop_read_args *));
+int procfs_lookup __P((struct vop_lookup_args *));
+#define procfs_create ((int (*) __P((struct vop_create_args *))) procfs_badop)
+#define procfs_mknod ((int (*) __P((struct vop_mknod_args *))) procfs_badop)
+int procfs_open __P((struct vop_open_args *));
+int procfs_close __P((struct vop_close_args *));
+int procfs_access __P((struct vop_access_args *));
+int procfs_getattr __P((struct vop_getattr_args *));
+int procfs_setattr __P((struct vop_setattr_args *));
+#define procfs_read procfs_rw
+#define procfs_write procfs_rw
+int procfs_ioctl __P((struct vop_ioctl_args *));
+#define procfs_select ((int (*) __P((struct vop_select_args *))) procfs_badop)
+#define procfs_mmap ((int (*) __P((struct vop_mmap_args *))) procfs_badop)
+#define procfs_fsync ((int (*) __P((struct vop_fsync_args *))) procfs_badop)
+#define procfs_seek ((int (*) __P((struct vop_seek_args *))) procfs_badop)
+#define procfs_remove ((int (*) __P((struct vop_remove_args *))) procfs_badop)
+#define procfs_link ((int (*) __P((struct vop_link_args *))) procfs_badop)
+#define procfs_rename ((int (*) __P((struct vop_rename_args *))) procfs_badop)
+#define procfs_mkdir ((int (*) __P((struct vop_mkdir_args *))) procfs_badop)
+#define procfs_rmdir ((int (*) __P((struct vop_rmdir_args *))) procfs_badop)
+#define procfs_symlink ((int (*) __P((struct vop_symlink_args *))) procfs_badop)
+int procfs_readdir __P((struct vop_readdir_args *));
+#define procfs_readlink ((int (*) __P((struct vop_readlink_args *))) procfs_badop)
+int procfs_abortop __P((struct vop_abortop_args *));
+int procfs_inactive __P((struct vop_inactive_args *));
+int procfs_reclaim __P((struct vop_reclaim_args *));
+#define procfs_lock ((int (*) __P((struct vop_lock_args *))) nullop)
+#define procfs_unlock ((int (*) __P((struct vop_unlock_args *))) nullop)
+int procfs_bmap __P((struct vop_bmap_args *));
+#define procfs_strategy ((int (*) __P((struct vop_strategy_args *))) procfs_badop)
+int procfs_print __P((struct vop_print_args *));
+#define procfs_islocked ((int (*) __P((struct vop_islocked_args *))) nullop)
+#define procfs_advlock ((int (*) __P((struct vop_advlock_args *))) procfs_badop)
+#define procfs_blkatoff ((int (*) __P((struct vop_blkatoff_args *))) procfs_badop)
+#define procfs_valloc ((int (*) __P((struct vop_valloc_args *))) procfs_badop)
+#define procfs_vfree ((int (*) __P((struct vop_vfree_args *))) nullop)
+#define procfs_truncate ((int (*) __P((struct vop_truncate_args *))) procfs_badop)
+#define procfs_update ((int (*) __P((struct vop_update_args *))) nullop)
+#endif /* KERNEL */
diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c
new file mode 100644
index 0000000..a42a03c
--- /dev/null
+++ b/sys/fs/procfs/procfs_ctl.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_ctl.c 8.3 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_ctl.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * True iff process (p) is in trace wait state
+ * relative to process (curp): (p) is stopped (SSTOP), its parent
+ * is (curp), and it has the P_TRACED flag set.
+ */
+#define TRACE_WAIT_P(curp, p) \
+ ((p)->p_stat == SSTOP && \
+ (p)->p_pptr == (curp) && \
+ ((p)->p_flag & P_TRACED))
+
+/*
+ * Single-step fixup hook.  The active definition expands to nothing;
+ * the disabled ("notdef") one calls procfs_fix_sstep().  The disabled
+ * form is a do/while (0) so that it is a single statement at the
+ * "FIX_SSTEP(p);" call sites and its braces balance.
+ */
+#ifdef notdef
+#define FIX_SSTEP(p) do { \
+		procfs_fix_sstep(p); \
+	} while (0)
+#else
+#define FIX_SSTEP(p)
+#endif
+
+/*
+ * Operation codes understood by procfs_control().
+ */
+#define PROCFS_CTL_ATTACH	1
+#define PROCFS_CTL_DETACH	2
+#define PROCFS_CTL_STEP		3
+#define PROCFS_CTL_RUN		4
+#define PROCFS_CTL_WAIT		5
+
+/*
+ * Command words accepted on the ctl file, mapped to the operation
+ * codes above.  The table ends with an all-zero entry.
+ */
+static vfs_namemap_t ctlnames[] = {
+	{ "attach",	PROCFS_CTL_ATTACH },
+	{ "detach",	PROCFS_CTL_DETACH },
+	{ "step",	PROCFS_CTL_STEP },
+	{ "run",	PROCFS_CTL_RUN },
+	{ "wait",	PROCFS_CTL_WAIT },
+	{ 0,		0 },
+};
+
+/*
+ * Lower-case signal names accepted on the ctl file, mapped to the
+ * signal numbers they stand for.  The table ends with an all-zero
+ * entry.
+ */
+static vfs_namemap_t signames[] = {
+	{ "hup",	SIGHUP },
+	{ "int",	SIGINT },
+	{ "quit",	SIGQUIT },
+	{ "ill",	SIGILL },
+	{ "trap",	SIGTRAP },
+	{ "abrt",	SIGABRT },
+	{ "iot",	SIGIOT },
+	{ "emt",	SIGEMT },
+	{ "fpe",	SIGFPE },
+	{ "kill",	SIGKILL },
+	{ "bus",	SIGBUS },
+	{ "segv",	SIGSEGV },
+	{ "sys",	SIGSYS },
+	{ "pipe",	SIGPIPE },
+	{ "alrm",	SIGALRM },
+	{ "term",	SIGTERM },
+	{ "urg",	SIGURG },
+	{ "stop",	SIGSTOP },
+	{ "tstp",	SIGTSTP },
+	{ "cont",	SIGCONT },
+	{ "chld",	SIGCHLD },
+	{ "ttin",	SIGTTIN },
+	{ "ttou",	SIGTTOU },
+	{ "io",		SIGIO },
+	{ "xcpu",	SIGXCPU },
+	{ "xfsz",	SIGXFSZ },
+	{ "vtalrm",	SIGVTALRM },
+	{ "prof",	SIGPROF },
+	{ "winch",	SIGWINCH },
+	{ "info",	SIGINFO },
+	{ "usr1",	SIGUSR1 },
+	{ "usr2",	SIGUSR2 },
+	{ 0,		0 },
+};
+
+/*
+ * Carry out one debugger control operation (op, a PROCFS_CTL_* code)
+ * issued by process curp against target process p.
+ *
+ * ATTACH marks p as traced, reparents it to curp (remembering the old
+ * parent's pid in p_oppid) and sends it SIGSTOP.  DETACH and WAIT are
+ * accepted in any state; every other op requires TRACE_WAIT_P(curp, p)
+ * and fails with EBUSY otherwise.  DETACH (of a traced target), STEP
+ * and RUN finish by making a stopped target runnable again; WAIT
+ * sleeps until the target is stopped.
+ *
+ * Returns 0 on success, EBUSY or EINVAL as noted in the body, or, for
+ * WAIT, whatever tsleep() returned.  An op that passes the state
+ * check but is not one of the known codes panics.
+ */
+static int
+procfs_control(curp, p, op)
+ struct proc *curp;
+ struct proc *p;
+ int op;
+{
+ int error;
+
+ /*
+ * Attach - attaches the target process for debugging
+ * by the calling process.
+ */
+ if (op == PROCFS_CTL_ATTACH) {
+ /* check whether already being traced */
+ if (p->p_flag & P_TRACED)
+ return (EBUSY);
+
+ /* can't trace yourself! */
+ if (p->p_pid == curp->p_pid)
+ return (EINVAL);
+
+ /*
+ * Go ahead and set the trace flag.
+ * Save the old parent (it's reset in
+ * _DETACH, and also in kern_exit.c:wait4()).
+ * Reparent the process so that the tracing
+ * proc gets to see all the action.
+ * Stop the target.
+ */
+ p->p_flag |= P_TRACED;
+ p->p_xstat = 0; /* XXX ? */
+ if (p->p_pptr != curp) {
+ p->p_oppid = p->p_pptr->p_pid;
+ proc_reparent(p, curp);
+ }
+ psignal(p, SIGSTOP);
+ return (0);
+ }
+
+ /*
+ * Target process must be stopped, owned by (curp) and
+ * be set up for tracing (P_TRACED flag set).
+ * Allow DETACH to take place at any time for sanity.
+ * Allow WAIT any time, of course.
+ */
+ switch (op) {
+ case PROCFS_CTL_DETACH:
+ case PROCFS_CTL_WAIT:
+ break;
+
+ default:
+ if (!TRACE_WAIT_P(curp, p))
+ return (EBUSY);
+ }
+
+ /*
+ * do single-step fixup if needed
+ */
+ FIX_SSTEP(p);
+
+ /*
+ * Don't deliver any signal by default.
+ * To continue with a signal, just send
+ * the signal name to the ctl file
+ */
+ p->p_xstat = 0;
+
+ switch (op) {
+ /*
+ * Detach. Cleans up the target process, reparent it if possible
+ * and set it running once more.
+ */
+ case PROCFS_CTL_DETACH:
+ /* if not being traced, then this is a painless no-op */
+ if ((p->p_flag & P_TRACED) == 0)
+ return (0);
+
+ /* not being traced any more */
+ p->p_flag &= ~P_TRACED;
+
+ /* give process back to original parent */
+ if (p->p_oppid != p->p_pptr->p_pid) {
+ struct proc *pp;
+
+ /* the original parent may be gone; then p stays with curp */
+ pp = pfind(p->p_oppid);
+ if (pp)
+ proc_reparent(p, pp);
+ }
+
+ p->p_oppid = 0;
+ p->p_flag &= ~P_WAITED; /* XXX ? */
+ wakeup((caddr_t) curp); /* XXX for CTL_WAIT below ? */
+
+ break;
+
+ /*
+ * Step. Let the target process execute a single instruction.
+ */
+ case PROCFS_CTL_STEP:
+ procfs_sstep(p);
+ break;
+
+ /*
+ * Run. Let the target process continue running until a breakpoint
+ * or some other trap.
+ */
+ case PROCFS_CTL_RUN:
+ break;
+
+ /*
+ * Wait for the target process to stop.
+ * If the target is being traced, also give up (EBUSY) when it
+ * stops being a traced child of curp.  If the target is not
+ * being traced then just wait for it to enter the stopped state.
+ */
+ case PROCFS_CTL_WAIT:
+ error = 0;
+ if (p->p_flag & P_TRACED) {
+ while (error == 0 &&
+ (p->p_stat != SSTOP) &&
+ (p->p_flag & P_TRACED) &&
+ (p->p_pptr == curp)) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfsx", 0);
+ }
+ if (error == 0 && !TRACE_WAIT_P(curp, p))
+ error = EBUSY;
+ } else {
+ while (error == 0 && p->p_stat != SSTOP) {
+ error = tsleep((caddr_t) p,
+ PWAIT|PCATCH, "procfs", 0);
+ }
+ }
+ return (error);
+
+ default:
+ panic("procfs_control");
+ }
+
+ /* DETACH, STEP and RUN all resume a stopped target. */
+ if (p->p_stat == SSTOP)
+ setrunnable(p);
+ return (0);
+}
+
+/*
+ * Handle a write to a process's ctl file.
+ *
+ * The written text (at most PROCFS_CTLLEN bytes) is fetched with
+ * vfs_getuserstr() and looked up first among the debugger commands in
+ * ctlnames and then among the signal names in signames.  A command is
+ * handed to procfs_control().  A signal name either resumes a target
+ * that is in trace wait state relative to curp, with that signal left
+ * in p_xstat, or is posted to the target with psignal().
+ *
+ * Returns EOPNOTSUPP for anything but a write and for unknown words,
+ * the error from vfs_getuserstr() or procfs_control(), or 0.
+ *
+ * Note that there is no way to single-step a signal delivery: sending
+ * a signal to a process being debugged lets it continue to run.
+ */
+int
+procfs_doctl(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char msg[PROCFS_CTLLEN+1];
+	vfs_namemap_t *entry;
+	int msglen = PROCFS_CTLLEN;
+	int error;
+
+	/* The ctl file is write-only. */
+	if (uio->uio_rw != UIO_WRITE)
+		return (EOPNOTSUPP);
+
+	error = vfs_getuserstr(uio, msg, &msglen);
+	if (error)
+		return (error);
+
+	/* Debugger commands take precedence over signal names. */
+	entry = vfs_findname(ctlnames, msg, msglen);
+	if (entry)
+		return (procfs_control(curp, p, entry->nm_val));
+
+	entry = vfs_findname(signames, msg, msglen);
+	if (entry == 0)
+		return (EOPNOTSUPP);
+
+	if (TRACE_WAIT_P(curp, p)) {
+		/* Resume the stopped target with this signal. */
+		p->p_xstat = entry->nm_val;
+		FIX_SSTEP(p);
+		setrunnable(p);
+	} else {
+		psignal(p, entry->nm_val);
+	}
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c
new file mode 100644
index 0000000..6d850a6
--- /dev/null
+++ b/sys/fs/procfs/procfs_fpregs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_fpregs.c 8.1 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the floating point register file of process (p).
+ *
+ * A struct fpreg is filled in by procfs_read_fpregs() and the window
+ * [uio_offset, uio_offset + uio_resid), clipped to the size of the
+ * structure, is moved to or from the caller with uiomove().  For a
+ * write the updated structure is then handed to procfs_write_fpregs(),
+ * which is only attempted while the target is stopped (SSTOP).
+ *
+ * Returns 0 on success, EINVAL when the offset does not lie inside
+ * the structure, EBUSY when writing to a target that is not stopped,
+ * or whatever the helpers/uiomove report.  uio_offset is always reset
+ * to 0 on return.
+ */
+int
+procfs_dofpregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct fpreg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * The offset is used to index the on-stack copy (r).  Validate
+	 * it before any arithmetic: a negative value, or one so large
+	 * that the int length below would wrap, must not be allowed to
+	 * point uiomove() at kernel stack outside of (r).
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kv = (char *) &r + (int) uio->uio_offset;
+	kl = (int) sizeof(r) - (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	error = procfs_read_fpregs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be replaced while the target is stopped */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_fpregs(p, &r);
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
new file mode 100644
index 0000000..039983d
--- /dev/null
+++ b/sys/fs/procfs/procfs_mem.c
@@ -0,0 +1,302 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993 Sean Eric Fagan
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry and Sean Eric Fagan.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_mem.c 8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_mem.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * This is a lightly hacked and merged version
+ * of sef's pread/pwrite functions
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h>
+#include <vm/vm_kern.h>
+#include <vm/vm_page.h>
+
+/*
+ * Transfer data between the caller's uio and the address space of
+ * process (p), one page per loop iteration.
+ *
+ * uio->uio_offset is taken as the user virtual address inside the
+ * target and uio->uio_rw selects the direction.  For each page the
+ * backing object is looked up in the target's vm_map, mapped into
+ * kernel_map, copied with uiomove() and unmapped again.  The loop
+ * stops when uio_resid reaches zero, on the first non-zero status,
+ * or (with status 0) once the address is above VM_MAXUSER_ADDRESS.
+ *
+ * Returns the last status produced by the vm_*() calls or uiomove().
+ * NOTE(review): the vm_*() routines presumably return KERN_* codes
+ * rather than errno values - confirm before passing this to userland.
+ */
+static int
+procfs_rwmem(p, uio)
+ struct proc *p;
+ struct uio *uio;
+{
+ int error;
+ int writing;
+
+ writing = uio->uio_rw == UIO_WRITE;
+
+ /*
+ * Only map in one page at a time. We don't have to, but it
+ * makes things easier. This way is trivial - right?
+ */
+ do {
+ vm_map_t map, tmap;
+ vm_object_t object;
+ vm_offset_t kva;
+ vm_offset_t uva;
+ int page_offset; /* offset into page */
+ vm_offset_t pageno; /* page number */
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ vm_page_t m;
+ boolean_t wired, single_use;
+ vm_offset_t off;
+ u_int len;
+ int fix_prot;
+
+ /*
+ * NOTE(review): an address exactly equal to VM_MAXUSER_ADDRESS
+ * passes this test - confirm whether ">=" was intended.
+ */
+ uva = (vm_offset_t) uio->uio_offset;
+ if (uva > VM_MAXUSER_ADDRESS) {
+ error = 0;
+ break;
+ }
+
+ /*
+ * Get the page number of this segment.
+ */
+ pageno = trunc_page(uva);
+ page_offset = uva - pageno;
+
+ /*
+ * How many bytes to copy
+ */
+ len = min(PAGE_SIZE - page_offset, uio->uio_resid);
+
+ /*
+ * The map we want...
+ */
+ map = &p->p_vmspace->vm_map;
+
+ /*
+ * Check the permissions for the area we're interested
+ * in.
+ */
+ fix_prot = 0;
+ if (writing)
+ fix_prot = !vm_map_check_protection(map, pageno,
+ pageno + PAGE_SIZE, VM_PROT_WRITE);
+
+ if (fix_prot) {
+ /*
+ * If the page is not writable, we make it so.
+ * XXX It is possible that a page may *not* be
+ * read/executable, if a process changes that!
+ * We will assume, for now, that a page is either
+ * VM_PROT_ALL, or VM_PROT_READ|VM_PROT_EXECUTE.
+ */
+ error = vm_map_protect(map, pageno,
+ pageno + PAGE_SIZE, VM_PROT_ALL, 0);
+ if (error)
+ break;
+ }
+
+ /*
+ * Now we need to get the page. out_entry, out_prot, wired,
+ * and single_use aren't used. One would think the vm code
+ * would be a *bit* nicer... We use tmap because
+ * vm_map_lookup() can change the map argument.
+ */
+ tmap = map;
+ error = vm_map_lookup(&tmap, pageno,
+ writing ? VM_PROT_WRITE : VM_PROT_READ,
+ &out_entry, &object, &off, &out_prot,
+ &wired, &single_use);
+ /*
+ * We're done with tmap now.
+ */
+ if (!error)
+ vm_map_lookup_done(tmap, out_entry);
+
+ /*
+ * Fault the page in...
+ * (only when writing to an object that has a shadow, and the
+ * page is either absent or flagged PG_COPYONWRITE)
+ */
+ if (!error && writing && object->shadow) {
+ m = vm_page_lookup(object, off);
+ if (m == 0 || (m->flags & PG_COPYONWRITE))
+ error = vm_fault(map, pageno,
+ VM_PROT_WRITE, FALSE);
+ }
+
+ /* Find space in kernel_map for the page we're interested in */
+ if (!error)
+ error = vm_map_find(kernel_map, object, off, &kva,
+ PAGE_SIZE, 1);
+
+ if (!error) {
+ /*
+ * Neither vm_map_lookup() nor vm_map_find() appear
+ * to add a reference count to the object, so we do
+ * that here and now.
+ */
+ vm_object_reference(object);
+
+ /*
+ * Mark the page we just found as pageable.
+ */
+ error = vm_map_pageable(kernel_map, kva,
+ kva + PAGE_SIZE, 0);
+
+ /*
+ * Now do the i/o move.
+ */
+ if (!error)
+ error = uiomove(kva + page_offset, len, uio);
+
+ /* the temporary kernel mapping is dropped whether or not the copy worked */
+ vm_map_remove(kernel_map, kva, kva + PAGE_SIZE);
+ }
+ /*
+ * Undo the temporary write enable.  The protection is set to
+ * read/execute, per the assumption stated in the XXX note
+ * above; the result of this call is not checked.
+ */
+ if (fix_prot)
+ vm_map_protect(map, pageno, pageno + PAGE_SIZE,
+ VM_PROT_READ|VM_PROT_EXECUTE, 0);
+ } while (error == 0 && uio->uio_resid > 0);
+
+ return (error);
+}
+
+/*
+ * Move data between the caller and the memory of the target
+ * process (p).  The target's pages are mapped into the kernel one
+ * at a time and the transfer is a plain uiomove from that kernel
+ * mapping; see procfs_rwmem().  An empty request succeeds at once
+ * without touching the target.
+ */
+int
+procfs_domem(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+
+	/* nothing requested, nothing to do */
+	if (uio->uio_resid == 0)
+		return (0);
+
+	return (procfs_rwmem(p, uio));
+}
+
+/*
+ * Return the vnode from which the text segment of process (p)
+ * is being executed.
+ *
+ * Asking the VM system would be nicer, but there is no sure-fire
+ * way of doing that.  Instead fork(), exec() and wait() all keep
+ * the p_textvp field of the proc structure up to date; it holds
+ * a reference to the exec'ed vnode, and that field is simply
+ * handed back here.
+ */
+struct vnode *
+procfs_findtextvp(p)
+	struct proc *p;
+{
+	struct vnode *textvp;
+
+	textvp = p->p_textvp;
+	return (textvp);
+}
+
+
+#ifdef probably_never
+/*
+ * Alternative implementation, compiled only when probably_never is
+ * defined.
+ *
+ * Given process (p), find the vnode from which
+ * its text segment is being mapped.
+ *
+ * Walks the user address range a page at a time and returns the
+ * vnode behind the first object found whose pager is of type
+ * PG_VNODE, or 0 if no such object is found.  Progress is reported
+ * with printf.
+ *
+ * (This is here, rather than in procfs_subr in order
+ * to keep all the VM related code in one place.)
+ */
+struct vnode *
+procfs_findtextvp(p)
+ struct proc *p;
+{
+ int error;
+ vm_object_t object;
+ vm_offset_t pageno; /* page number */
+
+ /* find a vnode pager for the user address space */
+
+ for (pageno = VM_MIN_ADDRESS;
+ pageno < VM_MAXUSER_ADDRESS;
+ pageno += PAGE_SIZE) {
+ vm_map_t map;
+ vm_map_entry_t out_entry;
+ vm_prot_t out_prot;
+ boolean_t wired, single_use;
+ vm_offset_t off;
+
+ /* map is reloaded every pass; it is passed by address to the lookup */
+ map = &p->p_vmspace->vm_map;
+ error = vm_map_lookup(&map, pageno,
+ VM_PROT_READ,
+ &out_entry, &object, &off, &out_prot,
+ &wired, &single_use);
+
+ if (!error) {
+ vm_pager_t pager;
+
+ printf("procfs: found vm object\n");
+ vm_map_lookup_done(map, out_entry);
+ printf("procfs: vm object = %x\n", object);
+
+ /*
+ * At this point, assuming no errors, object
+ * is the VM object mapping UVA (pageno).
+ * Ensure it has a vnode pager, then grab
+ * the vnode from that pager's handle.
+ */
+
+ pager = object->pager;
+ printf("procfs: pager = %x\n", pager);
+ if (pager)
+ printf("procfs: found pager, type = %d\n", pager->pg_type);
+ if (pager && pager->pg_type == PG_VNODE) {
+ struct vnode *vp;
+
+ vp = (struct vnode *) pager->pg_handle;
+ printf("procfs: vp = 0x%x\n", vp);
+ return (vp);
+ }
+ }
+ }
+
+ printf("procfs: text object not found\n");
+ return (0);
+}
+#endif /* probably_never */
diff --git a/sys/fs/procfs/procfs_note.c b/sys/fs/procfs/procfs_note.c
new file mode 100644
index 0000000..bf2f160
--- /dev/null
+++ b/sys/fs/procfs/procfs_note.c
@@ -0,0 +1,73 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_note.c 8.2 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_note.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/signal.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Handle a write to a per-process note file.
+ *
+ * The note text is fetched from the caller, but there is nothing
+ * to deliver it to yet, so a well-formed write still ends in
+ * EOPNOTSUPP.  Transfers that are not writes return EINVAL and a
+ * failure to fetch the text returns that failure.
+ */
+int
+procfs_donote(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	char text[PROCFS_NOTELEN+1];
+	int textlen;
+	int error;
+
+	if (uio->uio_rw == UIO_WRITE) {
+		textlen = PROCFS_NOTELEN;
+		error = vfs_getuserstr(uio, text, &textlen);
+		if (error == 0) {
+			/* send to process's notify function */
+			error = EOPNOTSUPP;
+		}
+	} else {
+		error = EINVAL;
+	}
+
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c
new file mode 100644
index 0000000..fa95fef
--- /dev/null
+++ b/sys/fs/procfs/procfs_regs.c
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_regs.c 8.3 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_regs.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <machine/reg.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Read or write the general register file of process (p).
+ *
+ * A struct reg is filled in by procfs_read_regs() and the window
+ * [uio_offset, uio_offset + uio_resid), clipped to the size of the
+ * structure, is moved to or from the caller with uiomove().  For a
+ * write the updated structure is then handed to procfs_write_regs(),
+ * which is only attempted while the target is stopped (SSTOP).
+ *
+ * Returns 0 on success, EINVAL when the offset does not lie inside
+ * the structure, EBUSY when writing to a target that is not stopped,
+ * or whatever the helpers/uiomove report.  uio_offset is always reset
+ * to 0 on return.
+ */
+int
+procfs_doregs(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	int error;
+	struct reg r;
+	char *kv;
+	int kl;
+
+	/*
+	 * The offset is used to index the on-stack copy (r).  Validate
+	 * it before any arithmetic: a negative value, or one so large
+	 * that the int length below would wrap, must not be allowed to
+	 * point uiomove() at kernel stack outside of (r).
+	 */
+	if (uio->uio_offset < 0 || uio->uio_offset > sizeof(r)) {
+		uio->uio_offset = 0;
+		return (EINVAL);
+	}
+
+	kv = (char *) &r + (int) uio->uio_offset;
+	kl = (int) sizeof(r) - (int) uio->uio_offset;
+	if (kl > uio->uio_resid)
+		kl = uio->uio_resid;
+
+	error = procfs_read_regs(p, &r);
+	if (error == 0)
+		error = uiomove(kv, kl, uio);
+	if (error == 0 && uio->uio_rw == UIO_WRITE) {
+		/* registers may only be replaced while the target is stopped */
+		if (p->p_stat != SSTOP)
+			error = EBUSY;
+		else
+			error = procfs_write_regs(p, &r);
+	}
+
+	uio->uio_offset = 0;
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c
new file mode 100644
index 0000000..d88aaab
--- /dev/null
+++ b/sys/fs/procfs/procfs_status.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94
+ *
+ * From:
+ * $Id: procfs_status.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+#include <sys/resource.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+
+/*
+ * Produce the one-line text of the per-process status file.
+ *
+ * The line is formatted afresh into a local buffer on every call
+ * and the part selected by uio_offset/uio_resid is copied out with
+ * uiomove().  Fields, in order:
+ *
+ *	comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg uid groups ...
+ *
+ * Returns EOPNOTSUPP for anything but a read, EINVAL for a negative
+ * offset, 0 (with nothing transferred) for an offset at or past the
+ * end of the line, and otherwise the result of uiomove().
+ */
+int
+procfs_dostatus(curp, p, pfs, uio)
+	struct proc *curp;
+	struct proc *p;
+	struct pfsnode *pfs;
+	struct uio *uio;
+{
+	struct session *sess;
+	struct tty *tp;
+	struct ucred *cr;
+	char *ps;
+	char *sep;
+	int pid, ppid, pgid, sid;
+	int i;
+	int xlen;
+	int error;
+	char psbuf[256];		/* XXX - conservative */
+
+	if (uio->uio_rw != UIO_READ)
+		return (EOPNOTSUPP);
+
+	pid = p->p_pid;
+	ppid = p->p_pptr ? p->p_pptr->p_pid : 0;
+	pgid = p->p_pgrp->pg_id;
+	sess = p->p_pgrp->pg_session;
+	sid = sess->s_leader ? sess->s_leader->p_pid : 0;
+
+	/* command name; p_comm is copied by length and terminated here */
+	ps = psbuf;
+	bcopy(p->p_comm, ps, MAXCOMLEN);
+	ps[MAXCOMLEN] = '\0';
+	ps += strlen(ps);
+	ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid);
+
+	/* controlling terminal as major,minor or -1,-1 when there is none */
+	if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp))
+		ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev));
+	else
+		ps += sprintf(ps, "%d,%d ", -1, -1);
+
+	/* comma separated flag list, or "noflags" when none apply */
+	sep = "";
+	if (sess->s_ttyvp) {
+		ps += sprintf(ps, "%sctty", sep);
+		sep = ",";
+	}
+	if (SESS_LEADER(p)) {
+		ps += sprintf(ps, "%ssldr", sep);
+		sep = ",";
+	}
+	if (*sep != ',')
+		ps += sprintf(ps, "noflags");
+
+	/* p_stats is only consulted while the process is flagged P_INMEM */
+	if (p->p_flag & P_INMEM)
+		ps += sprintf(ps, " %d,%d",
+			p->p_stats->p_start.tv_sec,
+			p->p_stats->p_start.tv_usec);
+	else
+		ps += sprintf(ps, " -1,-1");
+
+	{
+		struct timeval ut, st;
+
+		calcru(p, &ut, &st, (void *) 0);
+		ps += sprintf(ps, " %d,%d %d,%d",
+			ut.tv_sec,
+			ut.tv_usec,
+			st.tv_sec,
+			st.tv_usec);
+	}
+
+	ps += sprintf(ps, " %s",
+		(p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan");
+
+	cr = p->p_ucred;
+
+	/* uid first, then every entry of the group list */
+	ps += sprintf(ps, " %d", cr->cr_uid);
+	for (i = 0; i < cr->cr_ngroups; i++)
+		ps += sprintf(ps, ",%d", cr->cr_groups[i]);
+	ps += sprintf(ps, "\n");
+
+	/*
+	 * Select the part of the line the caller asked for.  The offset
+	 * is checked explicitly, rather than clamped through min(), so
+	 * that neither a negative offset nor a negative remaining
+	 * length can ever reach uiomove() and expose kernel stack
+	 * outside of psbuf.
+	 */
+	xlen = ps - psbuf;
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+	if (uio->uio_offset >= xlen)
+		return (0);
+
+	ps = psbuf + (int) uio->uio_offset;
+	xlen -= (int) uio->uio_offset;
+	if (xlen > uio->uio_resid)
+		xlen = uio->uio_resid;
+	if (xlen <= 0)
+		error = 0;
+	else
+		error = uiomove(ps, xlen, uio);
+
+	return (error);
+}
diff --git a/sys/fs/procfs/procfs_subr.c b/sys/fs/procfs/procfs_subr.c
new file mode 100644
index 0000000..b371af1
--- /dev/null
+++ b/sys/fs/procfs/procfs_subr.c
@@ -0,0 +1,314 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_subr.c 8.4 (Berkeley) 1/27/94
+ *
+ * From:
+ * $Id: procfs_subr.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <miscfs/procfs/procfs.h>
+
+/* head of the singly-linked list (pfs_next) holding every pfsnode */
+static struct pfsnode *pfshead;
+/* flag word (PROCFS_LOCKED, PROCFS_WANT) serialising pfsnode allocation */
+static int pfsvplock;
+
+/*
+ * allocate a pfsnode/vnode pair. the vnode is
+ * referenced, but not locked.
+ *
+ * the pid, pfs_type, and mount point uniquely
+ * identify a pfsnode. the mount point is needed
+ * because someone might mount this filesystem
+ * twice.
+ *
+ * all pfsnodes are maintained on a singly-linked
+ * list. new nodes are only allocated when they cannot
+ * be found on this list. entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list. this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode. this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+procfs_allocvp(mp, vpp, pid, pfs_type)
+	struct mount *mp;
+	struct vnode **vpp;
+	long pid;
+	pfstype pfs_type;
+{
+	struct pfsnode *node;
+	struct pfsnode **tail;
+	int error;
+
+	/* reuse a node that is already on the list if there is one */
+loop:
+	node = pfshead;
+	while (node != 0) {
+		if (node->pfs_pid == pid &&
+		    node->pfs_type == pfs_type &&
+		    PFSTOV(node)->v_mount == mp) {
+			/* could not get the vnode: rescan from the top */
+			if (vget(node->pfs_vnode, 0))
+				goto loop;
+			*vpp = node->pfs_vnode;
+			return (0);
+		}
+		node = node->pfs_next;
+	}
+
+	/*
+	 * nothing suitable found.  getnewvnode can block, so take the
+	 * list lock first; if somebody else holds it, wait for them
+	 * and then search again.
+	 */
+	if (pfsvplock & PROCFS_LOCKED) {
+		pfsvplock |= PROCFS_WANT;
+		sleep((caddr_t) &pfsvplock, PINOD);
+		goto loop;
+	}
+	pfsvplock |= PROCFS_LOCKED;
+
+	error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp);
+	if (error)
+		goto out;
+
+	MALLOC((*vpp)->v_data, void *, sizeof(struct pfsnode),
+		M_TEMP, M_WAITOK);
+
+	node = VTOPFS(*vpp);
+	node->pfs_next = 0;
+	node->pfs_pid = (pid_t) pid;
+	node->pfs_type = pfs_type;
+	node->pfs_vnode = *vpp;
+	node->pfs_flags = 0;
+	node->pfs_fileno = PROCFS_FILENO(pid, pfs_type);
+
+	/* access mode depends only on the kind of node */
+	switch (pfs_type) {
+	case Proot:	/* /proc = dr-xr-xr-x */
+	case Pproc:
+		node->pfs_mode = (VREAD|VEXEC) |
+				 (VREAD|VEXEC) >> 3 |
+				 (VREAD|VEXEC) >> 6;
+		break;
+
+	case Pfile:
+	case Pmem:
+	case Pregs:
+	case Pfpregs:
+		node->pfs_mode = (VREAD|VWRITE);
+		break;
+
+	case Pctl:
+	case Pnote:
+	case Pnotepg:
+		node->pfs_mode = (VWRITE);
+		break;
+
+	case Pstatus:
+		node->pfs_mode = (VREAD) |
+				 (VREAD >> 3) |
+				 (VREAD >> 6);
+		break;
+
+	default:
+		panic("procfs_allocvp");
+	}
+
+	/* append the new node at the end of the procfs vnode list */
+	tail = &pfshead;
+	while (*tail)
+		tail = &(*tail)->pfs_next;
+	*tail = node;
+
+out:
+	/* drop the list lock and wake anybody who was waiting for it */
+	pfsvplock &= ~PROCFS_LOCKED;
+
+	if (pfsvplock & PROCFS_WANT) {
+		pfsvplock &= ~PROCFS_WANT;
+		wakeup((caddr_t) &pfsvplock);
+	}
+
+	return (error);
+}
+
+/*
+ * Detach the pfsnode belonging to (vp) from the list of all
+ * pfsnodes, if it is on it, and release its storage.  v_data is
+ * cleared afterwards.  Always returns 0.
+ */
+int
+procfs_freevp(vp)
+	struct vnode *vp;
+{
+	struct pfsnode *victim = VTOPFS(vp);
+	struct pfsnode **link;
+
+	/* find the pointer that refers to the victim ... */
+	link = &pfshead;
+	while (*link != 0 && *link != victim)
+		link = &(*link)->pfs_next;
+
+	/* ... and make it skip over the victim */
+	if (*link != 0)
+		*link = victim->pfs_next;
+
+	FREE(vp->v_data, M_TEMP);
+	vp->v_data = 0;
+	return (0);
+}
+
+/*
+ * Common read/write entry point.  Looks up the process named by
+ * the pfsnode behind the vnode and hands the transfer to the
+ * routine that implements that kind of node.
+ *
+ * Returns EINVAL if the process cannot be found, EOPNOTSUPP for a
+ * node type that has no transfer routine, and otherwise whatever
+ * the selected routine returns.
+ */
+int
+procfs_rw(ap)
+	struct vop_read_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+	struct proc *curp = uio->uio_procp;
+	struct proc *p;
+	int error;
+
+	p = PFIND(pfs->pfs_pid);
+	if (p == 0)
+		return (EINVAL);
+
+	switch (pfs->pfs_type) {
+	case Pnote:
+	case Pnotepg:
+		error = procfs_donote(curp, p, pfs, uio);
+		break;
+
+	case Pregs:
+		error = procfs_doregs(curp, p, pfs, uio);
+		break;
+
+	case Pfpregs:
+		error = procfs_dofpregs(curp, p, pfs, uio);
+		break;
+
+	case Pctl:
+		error = procfs_doctl(curp, p, pfs, uio);
+		break;
+
+	case Pstatus:
+		error = procfs_dostatus(curp, p, pfs, uio);
+		break;
+
+	case Pmem:
+		error = procfs_domem(curp, p, pfs, uio);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	return (error);
+}
+
+/*
+ * Get a string from userland into (buf). Strip a trailing
+ * nl character (to allow easy access from the shell).
+ * The buffer should be *buflenp + 1 chars long. vfs_getuserstr
+ * will automatically add a nul char at the end.
+ *
+ * Returns 0 on success or the following errors
+ *
+ * EINVAL: file offset is non-zero.
+ * EMSGSIZE: message is longer than kernel buffer
+ * EFAULT: user i/o buffer is not addressable
+ */
+int
+vfs_getuserstr(uio, buf, buflenp)
+	struct uio *uio;
+	char *buf;
+	int *buflenp;
+{
+	int len;
+	int error;
+
+	/* only a transfer starting at offset zero is accepted */
+	if (uio->uio_offset != 0)
+		return (EINVAL);
+
+	/* the whole string has to fit: it is fetched in a single move */
+	if (*buflenp < uio->uio_resid)
+		return (EMSGSIZE);
+	len = uio->uio_resid;
+
+	error = uiomove(buf, len, uio);
+	if (error)
+		return (error);
+
+	/* rewind, so that several writes can follow without a seek */
+	uio->uio_offset = 0;
+
+	/* terminate, then drop one trailing newline if there is one */
+	buf[len] = '\0';
+	len = strlen(buf);
+	if (len > 0 && buf[len-1] == '\n') {
+		len--;
+		buf[len] = '\0';
+	}
+	*buflenp = len;
+
+	return (0);
+}
+
+/*
+ * Look up the string (buf), of length (buflen), in the name table
+ * (nm).  The table ends at the first entry whose nm_name is null.
+ *
+ * (buf) must be nul terminated at buf[buflen], as the strings
+ * produced by vfs_getuserstr are; an entry matches only if its name
+ * is exactly (buflen) characters long and equal to (buf).
+ *
+ * Returns the matching entry, or 0 if there is none.
+ */
+vfs_namemap_t *
+vfs_findname(nm, buf, buflen)
+	vfs_namemap_t *nm;
+	char *buf;
+	int buflen;
+{
+	for (; nm->nm_name; nm++) {
+		/*
+		 * Check the length first: comparing buflen+1 bytes
+		 * against a shorter table name would read past the
+		 * end of that name.
+		 */
+		if (strlen((char *) nm->nm_name) != buflen)
+			continue;
+		if (bcmp(buf, (char *) nm->nm_name, buflen+1) == 0)
+			return (nm);
+	}
+
+	return (0);
+}
diff --git a/sys/fs/procfs/procfs_vfsops.c b/sys/fs/procfs/procfs_vfsops.c
new file mode 100644
index 0000000..3938ca1
--- /dev/null
+++ b/sys/fs/procfs/procfs_vfsops.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vfsops.c 8.4 (Berkeley) 1/21/94
+ *
+ * From:
+ * $Id: procfs_vfsops.c,v 3.1 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs VFS interface
+ */
+
+#include <sys/param.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/syslog.h>
+#include <sys/mount.h>
+#include <sys/signalvar.h>
+#include <sys/vnode.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ *
+ * sets up a new procfs mount on (path).  the mount is
+ * flagged MNT_LOCAL, carries no private data, and has
+ * (path) and the fixed name "procfs" recorded in its
+ * statistics.  (data), (ndp) and (p) are not used.
+ *
+ * returns 0 on success, EINVAL if UIO_MX is not a power
+ * of two, and EOPNOTSUPP for an update of an existing mount.
+ */
+/* ARGSUSED */
+procfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	u_int size;
+
+	/* procfs_readdir masks offsets with UIO_MX-1 */
+	if (UIO_MX & (UIO_MX-1)) {
+		/* log messages need their own newline */
+		log(LOG_ERR, "procfs: invalid directory entry size\n");
+		return (EINVAL);
+	}
+
+	if (mp->mnt_flag & MNT_UPDATE)
+		return (EOPNOTSUPP);
+
+	mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = 0;
+	getnewfsid(mp, MOUNT_PROCFS);
+
+	/* record where we are mounted, nul padded to MNAMELEN */
+	(void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+	/* there is no device; report "procfs" as the source */
+	size = sizeof("procfs") - 1;
+	bcopy("procfs", mp->mnt_stat.f_mntfromname, size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+
+	return (0);
+}
+
+/*
+ * unmount system call
+ *
+ * flushes every vnode of the mount.  a forced unmount
+ * (MNT_FORCE) is refused with EINVAL unless the global
+ * doforce is set; otherwise the flush is done with
+ * FORCECLOSE.  the result of vflush() is returned.
+ */
+procfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	int flushflags;
+
+	flushflags = 0;
+	if ((mntflags & MNT_FORCE) != 0) {
+		/* procfs can never be rootfs so don't check for it */
+		if (doforce == 0)
+			return (EINVAL);
+		flushflags = FORCECLOSE;
+	}
+
+	return (vflush(mp, 0, flushflags));
+}
+
+/*
+ * return the root vnode of the procfs mount (mp) in (*vpp).
+ *
+ * the root is the Proot node for pid 0; it is obtained
+ * from procfs_allocvp() and marked as a directory and
+ * as the root of the filesystem.
+ *
+ * returns 0 on success or the error from procfs_allocvp().
+ */
+procfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *vp;
+	int error;
+
+	error = procfs_allocvp(mp, &vp, (pid_t) 0, Proot);
+	if (error)
+		return (error);
+
+	vp->v_type = VDIR;
+	vp->v_flag = VROOT;
+
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * start entry of procfs_vfsops.
+ *
+ * nothing is done here; all three arguments are
+ * ignored and success is always returned.
+ */
+/* ARGSUSED */
+procfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Get file system statistics.
+ *
+ * the numbers are invented: blocks are PAGE_SIZE bytes,
+ * there is a single block with none free, and the file
+ * counts are approximated from maxproc and nprocs.
+ * when (sbp) is not the mount's own statfs buffer the
+ * fsid and the two mount names are copied into it as well.
+ *
+ * always returns 0.
+ */
+procfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs *mstat = &mp->mnt_stat;
+
+	sbp->f_type = MOUNT_PROCFS;
+	sbp->f_bsize = PAGE_SIZE;
+	sbp->f_iosize = PAGE_SIZE;
+	sbp->f_blocks = 1;	/* avoid divide by zero in some df's */
+	sbp->f_bfree = 0;
+	sbp->f_bavail = 0;
+	sbp->f_files = maxproc;			/* approx */
+	sbp->f_ffree = maxproc - nprocs;	/* approx */
+
+	/* filling in the mount's own buffer: the rest is already there */
+	if (sbp == mstat)
+		return (0);
+
+	bcopy(&mstat->f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mstat->f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mstat->f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+
+	return (0);
+}
+
+
+procfs_quotactl(mp, cmds, uid, arg, p)
+ struct mount *mp;
+ int cmds;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP); /* no quota command is implemented; arguments are ignored */
+}
+
+procfs_sync(mp, waitfor)
+ struct mount *mp;
+ int waitfor;
+{
+
+ return (0); /* no work is done here; success is always reported */
+}
+
+procfs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP); /* not implemented; (*vpp) is left untouched */
+}
+
+procfs_fhtovp(mp, fhp, vpp)
+ struct mount *mp;
+ struct fid *fhp;
+ struct vnode **vpp;
+{
+
+ return (EINVAL); /* every file handle is rejected; (*vpp) is left untouched */
+}
+
+procfs_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return EINVAL; /* no file handle is ever produced; (*fhp) is left untouched */
+}
+
+procfs_init()
+{
+
+ return (0); /* no initialisation is performed */
+}
+
+struct vfsops procfs_vfsops = { /* the procfs VFS entry points, one per slot */
+ procfs_mount, /* mount */
+ procfs_start, /* start */
+ procfs_unmount, /* unmount */
+ procfs_root, /* root */
+ procfs_quotactl, /* quotactl */
+ procfs_statfs, /* statfs */
+ procfs_sync, /* sync */
+ procfs_vget, /* vget */
+ procfs_fhtovp, /* fhtovp */
+ procfs_vptofh, /* vptofh */
+ procfs_init, /* init */
+};
diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c
new file mode 100644
index 0000000..4e1ee00
--- /dev/null
+++ b/sys/fs/procfs/procfs_vnops.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright (c) 1993 Jan-Simon Pendry
+ * Copyright (c) 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)procfs_vnops.c 8.6 (Berkeley) 2/7/94
+ *
+ * From:
+ * $Id: procfs_vnops.c,v 3.2 1993/12/15 09:40:17 jsp Exp $
+ */
+
+/*
+ * procfs vnode interface
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/resourcevar.h>
+#include <miscfs/procfs/procfs.h>
+#include <vm/vm.h> /* for PAGE_SIZE */
+
+/*
+ * Vnode Operations.
+ *
+ */
+
+/*
+ * This is a list of the valid names in the
+ * process-specific sub-directories. It is
+ * used in procfs_lookup and procfs_readdir
+ *
+ * each entry records the length of the name
+ * (without the nul), the name itself and the
+ * pfstype of the node that the name stands for.
+ * the N() helper expands a string literal into
+ * the first two of those fields.
+ */
+static struct pfsnames {
+ u_short d_namlen; /* strlen of d_name */
+ char d_name[PROCFS_NAMELEN]; /* name of the entry */
+ pfstype d_pfstype; /* node type the name maps to */
+} procent[] = {
+#define N(s) sizeof(s)-1, s
+ /* namlen, nam, type */
+ { N("file"), Pfile },
+ { N("mem"), Pmem },
+ { N("regs"), Pregs },
+ { N("fpregs"), Pfpregs },
+ { N("ctl"), Pctl },
+ { N("status"), Pstatus },
+ { N("note"), Pnote },
+ { N("notepg"), Pnotepg },
+#undef N
+};
+#define Nprocent (sizeof(procent)/sizeof(procent[0]))
+
+static pid_t atopid __P((const char *, u_int));
+
+/*
+ * prepare the pfsnode (vp) for i/o.  (vp) is
+ * locked on entry and is left locked on exit.
+ *
+ * procfs has nothing to set up for i/o as such;
+ * the only thing implemented here is exclusive
+ * open of process memory images (Pmem):
+ *
+ * - ENOENT if the process has gone away,
+ * - EBUSY if an exclusive open would clash with
+ *   an existing writer, or a write open with an
+ *   existing exclusive open,
+ * - otherwise a write open records its FWRITE and
+ *   O_EXCL bits in the node.
+ *
+ * every other node type opens successfully.
+ */
+procfs_open(ap)
+	struct vop_open_args *ap;
+{
+	struct pfsnode *node = VTOPFS(ap->a_vp);
+
+	if (node->pfs_type != Pmem)
+		return (0);
+
+	if (PFIND(node->pfs_pid) == 0)
+		return (ENOENT);	/* was ESRCH, jsp */
+
+	if (((node->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) ||
+	    ((node->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE)))
+		return (EBUSY);
+
+	if (ap->a_mode & FWRITE)
+		node->pfs_flags = ap->a_mode & (FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * finish i/o on the pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ *
+ * the only work is to drop the exclusive open
+ * state that procfs_open may have recorded on a
+ * process memory image: when a writer closes a
+ * Pmem node that is marked O_EXCL, both FWRITE
+ * and O_EXCL are cleared from the node.
+ */
+procfs_close(ap)
+	struct vop_close_args *ap;
+{
+	struct pfsnode *node = VTOPFS(ap->a_vp);
+
+	if (node->pfs_type == Pmem &&
+	    (ap->a_fflag & FWRITE) && (node->pfs_flags & O_EXCL))
+		node->pfs_flags &= ~(FWRITE|O_EXCL);
+
+	return (0);
+}
+
+/*
+ * do an ioctl operation on pfsnode (vp).
+ * (vp) is not locked on entry or exit.
+ *
+ * no ioctl command is recognised; every
+ * request is answered with ENOTTY.
+ */
+procfs_ioctl(ap)
+ struct vop_ioctl_args *ap;
+{
+
+ return (ENOTTY);
+}
+
+/*
+ * block mapping for pfsnode (vp).
+ *
+ * procfs does not go through the buffer cache, so
+ * this entry should never be reached, and it is
+ * unclear which part of the kernel would want it.
+ * to stay sane it behaves as the usual no-op bmap:
+ * the vnode maps to itself and the block number is
+ * handed back unchanged.  returning (EIO) would be
+ * a reasonable alternative.
+ */
+procfs_bmap(ap)
+	struct vop_bmap_args *ap;
+{
+
+	/* either result pointer may be omitted by the caller */
+	if (ap->a_vpp)
+		*ap->a_vpp = ap->a_vp;
+	if (ap->a_bnp)
+		*ap->a_bnp = ap->a_bn;
+
+	return (0);
+}
+
+/*
+ * _inactive is called when the pfsnode is
+ * vrele'd and its reference count drops to
+ * zero. (vp) will be on the vnode free list,
+ * so vget() must be used to get it back.
+ *
+ * for procfs, look whether the process still
+ * exists; if it does not, discard the vnode
+ * with vgone(). this may be overkill and a
+ * waste of time, since the process is most
+ * likely still there and PFIND is not free.
+ *
+ * (vp) is not locked on entry or exit.
+ */
+procfs_inactive(ap)
+	struct vop_inactive_args *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct pfsnode *node = VTOPFS(vp);
+
+	/* process is still around: keep the vnode */
+	if (PFIND(node->pfs_pid) != 0)
+		return (0);
+
+	vgone(vp);
+	return (0);
+}
+
+/*
+ * _reclaim is called when getnewvnode() wants
+ * to reuse an entry from the vnode free list.
+ * at that point the filesystem has to release
+ * its private data and unhook the node from any
+ * private lists; procfs_freevp() does the work
+ * and its result is passed straight back.
+ */
+procfs_reclaim(ap)
+	struct vop_reclaim_args *ap;
+{
+
+	return (procfs_freevp(ap->a_vp));
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ *
+ * the value for the variable named by a_name is stored
+ * through a_retval and 0 is returned; an unknown name
+ * yields EINVAL and leaves *a_retval alone.
+ */
+procfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		*ap->a_retval = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		*ap->a_retval = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		*ap->a_retval = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		*ap->a_retval = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		*ap->a_retval = 1;
+		break;
+	case _PC_VDISABLE:
+		*ap->a_retval = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	return (0);
+}
+
+/*
+ * _print is used for debugging.
+ * just print a readable description
+ * of (vp): its pid, mode and flags.
+ *
+ * always returns 0.
+ */
+procfs_print(ap)
+	struct vop_print_args *ap;
+{
+	struct pfsnode *pfs = VTOPFS(ap->a_vp);
+
+	/* pid is printed as a long, as procfs_readdir does */
+	printf("tag VT_PROCFS, pid %ld, mode %x, flags %x\n",
+		(long) pfs->pfs_pid,
+		pfs->pfs_mode, pfs->pfs_flags);
+
+	/* the entry is called through an int-returning pointer */
+	return (0);
+}
+
+/*
+ * _abortop is called when operations such as
+ * rename and create fail. it has to undo whatever
+ * side-effects the lookup left behind, which always
+ * includes releasing the pathname buffer: the buffer
+ * is freed when HASBUF is set and SAVESTART is not.
+ */
+procfs_abortop(ap)
+	struct vop_abortop_args *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+
+	return (0);
+}
+
+/*
+ * generic entry point for unsupported operations
+ *
+ * takes no arguments and always fails with EIO.
+ */
+procfs_badop()
+{
+
+ return (EIO);
+}
+
+/*
+ * Invent attributes for pfsnode (vp) and store
+ * them in (vap).
+ * Directory lengths are returned as zero since
+ * any real length would require the genuine size
+ * to be computed, and nothing cares anyway.
+ *
+ * this is relatively minimal for procfs.
+ *
+ * returns ENOENT when the node is not the root
+ * and its process can no longer be found,
+ * EOPNOTSUPP for a Pfile node, and 0 otherwise.
+ * an unknown node type panics.
+ */
+procfs_getattr(ap)
+ struct vop_getattr_args *ap;
+{
+ struct pfsnode *pfs = VTOPFS(ap->a_vp);
+ struct vattr *vap = ap->a_vap;
+ struct proc *procp;
+ int error;
+
+ /* first check the process still exists */
+ switch (pfs->pfs_type) {
+ case Proot:
+ procp = 0; /* the root node is not looked up by pid */
+ break;
+
+ default:
+ procp = PFIND(pfs->pfs_pid);
+ if (procp == 0)
+ return (ENOENT);
+ }
+
+ error = 0;
+
+ /* start by zeroing out the attributes */
+ VATTR_NULL(vap);
+
+ /* next do all the common fields */
+ vap->va_type = ap->a_vp->v_type;
+ vap->va_mode = pfs->pfs_mode;
+ vap->va_fileid = pfs->pfs_fileno;
+ vap->va_flags = 0;
+ vap->va_blocksize = PAGE_SIZE;
+ vap->va_bytes = vap->va_size = 0;
+
+ /*
+ * If the process has exercised some setuid or setgid
+ * privilege, then rip away read/write permission so
+ * that only root can gain access.
+ */
+ switch (pfs->pfs_type) {
+ case Pregs:
+ case Pfpregs:
+ case Pmem:
+ if (procp->p_flag & P_SUGID)
+ vap->va_mode &= ~((VREAD|VWRITE)|
+ ((VREAD|VWRITE)>>3)|
+ ((VREAD|VWRITE)>>6)); /* owner, group and other bits */
+ break;
+ }
+
+ /*
+ * Make all times be current TOD.
+ * It would be possible to get the process start
+ * time from the p_stat structure, but there's
+ * no "file creation" time stamp anyway, and the
+ * p_stat structure is not addressable if u. gets
+ * swapped out for that process.
+ */
+ microtime(&vap->va_ctime);
+ vap->va_atime = vap->va_mtime = vap->va_ctime;
+
+ /*
+ * now do the object specific fields
+ *
+ * The size could be set from struct reg, but it's hardly
+ * worth the trouble, and it puts some (potentially) machine
+ * dependent data into this machine-independent code. If it
+ * becomes important then this function should break out into
+ * a per-file stat function in the corresponding .c file.
+ */
+
+ switch (pfs->pfs_type) {
+ case Proot:
+ vap->va_nlink = 2;
+ vap->va_uid = 0;
+ vap->va_gid = 0;
+ break;
+
+ case Pproc:
+ vap->va_nlink = 2;
+ vap->va_uid = procp->p_ucred->cr_uid;
+ vap->va_gid = procp->p_ucred->cr_gid;
+ break;
+
+ case Pfile:
+ error = EOPNOTSUPP; /* no attributes are invented for "file" */
+ break;
+
+ case Pmem:
+ vap->va_nlink = 1;
+ vap->va_bytes = vap->va_size =
+ ctob(procp->p_vmspace->vm_tsize +
+ procp->p_vmspace->vm_dsize +
+ procp->p_vmspace->vm_ssize); /* text + data + stack */
+ vap->va_uid = procp->p_ucred->cr_uid;
+ vap->va_gid = procp->p_ucred->cr_gid;
+ break;
+
+ case Pregs:
+ case Pfpregs:
+ case Pctl:
+ case Pstatus:
+ case Pnote:
+ case Pnotepg:
+ vap->va_nlink = 1;
+ vap->va_uid = procp->p_ucred->cr_uid;
+ vap->va_gid = procp->p_ucred->cr_gid;
+ break;
+
+ default:
+ panic("procfs_getattr");
+ }
+
+ return (error);
+}
+
+procfs_setattr(ap)
+ struct vop_setattr_args *ap;
+{
+ /*
+ * just fake out attribute setting
+ * it's not good to generate an error
+ * return, otherwise things like creat()
+ * will fail when they try to set the
+ * file length to 0. worse, this means
+ * that echo $note > /proc/$pid/note will fail.
+ *
+ * so (ap) is ignored, nothing is changed
+ * and success is always reported.
+ */
+
+ return (0);
+}
+
+/*
+ * implement access checking.
+ *
+ * something very similar to this code is duplicated
+ * throughout the 4bsd kernel and should be moved
+ * into kern/vfs_subr.c sometime.
+ *
+ * actually, the check for super-user is slightly
+ * broken since it will allow read access to write-only
+ * objects. this doesn't cause any particular trouble
+ * but does mean that the i/o entry points need to check
+ * that the operation really does make sense.
+ *
+ * returns 0 if the access in a_mode is allowed for
+ * a_cred, the error from VOP_GETATTR() if the attributes
+ * cannot be fetched, and EACCES otherwise.
+ */
+procfs_access(ap)
+	struct vop_access_args *ap;
+{
+	struct vattr *vap;
+	struct vattr vattr;
+	int error;
+	int mode;
+
+	/*
+	 * If you're the super-user,
+	 * you always get access.
+	 */
+	if (ap->a_cred->cr_uid == (uid_t) 0)
+		return (0);
+	vap = &vattr;
+	error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * work on a private copy of the requested mode;
+	 * the argument structure belongs to the caller
+	 * and must not be modified.
+	 */
+	mode = ap->a_mode;
+
+	/*
+	 * Access check is based on only one of owner, group, public.
+	 * If not owner, then check group. If not a member of the
+	 * group, then check public access.
+	 */
+	if (ap->a_cred->cr_uid != vap->va_uid) {
+		gid_t *gp;
+		int i;
+
+		mode >>= 3;
+		gp = ap->a_cred->cr_groups;
+		for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++)
+			if (vap->va_gid == *gp)
+				goto found;
+		mode >>= 3;
+found:
+		;
+	}
+
+	if ((vap->va_mode & mode) == mode)
+		return (0);
+
+	return (EACCES);
+}
+
+/*
+ * lookup. this is incredibly complicated in the
+ * general case, however for most pseudo-filesystems
+ * very little needs to be done.
+ *
+ * unless you want to get a migraine, just make sure your
+ * filesystem doesn't do any locking of its own. otherwise
+ * read and inwardly digest ufs_lookup().
+ *
+ * "." returns the directory itself with an extra reference.
+ * in the root, a name is either "curproc" or a decimal pid
+ * and yields the Pproc directory of that process. in a
+ * process directory, ".." yields the root and any other
+ * name is matched against the procent[] table.
+ *
+ * the result is stored through a_vpp. errors seen here
+ * are EIO (".." in the root), ENOENT (no such name or
+ * process), ENXIO ("file" without a text vnode), ENOTDIR
+ * (the parent is not a directory node) and whatever
+ * procfs_allocvp() or procfs_root() return.
+ */
+procfs_lookup(ap)
+ struct vop_lookup_args *ap;
+{
+ struct componentname *cnp = ap->a_cnp;
+ struct vnode **vpp = ap->a_vpp;
+ struct vnode *dvp = ap->a_dvp;
+ char *pname = cnp->cn_nameptr;
+ int error = 0;
+ pid_t pid;
+ struct vnode *nvp;
+ struct pfsnode *pfs;
+ struct proc *procp;
+ pfstype pfs_type;
+ int i;
+
+ if (cnp->cn_namelen == 1 && *pname == '.') {
+ *vpp = dvp;
+ VREF(dvp);
+ /*VOP_LOCK(dvp);*/
+ return (0);
+ }
+
+ *vpp = NULL;
+
+ pfs = VTOPFS(dvp);
+ switch (pfs->pfs_type) {
+ case Proot:
+ if (cnp->cn_flags & ISDOTDOT)
+ return (EIO);
+
+ if (CNEQ(cnp, "curproc", 7))
+ pid = cnp->cn_proc->p_pid; /* the process doing the lookup */
+ else
+ pid = atopid(pname, cnp->cn_namelen);
+ if (pid == NO_PID)
+ return (ENOENT);
+
+ procp = PFIND(pid);
+ if (procp == 0)
+ return (ENOENT);
+
+ error = procfs_allocvp(dvp->v_mount, &nvp, pid, Pproc);
+ if (error)
+ return (error);
+
+ nvp->v_type = VDIR;
+ pfs = VTOPFS(nvp); /* value is not used afterwards */
+
+ *vpp = nvp;
+ return (0);
+
+ case Pproc:
+ if (cnp->cn_flags & ISDOTDOT) {
+ error = procfs_root(dvp->v_mount, vpp);
+ return (error);
+ }
+
+ procp = PFIND(pfs->pfs_pid);
+ if (procp == 0)
+ return (ENOENT);
+
+ for (i = 0; i < Nprocent; i++) {
+ struct pfsnames *dp = &procent[i];
+
+ if (cnp->cn_namelen == dp->d_namlen &&
+ bcmp(pname, dp->d_name, dp->d_namlen) == 0) {
+ pfs_type = dp->d_pfstype;
+ goto found;
+ }
+ }
+ return (ENOENT);
+
+ found:
+ if (pfs_type == Pfile) {
+ nvp = procfs_findtextvp(procp); /* result is not a node made by procfs_allocvp */
+ if (nvp) {
+ VREF(nvp);
+ VOP_LOCK(nvp);
+ } else {
+ error = ENXIO;
+ }
+ } else {
+ error = procfs_allocvp(dvp->v_mount, &nvp,
+ pfs->pfs_pid, pfs_type);
+ if (error)
+ return (error);
+
+ nvp->v_type = VREG;
+ pfs = VTOPFS(nvp); /* value is not used afterwards */
+ }
+ *vpp = nvp; /* null when the Pfile case failed with ENXIO */
+ return (error);
+
+ default:
+ return (ENOTDIR);
+ }
+}
+
+/*
+ * readdir returns directory entries from pfsnode (vp).
+ *
+ * the strategy here with procfs is to generate a single
+ * directory entry at a time (struct pfsdent) and then
+ * copy that out to userland using uiomove. a more efficient
+ * though more complex implementation, would try to minimize
+ * the number of calls to uiomove(). for procfs, this is
+ * hardly worth the added code complexity.
+ *
+ * this should just be done through read()
+ *
+ * the offset in (uio) must be a non-negative multiple of
+ * UIO_MX and there must be room for at least one entry,
+ * otherwise EINVAL is returned. a node that is neither the
+ * root nor a process directory yields ENOTDIR. on return
+ * the offset is set to the index of the next entry times
+ * UIO_MX.
+ */
+procfs_readdir(ap)
+	struct vop_readdir_args *ap;
+{
+	struct uio *uio = ap->a_uio;
+	struct pfsdent d;
+	struct pfsdent *dp = &d;
+	struct pfsnode *pfs;
+	int error;
+	int i;
+
+	pfs = VTOPFS(ap->a_vp);
+
+	if (uio->uio_resid < UIO_MX)
+		return (EINVAL);
+	if (uio->uio_offset & (UIO_MX-1))
+		return (EINVAL);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	error = 0;
+	i = uio->uio_offset / UIO_MX;
+
+	switch (pfs->pfs_type) {
+	/*
+	 * this is for the process-specific sub-directories.
+	 * all that is needed is to copy out all the entries
+	 * from the procent[] table (top of this file).
+	 */
+	case Pproc: {
+		while (uio->uio_resid >= UIO_MX) {
+			struct pfsnames *dt;
+
+			if (i >= Nprocent)
+				break;
+
+			dt = &procent[i];
+
+			/*
+			 * clear the entry first, as the root case does:
+			 * the whole of it is copied out, and anything
+			 * not assigned below would otherwise hand stale
+			 * kernel stack contents to userland.
+			 */
+			bzero((char *) dp, UIO_MX);
+			dp->d_reclen = UIO_MX;
+			dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, dt->d_pfstype);
+			dp->d_type = DT_REG;
+			dp->d_namlen = dt->d_namlen;
+			bcopy(dt->d_name, dp->d_name, sizeof(dt->d_name)-1);
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			i++;
+		}
+
+		break;
+
+	}
+
+	/*
+	 * this is for the root of the procfs filesystem
+	 * what is needed is a special entry for "curproc"
+	 * followed by an entry for each process on allproc
+	 * (and on zombproc, when PROCFS_ZOMBIE is defined).
+	 */
+
+	case Proot: {
+		int pcnt;
+#ifdef PROCFS_ZOMBIE
+		int doingzomb = 0;
+#endif
+		volatile struct proc *p;
+
+		p = allproc;
+
+#define PROCFS_XFILES 1 /* number of other entries, like "curproc" */
+		pcnt = PROCFS_XFILES;
+
+		while (p && uio->uio_resid >= UIO_MX) {
+			bzero((char *) dp, UIO_MX);
+			dp->d_type = DT_DIR;
+			dp->d_reclen = UIO_MX;
+
+			switch (i) {
+			case 0:
+				/* ship out entry for "curproc" */
+				dp->d_fileno = PROCFS_FILENO(PID_MAX+1, Pproc);
+				dp->d_namlen = sprintf(dp->d_name, "curproc");
+				break;
+
+			default:
+				/* fill in the entry once index (i) has been reached */
+				if (pcnt >= i) {
+					dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc);
+					dp->d_namlen = sprintf(dp->d_name, "%ld", (long) p->p_pid);
+				}
+
+				p = p->p_next;
+
+#ifdef PROCFS_ZOMBIE
+				if (p == 0 && doingzomb == 0) {
+					doingzomb = 1;
+					p = zombproc;
+				}
+#endif
+
+				/* still skipping processes that precede index (i) */
+				if (pcnt++ < i)
+					continue;
+
+				break;
+			}
+			error = uiomove((caddr_t) dp, UIO_MX, uio);
+			if (error)
+				break;
+			i++;
+		}
+
+		break;
+
+	}
+
+	default:
+		error = ENOTDIR;
+		break;
+	}
+
+	uio->uio_offset = i * UIO_MX;
+
+	return (error);
+}
+
+/*
+ * convert decimal ascii to pid_t
+ *
+ * exactly (len) chars of (b) are examined; the string
+ * need not be nul terminated. NO_PID is returned if any
+ * char is not a digit or if the value exceeds PID_MAX.
+ */
+static pid_t
+atopid(b, len)
+	const char *b;
+	u_int len;
+{
+	pid_t pid = 0;
+	u_int n;
+
+	for (n = 0; n < len; n++) {
+		char ch = b[n];
+
+		if (ch < '0' || ch > '9')
+			return (NO_PID);
+
+		pid = 10 * pid + (ch - '0');
+
+		/* checked per digit, so the value stays small */
+		if (pid > PID_MAX)
+			return (NO_PID);
+	}
+
+	return (pid);
+}
+
+/*
+ * procfs vnode operations.
+ *
+ * procfs_vnodeop_entries pairs each vnode operation
+ * descriptor with the procfs routine that implements
+ * it; the list ends with a null pair and falls back on
+ * vn_default_error for vop_default_desc.
+ * procfs_vnodeop_opv_desc ties that list to the
+ * procfs_vnodeop_p operations vector pointer.
+ */
+int (**procfs_vnodeop_p)();
+struct vnodeopv_entry_desc procfs_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, procfs_lookup }, /* lookup */
+ { &vop_create_desc, procfs_create }, /* create */
+ { &vop_mknod_desc, procfs_mknod }, /* mknod */
+ { &vop_open_desc, procfs_open }, /* open */
+ { &vop_close_desc, procfs_close }, /* close */
+ { &vop_access_desc, procfs_access }, /* access */
+ { &vop_getattr_desc, procfs_getattr }, /* getattr */
+ { &vop_setattr_desc, procfs_setattr }, /* setattr */
+ { &vop_read_desc, procfs_read }, /* read */
+ { &vop_write_desc, procfs_write }, /* write */
+ { &vop_ioctl_desc, procfs_ioctl }, /* ioctl */
+ { &vop_select_desc, procfs_select }, /* select */
+ { &vop_mmap_desc, procfs_mmap }, /* mmap */
+ { &vop_fsync_desc, procfs_fsync }, /* fsync */
+ { &vop_seek_desc, procfs_seek }, /* seek */
+ { &vop_remove_desc, procfs_remove }, /* remove */
+ { &vop_link_desc, procfs_link }, /* link */
+ { &vop_rename_desc, procfs_rename }, /* rename */
+ { &vop_mkdir_desc, procfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, procfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, procfs_symlink }, /* symlink */
+ { &vop_readdir_desc, procfs_readdir }, /* readdir */
+ { &vop_readlink_desc, procfs_readlink }, /* readlink */
+ { &vop_abortop_desc, procfs_abortop }, /* abortop */
+ { &vop_inactive_desc, procfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, procfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, procfs_lock }, /* lock */
+ { &vop_unlock_desc, procfs_unlock }, /* unlock */
+ { &vop_bmap_desc, procfs_bmap }, /* bmap */
+ { &vop_strategy_desc, procfs_strategy }, /* strategy */
+ { &vop_print_desc, procfs_print }, /* print */
+ { &vop_islocked_desc, procfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, procfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, procfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, procfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, procfs_valloc }, /* valloc */
+ { &vop_vfree_desc, procfs_vfree }, /* vfree */
+ { &vop_truncate_desc, procfs_truncate }, /* truncate */
+ { &vop_update_desc, procfs_update }, /* update */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc procfs_vnodeop_opv_desc =
+ { &procfs_vnodeop_p, procfs_vnodeop_entries };
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
new file mode 100644
index 0000000..111c517
--- /dev/null
+++ b/sys/fs/specfs/spec_vnops.c
@@ -0,0 +1,689 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)spec_vnops.c 8.6 (Berkeley) 4/9/94
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/conf.h>
+#include <sys/buf.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/errno.h>
+#include <sys/ioctl.h>
+#include <sys/file.h>
+#include <sys/disklabel.h>
+#include <miscfs/specfs/specdev.h>
+
+/*
+ * Symbolic sleep message strings for devices.
+ * Each array holds the same short text as its identifier. None of them is
+ * referenced by the routines in this file; presumably device drivers use
+ * them as sleep messages -- confirm.
+ */
+char devopn[] = "devopn";
+char devio[] = "devio";
+char devwait[] = "devwait";
+char devin[] = "devin";
+char devout[] = "devout";
+char devioc[] = "devioc";
+char devcls[] = "devcls";
+
+/*
+ * Vnode operations for special (device) files.
+ *
+ * spec_vnodeop_p is the pointer to the operations vector;
+ * spec_vnodeop_entries pairs each operation descriptor with the spec_*
+ * routine that implements it and is terminated by a NULL/NULL pair;
+ * spec_vnodeop_opv_desc ties the two together.
+ * NOTE(review): several spec_* names used below are not defined in this
+ * file; presumably specdev.h maps them onto other routines -- confirm.
+ */
+int (**spec_vnodeop_p)();
+struct vnodeopv_entry_desc spec_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, spec_close }, /* close */
+ { &vop_access_desc, spec_access }, /* access */
+ { &vop_getattr_desc, spec_getattr }, /* getattr */
+ { &vop_setattr_desc, spec_setattr }, /* setattr */
+ { &vop_read_desc, spec_read }, /* read */
+ { &vop_write_desc, spec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, spec_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, spec_inactive }, /* inactive */
+ { &vop_reclaim_desc, spec_reclaim }, /* reclaim */
+ { &vop_lock_desc, spec_lock }, /* lock */
+ { &vop_unlock_desc, spec_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, spec_print }, /* print */
+ { &vop_islocked_desc, spec_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, spec_update }, /* update */
+ { &vop_bwrite_desc, spec_bwrite }, /* bwrite */
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc spec_vnodeop_opv_desc =
+ { &spec_vnodeop_p, spec_vnodeop_entries };
+
+/*
+ * Lookup on a special file: there is never anything to find.
+ * Clears the caller's result vnode pointer and reports ENOTDIR.
+ */
+int
+spec_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode **resultp = ap->a_vpp;
+
+	*resultp = NULL;
+	return (ENOTDIR);
+}
+
+/*
+ * Open a special file.
+ *
+ * Fails with ENXIO when the vnode's mount has MNT_NODEV set or when the
+ * major number is outside the device switch table. Write opens made with
+ * credentials other than FSCRED are additionally subject to the
+ * securelevel checks below. Otherwise the result is whatever the driver's
+ * d_open routine returns; a vnode that is neither VCHR nor VBLK yields 0.
+ */
+/* ARGSUSED */
+spec_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ struct vnode *bvp, *vp = ap->a_vp;
+ dev_t bdev, dev = (dev_t)vp->v_rdev;
+ register int maj = major(dev);
+ int error;
+
+ /*
+ * Don't allow open if fs is mounted -nodev.
+ */
+ if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV))
+ return (ENXIO);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /* unsigned compare: a negative major is rejected as well */
+ if ((u_int)maj >= nchrdev)
+ return (ENXIO);
+ if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) {
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk character devices.
+ */
+ if (securelevel >= 2 && isdisk(dev, VCHR))
+ return (EPERM);
+ /*
+ * When running in secure mode, do not allow opens
+ * for writing of /dev/mem, /dev/kmem, or character
+ * devices whose corresponding block devices are
+ * currently mounted.
+ */
+ if (securelevel >= 1) {
+ if ((bdev = chrtoblk(dev)) != NODEV &&
+ vfinddev(bdev, VBLK, &bvp) &&
+ bvp->v_usecount > 0 &&
+ (error = vfs_mountedon(bvp)))
+ return (error);
+ if (iskmemdev(dev))
+ return (EPERM);
+ }
+ }
+ /* the driver open runs with the vnode unlocked; relock afterwards */
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[maj].d_open)(dev, ap->a_mode, S_IFCHR, ap->a_p);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ /* unsigned compare: a negative major is rejected as well */
+ if ((u_int)maj >= nblkdev)
+ return (ENXIO);
+ /*
+ * When running in very secure mode, do not allow
+ * opens for writing of any disk block devices.
+ */
+ if (securelevel >= 2 && ap->a_cred != FSCRED &&
+ (ap->a_mode & FWRITE) && isdisk(dev, VBLK))
+ return (EPERM);
+ /*
+ * Do not allow opens of block devices that are
+ * currently mounted.
+ */
+ if (error = vfs_mountedon(vp))
+ return (error);
+ return ((*bdevsw[maj].d_open)(dev, ap->a_mode, S_IFBLK, ap->a_p));
+ }
+ /* neither VCHR nor VBLK: no driver to call */
+ return (0);
+}
+
+/*
+ * Vnode op for read.
+ *
+ * A zero-length request returns 0 immediately. Character devices are
+ * handed to the driver's d_read routine with the vnode unlocked. Block
+ * devices are read through bread()/breadn() one bsize-byte block at a
+ * time, and the requested part of each buffer is copied out with
+ * uiomove() until the request is satisfied, an error occurs, or a pass
+ * moves no bytes. Any other vnode type panics.
+ */
+/* ARGSUSED */
+spec_read(ap)
+ struct vop_read_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn, nextbn;
+ long bsize, bscale;
+ struct partinfo dpart;
+ int n, on, majordev, (*ioctl)();
+ int error = 0;
+ dev_t dev;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("spec_read mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_read proc");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /* the driver read runs with the vnode unlocked; relock afterwards */
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[major(vp->v_rdev)].d_read)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ /*
+ * Default to BLKDEV_IOSIZE; when the driver answers DIOCGPART
+ * with an FS_BSDFFS partition whose p_frag and p_fsize are
+ * both non-zero, use p_frag * p_fsize as the block size.
+ */
+ bsize = BLKDEV_IOSIZE;
+ dev = vp->v_rdev;
+ if ((majordev = major(dev)) < nblkdev &&
+ (ioctl = bdevsw[majordev].d_ioctl) != NULL &&
+ (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 &&
+ dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag * dpart.part->p_fsize;
+ /* number of DEV_BSIZE units per block */
+ bscale = bsize / DEV_BSIZE;
+ do {
+ /*
+ * bn: offset in DEV_BSIZE units with the low bits masked
+ * off (the mask assumes bscale is a power of two -- TODO
+ * confirm); on: byte offset inside the block; n: bytes
+ * wanted from this block.
+ */
+ bn = (uio->uio_offset / DEV_BSIZE) &~ (bscale - 1);
+ on = uio->uio_offset % bsize;
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ /*
+ * If the last block read immediately precedes this one,
+ * use breadn() and name the following block as well.
+ * NOTE(review): (int *)&bsize passes the address of a
+ * long as an int pointer; that only works where int and
+ * long have the same representation -- confirm.
+ */
+ if (vp->v_lastr + bscale == bn) {
+ nextbn = bn + bscale;
+ error = breadn(vp, bn, (int)bsize, &nextbn,
+ (int *)&bsize, 1, NOCRED, &bp);
+ } else
+ error = bread(vp, bn, (int)bsize, NOCRED, &bp);
+ vp->v_lastr = bn;
+ /* limit the copy by b_resid (presumably bytes not transferred) */
+ n = min(n, bsize - bp->b_resid);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ /* copied up to the end of the block: flag the buffer B_AGE */
+ if (n + on == bsize)
+ bp->b_flags |= B_AGE;
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_read type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Vnode op for write.
+ *
+ * Character devices are handed to the driver's d_write routine with the
+ * vnode unlocked. Block devices are written through the buffer cache one
+ * bsize-byte block at a time: a block that is only partly overwritten is
+ * read first, the caller's data is copied in with uiomove(), and the
+ * buffer is then written with bawrite() or bdwrite(). Any other vnode
+ * type panics.
+ */
+/* ARGSUSED */
+spec_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ struct buf *bp;
+ daddr_t bn;
+ int bsize, blkmask;
+ struct partinfo dpart;
+ register int n, on;
+ int error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("spec_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("spec_write proc");
+#endif
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /* the driver write runs with the vnode unlocked; relock afterwards */
+ VOP_UNLOCK(vp);
+ error = (*cdevsw[major(vp->v_rdev)].d_write)
+ (vp->v_rdev, uio, ap->a_ioflag);
+ VOP_LOCK(vp);
+ return (error);
+
+ case VBLK:
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ /*
+ * Default to BLKDEV_IOSIZE; use p_frag * p_fsize when the
+ * driver reports an FS_BSDFFS partition with both non-zero.
+ * NOTE(review): unlike spec_read, neither the major number
+ * nor the d_ioctl pointer is checked before the call --
+ * confirm that callers guarantee both.
+ */
+ bsize = BLKDEV_IOSIZE;
+ if ((*bdevsw[major(vp->v_rdev)].d_ioctl)(vp->v_rdev, DIOCGPART,
+ (caddr_t)&dpart, FREAD, p) == 0) {
+ if (dpart.part->p_fstype == FS_BSDFFS &&
+ dpart.part->p_frag != 0 && dpart.part->p_fsize != 0)
+ bsize = dpart.part->p_frag *
+ dpart.part->p_fsize;
+ }
+ /* mask that rounds a DEV_BSIZE block number down to a block start */
+ blkmask = (bsize / DEV_BSIZE) - 1;
+ do {
+ bn = (uio->uio_offset / DEV_BSIZE) &~ blkmask;
+ on = uio->uio_offset % bsize;
+ n = min((unsigned)(bsize - on), uio->uio_resid);
+ /*
+ * A whole-block write just takes a buffer with getblk();
+ * a partial write reads the block first.
+ */
+ if (n == bsize)
+ bp = getblk(vp, bn, bsize, 0, 0);
+ else
+ error = bread(vp, bn, bsize, NOCRED, &bp);
+ /* limit the copy by b_resid (presumably bytes not transferred) */
+ n = min(n, bsize - bp->b_resid);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ error = uiomove((char *)bp->b_data + on, n, uio);
+ /*
+ * Filled up to the end of the block: flag it B_AGE and use
+ * bawrite(); otherwise hand the buffer to bdwrite().
+ */
+ if (n + on == bsize) {
+ bp->b_flags |= B_AGE;
+ bawrite(bp);
+ } else
+ bdwrite(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n != 0);
+ return (error);
+
+ default:
+ panic("spec_write type");
+ }
+ /* NOTREACHED */
+}
+
+/*
+ * Device ioctl operation: pass the request on to the d_ioctl routine of
+ * the character or block driver selected by the vnode's device number.
+ * Any vnode type other than VCHR or VBLK panics.
+ */
+/* ARGSUSED */
+spec_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	dev_t dev = vp->v_rdev;
+
+	if (vp->v_type == VCHR) {
+		return ((*cdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+	}
+
+	if (vp->v_type == VBLK) {
+		/*
+		 * Command 0 with data B_TAPE is answered here from the
+		 * driver flags: 0 when B_TAPE is set, 1 otherwise.
+		 */
+		if (ap->a_command == 0 && (int)ap->a_data == B_TAPE) {
+			if (bdevsw[major(dev)].d_flags & B_TAPE)
+				return (0);
+			return (1);
+		}
+		return ((*bdevsw[major(dev)].d_ioctl)(dev, ap->a_command,
+		    ap->a_data, ap->a_fflag, ap->a_p));
+	}
+
+	panic("spec_ioctl");
+	/* NOTREACHED */
+}
+
+/*
+ * Select on a special file. Character devices ask their driver's
+ * d_select routine; every other vnode type reports 1.
+ */
+/* ARGSUSED */
+spec_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register dev_t dev;
+
+	if (ap->a_vp->v_type != VCHR)
+		return (1);		/* XXX */
+
+	dev = ap->a_vp->v_rdev;
+	return (*cdevsw[major(dev)].d_select)(dev, ap->a_which, ap->a_p);
+}
+/*
+ * Synch buffers associated with a block device.
+ *
+ * Character devices return 0 at once. For block devices every non-busy
+ * buffer on the vnode's dirty list is started with bawrite(); when
+ * a_waitfor is MNT_WAIT the routine then sleeps until v_numoutput has
+ * drained. Always returns 0.
+ */
+/* ARGSUSED */
+int
+spec_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct buf *bp;
+	struct buf *nbp;
+	int s;
+
+	if (vp->v_type == VCHR)
+		return (0);
+	/*
+	 * Flush all dirty buffers associated with a block device.
+	 * The priority level is restored around each bawrite(), so the
+	 * list is rescanned from its head after every write.
+	 */
+loop:
+	s = splbio();
+	for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+		nbp = bp->b_vnbufs.le_next;
+		if ((bp->b_flags & B_BUSY))
+			continue;
+		if ((bp->b_flags & B_DELWRI) == 0)
+			panic("spec_fsync: not dirty");
+		bremfree(bp);
+		bp->b_flags |= B_BUSY;
+		splx(s);
+		bawrite(bp);
+		goto loop;
+	}
+	if (ap->a_waitfor == MNT_WAIT) {
+		while (vp->v_numoutput) {
+			vp->v_flag |= VBWAIT;
+			sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1);
+		}
+#ifdef DIAGNOSTIC
+		if (vp->v_dirtyblkhd.lh_first) {
+			vprint("spec_fsync: dirty", vp);
+			/*
+			 * Restore the saved level before rescanning.
+			 * Otherwise the splbio() at "loop" would save the
+			 * already raised level in s and the final splx()
+			 * would return with the priority still raised.
+			 */
+			splx(s);
+			goto loop;
+		}
+#endif
+	}
+	splx(s);
+	return (0);
+}
+
+/*
+ * Hand the buffer to the strategy routine of the block driver selected
+ * by the buffer's device number. Always returns 0.
+ */
+spec_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+
+	(*bdevsw[major(bp->b_dev)].d_strategy)(bp);
+	return (0);
+}
+
+/*
+ * Identity block mapping: report the vnode and block number that were
+ * passed in. Either result pointer may be NULL, in which case that
+ * result is not stored. Always returns 0.
+ */
+spec_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+	} */ *ap;
+{
+	struct vnode **vpp = ap->a_vpp;
+	daddr_t *bnp = ap->a_bnp;
+
+	if (vpp != NULL)
+		*vpp = ap->a_vp;
+	if (bnp != NULL)
+		*bnp = ap->a_bn;
+	return (0);
+}
+
+/*
+ * Lock operation for special files.
+ * No locking is done at present; the request simply succeeds.
+ */
+/* ARGSUSED */
+spec_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Unlock operation for special files.
+ * Nothing is ever locked, so there is nothing to release.
+ */
+/* ARGSUSED */
+spec_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return (0);
+}
+
+/*
+ * Device close routine.
+ *
+ * Calls the driver's d_close routine only when vcount() reports no other
+ * reference or the vnode has VXLOCK set; otherwise returns 0 without
+ * calling the driver. For block devices an error from vinvalbuf() is
+ * returned before that check is made. Any vnode type other than VCHR or
+ * VBLK panics.
+ */
+/* ARGSUSED */
+spec_close(ap)
+ struct vop_close_args /* {
+ struct vnode *a_vp;
+ int a_fflag;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ dev_t dev = vp->v_rdev;
+ int (*devclose) __P((dev_t, int, int, struct proc *));
+ int mode, error;
+
+ switch (vp->v_type) {
+
+ case VCHR:
+ /*
+ * Hack: a tty device that is a controlling terminal
+ * has a reference from the session structure.
+ * We cannot easily tell that a character device is
+ * a controlling terminal, unless it is the closing
+ * process' controlling terminal. In that case,
+ * if the reference count is 2 (this last descriptor
+ * plus the session), release the reference from the session.
+ */
+ if (vcount(vp) == 2 && ap->a_p &&
+ vp == ap->a_p->p_session->s_ttyvp) {
+ vrele(vp);
+ ap->a_p->p_session->s_ttyvp = NULL;
+ }
+ /*
+ * If the vnode is locked, then we are in the midst
+ * of forcibly closing the device, otherwise we only
+ * close on last reference.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = cdevsw[major(dev)].d_close;
+ mode = S_IFCHR;
+ break;
+
+ case VBLK:
+ /*
+ * On last close of a block device (that isn't mounted)
+ * we must invalidate any in core blocks, so that
+ * we can, for instance, change floppy disks.
+ * Note that vinvalbuf() is called here before the
+ * reference count is examined below.
+ */
+ if (error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0))
+ return (error);
+ /*
+ * We do not want to really close the device if it
+ * is still in use unless we are trying to close it
+ * forcibly. Since every use (buffer, vnode, swap, cmap)
+ * holds a reference to the vnode, and because we mark
+ * any other vnodes that alias this device, when the
+ * sum of the reference counts on all the aliased
+ * vnodes descends to one, we are on last close.
+ */
+ if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0)
+ return (0);
+ devclose = bdevsw[major(dev)].d_close;
+ mode = S_IFBLK;
+ break;
+
+ default:
+ panic("spec_close: not special");
+ }
+
+ /* last reference or forced close: let the driver close the device */
+ return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p));
+}
+
+/*
+ * Print out the contents of a special device vnode: the major and minor
+ * numbers of its device. Returns 0.
+ */
+spec_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	dev_t dev = ap->a_vp->v_rdev;
+
+	printf("tag VT_NON, dev %d, %d\n", major(dev), minor(dev));
+	/*
+	 * The function is implicitly int and is entered in the vnode
+	 * operations table, so give callers a defined result instead of
+	 * falling off the end.
+	 */
+	return (0);
+}
+
+/*
+ * Return POSIX pathconf information applicable to special devices.
+ * Stores the limit for a recognised a_name through a_retval and
+ * returns 0; an unrecognised name returns EINVAL and stores nothing.
+ */
+spec_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	int limit;
+
+	switch (ap->a_name) {
+	case _PC_LINK_MAX:
+		limit = LINK_MAX;
+		break;
+	case _PC_MAX_CANON:
+		limit = MAX_CANON;
+		break;
+	case _PC_MAX_INPUT:
+		limit = MAX_INPUT;
+		break;
+	case _PC_PIPE_BUF:
+		limit = PIPE_BUF;
+		break;
+	case _PC_CHOWN_RESTRICTED:
+		limit = 1;
+		break;
+	case _PC_VDISABLE:
+		limit = _POSIX_VDISABLE;
+		break;
+	default:
+		return (EINVAL);
+	}
+
+	*ap->a_retval = limit;
+	return (0);
+}
+
+/*
+ * Advisory byte-level locks on special devices.
+ * Not supported: every request is answered with EOPNOTSUPP.
+ */
+/* ARGSUSED */
+spec_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Operation that always fails on a special device: returns EBADF.
+ */
+spec_ebadf()
+{
+	return (EBADF);
+}
+
+/*
+ * Operation that must never be invoked on a special device: panics.
+ */
+spec_badop()
+{
+	panic("spec_badop called");
+	/* NOTREACHED */
+}
diff --git a/sys/fs/umapfs/umap.h b/sys/fs/umapfs/umap.h
new file mode 100644
index 0000000..9f4d1e7
--- /dev/null
+++ b/sys/fs/umapfs/umap.h
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap.h 8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vnops.c 1.5 (Berkeley) 7/10/92
+ */
+
+/* Row counts of the uid and gid mapping tables in struct umap_mount. */
+#define MAPFILEENTRIES 64
+#define GMAPFILEENTRIES 16
+/* Ids that umap_mapids() substitutes when an id has no map entry. */
+#define NOBODY 32767
+#define NULLGROUP 65534
+
+/*
+ * Arguments describing a umap mount request.
+ * NOTE(review): presumably filled in by the mount program and read by the
+ * umapfs mount routine -- confirm against umap_vfsops.c.
+ */
+struct umap_args {
+ char *target; /* Target of loopback */
+ int nentries; /* # of entries in user map array */
+ int gnentries; /* # of entries in group map array */
+ u_long (*mapdata)[2]; /* pointer to array of user mappings */
+ u_long (*gmapdata)[2]; /* pointer to array of group mappings */
+};
+
+/*
+ * Per-mount umap data, reached from a struct mount through
+ * MOUNTTOUMAPMOUNT(). Each mapping table row is a pair of ids:
+ * umap_findid() matches column 0 and returns column 1, and
+ * umap_reverse_findid() does the opposite.
+ */
+struct umap_mount {
+ struct mount *umapm_vfs;
+ struct vnode *umapm_rootvp; /* Reference to root umap_node */
+ int info_nentries; /* number of uid mappings */
+ int info_gnentries; /* number of gid mappings */
+ u_long info_mapdata[MAPFILEENTRIES][2]; /* mapping data for
+ user mapping in ficus */
+ u_long info_gmapdata[GMAPFILEENTRIES][2]; /*mapping data for
+ group mapping in ficus */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references.
+ * One umap_node exists per alias vnode; it sits on a doubly linked hash
+ * chain and ties the alias vnode to the lower vnode it stands for.
+ */
+struct umap_node {
+ struct umap_node *umap_forw; /* Hash chain */
+ struct umap_node *umap_back;
+ struct vnode *umap_lowervp; /* Aliased vnode - VREFed once */
+ struct vnode *umap_vnode; /* Back pointer to vnode/umap_node */
+};
+
+/* Routines exported by umap_subr.c. */
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+/* Conversions between a mount or vnode and its umap private data. */
+#define MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))
+#define VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)
+#define UMAPTOV(xp) ((xp)->umap_vnode)
+/*
+ * UMAPVPTOLOWERVP yields the lower vnode of a umap vnode; with
+ * UMAPFS_DIAGNOSTIC it goes through umap_checkvp() instead of reading
+ * the field directly.
+ */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)
+#else
+#define UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)
+#endif
+
+extern int (**umap_vnodeop_p)();
+extern struct vfsops umap_vfsops;
+#endif /* KERNEL */
diff --git a/sys/fs/umapfs/umap_subr.c b/sys/fs/umapfs/umap_subr.c
new file mode 100644
index 0000000..6f1f077
--- /dev/null
+++ b/sys/fs/umapfs/umap_subr.c
@@ -0,0 +1,397 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_subr.c 8.6 (Berkeley) 1/26/94
+ *
+ * $Id: lofs_subr.c, v 1.11 1992/05/30 10:05:43 jsp Exp jsp $
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/*
+ * Hashing of lower vnode addresses: UMAP_NHASH shifts the pointer right
+ * by LOG2_SIZEVNODE bits and masks the result to a bucket index in
+ * 0..NUMAPNODECACHE-1 (NUMAPNODECACHE must stay a power of two for the
+ * mask to work).
+ */
+#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */
+#define NUMAPNODECACHE 16
+#define UMAP_NHASH(vp) ((((u_long) vp)>>LOG2_SIZEVNODE) & (NUMAPNODECACHE-1))
+
+/*
+ * Umap layer cache:
+ * Each cache entry holds a reference to the target vnode
+ * along with a pointer to the alias vnode. When an
+ * entry is added the target vnode is VREF'd. When the
+ * alias is removed the target vnode is vrele'd.
+ */
+
+/*
+ * Cache head.
+ * The two links come first so that a head can be cast to
+ * struct umap_node * and serve as the sentinel of its own chain.
+ */
+struct umap_node_cache {
+ struct umap_node *ac_forw;
+ struct umap_node *ac_back;
+};
+
+/* One chain head per hash bucket; see UMAP_NHASH. */
+static struct umap_node_cache umap_node_cache[NUMAPNODECACHE];
+
+/*
+ * Set up the cache headers: every hash chain starts out empty, with
+ * both links of the head pointing back at the head itself.
+ */
+umapfs_init()
+{
+	int i;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_init\n");		/* printed during system boot */
+#endif
+
+	for (i = 0; i < NUMAPNODECACHE; i++) {
+		struct umap_node_cache *head = &umap_node_cache[i];
+
+		head->ac_back = (struct umap_node *) head;
+		head->ac_forw = head->ac_back;
+	}
+}
+
+/*
+ * Return the hash chain head that a given target vnode belongs to.
+ */
+static struct umap_node_cache *
+umap_node_hash(targetvp)
+	struct vnode *targetvp;
+{
+	u_long bucket = UMAP_NHASH(targetvp);
+
+	return (umap_node_cache + bucket);
+}
+
+/*
+ * umap_findid is called by various routines in umap_vnodeops.c to
+ * find a user or group id in a map.
+ * Returns column 1 of the first row whose column 0 equals id, or -1
+ * when none of the first nentries rows matches.
+ */
+static u_long
+umap_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][0] == id)
+			return (map[row][1]);
+	}
+	return (-1);
+}
+
+/*
+ * umap_reverse_findid is called by umap_getattr() in umap_vnodeops.c to
+ * find a user or group id in a map, in reverse.
+ * Returns column 0 of the first row whose column 1 equals id, or -1
+ * when none of the first nentries rows matches.
+ */
+u_long
+umap_reverse_findid(id, map, nentries)
+	u_long id;
+	u_long map[][2];
+	int nentries;
+{
+	int row;
+
+	for (row = 0; row < nentries; row++) {
+		if (map[row][1] == id)
+			return (map[row][0]);
+	}
+	return (-1);
+}
+
+/*
+ * Return alias for target vnode if already exists, else 0.
+ * A match must reference targetvp and belong to mount mp; the alias
+ * vnode is returned after a successful vget().
+ */
+static struct vnode *
+umap_node_find(mp, targetvp)
+ struct mount *mp;
+ struct vnode *targetvp;
+{
+ struct umap_node_cache *hd;
+ struct umap_node *a;
+ struct vnode *vp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+ printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp);
+#endif
+
+ /*
+ * Find hash base, and then search the (two-way) linked
+ * list looking for a umap_node structure which is referencing
+ * the target vnode. If found, then increment the umap_node
+ * reference count (but NOT the target vnode's VREF counter).
+ */
+ hd = umap_node_hash(targetvp);
+
+ loop:
+ /* the chain is circular: stop on coming back round to the head */
+ for (a = hd->ac_forw; a != (struct umap_node *) hd; a = a->umap_forw) {
+ if (a->umap_lowervp == targetvp &&
+ a->umap_vnode->v_mount == mp) {
+ vp = UMAPTOV(a);
+ /*
+ * We need vget for the VXLOCK
+ * stuff, but we don't want to lock
+ * the lower node.
+ */
+ if (vget(vp, 0)) {
+#ifdef UMAPFS_DIAGNOSTIC
+ printf ("umap_node_find: vget failed.\n");
+#endif
+ /* vget failed: rescan the chain from its head */
+ goto loop;
+ }
+ return (vp);
+ }
+ }
+
+#ifdef UMAPFS_DIAGNOSTIC
+ printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp);
+#endif
+
+ return (0);
+}
+
+/*
+ * Make a new umap_node node.
+ * *vpp receives the alias vnode, lowervp is the target vnode.
+ * A reference to lowervp is taken when a new node is entered in the
+ * cache. Returns 0 on success or the error from getnewvnode().
+ */
+static int
+umap_node_alloc(mp, lowervp, vpp)
+	struct mount *mp;
+	struct vnode *lowervp;
+	struct vnode **vpp;
+{
+	struct umap_node_cache *hd;
+	struct umap_node *xp;
+	struct vnode *othervp, *vp;
+	int error;
+
+	if (error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp))
+		return (error);
+	vp = *vpp;
+
+	MALLOC(xp, struct umap_node *, sizeof(struct umap_node),
+	    M_TEMP, M_WAITOK);
+	vp->v_type = lowervp->v_type;
+	xp->umap_vnode = vp;
+	vp->v_data = xp;
+	xp->umap_lowervp = lowervp;
+	/*
+	 * Before we insert our new node onto the hash chains,
+	 * check to see if someone else has beaten us to it.
+	 * (We could have slept in MALLOC.)
+	 * umap_node_find() takes the mount as well as the target vnode;
+	 * both must be passed for the lookup to examine the right chain
+	 * and mount.
+	 */
+	if (othervp = umap_node_find(mp, lowervp)) {
+		FREE(xp, M_TEMP);
+		vp->v_type = VBAD;	/* node is discarded */
+		vp->v_usecount = 0;	/* XXX */
+		*vpp = othervp;
+		return (0);
+	}
+	VREF(lowervp);	/* Extra VREF will be vrele'd in umap_node_create */
+	hd = umap_node_hash(lowervp);
+	insque(xp, hd);
+	return (0);
+}
+
+
+/*
+ * Try to find an existing umap_node vnode referring
+ * to the target vnode, otherwise make a new umap_node vnode which
+ * contains a reference to the target vnode.
+ * On success the alias is stored in *newvpp, one reference to targetvp
+ * is released and 0 is returned; an allocation error is returned
+ * unchanged, with targetvp left untouched.
+ */
+int
+umap_node_create(mp, targetvp, newvpp)
+	struct mount *mp;
+	struct vnode *targetvp;
+	struct vnode **newvpp;
+{
+	struct vnode *aliasvp;
+
+	if (aliasvp = umap_node_find(mp, targetvp)) {
+		/*
+		 * Take another reference to the alias vnode
+		 * (umap_node_find() already did so through vget()).
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		/* print the alias just found; no "ap" exists in this scope */
+		vprint("umap_node_create: exists", aliasvp);
+#endif
+		/* VREF(aliasvp); */
+	} else {
+		int error;
+
+		/*
+		 * Get new vnode.
+		 */
+#ifdef UMAPFS_DIAGNOSTIC
+		printf("umap_node_create: create new alias vnode\n");
+#endif
+		/*
+		 * Make new vnode reference the umap_node.
+		 */
+		if (error = umap_node_alloc(mp, targetvp, &aliasvp))
+			return (error);
+
+		/*
+		 * aliasvp is already VREF'd by getnewvnode()
+		 */
+	}
+
+	vrele(targetvp);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("umap_node_create: alias", aliasvp);
+	vprint("umap_node_create: target", targetvp);
+#endif
+
+	*newvpp = aliasvp;
+	return (0);
+}
+
+#ifdef UMAPFS_DIAGNOSTIC
+/*
+ * Checked accessor for the lower vnode of a umap vnode.
+ * Returns vp's umap_lowervp after verifying that it is non-NULL and has
+ * a use count of at least 1. On a failed check it prints the first 8
+ * u_long words of the umap_node, spins for as long as
+ * umap_checkvp_barrier is non-zero (so a debugger can attach and clear
+ * it), and then panics. fil and lno are only used by the disabled trace
+ * printf at the end.
+ */
+int umap_checkvp_barrier = 1;
+struct vnode *
+umap_checkvp(vp, fil, lno)
+ struct vnode *vp;
+ char *fil;
+ int lno;
+{
+ struct umap_node *a = VTOUMAP(vp);
+#if 0
+ /*
+ * Can't do this check because vop_reclaim runs
+ * with funny vop vector.
+ */
+ if (vp->v_op != umap_vnodeop_p) {
+ printf ("umap_checkvp: on non-umap-node\n");
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic("umap_checkvp");
+ }
+#endif
+ if (a->umap_lowervp == NULL) {
+ /* Should never happen */
+ int i; u_long *p;
+ printf("vp = %x, ZERO ptr\n", vp);
+ /* dump the first 8 words found at the umap_node address */
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic("umap_checkvp");
+ }
+ if (a->umap_lowervp->v_usecount < 1) {
+ int i; u_long *p;
+ printf("vp = %x, unref'ed lowervp\n", vp);
+ /* dump the first 8 words found at the umap_node address */
+ for (p = (u_long *) a, i = 0; i < 8; i++)
+ printf(" %x", p[i]);
+ printf("\n");
+ /* wait for debugger */
+ while (umap_checkvp_barrier) /*WAIT*/ ;
+ panic ("umap with unref'ed lowervp");
+ }
+#if 0
+ printf("umap %x/%d -> %x/%d [%s, %d]\n",
+ a->umap_vnode, a->umap_vnode->v_usecount,
+ a->umap_lowervp, a->umap_lowervp->v_usecount,
+ fil, lno);
+#endif
+ return (a->umap_lowervp);
+}
+#endif
+
+/*
+ * umap_mapids maps all of the ids in a credential, both user and group.
+ *
+ * The uid is looked up in the mount's user map and replaced by its
+ * mapping, or by NOBODY when it has none. Each of the cr_ngroups entries
+ * of cr_groups is looked up in the group map and replaced by its
+ * mapping, or by NULLGROUP when it has none. The credential is modified
+ * in place.
+ */
+void
+umap_mapids(v_mount, credp)
+	struct mount *v_mount;
+	struct ucred *credp;
+{
+	int i, unentries, gnentries;
+	/* typed as rows of id pairs, matching umap_findid()'s parameter */
+	u_long (*groupmap)[2], (*usermap)[2];
+	uid_t uid;
+	gid_t gid;
+
+	unentries = MOUNTTOUMAPMOUNT(v_mount)->info_nentries;
+	usermap = MOUNTTOUMAPMOUNT(v_mount)->info_mapdata;
+	gnentries = MOUNTTOUMAPMOUNT(v_mount)->info_gnentries;
+	groupmap = MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata;
+
+	/* Find uid entry in map */
+
+	uid = (uid_t) umap_findid(credp->cr_uid, usermap, unentries);
+
+	/* compare in uid_t so the "not found" test holds for any uid_t width */
+	if (uid != (uid_t) -1)
+		credp->cr_uid = uid;
+	else
+		credp->cr_uid = (uid_t) NOBODY;
+
+#ifdef notdef
+	/* cr_gid is the same as cr_groups[0] in 4BSD */
+
+	/* Find gid entry in map */
+
+	gid = (gid_t) umap_findid(credp->cr_gid, groupmap, gnentries);
+
+	if (gid != (gid_t) -1)
+		credp->cr_gid = gid;
+	else
+		credp->cr_gid = NULLGROUP;
+#endif
+
+	/*
+	 * Now we must map each of the set of groups in the cr_groups
+	 * structure. The list is bounded by cr_ngroups, not terminated by
+	 * a zero entry: gid 0 is a valid group, and a list without one
+	 * must not be scanned past its end.
+	 */
+	for (i = 0; i < credp->cr_ngroups; i++) {
+		gid = (gid_t) umap_findid(credp->cr_groups[i],
+		    groupmap, gnentries);
+
+		if (gid != (gid_t) -1)
+			credp->cr_groups[i] = gid;
+		else
+			credp->cr_groups[i] = NULLGROUP;
+	}
+}
diff --git a/sys/fs/umapfs/umap_vfsops.c b/sys/fs/umapfs/umap_vfsops.c
new file mode 100644
index 0000000..2480a85
--- /dev/null
+++ b/sys/fs/umapfs/umap_vfsops.c
@@ -0,0 +1,407 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vfsops.c 8.3 (Berkeley) 1/21/94
+ *
+ * @(#)null_vfsops.c 1.5 (Berkeley) 7/10/92
+ */
+
+/*
+ * Umap Layer
+ * (See mount_umap(8) for a description of this layer.)
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <miscfs/umapfs/umap.h>
+
+/*
+ * Mount umap layer
+ *
+ * Copies the umap_args structure in from `data', looks up the
+ * directory named by args.target, copies the uid and gid translation
+ * tables in from user space and installs a umap alias of that
+ * directory as the root vnode of `mp'.
+ *
+ * Returns 0 on success, EOPNOTSUPP for MNT_UPDATE requests, EINVAL
+ * when the target is not a directory or a table size is out of
+ * range, or the error from copyin(), namei() or umap_node_create().
+ */
+int
+umapfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct umap_args args;
+	struct vnode *lowerrootvp, *vp;
+	struct vnode *umapm_rootvp;
+	struct umap_mount *amp;
+	u_int size;
+	int error;
+#ifdef UMAP_DIAGNOSTIC
+	int i;
+#endif
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount(mp = %x)\n", mp);
+#endif
+
+	/*
+	 * Update is not supported
+	 */
+	if (mp->mnt_flag & MNT_UPDATE) {
+		return (EOPNOTSUPP);
+		/* return (VFS_MOUNT(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, path, data, ndp, p));*/
+	}
+
+	/*
+	 * Get argument
+	 */
+	error = copyin(data, (caddr_t)&args, sizeof(struct umap_args));
+	if (error)
+		return (error);
+
+	/*
+	 * The entry counts come straight from user space and size the
+	 * copyin()s into the fixed tables of struct umap_mount below,
+	 * so refuse anything that would not fit.  (sizeof does not
+	 * evaluate amp, which is not allocated yet.)
+	 */
+	if (args.nentries < 0 || args.gnentries < 0 ||
+	    args.nentries >
+	    (int)(sizeof(amp->info_mapdata) / sizeof(amp->info_mapdata[0])) ||
+	    args.gnentries >
+	    (int)(sizeof(amp->info_gmapdata) / sizeof(amp->info_gmapdata[0])))
+		return (EINVAL);
+
+	/*
+	 * Find lower node
+	 */
+	NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF,
+	    UIO_USERSPACE, args.target, p);
+	error = namei(ndp);
+	if (error)
+		return (error);
+
+	/*
+	 * Sanity check on lower vnode
+	 */
+	lowerrootvp = ndp->ni_vp;
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("vp = %x, check for VDIR...\n", lowerrootvp);
+#endif
+	vrele(ndp->ni_dvp);
+	ndp->ni_dvp = 0;
+
+	if (lowerrootvp->v_type != VDIR) {
+		vput(lowerrootvp);
+		return (EINVAL);
+	}
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("mp = %x\n", mp);
+#endif
+
+	amp = (struct umap_mount *) malloc(sizeof(struct umap_mount),
+	    M_UFSMNT, M_WAITOK);	/* XXX */
+
+	/*
+	 * Save reference to underlying FS
+	 */
+	amp->umapm_vfs = lowerrootvp->v_mount;
+
+	/*
+	 * Now copy in the number of entries and maps for umap mapping.
+	 * On failure give back both the mount structure and the
+	 * locked, referenced lower vnode.
+	 */
+	amp->info_nentries = args.nentries;
+	amp->info_gnentries = args.gnentries;
+	error = copyin(args.mapdata, (caddr_t)amp->info_mapdata,
+	    2*sizeof(u_long)*args.nentries);
+	if (error) {
+		free(amp, M_UFSMNT);	/* XXX */
+		vput(lowerrootvp);
+		return (error);
+	}
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:nentries %d\n",args.nentries);
+	for (i = 0; i < args.nentries; i++)
+		printf(" %d maps to %d\n", amp->info_mapdata[i][0],
+		    amp->info_mapdata[i][1]);
+#endif
+
+	/* The group table is sized by gnentries, not nentries. */
+	error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata,
+	    2*sizeof(u_long)*args.gnentries);
+	if (error) {
+		free(amp, M_UFSMNT);	/* XXX */
+		vput(lowerrootvp);
+		return (error);
+	}
+
+#ifdef UMAP_DIAGNOSTIC
+	printf("umap_mount:gnentries %d\n",args.gnentries);
+	for (i = 0; i < args.gnentries; i++)
+		printf(" group %d maps to %d\n",
+		    amp->info_gmapdata[i][0],
+		    amp->info_gmapdata[i][1]);
+#endif
+
+
+	/*
+	 * Save reference. Each mount also holds
+	 * a reference on the root vnode.
+	 */
+	error = umap_node_create(mp, lowerrootvp, &vp);
+	/*
+	 * Make sure the node alias worked.  vp is only meaningful on
+	 * success, so it must not be touched on the error path; unlock
+	 * and release the lower vnode directly instead.
+	 * NOTE(review): assumes umap_node_create leaves lowerrootvp
+	 * locked and referenced when it fails - confirm.
+	 */
+	if (error) {
+		vput(lowerrootvp);
+		free(amp, M_UFSMNT);	/* XXX */
+		return (error);
+	}
+	/*
+	 * Unlock the node (either the lower or the alias)
+	 */
+	VOP_UNLOCK(vp);
+
+	/*
+	 * Keep a held reference to the root vnode.
+	 * It is vrele'd in umapfs_unmount.
+	 */
+	umapm_rootvp = vp;
+	umapm_rootvp->v_flag |= VROOT;
+	amp->umapm_rootvp = umapm_rootvp;
+	if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL)
+		mp->mnt_flag |= MNT_LOCAL;
+	mp->mnt_data = (qaddr_t) amp;
+	getnewfsid(mp, MOUNT_LOFS);
+
+	(void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+	bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+	(void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1,
+	    &size);
+	bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size);
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_mount: lower %s, alias at %s\n",
+	    mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+	return (0);
+}
+
+/*
+ * VFS start. Nothing needed here - the start routine
+ * on the underlying filesystem will have been called
+ * when that filesystem was mounted.
+ *
+ * All three arguments are ignored and the call always
+ * succeeds (returns 0).  The commented-out statement after
+ * the return shows the alternative of forwarding the request
+ * to the lower filesystem; it is never executed.
+ */
+int
+umapfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+ return (0);
+ /* return (VFS_START(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, flags, p)); */
+}
+
+/*
+ * Free reference to umap layer
+ *
+ * Fails with EINVAL for a forced unmount while `doforce' is clear,
+ * with EBUSY while the root alias has more than one reference, and
+ * with whatever vflush() reports.  On success the root alias is
+ * released and vgone'd and the umap_mount structure is freed.
+ */
+int
+umapfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	extern int doforce;
+	struct vnode *rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+	int vflushflags = 0;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_unmount(mp = %x)\n", mp);
+#endif
+
+	/* lofs can never be rootfs so don't check for it */
+	if ((mntflags & MNT_FORCE) != 0) {
+		if (doforce == 0)
+			return (EINVAL);
+		vflushflags |= FORCECLOSE;
+	}
+
+	/*
+	 * Clear out buffer cache. I don't think we
+	 * ever get anything cached at this level at the
+	 * moment, but who knows...
+	 */
+#ifdef notyet
+	mntflushbuf(mp, 0);
+	if (mntinvalbuf(mp, 1))
+		return (EBUSY);
+#endif
+	/* Anyone besides the mount itself still using the root? */
+	if (rootvp->v_usecount > 1)
+		return (EBUSY);
+
+	error = vflush(mp, rootvp, vflushflags);
+	if (error != 0)
+		return (error);
+
+#ifdef UMAPFS_DIAGNOSTIC
+	vprint("alias root of lower", rootvp);
+#endif
+	/* Drop the mount's reference on the root alias ... */
+	vrele(rootvp);
+	/* ... make the vnode available for re-use ... */
+	vgone(rootvp);
+	/* ... and discard the per-mount umap data. */
+	free(mp->mnt_data, M_UFSMNT);	/* XXX */
+	mp->mnt_data = 0;
+	return (0);
+}
+
+/*
+ * Hand back the root vnode of the umap mount through *vpp with an
+ * additional reference taken and the vnode locked.  Always returns 0.
+ */
+int
+umapfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct vnode *rootvp;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp;
+
+	/* Reference first, then lock, then publish to the caller. */
+	VREF(rootvp);
+	VOP_LOCK(rootvp);
+	*vpp = rootvp;
+
+	return (0);
+}
+
+/*
+ * Quota control is passed straight through to the filesystem
+ * underneath the umap mount; its result is returned unchanged.
+ */
+int
+umapfs_quotactl(mp, cmd, uid, arg, p)
+	struct mount *mp;
+	int cmd;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	struct mount *lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+
+	return (VFS_QUOTACTL(lowermp, cmd, uid, arg, p));
+}
+
+/*
+ * Report filesystem statistics for a umap mount.
+ *
+ * The numbers come from the lower filesystem; the fsid and the
+ * mounted-on/mounted-from names are those of the umap mount itself.
+ * Returns 0, or the error from the lower filesystem's statfs.
+ */
+int
+umapfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct statfs lowerstat;
+	int error;
+
+#ifdef UMAPFS_DIAGNOSTIC
+	printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp,
+	    MOUNTTOUMAPMOUNT(mp)->umapm_rootvp,
+	    UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp)
+	    );
+#endif
+
+	bzero(&lowerstat, sizeof(lowerstat));
+
+	error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &lowerstat, p);
+	if (error != 0)
+		return (error);
+
+	/* now copy across the "interesting" information and fake the rest */
+	sbp->f_type = lowerstat.f_type;
+	sbp->f_flags = lowerstat.f_flags;
+	sbp->f_bsize = lowerstat.f_bsize;
+	sbp->f_iosize = lowerstat.f_iosize;
+	sbp->f_blocks = lowerstat.f_blocks;
+	sbp->f_bfree = lowerstat.f_bfree;
+	sbp->f_bavail = lowerstat.f_bavail;
+	sbp->f_files = lowerstat.f_files;
+	sbp->f_ffree = lowerstat.f_ffree;
+
+	/* Nothing more to do when filling in the mount's own copy. */
+	if (sbp == &mp->mnt_stat)
+		return (0);
+
+	bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+	bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+	bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	return (0);
+}
+
+int
+umapfs_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+ /*
+ * XXX - Assumes no data cached at umap layer.
+ *
+ * Sync is therefore a no-op here: every argument is
+ * ignored and 0 is returned unconditionally.
+ */
+ return (0);
+}
+
+/*
+ * Inode-number lookup is delegated to the lower filesystem; the
+ * result (and *vpp) are exactly what that filesystem produces.
+ */
+int
+umapfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	struct mount *lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+
+	return (VFS_VGET(lowermp, ino, vpp));
+}
+
+/*
+ * File-handle to vnode translation is delegated to the lower
+ * filesystem with all arguments passed through untouched.
+ */
+int
+umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fidp;
+	struct mbuf *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred**credanonp;
+{
+	struct mount *lowermp = MOUNTTOUMAPMOUNT(mp)->umapm_vfs;
+
+	return (VFS_FHTOVP(lowermp, fidp, nam, vpp, exflagsp,credanonp));
+}
+
+/*
+ * Build a file handle for a umap vnode by asking for the handle of
+ * the lower vnode it stands for.
+ */
+int
+umapfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct vnode *lowervp = UMAPVPTOLOWERVP(vp);
+
+	return (VFS_VPTOFH(lowervp, fhp));
+}
+
+int umapfs_init __P((void)); /* forward declaration; umapfs_init is not defined in this part of the file */
+
+struct vfsops umap_vfsops = { /* VFS operations of the umap layer; positional, so the order must match struct vfsops */
+ umapfs_mount,
+ umapfs_start,
+ umapfs_unmount,
+ umapfs_root,
+ umapfs_quotactl,
+ umapfs_statfs,
+ umapfs_sync,
+ umapfs_vget,
+ umapfs_fhtovp,
+ umapfs_vptofh,
+ umapfs_init,
+};
diff --git a/sys/fs/umapfs/umap_vnops.c b/sys/fs/umapfs/umap_vnops.c
new file mode 100644
index 0000000..287804e
--- /dev/null
+++ b/sys/fs/umapfs/umap_vnops.c
@@ -0,0 +1,488 @@
+/*
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * the UCLA Ficus project.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)umap_vnops.c 8.3 (Berkeley) 1/5/94
+ */
+
+/*
+ * Umap Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <miscfs/umapfs/umap.h>
+
+
+int umap_bug_bypass = 0;	/* for debugging: enables bypass printf'ing */
+
+/*
+ * This is the 10-Apr-92 bypass routine.
+ * See null_vnops.c:null_bypass for more details.
+ *
+ * Generic handler for vnode operations: every umap vnode in the
+ * argument structure is replaced by its lower vnode, the credential
+ * (and the credential inside the componentname, if the operation has
+ * one) is replaced by a mapped duplicate, the operation is invoked on
+ * the lower layer, and finally the argument structure is restored and
+ * the duplicates are freed.  A vnode returned through vpp is wrapped
+ * in a umap node.  Returns the error from the lower operation (or
+ * from umap_node_create).
+ */
+int
+umap_bypass(ap)
+	struct vop_generic_args /* {
+		struct vnodeop_desc *a_desc;
+		<other random data follows, presumably>
+	} */ *ap;
+{
+	extern int (**umap_vnodeop_p)();  /* not extern, really "forward" */
+	struct ucred **credpp = 0, *credp = 0;
+	struct ucred *savecredp, *savecompcredp = 0;
+	struct ucred *compcredp = 0;
+	struct vnode **this_vp_p;
+	int error;
+	struct vnode *old_vps[VDESC_MAX_VPS];
+	struct vnode *vp1 = 0;
+	struct vnode **vps_p[VDESC_MAX_VPS];
+	struct vnode ***vppp;
+	struct vnodeop_desc *descp = ap->a_desc;
+	int reles, i;
+	struct componentname **compnamepp = 0;
+
+	if (umap_bug_bypass)
+		printf ("umap_bypass: %s\n", descp->vdesc_name);
+
+#ifdef SAFETY
+	/*
+	 * We require at least one vp.
+	 */
+	if (descp->vdesc_vp_offsets == NULL ||
+	    descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET)
+		panic ("umap_bypass: no vp's in map.\n");
+#endif
+
+	/*
+	 * Map the vnodes going in.
+	 * Later, we'll invoke the operation based on
+	 * the first mapped vnode's operation vector.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		vps_p[i] = this_vp_p =
+		    VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap);
+
+		/* Remember the first (umap) vnode; its mount holds the maps. */
+		if (i == 0) {
+			vp1 = *vps_p[0];
+		}
+
+		/*
+		 * We're not guaranteed that any but the first vnode
+		 * are of our type.  Check for and don't map any
+		 * that aren't.  (Must map first vp or vclean fails.)
+		 */
+
+		if (i && (*this_vp_p)->v_op != umap_vnodeop_p) {
+			old_vps[i] = NULL;
+		} else {
+			old_vps[i] = *this_vp_p;
+			*(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p);
+			/*
+			 * The low bits of vdesc_flags mark vnodes the
+			 * operation releases; take a reference on the
+			 * lower vnode so that release is balanced.
+			 */
+			if (reles & 1)
+				VREF(*this_vp_p);
+		}
+
+	}
+
+	/*
+	 * Fix the credentials.  (That's the purpose of this layer.)
+	 */
+
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+
+		credpp = VOPARG_OFFSETTO(struct ucred**,
+		    descp->vdesc_cred_offset, ap);
+
+		/* Save old values */
+
+		savecredp = (*credpp);
+		(*credpp) = crdup(savecredp);
+		credp = *credpp;
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user was %d, group %d\n",
+			    credp->cr_uid, credp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, credp);
+
+		if (umap_bug_bypass && credp->cr_uid != 0)
+			printf("umap_bypass: user now %d, group %d\n",
+			    credp->cr_uid, credp->cr_gid);
+	}
+
+	/* BSD often keeps a credential in the componentname structure
+	 * for speed.  If there is one, it better get mapped, too.
+	 */
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+
+		compnamepp = VOPARG_OFFSETTO(struct componentname**,
+		    descp->vdesc_componentname_offset, ap);
+
+		compcredp = (*compnamepp)->cn_cred;
+		savecompcredp = compcredp;
+		compcredp = (*compnamepp)->cn_cred = crdup(savecompcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user was %d, group %d\n",
+			    compcredp->cr_uid, compcredp->cr_gid);
+
+		/* Map all ids in the credential structure. */
+
+		umap_mapids(vp1->v_mount, compcredp);
+
+		if (umap_bug_bypass && compcredp->cr_uid != 0)
+			printf("umap_bypass: component credit user now %d, group %d\n",
+			    compcredp->cr_uid, compcredp->cr_gid);
+	}
+
+	/*
+	 * Call the operation on the lower layer
+	 * with the modified argument structure.
+	 */
+	error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap);
+
+	/*
+	 * Maintain the illusion of call-by-value
+	 * by restoring vnodes in the argument structure
+	 * to their original value.
+	 */
+	reles = descp->vdesc_flags;
+	for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) {
+		if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET)
+			break;   /* bail out at end of list */
+		if (old_vps[i]) {
+			*(vps_p[i]) = old_vps[i];
+			if (reles & 1)
+				vrele(*(vps_p[i]));
+		}
+	}
+
+	/*
+	 * Map the possible out-going vpp
+	 * (Assumes that the lower layer always returns
+	 * a VREF'ed vpp unless it gets an error.)
+	 */
+	if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET &&
+	    !(descp->vdesc_flags & VDESC_NOMAP_VPP) &&
+	    !error) {
+		if (descp->vdesc_flags & VDESC_VPP_WILLRELE)
+			goto out;
+		vppp = VOPARG_OFFSETTO(struct vnode***,
+		    descp->vdesc_vpp_offset, ap);
+		error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp);
+	}
+
+ out:
+	/*
+	 * Free duplicate cred structure and restore old one.
+	 */
+	if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && credp && credp->cr_uid != 0)
+			printf("umap_bypass: returning-user was %d\n",
+			    credp->cr_uid);
+
+		crfree(credp);
+		(*credpp) = savecredp;
+		if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0)
+			printf("umap_bypass: returning-user now %d\n\n",
+			    (*credpp)->cr_uid);
+	}
+
+	if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) {
+		if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user was %d\n",
+			    compcredp->cr_uid);
+
+		crfree(compcredp);
+		(*compnamepp)->cn_cred = savecompcredp;
+		/*
+		 * Report the restored credential.  compcredp has just
+		 * been handed to crfree() and must not be read again,
+		 * and credpp belongs to the other (possibly absent)
+		 * credential argument.
+		 */
+		if (umap_bug_bypass && savecompcredp &&
+		    savecompcredp->cr_uid != 0)
+			printf("umap_bypass: returning-component-user now %d\n",
+			    savecompcredp->cr_uid);
+	}
+
+	return (error);
+}
+
+
+/*
+ * We handle getattr to change the fsid and to translate the owner
+ * ids reported by the lower layer back into local ids.
+ *
+ * The operation is first bypassed to the lower layer; on success
+ * va_fsid is replaced with this mount's fsid and va_uid/va_gid are
+ * reverse-mapped through the mount's tables.  Ids with no reverse
+ * mapping become NOBODY / NULLGROUP.  Returns 0 or the bypass error.
+ */
+int
+umap_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	/*
+	 * Full-width id types: holding the ids in a short would
+	 * truncate and sign-extend large ids before the lookup.
+	 */
+	uid_t uid;
+	gid_t gid;
+	int error, tmpid, nentries, gnentries;
+	u_long (*mapdata)[2], (*gmapdata)[2];
+	struct vnode **vp1p;
+	struct vnodeop_desc *descp = ap->a_desc;
+
+	error = umap_bypass(ap);
+	if (error)
+		return (error);
+	/* Requires that arguments be restored. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+
+	/*
+	 * Umap needs to map the uid and gid returned by a stat
+	 * into the proper values for this site.  This involves
+	 * finding the returned id in the mapping information,
+	 * translating it into the id on the other end,
+	 * and filling in the proper field in the vattr
+	 * structure pointed to by ap->a_vap.  The same is done
+	 * for the uid (user table) and the gid (group table).
+	 */
+
+	/* Find entry in map */
+
+	uid = ap->a_vap->va_uid;
+	gid = ap->a_vap->va_gid;
+	if (umap_bug_bypass)
+		printf("umap_getattr: mapped uid = %d, mapped gid = %d\n",
+		    (int)uid, (int)gid);
+
+	vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap);
+	nentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries;
+	mapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata);
+	gnentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries;
+	gmapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata);
+
+	/* Reverse map the uid for the vnode.  Since it's a reverse
+	   map, we can't use umap_mapids() to do it. */
+
+	tmpid = umap_reverse_findid(uid, mapdata, nentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_uid = (uid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original uid = %d\n", (int)uid);
+	} else
+		ap->a_vap->va_uid = (uid_t) NOBODY;
+
+	/* Reverse map the gid for the vnode. */
+
+	tmpid = umap_reverse_findid(gid, gmapdata, gnentries);
+
+	if (tmpid != -1) {
+
+		ap->a_vap->va_gid = (gid_t) tmpid;
+		if (umap_bug_bypass)
+			printf("umap_getattr: original gid = %d\n", (int)gid);
+	} else
+		ap->a_vap->va_gid = (gid_t) NULLGROUP;
+
+	return (0);
+}
+
+int
+umap_inactive(ap)
+ struct vop_inactive_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ /*
+ * Do nothing (and _don't_ bypass).
+ * Wait to vrele lowervp until reclaim,
+ * so that until then our umap_node is in the
+ * cache and reusable.
+ *
+ * ap is not examined at all; the call always
+ * reports success.
+ */
+ return (0);
+}
+
+int
+umap_reclaim(ap)
+ struct vop_reclaim_args /* {
+ struct vnode *a_vp;
+ } */ *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct umap_node *xp = VTOUMAP(vp);
+ struct vnode *lowervp = xp->umap_lowervp; /* saved now: the field is cleared just below */
+
+ /* After this assignment, this node will not be re-used. */
+ xp->umap_lowervp = NULL;
+ remque(xp); /* NOTE(review): presumably unlinks xp from the umap node cache list - confirm */
+ FREE(vp->v_data, M_TEMP); /* release the per-vnode umap data */
+ vp->v_data = NULL;
+ vrele(lowervp); /* the vrele of the lower vnode that umap_inactive defers to reclaim */
+ return (0); /* always succeeds */
+}
+
+/*
+ * Strategy: point the buffer at the lower vnode for the duration of
+ * the call, run the strategy operation on it, then put the umap
+ * vnode back.  Returns whatever the lower strategy returns.
+ */
+int
+umap_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *umapvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = UMAPVPTOLOWERVP(umapvp);
+	error = VOP_STRATEGY(bp);
+	bp->b_vp = umapvp;
+
+	return (error);
+}
+
+/*
+ * Bwrite: point the buffer at the lower vnode for the duration of
+ * the call, run the bwrite operation on it, then put the umap vnode
+ * back.  Returns whatever the lower bwrite returns.
+ */
+int
+umap_bwrite(ap)
+	struct vop_bwrite_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *umapvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = UMAPVPTOLOWERVP(umapvp);
+	error = VOP_BWRITE(bp);
+	bp->b_vp = umapvp;
+
+	return (error);
+}
+
+
+/*
+ * Print a one-line description of a umap vnode: its own address and
+ * the address of the lower vnode it stands for.  Always returns 0.
+ */
+int
+umap_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *umapvp = ap->a_vp;
+
+	printf("\ttag VT_UMAPFS, vp=%x, lowervp=%x\n", umapvp,
+	    UMAPVPTOLOWERVP(umapvp));
+
+	return (0);
+}
+
+/*
+ * Rename is irregular, having two componentname structures.
+ * The credential in the second ("to") structure is mapped here;
+ * umap_bypass then takes care of everything else.  The original
+ * credential is put back before returning the bypass result.
+ */
+int
+umap_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	struct vnode *fromdvp = ap->a_fdvp;
+	struct componentname *tocnp = ap->a_tcnp;
+	struct ucred *origcred = tocnp->cn_cred;
+	struct ucred *mappedcred;
+	int error;
+
+	/* Work on a private copy so the caller's cred stays untouched. */
+	mappedcred = crdup(origcred);
+	tocnp->cn_cred = mappedcred;
+
+	if (umap_bug_bypass && mappedcred->cr_uid != 0)
+		printf("umap_rename: rename component credit user was %d, group %d\n",
+		    mappedcred->cr_uid, mappedcred->cr_gid);
+
+	/* Map all ids in the credential structure. */
+	umap_mapids(fromdvp->v_mount, mappedcred);
+
+	if (umap_bug_bypass && mappedcred->cr_uid != 0)
+		printf("umap_rename: rename component credit user now %d, group %d\n",
+		    mappedcred->cr_uid, mappedcred->cr_gid);
+
+	error = umap_bypass(ap);
+
+	/* Restore the additional mapped componentname cred structure. */
+	crfree(mappedcred);
+	tocnp->cn_cred = origcred;
+
+	return error;
+}
+
+/*
+ * Global vfs data structures
+ */
+/*
+ * XXX - strategy, bwrite are hand coded currently. They should
+ * go away with a merged buffer/block cache.
+ *
+ * umap_bypass is registered under vop_default_desc, i.e. as the
+ * handler for every operation that has no entry of its own below
+ * (NOTE(review): per the usual vnodeopv convention - confirm).
+ * The table ends with a NULL/NULL entry.  umap_vnodeop_opv_desc
+ * pairs the table with umap_vnodeop_p, the operations vector
+ * pointer that umap_bypass compares v_op against.
+ */
+int (**umap_vnodeop_p)();
+struct vnodeopv_entry_desc umap_vnodeop_entries[] = {
+ { &vop_default_desc, umap_bypass },
+
+ { &vop_getattr_desc, umap_getattr },
+ { &vop_inactive_desc, umap_inactive },
+ { &vop_reclaim_desc, umap_reclaim },
+ { &vop_print_desc, umap_print },
+ { &vop_rename_desc, umap_rename },
+
+ { &vop_strategy_desc, umap_strategy },
+ { &vop_bwrite_desc, umap_bwrite },
+
+ { (struct vnodeop_desc*) NULL, (int(*)()) NULL }
+};
+struct vnodeopv_desc umap_vnodeop_opv_desc =
+ { &umap_vnodeop_p, umap_vnodeop_entries };
diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h
new file mode 100644
index 0000000..463218a
--- /dev/null
+++ b/sys/fs/unionfs/union.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union.h 8.2 (Berkeley) 2/17/94
+ */
+
+struct union_args {
+ char *target; /* Target of loopback */
+ int mntflags; /* Options on the mount (NOTE(review): presumably UNMNT_* values - confirm) */
+};
+
+#define UNMNT_ABOVE 0x0001 /* Target appears above mount point */
+#define UNMNT_BELOW 0x0002 /* Target appears below mount point */
+#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */
+#define UNMNT_OPMASK 0x0003 /* Mask covering all three operation values above */
+
+struct union_mount {
+ struct vnode *um_uppervp; /* NOTE(review): presumably root vnode of the upper layer - confirm */
+ struct vnode *um_lowervp; /* NOTE(review): presumably root vnode of the lower layer - confirm */
+ struct ucred *um_cred; /* Credentials of user calling mount */
+ int um_cmode; /* cmask from mount process */
+ int um_op; /* Operation mode (NOTE(review): presumably one of UNMNT_* - confirm) */
+};
+
+#ifdef KERNEL
+
+/*
+ * UN_DIRMODE is the mode bits used to create a shadow directory.
+ * UN_FILEMODE is built the same way from the read and write bits only.
+ * Each is the base mask OR'ed with itself shifted right by 3 and by 6
+ * (assuming VREAD/VWRITE/VEXEC are the owner-position bits, that
+ * repeats them for group and other - TODO confirm).
+ */
+#define VRWXMODE (VREAD|VWRITE|VEXEC)
+#define VRWMODE (VREAD|VWRITE)
+#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6))
+#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6))
+
+/*
+ * A cache of vnode references
+ *
+ * One union_node describes one vnode of the union layer; the
+ * VTOUNION() and UNIONTOV() macros in this header convert between
+ * the vnode and its union_node.
+ */
+struct union_node {
+ LIST_ENTRY(union_node) un_cache; /* Hash chain */
+ struct vnode *un_vnode; /* Back pointer */
+ struct vnode *un_uppervp; /* overlaying object */
+ struct vnode *un_lowervp; /* underlying object */
+ struct vnode *un_dirvp; /* Parent dir of uppervp */
+ char *un_path; /* saved component name */
+ int un_hash; /* saved un_path hash value */
+ int un_openl; /* # of opens on lowervp */
+ int un_flags; /* NOTE(review): presumably UN_* bits below - confirm */
+#ifdef DIAGNOSTIC
+ pid_t un_pid; /* only present in DIAGNOSTIC kernels */
+#endif
+};
+
+#define UN_WANT 0x01 /* NOTE(review): presumably "someone is waiting for the node lock" - confirm */
+#define UN_LOCKED 0x02 /* NOTE(review): presumably "union node is locked" - confirm */
+#define UN_ULOCK 0x04 /* Upper node is locked */
+#define UN_KLOCK 0x08 /* Keep upper node locked on vput */
+
+extern int union_allocvp __P((struct vnode **, struct mount *, /* prototypes for union layer routines; none is defined in this header */
+ struct vnode *, struct vnode *,
+ struct componentname *, struct vnode *,
+ struct vnode *));
+extern int union_copyfile __P((struct proc *, struct ucred *,
+ struct vnode *, struct vnode *));
+extern int union_mkshadow __P((struct union_mount *, struct vnode *,
+ struct componentname *, struct vnode **));
+extern int union_vn_create __P((struct vnode **, struct union_node *,
+ struct proc *));
+extern int union_cn_close __P((struct vnode *, int, struct ucred *,
+ struct proc *));
+extern void union_removed_upper __P((struct union_node *un));
+extern struct vnode *union_lowervp __P((struct vnode *));
+extern void union_newlower __P((struct union_node *, struct vnode *));
+extern void union_newupper __P((struct union_node *, struct vnode *));
+
+#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data)) /* mount -> per-mount union data */
+#define VTOUNION(vp) ((struct union_node *)(vp)->v_data) /* vnode -> its union_node */
+#define UNIONTOV(un) ((un)->un_vnode) /* union_node -> its vnode (back pointer) */
+#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp) /* underlying object of vp */
+#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) /* overlaying object of vp */
+#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) /* upper vnode if set, else lower; evaluates vp more than once */
+
+extern int (**union_vnodeop_p)(); /* union layer vnode operations vector */
+extern struct vfsops union_vfsops; /* union layer VFS operations */
+#endif /* KERNEL */
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
new file mode 100644
index 0000000..77947d1
--- /dev/null
+++ b/sys/fs/unionfs/union_subr.c
@@ -0,0 +1,744 @@
+/*
+ * Copyright (c) 1994 Jan-Simon Pendry
+ * Copyright (c) 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_subr.c 8.4 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/kernel.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/file.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#ifdef DIAGNOSTIC
+#include <sys/proc.h>
+#endif
+
+/* must be power of two, otherwise change UNION_HASH() */
+#define NHASH 32
+
+/* unsigned int ... */
+#define UNION_HASH(u, l) \
+ (((((unsigned long) (u)) + ((unsigned long) l)) >> 8) & (NHASH-1))
+
+static LIST_HEAD(unhead, union_node) unhead[NHASH];
+static int unvplock[NHASH];
+
+/*
+ * Initialise the union node cache: empty every hash chain and
+ * clear the per-chain lock words. Always returns 0.
+ */
+int
+union_init()
+{
+ int i;
+
+ for (i = 0; i < NHASH; i++)
+ LIST_INIT(&unhead[i]);
+ bzero((caddr_t) unvplock, sizeof(unvplock));
+
+ /* declared int and installed in union_vfsops: return a defined value */
+ return (0);
+}
+
+/*
+ * Try to take the lock on hash chain (ix).
+ *
+ * Returns 0 with UN_LOCKED set if the chain was not locked.
+ * If the chain was already locked, sets UN_WANT, sleeps on the
+ * lock word and then returns 1 WITHOUT holding the lock, so
+ * callers must retry (typically in a while loop).
+ */
+static int
+union_list_lock(ix)
+ int ix;
+{
+
+ if (unvplock[ix] & UN_LOCKED) {
+ unvplock[ix] |= UN_WANT;
+ sleep((caddr_t) &unvplock[ix], PINOD);
+ return (1);
+ }
+
+ unvplock[ix] |= UN_LOCKED;
+
+ return (0);
+}
+
+/*
+ * Release the lock on hash chain (ix). If UN_WANT was set,
+ * clear it and wake up whatever is sleeping on the lock word.
+ */
+static void
+union_list_unlock(ix)
+ int ix;
+{
+
+ unvplock[ix] &= ~UN_LOCKED;
+
+ if (unvplock[ix] & UN_WANT) {
+ unvplock[ix] &= ~UN_WANT;
+ wakeup((caddr_t) &unvplock[ix]);
+ }
+}
+
+/*
+ * Replace the upper and/or lower vnode of (un) with (uppervp) and
+ * (lowervp). If the hash of the new pair differs from the hash of
+ * the current pair, (un) is moved from the old hash chain to the
+ * new one. A vnode that gets replaced is vrele'd; when an existing
+ * lower vnode is replaced the saved pathname is freed and the
+ * reference on the saved parent directory is dropped as well.
+ */
+void
+union_updatevp(un, uppervp, lowervp)
+ struct union_node *un;
+ struct vnode *uppervp;
+ struct vnode *lowervp;
+{
+ int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp);
+ int nhash = UNION_HASH(uppervp, lowervp);
+
+ if (ohash != nhash) {
+ /*
+ * Ensure locking is ordered from lower to higher
+ * to avoid deadlocks. Only the order of acquisition
+ * depends on the comparison: ohash and nhash must keep
+ * their meaning, otherwise the lock on the new chain
+ * is released early and the node is re-inserted on
+ * the old chain.
+ */
+ int first = (ohash < nhash) ? ohash : nhash;
+ int second = (ohash < nhash) ? nhash : ohash;
+
+ while (union_list_lock(first))
+ continue;
+
+ while (union_list_lock(second))
+ continue;
+
+ /* off the old chain; the new chain stays locked until the end */
+ LIST_REMOVE(un, un_cache);
+ union_list_unlock(ohash);
+ } else {
+ while (union_list_lock(nhash))
+ continue;
+ }
+
+ if (un->un_lowervp != lowervp) {
+ if (un->un_lowervp) {
+ vrele(un->un_lowervp);
+ if (un->un_path) {
+ free(un->un_path, M_TEMP);
+ un->un_path = 0;
+ }
+ if (un->un_dirvp) {
+ vrele(un->un_dirvp);
+ un->un_dirvp = NULLVP;
+ }
+ }
+ un->un_lowervp = lowervp;
+ }
+
+ if (un->un_uppervp != uppervp) {
+ if (un->un_uppervp)
+ vrele(un->un_uppervp);
+
+ un->un_uppervp = uppervp;
+ }
+
+ if (ohash != nhash)
+ LIST_INSERT_HEAD(&unhead[nhash], un, un_cache);
+
+ union_list_unlock(nhash);
+}
+
+void
+union_newlower(un, lowervp)
+ struct union_node *un;
+ struct vnode *lowervp;
+{
+
+ union_updatevp(un, un->un_uppervp, lowervp);
+}
+
+void
+union_newupper(un, uppervp)
+ struct union_node *un;
+ struct vnode *uppervp;
+{
+
+ union_updatevp(un, uppervp, un->un_lowervp);
+}
+
+/*
+ * allocate a union_node/vnode pair. the vnode is
+ * referenced and locked. the new vnode is returned
+ * via (vpp). (mp) is the mountpoint of the union filesystem,
+ * (dvp) is the parent directory where the upper layer object
+ * should exist (but doesn't) and (cnp) is the componentname
+ * information which is partially copied to allow the upper
+ * layer object to be created at a later time. (uppervp)
+ * and (lowervp) reference the upper and lower layer objects
+ * being mapped. either, but not both, can be nil.
+ * if supplied, (uppervp) is locked.
+ * the reference is either maintained in the new union_node
+ * object which is allocated, or they are vrele'd.
+ *
+ * all union_nodes are maintained on a singly-linked
+ * list. new nodes are only allocated when they cannot
+ * be found on this list. entries on the list are
+ * removed when the vfs reclaim entry is called.
+ *
+ * a single lock is kept for the entire list. this is
+ * needed because the getnewvnode() function can block
+ * waiting for a vnode to become free, in which case there
+ * may be more than one process trying to get the same
+ * vnode. this lock is only taken if we are going to
+ * call getnewvnode, since the kernel itself is single-threaded.
+ *
+ * if an entry is found on the list, then call vget() to
+ * take a reference. this is done because there may be
+ * zero references to it and so it needs to be removed from
+ * the vnode free list.
+ */
+int
+union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp)
+ struct vnode **vpp;
+ struct mount *mp;
+ struct vnode *undvp;
+ struct vnode *dvp; /* may be null */
+ struct componentname *cnp; /* may be null */
+ struct vnode *uppervp; /* may be null */
+ struct vnode *lowervp; /* may be null */
+{
+ int error;
+ struct union_node *un;
+ struct union_node **pp;
+ struct vnode *xlowervp = NULLVP;
+ int hash;
+ int try;
+
+ if (uppervp == NULLVP && lowervp == NULLVP)
+ panic("union: unidentifiable allocation");
+
+ if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) {
+ xlowervp = lowervp;
+ lowervp = NULLVP;
+ }
+
+loop:
+ for (try = 0; try < 3; try++) {
+ switch (try) {
+ case 0:
+ if (lowervp == NULLVP)
+ continue;
+ hash = UNION_HASH(uppervp, lowervp);
+ break;
+
+ case 1:
+ if (uppervp == NULLVP)
+ continue;
+ hash = UNION_HASH(uppervp, NULLVP);
+ break;
+
+ case 2:
+ if (lowervp == NULLVP)
+ continue;
+ hash = UNION_HASH(NULLVP, lowervp);
+ break;
+ }
+
+ while (union_list_lock(hash))
+ continue;
+
+ for (un = unhead[hash].lh_first; un != 0;
+ un = un->un_cache.le_next) {
+ if ((un->un_lowervp == lowervp ||
+ un->un_lowervp == NULLVP) &&
+ (un->un_uppervp == uppervp ||
+ un->un_uppervp == NULLVP) &&
+ (UNIONTOV(un)->v_mount == mp)) {
+ if (vget(UNIONTOV(un), 0)) {
+ union_list_unlock(hash);
+ goto loop;
+ }
+ break;
+ }
+ }
+
+ union_list_unlock(hash);
+
+ if (un)
+ break;
+ }
+
+ if (un) {
+ /*
+ * Obtain a lock on the union_node.
+ * uppervp is locked, though un->un_uppervp
+ * may not be. this doesn't break the locking
+ * hierarchy since in the case that un->un_uppervp
+ * is not yet locked it will be vrele'd and replaced
+ * with uppervp.
+ */
+
+ if ((dvp != NULLVP) && (uppervp == dvp)) {
+ /*
+ * Access ``.'', so (un) will already
+ * be locked. Since this process has
+ * the lock on (uppervp) no other
+ * process can hold the lock on (un).
+ */
+#ifdef DIAGNOSTIC
+ if ((un->un_flags & UN_LOCKED) == 0)
+ panic("union: . not locked");
+ else if (curproc && un->un_pid != curproc->p_pid &&
+ un->un_pid > -1 && curproc->p_pid > -1)
+ panic("union: allocvp not lock owner");
+#endif
+ } else {
+ if (un->un_flags & UN_LOCKED) {
+ vrele(UNIONTOV(un));
+ un->un_flags |= UN_WANT;
+ sleep((caddr_t) &un->un_flags, PINOD);
+ goto loop;
+ }
+ un->un_flags |= UN_LOCKED;
+
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+ }
+
+ /*
+ * At this point, the union_node is locked,
+ * un->un_uppervp may not be locked, and uppervp
+ * is locked or nil.
+ */
+
+ /*
+ * Save information about the upper layer.
+ */
+ if (uppervp != un->un_uppervp) {
+ union_newupper(un, uppervp);
+ } else if (uppervp) {
+ vrele(uppervp);
+ }
+
+ if (un->un_uppervp) {
+ un->un_flags |= UN_ULOCK;
+ un->un_flags &= ~UN_KLOCK;
+ }
+
+ /*
+ * Save information about the lower layer.
+ * This needs to keep track of pathname
+ * and directory information which union_vn_create
+ * might need.
+ */
+ if (lowervp != un->un_lowervp) {
+ union_newlower(un, lowervp);
+ if (cnp && (lowervp != NULLVP) &&
+ (lowervp->v_type == VREG)) {
+ un->un_hash = cnp->cn_hash;
+ un->un_path = malloc(cnp->cn_namelen+1,
+ M_TEMP, M_WAITOK);
+ bcopy(cnp->cn_nameptr, un->un_path,
+ cnp->cn_namelen);
+ un->un_path[cnp->cn_namelen] = '\0';
+ VREF(dvp);
+ un->un_dirvp = dvp;
+ }
+ } else if (lowervp) {
+ vrele(lowervp);
+ }
+ *vpp = UNIONTOV(un);
+ return (0);
+ }
+
+ /*
+ * otherwise lock the vp list while we call getnewvnode
+ * since that can block.
+ */
+ hash = UNION_HASH(uppervp, lowervp);
+
+ if (union_list_lock(hash))
+ goto loop;
+
+ error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp);
+ if (error) {
+ if (uppervp) {
+ if (dvp == uppervp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ }
+ if (lowervp)
+ vrele(lowervp);
+
+ goto out;
+ }
+
+ MALLOC((*vpp)->v_data, void *, sizeof(struct union_node),
+ M_TEMP, M_WAITOK);
+
+ if (uppervp)
+ (*vpp)->v_type = uppervp->v_type;
+ else
+ (*vpp)->v_type = lowervp->v_type;
+ un = VTOUNION(*vpp);
+ un->un_vnode = *vpp;
+ un->un_uppervp = uppervp;
+ un->un_lowervp = lowervp;
+ un->un_openl = 0;
+ un->un_flags = UN_LOCKED;
+ if (un->un_uppervp)
+ un->un_flags |= UN_ULOCK;
+#ifdef DIAGNOSTIC
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+ if (cnp && (lowervp != NULLVP) && (lowervp->v_type == VREG)) {
+ un->un_hash = cnp->cn_hash;
+ un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK);
+ bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen);
+ un->un_path[cnp->cn_namelen] = '\0';
+ VREF(dvp);
+ un->un_dirvp = dvp;
+ } else {
+ un->un_hash = 0;
+ un->un_path = 0;
+ un->un_dirvp = 0;
+ }
+
+ LIST_INSERT_HEAD(&unhead[hash], un, un_cache);
+
+ if (xlowervp)
+ vrele(xlowervp);
+
+out:
+ union_list_unlock(hash);
+
+ return (error);
+}
+
+int
+union_freevp(vp)
+ struct vnode *vp;
+{
+ struct union_node *un = VTOUNION(vp);
+
+ LIST_REMOVE(un, un_cache);
+
+ if (un->un_uppervp)
+ vrele(un->un_uppervp);
+ if (un->un_lowervp)
+ vrele(un->un_lowervp);
+ if (un->un_dirvp)
+ vrele(un->un_dirvp);
+ if (un->un_path)
+ free(un->un_path, M_TEMP);
+
+ FREE(vp->v_data, M_TEMP);
+ vp->v_data = 0;
+
+ return (0);
+}
+
+/*
+ * copyfile. copy the vnode (fvp) to the vnode (tvp)
+ * using a sequence of reads and writes. both (fvp)
+ * and (tvp) are locked on entry and exit.
+ */
+int
+union_copyfile(p, cred, fvp, tvp)
+ struct proc *p;
+ struct ucred *cred;
+ struct vnode *fvp;
+ struct vnode *tvp;
+{
+ char *buf;
+ struct uio uio;
+ struct iovec iov;
+ int error = 0;
+
+ /*
+ * strategy:
+ * allocate a buffer of size MAXBSIZE.
+ * loop doing reads and writes, keeping track
+ * of the current uio offset.
+ * give up at the first sign of trouble.
+ */
+
+ uio.uio_procp = p;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_offset = 0;
+
+ VOP_UNLOCK(fvp); /* XXX */
+ LEASE_CHECK(fvp, p, cred, LEASE_READ);
+ VOP_LOCK(fvp); /* XXX */
+ VOP_UNLOCK(tvp); /* XXX */
+ LEASE_CHECK(tvp, p, cred, LEASE_WRITE);
+ VOP_LOCK(tvp); /* XXX */
+
+ buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK);
+
+ /* ugly loop follows... */
+ do {
+ off_t offset = uio.uio_offset;
+
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE;
+ uio.uio_resid = iov.iov_len;
+ uio.uio_rw = UIO_READ;
+ error = VOP_READ(fvp, &uio, 0, cred);
+
+ if (error == 0) {
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ iov.iov_base = buf;
+ iov.iov_len = MAXBSIZE - uio.uio_resid;
+ uio.uio_offset = offset;
+ uio.uio_rw = UIO_WRITE;
+ uio.uio_resid = iov.iov_len;
+
+ if (uio.uio_resid == 0)
+ break;
+
+ do {
+ error = VOP_WRITE(tvp, &uio, 0, cred);
+ } while ((uio.uio_resid > 0) && (error == 0));
+ }
+
+ } while (error == 0);
+
+ free(buf, M_TEMP);
+ return (error);
+}
+
+/*
+ * Create a shadow directory in the upper layer.
+ * The new vnode is returned locked.
+ *
+ * (um) points to the union mount structure for access to the
+ * mounting process's credentials.
+ * (dvp) is the directory in which to create the shadow directory.
+ * it is unlocked on entry and exit.
+ * (cnp) is the componentname to be created.
+ * (vpp) is the returned newly created shadow directory, which
+ * is returned locked.
+ */
+int
+union_mkshadow(um, dvp, cnp, vpp)
+ struct union_mount *um;
+ struct vnode *dvp;
+ struct componentname *cnp;
+ struct vnode **vpp;
+{
+ int error;
+ struct vattr va;
+ struct proc *p = cnp->cn_proc;
+ struct componentname cn;
+
+ /*
+ * policy: when creating the shadow directory in the
+ * upper layer, create it owned by the user who did
+ * the mount, group from parent directory, and mode
+ * 777 modified by umask (ie mostly identical to the
+ * mkdir syscall). (jsp, kb)
+ */
+
+ /*
+ * A new componentname structure must be faked up because
+ * there is no way to know where the upper level cnp came
+ * from or what it is being used for. This must duplicate
+ * some of the work done by NDINIT, some of the work done
+ * by namei, some of the work done by lookup and some of
+ * the work done by VOP_LOOKUP when given a CREATE flag.
+ * Conclusion: Horrible.
+ *
+ * The pathname buffer will be FREEed by VOP_MKDIR.
+ */
+ cn.cn_pnbuf = malloc(cnp->cn_namelen+1, M_NAMEI, M_WAITOK);
+ bcopy(cnp->cn_nameptr, cn.cn_pnbuf, cnp->cn_namelen);
+ cn.cn_pnbuf[cnp->cn_namelen] = '\0';
+
+ cn.cn_nameiop = CREATE;
+ cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn.cn_proc = cnp->cn_proc;
+ if (um->um_op == UNMNT_ABOVE)
+ cn.cn_cred = cnp->cn_cred;
+ else
+ cn.cn_cred = um->um_cred;
+ cn.cn_nameptr = cn.cn_pnbuf;
+ cn.cn_namelen = cnp->cn_namelen;
+ cn.cn_hash = cnp->cn_hash;
+ cn.cn_consume = cnp->cn_consume;
+
+ VREF(dvp);
+ if (error = relookup(dvp, vpp, &cn))
+ return (error);
+ vrele(dvp);
+
+ if (*vpp) {
+ VOP_ABORTOP(dvp, &cn);
+ VOP_UNLOCK(dvp);
+ vrele(*vpp);
+ *vpp = NULLVP;
+ return (EEXIST);
+ }
+
+ VATTR_NULL(&va);
+ va.va_type = VDIR;
+ va.va_mode = um->um_cmode;
+
+ /* LEASE_CHECK: dvp is locked */
+ LEASE_CHECK(dvp, p, p->p_ucred, LEASE_WRITE);
+
+ error = VOP_MKDIR(dvp, vpp, &cn, &va);
+ return (error);
+}
+
+/*
+ * union_vn_create: creates and opens a new shadow file
+ * on the upper union layer. this function is similar
+ * in spirit to calling vn_open but it avoids calling namei().
+ * the problem with calling namei is that a) it locks too many
+ * things, and b) it doesn't start at the "right" directory,
+ * whereas relookup is told where to start.
+ *
+ * (vpp) receives the new vnode; it is set to NULLVP on entry and
+ * is left that way on every error return.
+ * (un) supplies the saved name (un_path), its hash (un_hash) and
+ * the directory in which to create the file (un_dirvp).
+ * (p) is the process whose credentials and umask are used.
+ *
+ * returns 0 on success, EEXIST if the name already exists, or
+ * the error from relookup, VOP_CREATE or VOP_OPEN.
+ */
+int
+union_vn_create(vpp, un, p)
+ struct vnode **vpp;
+ struct union_node *un;
+ struct proc *p;
+{
+ struct vnode *vp;
+ struct ucred *cred = p->p_ucred;
+ struct vattr vat;
+ struct vattr *vap = &vat;
+ int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL);
+ int error;
+ int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask;
+ struct componentname cn;
+
+ *vpp = NULLVP;
+
+ /*
+ * Build a new componentname structure (for the same
+ * reasons outlined in union_mkshadow).
+ * The difference here is that the file is owned by
+ * the current user, rather than by the person who
+ * did the mount, since the current user needs to be
+ * able to write the file (that's why it is being
+ * copied in the first place).
+ */
+ cn.cn_namelen = strlen(un->un_path);
+ /* +1: the bcopy below also copies the terminating NUL */
+ cn.cn_pnbuf = (caddr_t) malloc(cn.cn_namelen+1, M_NAMEI, M_WAITOK);
+ bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1);
+ cn.cn_nameiop = CREATE;
+ cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN);
+ cn.cn_proc = p;
+ cn.cn_cred = p->p_ucred;
+ cn.cn_nameptr = cn.cn_pnbuf;
+ cn.cn_hash = un->un_hash;
+ cn.cn_consume = 0;
+
+ VREF(un->un_dirvp);
+ if (error = relookup(un->un_dirvp, &vp, &cn))
+ return (error);
+ vrele(un->un_dirvp);
+
+ if (vp) {
+ /* the name already exists: abandon the create */
+ VOP_ABORTOP(un->un_dirvp, &cn);
+ if (un->un_dirvp == vp)
+ vrele(un->un_dirvp);
+ else
+ vput(un->un_dirvp);
+ vrele(vp);
+ return (EEXIST);
+ }
+
+ /*
+ * Good - there was no race to create the file
+ * so go ahead and create it. The permissions
+ * on the file will be 0666 modified by the
+ * current user's umask. Access to the file, while
+ * it is unioned, will require access to the top *and*
+ * bottom files. Access when not unioned will simply
+ * require access to the top-level file.
+ * TODO: confirm choice of access permissions.
+ */
+ VATTR_NULL(vap);
+ vap->va_type = VREG;
+ vap->va_mode = cmode;
+ LEASE_CHECK(un->un_dirvp, p, cred, LEASE_WRITE);
+ if (error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap))
+ return (error);
+
+ if (error = VOP_OPEN(vp, fmode, cred, p)) {
+ vput(vp);
+ return (error);
+ }
+
+ /* the file is open for writing; union_vn_close drops this count */
+ vp->v_writecount++;
+ *vpp = vp;
+ return (0);
+}
+
+/*
+ * Close (vp): decrement v_writecount when (fmode) includes FWRITE
+ * (union_vn_create increments it), then call VOP_CLOSE, passing
+ * (cred) and (p) through. Returns the result of VOP_CLOSE.
+ */
+int
+union_vn_close(vp, fmode, cred, p)
+ struct vnode *vp;
+ int fmode;
+ struct ucred *cred;
+ struct proc *p;
+{
+ if (fmode & FWRITE)
+ --vp->v_writecount;
+ /* pass all four arguments, as the VOP_OPEN call in this file does */
+ return (VOP_CLOSE(vp, fmode, cred, p));
+}
+
+void
+union_removed_upper(un)
+ struct union_node *un;
+{
+ if (un->un_flags & UN_ULOCK) {
+ un->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(un->un_uppervp);
+ }
+
+ union_newupper(un, NULLVP);
+}
+
+/*
+ * Return the lower vnode of the union vnode (vp).
+ *
+ * When a lower vnode exists and has the same v_type as (vp), a
+ * reference is first taken on it with vget(); NULLVP is returned
+ * if that vget() fails.
+ *
+ * NOTE(review): when there is no lower vnode, or the types differ,
+ * un_lowervp is returned without any reference having been taken
+ * here - confirm that callers expect this.
+ */
+struct vnode *
+union_lowervp(vp)
+ struct vnode *vp;
+{
+ struct union_node *un = VTOUNION(vp);
+
+ if (un->un_lowervp && (vp->v_type == un->un_lowervp->v_type)) {
+ if (vget(un->un_lowervp, 0))
+ return (NULLVP);
+ }
+
+ return (un->un_lowervp);
+}
diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c
new file mode 100644
index 0000000..9fa2746
--- /dev/null
+++ b/sys/fs/unionfs/union_vfsops.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright (c) 1994 The Regents of the University of California.
+ * Copyright (c) 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software donated to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vfsops.c 8.7 (Berkeley) 3/5/94
+ */
+
+/*
+ * Union Layer
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/filedesc.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+/*
+ * Mount union filesystem
+ *
+ * (mp) is the mount structure being set up, (path) the user-space
+ * name of the mount point, (data) a user-space struct union_args,
+ * (ndp) a nameidata used to look up args.target and (p) the
+ * calling process.
+ *
+ * Returns 0 on success. On failure everything acquired here (the
+ * credential copy, the root vnode references and the union_mount
+ * structure) is released and an errno value is returned.
+ */
+int
+union_mount(mp, path, data, ndp, p)
+ struct mount *mp;
+ char *path;
+ caddr_t data;
+ struct nameidata *ndp;
+ struct proc *p;
+{
+ int error = 0;
+ struct union_args args;
+ struct vnode *lowerrootvp = NULLVP;
+ struct vnode *upperrootvp = NULLVP;
+ struct union_mount *um = 0;
+ struct ucred *cred = 0;
+ struct ucred *scred;
+ struct vattr va;
+ char *cp;
+ int len;
+ u_int size;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_mount(mp = %x)\n", mp);
+#endif
+
+ /*
+ * Update mounts are not supported
+ */
+ if (mp->mnt_flag & MNT_UPDATE) {
+ /*
+ * Need to provide.
+ * 1. a way to convert between rdonly and rdwr mounts.
+ * 2. support for nfs exports.
+ */
+ error = EOPNOTSUPP;
+ goto bad;
+ }
+
+ /*
+ * Take a copy of the process's credentials. This isn't
+ * quite right since the euid will always be zero and we
+ * want to get the "real" users credentials. So fix up
+ * the uid field after taking the copy.
+ */
+ cred = crdup(p->p_ucred);
+ cred->cr_uid = p->p_cred->p_ruid;
+
+ /*
+ * Ensure the *real* user has write permission on the
+ * mounted-on directory. This allows the mount_union
+ * command to be made setuid root so allowing anyone
+ * to do union mounts onto any directory on which they
+ * have write permission and which they also own.
+ */
+ error = VOP_GETATTR(mp->mnt_vnodecovered, &va, cred, p);
+ if (error)
+ goto bad;
+ if ((va.va_uid != cred->cr_uid) &&
+ (cred->cr_uid != 0)) {
+ error = EACCES;
+ goto bad;
+ }
+ error = VOP_ACCESS(mp->mnt_vnodecovered, VWRITE, cred, p);
+ if (error)
+ goto bad;
+
+ /*
+ * Get argument
+ */
+ if (error = copyin(data, (caddr_t)&args, sizeof(struct union_args)))
+ goto bad;
+
+ lowerrootvp = mp->mnt_vnodecovered;
+ VREF(lowerrootvp);
+
+ /*
+ * Find upper node. Use the real process credentials,
+ * not the effective ones since this will have come
+ * through a setuid process (mount_union). All this
+ * messing around with permissions is entirely bogus
+ * and should be removed by allowing any user straight
+ * past the mount system call.
+ *
+ * NOTE(review): p_ucred is restored before namei() is
+ * called; confirm whether the lookup takes its credentials
+ * at NDINIT time or at namei() time.
+ */
+ scred = p->p_ucred;
+ p->p_ucred = cred;
+ NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT,
+ UIO_USERSPACE, args.target, p);
+ p->p_ucred = scred;
+
+ if (error = namei(ndp))
+ goto bad;
+
+ upperrootvp = ndp->ni_vp;
+ vrele(ndp->ni_dvp);
+ ndp->ni_dvp = NULL;
+
+ if (upperrootvp->v_type != VDIR) {
+ error = EINVAL;
+ goto bad;
+ }
+
+ um = (struct union_mount *) malloc(sizeof(struct union_mount),
+ M_UFSMNT, M_WAITOK); /* XXX */
+
+ /*
+ * Keep a held reference to the target vnodes.
+ * They are vrele'd in union_unmount.
+ *
+ * Depending on the _BELOW flag, the filesystems are
+ * viewed in a different order. In effect, this is the
+ * same as providing a mount under option to the mount syscall.
+ */
+
+ um->um_op = args.mntflags & UNMNT_OPMASK;
+ switch (um->um_op) {
+ case UNMNT_ABOVE:
+ um->um_lowervp = lowerrootvp;
+ um->um_uppervp = upperrootvp;
+ break;
+
+ case UNMNT_BELOW:
+ um->um_lowervp = upperrootvp;
+ um->um_uppervp = lowerrootvp;
+ break;
+
+ case UNMNT_REPLACE:
+ vrele(lowerrootvp);
+ lowerrootvp = NULLVP;
+ um->um_uppervp = upperrootvp;
+ um->um_lowervp = lowerrootvp;
+ break;
+
+ default:
+ /* (um) is released at the bad label */
+ error = EINVAL;
+ goto bad;
+ }
+
+ um->um_cred = cred;
+ um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask;
+
+ /*
+ * Depending on what you think the MNT_LOCAL flag might mean,
+ * you may want the && to be || on the conditional below.
+ * At the moment it has been defined that the filesystem is
+ * only local if it is all local, ie the MNT_LOCAL flag implies
+ * that the entire namespace is local. If you think the MNT_LOCAL
+ * flag implies that some of the files might be stored locally
+ * then you will want to change the conditional.
+ */
+ if (um->um_op == UNMNT_ABOVE) {
+ if (((um->um_lowervp == NULLVP) ||
+ (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) &&
+ (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL))
+ mp->mnt_flag |= MNT_LOCAL;
+ }
+
+ /*
+ * Copy in the upper layer's RDONLY flag. This is for the benefit
+ * of lookup() which explicitly checks the flag, rather than asking
+ * the filesystem for its own opinion. This means, that an update
+ * mount of the underlying filesystem to go from rdonly to rdwr
+ * will leave the unioned view as read-only.
+ */
+ mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY);
+
+ /*
+ * This is a user mount. Privilege check for unmount
+ * will be done in union_unmount.
+ */
+ mp->mnt_flag |= MNT_USER;
+
+ mp->mnt_data = (qaddr_t) um;
+ getnewfsid(mp, MOUNT_UNION);
+
+ (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size);
+ bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size);
+
+ /* f_mntfromname is an op-dependent prefix followed by args.target */
+ switch (um->um_op) {
+ case UNMNT_ABOVE:
+ cp = "<above>";
+ break;
+ case UNMNT_BELOW:
+ cp = "<below>";
+ break;
+ case UNMNT_REPLACE:
+ cp = "";
+ break;
+ }
+ len = strlen(cp);
+ bcopy(cp, mp->mnt_stat.f_mntfromname, len);
+
+ cp = mp->mnt_stat.f_mntfromname + len;
+ len = MNAMELEN - len;
+
+ (void) copyinstr(args.target, cp, len - 1, &size);
+ bzero(cp + size, len - size);
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_mount: from %s, on %s\n",
+ mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname);
+#endif
+ return (0);
+
+bad:
+ /* only an invalid mount op gets here with (um) allocated */
+ if (um)
+ free(um, M_UFSMNT);
+ if (cred)
+ crfree(cred);
+ if (upperrootvp)
+ vrele(upperrootvp);
+ if (lowerrootvp)
+ vrele(lowerrootvp);
+ return (error);
+}
+
+/*
+ * VFS start. Nothing needed here - the start routine
+ * on the underlying filesystem(s) will have been called
+ * when that filesystem was mounted.
+ */
+int
+union_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Free reference to union layer
+ */
+int
+union_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct vnode *um_rootvp;
+ int error;
+ int flags = 0;
+ extern int doforce;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_unmount(mp = %x)\n", mp);
+#endif
+
+ /* only the mounter, or superuser can unmount */
+ if ((p->p_cred->p_ruid != um->um_cred->cr_uid) &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ return (error);
+
+ if (mntflags & MNT_FORCE) {
+ /* union can never be rootfs so don't check for it */
+ if (!doforce)
+ return (EINVAL);
+ flags |= FORCECLOSE;
+ }
+
+ if (error = union_root(mp, &um_rootvp))
+ return (error);
+ if (um_rootvp->v_usecount > 1) {
+ vput(um_rootvp);
+ return (EBUSY);
+ }
+ if (error = vflush(mp, um_rootvp, flags)) {
+ vput(um_rootvp);
+ return (error);
+ }
+
+#ifdef UNION_DIAGNOSTIC
+ vprint("alias root of lower", um_rootvp);
+#endif
+ /*
+ * Discard references to upper and lower target vnodes.
+ */
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ vrele(um->um_uppervp);
+ crfree(um->um_cred);
+ /*
+ * Release reference on underlying root vnode
+ */
+ vput(um_rootvp);
+ /*
+ * And blow it away for future re-use
+ */
+ vgone(um_rootvp);
+ /*
+ * Finally, throw away the union_mount structure
+ */
+ free(mp->mnt_data, M_UFSMNT); /* XXX */
+ mp->mnt_data = 0;
+ return (0);
+}
+
+int
+union_root(mp, vpp)
+ struct mount *mp;
+ struct vnode **vpp;
+{
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ int error;
+ int loselock;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_root(mp = %x, lvp = %x, uvp = %x)\n", mp,
+ um->um_lowervp,
+ um->um_uppervp);
+#endif
+
+ /*
+ * Return locked reference to root.
+ */
+ VREF(um->um_uppervp);
+ if ((um->um_op == UNMNT_BELOW) &&
+ VOP_ISLOCKED(um->um_uppervp)) {
+ loselock = 1;
+ } else {
+ VOP_LOCK(um->um_uppervp);
+ loselock = 0;
+ }
+ if (um->um_lowervp)
+ VREF(um->um_lowervp);
+ error = union_allocvp(vpp, mp,
+ (struct vnode *) 0,
+ (struct vnode *) 0,
+ (struct componentname *) 0,
+ um->um_uppervp,
+ um->um_lowervp);
+
+ if (error) {
+ if (!loselock)
+ VOP_UNLOCK(um->um_uppervp);
+ vrele(um->um_uppervp);
+ if (um->um_lowervp)
+ vrele(um->um_lowervp);
+ } else {
+ (*vpp)->v_flag |= VROOT;
+ if (loselock)
+ VTOUNION(*vpp)->un_flags &= ~UN_ULOCK;
+ }
+
+ return (error);
+}
+
+int
+union_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP);
+}
+
+int
+union_statfs(mp, sbp, p)
+ struct mount *mp;
+ struct statfs *sbp;
+ struct proc *p;
+{
+ int error;
+ struct union_mount *um = MOUNTTOUNIONMOUNT(mp);
+ struct statfs mstat;
+ int lbsize;
+
+#ifdef UNION_DIAGNOSTIC
+ printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp,
+ um->um_lowervp,
+ um->um_uppervp);
+#endif
+
+ bzero(&mstat, sizeof(mstat));
+
+ if (um->um_lowervp) {
+ error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p);
+ if (error)
+ return (error);
+ }
+
+ /* now copy across the "interesting" information and fake the rest */
+#if 0
+ sbp->f_type = mstat.f_type;
+ sbp->f_flags = mstat.f_flags;
+ sbp->f_bsize = mstat.f_bsize;
+ sbp->f_iosize = mstat.f_iosize;
+#endif
+ lbsize = mstat.f_bsize;
+ sbp->f_blocks = mstat.f_blocks;
+ sbp->f_bfree = mstat.f_bfree;
+ sbp->f_bavail = mstat.f_bavail;
+ sbp->f_files = mstat.f_files;
+ sbp->f_ffree = mstat.f_ffree;
+
+ error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p);
+ if (error)
+ return (error);
+
+ sbp->f_type = MOUNT_UNION;
+ sbp->f_flags = mstat.f_flags;
+ sbp->f_bsize = mstat.f_bsize;
+ sbp->f_iosize = mstat.f_iosize;
+
+ /*
+ * if the lower and upper blocksizes differ, then frig the
+ * block counts so that the sizes reported by df make some
+ * kind of sense. none of this makes sense though.
+ */
+
+ if (mstat.f_bsize != lbsize) {
+ sbp->f_blocks = sbp->f_blocks * lbsize / mstat.f_bsize;
+ sbp->f_bfree = sbp->f_bfree * lbsize / mstat.f_bsize;
+ sbp->f_bavail = sbp->f_bavail * lbsize / mstat.f_bsize;
+ }
+ sbp->f_blocks += mstat.f_blocks;
+ sbp->f_bfree += mstat.f_bfree;
+ sbp->f_bavail += mstat.f_bavail;
+ sbp->f_files += mstat.f_files;
+ sbp->f_ffree += mstat.f_ffree;
+
+ if (sbp != &mp->mnt_stat) {
+ bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid));
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ return (0);
+}
+
+int
+union_sync(mp, waitfor, cred, p)
+ struct mount *mp;
+ int waitfor;
+ struct ucred *cred;
+ struct proc *p;
+{
+
+ /*
+ * XXX - Assumes no data cached at union layer.
+ */
+ return (0);
+}
+
+int
+union_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+int
+union_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp)
+ struct mount *mp;
+ struct fid *fidp;
+ struct mbuf *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred **credanonp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+int
+union_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+int union_init __P((void));
+
+struct vfsops union_vfsops = {
+ union_mount,
+ union_start,
+ union_unmount,
+ union_root,
+ union_quotactl,
+ union_statfs,
+ union_sync,
+ union_vget,
+ union_fhtovp,
+ union_vptofh,
+ union_init,
+};
diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c
new file mode 100644
index 0000000..96327b0
--- /dev/null
+++ b/sys/fs/unionfs/union_vnops.c
@@ -0,0 +1,1495 @@
+/*
+ * Copyright (c) 1992, 1993, 1994 The Regents of the University of California.
+ * Copyright (c) 1992, 1993, 1994 Jan-Simon Pendry.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Jan-Simon Pendry.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)union_vnops.c 8.6 (Berkeley) 2/17/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/queue.h>
+#include <miscfs/union/union.h>
+
+#define FIXUP(un) { \
+ if (((un)->un_flags & UN_ULOCK) == 0) { \
+ union_fixup(un); \
+ } \
+}
+
+/*
+ * Helper behind FIXUP(): lock the upper vnode of the union node and
+ * then record that by setting UN_ULOCK in un_flags.
+ */
+static void
+union_fixup(un)
+	struct union_node *un;
+{
+	struct vnode *uppervp = un->un_uppervp;
+
+	VOP_LOCK(uppervp);
+	un->un_flags |= UN_ULOCK;
+}
+
+/*
+ * Perform the lookup of cnp in a single layer on behalf of
+ * union_lookup.
+ *
+ * udvp - vnode at which the mount-point traversal done after the
+ * lookup must stop (see the final loop).
+ * dvp - directory vnode to search.
+ * vpp - receives the resulting vnode on success.
+ * cnp - the component name being looked up.
+ *
+ * Returns 0 on success, otherwise the error from VOP_LOOKUP or
+ * VFS_ROOT.  *vpp is only written on success.
+ */
+static int
+union_lookup1(udvp, dvp, vpp, cnp)
+ struct vnode *udvp;
+ struct vnode *dvp;
+ struct vnode **vpp;
+ struct componentname *cnp;
+{
+ int error;
+ struct vnode *tdvp;
+ struct mount *mp;
+
+ /*
+ * If stepping up the directory tree, check for going
+ * back across the mount point, in which case do what
+ * lookup would do by stepping back down the mount
+ * hierarchy.
+ */
+ if (cnp->cn_flags & ISDOTDOT) {
+ for (;;) {
+ /*
+ * Don't do the NOCROSSMOUNT check
+ * at this level. By definition,
+ * union fs deals with namespaces, not
+ * filesystems.
+ */
+ if ((dvp->v_flag & VROOT) == 0)
+ break;
+
+ /*
+ * dvp is the root of a mount: switch to the vnode it
+ * covers, releasing the old one (vput) and taking a
+ * reference and the lock on the new one.
+ */
+ tdvp = dvp;
+ dvp = dvp->v_mount->mnt_vnodecovered;
+ vput(tdvp);
+ VREF(dvp);
+ VOP_LOCK(dvp);
+ }
+ }
+
+ error = VOP_LOOKUP(dvp, &tdvp, cnp);
+ if (error)
+ return (error);
+
+ /*
+ * The parent directory will have been unlocked, unless lookup
+ * found the last component. In which case, re-lock the node
+ * here to allow it to be unlocked again (phew) in union_lookup.
+ */
+ if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN))
+ VOP_LOCK(dvp);
+
+ /* From here on dvp names the vnode that the lookup found. */
+ dvp = tdvp;
+
+ /*
+ * Lastly check if the current node is a mount point in
+ * which case walk up the mount hierarchy making sure not to
+ * bump into the root of the mount tree (ie. dvp != udvp).
+ */
+ while (dvp != udvp && (dvp->v_type == VDIR) &&
+ (mp = dvp->v_mountedhere)) {
+
+ /*
+ * The mount is flagged MNT_MLOCK: set MNT_MWAIT, sleep on
+ * the mount and then re-evaluate the loop condition.
+ */
+ if (mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT;
+ sleep((caddr_t) mp, PVFS);
+ continue;
+ }
+
+ if (error = VFS_ROOT(mp, &tdvp)) {
+ vput(dvp);
+ return (error);
+ }
+
+ /* Replace the covered vnode with the root of the mount. */
+ vput(dvp);
+ dvp = tdvp;
+ }
+
+ *vpp = dvp;
+ return (0);
+}
+
+/*
+ * Lookup vnode operation for a union directory.
+ *
+ * The name in ap->a_cnp is looked up with union_lookup1, first in the
+ * upper and then in the lower directory of the union node, and the
+ * results are combined into a vnode by union_allocvp, which is
+ * returned through *ap->a_vpp.  LOCKPARENT is forced on in cn_flags
+ * for the per-layer lookups and cleared again afterwards when the
+ * caller had not set it.
+ *
+ * Returns 0 on success or an error from the per-layer lookups,
+ * union_mkshadow or union_allocvp.
+ */
+int
+union_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ int uerror, lerror;
+ struct vnode *uppervp, *lowervp;
+ struct vnode *upperdvp, *lowerdvp;
+ struct vnode *dvp = ap->a_dvp;
+ struct union_node *dun = VTOUNION(dvp);
+ struct componentname *cnp = ap->a_cnp;
+ /* remember whether the caller asked for LOCKPARENT before forcing it */
+ int lockparent = cnp->cn_flags & LOCKPARENT;
+ /* rdonly is computed here but not referenced again in this function */
+ int rdonly = cnp->cn_flags & RDONLY;
+ struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount);
+ struct ucred *saved_cred;
+
+ cnp->cn_flags |= LOCKPARENT;
+
+ upperdvp = dun->un_uppervp;
+ lowerdvp = dun->un_lowervp;
+ uppervp = NULLVP;
+ lowervp = NULLVP;
+
+ /*
+ * do the lookup in the upper level.
+ * if that level consumes additional pathnames,
+ * then assume that something special is going
+ * on and just return that vnode.
+ */
+ if (upperdvp) {
+ FIXUP(dun);
+ uerror = union_lookup1(um->um_uppervp, upperdvp,
+ &uppervp, cnp);
+ /*if (uppervp == upperdvp)
+ dun->un_flags |= UN_KLOCK;*/
+
+ if (cnp->cn_consume != 0) {
+ *ap->a_vpp = uppervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (uerror);
+ }
+ } else {
+ uerror = ENOENT;
+ }
+
+ /*
+ * in a similar way to the upper layer, do the lookup
+ * in the lower layer. this time, if there is some
+ * component magic going on, then vput whatever we got
+ * back from the upper layer and return the lower vnode
+ * instead.
+ */
+ if (lowerdvp) {
+ int nameiop;
+
+ VOP_LOCK(lowerdvp);
+
+ /*
+ * Only do a LOOKUP on the bottom node, since
+ * we won't be making changes to it anyway.
+ */
+ nameiop = cnp->cn_nameiop;
+ cnp->cn_nameiop = LOOKUP;
+ /*
+ * For an UNMNT_BELOW mount the lower lookup runs with the
+ * credentials stored in the union mount (um_cred); the
+ * caller's credentials are restored right after.
+ */
+ if (um->um_op == UNMNT_BELOW) {
+ saved_cred = cnp->cn_cred;
+ cnp->cn_cred = um->um_cred;
+ }
+ lerror = union_lookup1(um->um_lowervp, lowerdvp,
+ &lowervp, cnp);
+ if (um->um_op == UNMNT_BELOW)
+ cnp->cn_cred = saved_cred;
+ cnp->cn_nameiop = nameiop;
+
+ if (lowervp != lowerdvp)
+ VOP_UNLOCK(lowerdvp);
+
+ if (cnp->cn_consume != 0) {
+ if (uppervp) {
+ if (uppervp == upperdvp)
+ vrele(uppervp);
+ else
+ vput(uppervp);
+ uppervp = NULLVP;
+ }
+ *ap->a_vpp = lowervp;
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+ return (lerror);
+ }
+ } else {
+ lerror = ENOENT;
+ }
+
+ if (!lockparent)
+ cnp->cn_flags &= ~LOCKPARENT;
+
+ /*
+ * at this point, we have uerror and lerror indicating
+ * possible errors with the lookups in the upper and lower
+ * layers. additionally, uppervp and lowervp are (locked)
+ * references to existing vnodes in the upper and lower layers.
+ *
+ * there are now three cases to consider.
+ * 1. if both layers returned an error, then return whatever
+ * error the upper layer generated.
+ *
+ * 2. if the top layer failed and the bottom layer succeeded
+ * then two subcases occur.
+ * a. the bottom vnode is not a directory, in which
+ * case just return a new union vnode referencing
+ * an empty top layer and the existing bottom layer.
+ * b. the bottom vnode is a directory, in which case
+ * create a new directory in the top-level and
+ * continue as in case 3.
+ *
+ * 3. if the top layer succeeded then return a new union
+ * vnode referencing whatever the new top layer and
+ * whatever the bottom layer returned.
+ */
+
+ *ap->a_vpp = NULLVP;
+
+ /* case 1. */
+ if ((uerror != 0) && (lerror != 0)) {
+ return (uerror);
+ }
+
+ /* case 2. */
+ if (uerror != 0 /* && (lerror == 0) */ ) {
+ if (lowervp->v_type == VDIR) { /* case 2b. */
+ /*
+ * The upper directory is unlocked around the call to
+ * union_mkshadow and re-locked afterwards, with
+ * UN_ULOCK kept in step.
+ * NOTE(review): upperdvp is used here without a NULL
+ * check - presumably a union directory always has an
+ * upper vnode when this path is reached; confirm.
+ */
+ dun->un_flags &= ~UN_ULOCK;
+ VOP_UNLOCK(upperdvp);
+ uerror = union_mkshadow(um, upperdvp, cnp, &uppervp);
+ VOP_LOCK(upperdvp);
+ dun->un_flags |= UN_ULOCK;
+
+ if (uerror) {
+ if (lowervp) {
+ vput(lowervp);
+ lowervp = NULLVP;
+ }
+ return (uerror);
+ }
+ }
+ }
+
+ if (lowervp)
+ VOP_UNLOCK(lowervp);
+
+ error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp,
+ uppervp, lowervp);
+
+ if (error) {
+ /* lowervp was unlocked above, hence vrele rather than vput */
+ if (uppervp)
+ vput(uppervp);
+ if (lowervp)
+ vrele(lowervp);
+ } else {
+ /*
+ * Unlock the parent unless the result is the parent itself,
+ * or the caller asked for LOCKPARENT on the last component.
+ */
+ if (*ap->a_vpp != dvp)
+ if (!lockparent || !(cnp->cn_flags & ISLASTCN))
+ VOP_UNLOCK(dvp);
+ }
+
+ return (error);
+}
+
+/*
+ * Create vnode operation.
+ *
+ * When the directory's union node has an upper vnode, the object is
+ * created there with VOP_CREATE and wrapped in a union vnode by
+ * union_allocvp, which is returned through *ap->a_vpp.  The union
+ * directory vnode ap->a_dvp is released with vput on every path.
+ *
+ * Returns 0 on success, the error from VOP_CREATE or union_allocvp,
+ * or EROFS when the directory has no upper vnode.
+ */
+int
+union_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		/*
+		 * Keep our own reference to the upper directory and set
+		 * UN_KLOCK before releasing the union vnode.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount point before the vput: once our
+		 * reference is gone the union vnode must not be
+		 * dereferenced any more.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_CREATE(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Mknod vnode operation.
+ *
+ * When the directory's union node has an upper vnode, the node is
+ * created there with VOP_MKNOD.  If that call hands back a vnode it
+ * is wrapped in a union vnode by union_allocvp and returned through
+ * *ap->a_vpp; otherwise *ap->a_vpp is not written.  The union
+ * directory vnode ap->a_dvp is released with vput on every path.
+ *
+ * Returns 0 on success, the error from VOP_MKNOD or union_allocvp,
+ * or EROFS when the directory has no upper vnode.
+ */
+int
+union_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+
+		/*
+		 * Keep our own reference to the upper directory and set
+		 * UN_KLOCK before releasing the union vnode.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount point before the vput: once our
+		 * reference is gone the union vnode must not be
+		 * dereferenced any more.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKNOD(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		if (vp) {
+			error = union_allocvp(
+					ap->a_vpp,
+					mp,
+					ap->a_dvp,
+					NULLVP,
+					ap->a_cnp,
+					vp,
+					NULLVP);
+			if (error)
+				vput(vp);
+		}
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Open vnode operation.
+ *
+ * - If the union node has an upper vnode, that vnode is opened.
+ * - Otherwise, if a regular lower file is opened for writing, a new
+ *   upper file is created (union_vn_create), the lower contents are
+ *   copied up unless O_TRUNC was given, earlier opens of the lower
+ *   vnode (un_openl) are moved over to the upper vnode, and the
+ *   upper vnode is opened.
+ * - Otherwise the lower vnode is opened and un_openl is incremented.
+ *
+ * Returns 0 on success or the first error met while creating,
+ * copying or opening.
+ */
+int
+union_open(ap)
+	struct vop_open_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *tvp;
+	int mode = ap->a_mode;
+	struct ucred *cred = ap->a_cred;
+	struct proc *p = ap->a_p;
+	int error;
+
+	/*
+	 * If there is an existing upper vp then simply open that.
+	 */
+	tvp = un->un_uppervp;
+	if (tvp == NULLVP) {
+		/*
+		 * If the lower vnode is being opened for writing, then
+		 * copy the file contents to the upper vnode and open that,
+		 * otherwise can simply open the lower vnode.
+		 */
+		tvp = un->un_lowervp;
+		if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) {
+			struct vnode *vp;
+			int i;
+
+			/*
+			 * Open the named file in the upper layer.  Note that
+			 * the file may have come into existence *since* the
+			 * lookup was done, since the upper layer may really
+			 * be a loopback mount of some other filesystem...
+			 * so open the file with exclusive create and barf if
+			 * it already exists.
+			 * XXX - perhaps should re-lookup the node (once more
+			 * with feeling) and simply open that.  Who knows.
+			 */
+			error = union_vn_create(&vp, un, p);
+			if (error)
+				return (error);
+
+			/* at this point, uppervp is locked */
+			union_newupper(un, vp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Now, if the file is being opened with truncation,
+			 * then the (new) upper vnode is ready to fly,
+			 * otherwise the data from the lower vnode must be
+			 * copied to the upper layer first.  This only works
+			 * for regular files (check is made above).
+			 */
+			if ((mode & O_TRUNC) == 0) {
+				/*
+				 * XXX - should not ignore errors
+				 * from VOP_CLOSE
+				 */
+				VOP_LOCK(tvp);
+				error = VOP_OPEN(tvp, FREAD, cred, p);
+				if (error == 0) {
+					error = union_copyfile(p, cred,
+						tvp, un->un_uppervp);
+					VOP_UNLOCK(tvp);
+					/*
+					 * VOP_CLOSE takes the credentials
+					 * and process as well (compare
+					 * union_close); pass them along.
+					 */
+					(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				} else {
+					VOP_UNLOCK(tvp);
+				}
+
+#ifdef UNION_DIAGNOSTIC
+				if (!error)
+					uprintf("union: copied up %s\n",
+						un->un_path);
+#endif
+			}
+
+			/*
+			 * Close the write-open obtained when the upper file
+			 * was created; the upper vnode is unlocked around
+			 * the call and UN_ULOCK is kept in step.
+			 */
+			un->un_flags &= ~UN_ULOCK;
+			VOP_UNLOCK(un->un_uppervp);
+			union_vn_close(un->un_uppervp, FWRITE, cred, p);
+			VOP_LOCK(un->un_uppervp);
+			un->un_flags |= UN_ULOCK;
+
+			/*
+			 * Subsequent IOs will go to the top layer, so
+			 * call close on the lower vnode and open on the
+			 * upper vnode to ensure that the filesystem keeps
+			 * its references counts right.  This doesn't do
+			 * the right thing with (cred) and (FREAD) though.
+			 * Ignoring error returns is not right, either.
+			 */
+			for (i = 0; i < un->un_openl; i++) {
+				(void) VOP_CLOSE(tvp, FREAD, cred, p);
+				(void) VOP_OPEN(un->un_uppervp, FREAD, cred, p);
+			}
+			un->un_openl = 0;
+
+			if (error == 0)
+				error = VOP_OPEN(un->un_uppervp, mode, cred, p);
+			return (error);
+		}
+
+		/*
+		 * Just open the lower vnode
+		 */
+		un->un_openl++;
+		VOP_LOCK(tvp);
+		error = VOP_OPEN(tvp, mode, cred, p);
+		VOP_UNLOCK(tvp);
+
+		return (error);
+	}
+
+	FIXUP(un);
+
+	error = VOP_OPEN(tvp, mode, cred, p);
+
+	return (error);
+}
+
+/*
+ * Close vnode operation.
+ *
+ * The close is passed to the upper vnode when there is one.
+ * Otherwise the count of opens on the lower vnode (un_openl) is
+ * decremented and the close goes to the lower vnode.
+ * Returns whatever VOP_CLOSE returns.
+ */
+int
+union_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *target = un->un_uppervp;
+
+	if (target == NULLVP) {
+#ifdef UNION_DIAGNOSTIC
+		if (un->un_openl <= 0)
+			panic("union: un_openl cnt");
+#endif
+		--un->un_openl;
+		target = un->un_lowervp;
+	}
+
+	return (VOP_CLOSE(target, ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Check access permission on the union vnode.
+ *
+ * If the node has an upper vnode, only that vnode is checked.
+ * Otherwise the lower vnode is checked with the caller's
+ * credentials and, for a mount of type UNMNT_BELOW, a second time
+ * with the credentials recorded in the union mount (um_cred), so
+ * that no additional permissions are given away through the lower
+ * layer.
+ *
+ * Returns 0 if access is granted, the error from VOP_ACCESS
+ * otherwise, or EACCES when the node has neither an upper nor a
+ * lower vnode.
+ */
+int
+union_access(ap)
+	struct vop_access_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error = EACCES;
+	struct vnode *vp;
+
+	if ((vp = un->un_uppervp) != NULLVP) {
+		FIXUP(un);
+		return (VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p));
+	}
+
+	if ((vp = un->un_lowervp) != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_ACCESS(vp, ap->a_mode, ap->a_cred, ap->a_p);
+		if (error == 0) {
+			/*
+			 * The union mount data hangs off the mount of
+			 * the union vnode (ap->a_vp), not off the mount
+			 * of the lower vnode, which belongs to the
+			 * underlying filesystem.
+			 */
+			struct union_mount *um =
+			    MOUNTTOUNIONMOUNT(ap->a_vp->v_mount);
+
+			if (um->um_op == UNMNT_BELOW)
+				error = VOP_ACCESS(vp, ap->a_mode,
+					um->um_cred, ap->a_p);
+		}
+		VOP_UNLOCK(vp);
+	}
+
+	return (error);
+}
+
+/*
+ * Getattr vnode operation.
+ *
+ * The attributes come from the upper vnode when there is one and
+ * from the lower vnode otherwise.  When the upper vnode is a
+ * directory and a lower vnode exists, the lower attributes are
+ * fetched as well and, if the lower vnode is a directory too, its
+ * link count is added to the result.  Finally va_fsid of the
+ * caller's vattr is replaced by the fsid of the union mount.
+ *
+ * Returns 0 on success or the error from VOP_GETATTR.
+ */
+int
+union_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	int error;
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *vp;
+	struct vattr *vap;
+	struct vattr va;
+
+	/*
+	 * Some programs walk the filesystem hierarchy by counting
+	 * links to directories to avoid stat'ing all the time.
+	 * This means the link count on directories needs to be "correct".
+	 * The only way to do that is to call getattr on both layers
+	 * and fix up the link count.  The link count will not necessarily
+	 * be accurate but will be large enough to defeat the tree walkers.
+	 */
+
+	vap = ap->a_vap;
+
+	vp = un->un_uppervp;
+	if (vp != NULLVP) {
+		FIXUP(un);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Decide whether the lower layer must be consulted, and where
+	 * its attributes go: straight into the caller's vattr when
+	 * there is no upper vnode, into the scratch vattr when the
+	 * upper vnode is a directory, not at all otherwise.
+	 */
+	if (vp == NULLVP) {
+		vp = un->un_lowervp;
+	} else if (vp->v_type == VDIR) {
+		vp = un->un_lowervp;
+		vap = &va;
+	} else {
+		vp = NULLVP;
+	}
+
+	if (vp != NULLVP) {
+		VOP_LOCK(vp);
+		error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p);
+		VOP_UNLOCK(vp);
+		if (error)
+			return (error);
+
+		/*
+		 * The scratch vattr only holds valid data once the
+		 * lower getattr has succeeded, so the link count is
+		 * merged here and nowhere else.
+		 */
+		if ((vap != ap->a_vap) && (vap->va_type == VDIR))
+			ap->a_vap->va_nlink += vap->va_nlink;
+	}
+
+	/* Always patch the caller's vattr, never the scratch copy. */
+	ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0];
+	return (0);
+}
+
+/*
+ * Setattr vnode operation.
+ *
+ * Attributes can only be set on the upper vnode.  As a special case,
+ * a request that sets the size of a lower-only regular file to zero
+ * first creates an empty upper file (this covers open with O_TRUNC
+ * and O_CREAT).  Without an upper vnode the result is EROFS.
+ */
+int
+union_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+	int error;
+
+	/* assert(un->un_lowervp != NULLVP) whenever there is no upper vp */
+	if ((un->un_uppervp == NULLVP) &&
+	    (un->un_lowervp->v_type == VREG) &&
+	    (ap->a_vap->va_size == 0)) {
+		struct vnode *newvp;
+
+		error = union_vn_create(&newvp, un, ap->a_p);
+		if (error)
+			return (error);
+
+		/* the freshly created upper vnode comes back locked */
+		union_newupper(un, newvp);
+
+		VOP_UNLOCK(newvp);
+		union_vn_close(un->un_uppervp, FWRITE, ap->a_cred, ap->a_p);
+		VOP_LOCK(newvp);
+		un->un_flags |= UN_ULOCK;
+	}
+
+	/* No upper layer to modify: behave like a read-only filesystem. */
+	if (un->un_uppervp == NULLVP)
+		return (EROFS);
+
+	FIXUP(un);
+	return (VOP_SETATTR(un->un_uppervp, ap->a_vap,
+		ap->a_cred, ap->a_p));
+}
+
+/*
+ * Read vnode operation: forward the read to the vnode selected by
+ * OTHERVP.  If that is the lower vnode it is locked for the duration
+ * of the call; otherwise FIXUP makes sure the upper vnode is locked.
+ * Returns whatever VOP_READ returns.
+ */
+int
+union_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int error;
+
+	if (target == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(target);
+		error = VOP_READ(target, ap->a_uio, ap->a_ioflag, ap->a_cred);
+		VOP_UNLOCK(target);
+		return (error);
+	}
+
+	FIXUP(VTOUNION(ap->a_vp));
+	return (VOP_READ(target, ap->a_uio, ap->a_ioflag, ap->a_cred));
+}
+
+/*
+ * Write vnode operation: forward the write to the vnode selected by
+ * OTHERVP.  If that is the lower vnode it is locked for the duration
+ * of the call; otherwise FIXUP makes sure the upper vnode is locked.
+ * Returns whatever VOP_WRITE returns.
+ *
+ * The argument block is declared as vop_write_args, the structure
+ * that belongs to this operation (it was spelled vop_read_args).
+ *
+ * NOTE(review): nothing here prevents the write from reaching the
+ * lower vnode when OTHERVP selects it - confirm that callers always
+ * have an upper vnode by the time they write.
+ */
+int
+union_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error;
+	struct vnode *vp = OTHERVP(ap->a_vp);
+	int dolock = (vp == LOWERVP(ap->a_vp));
+
+	if (dolock)
+		VOP_LOCK(vp);
+	else
+		FIXUP(VTOUNION(ap->a_vp));
+	error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred);
+	if (dolock)
+		VOP_UNLOCK(vp);
+
+	return (error);
+}
+
+/*
+ * Ioctl vnode operation: hand the request unchanged to the vnode
+ * selected by OTHERVP and return its result.
+ */
+int
+union_ioctl(ap)
+	struct vop_ioctl_args /* {
+		struct vnode *a_vp;
+		int a_command;
+		caddr_t a_data;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+
+	return (VOP_IOCTL(target, ap->a_command, ap->a_data,
+		ap->a_fflag, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Select vnode operation: hand the request unchanged to the vnode
+ * selected by OTHERVP and return its result.
+ */
+int
+union_select(ap)
+	struct vop_select_args /* {
+		struct vnode *a_vp;
+		int a_which;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+
+	return (VOP_SELECT(target, ap->a_which, ap->a_fflags,
+		ap->a_cred, ap->a_p));
+}
+
+/*
+ * Mmap vnode operation: hand the request unchanged to the vnode
+ * selected by OTHERVP and return its result.
+ */
+int
+union_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+
+	return (VOP_MMAP(target, ap->a_fflags, ap->a_cred, ap->a_p));
+}
+
+/*
+ * Fsync vnode operation: sync the vnode selected by OTHERVP.
+ * Returns 0 when there is no such vnode, otherwise the result of
+ * VOP_FSYNC.  A lower vnode is locked around the call; for the upper
+ * vnode FIXUP makes sure it is locked.
+ */
+int
+union_fsync(ap)
+	struct vop_fsync_args /* {
+		struct vnode *a_vp;
+		struct ucred *a_cred;
+		int a_waitfor;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *targetvp = OTHERVP(ap->a_vp);
+	int error;
+
+	if (targetvp == NULLVP)
+		return (0);
+
+	if (targetvp == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(targetvp);
+		error = VOP_FSYNC(targetvp, ap->a_cred,
+			ap->a_waitfor, ap->a_p);
+		VOP_UNLOCK(targetvp);
+		return (error);
+	}
+
+	FIXUP(VTOUNION(ap->a_vp));
+	return (VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, ap->a_p));
+}
+
+/*
+ * Seek vnode operation: hand the request unchanged to the vnode
+ * selected by OTHERVP and return its result.
+ */
+int
+union_seek(ap)
+	struct vop_seek_args /* {
+		struct vnode *a_vp;
+		off_t a_oldoff;
+		off_t a_newoff;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+
+	return (VOP_SEEK(target, ap->a_oldoff, ap->a_newoff,
+		ap->a_cred));
+}
+
+/*
+ * Remove vnode operation.
+ *
+ * Removal only works when both the directory and the object have an
+ * upper vnode; it is then carried out on the upper layer with
+ * VOP_REMOVE and, on success, union_removed_upper is told about it.
+ * In every other case both union vnodes are released and EROFS is
+ * returned.
+ */
+int
+union_remove(ap)
+	struct vop_remove_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *upper_dvp, *upper_vp;
+	int error;
+
+	if (dun->un_uppervp == NULLVP || un->un_uppervp == NULLVP) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	upper_dvp = dun->un_uppervp;
+	upper_vp = un->un_uppervp;
+
+	/* Swap the union directory for a reference to its upper vnode. */
+	FIXUP(dun);
+	VREF(upper_dvp);
+	dun->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	/* Likewise for the object being removed. */
+	FIXUP(un);
+	VREF(upper_vp);
+	un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_REMOVE(upper_dvp, upper_vp, ap->a_cnp);
+	if (error == 0)
+		union_removed_upper(un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Link vnode operation.
+ *
+ * When both union nodes have an upper vnode the request is handed to
+ * VOP_LINK on the upper layer; otherwise both arguments are released
+ * and EROFS is returned.
+ *
+ * NOTE(review): the local names treat a_vp as the directory
+ * (dun/dvp) and a_tdvp as the object (un/vp); a_vp is released with
+ * vput and a_tdvp with vrele.  Confirm this against the VOP_LINK
+ * calling convention in use.
+ */
+int
+union_link(ap)
+ struct vop_link_args /* {
+ struct vnode *a_vp;
+ struct vnode *a_tdvp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ int error;
+ struct union_node *dun = VTOUNION(ap->a_vp);
+ struct union_node *un = VTOUNION(ap->a_tdvp);
+
+ if (dun->un_uppervp && un->un_uppervp) {
+ struct vnode *dvp = dun->un_uppervp;
+ struct vnode *vp = un->un_uppervp;
+
+ /* Swap a_vp for a reference to its upper vnode. */
+ FIXUP(dun);
+ VREF(dvp);
+ dun->un_flags |= UN_KLOCK;
+ vput(ap->a_vp);
+ /*
+ * NOTE(review): FIXUP(un) may lock the upper vnode of a_tdvp
+ * and set UN_ULOCK; nothing in this function undoes that -
+ * confirm who releases it.
+ */
+ FIXUP(un);
+ VREF(vp);
+ vrele(ap->a_tdvp);
+
+ error = VOP_LINK(dvp, vp, ap->a_cnp);
+ } else {
+ /*
+ * XXX: need to copy to upper layer
+ * and do the link there.
+ */
+ vput(ap->a_vp);
+ vrele(ap->a_tdvp);
+ error = EROFS;
+ }
+
+ return (error);
+}
+
+/*
+ * Rename vnode operation.
+ *
+ * Each of the four vnodes that belongs to the union layer (v_op ==
+ * union_vnodeop_p) is replaced by a new reference to its upper vnode
+ * and the reference to the union vnode is dropped; VOP_RENAME is
+ * then called with the substituted vnodes and its result returned.
+ *
+ * If one of the union nodes has no upper vnode the operation fails
+ * with EROFS after releasing whatever fdvp, fvp, tdvp and tvp refer
+ * to at that point (see the "bad" label).
+ */
+int
+union_rename(ap)
+ struct vop_rename_args /* {
+ struct vnode *a_fdvp;
+ struct vnode *a_fvp;
+ struct componentname *a_fcnp;
+ struct vnode *a_tdvp;
+ struct vnode *a_tvp;
+ struct componentname *a_tcnp;
+ } */ *ap;
+{
+ int error;
+
+ struct vnode *fdvp = ap->a_fdvp;
+ struct vnode *fvp = ap->a_fvp;
+ struct vnode *tdvp = ap->a_tdvp;
+ struct vnode *tvp = ap->a_tvp;
+
+ /* Source directory: released with vrele. */
+ if (fdvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fdvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ FIXUP(un);
+ fdvp = un->un_uppervp;
+ VREF(fdvp);
+ vrele(ap->a_fdvp);
+ }
+
+ /* Source object: released with vrele. */
+ if (fvp->v_op == union_vnodeop_p) { /* always true */
+ struct union_node *un = VTOUNION(fvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ FIXUP(un);
+ fvp = un->un_uppervp;
+ VREF(fvp);
+ vrele(ap->a_fvp);
+ }
+
+ /*
+ * Target directory: UN_KLOCK is set before the union vnode is
+ * released with vput.
+ */
+ if (tdvp->v_op == union_vnodeop_p) {
+ struct union_node *un = VTOUNION(tdvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ tdvp = un->un_uppervp;
+ VREF(tdvp);
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_tdvp);
+ }
+
+ /* Target object, if any: handled like the target directory. */
+ if (tvp && tvp->v_op == union_vnodeop_p) {
+ struct union_node *un = VTOUNION(tvp);
+ if (un->un_uppervp == NULLVP) {
+ error = EROFS;
+ goto bad;
+ }
+
+ tvp = un->un_uppervp;
+ VREF(tvp);
+ un->un_flags |= UN_KLOCK;
+ vput(ap->a_tvp);
+ }
+
+ return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp));
+
+bad:
+ /*
+ * fdvp/fvp/tdvp/tvp hold either the original union vnodes or the
+ * upper vnodes already substituted above.
+ */
+ vrele(fdvp);
+ vrele(fvp);
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+
+ return (error);
+}
+
+/*
+ * Mkdir vnode operation.
+ *
+ * When the parent's union node has an upper vnode, the directory is
+ * created there with VOP_MKDIR and wrapped in a union vnode by
+ * union_allocvp, which is returned through *ap->a_vpp.  The union
+ * directory vnode ap->a_dvp is released with vput on every path.
+ *
+ * Returns 0 on success, the error from VOP_MKDIR or union_allocvp,
+ * or EROFS when the parent has no upper vnode.
+ */
+int
+union_mkdir(ap)
+	struct vop_mkdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+		struct mount *mp;
+
+		FIXUP(un);
+		/*
+		 * Keep our own reference to the upper directory and set
+		 * UN_KLOCK before releasing the union vnode.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		/*
+		 * Pick up the mount point before the vput: once our
+		 * reference is gone the union vnode must not be
+		 * dereferenced any more.
+		 */
+		mp = ap->a_dvp->v_mount;
+		vput(ap->a_dvp);
+		error = VOP_MKDIR(dvp, &vp, ap->a_cnp, ap->a_vap);
+		if (error)
+			return (error);
+
+		error = union_allocvp(
+				ap->a_vpp,
+				mp,
+				ap->a_dvp,
+				NULLVP,
+				ap->a_cnp,
+				vp,
+				NULLVP);
+		if (error)
+			vput(vp);
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Rmdir vnode operation.
+ *
+ * Removal only works when both the parent and the directory have an
+ * upper vnode; it is then carried out on the upper layer with
+ * VOP_RMDIR and, on success, union_removed_upper is told about it.
+ * In every other case both union vnodes are released and EROFS is
+ * returned.
+ */
+int
+union_rmdir(ap)
+	struct vop_rmdir_args /* {
+		struct vnode *a_dvp;
+		struct vnode *a_vp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct union_node *dun = VTOUNION(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_vp);
+	struct vnode *upper_dvp, *upper_vp;
+	int error;
+
+	if (dun->un_uppervp == NULLVP || un->un_uppervp == NULLVP) {
+		/*
+		 * XXX: should create a whiteout here
+		 */
+		vput(ap->a_dvp);
+		vput(ap->a_vp);
+		return (EROFS);
+	}
+
+	upper_dvp = dun->un_uppervp;
+	upper_vp = un->un_uppervp;
+
+	/* Swap the union parent for a reference to its upper vnode. */
+	FIXUP(dun);
+	VREF(upper_dvp);
+	dun->un_flags |= UN_KLOCK;
+	vput(ap->a_dvp);
+
+	/* Likewise for the directory being removed. */
+	FIXUP(un);
+	VREF(upper_vp);
+	un->un_flags |= UN_KLOCK;
+	vput(ap->a_vp);
+
+	error = VOP_RMDIR(upper_dvp, upper_vp, ap->a_cnp);
+	if (error == 0)
+		union_removed_upper(un);
+
+	/*
+	 * XXX: should create a whiteout here
+	 */
+	return (error);
+}
+
+/*
+ * Symlink vnode operation.
+ *
+ * When the directory's union node has an upper vnode, the symbolic
+ * link is created there with VOP_SYMLINK.  The vnode that call may
+ * hand back is not used here and *ap->a_vpp is always set to NULLVP.
+ * The union directory vnode ap->a_dvp is released with vput on every
+ * path.
+ *
+ * Returns the result of VOP_SYMLINK, or EROFS when the directory has
+ * no upper vnode.
+ */
+int
+union_symlink(ap)
+	struct vop_symlink_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+		char *a_target;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	struct vnode *dvp = un->un_uppervp;
+
+	if (dvp) {
+		int error;
+		struct vnode *vp;
+
+		FIXUP(un);
+		/*
+		 * Keep our own reference to the upper directory and set
+		 * UN_KLOCK before releasing the union vnode.
+		 */
+		VREF(dvp);
+		un->un_flags |= UN_KLOCK;
+		vput(ap->a_dvp);
+		error = VOP_SYMLINK(dvp, &vp, ap->a_cnp,
+			ap->a_vap, ap->a_target);
+		*ap->a_vpp = NULLVP;
+		return (error);
+	}
+
+	vput(ap->a_dvp);
+	return (EROFS);
+}
+
+/*
+ * Readdir vnode operation.
+ *
+ * Only the upper directory is read here; union_readdir cooperates
+ * with getdirentries, which walks down the union stack, and with
+ * readdir(3), which removes duplicate names from the stream of
+ * entries handed back.
+ *
+ * Returns 0 when the node has no upper vnode, otherwise the result
+ * of VOP_READDIR on the upper vnode.
+ */
+int
+union_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_uppervp == NULLVP)
+		return (0);
+
+	FIXUP(un);
+	return (VOP_READDIR(un->un_uppervp, ap->a_uio, ap->a_cred));
+}
+
+/*
+ * Readlink vnode operation: forward the request to the vnode
+ * selected by OTHERVP.  If that is the lower vnode it is locked for
+ * the duration of the call; otherwise FIXUP makes sure the upper
+ * vnode is locked.  Returns whatever VOP_READLINK returns.
+ */
+int
+union_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int error;
+
+	if (target == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(target);
+		error = VOP_READLINK(target, ap->a_uio, ap->a_cred);
+		VOP_UNLOCK(target);
+		return (error);
+	}
+
+	FIXUP(VTOUNION(ap->a_vp));
+	return (VOP_READLINK(target, ap->a_uio, ap->a_cred));
+}
+
+/*
+ * Abortop vnode operation: forward the abort to the vnode selected
+ * by OTHERVP.
+ *
+ * Locking is only touched when the union node is currently marked
+ * UN_LOCKED: a lower vnode is then locked around the call, while
+ * for the upper vnode FIXUP makes sure it is locked.
+ * Returns whatever VOP_ABORTOP returns.
+ */
+int
+union_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_dvp);
+	struct union_node *un = VTOUNION(ap->a_dvp);
+	int node_locked = un->un_flags & UN_LOCKED;
+	int is_lower = (target == LOWERVP(ap->a_dvp));
+	int error;
+
+	if (node_locked && is_lower) {
+		VOP_LOCK(target);
+	} else if (node_locked) {
+		FIXUP(un);
+	}
+
+	error = VOP_ABORTOP(target, ap->a_cnp);
+
+	if (node_locked && is_lower) {
+		VOP_UNLOCK(target);
+	}
+
+	return (error);
+}
+
+/*
+ * Inactive vnode operation.
+ *
+ * Deliberately does nothing (and does _not_ bypass to the layers
+ * below): releasing the lower vnode is postponed until reclaim so
+ * that the union_node stays in the cache and can be reused in the
+ * meantime.
+ *
+ * NEEDSWORK: Someday, consider inactive'ing the lowervp and then
+ * trying to reactivate it with capabilities (v_id) like they do in
+ * the name lookup cache code.  That's too much work for now.
+ *
+ * Always returns 0.
+ */
+int
+union_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+#ifdef UNION_DIAGNOSTIC
+	if (VTOUNION(ap->a_vp)->un_flags & UN_LOCKED)
+		panic("union: inactivating locked node");
+#endif
+
+	return (0);
+}
+
+/*
+ * Reclaim vnode operation: hand the vnode to union_freevp.
+ * Always returns 0.
+ */
+int
+union_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	union_freevp(vp);
+	return (0);
+}
+
+/*
+ * Lock vnode operation.
+ *
+ * Waits while the vnode is marked VXLOCK, makes sure the upper vnode
+ * (if there is one) is locked and flagged UN_ULOCK, and then sleeps
+ * until UN_LOCKED is clear before setting it.  After every sleep on
+ * UN_LOCKED the whole sequence is restarted from the top.
+ * Always returns 0.
+ */
+int
+union_lock(ap)
+ struct vop_lock_args *ap;
+{
+ struct vnode *vp = ap->a_vp;
+ struct union_node *un;
+
+start:
+ /* Wait for VXLOCK to clear, asking for a wakeup through VXWANT. */
+ while (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ }
+
+ un = VTOUNION(vp);
+
+ if (un->un_uppervp) {
+ /* Note: the flag is set before the lock call is made. */
+ if ((un->un_flags & UN_ULOCK) == 0) {
+ un->un_flags |= UN_ULOCK;
+ VOP_LOCK(un->un_uppervp);
+ }
+#ifdef DIAGNOSTIC
+ if (un->un_flags & UN_KLOCK)
+ panic("union: dangling upper lock");
+#endif
+ }
+
+ if (un->un_flags & UN_LOCKED) {
+#ifdef DIAGNOSTIC
+ if (curproc && un->un_pid == curproc->p_pid &&
+ un->un_pid > -1 && curproc->p_pid > -1)
+ panic("union: locking against myself");
+#endif
+ /* Node is held: flag UN_WANT, sleep on un_flags, start over. */
+ un->un_flags |= UN_WANT;
+ sleep((caddr_t) &un->un_flags, PINOD);
+ goto start;
+ }
+
+#ifdef DIAGNOSTIC
+ /* Record the locking process; -1 when there is no curproc. */
+ if (curproc)
+ un->un_pid = curproc->p_pid;
+ else
+ un->un_pid = -1;
+#endif
+
+ un->un_flags |= UN_LOCKED;
+ return (0);
+}
+
+/*
+ * Unlock vnode operation.
+ *
+ * Clears UN_LOCKED, unlocks the upper vnode when UN_ULOCK is set and
+ * UN_KLOCK is not, clears both of those flags, and wakes up anybody
+ * who set UN_WANT while sleeping on un_flags.  Always returns 0.
+ *
+ * The argument block is declared as vop_unlock_args, the structure
+ * that belongs to this operation (it was spelled vop_lock_args).
+ */
+int
+union_unlock(ap)
+	struct vop_unlock_args *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+#ifdef DIAGNOSTIC
+	if ((un->un_flags & UN_LOCKED) == 0)
+		panic("union: unlock unlocked node");
+	if (curproc && un->un_pid != curproc->p_pid &&
+			curproc->p_pid > -1 && un->un_pid > -1)
+		panic("union: unlocking other process's union node");
+#endif
+
+	un->un_flags &= ~UN_LOCKED;
+
+	/*
+	 * With UN_KLOCK set the upper vnode stays locked; the flag
+	 * is consumed here either way.
+	 */
+	if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK)
+		VOP_UNLOCK(un->un_uppervp);
+
+	un->un_flags &= ~(UN_ULOCK|UN_KLOCK);
+
+	if (un->un_flags & UN_WANT) {
+		un->un_flags &= ~UN_WANT;
+		wakeup((caddr_t) &un->un_flags);
+	}
+
+#ifdef DIAGNOSTIC
+	un->un_pid = 0;
+#endif
+
+	return (0);
+}
+
+/*
+ * Bmap vnode operation: forward the request to the vnode selected by
+ * OTHERVP.  If that is the lower vnode it is locked for the duration
+ * of the call; otherwise FIXUP makes sure the upper vnode is locked.
+ * Returns whatever VOP_BMAP returns.
+ */
+int
+union_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int error;
+
+	if (target == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(target);
+		error = VOP_BMAP(target, ap->a_bn, ap->a_vpp,
+			ap->a_bnp, ap->a_runp);
+		VOP_UNLOCK(target);
+		return (error);
+	}
+
+	FIXUP(VTOUNION(ap->a_vp));
+	return (VOP_BMAP(target, ap->a_bn, ap->a_vpp,
+		ap->a_bnp, ap->a_runp));
+}
+
+/*
+ * Print vnode operation: print one line showing the union vnode
+ * together with its upper and lower vnodes.  Always returns 0.
+ */
+int
+union_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct vnode *unionvp = ap->a_vp;
+
+	printf("\ttag VT_UNION, vp=%x, uppervp=%x, lowervp=%x\n",
+		unionvp, UPPERVP(unionvp), LOWERVP(unionvp));
+
+	return (0);
+}
+
+/*
+ * Islocked vnode operation: returns 1 when UN_LOCKED is set in the
+ * union node's flags and 0 otherwise.
+ */
+int
+union_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct union_node *un = VTOUNION(ap->a_vp);
+
+	if (un->un_flags & UN_LOCKED)
+		return (1);
+	return (0);
+}
+
+/*
+ * Pathconf vnode operation: forward the request to the vnode
+ * selected by OTHERVP.  If that is the lower vnode it is locked for
+ * the duration of the call; otherwise FIXUP makes sure the upper
+ * vnode is locked.  Returns whatever VOP_PATHCONF returns.
+ */
+int
+union_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+	int error;
+
+	if (target == LOWERVP(ap->a_vp)) {
+		VOP_LOCK(target);
+		error = VOP_PATHCONF(target, ap->a_name, ap->a_retval);
+		VOP_UNLOCK(target);
+		return (error);
+	}
+
+	FIXUP(VTOUNION(ap->a_vp));
+	return (VOP_PATHCONF(target, ap->a_name, ap->a_retval));
+}
+
+/*
+ * Advlock vnode operation: hand the request unchanged to the vnode
+ * selected by OTHERVP and return its result.
+ */
+int
+union_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t a_id;
+		int a_op;
+		struct flock *a_fl;
+		int a_flags;
+	} */ *ap;
+{
+	struct vnode *target = OTHERVP(ap->a_vp);
+
+	return (VOP_ADVLOCK(target, ap->a_id, ap->a_op,
+		ap->a_fl, ap->a_flags));
+}
+
+
+/*
+ * Strategy vnode operation.
+ *
+ * XXX - vop_strategy must be hand coded because it has no
+ * vnode in its arguments.
+ * This goes away with a merged VM/buffer cache.
+ *
+ * The buffer's vnode is temporarily replaced by the vnode selected
+ * by OTHERVP, VOP_STRATEGY is called on the buffer, and the original
+ * vnode is put back before the result is returned.
+ */
+int
+union_strategy(ap)
+	struct vop_strategy_args /* {
+		struct buf *a_bp;
+	} */ *ap;
+{
+	struct buf *bp = ap->a_bp;
+	struct vnode *unionvp = bp->b_vp;
+	int error;
+
+	bp->b_vp = OTHERVP(unionvp);
+
+#ifdef DIAGNOSTIC
+	if (bp->b_vp == NULLVP)
+		panic("union_strategy: nil vp");
+	if (((bp->b_flags & B_READ) == 0) &&
+	    (bp->b_vp == LOWERVP(unionvp)))
+		panic("union_strategy: writing to lowervp");
+#endif
+
+	error = VOP_STRATEGY(bp);
+
+	/* Put the union vnode back into the buffer. */
+	bp->b_vp = unionvp;
+
+	return (error);
+}
+
+/*
+ * Global vfs data structures
+ *
+ * union_vnodeop_entries pairs each vnode operation descriptor with
+ * the union implementation defined above; vop_default_desc is paired
+ * with vn_default_error.  The entries under "#ifdef notdef" are
+ * compiled out, and the table ends with a NULL/NULL sentinel entry.
+ * union_vnodeop_opv_desc ties the table to union_vnodeop_p, the
+ * pointer that union_rename compares against v_op.
+ */
+int (**union_vnodeop_p)();
+struct vnodeopv_entry_desc union_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, union_lookup }, /* lookup */
+ { &vop_create_desc, union_create }, /* create */
+ { &vop_mknod_desc, union_mknod }, /* mknod */
+ { &vop_open_desc, union_open }, /* open */
+ { &vop_close_desc, union_close }, /* close */
+ { &vop_access_desc, union_access }, /* access */
+ { &vop_getattr_desc, union_getattr }, /* getattr */
+ { &vop_setattr_desc, union_setattr }, /* setattr */
+ { &vop_read_desc, union_read }, /* read */
+ { &vop_write_desc, union_write }, /* write */
+ { &vop_ioctl_desc, union_ioctl }, /* ioctl */
+ { &vop_select_desc, union_select }, /* select */
+ { &vop_mmap_desc, union_mmap }, /* mmap */
+ { &vop_fsync_desc, union_fsync }, /* fsync */
+ { &vop_seek_desc, union_seek }, /* seek */
+ { &vop_remove_desc, union_remove }, /* remove */
+ { &vop_link_desc, union_link }, /* link */
+ { &vop_rename_desc, union_rename }, /* rename */
+ { &vop_mkdir_desc, union_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, union_rmdir }, /* rmdir */
+ { &vop_symlink_desc, union_symlink }, /* symlink */
+ { &vop_readdir_desc, union_readdir }, /* readdir */
+ { &vop_readlink_desc, union_readlink }, /* readlink */
+ { &vop_abortop_desc, union_abortop }, /* abortop */
+ { &vop_inactive_desc, union_inactive }, /* inactive */
+ { &vop_reclaim_desc, union_reclaim }, /* reclaim */
+ { &vop_lock_desc, union_lock }, /* lock */
+ { &vop_unlock_desc, union_unlock }, /* unlock */
+ { &vop_bmap_desc, union_bmap }, /* bmap */
+ { &vop_strategy_desc, union_strategy }, /* strategy */
+ { &vop_print_desc, union_print }, /* print */
+ { &vop_islocked_desc, union_islocked }, /* islocked */
+ { &vop_pathconf_desc, union_pathconf }, /* pathconf */
+ { &vop_advlock_desc, union_advlock }, /* advlock */
+#ifdef notdef
+ { &vop_blkatoff_desc, union_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, union_valloc }, /* valloc */
+ { &vop_vfree_desc, union_vfree }, /* vfree */
+ { &vop_truncate_desc, union_truncate }, /* truncate */
+ { &vop_update_desc, union_update }, /* update */
+ { &vop_bwrite_desc, union_bwrite }, /* bwrite */
+#endif
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc union_vnodeop_opv_desc =
+ { &union_vnodeop_p, union_vnodeop_entries };
diff --git a/sys/gnu/ext2fs/ext2_bmap.c b/sys/gnu/ext2fs/ext2_bmap.c
new file mode 100644
index 0000000..bcd838d
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Translate a logical block number of a file into the physical block
+ * number on disk.  The real work happens in ufs_bmaparray(); this routine
+ * only unpacks the vop arguments and handles the trivial requests.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* Hand back the device vnode when the caller asked for it. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(vp)->i_devvp;
+
+	/* With no place to store a mapping there is nothing left to do. */
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(vp, ap->a_bn, ap->a_bnp, NULL, NULL,
+		    ap->a_runp));
+	return (0);
+}
+
+/*
+ * Indirect blocks live on the file's own vnode under negative logical block
+ * numbers: a single indirect block is addressed by the negated number of the
+ * first data block it maps, and each double/triple indirect block by one
+ * less than the address of the first block of the next lower level.
+ *
+ * ufs_bmaparray maps logical block bn of vp to a disk address in *bnp
+ * (-1 when no block is assigned).  If ap/nump are supplied (both or
+ * neither) they receive the path of indirect blocks walked and its length;
+ * if runp is supplied it receives the number of blocks that follow bn
+ * contiguously on disk, capped by MAXBSIZE.  Returns 0 or an errno value
+ * from ufs_getlbns() or biowait().
+ */
+
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	struct indir a[NIADDR], *xap;
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	/* Use local scratch space when the caller does not want the path. */
+	xap = ap == NULL ? a : ap;
+	if (!nump)
+		nump = &num;
+	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
+		return (error);
+
+	num = *nump;
+	if (num == 0) {
+		/* No indirection: the address is in the inode's direct array. */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparray: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if ((error = biowait(bp)) != 0) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		/* Final level: count the entries that continue the run on disk. */
+		if (num == 1 && daddr && runp)
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access block bn, storing it in ap and
+ * the number of pairs in *nump (0 for a direct block). The first pair holds
+ * the logical block number of the single, double or triple indirect block
+ * and its index in the inode's i_ib array; that block number then appears
+ * again with the offset into the block itself. Returns 0, or EFBIG if bn
+ * needs more than NIADDR levels of indirection.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+ struct vnode *vp;
+ register daddr_t bn;
+ struct indir *ap;
+ int *nump;
+{
+ long metalbn, realbn;
+ struct ufsmount *ump;
+ int blockcnt, i, numlevels, off;
+
+ ump = VFSTOUFS(vp->v_mount);
+ if (nump)
+ *nump = 0;
+ numlevels = 0;
+ realbn = bn; /* keep the sign: a negative number names a meta-data block */
+ if ((long)bn < 0)
+ bn = -(long)bn; /* the level search below works on the magnitude */
+
+ /* The first NDADDR blocks are direct blocks. */
+ if (bn < NDADDR)
+ return (0);
+
+ /*
+ * Determine the number of levels of indirection. After this loop
+ * is done, blockcnt indicates the number of data blocks possible
+ * at the given level of indirection, and NIADDR - i is one less
+ * than the number of levels needed, i.e. the index into i_ib.
+ */
+ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ if (i == 0)
+ return (EFBIG);
+ blockcnt *= MNINDIR(ump);
+ if (bn < blockcnt)
+ break;
+ }
+
+ /* Calculate the address of the first meta-block. */
+ if (realbn >= 0)
+ metalbn = -(realbn - bn + NIADDR - i);
+ else
+ metalbn = -(-realbn - bn + NIADDR - i);
+
+ /*
+ * At each iteration, off is the offset into the bap array which is
+ * an array of disk addresses at the current level of indirection.
+ * The logical block number and the offset in that block are stored
+ * into the argument array.
+ */
+ ap->in_lbn = metalbn;
+ ap->in_off = off = NIADDR - i;
+ ap->in_exists = 0;
+ ap++;
+ for (++numlevels; i <= NIADDR; i++) {
+ /* If searching for a meta-data block, quit when found. */
+ if (metalbn == realbn)
+ break;
+
+ blockcnt /= MNINDIR(ump);
+ off = (bn / blockcnt) % MNINDIR(ump);
+
+ ++numlevels;
+ ap->in_lbn = metalbn;
+ ap->in_off = off;
+ ap->in_exists = 0;
+ ++ap;
+
+ metalbn -= -1 + off * blockcnt; /* logical number of the child block at slot off */
+ }
+ if (nump)
+ *nump = numlevels;
+ return (0);
+}
diff --git a/sys/gnu/ext2fs/ext2_ihash.c b/sys/gnu/ext2fs/ext2_ihash.c
new file mode 100644
index 0000000..4a37c90
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_ihash.c
@@ -0,0 +1,154 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_ihash.c 8.4 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/proc.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Structures associated with inode caching.
+ */
+struct inode **ihashtbl; /* hash chain heads, indexed by INOHASH() */
+u_long ihash; /* size of hash table - 1 */
+#define INOHASH(device, inum) (((device) + (inum)) & ihash)
+
+/*
+ * Initialize inode hash table: sets ihashtbl, passing &ihash to hashinit().
+ */
+void
+ufs_ihashinit()
+{
+
+ ihashtbl = hashinit(desiredvnodes, M_UFSMNT, &ihash);
+}
+
+/*
+ * Walk the hash chain picked by the device/inum pair and return the vnode of
+ * the matching in-core inode, locked or not; NULL if it is not in core.
+ */
+struct vnode *
+ufs_ihashlookup(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *ip;
+
+	ip = ihashtbl[INOHASH(device, inum)];
+	while (ip != NULL) {
+		if (ip->i_dev == device && ip->i_number == inum)
+			return (ITOV(ip));
+		ip = ip->i_next;
+	}
+	return (NULL);
+}
+
+/*
+ * Look up the in-core inode for the device/inum pair and return its vnode
+ * via vget().  If the inode is locked, sleep until woken and then search
+ * the chain again; the search is also restarted whenever vget() fails.
+ */
+struct vnode *
+ufs_ihashget(device, inum)
+	dev_t device;
+	ino_t inum;
+{
+	register struct inode *ip;
+	struct vnode *vp;
+
+loop:
+	for (ip = ihashtbl[INOHASH(device, inum)]; ip != NULL;
+	    ip = ip->i_next) {
+		if (inum != ip->i_number || device != ip->i_dev)
+			continue;
+		if (ip->i_flag & IN_LOCKED) {
+			ip->i_flag |= IN_WANTED;
+			sleep(ip, PINOD);
+			goto loop;
+		}
+		vp = ITOV(ip);
+		if (vget(vp, 1))
+			goto loop;
+		return (vp);
+	}
+	return (NULL);
+}
+
+/*
+ * Link the inode at the head of its hash chain, then mark it locked and
+ * record the locking process.  Panics if the inode was already locked.
+ */
+void
+ufs_ihashins(ip)
+	struct inode *ip;
+{
+	struct inode **head, *first;
+
+	head = &ihashtbl[INOHASH(ip->i_dev, ip->i_number)];
+	first = *head;
+	if (first != NULL)
+		first->i_prev = &ip->i_next;
+	ip->i_next = first;
+	ip->i_prev = head;
+	*head = ip;
+
+	if (ip->i_flag & IN_LOCKED)
+		panic("ufs_ihashins: already locked");
+	ip->i_lockholder = curproc ? curproc->p_pid : -1;
+	ip->i_flag |= IN_LOCKED;
+}
+
+/*
+ * Remove the inode from its hash chain, splicing its neighbours together.
+ */
+void
+ufs_ihashrem(ip)
+	register struct inode *ip;
+{
+	register struct inode *succ = ip->i_next;
+
+	if (succ != NULL)
+		succ->i_prev = ip->i_prev;
+	*ip->i_prev = succ;
+#ifdef DIAGNOSTIC
+	ip->i_next = NULL;
+	ip->i_prev = NULL;
+#endif
+}
diff --git a/sys/gnu/ext2fs/ext2_mount.h b/sys/gnu/ext2fs/ext2_mount.h
new file mode 100644
index 0000000..237871f
--- /dev/null
+++ b/sys/gnu/ext2fs/ext2_mount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufsmount.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/* This structure describes the UFS specific mount structure data. */
+struct ufsmount {
+ struct mount *um_mountp; /* filesystem vfs structure */
+ dev_t um_dev; /* device mounted */
+ struct vnode *um_devvp; /* block device mounted vnode */
+ union { /* pointer to superblock */
+ struct lfs *lfs; /* LFS */
+ struct fs *fs; /* FFS */
+ } ufsmount_u;
+#define um_fs ufsmount_u.fs
+#define um_lfs ufsmount_u.lfs
+ struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
+ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
+ u_long um_nindir; /* indirect ptrs per block (see MNINDIR) */
+ u_long um_bptrtodb; /* left shift: indir ptr to disk block (see blkptrtodb) */
+ u_long um_seqinc; /* pointer increment between seq blocks (see is_sequential) */
+ time_t um_btime[MAXQUOTAS]; /* block quota time limit */
+ time_t um_itime[MAXQUOTAS]; /* inode quota time limit */
+ char um_qflags[MAXQUOTAS]; /* quota specific flags */
+ struct netexport um_export; /* export information */
+};
+/*
+ * Flags describing the state of quotas.
+ */
+#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */
+#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr. */
+#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure.
+ * Used by ufs_bmap; arguments may be arbitrary pointer/integer expressions.
+ */
+#define	blkptrtodb(ump, b)		((b) << (ump)->um_bptrtodb)
+#define	is_sequential(ump, a, b)	((b) == (a) + (ump)->um_seqinc)
+#define	MNINDIR(ump)			((ump)->um_nindir)
+
+
diff --git a/sys/gnu/ext2fs/inode.h b/sys/gnu/ext2fs/inode.h
new file mode 100644
index 0000000..df15596
--- /dev/null
+++ b/sys/gnu/ext2fs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)inode.h 8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2GB in length; however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ */
+struct inode {
+ struct inode *i_next; /* Hash chain forward. */
+ struct inode **i_prev; /* Hash chain back. */
+ struct vnode *i_vnode; /* Vnode associated with this inode. */
+ struct vnode *i_devvp; /* Vnode for block I/O. */
+ u_long i_flag; /* IN_* flags. */
+ dev_t i_dev; /* Device associated with the inode. */
+ ino_t i_number; /* The identity of the inode. */
+ union { /* Associated filesystem. */
+ struct fs *fs; /* FFS */
+ struct lfs *lfs; /* LFS */
+ } inode_u;
+#define i_fs inode_u.fs
+#define i_lfs inode_u.lfs
+ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
+ u_quad_t i_modrev; /* Revision level for lease. */
+ struct lockf *i_lockf; /* Head of byte-level lock list. */
+ pid_t i_lockholder; /* DEBUG: holder of inode lock. */
+ pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */
+ /*
+ * Side effects; used during directory lookup.
+ */
+ long i_count; /* Size of free slot in directory. */
+ doff_t i_endoff; /* End of useful stuff in directory. */
+ doff_t i_diroff; /* Offset in dir, where we found last entry. */
+ doff_t i_offset; /* Offset of free space in directory. */
+ ino_t i_ino; /* Inode number of found directory. */
+ u_long i_reclen; /* Size of found directory entry. */
+ long i_spare[11]; /* Spares to round up to 128 bytes. */
+ /*
+ * The on-disk dinode itself.
+ */
+ struct dinode i_din; /* 128 bytes of the on-disk dinode. */
+};
+
+#define i_atime i_din.di_atime
+#define i_blocks i_din.di_blocks
+#define i_ctime i_din.di_ctime
+#define i_db i_din.di_db
+#define i_flags i_din.di_flags
+#define i_gen i_din.di_gen
+#define i_gid i_din.di_gid
+#define i_ib i_din.di_ib
+#define i_mode i_din.di_mode
+#define i_mtime i_din.di_mtime
+#define i_nlink i_din.di_nlink
+#define i_rdev i_din.di_rdev
+#define i_shortlink i_din.di_shortlink
+#define i_size i_din.di_size
+#define i_uid i_din.di_uid
+
+/* These flags are kept in i_flag. */
+#define IN_ACCESS 0x0001 /* Access time update request. */
+#define IN_CHANGE 0x0002 /* Inode change time update request. */
+#define IN_EXLOCK 0x0004 /* File has exclusive lock. */
+#define IN_LOCKED 0x0008 /* Inode lock. */
+#define IN_LWAIT 0x0010 /* Process waiting on file lock. */
+#define IN_MODIFIED 0x0020 /* Inode has been modified. */
+#define IN_RENAME 0x0040 /* Inode is being renamed. */
+#define IN_SHLOCK 0x0080 /* File has shared lock. */
+#define IN_UPDATE 0x0100 /* Modification time update request. */
+#define IN_WANTED 0x0200 /* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.
+ */
+struct indir {
+ daddr_t in_lbn; /* Logical block number. */
+ int in_off; /* Offset in buffer (i_ib index for the first entry). */
+ int in_exists; /* Flag if the block exists. */
+};
+
+/* Convert between inode pointers and vnode pointers. */
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+#define ITIMES(ip, t1, t2) { /* apply, then clear, pending timestamp requests */ \
+ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \
+ (ip)->i_flag |= IN_MODIFIED; \
+ if ((ip)->i_flag & IN_ACCESS) \
+ (ip)->i_atime.ts_sec = (t1)->tv_sec; \
+ if ((ip)->i_flag & IN_UPDATE) { \
+ (ip)->i_mtime.ts_sec = (t2)->tv_sec; \
+ (ip)->i_modrev++; \
+ } \
+ if ((ip)->i_flag & IN_CHANGE) /* note: reads `time' from scope, not t1/t2 */ \
+ (ip)->i_ctime.ts_sec = time.tv_sec; \
+ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); \
+ } \
+}
+
+/* This overlays the fid structure (see mount.h). */
+struct ufid {
+ u_short ufid_len; /* Length of structure. */
+ u_short ufid_pad; /* Force long alignment. */
+ ino_t ufid_ino; /* File number (ino). */
+ long ufid_gen; /* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
new file mode 100644
index 0000000..bcd838d
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_bmap.c 8.6 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/resourcevar.h>
+#include <sys/trace.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+/*
+ * Translate a logical block number of a file into the physical block
+ * number on disk.  The real work happens in ufs_bmaparray(); this routine
+ * only unpacks the vop arguments and handles the trivial requests.
+ */
+int
+ufs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+
+	/* Hand back the device vnode when the caller asked for it. */
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = VTOI(vp)->i_devvp;
+
+	/* With no place to store a mapping there is nothing left to do. */
+	if (ap->a_bnp != NULL)
+		return (ufs_bmaparray(vp, ap->a_bn, ap->a_bnp, NULL, NULL,
+		    ap->a_runp));
+	return (0);
+}
+
+/*
+ * Indirect blocks live on the file's own vnode under negative logical block
+ * numbers: a single indirect block is addressed by the negated number of the
+ * first data block it maps, and each double/triple indirect block by one
+ * less than the address of the first block of the next lower level.
+ *
+ * ufs_bmaparray maps logical block bn of vp to a disk address in *bnp
+ * (-1 when no block is assigned).  If ap/nump are supplied (both or
+ * neither) they receive the path of indirect blocks walked and its length;
+ * if runp is supplied it receives the number of blocks that follow bn
+ * contiguously on disk, capped by MAXBSIZE.  Returns 0 or an errno value
+ * from ufs_getlbns() or biowait().
+ */
+
+int
+ufs_bmaparray(vp, bn, bnp, ap, nump, runp)
+	struct vnode *vp;
+	register daddr_t bn;
+	daddr_t *bnp;
+	struct indir *ap;
+	int *nump;
+	int *runp;
+{
+	register struct inode *ip;
+	struct buf *bp;
+	struct ufsmount *ump;
+	struct mount *mp;
+	struct indir a[NIADDR], *xap;
+	daddr_t daddr;
+	long metalbn;
+	int error, maxrun, num;
+
+	ip = VTOI(vp);
+	mp = vp->v_mount;
+	ump = VFSTOUFS(mp);
+#ifdef DIAGNOSTIC
+	if ((ap != NULL && nump == NULL) || (ap == NULL && nump != NULL))
+		panic("ufs_bmaparray: invalid arguments");
+#endif
+
+	if (runp) {
+		/*
+		 * XXX
+		 * If MAXBSIZE is the largest transfer the disks can handle,
+		 * we probably want maxrun to be 1 block less so that we
+		 * don't create a block larger than the device can handle.
+		 */
+		*runp = 0;
+		maxrun = MAXBSIZE / mp->mnt_stat.f_iosize - 1;
+	}
+
+	/* Use local scratch space when the caller does not want the path. */
+	xap = ap == NULL ? a : ap;
+	if (!nump)
+		nump = &num;
+	if ((error = ufs_getlbns(vp, bn, xap, nump)) != 0)
+		return (error);
+
+	num = *nump;
+	if (num == 0) {
+		/* No indirection: the address is in the inode's direct array. */
+		*bnp = blkptrtodb(ump, ip->i_db[bn]);
+		if (*bnp == 0)
+			*bnp = -1;
+		else if (runp)
+			for (++bn; bn < NDADDR && *runp < maxrun &&
+			    is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]);
+			    ++bn, ++*runp);
+		return (0);
+	}
+
+	/* Get disk address out of indirect block array */
+	daddr = ip->i_ib[xap->in_off];
+
+	for (bp = NULL, ++xap; --num; ++xap) {
+		/*
+		 * Exit the loop if there is no disk address assigned yet and
+		 * the indirect block isn't in the cache, or if we were
+		 * looking for an indirect block and we've found it.
+		 */
+
+		metalbn = xap->in_lbn;
+		if ((daddr == 0 && !incore(vp, metalbn)) || metalbn == bn)
+			break;
+		/*
+		 * If we get here, we've either got the block in the cache
+		 * or we have a disk address for it, go fetch it.
+		 */
+		if (bp)
+			brelse(bp);
+
+		xap->in_exists = 1;
+		bp = getblk(vp, metalbn, mp->mnt_stat.f_iosize, 0, 0);
+		if (bp->b_flags & (B_DONE | B_DELWRI)) {
+			trace(TR_BREADHIT, pack(vp, size), metalbn);
+		}
+#ifdef DIAGNOSTIC
+		else if (!daddr)
+			panic("ufs_bmaparray: indirect block not in cache");
+#endif
+		else {
+			trace(TR_BREADMISS, pack(vp, size), metalbn);
+			bp->b_blkno = blkptrtodb(ump, daddr);
+			bp->b_flags |= B_READ;
+			VOP_STRATEGY(bp);
+			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			if ((error = biowait(bp)) != 0) {
+				brelse(bp);
+				return (error);
+			}
+		}
+
+		daddr = ((daddr_t *)bp->b_data)[xap->in_off];
+		/* Final level: count the entries that continue the run on disk. */
+		if (num == 1 && daddr && runp)
+			for (bn = xap->in_off + 1;
+			    bn < MNINDIR(ump) && *runp < maxrun &&
+			    is_sequential(ump, ((daddr_t *)bp->b_data)[bn - 1],
+			    ((daddr_t *)bp->b_data)[bn]);
+			    ++bn, ++*runp);
+	}
+	if (bp)
+		brelse(bp);
+
+	daddr = blkptrtodb(ump, daddr);
+	*bnp = daddr == 0 ? -1 : daddr;
+	return (0);
+}
+
+/*
+ * Create an array of logical block number/offset pairs which represent the
+ * path of indirect blocks required to access block bn, storing it in ap and
+ * the number of pairs in *nump (0 for a direct block). The first pair holds
+ * the logical block number of the single, double or triple indirect block
+ * and its index in the inode's i_ib array; that block number then appears
+ * again with the offset into the block itself. Returns 0, or EFBIG if bn
+ * needs more than NIADDR levels of indirection.
+ */
+int
+ufs_getlbns(vp, bn, ap, nump)
+ struct vnode *vp;
+ register daddr_t bn;
+ struct indir *ap;
+ int *nump;
+{
+ long metalbn, realbn;
+ struct ufsmount *ump;
+ int blockcnt, i, numlevels, off;
+
+ ump = VFSTOUFS(vp->v_mount);
+ if (nump)
+ *nump = 0;
+ numlevels = 0;
+ realbn = bn; /* keep the sign: a negative number names a meta-data block */
+ if ((long)bn < 0)
+ bn = -(long)bn; /* the level search below works on the magnitude */
+
+ /* The first NDADDR blocks are direct blocks. */
+ if (bn < NDADDR)
+ return (0);
+
+ /*
+ * Determine the number of levels of indirection. After this loop
+ * is done, blockcnt indicates the number of data blocks possible
+ * at the given level of indirection, and NIADDR - i is one less
+ * than the number of levels needed, i.e. the index into i_ib.
+ */
+ for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) {
+ if (i == 0)
+ return (EFBIG);
+ blockcnt *= MNINDIR(ump);
+ if (bn < blockcnt)
+ break;
+ }
+
+ /* Calculate the address of the first meta-block. */
+ if (realbn >= 0)
+ metalbn = -(realbn - bn + NIADDR - i);
+ else
+ metalbn = -(-realbn - bn + NIADDR - i);
+
+ /*
+ * At each iteration, off is the offset into the bap array which is
+ * an array of disk addresses at the current level of indirection.
+ * The logical block number and the offset in that block are stored
+ * into the argument array.
+ */
+ ap->in_lbn = metalbn;
+ ap->in_off = off = NIADDR - i;
+ ap->in_exists = 0;
+ ap++;
+ for (++numlevels; i <= NIADDR; i++) {
+ /* If searching for a meta-data block, quit when found. */
+ if (metalbn == realbn)
+ break;
+
+ blockcnt /= MNINDIR(ump);
+ off = (bn / blockcnt) % MNINDIR(ump);
+
+ ++numlevels;
+ ap->in_lbn = metalbn;
+ ap->in_off = off;
+ ap->in_exists = 0;
+ ++ap;
+
+ metalbn -= -1 + off * blockcnt; /* logical number of the child block at slot off */
+ }
+ if (nump)
+ *nump = numlevels;
+ return (0);
+}
diff --git a/sys/gnu/fs/ext2fs/ext2_mount.h b/sys/gnu/fs/ext2fs/ext2_mount.h
new file mode 100644
index 0000000..237871f
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/ext2_mount.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufsmount.h 8.2 (Berkeley) 1/12/94
+ */
+
+struct buf;
+struct inode;
+struct nameidata;
+struct timeval;
+struct ucred;
+struct uio;
+struct vnode;
+struct netexport;
+
+/*
+ * This structure describes the UFS specific mount structure data.
+ * VFSTOUFS() recovers a pointer to it from a struct mount's mnt_data.
+ * um_nindir, um_bptrtodb and um_seqinc are the values read by the
+ * MNINDIR(), blkptrtodb() and is_sequential() macros respectively.
+ */
+struct ufsmount {
+ struct mount *um_mountp; /* filesystem vfs structure */
+ dev_t um_dev; /* device mounted */
+ struct vnode *um_devvp; /* block device mounted vnode */
+ union { /* pointer to superblock */
+ struct lfs *lfs; /* LFS */
+ struct fs *fs; /* FFS */
+ } ufsmount_u;
+#define um_fs ufsmount_u.fs
+#define um_lfs ufsmount_u.lfs
+ struct vnode *um_quotas[MAXQUOTAS]; /* pointer to quota files */
+ struct ucred *um_cred[MAXQUOTAS]; /* quota file access cred */
+ u_long um_nindir; /* indirect ptrs per block */
+ u_long um_bptrtodb; /* indir ptr to disk block (used as a shift count) */
+ u_long um_seqinc; /* inc between seq blocks */
+ time_t um_btime[MAXQUOTAS]; /* block quota time limit */
+ time_t um_itime[MAXQUOTAS]; /* inode quota time limit */
+ char um_qflags[MAXQUOTAS]; /* quota specific flags (QTF_*) */
+ struct netexport um_export; /* export information */
+};
+/*
+ * Flags describing the state of quotas.
+ */
+#define QTF_OPENING 0x01 /* Q_QUOTAON in progress */
+#define QTF_CLOSING 0x02 /* Q_QUOTAOFF in progress */
+
+/* Convert mount ptr to ufsmount ptr. */
+#define VFSTOUFS(mp) ((struct ufsmount *)((mp)->mnt_data))
+
+/*
+ * Macros to access file system parameters in the ufsmount structure.
+ * Used by ufs_bmap.  Every argument is parenthesized in the expansion,
+ * so any pointer-valued expression may be passed as ump.
+ */
+#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb)
+#define is_sequential(ump, a, b) ((b) == (a) + (ump)->um_seqinc)
+#define MNINDIR(ump) ((ump)->um_nindir)
+
+
diff --git a/sys/gnu/fs/ext2fs/inode.h b/sys/gnu/fs/ext2fs/inode.h
new file mode 100644
index 0000000..df15596
--- /dev/null
+++ b/sys/gnu/fs/ext2fs/inode.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright (c) 1982, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)inode.h 8.4 (Berkeley) 1/21/94
+ */
+
+#include <ufs/ufs/dinode.h>
+
+/*
+ * Theoretically, directories can be more than 2Gb in length, however, in
+ * practice this seems unlikely. So, we define the type doff_t as a long
+ * to keep down the cost of doing lookup on a 32-bit machine. If you are
+ * porting to a 64-bit architecture, you should make doff_t the same as off_t.
+ */
+#define doff_t long
+
+/*
+ * The inode is used to describe each active (or recently active)
+ * file in the UFS filesystem. It is composed of two types of
+ * information. The first part is the information that is needed
+ * only while the file is active (such as the identity of the file
+ * and linkage to speed its lookup). The second part is the
+ * permanent meta-data associated with the file which is read
+ * in from the permanent dinode from long term storage when the
+ * file becomes active, and is put back when the file is no longer
+ * being used.
+ * The on-disk part is the embedded i_din; its fields are reached
+ * through the i_* shorthand macros that follow the structure.
+ */
+struct inode {
+ struct inode *i_next; /* Hash chain forward. */
+ struct inode **i_prev; /* Hash chain back. */
+ struct vnode *i_vnode; /* Vnode associated with this inode. */
+ struct vnode *i_devvp; /* Vnode for block I/O. */
+ u_long i_flag; /* I* flags. */
+ dev_t i_dev; /* Device associated with the inode. */
+ ino_t i_number; /* The identity of the inode. */
+ union { /* Associated filesystem. */
+ struct fs *fs; /* FFS */
+ struct lfs *lfs; /* LFS */
+ } inode_u;
+#define i_fs inode_u.fs
+#define i_lfs inode_u.lfs
+ struct dquot *i_dquot[MAXQUOTAS]; /* Dquot structures. */
+ u_quad_t i_modrev; /* Revision level for lease. */
+ struct lockf *i_lockf; /* Head of byte-level lock list. */
+ pid_t i_lockholder; /* DEBUG: holder of inode lock. */
+ pid_t i_lockwaiter; /* DEBUG: latest blocked for inode lock. */
+ /*
+ * Side effects; used during directory lookup.
+ */
+ long i_count; /* Size of free slot in directory. */
+ doff_t i_endoff; /* End of useful stuff in directory. */
+ doff_t i_diroff; /* Offset in dir, where we found last entry. */
+ doff_t i_offset; /* Offset of free space in directory. */
+ ino_t i_ino; /* Inode number of found directory. */
+ u_long i_reclen; /* Size of found directory entry. */
+ long i_spare[11]; /* Spares to round up to 128 bytes. */
+ /*
+ * The on-disk dinode itself.
+ */
+ struct dinode i_din; /* 128 bytes of the on-disk dinode. */
+};
+
+#define i_atime i_din.di_atime
+#define i_blocks i_din.di_blocks
+#define i_ctime i_din.di_ctime
+#define i_db i_din.di_db
+#define i_flags i_din.di_flags
+#define i_gen i_din.di_gen
+#define i_gid i_din.di_gid
+#define i_ib i_din.di_ib
+#define i_mode i_din.di_mode
+#define i_mtime i_din.di_mtime
+#define i_nlink i_din.di_nlink
+#define i_rdev i_din.di_rdev
+#define i_shortlink i_din.di_shortlink
+#define i_size i_din.di_size
+#define i_uid i_din.di_uid
+
+/* These flags are kept in i_flag. */
+#define IN_ACCESS 0x0001 /* Access time update request. */
+#define IN_CHANGE 0x0002 /* Inode change time update request. */
+#define IN_EXLOCK 0x0004 /* File has exclusive lock. */
+#define IN_LOCKED 0x0008 /* Inode lock. */
+#define IN_LWAIT 0x0010 /* Process waiting on file lock. */
+#define IN_MODIFIED 0x0020 /* Inode has been modified. */
+#define IN_RENAME 0x0040 /* Inode is being renamed. */
+#define IN_SHLOCK 0x0080 /* File has shared lock. */
+#define IN_UPDATE 0x0100 /* Modification time update request. */
+#define IN_WANTED 0x0200 /* Inode is wanted by a process. */
+
+#ifdef KERNEL
+/*
+ * Structure used to pass around logical block paths generated by
+ * ufs_getlbns and used by truncate and bmap code.
+ * ufs_getlbns stores one entry per level of indirection and sets
+ * in_exists to 0 in every entry it stores.
+ */
+struct indir {
+ daddr_t in_lbn; /* Logical block number. */
+ int in_off; /* Offset in buffer. */
+ int in_exists; /* Flag if the block exists. */
+};
+
+/* Convert between inode pointers and vnode pointers. */
+#define VTOI(vp) ((struct inode *)(vp)->v_data)
+#define ITOV(ip) ((ip)->i_vnode)
+
+#define ITIMES(ip, t1, t2) { /* apply pending time-stamp requests to inode ip */ \
+ if ((ip)->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) { \
+ (ip)->i_flag |= IN_MODIFIED; /* any pending request marks the inode modified */ \
+ if ((ip)->i_flag & IN_ACCESS) \
+ (ip)->i_atime.ts_sec = (t1)->tv_sec; /* access time comes from t1 */ \
+ if ((ip)->i_flag & IN_UPDATE) { \
+ (ip)->i_mtime.ts_sec = (t2)->tv_sec; /* modification time comes from t2 */ \
+ (ip)->i_modrev++; \
+ } \
+ if ((ip)->i_flag & IN_CHANGE) \
+ (ip)->i_ctime.ts_sec = time.tv_sec; /* change time uses the global time, not t1/t2 */ \
+ (ip)->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); /* requests satisfied */ \
+ } \
+} /* NOTE: expands to a bare { } block, not a single statement; ip, t1, t2 are evaluated more than once */
+
+/* This overlays the fid structure (see mount.h). */
+struct ufid {
+ u_short ufid_len; /* Length of structure. */
+ u_short ufid_pad; /* Force long alignment. */
+ ino_t ufid_ino; /* File number (ino). */
+ long ufid_gen; /* Generation number. */
+};
+#endif /* KERNEL */
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
new file mode 100644
index 0000000..f42900c
--- /dev/null
+++ b/sys/kern/kern_tc.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's.  At most one second is
+ * carried into tv_sec, so the result is only normalized when usec < 1000000.
+ */
+#define BUMPTIME(t, usec) do { /* do/while: `BUMPTIME(...);' is one statement */ \
+	register volatile struct timeval *tp = (t); \
+	register long us; \
+	\
+	tp->tv_usec = us = tp->tv_usec + (usec); \
+	if (us >= 1000000) { \
+		tp->tv_usec = us - 1000000; \
+		tp->tv_sec++; \
+	} \
+} while (0)
+
+int stathz; /* statistics clock frequency; 0 means it is driven from hardclock() */
+int profhz; /* profiling clock frequency; initclocks() fills it in if left 0 */
+int profprocs; /* count kept by startprofclock()/stopprofclock() */
+int ticks; /* incremented once per hardclock() call */
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+volatile struct timeval time; /* advanced by hardclock() on every call */
+volatile struct timeval mono_time; /* advanced by the same delta as time */
+
+/*
+ * Set up the clock frequency variables and have the machine-dependent
+ * code start the clocks.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/* Divider of 1 is the normal (non-profiling) case. */
+	pscnt = 1;
+	psdiv = 1;
+	cpu_initclocks();
+
+	/* With no separate statistics clock, hz stands in for stathz. */
+	if (stathz != 0)
+		statrate = stathz;
+	else
+		statrate = hz;
+	if (profhz == 0)
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second.
+ *
+ * Each call, in order: ages the head of the callout queue, charges the
+ * current process's virtual and profiling interval timers, runs
+ * statclock() when there is no separate statistics clock (stathz == 0),
+ * advances ticks, time and mono_time, and finally runs or schedules
+ * softclock() if any callout was found due.
+ *
+ * frame describes the interrupted context and is only examined through
+ * the CLKF_*() macros.
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0;
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break; /* still in the future: nothing further is due */
+ needsoft = 1; /* at least one entry is due */
+ if (p1->c_time == 0)
+ break; /* came due on this tick: stop decrementing here */
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual and profile time, as needed.
+ * The virtual timer is only charged when the interrupt came
+ * from user mode; a signal is posted when itimerdecr()
+ * returns 0.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ * While timedelta is nonzero each call applies tickdelta of it.
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ delta = tick + tickdelta;
+ timedelta -= tickdelta;
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock();
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue.
+ *
+ * Entries at the head of calltodo whose c_time is <= 0 are unlinked and
+ * put back on the callfree list before their function is called.  The
+ * lists are only touched at splhigh(); each function is called at the
+ * priority level that was in effect when softclock() was entered.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func; /* copy out before the entry is recycled */
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree;
+ callfree = c;
+ splx(s); /* drop to the entry level for the call */
+ (*func)(arg);
+ (void) splhigh(); /* block the clock again before rereading the queue */
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time.
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ *
+ * timeout() arranges for (*ftn)(arg) to be called once `ticks' clock
+ * ticks from now; a ticks value <= 0 is treated as 1.  The entry is
+ * taken from the callfree list, and the kernel panics if that list is
+ * empty.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks; /* NOTE: hides the global `ticks' inside this function */
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time; /* overdue (<= 0) entries are passed without adjusting */
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks; /* successor is now relative to the new entry */
+
+ /* Insert the new entry into the queue, between p and t. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+void
+untimeout(ftn, arg)
+	void (*ftn) __P((void *));
+	void *arg;
+{
+	register struct callout *prev, *cur;
+	register int s;
+
+	/* Only the first queued entry matching both ftn and arg is removed. */
+	s = splhigh();
+	prev = &calltodo;
+	while ((cur = prev->c_next) != NULL) {
+		if (cur->c_func != ftn || cur->c_arg != arg) {
+			prev = cur;
+			continue;
+		}
+
+		/* Hand any remaining positive delay on to the successor. */
+		if (cur->c_next != NULL && cur->c_time > 0)
+			cur->c_next->c_time += cur->c_time;
+
+		/* Unlink from the callout queue and return to callfree. */
+		prev->c_next = cur->c_next;
+		cur->c_next = callfree;
+		callfree = cur;
+		break;
+	}
+	splx(s);
+}
+
+/*
+ * Compute number of hz until specified time. Used to
+ * compute third argument to timeout() from an absolute time.
+ *
+ * tv is the absolute target time; the value returned is the number of
+ * clock ticks between the current value of `time' and *tv.
+ */
+int
+hzto(tv)
+	struct timeval *tv;
+{
+	register long nticks, dsec;
+	int s;
+
+	/*
+	 * If number of milliseconds will fit in 32 bit arithmetic,
+	 * then compute number of milliseconds to time and scale to
+	 * ticks. Otherwise just compute number of hz in time, rounding
+	 * times greater than representable to maximum value.
+	 *
+	 * Delta times less than 25 days can be computed ``exactly''.
+	 * Maximum value for any timeout in 10ms ticks is 250 days.
+	 *
+	 * NOTE(review): the millisecond path divides by (tick / 1000),
+	 * which looks like it assumes tick >= 1000 usec -- confirm.
+	 */
+	s = splhigh();
+	dsec = tv->tv_sec - time.tv_sec;
+	if (dsec > 0x7fffffff / 1000 - 1000) {
+		/* Too far away for milliseconds: whole seconds only. */
+		if (dsec > 0x7fffffff / hz)
+			nticks = 0x7fffffff;
+		else
+			nticks = dsec * hz;
+	} else {
+		nticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+		    (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
+	}
+	splx(s);
+	return (nticks);
+}
+
+/*
+ * Start profiling on a process.
+ *
+ * Nothing happens if the process already has P_PROFIL set.  When the
+ * first profiled process appears and a separate statistics clock
+ * exists, that clock is switched to profhz and the prof => stat divider
+ * is loaded with psratio.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	if (p->p_flag & P_PROFIL)
+		return;
+	p->p_flag |= P_PROFIL;
+	if (++profprocs != 1 || stathz == 0)
+		return;
+
+	s = splstatclock();
+	pscnt = psratio;
+	psdiv = psratio;
+	setstatclockrate(profhz);
+	splx(s);
+}
+
+/*
+ * Stop profiling on a process.
+ *
+ * Nothing happens unless the process has P_PROFIL set.  When the last
+ * profiled process goes away and a separate statistics clock exists,
+ * that clock is switched back to stathz and the divider is reset to 1.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int s;
+
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;
+	p->p_flag &= ~P_PROFIL;
+	if (--profprocs != 0 || stathz == 0)
+		return;
+
+	s = splstatclock();
+	pscnt = 1;
+	psdiv = 1;
+	setstatclockrate(stathz);
+	splx(s);
+}
+
+int dk_ndrive = DK_NDRIVE;
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics.
+ *
+ * Every call takes the profiling sample.  The statistics are gathered
+ * only on the call that brings pscnt down to 0, after which pscnt is
+ * reloaded from psdiv.  frame describes the interrupted context and is
+ * only examined through the CLKF_*() macros.
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc;
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1); /* user profiling sample */
+ if (--pscnt > 0)
+ return; /* profiling-only tick */
+ /*
+ * Came from user mode; CPU was in user state.
+ * Charge the tick to the process and to the nice or
+ * user cpu state.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return; /* profiling-only tick */
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++;
+ }
+ pscnt = psdiv; /* restart the prof => stat divider */
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i)) /* bit i of dk_busy set: charge drive i */
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--; /* undo an increment that wrapped to 0 */
+ if ((p->p_estcpu & 3) == 0) { /* every fourth increment */
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Return information about system clocks: a struct clockinfo built from
+ * hz, tick, profhz and stathz (hz is reported when stathz is 0) is handed
+ * to sysctl_rdstruct() and that call's result is returned.
+ */
+int
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo clkinfo;
+
+	clkinfo.hz = hz;
+	clkinfo.tick = tick;
+	clkinfo.profhz = profhz;
+	clkinfo.stathz = stathz ? stathz : hz;
+	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
+}
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
new file mode 100644
index 0000000..f42900c
--- /dev/null
+++ b/sys/kern/kern_timeout.c
@@ -0,0 +1,528 @@
+/*-
+ * Copyright (c) 1982, 1986, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/dkstat.h>
+#include <sys/callout.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/resourcevar.h>
+
+#include <machine/cpu.h>
+
+#ifdef GPROF
+#include <sys/gmon.h>
+#endif
+
+/*
+ * Clock handling routines.
+ *
+ * This code is written to operate with two timers that run independently of
+ * each other. The main clock, running hz times per second, is used to keep
+ * track of real time. The second timer handles kernel and user profiling,
+ * and does resource use estimation. If the second timer is programmable,
+ * it is randomized to avoid aliasing between the two clocks. For example,
+ * the randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we drive
+ * profiling and statistics off the main clock. This WILL NOT be accurate;
+ * do not do it unless absolutely necessary.
+ *
+ * The statistics clock may (or may not) be run at a higher rate while
+ * profiling. This profile clock runs at profhz. We require that profhz
+ * be an integral multiple of stathz.
+ *
+ * If the statistics clock is running fast, it must be divided by the ratio
+ * profhz/stathz for statistics. (For profiling, every tick counts.)
+ */
+
+/*
+ * TODO:
+ * allocate more timeout table slots when table overflows.
+ */
+
+/*
+ * Bump a timeval by a small number of usec's. Only one carry into tv_sec is done, so the result is normalized only while tv_usec + usec < 2000000.
+ */
+#define BUMPTIME(t, usec) { \
+ register volatile struct timeval *tp = (t); \
+ register long us; \
+ \
+ tp->tv_usec = us = tp->tv_usec + (usec); \
+ if (us >= 1000000) { \
+ tp->tv_usec = us - 1000000; \
+ tp->tv_sec++; \
+ } \
+}
+
+int stathz; /* statistics clock rate; when 0, hardclock() calls statclock() itself */
+int profhz; /* profiling clock rate; initclocks() defaults it to the statistics rate */
+int profprocs; /* processes with P_PROFIL set; see startprofclock()/stopprofclock() */
+int ticks; /* incremented once per hardclock() call */
+static int psdiv, pscnt; /* prof => stat divider */
+int psratio; /* ratio: prof / stat */
+
+volatile struct timeval time; /* advanced by hardclock() through BUMPTIME */
+volatile struct timeval mono_time; /* advanced by the same delta as time in hardclock() */
+
+/*
+ * Reset the prof => stat divider, run the machine-specific clock setup, and derive psratio.
+ */
+void
+initclocks()
+{
+	register int statrate;
+
+	/*
+	 * Begin with the divider at 1 (normal case), then hand off to
+	 * the machine-specific cpu_initclocks().
+	 */
+	psdiv = pscnt = 1;
+	cpu_initclocks();
+
+	if (stathz != 0)		/* statistics rate falls back to hz */
+		statrate = stathz;
+	else
+		statrate = hz;
+	if (profhz == 0)		/* profhz defaults to the statistics rate */
+		profhz = statrate;
+	psratio = profhz / statrate;
+}
+
+/*
+ * The real-time timer, interrupting hz times per second. frame is handed to the CLKF_* macros and to statclock().
+ */
+void
+hardclock(frame)
+ register struct clockframe *frame;
+{
+ register struct callout *p1;
+ register struct proc *p;
+ register int delta, needsoft;
+ extern int tickdelta;
+ extern long timedelta;
+
+ /*
+ * Update real-time timeout queue.
+ * At front of queue are some number of events which are ``due''.
+ * The time to these is <= 0 and if negative represents the
+ * number of ticks which have passed since it was supposed to happen.
+ * The rest of the q elements (times > 0) are events yet to happen,
+ * where the time for each is given as a delta from the previous.
+ * Decrementing just the first of these serves to decrement the time
+ * to all events.
+ */
+ needsoft = 0; /* becomes 1 once any entry is found with c_time <= 0 */
+ for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
+ if (--p1->c_time > 0)
+ break; /* first not-yet-due entry has been aged; stop */
+ needsoft = 1;
+ if (p1->c_time == 0)
+ break; /* this entry just became due; entries after it are not aged */
+ }
+
+ p = curproc;
+ if (p) {
+ register struct pstats *pstats;
+
+ /*
+ * Run current process's virtual (only if CLKF_USERMODE) and profile time, as needed.
+ */
+ pstats = p->p_stats;
+ if (CLKF_USERMODE(frame) &&
+ timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
+ psignal(p, SIGVTALRM);
+ if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
+ itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
+ psignal(p, SIGPROF);
+ }
+
+ /*
+ * If no separate statistics clock is available, run it from here.
+ */
+ if (stathz == 0)
+ statclock(frame);
+
+ /*
+ * Increment the time-of-day. The increment is just ``tick'' unless
+ * we are still adjusting the clock; see adjtime().
+ */
+ ticks++;
+ if (timedelta == 0)
+ delta = tick;
+ else {
+ delta = tick + tickdelta;
+ timedelta -= tickdelta; /* this much of the pending adjustment is now applied */
+ }
+ BUMPTIME(&time, delta);
+ BUMPTIME(&mono_time, delta);
+
+ /*
+ * Process callouts at a very low cpu priority, so we don't keep the
+ * relatively high clock interrupt priority any longer than necessary.
+ */
+ if (needsoft) {
+ if (CLKF_BASEPRI(frame)) {
+ /*
+ * Save the overhead of a software interrupt;
+ * it will happen as soon as we return, so do it now.
+ */
+ (void)splsoftclock();
+ softclock();
+ } else
+ setsoftclock(); /* otherwise request softclock() instead of calling it here */
+ }
+}
+
+/*
+ * Software (low priority) clock interrupt.
+ * Run periodic events from timeout queue: every head entry with c_time <= 0 is unlinked and its function called.
+ */
+/*ARGSUSED*/
+void
+softclock()
+{
+ register struct callout *c;
+ register void *arg;
+ register void (*func) __P((void *));
+ register int s;
+
+ s = splhigh();
+ while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
+ func = c->c_func; /* copy out func/arg before the entry is recycled */
+ arg = c->c_arg;
+ calltodo.c_next = c->c_next;
+ c->c_next = callfree; /* entry goes back on the free list before the call */
+ callfree = c;
+ splx(s); /* the handler is called with the saved level s restored */
+ (*func)(arg);
+ (void) splhigh(); /* re-raise before touching the queue again */
+ }
+ splx(s);
+}
+
+/*
+ * timeout --
+ * Execute a function after a specified length of time (ticks; values <= 0 are treated as 1).
+ *
+ * untimeout --
+ * Cancel previous timeout function call.
+ *
+ * See AT&T BCI Driver Reference Manual for specification. This
+ * implementation differs from that one in that no identification
+ * value is returned from timeout, rather, the original arguments
+ * to timeout are used to identify entries for untimeout.
+ */
+void
+timeout(ftn, arg, ticks)
+ void (*ftn) __P((void *));
+ void *arg;
+ register int ticks; /* shadows the file-scope ticks counter inside this function */
+{
+ register struct callout *new, *p, *t;
+ register int s;
+
+ if (ticks <= 0)
+ ticks = 1;
+
+ /* Lock out the clock. */
+ s = splhigh();
+
+ /* Fill in the next free callout structure. */
+ if (callfree == NULL)
+ panic("timeout table full");
+ new = callfree;
+ callfree = new->c_next;
+ new->c_arg = arg;
+ new->c_func = ftn;
+
+ /*
+ * The time for each event is stored as a difference from the time
+ * of the previous event on the queue. Walk the queue, correcting
+ * the ticks argument for queue entries passed. Correct the ticks
+ * value for the queue entry immediately after the insertion point
+ * as well. Watch out for negative c_time values; these represent
+ * overdue events.
+ */
+ for (p = &calltodo;
+ (t = p->c_next) != NULL && ticks > t->c_time; p = t)
+ if (t->c_time > 0)
+ ticks -= t->c_time; /* only positive deltas reduce the remaining time */
+ new->c_time = ticks;
+ if (t != NULL)
+ t->c_time -= ticks; /* successor's delta is now relative to the new entry */
+
+ /* Insert the new entry into the queue. */
+ p->c_next = new;
+ new->c_next = t;
+ splx(s);
+}
+
+void
+untimeout(ftn, arg) /* remove the first queued callout whose c_func and c_arg match; no-op if none */
+ void (*ftn) __P((void *));
+ void *arg;
+{
+ register struct callout *p, *t;
+ register int s;
+
+ s = splhigh();
+ for (p = &calltodo; (t = p->c_next) != NULL; p = t)
+ if (t->c_func == ftn && t->c_arg == arg) {
+ /* Increment next entry's tick count by the removed entry's positive delta. */
+ if (t->c_next && t->c_time > 0)
+ t->c_next->c_time += t->c_time;
+
+ /* Move entry from callout queue to callfree queue. */
+ p->c_next = t->c_next;
+ t->c_next = callfree;
+ callfree = t;
+ break; /* at most one entry is removed per call */
+ }
+ splx(s);
+}
+
+/*
+ * Compute number of hz (clock ticks) from the current value of `time' until the specified time *tv. Used to
+ * compute third argument to timeout() from an absolute time.
+ */
+int
+hzto(tv)
+ struct timeval *tv;
+{
+ register long ticks, sec; /* ticks shadows the file-scope counter of the same name */
+ int s;
+
+ /*
+ * If number of milliseconds will fit in 32 bit arithmetic,
+ * then compute number of milliseconds to time and scale to
+ * ticks. Otherwise just compute number of hz in time, rounding
+ * times greater than representable to maximum value.
+ *
+ * Delta times less than 25 days can be computed ``exactly''.
+ * Maximum value for any timeout in 10ms ticks is 250 days.
+ */
+ s = splhigh();
+ sec = tv->tv_sec - time.tv_sec;
+ if (sec <= 0x7fffffff / 1000 - 1000)
+ ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
+ (tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000); /* NOTE(review): tick / 1000 is 0 if tick < 1000 -- confirm this cannot happen */
+ else if (sec <= 0x7fffffff / hz)
+ ticks = sec * hz; /* whole seconds only; the usec part is ignored on this path */
+ else
+ ticks = 0x7fffffff;
+ splx(s);
+ return (ticks); /* not clamped below: a *tv earlier than `time' gives a result <= 0 */
+}
+
+/*
+ * Mark a process as profiled; the first one switches the statistics clock to profhz.
+ *
+ * Kernel profiling passes proc0 which never exits and hence
+ * keeps the profile clock running constantly.
+ */
+void
+startprofclock(p)
+	register struct proc *p;
+{
+	int ipl;
+
+	if (p->p_flag & P_PROFIL)
+		return;			/* already being profiled */
+	p->p_flag |= P_PROFIL;
+	if (++profprocs != 1 || stathz == 0)
+		return;			/* not the first, or stathz is zero */
+	ipl = splstatclock();
+	psdiv = pscnt = psratio;
+	setstatclockrate(profhz);
+	splx(ipl);
+}
+
+/*
+ * Clear P_PROFIL on a process; when none remain, return the clock to stathz.
+ */
+void
+stopprofclock(p)
+	register struct proc *p;
+{
+	int ipl;
+
+	if ((p->p_flag & P_PROFIL) == 0)
+		return;			/* was not being profiled */
+	p->p_flag &= ~P_PROFIL;
+	if (--profprocs != 0 || stathz == 0)
+		return;			/* others remain, or stathz is zero */
+	ipl = splstatclock();
+	psdiv = pscnt = 1;
+	setstatclockrate(stathz);
+	splx(ipl);
+}
+
+int dk_ndrive = DK_NDRIVE; /* global variable holding DK_NDRIVE, the bound statclock() uses for dk_time[] */
+
+/*
+ * Statistics clock. Grab profile sample, and if divider reaches 0,
+ * do process and kernel statistics (cp_time[], dk_time[], per-process tick counts and p_estcpu).
+ */
+void
+statclock(frame)
+ register struct clockframe *frame;
+{
+#ifdef GPROF
+ register struct gmonparam *g;
+#endif
+ register struct proc *p;
+ register int i;
+
+ if (CLKF_USERMODE(frame)) {
+ p = curproc; /* dereferenced below without a NULL check on this path */
+ if (p->p_flag & P_PROFIL)
+ addupc_intr(p, CLKF_PC(frame), 1);
+ if (--pscnt > 0)
+ return; /* divider not yet expired: profiling sample only */
+ /*
+ * Came from user mode; CPU was in user state.
+ * If this process is being profiled record the tick.
+ */
+ p->p_uticks++;
+ if (p->p_nice > NZERO)
+ cp_time[CP_NICE]++;
+ else
+ cp_time[CP_USER]++;
+ } else {
+#ifdef GPROF
+ /*
+ * Kernel statistics are just like addupc_intr, only easier.
+ */
+ g = &_gmonparam;
+ if (g->state == GMON_PROF_ON) {
+ i = CLKF_PC(frame) - g->lowpc;
+ if (i < g->textsize) {
+ i /= HISTFRACTION * sizeof(*g->kcount);
+ g->kcount[i]++;
+ }
+ }
+#endif
+ if (--pscnt > 0)
+ return; /* divider not yet expired: profiling sample only */
+ /*
+ * Came from kernel mode, so we were:
+ * - handling an interrupt,
+ * - doing syscall or trap work on behalf of the current
+ * user process, or
+ * - spinning in the idle loop.
+ * Whichever it is, charge the time as appropriate.
+ * Note that we charge interrupts to the current process,
+ * regardless of whether they are ``for'' that process,
+ * so that we know how much of its real time was spent
+ * in ``non-process'' (i.e., interrupt) work.
+ */
+ p = curproc;
+ if (CLKF_INTR(frame)) {
+ if (p != NULL)
+ p->p_iticks++;
+ cp_time[CP_INTR]++;
+ } else if (p != NULL) {
+ p->p_sticks++;
+ cp_time[CP_SYS]++;
+ } else
+ cp_time[CP_IDLE]++; /* kernel mode, not in an interrupt, no current process */
+ }
+ pscnt = psdiv; /* reload the prof => stat divider */
+
+ /*
+ * We maintain statistics shown by user-level statistics
+ * programs: the amount of time in each cpu state, and
+ * the amount of time each of DK_NDRIVE ``drives'' is busy.
+ *
+ * XXX should either run linked list of drives, or (better)
+ * grab timestamps in the start & done code.
+ */
+ for (i = 0; i < DK_NDRIVE; i++)
+ if (dk_busy & (1 << i))
+ dk_time[i]++;
+
+ /*
+ * We adjust the priority of the current process. The priority of
+ * a process gets worse as it accumulates CPU time. The cpu usage
+ * estimator (p_estcpu) is increased here. The formula for computing
+ * priorities (in kern_synch.c) will compute a different value each
+ * time p_estcpu increases by 4. The cpu usage estimator ramps up
+ * quite quickly when the process is running (linearly), and decays
+ * away exponentially, at a rate which is proportionally slower when
+ * the system is busy. The basic principle is that the system will
+ * 90% forget that the process used a lot of CPU time in 5 * loadav
+ * seconds. This causes the system to favor processes which haven't
+ * run much recently, and to round-robin among other processes.
+ */
+ if (p != NULL) {
+ p->p_cpticks++;
+ if (++p->p_estcpu == 0)
+ p->p_estcpu--; /* the increment wrapped to 0: step back so the estimator saturates */
+ if ((p->p_estcpu & 3) == 0) { /* low two bits clear: every fourth value */
+ resetpriority(p);
+ if (p->p_priority >= PUSER)
+ p->p_priority = p->p_usrpri;
+ }
+ }
+}
+
+/*
+ * Fill a struct clockinfo from hz, tick, profhz and stathz, then pass it to sysctl_rdstruct().
+ */
+sysctl_clockrate(where, sizep)
+	register char *where;
+	size_t *sizep;
+{
+	struct clockinfo ci;
+
+	if (stathz != 0)		/* report hz when stathz is zero */
+		ci.stathz = stathz;
+	else
+		ci.stathz = hz;
+	ci.profhz = profhz;
+	ci.tick = tick;
+	ci.hz = hz;
+	return (sysctl_rdstruct(where, sizep, NULL, &ci, sizeof(ci)));
+}
diff --git a/sys/kern/subr_clist.c b/sys/kern/subr_clist.c
new file mode 100644
index 0000000..fe8f000
--- /dev/null
+++ b/sys/kern/subr_clist.c
@@ -0,0 +1,159 @@
+/*-
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * from: @(#)tty_subr.c 8.2 (Berkeley) 9/5/93
+ */
+
+#include <sys/param.h>
+#include <sys/ioctl.h>
+#include <sys/tty.h>
+
+char cwaiting; /* NOTE(review): these globals are defined here but unused by the stubbed routines in this file */
+struct cblock *cfree, *cfreelist;
+int cfreecount, nclist;
+
+void
+clist_init()
+{
+
+ /*
+ * Body deleted; this stub does nothing.
+ */
+ return;
+}
+
+getc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted; this stub ignores a1 and always returns 0.
+ */
+ return ((char)0);
+}
+
+q_to_b(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int a3;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and always returns 0.
+ */
+ return (0);
+}
+
+ndqb(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and always returns 0.
+ */
+ return (0);
+}
+
+void
+ndflush(a1, a2)
+ struct clist *a1;
+ int a2;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and does nothing.
+ */
+ return;
+}
+
+putc(a1, a2)
+ char a1;
+ struct clist *a2;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and always returns 0.
+ */
+ return (0);
+}
+
+b_to_q(a1, a2, a3)
+ char *a1;
+ int a2;
+ struct clist *a3;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and always returns 0.
+ */
+ return (0);
+}
+
+char *
+nextc(a1, a2, a3)
+ struct clist *a1;
+ char *a2;
+ int *a3;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and always returns a null pointer.
+ */
+ return ((char *)0);
+}
+
+unputc(a1)
+ struct clist *a1;
+{
+
+ /*
+ * Body deleted; this stub ignores a1 and always returns 0.
+ */
+ return ((char)0);
+}
+
+void
+catq(a1, a2)
+ struct clist *a1, *a2;
+{
+
+ /*
+ * Body deleted; this stub ignores its arguments and does nothing.
+ */
+ return;
+}
diff --git a/sys/kern/subr_disklabel.c b/sys/kern/subr_disklabel.c
new file mode 100644
index 0000000..78dede4
--- /dev/null
+++ b/sys/kern/subr_disklabel.c
@@ -0,0 +1,364 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/disklabel.h>
+#include <sys/syslog.h>
+
+/*
+ * Seek sort for disks. We depend on the driver which calls us using b_resid
+ * as the current cylinder number.
+ *
+ * The argument ap structure holds a b_actf activity chain pointer on which we
+ * keep two queues, sorted in ascending cylinder order. The first queue holds
+ * those requests which are positioned after the current cylinder (in the first
+ * request); the second holds requests which came in after their cylinder number
+ * was passed. Thus we implement a one way scan, retracting after reaching the
+ * end of the drive to the first request on the second queue, at which time it
+ * becomes the first queue.
+ *
+ * A one-way scan is natural because of the way UNIX read-ahead blocks are
+ * allocated.
+ */
+
+/*
+ * For portability with historic industry practice, the
+ * cylinder number has to be maintained in the `b_resid'
+ * field.
+ */
+#define b_cylinder b_resid
+
+void
+disksort(ap, bp) /* insert request bp into the b_actf chain headed by ap, keeping the two sorted runs */
+ register struct buf *ap, *bp;
+{
+ register struct buf *bq;
+
+ /* If the queue is empty, then it's easy. */
+ if (ap->b_actf == NULL) {
+ bp->b_actf = NULL;
+ ap->b_actf = bp;
+ return;
+ }
+
+ /*
+ * If we lie before the first (currently active) request, then we
+ * must locate the second request list and add ourselves to it.
+ */
+ bq = ap->b_actf;
+ if (bp->b_cylinder < bq->b_cylinder) {
+ while (bq->b_actf) {
+ /*
+ * Check for an ``inversion'' in the normally ascending
+ * cylinder numbers, indicating the start of the second
+ * request list.
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder) {
+ /*
+ * Search the second request list for the first
+ * request at a larger cylinder number. We go
+ * before that; if there is no such request, we
+ * go at end. Equal cylinders are ordered by b_blkno.
+ */
+ do {
+ if (bp->b_cylinder <
+ bq->b_actf->b_cylinder)
+ goto insert;
+ if (bp->b_cylinder ==
+ bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno)
+ goto insert;
+ bq = bq->b_actf;
+ } while (bq->b_actf);
+ goto insert; /* after last */
+ }
+ bq = bq->b_actf;
+ }
+ /*
+ * No inversions... we will go after the last, and
+ * be the first request in the second request list.
+ */
+ goto insert;
+ }
+ /*
+ * Request is at/after the current request...
+ * sort in the first request list.
+ */
+ while (bq->b_actf) {
+ /*
+ * We want to go after the current request if there is an
+ * inversion after it (i.e. it is the end of the first
+ * request list), or if the next request is a larger cylinder
+ * than our request (or the same cylinder with a larger b_blkno).
+ */
+ if (bq->b_actf->b_cylinder < bq->b_cylinder ||
+ bp->b_cylinder < bq->b_actf->b_cylinder ||
+ (bp->b_cylinder == bq->b_actf->b_cylinder &&
+ bp->b_blkno < bq->b_actf->b_blkno))
+ goto insert;
+ bq = bq->b_actf;
+ }
+ /*
+ * Neither a second list nor a larger request... we go at the end of
+ * the first list, which is the same as the end of the whole shebang.
+ */
+insert: bp->b_actf = bq->b_actf; /* every path arrives with bq as the entry bp is linked after */
+ bq->b_actf = bp;
+}
+
+/*
+ * Attempt to read a disk label from a device using the indicated strategy
+ * routine. The label must be partly set up before this: secpercyl and
+ * anything required in the strategy routine (e.g., sector size) must be
+ * filled in before calling us. Returns NULL on success and an error
+ * string on failure. On success *lp is overwritten with the label found.
+ */
+char *
+readdisklabel(dev, strat, lp)
+ dev_t dev;
+ int (*strat)();
+ register struct disklabel *lp;
+{
+ register struct buf *bp;
+ struct disklabel *dlp;
+ char *msg = NULL;
+
+ if (lp->d_secperunit == 0)
+ lp->d_secperunit = 0x1fffffff;
+ lp->d_npartitions = 1; /* temporary label: a single partition starting at offset 0 */
+ if (lp->d_partitions[0].p_size == 0)
+ lp->d_partitions[0].p_size = 0x1fffffff;
+ lp->d_partitions[0].p_offset = 0;
+
+ bp = geteblk((int)lp->d_secsize);
+ bp->b_dev = dev;
+ bp->b_blkno = LABELSECTOR;
+ bp->b_bcount = lp->d_secsize;
+ bp->b_flags = B_BUSY | B_READ;
+ bp->b_cylinder = LABELSECTOR / lp->d_secpercyl; /* why secpercyl must be filled in beforehand */
+ (*strat)(bp);
+ if (biowait(bp))
+ msg = "I/O error";
+ else for (dlp = (struct disklabel *)bp->b_data; /* probe each sizeof(long) offset for a label */
+ dlp <= (struct disklabel *)((char *)bp->b_data +
+ DEV_BSIZE - sizeof(*dlp)); /* NOTE(review): bounded by DEV_BSIZE though the buffer was sized from d_secsize */
+ dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+ if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
+ if (msg == NULL)
+ msg = "no disk label";
+ } else if (dlp->d_npartitions > MAXPARTITIONS ||
+ dkcksum(dlp) != 0)
+ msg = "disk label corrupted"; /* keep scanning; a later offset may still hold a good label */
+ else {
+ *lp = *dlp;
+ msg = NULL;
+ break;
+ }
+ }
+ bp->b_flags = B_INVAL | B_AGE;
+ brelse(bp);
+ return (msg);
+}
+
+/*
+ * Check new disk label for sensibility before setting it. Returns EINVAL for
+ * bad magic, more than MAXPARTITIONS partitions or a bad checksum, EBUSY if an
+ * open partition (per openmask) would move, shrink or vanish; else *olp = *nlp.
+ */
+int
+setdisklabel(olp, nlp, openmask)
+	register struct disklabel *olp, *nlp;
+	u_long openmask;
+{
+	register int i;
+	register struct partition *opp, *npp;
+
+	/* Bound d_npartitions before dkcksum(), which walks d_partitions[] up to it. */
+	if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
+	    nlp->d_npartitions > MAXPARTITIONS || dkcksum(nlp) != 0)
+		return (EINVAL);
+	while ((i = ffs((long)openmask)) != 0) {
+		i--;
+		openmask &= ~(1UL << i);	/* unsigned long shift matches the u_long mask */
+		if (nlp->d_npartitions <= i)
+			return (EBUSY);
+		opp = &olp->d_partitions[i];
+		npp = &nlp->d_partitions[i];
+		if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
+			return (EBUSY);
+		/* Copy internally-set partition information if new label doesn't include it. XXX */
+		if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
+			npp->p_fstype = opp->p_fstype;
+			npp->p_fsize = opp->p_fsize;
+			npp->p_frag = opp->p_frag;
+			npp->p_cpg = opp->p_cpg;
+		}
+	}
+	nlp->d_checksum = 0;		/* recompute: the copied fields changed the label */
+	nlp->d_checksum = dkcksum(nlp);
+	*olp = *nlp;
+	return (0);
+}
+
+/* encoding of disk minor numbers (low 3 bits: partition; bits above: unit), should be elsewhere... */
+#define dkunit(dev) (minor(dev) >> 3)
+#define dkpart(dev) (minor(dev) & 07)
+#define dkminor(unit, part) (((unit) << 3) | (part))
+
+/*
+ * Write disk label back to device after modification. A valid label must already be in the label sector; returns 0 or an errno value.
+ */
+int
+writedisklabel(dev, strat, lp)
+ dev_t dev;
+ int (*strat)();
+ register struct disklabel *lp;
+{
+ struct buf *bp;
+ struct disklabel *dlp;
+ int labelpart;
+ int error = 0;
+
+ labelpart = dkpart(dev);
+ if (lp->d_partitions[labelpart].p_offset != 0) { /* this partition does not start at 0: fall back to partition 0 */
+ if (lp->d_partitions[0].p_offset != 0)
+ return (EXDEV); /* not quite right */
+ labelpart = 0;
+ }
+ bp = geteblk((int)lp->d_secsize);
+ bp->b_dev = makedev(major(dev), dkminor(dkunit(dev), labelpart));
+ bp->b_blkno = LABELSECTOR;
+ bp->b_bcount = lp->d_secsize;
+ bp->b_flags = B_READ; /* read the sector first; only the label bytes are replaced before writing it back */
+ (*strat)(bp);
+ if (error = biowait(bp)) /* assignment intended: keep the biowait() result */
+ goto done;
+ for (dlp = (struct disklabel *)bp->b_data;
+ dlp <= (struct disklabel *)
+ ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
+ dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
+ if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
+ dkcksum(dlp) == 0) { /* NOTE(review): unlike readdisklabel(), d_npartitions is not bounded before dkcksum() */
+ *dlp = *lp;
+ bp->b_flags = B_WRITE;
+ (*strat)(bp);
+ error = biowait(bp);
+ goto done;
+ }
+ }
+ error = ESRCH; /* no existing valid label was found in the sector */
+done:
+ brelse(bp);
+ return (error);
+}
+
+/*
+ * XOR together the u_short words of a disk label, from its start up to d_partitions[d_npartitions].
+ */
+dkcksum(lp)
+	register struct disklabel *lp;
+{
+	register u_short *wp, *limit;
+	register u_short xsum;
+
+	/* The summed region stops at the first unused partition slot. */
+	limit = (u_short *)&lp->d_partitions[lp->d_npartitions];
+	for (xsum = 0, wp = (u_short *)lp; wp < limit; wp++)
+		xsum ^= *wp;
+	return (xsum);
+}
+
+/*
+ * Disk error is the preface to plaintive error messages
+ * about failing disk transfers. It prints messages of the form
+
+hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d; cn %d tn %d sn %d)
+
+ * if the offset of the error in the transfer and a disk label
+ * are both available. blkdone should be -1 if the position of the error
+ * is unknown; the disklabel pointer may be null from drivers that have not
+ * been converted to use them. The message is printed with printf
+ * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
+ * The message should be completed (with at least a newline) with printf
+ * or addlog, respectively. There is no trailing space.
+ */
+void
+diskerr(bp, dname, what, pri, blkdone, lp)
+ register struct buf *bp;
+ char *dname, *what;
+ int pri, blkdone;
+ register struct disklabel *lp;
+{
+ int unit = dkunit(bp->b_dev), part = dkpart(bp->b_dev);
+ register void (*pr) __P((const char *, ...));
+ char partname = 'a' + part;
+ int sn;
+
+ if (pri != LOG_PRINTF) {
+ log(pri, ""); /* empty log() call at priority pri; the text itself goes through addlog */
+ pr = addlog;
+ } else
+ pr = printf;
+ (*pr)("%s%d%c: %s %sing fsbn ", dname, unit, partname, what,
+ bp->b_flags & B_READ ? "read" : "writ");
+ sn = bp->b_blkno;
+ if (bp->b_bcount <= DEV_BSIZE)
+ (*pr)("%d", sn);
+ else {
+ if (blkdone >= 0) {
+ sn += blkdone; /* block where the error occurred */
+ (*pr)("%d of ", sn);
+ }
+ (*pr)("%d-%d", bp->b_blkno,
+ bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE);
+ }
+ if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
+#ifdef tahoe
+ sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
+#endif
+ sn += lp->d_partitions[part].p_offset; /* sn becomes the "bn" printed below */
+ (*pr)(" (%s%d bn %d; cn %d", dname, unit, sn,
+ sn / lp->d_secpercyl);
+ sn %= lp->d_secpercyl;
+ (*pr)(" tn %d sn %d)", sn / lp->d_nsectors, sn % lp->d_nsectors);
+ }
+}
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
new file mode 100644
index 0000000..9f4e2ca
--- /dev/null
+++ b/sys/kern/subr_param.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 1980, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)param.c 8.2 (Berkeley) 1/21/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/socket.h>
+#include <sys/proc.h>
+#include <sys/vnode.h>
+#include <sys/file.h>
+#include <sys/callout.h>
+#include <sys/clist.h>
+#include <sys/mbuf.h>
+#include <sys/kernel.h>
+
+#include <ufs/ufs/quota.h>
+
+#ifdef SYSVSHM
+#include <machine/vmparam.h>
+#include <sys/shm.h>
+#endif
+
+/*
+ * System parameter formulae.
+ *
+ * This file is copied into each directory where we compile
+ * the kernel; it should be modified there to suit local taste
+ * if necessary.
+ *
+ * Compiled with -DHZ=xx -DTIMEZONE=x -DDST=x -DMAXUSERS=xx
+ */
+
+#ifndef HZ
+#define HZ 100
+#endif
+int hz = HZ; /* HZ as a variable, so it is visible at run time */
+int tick = 1000000 / HZ; /* microseconds per 1/HZ-second clock period */
+int tickadj = (30000 / (60 * HZ) > 0 ? 30000 / (60 * HZ) : 1); /* can adjust 30ms in 60s; never 0, which plain division gives for HZ > 500 */
+struct timezone tz = { TIMEZONE, DST };
+#define NPROC (20 + 16 * MAXUSERS)
+int maxproc = NPROC;
+#define NTEXT (80 + NPROC / 8) /* actually the object cache */
+#define NVNODE (NPROC + NTEXT + 100)
+int desiredvnodes = NVNODE; /* getnewvnode() compares numvnodes against this */
+int maxfiles = 3 * (NPROC + MAXUSERS) + 80;
+int ncallout = 16 + NPROC;
+int nclist = 60 + 12 * MAXUSERS;
+int nmbclusters = NMBCLUSTERS;
+int fscale = FSCALE; /* kernel uses `FSCALE', user uses `fscale' */
+
+/*
+ * Values in support of System V compatible shared memory. XXX
+ */
+#ifdef SYSVSHM
+#define SHMMAX (SHMMAXPGS*NBPG) /* presumably a byte count (pages * bytes per page) -- confirm against shm.h */
+#define SHMMIN 1
+#define SHMMNI 32 /* <= SHMMMNI in shm.h */
+#define SHMSEG 8
+#define SHMALL (SHMMAXPGS/CLSIZE) /* SHMMAXPGS expressed in units of CLSIZE pages */
+
+struct shminfo shminfo = { /* positional initializer: order must match struct shminfo's field order */
+ SHMMAX,
+ SHMMIN,
+ SHMMNI,
+ SHMSEG,
+ SHMALL
+};
+#endif
+
+/*
+ * These are initialized at bootstrap time
+ * to values dependent on memory size
+ */
+int nbuf, nswbuf; /* no initializer here, so zero until bootstrap code sets them */
+
+/*
+ * These have to be allocated somewhere; allocating
+ * them here forces loader errors if this file is omitted
+ * (if they've been externed everywhere else; hah!).
+ */
+struct callout *callout; /* only the pointers are defined here; nothing is allocated in this file */
+struct cblock *cfree;
+struct buf *buf, *swbuf;
+char *buffers;
+
+/*
+ * Proc/pgrp hashing.
+ * Here so that hash table sizes can depend on MAXUSERS/NPROC.
+ * Hash size must be a power of two.
+ * NOW omission of this file will cause loader errors!
+ */
+
+#if NPROC > 1024 /* pick a power-of-two table size that grows with NPROC */
+#define PIDHSZ 512
+#else
+#if NPROC > 512
+#define PIDHSZ 256
+#else
+#if NPROC > 256
+#define PIDHSZ 128
+#else
+#define PIDHSZ 64 /* smallest table, for NPROC <= 256 */
+#endif
+#endif
+#endif
+
+struct proc *pidhash[PIDHSZ];
+struct pgrp *pgrphash[PIDHSZ]; /* same number of buckets as pidhash */
+int pidhashmask = PIDHSZ - 1; /* all-ones low-bit mask, since PIDHSZ is a power of two */
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
new file mode 100644
index 0000000..d4af592
--- /dev/null
+++ b/sys/kern/uipc_sockbuf.c
@@ -0,0 +1,755 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)uipc_socket2.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+/*
+ * Primitive routines for operating on sockets and socket buffers
+ */
+
+/* strings for sleep message: */
+char netio[] = "netio"; /* passed to tsleep() by sbwait() and sb_lock() */
+char netcon[] = "netcon";
+char netcls[] = "netcls";
+
+u_long sb_max = SB_MAX; /* patchable */ /* upper bound applied by sbreserve() */
+
+/*
+ * Procedures to manipulate state flags of socket
+ * and do appropriate wakeups. Normal sequence from the
+ * active (originating) side is that soisconnecting() is
+ * called during processing of connect() call,
+ * resulting in an eventual call to soisconnected() if/when the
+ * connection is established. When the connection is torn down
+ * soisdisconnecting() is called during processing of disconnect() call,
+ * and soisdisconnected() is called when the connection to the peer
+ * is totally severed. The semantics of these routines are such that
+ * connectionless protocols can call soisconnected() and soisdisconnected()
+ * only, bypassing the in-progress calls when setting up a ``connection''
+ * takes no time.
+ *
+ * From the passive side, a socket is created with
+ * two queues of sockets: so_q0 for connections in progress
+ * and so_q for connections already made and awaiting user acceptance.
+ * As a protocol is preparing incoming connections, it creates a socket
+ * structure queued on so_q0 by calling sonewconn(). When the connection
+ * is established, soisconnected() is called, and transfers the
+ * socket structure to so_q, making it available to accept().
+ *
+ * If a socket is closed with sockets on either
+ * so_q0 or so_q, these sockets are dropped.
+ *
+ * If higher level protocols are implemented in
+ * the kernel, the wakeups done here will sometimes
+ * cause software-interrupt process scheduling.
+ */
+
+soisconnecting(so)
+ register struct socket *so;
+{
+ /* Flag so as connecting; clear the connected and disconnecting bits. No wakeups. */
+ so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= SS_ISCONNECTING;
+}
+
+soisconnected(so)
+ register struct socket *so;
+{
+ register struct socket *head = so->so_head; /* socket whose queue so sits on, if any */
+
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING|SS_ISCONFIRMING);
+ so->so_state |= SS_ISCONNECTED;
+ if (head && soqremque(so, 0)) { /* so was found on head's so_q0 and unlinked from it */
+ soqinsque(head, so, 1); /* move it to head's so_q */
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ } else { /* no head, or so was not on so_q0: wake sleepers on so itself */
+ wakeup((caddr_t)&so->so_timeo);
+ sorwakeup(so);
+ sowwakeup(so);
+ }
+}
+
+soisdisconnecting(so)
+ register struct socket *so;
+{
+ /* Flag so as disconnecting with no further send or receive, then wake every sleeper. */
+ so->so_state &= ~SS_ISCONNECTING;
+ so->so_state |= (SS_ISDISCONNECTING|SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+soisdisconnected(so)
+ register struct socket *so;
+{
+ /* Clear all three connection-state bits, forbid further I/O, then wake every sleeper. */
+ so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
+ so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
+ wakeup((caddr_t)&so->so_timeo);
+ sowwakeup(so);
+ sorwakeup(so);
+}
+
+/*
+ * When an attempt at a new connection is noted on a socket
+ * which accepts connections, sonewconn is called. If the
+ * connection is possible (subject to space constraints, etc.)
+ * then we allocate a new structure, properly linked into the
+ * data structure of the original socket, and return this.
+ * Connstatus may be 0, or SS_ISCONFIRMING, or SS_ISCONNECTED.
+ *
+ * Currently, sonewconn() is defined as sonewconn1() in socketvar.h
+ * to catch calls that are missing the (new) second parameter.
+ */
+struct socket *
+sonewconn1(head, connstatus)
+ register struct socket *head;
+ int connstatus;
+{
+ register struct socket *so;
+ int soqueue = connstatus ? 1 : 0; /* nonzero status: queue on so_q, otherwise on so_q0 */
+
+ if (head->so_qlen + head->so_q0len > 3 * head->so_qlimit / 2) /* both queues together exceed 1.5 * so_qlimit */
+ return ((struct socket *)0);
+ MALLOC(so, struct socket *, sizeof(*so), M_SOCKET, M_DONTWAIT);
+ if (so == NULL)
+ return ((struct socket *)0);
+ bzero((caddr_t)so, sizeof(*so));
+ so->so_type = head->so_type; /* the new socket copies these fields from head */
+ so->so_options = head->so_options &~ SO_ACCEPTCONN; /* ...except the accepting-connections option */
+ so->so_linger = head->so_linger;
+ so->so_state = head->so_state | SS_NOFDREF;
+ so->so_proto = head->so_proto;
+ so->so_timeo = head->so_timeo;
+ so->so_pgid = head->so_pgid;
+ (void) soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat); /* NOTE(review): an ENOBUFS result is discarded */
+ soqinsque(head, so, soqueue); /* queued before the protocol attach is attempted */
+ if ((*so->so_proto->pr_usrreq)(so, PRU_ATTACH,
+ (struct mbuf *)0, (struct mbuf *)0, (struct mbuf *)0)) {
+ (void) soqremque(so, soqueue); /* attach failed: unlink and free the new socket */
+ (void) free((caddr_t)so, M_SOCKET);
+ return ((struct socket *)0);
+ }
+ if (connstatus) { /* placed on so_q above: wake sleepers on head and record the status */
+ sorwakeup(head);
+ wakeup((caddr_t)&head->so_timeo);
+ so->so_state |= connstatus;
+ }
+ return (so);
+}
+
+soqinsque(head, so, q)
+ register struct socket *head, *so;
+ int q;
+{
+ /* Append so to the tail of head's so_q0 list (q == 0) or so_q list (otherwise). */
+ register struct socket **prev;
+ so->so_head = head;
+ if (q == 0) {
+ head->so_q0len++;
+ so->so_q0 = 0; /* so becomes the new tail */
+ for (prev = &(head->so_q0); *prev; ) /* advance to the null link that ends the list */
+ prev = &((*prev)->so_q0);
+ } else {
+ head->so_qlen++;
+ so->so_q = 0;
+ for (prev = &(head->so_q); *prev; )
+ prev = &((*prev)->so_q);
+ }
+ *prev = so; /* store so into that terminating link */
+}
+
+soqremque(so, q) /* unlink so from its head's so_q (q != 0) or so_q0 (q == 0); 1 if removed, 0 if not found */
+ register struct socket *so;
+ int q;
+{
+ register struct socket *head, *prev, *next;
+
+ head = so->so_head; /* NOTE(review): dereferenced below without a null check -- callers presumably guarantee it */
+ prev = head;
+ for (;;) { /* walk the chosen list starting at head, looking for so */
+ next = q ? prev->so_q : prev->so_q0;
+ if (next == so)
+ break;
+ if (next == 0) /* reached the end without finding so */
+ return (0);
+ prev = next;
+ }
+ if (q == 0) {
+ prev->so_q0 = next->so_q0; /* splice so out of the list */
+ head->so_q0len--;
+ } else {
+ prev->so_q = next->so_q;
+ head->so_qlen--;
+ }
+ next->so_q0 = next->so_q = 0; /* next == so here: clear its links and its head pointer */
+ next->so_head = 0;
+ return (1);
+}
+
+/*
+ * Socantsendmore indicates that no more data will be sent on the
+ * socket; it would normally be applied to a socket when the user
+ * informs the system that no more data is to be sent, by the protocol
+ * code (in case PRU_SHUTDOWN). Socantrcvmore indicates that no more data
+ * will be received, and will normally be applied to the socket by a
+ * protocol when it detects that the peer will send no more data.
+ * Data queued for reading in the socket may yet be read.
+ */
+
+socantsendmore(so)
+ struct socket *so;
+{
+ /* Set SS_CANTSENDMORE and issue the write-side wakeup. */
+ so->so_state |= SS_CANTSENDMORE;
+ sowwakeup(so);
+}
+
+socantrcvmore(so)
+ struct socket *so;
+{
+ /* Set SS_CANTRCVMORE and issue the read-side wakeup. */
+ so->so_state |= SS_CANTRCVMORE;
+ sorwakeup(so);
+}
+
+/*
+ * Wait for data to arrive at/drain from a socket buffer.
+ */
+sbwait(sb)
+ struct sockbuf *sb;
+{
+ /* Set SB_WAIT (sowakeup() tests it) and sleep on &sb->sb_cc; returns tsleep()'s result. */
+ sb->sb_flags |= SB_WAIT;
+ return (tsleep((caddr_t)&sb->sb_cc,
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK | PCATCH, netio, /* PCATCH is left out when SB_NOINTR is set */
+ sb->sb_timeo));
+}
+
+/*
+ * Lock a sockbuf already known to be locked;
+ * return any error returned from sleep (EINTR).
+ */
+sb_lock(sb)
+ register struct sockbuf *sb;
+{
+ int error;
+
+ while (sb->sb_flags & SB_LOCK) { /* held by someone else: record interest and sleep */
+ sb->sb_flags |= SB_WANT;
+ if (error = tsleep((caddr_t)&sb->sb_flags, /* no timeout (last argument is 0) */
+ (sb->sb_flags & SB_NOINTR) ? PSOCK : PSOCK|PCATCH,
+ netio, 0))
+ return (error); /* give up without taking the lock */
+ }
+ sb->sb_flags |= SB_LOCK; /* lock is free: take it */
+ return (0);
+}
+
+/*
+ * Wakeup processes waiting on a socket buffer.
+ * Do asynchronous notification via SIGIO
+ * if the socket has the SS_ASYNC flag set.
+ */
+sowakeup(so, sb)
+ register struct socket *so;
+ register struct sockbuf *sb;
+{
+ struct proc *p;
+
+ selwakeup(&sb->sb_sel);
+ sb->sb_flags &= ~SB_SEL;
+ if (sb->sb_flags & SB_WAIT) { /* set by sbwait(): wake whoever sleeps on &sb->sb_cc */
+ sb->sb_flags &= ~SB_WAIT;
+ wakeup((caddr_t)&sb->sb_cc);
+ }
+ if (so->so_state & SS_ASYNC) {
+ if (so->so_pgid < 0) /* negative so_pgid: its absolute value is passed to gsignal() */
+ gsignal(-so->so_pgid, SIGIO);
+ else if (so->so_pgid > 0 && (p = pfind(so->so_pgid)) != 0) /* positive: looked up with pfind(); 0 signals nobody */
+ psignal(p, SIGIO);
+ }
+}
+
+/*
+ * Socket buffer (struct sockbuf) utility routines.
+ *
+ * Each socket contains two socket buffers: one for sending data and
+ * one for receiving data. Each buffer contains a queue of mbufs,
+ * information about the number of mbufs and amount of data in the
+ * queue, and other fields allowing select() statements and notification
+ * on data availability to be implemented.
+ *
+ * Data stored in a socket buffer is maintained as a list of records.
+ * Each record is a list of mbufs chained together with the m_next
+ * field. Records are chained together with the m_nextpkt field. The upper
+ * level routine soreceive() expects the following conventions to be
+ * observed when placing information in the receive buffer:
+ *
+ * 1. If the protocol requires each message be preceded by the sender's
+ * name, then a record containing that name must be present before
+ * any associated data (mbuf's must be of type MT_SONAME).
+ * 2. If the protocol supports the exchange of ``access rights'' (really
+ * just additional data associated with the message), and there are
+ * ``rights'' to be received, then a record containing this data
+ * should be present (mbuf's must be of type MT_RIGHTS).
+ * 3. If a name or rights record exists, then it must be followed by
+ * a data record, perhaps of zero length.
+ *
+ * Before using a new socket structure it is first necessary to reserve
+ * buffer space to the socket, by calling sbreserve(). This should commit
+ * some of the available buffer space in the system buffer pool for the
+ * socket (currently, it does nothing but enforce limits). The space
+ * should be released by calling sbrelease() when the socket is destroyed.
+ */
+
+soreserve(so, sndcc, rcvcc)
+ register struct socket *so;
+ u_long sndcc, rcvcc;
+{
+ /* Reserve sndcc/rcvcc for so's send/receive buffers; returns 0, or ENOBUFS if either fails. */
+ if (sbreserve(&so->so_snd, sndcc) == 0)
+ goto bad;
+ if (sbreserve(&so->so_rcv, rcvcc) == 0)
+ goto bad2;
+ if (so->so_rcv.sb_lowat == 0) /* fill in low-water marks only where none is set yet */
+ so->so_rcv.sb_lowat = 1;
+ if (so->so_snd.sb_lowat == 0)
+ so->so_snd.sb_lowat = MCLBYTES;
+ if (so->so_snd.sb_lowat > so->so_snd.sb_hiwat) /* keep the send low-water mark <= high-water mark */
+ so->so_snd.sb_lowat = so->so_snd.sb_hiwat;
+ return (0);
+bad2:
+ sbrelease(&so->so_snd); /* undo the send-side reservation made above */
+bad:
+ return (ENOBUFS);
+}
+
+/*
+ * Allot mbufs to a sockbuf.
+ * Attempt to scale mbmax so that mbcnt doesn't become limiting
+ * if buffering efficiency is near the normal case.
+ */
+sbreserve(sb, cc)
+ struct sockbuf *sb;
+ u_long cc;
+{
+ /* Returns 1 after setting the limits, or 0 (sb untouched) if cc is above the sb_max-derived cap. */
+ if (cc > sb_max * MCLBYTES / (MSIZE + MCLBYTES))
+ return (0);
+ sb->sb_hiwat = cc;
+ sb->sb_mbmax = min(cc * 2, sb_max); /* mbuf-space limit: twice cc, capped at sb_max */
+ if (sb->sb_lowat > sb->sb_hiwat)
+ sb->sb_lowat = sb->sb_hiwat;
+ return (1);
+}
+
+/*
+ * Free mbufs held by a socket, and reserved mbuf space.
+ */
+sbrelease(sb)
+ struct sockbuf *sb;
+{
+ /* Flush everything queued (sbflush() panics if sb is locked), then zero both limits. */
+ sbflush(sb);
+ sb->sb_hiwat = sb->sb_mbmax = 0;
+}
+
+/*
+ * Routines to add and remove
+ * data from an mbuf queue.
+ *
+ * The routines sbappend() or sbappendrecord() are normally called to
+ * append new mbufs to a socket buffer, after checking that adequate
+ * space is available, comparing the function sbspace() with the amount
+ * of data to be added. sbappendrecord() differs from sbappend() in
+ * that data supplied is treated as the beginning of a new record.
+ * To place a sender's address, optional access rights, and data in a
+ * socket receive buffer, sbappendaddr() should be used. To place
+ * access rights and data in a socket receive buffer, sbappendrights()
+ * should be used. In either case, the new data begins a new record.
+ * Note that unlike sbappend() and sbappendrecord(), these routines check
+ * for the caller that there will be enough space to store the data.
+ * Each fails if there is not enough space, or if it cannot find mbufs
+ * to store additional information in.
+ *
+ * Reliable protocols may use the socket send buffer to hold data
+ * awaiting acknowledgement. Data is normally copied from a socket
+ * send buffer in a protocol with m_copy for output to a peer,
+ * and then removing the data from the socket buffer with sbdrop()
+ * or sbdroprecord() when the data is acknowledged by the peer.
+ */
+
+/*
+ * Append mbuf chain m to the last record in the
+ * socket buffer sb. The additional space associated
+ * with the mbuf chain is recorded in sb. Empty mbufs are
+ * discarded and mbufs are compacted where possible.
+ */
+sbappend(sb, m)
+ struct sockbuf *sb;
+ struct mbuf *m;
+{
+ register struct mbuf *n;
+
+ if (m == 0)
+ return;
+ if (n = sb->sb_mb) {
+ while (n->m_nextpkt) /* advance to the last record */
+ n = n->m_nextpkt;
+ do {
+ if (n->m_flags & M_EOR) { /* an mbuf of that record carries M_EOR: start a new record instead */
+ sbappendrecord(sb, m); /* XXXXXX!!!! */
+ return;
+ }
+ } while (n->m_next && (n = n->m_next)); /* exits with n at the record's last mbuf */
+ }
+ sbcompress(sb, m, n); /* n is 0 here when the buffer was empty */
+}
+
+#ifdef SOCKBUF_DEBUG
+sbcheck(sb) /* recount sb's m_next chain and panic if the totals disagree with sb_cc / sb_mbcnt */
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m;
+ register int len = 0, mbcnt = 0;
+
+ for (m = sb->sb_mb; m; m = m->m_next) {
+ len += m->m_len;
+ mbcnt += MSIZE; /* each mbuf costs MSIZE, plus its external storage if it has any */
+ if (m->m_flags & M_EXT)
+ mbcnt += m->m_ext.ext_size;
+ if (m->m_nextpkt) /* any further record is treated as fatal by this checker */
+ panic("sbcheck nextpkt");
+ }
+ if (len != sb->sb_cc || mbcnt != sb->sb_mbcnt) {
+ printf("cc %d != %d || mbcnt %d != %d\n", len, sb->sb_cc, /* NOTE(review): %d assumes int-sized counters -- confirm the field types */
+ mbcnt, sb->sb_mbcnt);
+ panic("sbcheck");
+ }
+}
+#endif
+
+/*
+ * As sbappend(), except the mbuf chain
+ * begins a new record.
+ */
+sbappendrecord(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+
+ if (m0 == 0)
+ return;
+ if (m = sb->sb_mb)
+ while (m->m_nextpkt) /* advance to the last existing record, if any */
+ m = m->m_nextpkt;
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ if (m)
+ m->m_nextpkt = m0; /* link the new record after the last one */
+ else
+ sb->sb_mb = m0; /* buffer was empty: m0 is the first record */
+ m = m0->m_next; /* detach the rest of the chain from m0 */
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) { /* move M_EOR from m0 to the following mbuf */
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0); /* append the remainder after m0 */
+}
+
+/*
+ * As above except that OOB data
+ * is inserted at the beginning of the sockbuf,
+ * but after any other OOB data.
+ */
+sbinsertoob(sb, m0)
+ register struct sockbuf *sb;
+ register struct mbuf *m0;
+{
+ register struct mbuf *m;
+ register struct mbuf **mp;
+
+ if (m0 == 0)
+ return;
+ for (mp = &sb->sb_mb; m = *mp; mp = &((*mp)->m_nextpkt)) { /* scan the records for the insertion point */
+ again:
+ switch (m->m_type) {
+
+ case MT_OOBDATA:
+ continue; /* WANT next train */
+
+ case MT_CONTROL:
+ if (m = m->m_next)
+ goto again; /* inspect THIS train further */
+ }
+ break; /* stop here: m0 is inserted in front of the record at *mp */
+ }
+ /*
+ * Put the first mbuf on the queue.
+ * Note this permits zero length records.
+ */
+ sballoc(sb, m0);
+ m0->m_nextpkt = *mp; /* splice m0 in as a record at the chosen position */
+ *mp = m0;
+ m = m0->m_next; /* detach the rest of the chain from m0 */
+ m0->m_next = 0;
+ if (m && (m0->m_flags & M_EOR)) { /* move M_EOR from m0 to the following mbuf */
+ m0->m_flags &= ~M_EOR;
+ m->m_flags |= M_EOR;
+ }
+ sbcompress(sb, m, m0); /* append the remainder after m0 */
+}
+
+/*
+ * Append address and data, and optionally, control (ancillary) data
+ * to the receive queue of a socket. If present,
+ * m0 must include a packet header with total length.
+ * Returns 0 if no space in sockbuf or insufficient mbufs.
+ */
+sbappendaddr(sb, asa, m0, control)
+ register struct sockbuf *sb;
+ struct sockaddr *asa;
+ struct mbuf *m0, *control;
+{
+ register struct mbuf *m, *n;
+ int space = asa->sa_len; /* running total: address + data + control bytes */
+
+if (m0 && (m0->m_flags & M_PKTHDR) == 0)
+panic("sbappendaddr");
+ if (m0)
+ space += m0->m_pkthdr.len; /* data length comes from the packet header, not a chain walk */
+ for (n = control; n; n = n->m_next) {
+ space += n->m_len;
+ if (n->m_next == 0) /* keep pointer to last control buf */
+ break;
+ }
+ if (space > sbspace(sb)) /* nothing has been modified yet on any of these failure returns */
+ return (0);
+ if (asa->sa_len > MLEN) /* the address must fit in the single mbuf obtained below */
+ return (0);
+ MGET(m, M_DONTWAIT, MT_SONAME);
+ if (m == 0)
+ return (0);
+ m->m_len = asa->sa_len;
+ bcopy((caddr_t)asa, mtod(m, caddr_t), asa->sa_len);
+ if (n) /* n is the last control mbuf, or 0 if there was no control */
+ n->m_next = m0; /* concatenate data to control */
+ else
+ control = m0;
+ m->m_next = control; /* resulting chain: address, then control, then data */
+ for (n = m; n; n = n->m_next)
+ sballoc(sb, n);
+ if (n = sb->sb_mb) { /* append the chain as a new record at the end */
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = m;
+ } else
+ sb->sb_mb = m;
+ return (1);
+}
+
+sbappendcontrol(sb, m0, control) /* append control followed by data m0 as a new record; 1 on success, 0 if no space */
+ struct sockbuf *sb;
+ struct mbuf *control, *m0;
+{
+ register struct mbuf *m, *n;
+ int space = 0;
+
+ if (control == 0)
+ panic("sbappendcontrol");
+ for (m = control; ; m = m->m_next) { /* total the control bytes; exits with m at the last control mbuf */
+ space += m->m_len;
+ if (m->m_next == 0)
+ break;
+ }
+ n = m; /* save pointer to last control buffer */
+ for (m = m0; m; m = m->m_next) /* add the data bytes by walking the chain */
+ space += m->m_len;
+ if (space > sbspace(sb)) /* nothing has been modified yet */
+ return (0);
+ n->m_next = m0; /* concatenate data to control */
+ for (m = control; m; m = m->m_next)
+ sballoc(sb, m);
+ if (n = sb->sb_mb) { /* append the chain as a new record at the end */
+ while (n->m_nextpkt)
+ n = n->m_nextpkt;
+ n->m_nextpkt = control;
+ } else
+ sb->sb_mb = control;
+ return (1);
+}
+
+/*
+ * Compress mbuf chain m into the socket
+ * buffer sb following mbuf n. If n
+ * is null, the buffer is presumed empty.
+ */
+sbcompress(sb, m, n)
+ register struct sockbuf *sb;
+ register struct mbuf *m, *n;
+{
+ register int eor = 0; /* accumulates M_EOR seen on any mbuf of the incoming chain */
+ register struct mbuf *o;
+
+ while (m) {
+ eor |= m->m_flags & M_EOR;
+ if (m->m_len == 0 && /* empty mbuf: free it unless it must survive to carry M_EOR */
+ (eor == 0 ||
+ (((o = m->m_next) || (o = n)) &&
+ o->m_type == m->m_type))) {
+ m = m_free(m);
+ continue;
+ }
+ if (n && (n->m_flags & (M_EXT | M_EOR)) == 0 && /* copy m's data into n when it fits and types match */
+ (n->m_data + n->m_len + m->m_len) < &n->m_dat[MLEN] &&
+ n->m_type == m->m_type) {
+ bcopy(mtod(m, caddr_t), mtod(n, caddr_t) + n->m_len,
+ (unsigned)m->m_len);
+ n->m_len += m->m_len;
+ sb->sb_cc += m->m_len; /* only the byte count grows; no sballoc() on this path */
+ m = m_free(m);
+ continue;
+ }
+ if (n) /* otherwise link m itself in after n */
+ n->m_next = m;
+ else
+ sb->sb_mb = m;
+ sballoc(sb, m);
+ n = m;
+ m->m_flags &= ~M_EOR; /* M_EOR is re-applied once, at the end */
+ m = m->m_next;
+ n->m_next = 0;
+ }
+ if (eor) {
+ if (n)
+ n->m_flags |= eor; /* mark the final mbuf as end of record */
+ else
+ printf("semi-panic: sbcompress\n");
+ }
+}
+
+/*
+ * Free all mbufs in a sockbuf.
+ * Check that all resources are reclaimed.
+ */
+sbflush(sb)
+ register struct sockbuf *sb;
+{
+ /* Must not be called on a locked sockbuf; leaves sb with no data and no mbufs. */
+ if (sb->sb_flags & SB_LOCK)
+ panic("sbflush");
+ while (sb->sb_mbcnt) /* loop on mbuf space, not sb_cc, so zero-length mbufs are released too */
+ sbdrop(sb, (int)sb->sb_cc);
+ if (sb->sb_cc || sb->sb_mb)
+ panic("sbflush 2");
+}
+
+/*
+ * Drop data from (the front of) a sockbuf.
+ */
+sbdrop(sb, len)
+ register struct sockbuf *sb;
+ register int len;
+{
+ register struct mbuf *m, *mn;
+ struct mbuf *next;
+
+ next = (m = sb->sb_mb) ? m->m_nextpkt : 0; /* m: current mbuf; next: the record after the current one */
+ while (len > 0) {
+ if (m == 0) { /* current record used up: continue with the next one */
+ if (next == 0) /* asked to drop more than the buffer holds */
+ panic("sbdrop");
+ m = next;
+ next = m->m_nextpkt;
+ continue;
+ }
+ if (m->m_len > len) { /* drop ends inside this mbuf: trim its front and stop */
+ m->m_len -= len;
+ m->m_data += len;
+ sb->sb_cc -= len;
+ break;
+ }
+ len -= m->m_len; /* whole mbuf is dropped */
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ while (m && m->m_len == 0) { /* also release empty mbufs left at the front */
+ sbfree(sb, m);
+ MFREE(m, mn);
+ m = mn;
+ }
+ if (m) { /* remainder of a partly dropped record becomes the first record */
+ sb->sb_mb = m;
+ m->m_nextpkt = next;
+ } else
+ sb->sb_mb = next;
+}
+
+/*
+ * Drop a record off the front of a sockbuf
+ * and move the next record to the front.
+ */
+sbdroprecord(sb)
+ register struct sockbuf *sb;
+{
+ register struct mbuf *m, *mn;
+
+ m = sb->sb_mb;
+ if (m) { /* nothing to do on an empty buffer */
+ sb->sb_mb = m->m_nextpkt; /* promote the second record before freeing the first */
+ do {
+ sbfree(sb, m);
+ MFREE(m, mn); /* mn receives the mbuf that followed m */
+ } while (m = mn);
+ }
+}
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
new file mode 100644
index 0000000..9891fe6
--- /dev/null
+++ b/sys/kern/vfs_export.c
@@ -0,0 +1,1322 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_subr.c 8.13 (Berkeley) 4/18/94
+ */
+
+/*
+ * External virtual filesystem routines
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/time.h>
+#include <sys/vnode.h>
+#include <sys/stat.h>
+#include <sys/namei.h>
+#include <sys/ucred.h>
+#include <sys/buf.h>
+#include <sys/errno.h>
+#include <sys/malloc.h>
+#include <sys/domain.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+#include <miscfs/specfs/specdev.h>
+
+enum vtype iftovt_tab[16] = { /* presumably indexed by the 4-bit file-type field of a mode -- confirm against the IFTOVT macro */
+ VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON,
+ VREG, VNON, VLNK, VNON, VSOCK, VNON, VNON, VBAD,
+};
+int vttoif_tab[9] = { /* presumably indexed by enum vtype, in its declaration order -- confirm against vnode.h */
+ 0, S_IFREG, S_IFDIR, S_IFBLK, S_IFCHR, S_IFLNK,
+ S_IFSOCK, S_IFIFO, S_IFMT,
+};
+
+/*
+ * Insq/Remq for the vnode usage lists; bufremvn also sets le_next to NOLIST.
+ */
+#define bufinsvn(bp, dp) LIST_INSERT_HEAD(dp, bp, b_vnbufs)
+#define bufremvn(bp) { \
+ LIST_REMOVE(bp, b_vnbufs); \
+ (bp)->b_vnbufs.le_next = NOLIST; \
+} /* NOTE(review): bare-brace macro; "if (x) bufremvn(bp); else" would not parse -- do { } while (0) is safer */
+
+TAILQ_HEAD(freelst, vnode) vnode_free_list; /* vnode free list */
+struct mntlist mountlist; /* mounted filesystem list */
+
+/*
+ * Initialize the vnode management data structures.
+ */
+vntblinit()
+{
+ /* Start with an empty vnode free list and an empty mount list. */
+ TAILQ_INIT(&vnode_free_list);
+ TAILQ_INIT(&mountlist);
+}
+
+/*
+ * Lock a filesystem.
+ * Used to prevent access to it while mounting and unmounting.
+ */
+vfs_lock(mp)
+ register struct mount *mp;
+{
+ /* Sleep until MNT_MLOCK is clear, then set it. Always returns 0. */
+ while(mp->mnt_flag & MNT_MLOCK) {
+ mp->mnt_flag |= MNT_MWAIT; /* ask vfs_unlock() for a wakeup */
+ sleep((caddr_t)mp, PVFS);
+ }
+ mp->mnt_flag |= MNT_MLOCK;
+ return (0);
+}
+
+/*
+ * Unlock a locked filesystem.
+ * Panic if filesystem is not locked.
+ */
+void
+vfs_unlock(mp)
+ register struct mount *mp;
+{
+ /* Clear MNT_MLOCK and wake anyone who set MNT_MWAIT in vfs_lock(). */
+ if ((mp->mnt_flag & MNT_MLOCK) == 0)
+ panic("vfs_unlock: not locked");
+ mp->mnt_flag &= ~MNT_MLOCK;
+ if (mp->mnt_flag & MNT_MWAIT) {
+ mp->mnt_flag &= ~MNT_MWAIT;
+ wakeup((caddr_t)mp); /* same channel vfs_lock() sleeps on */
+ }
+}
+
+/*
+ * Mark a mount point as busy.
+ * Used to synchronize access and to delay unmounting.
+ */
+vfs_busy(mp)
+ register struct mount *mp;
+{
+ /* Returns 0 with MNT_MPBUSY set, or 1 (flag not set) if MNT_UNMOUNT is set. */
+ while(mp->mnt_flag & MNT_MPBUSY) {
+ mp->mnt_flag |= MNT_MPWANT; /* ask vfs_unbusy() for a wakeup */
+ sleep((caddr_t)&mp->mnt_flag, PVFS);
+ }
+ if (mp->mnt_flag & MNT_UNMOUNT) /* checked only after any previous holder has let go */
+ return (1);
+ mp->mnt_flag |= MNT_MPBUSY;
+ return (0);
+}
+
+/*
+ * Free a busy filesystem.
+ * Panic if filesystem is not busy.
+ */
+vfs_unbusy(mp)
+ register struct mount *mp;
+{
+ /* Clear MNT_MPBUSY and wake anyone who set MNT_MPWANT in vfs_busy(). */
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vfs_unbusy: not busy");
+ mp->mnt_flag &= ~MNT_MPBUSY;
+ if (mp->mnt_flag & MNT_MPWANT) {
+ mp->mnt_flag &= ~MNT_MPWANT;
+ wakeup((caddr_t)&mp->mnt_flag); /* same channel vfs_busy() sleeps on */
+ }
+}
+
+/*
+ * Lookup a mount point by filesystem identifier.
+ */
+struct mount *
+getvfs(fsid)
+ fsid_t *fsid;
+{
+ register struct mount *mp;
+
+ for (mp = mountlist.tqh_first; mp != NULL; mp = mp->mnt_list.tqe_next) { /* linear scan of the mount list */
+ if (mp->mnt_stat.f_fsid.val[0] == fsid->val[0] && /* both words of the fsid must match */
+ mp->mnt_stat.f_fsid.val[1] == fsid->val[1])
+ return (mp);
+ }
+ return ((struct mount *)0); /* no mounted filesystem has this fsid */
+}
+
+/*
+ * Get a new unique fsid
+ */
+void
+getnewfsid(mp, mtype)
+ struct mount *mp;
+ int mtype;
+{
+static u_short xxxfs_mntid; /* persists across calls: the next minor number to try */
+
+ fsid_t tfsid;
+
+ mp->mnt_stat.f_fsid.val[0] = makedev(nblkdev + mtype, 0); /* provisional value; overwritten at the end */
+ mp->mnt_stat.f_fsid.val[1] = mtype;
+ if (xxxfs_mntid == 0) /* the first candidate minor is never 0 */
+ ++xxxfs_mntid;
+ tfsid.val[0] = makedev(nblkdev + mtype, xxxfs_mntid);
+ tfsid.val[1] = mtype;
+ if (mountlist.tqh_first != NULL) {
+ while (getvfs(&tfsid)) { /* candidate already in use: try the next one */
+ tfsid.val[0]++; /* NOTE(review): assumes incrementing the dev value bumps its minor -- confirm makedev() layout */
+ xxxfs_mntid++;
+ }
+ }
+ mp->mnt_stat.f_fsid.val[0] = tfsid.val[0]; /* first candidate that no mounted filesystem uses */
+}
+
+/*
+ * Set vnode attributes to VNOVAL
+ */
+void vattr_null(vap)
+ register struct vattr *vap;
+{
+ /* Type becomes VNON, va_vaflags becomes 0, every other field assigned below becomes VNOVAL. */
+ vap->va_type = VNON;
+ vap->va_size = vap->va_bytes = VNOVAL; /* set apart from the long chain below -- presumably because these fields are wider; confirm */
+ vap->va_mode = vap->va_nlink = vap->va_uid = vap->va_gid =
+ vap->va_fsid = vap->va_fileid =
+ vap->va_blocksize = vap->va_rdev =
+ vap->va_atime.ts_sec = vap->va_atime.ts_nsec =
+ vap->va_mtime.ts_sec = vap->va_mtime.ts_nsec =
+ vap->va_ctime.ts_sec = vap->va_ctime.ts_nsec =
+ vap->va_flags = vap->va_gen = VNOVAL; /* chained assignment: each field receives the value stored to its right */
+ vap->va_vaflags = 0;
+}
+
+/*
+ * Routines having to do with the management of the vnode table.
+ */
+extern int (**dead_vnodeop_p)(); /* defined outside this file */
+extern void vclean();
+long numvnodes; /* incremented by getnewvnode() for each vnode it mallocs */
+extern struct vattr va_null;
+
+/*
+ * Return the next vnode from the free list.
+ */
+getnewvnode(tag, mp, vops, vpp) /* returns 0 with *vpp set, or ENFILE with *vpp = 0 */
+ enum vtagtype tag;
+ struct mount *mp;
+ int (**vops)();
+ struct vnode **vpp;
+{
+ register struct vnode *vp;
+ int s;
+
+ if ((vnode_free_list.tqh_first == NULL && /* malloc a new vnode while under desiredvnodes, */
+ numvnodes < 2 * desiredvnodes) || /* or up to twice that when the free list is empty */
+ numvnodes < desiredvnodes) {
+ vp = (struct vnode *)malloc((u_long)sizeof *vp,
+ M_VNODE, M_WAITOK);
+ bzero((char *)vp, sizeof *vp);
+ numvnodes++;
+ } else { /* otherwise recycle the vnode at the head of the free list */
+ if ((vp = vnode_free_list.tqh_first) == NULL) {
+ tablefull("vnode");
+ *vpp = 0;
+ return (ENFILE);
+ }
+ if (vp->v_usecount) /* a vnode on the free list must have no users */
+ panic("free vnode isn't");
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ /* see comment on why 0xdeadb is set at end of vgone (below) */
+ vp->v_freelist.tqe_prev = (struct vnode **)0xdeadb;
+ vp->v_lease = NULL;
+ if (vp->v_type != VBAD) /* anything but VBAD goes through vgone() before reuse */
+ vgone(vp);
+#ifdef DIAGNOSTIC
+ if (vp->v_data)
+ panic("cleaned vnode isn't");
+ s = splbio();
+ if (vp->v_numoutput)
+ panic("Clean vnode has pending I/O's");
+ splx(s);
+#endif
+ vp->v_flag = 0; /* reset the fields left over from the previous use */
+ vp->v_lastr = 0;
+ vp->v_ralen = 0;
+ vp->v_maxra = 0;
+ vp->v_lastw = 0;
+ vp->v_lasta = 0;
+ vp->v_cstart = 0;
+ vp->v_clen = 0;
+ vp->v_socket = 0;
+ }
+ vp->v_type = VNON; /* common setup for both the new and the recycled vnode */
+ cache_purge(vp);
+ vp->v_tag = tag;
+ vp->v_op = vops;
+ insmntque(vp, mp); /* put it on mp's vnode list */
+ *vpp = vp;
+ vp->v_usecount = 1; /* handed back with a single reference */
+ vp->v_data = 0;
+ return (0);
+}
+
+/*
+ * Put a vnode on the vnode list of mount point mp, first taking it
+ * off the list of the mount point it belonged to before (if any).
+ * A NULL mp leaves the vnode on no mount list at all.
+ */
+insmntque(vp, mp)
+	register struct vnode *vp;
+	register struct mount *mp;
+{
+
+	/* Unlink from the previous owner's list, if there was one. */
+	if (vp->v_mount != NULL)
+		LIST_REMOVE(vp, v_mntvnodes);
+	vp->v_mount = mp;
+	/* Link at the head of the new owner's list, if there is one. */
+	if (mp != NULL)
+		LIST_INSERT_HEAD(&mp->mnt_vnodelist, vp, v_mntvnodes);
+}
+
+/*
+ * Update outstanding I/O count and do wakeup if requested.
+ *
+ * Clears B_WRITEINPROG on bp.  If bp is attached to a vnode, that
+ * vnode's v_numoutput is decremented (panicking if it goes negative)
+ * and, once no output remains, a waiter that set VBWAIT is woken on
+ * &vp->v_numoutput and the flag is cleared.
+ */
+vwakeup(bp)
+	register struct buf *bp;
+{
+	register struct vnode *vp;
+
+	bp->b_flags &= ~B_WRITEINPROG;
+	if ((vp = bp->b_vp) != NULL) {
+		if (--vp->v_numoutput < 0)
+			panic("vwakeup: neg numoutput");
+		/*
+		 * The count was just verified to be non-negative, so it
+		 * does not need to be checked a second time here.
+		 */
+		if ((vp->v_flag & VBWAIT) && vp->v_numoutput <= 0) {
+			vp->v_flag &= ~VBWAIT;
+			wakeup((caddr_t)&vp->v_numoutput);
+		}
+	}
+}
+
+/*
+ * Flush out and invalidate all buffers associated with a vnode.
+ * Called with the underlying object locked.
+ *
+ * flags:
+ *   V_SAVE      call VOP_FSYNC() first and panic if dirty buffers
+ *               remain afterwards; a delayed-write buffer met later
+ *               is written with VOP_BWRITE() instead of discarded.
+ *   V_SAVEMETA  leave buffers with a negative b_lblkno alone.
+ * slpflag and slptimeo are handed to tsleep() when a busy buffer has
+ * to be waited for.
+ *
+ * Returns 0, or the error from VOP_FSYNC() or tsleep().  Panics if
+ * buffers are still queued on the vnode at the end and V_SAVEMETA was
+ * not requested.
+ */
+int
+vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
+ register struct vnode *vp;
+ int flags;
+ struct ucred *cred;
+ struct proc *p;
+ int slpflag, slptimeo;
+{
+ register struct buf *bp;
+ struct buf *nbp, *blist;
+ int s, error;
+
+ if (flags & V_SAVE) {
+ if (error = VOP_FSYNC(vp, cred, MNT_WAIT, p))
+ return (error);
+ if (vp->v_dirtyblkhd.lh_first != NULL)
+ panic("vinvalbuf: dirty bufs");
+ }
+ for (;;) { /* each pass restarts from the heads of the buffer lists */
+ if ((blist = vp->v_cleanblkhd.lh_first) && flags & V_SAVEMETA)
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist && (blist = vp->v_dirtyblkhd.lh_first) &&
+ (flags & V_SAVEMETA))
+ while (blist && blist->b_lblkno < 0)
+ blist = blist->b_vnbufs.le_next;
+ if (!blist)
+ break; /* nothing left that may be invalidated */
+
+ for (bp = blist; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next; /* fetched before bp is released */
+ if (flags & V_SAVEMETA && bp->b_lblkno < 0)
+ continue;
+ s = splbio();
+ if (bp->b_flags & B_BUSY) {
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp,
+ slpflag | (PRIBIO + 1), "vinvalbuf",
+ slptimeo);
+ splx(s);
+ if (error)
+ return (error);
+ break; /* slept: restart the scan from the list heads */
+ }
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ /*
+ * XXX Since there are no node locks for NFS, I believe
+ * there is a slight chance that a delayed write will
+ * occur while sleeping just above, so check for it.
+ */
+ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) {
+ (void) VOP_BWRITE(bp);
+ break; /* wrote a buffer: restart the scan */
+ }
+ bp->b_flags |= B_INVAL;
+ brelse(bp);
+ }
+ }
+ if (!(flags & V_SAVEMETA) &&
+ (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first))
+ panic("vinvalbuf: flush failed");
+ return (0);
+}
+
+/*
+ * Attach buffer bp to vnode vp.
+ *
+ * The buffer must not already belong to a vnode.  A hold is taken on
+ * vp through VHOLD(), the buffer's device is set to the vnode's
+ * device number for block and character devices and to NODEV for
+ * everything else, and the buffer is entered on the vnode's clean
+ * list.
+ */
+bgetvp(vp, bp)
+	register struct vnode *vp;
+	register struct buf *bp;
+{
+
+	if (bp->b_vp != NULL)
+		panic("bgetvp: not free");
+	VHOLD(vp);
+	bp->b_vp = vp;
+	bp->b_dev = (vp->v_type == VBLK || vp->v_type == VCHR) ?
+	    vp->v_rdev : NODEV;
+	/* A newly associated buffer always starts on the clean list. */
+	bufinsvn(bp, &vp->v_cleanblkhd);
+}
+
+/*
+ * Detach buffer bp from the vnode it belongs to.
+ *
+ * Panics when the buffer has no vnode.  The buffer is taken off the
+ * vnode's buffer list if it is on one, its vnode pointer is cleared,
+ * and the hold on the vnode is dropped through HOLDRELE().
+ */
+brelvp(bp)
+	register struct buf *bp;
+{
+	struct vnode *oldvp;
+
+	if (bp->b_vp == NULL)
+		panic("brelvp: NULL");
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+	oldvp = bp->b_vp;
+	bp->b_vp = NULL;
+	HOLDRELE(oldvp);
+}
+
+/*
+ * Move a buffer onto one of newvp's buffer lists.
+ * Used to assign file specific control information
+ * (indirect blocks) to the vnode to which they belong.
+ *
+ * The buffer is first taken off whatever list it is on.  It then
+ * goes on newvp's dirty list when B_DELWRI is set and on its clean
+ * list otherwise.  A NULL newvp only produces a console message.
+ */
+reassignbuf(bp, newvp)
+	register struct buf *bp;
+	register struct vnode *newvp;
+{
+	register struct buflists *target;
+
+	if (newvp == NULL) {
+		printf("reassignbuf: NULL");
+		return;
+	}
+	if (bp->b_vnbufs.le_next != NOLIST)
+		bufremvn(bp);
+	target = (bp->b_flags & B_DELWRI) ?
+	    &newvp->v_dirtyblkhd : &newvp->v_cleanblkhd;
+	bufinsvn(bp, target);
+}
+
+/*
+ * Create a vnode for a block device.
+ * Used for root filesystem, argdev, and swap areas.
+ * Also used for memory file system special devices.
+ *
+ * On success 0 is returned and *vpp holds either a new VBLK vnode or,
+ * when checkalias() finds one, the existing alias for the device.
+ * If getnewvnode() fails its error is returned and *vpp is NULLVP.
+ * For dev == NODEV nothing is created: 0 is returned as before, but
+ * *vpp is now set to NULLVP so that the caller never sees an
+ * uninitialized pointer.
+ */
+bdevvp(dev, vpp)
+	dev_t dev;
+	struct vnode **vpp;
+{
+	register struct vnode *vp;
+	struct vnode *nvp;
+	int error;
+
+	if (dev == NODEV) {
+		*vpp = NULLVP;
+		return (0);
+	}
+	error = getnewvnode(VT_NON, (struct mount *)0, spec_vnodeop_p, &nvp);
+	if (error) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	vp = nvp;
+	vp->v_type = VBLK;
+	if ((nvp = checkalias(vp, dev, (struct mount *)0)) != NULLVP) {
+		/* An alias already exists: give up the new vnode. */
+		vput(vp);
+		vp = nvp;
+	}
+	*vpp = vp;
+	return (0);
+}
+
+/*
+ * Check to see if the new vnode represents a special device
+ * for which we already have a vnode (either because of
+ * bdevvp() or because of a different vnode representing
+ * the same block device). If such an alias exists, deallocate
+ * the existing contents and return the aliased vnode. The
+ * caller is responsible for filling it with its new contents.
+ *
+ * Returns NULLVP when nvp is not a block or character device, or
+ * when no alias tagged VT_NON exists.  In the latter case nvp gets
+ * its own specinfo and is entered at the head of the hash chain for
+ * nvp_rdev; if an alias with another tag was found, both vnodes are
+ * flagged VALIASED and the alias is released with vput().
+ * Otherwise the VT_NON alias is cleaned with vclean(), takes over
+ * nvp's operations vector and tag, is moved to mp, and is returned;
+ * nvp's type is reset to VNON.
+ */
+struct vnode *
+checkalias(nvp, nvp_rdev, mp)
+ register struct vnode *nvp;
+ dev_t nvp_rdev;
+ struct mount *mp;
+{
+ register struct vnode *vp;
+ struct vnode **vpp;
+
+ if (nvp->v_type != VBLK && nvp->v_type != VCHR)
+ return (NULLVP);
+
+ vpp = &speclisth[SPECHASH(nvp_rdev)];
+loop:
+ for (vp = *vpp; vp; vp = vp->v_specnext) {
+ if (nvp_rdev != vp->v_rdev || nvp->v_type != vp->v_type)
+ continue;
+ /*
+ * Alias, but not in use, so flush it out.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ goto loop; /* the chain was modified: rescan */
+ }
+ if (vget(vp, 1))
+ goto loop; /* vget failed: rescan the chain */
+ break;
+ }
+ if (vp == NULL || vp->v_tag != VT_NON) {
+ MALLOC(nvp->v_specinfo, struct specinfo *,
+ sizeof(struct specinfo), M_VNODE, M_WAITOK);
+ nvp->v_rdev = nvp_rdev;
+ nvp->v_hashchain = vpp;
+ nvp->v_specnext = *vpp;
+ nvp->v_specflags = 0;
+ *vpp = nvp;
+ if (vp != NULL) {
+ nvp->v_flag |= VALIASED;
+ vp->v_flag |= VALIASED;
+ vput(vp);
+ }
+ return (NULLVP);
+ }
+ VOP_UNLOCK(vp);
+ vclean(vp, 0);
+ vp->v_op = nvp->v_op;
+ vp->v_tag = nvp->v_tag;
+ nvp->v_type = VNON;
+ insmntque(vp, mp);
+ return (vp);
+}
+
+/*
+ * Grab a particular vnode from the free list, increment its
+ * reference count and lock it. The vnode lock bit (VXLOCK) is set
+ * if the vnode is being eliminated in vgone. In that case the
+ * process is awakened when the transition is completed, and an
+ * error is returned to indicate that the vnode is no longer usable
+ * (possibly having been changed to a new file system type).
+ *
+ * Returns 1 after sleeping when the vnode was being cleaned out.
+ * Otherwise returns 0 with the use count incremented, and with the
+ * vnode locked through VOP_LOCK() when lockflag is non-zero.
+ */
+vget(vp, lockflag)
+ register struct vnode *vp;
+ int lockflag;
+{
+
+ /*
+ * If the vnode is in the process of being cleaned out for
+ * another use, we wait for the cleaning to finish and then
+ * return failure. Cleaning is determined either by checking
+ * that the VXLOCK flag is set, or that the use count is
+ * zero with the back pointer set to show that it has been
+ * removed from the free list by getnewvnode. The VXLOCK
+ * flag may not have been set yet because vclean is blocked in
+ * the VOP_LOCK call waiting for the VOP_INACTIVE to complete.
+ */
+ if ((vp->v_flag & VXLOCK) ||
+ (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev == (struct vnode **)0xdeadb)) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return (1);
+ }
+ if (vp->v_usecount == 0) /* unreferenced, so still on the free list */
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ vp->v_usecount++;
+ if (lockflag)
+ VOP_LOCK(vp);
+ return (0);
+}
+
+/*
+ * Take one more reference on a vnode that is already referenced.
+ * Panics when the use count is not positive; such a vnode has to be
+ * obtained through vget() instead.
+ */
+void
+vref(vp)
+	struct vnode *vp;
+{
+
+	if (vp->v_usecount < 1)
+		panic("vref used where vget required");
+	++vp->v_usecount;
+}
+
+/*
+ * Unlock a vnode with VOP_UNLOCK() and then drop one reference to it
+ * with vrele().
+ */
+void
+vput(vp)
+	register struct vnode *vp;
+{
+
+	VOP_UNLOCK(vp);
+	vrele(vp);
+}
+
+/*
+ * Drop one reference to a vnode.
+ *
+ * While other references remain nothing else happens.  When the last
+ * one goes away the vnode is appended to the tail of the free list
+ * (kept in LRU order) and VOP_INACTIVE() is called on it.  With
+ * DIAGNOSTIC, a NULL vnode or inconsistent use/write counts panic.
+ */
+void
+vrele(vp)
+	register struct vnode *vp;
+{
+
+#ifdef DIAGNOSTIC
+	if (vp == NULL)
+		panic("vrele: null vp");
+#endif
+	if (--vp->v_usecount > 0)
+		return;
+#ifdef DIAGNOSTIC
+	if (vp->v_usecount != 0 || vp->v_writecount != 0) {
+		vprint("vrele: bad ref count", vp);
+		panic("vrele: ref cnt");
+	}
+#endif
+	TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+	VOP_INACTIVE(vp);
+}
+
+/*
+ * Record that a page or buffer structure now refers to the vnode by
+ * bumping its hold count.
+ */
+void
+vhold(vp)
+	register struct vnode *vp;
+{
+
+	++vp->v_holdcnt;
+}
+
+/*
+ * Record that a page or buffer structure no longer refers to the
+ * vnode.  Panics when the hold count is not positive.
+ */
+void
+holdrele(vp)
+	register struct vnode *vp;
+{
+
+	if (vp->v_holdcnt < 1)
+		panic("holdrele: holdcnt");
+	--vp->v_holdcnt;
+}
+
+/*
+ * Remove any vnodes in the vnode table belonging to mount point mp.
+ *
+ * If MNT_NOFORCE is specified, there should not be any active ones,
+ * return error if any are found (nb: this is a user error, not a
+ * system error). If MNT_FORCE is specified, detach any active vnodes
+ * that are found.
+ *
+ * skipvp, if it is on the list, is left untouched.  The flags
+ * SKIPSYSTEM, WRITECLOSE and FORCECLOSE are explained where they are
+ * tested below.  The mount point must be marked MNT_MPBUSY or the
+ * routine panics.  Returns EBUSY when at least one vnode was still
+ * in use and could not be removed, 0 otherwise.
+ */
+#ifdef DIAGNOSTIC
+int busyprt = 0; /* print out busy vnodes */
+struct ctldebug debug1 = { "busyprt", &busyprt };
+#endif
+
+vflush(mp, skipvp, flags)
+ struct mount *mp;
+ struct vnode *skipvp;
+ int flags;
+{
+ register struct vnode *vp, *nvp;
+ int busy = 0; /* number of in-use vnodes that were left alone */
+
+ if ((mp->mnt_flag & MNT_MPBUSY) == 0)
+ panic("vflush: not busy");
+loop:
+ for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
+ if (vp->v_mount != mp)
+ goto loop; /* vnode left this mount: start over */
+ nvp = vp->v_mntvnodes.le_next;
+ /*
+ * Skip over a selected vnode.
+ */
+ if (vp == skipvp)
+ continue;
+ /*
+ * Skip over vnodes marked VSYSTEM.
+ */
+ if ((flags & SKIPSYSTEM) && (vp->v_flag & VSYSTEM))
+ continue;
+ /*
+ * If WRITECLOSE is set, only flush out regular file
+ * vnodes open for writing.
+ */
+ if ((flags & WRITECLOSE) &&
+ (vp->v_writecount == 0 || vp->v_type != VREG))
+ continue;
+ /*
+ * With v_usecount == 0, all we need to do is clear
+ * out the vnode data structures and we are done.
+ */
+ if (vp->v_usecount == 0) {
+ vgone(vp);
+ continue;
+ }
+ /*
+ * If FORCECLOSE is set, forcibly close the vnode.
+ * For block or character devices, revert to an
+ * anonymous device. For all other files, just kill them.
+ */
+ if (flags & FORCECLOSE) {
+ if (vp->v_type != VBLK && vp->v_type != VCHR) {
+ vgone(vp);
+ } else {
+ vclean(vp, 0);
+ vp->v_op = spec_vnodeop_p;
+ insmntque(vp, (struct mount *)0);
+ }
+ continue;
+ }
+#ifdef DIAGNOSTIC
+ if (busyprt)
+ vprint("vflush: busy vnode", vp);
+#endif
+ busy++;
+ }
+ if (busy)
+ return (EBUSY);
+ return (0);
+}
+
+/*
+ * Disassociate the underlying file system from a vnode.
+ *
+ * With DOCLOSE in flags the vnode's buffers are flushed through
+ * vinvalbuf() and, if the vnode was in use, VOP_CLOSE() is called
+ * before VOP_INACTIVE().  VOP_RECLAIM() is always called and must
+ * succeed.  On return the vnode uses dead_vnodeop_p, is tagged
+ * VT_NON, and anyone sleeping on it with VXWANT set has been woken.
+ * Panics if VXLOCK is already set once the vnode lock is obtained.
+ */
+void
+vclean(vp, flags)
+ register struct vnode *vp;
+ int flags;
+{
+ int active; /* use count on entry; non-zero means the vnode was in use */
+
+ /*
+ * Check to see if the vnode is in use.
+ * If so we have to reference it before we clean it out
+ * so that its count cannot fall to zero and generate a
+ * race against ourselves to recycle it.
+ */
+ if (active = vp->v_usecount)
+ VREF(vp);
+ /*
+ * Even if the count is zero, the VOP_INACTIVE routine may still
+ * have the object locked while it cleans it out. The VOP_LOCK
+ * ensures that the VOP_INACTIVE routine is done with its work.
+ * For active vnodes, it ensures that no other activity can
+ * occur while the underlying object is being cleaned out.
+ */
+ VOP_LOCK(vp);
+ /*
+ * Prevent the vnode from being recycled or
+ * brought into use while we clean it out.
+ */
+ if (vp->v_flag & VXLOCK)
+ panic("vclean: deadlock");
+ vp->v_flag |= VXLOCK;
+ /*
+ * Clean out any buffers associated with the vnode.
+ */
+ if (flags & DOCLOSE)
+ vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); /* return value is ignored */
+ /*
+ * Any other processes trying to obtain this lock must first
+ * wait for VXLOCK to clear, then call the new lock operation.
+ */
+ VOP_UNLOCK(vp);
+ /*
+ * If purging an active vnode, it must be closed and
+ * deactivated before being reclaimed.
+ */
+ if (active) {
+ if (flags & DOCLOSE)
+ VOP_CLOSE(vp, IO_NDELAY, NOCRED, NULL);
+ VOP_INACTIVE(vp);
+ }
+ /*
+ * Reclaim the vnode.
+ */
+ if (VOP_RECLAIM(vp))
+ panic("vclean: cannot reclaim");
+ if (active)
+ vrele(vp); /* drop the reference taken by VREF above */
+
+ /*
+ * Done with purge, notify sleepers of the grim news.
+ */
+ vp->v_op = dead_vnodeop_p;
+ vp->v_tag = VT_NON;
+ vp->v_flag &= ~VXLOCK;
+ if (vp->v_flag & VXWANT) {
+ vp->v_flag &= ~VXWANT;
+ wakeup((caddr_t)vp);
+ }
+}
+
+/*
+ * Eliminate all activity associated with the requested vnode
+ * and with all vnodes aliased to the requested vnode.
+ *
+ * When vp is flagged VALIASED, every other vnode on its hash chain
+ * with the same device number and type is passed to vgone(), one per
+ * scan of the chain, until the VALIASED flag on vp clears.  vp itself
+ * is passed to vgone() last.  If VXLOCK is already set on an aliased
+ * vp, the routine only sleeps on vp and returns.
+ */
+void vgoneall(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+
+ if (vp->v_flag & VALIASED) {
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Ensure that vp will not be vgone'd while we
+ * are eliminating its aliases.
+ */
+ vp->v_flag |= VXLOCK;
+ while (vp->v_flag & VALIASED) {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type || vp == vq)
+ continue;
+ vgone(vq);
+ break; /* rescan the chain from its head */
+ }
+ }
+ /*
+ * Remove the lock so that vgone below will
+ * really eliminate the vnode after which time
+ * vgone will awaken any sleepers.
+ */
+ vp->v_flag &= ~VXLOCK;
+ }
+ vgone(vp);
+}
+
+/*
+ * Eliminate all activity associated with a vnode
+ * in preparation for reuse.
+ *
+ * The vnode is cleaned with vclean(vp, DOCLOSE), taken off its mount
+ * point's vnode list and, for block and character devices, unlinked
+ * from the device hash chain with its specinfo freed.  If exactly one
+ * other vnode for the same device remains on the chain, that vnode's
+ * VALIASED flag is cleared as well.  The vnode ends up with type
+ * VBAD.  If VXLOCK is already set, the routine only sleeps on the
+ * vnode and returns.
+ */
+void vgone(vp)
+ register struct vnode *vp;
+{
+ register struct vnode *vq;
+ struct vnode *vx; /* first remaining alias found on the hash chain */
+
+ /*
+ * If a vgone (or vclean) is already in progress,
+ * wait until it is done and return.
+ */
+ if (vp->v_flag & VXLOCK) {
+ vp->v_flag |= VXWANT;
+ sleep((caddr_t)vp, PINOD);
+ return;
+ }
+ /*
+ * Clean out the filesystem specific data.
+ */
+ vclean(vp, DOCLOSE);
+ /*
+ * Delete from old mount point vnode list, if on one.
+ */
+ if (vp->v_mount != NULL) {
+ LIST_REMOVE(vp, v_mntvnodes);
+ vp->v_mount = NULL;
+ }
+ /*
+ * If special device, remove it from special device alias list.
+ */
+ if (vp->v_type == VBLK || vp->v_type == VCHR) {
+ if (*vp->v_hashchain == vp) {
+ *vp->v_hashchain = vp->v_specnext;
+ } else {
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_specnext != vp)
+ continue;
+ vq->v_specnext = vp->v_specnext;
+ break;
+ }
+ if (vq == NULL)
+ panic("missing bdev");
+ }
+ if (vp->v_flag & VALIASED) {
+ vx = NULL;
+ for (vq = *vp->v_hashchain; vq; vq = vq->v_specnext) {
+ if (vq->v_rdev != vp->v_rdev ||
+ vq->v_type != vp->v_type)
+ continue;
+ if (vx)
+ break; /* a second alias exists; vq stays non-NULL */
+ vx = vq;
+ }
+ if (vx == NULL)
+ panic("missing alias");
+ if (vq == NULL) /* vx is the only alias left */
+ vx->v_flag &= ~VALIASED;
+ vp->v_flag &= ~VALIASED;
+ }
+ FREE(vp->v_specinfo, M_VNODE);
+ vp->v_specinfo = NULL;
+ }
+ /*
+ * If it is on the freelist and not already at the head,
+ * move it to the head of the list. The test of the back
+ * pointer and the reference count of zero is because
+ * it will be removed from the free list by getnewvnode,
+ * but will not have its reference count incremented until
+ * after calling vgone. If the reference count were
+ * incremented first, vgone would (incorrectly) try to
+ * close the previous instance of the underlying object.
+ * So, the back pointer is explicitly set to `0xdeadb' in
+ * getnewvnode after removing it from the freelist to ensure
+ * that we do not try to move it here.
+ */
+ if (vp->v_usecount == 0 &&
+ vp->v_freelist.tqe_prev != (struct vnode **)0xdeadb &&
+ vnode_free_list.tqh_first != vp) {
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ }
+ vp->v_type = VBAD;
+}
+
+/*
+ * Look up a special-device vnode by device number and type.
+ *
+ * The first match on the device's hash chain is stored in *vpp and 1
+ * is returned.  When nothing matches, 0 is returned and *vpp is left
+ * untouched.
+ */
+vfinddev(dev, type, vpp)
+	dev_t dev;
+	enum vtype type;
+	struct vnode **vpp;
+{
+	register struct vnode *cand;
+
+	cand = speclisth[SPECHASH(dev)];
+	while (cand != NULL) {
+		if (dev == cand->v_rdev && type == cand->v_type) {
+			*vpp = cand;
+			return (1);
+		}
+		cand = cand->v_specnext;
+	}
+	return (0);
+}
+
+/*
+ * Add up the use counts of all vnodes that refer to the same special
+ * device as vp.
+ *
+ * A vnode that is not flagged VALIASED simply reports its own use
+ * count.  Otherwise the hash chain is walked; an unused alias other
+ * than vp is eliminated with vgone(), after which the whole
+ * computation starts over.
+ */
+vcount(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *alias, *next;
+	int total;
+
+rescan:
+	if (!(vp->v_flag & VALIASED))
+		return (vp->v_usecount);
+	total = 0;
+	for (alias = *vp->v_hashchain; alias != NULL; alias = next) {
+		next = alias->v_specnext;
+		if (alias->v_rdev == vp->v_rdev &&
+		    alias->v_type == vp->v_type) {
+			/* Alias, but not in use, so flush it out. */
+			if (alias->v_usecount == 0 && alias != vp) {
+				vgone(alias);
+				goto rescan;
+			}
+			total += alias->v_usecount;
+		}
+	}
+	return (total);
+}
+
+/*
+ * Print a one-line description of a vnode on the console: optional
+ * label, type name, use/write/hold counts and the names of the flags
+ * that are set.  When the vnode has private data, VOP_PRINT() adds a
+ * second, filesystem specific line.
+ */
+static char *typename[] =
+   { "VNON", "VREG", "VDIR", "VBLK", "VCHR", "VLNK", "VSOCK", "VFIFO", "VBAD" };
+
+vprint(label, vp)
+	char *label;
+	register struct vnode *vp;
+{
+	/* Flag bits in the order in which their names are printed. */
+	static struct {
+		int	 bit;
+		char	*text;
+	} flagnames[] = {
+		{ VROOT,	"|VROOT" },
+		{ VTEXT,	"|VTEXT" },
+		{ VSYSTEM,	"|VSYSTEM" },
+		{ VXLOCK,	"|VXLOCK" },
+		{ VXWANT,	"|VXWANT" },
+		{ VBWAIT,	"|VBWAIT" },
+		{ VALIASED,	"|VALIASED" },
+	};
+	char buf[64];
+	int i;
+
+	if (label != NULL)
+		printf("%s: ", label);
+	printf("type %s, usecount %d, writecount %d, refcount %d,",
+	    typename[vp->v_type], vp->v_usecount, vp->v_writecount,
+	    vp->v_holdcnt);
+	buf[0] = '\0';
+	for (i = 0; i < sizeof(flagnames) / sizeof(flagnames[0]); i++)
+		if (vp->v_flag & flagnames[i].bit)
+			strcat(buf, flagnames[i].text);
+	/* &buf[1] skips the leading '|' of the first flag name. */
+	if (buf[0] != '\0')
+		printf(" flags (%s)", &buf[1]);
+	if (vp->v_data != NULL) {
+		printf("\n\t");
+		VOP_PRINT(vp);
+	} else {
+		printf("\n");
+	}
+}
+
+#ifdef DEBUG
+/*
+ * Debugging aid: walk every mount point on the mount list and print,
+ * through vprint(), each of its vnodes for which VOP_ISLOCKED()
+ * reports true.
+ */
+printlockedvnodes()
+{
+	register struct mount *mp;
+	register struct vnode *vp;
+
+	printf("Locked vnodes\n");
+	mp = mountlist.tqh_first;
+	while (mp != NULL) {
+		vp = mp->mnt_vnodelist.lh_first;
+		while (vp != NULL) {
+			if (VOP_ISLOCKED(vp))
+				vprint((char *)0, vp);
+			vp = vp->v_mntvnodes.le_next;
+		}
+		mp = mp->mnt_list.tqe_next;
+	}
+}
+#endif
+
+int kinfo_vdebug = 1;
+int kinfo_vgetfailed;
+#define KINFO_VNODESLOP	10
+/*
+ * Dump vnode list (via sysctl).
+ * Copyout address of vnode followed by vnode.
+ *
+ * With where == NULL only a size estimate, based on numvnodes plus
+ * KINFO_VNODESLOP spare entries, is stored in *sizep.  Otherwise
+ * *sizep gives the size of the buffer at where on entry and the
+ * number of bytes produced on return.
+ *
+ * Returns 0 on success, ENOMEM when the buffer is too small (with
+ * *sizep set to the amount copied so far), or the error reported by
+ * copyout().  Mount points for which vfs_busy() fails are skipped.
+ * Every mount point that was marked busy here is released again
+ * before returning, on the error paths as well.
+ */
+/* ARGSUSED */
+sysctl_vnode(where, sizep)
+	char *where;
+	size_t *sizep;
+{
+	register struct mount *mp, *nmp;
+	struct vnode *vp;
+	register char *bp = where, *savebp;
+	char *ewhere;
+	int error;
+
+#define VPTRSZ	sizeof (struct vnode *)
+#define VNODESZ	sizeof (struct vnode)
+	if (where == NULL) {
+		*sizep = (numvnodes + KINFO_VNODESLOP) * (VPTRSZ + VNODESZ);
+		return (0);
+	}
+	ewhere = where + *sizep;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		if (vfs_busy(mp))
+			continue;
+		savebp = bp;
+again:
+		for (vp = mp->mnt_vnodelist.lh_first;
+		     vp != NULL;
+		     vp = vp->v_mntvnodes.le_next) {
+			/*
+			 * Check that the vp is still associated with
+			 * this filesystem.  RACE: could have been
+			 * recycled onto the same filesystem.
+			 */
+			if (vp->v_mount != mp) {
+				if (kinfo_vdebug)
+					printf("kinfo: vp changed\n");
+				bp = savebp;
+				goto again;
+			}
+			if (bp + VPTRSZ + VNODESZ > ewhere) {
+				/* Do not leave the mount point busy. */
+				vfs_unbusy(mp);
+				*sizep = bp - where;
+				return (ENOMEM);
+			}
+			if ((error = copyout((caddr_t)&vp, bp, VPTRSZ)) ||
+			    (error = copyout((caddr_t)vp, bp + VPTRSZ,
+			    VNODESZ))) {
+				/* Do not leave the mount point busy. */
+				vfs_unbusy(mp);
+				return (error);
+			}
+			bp += VPTRSZ + VNODESZ;
+		}
+		vfs_unbusy(mp);
+	}
+
+	*sizep = bp - where;
+	return (0);
+}
+
+/*
+ * Report whether a filesystem is mounted on the device that vp
+ * refers to.
+ *
+ * Returns EBUSY when vp itself, or any vnode on its hash chain with
+ * the same device number and type, has SI_MOUNTEDON set.  The chain
+ * is only examined for vnodes flagged VALIASED.  Returns 0 otherwise.
+ */
+int
+vfs_mountedon(vp)
+	register struct vnode *vp;
+{
+	register struct vnode *alias;
+
+	if (vp->v_specflags & SI_MOUNTEDON)
+		return (EBUSY);
+	if (!(vp->v_flag & VALIASED))
+		return (0);
+	for (alias = *vp->v_hashchain; alias != NULL;
+	    alias = alias->v_specnext) {
+		if (alias->v_rdev == vp->v_rdev &&
+		    alias->v_type == vp->v_type &&
+		    (alias->v_specflags & SI_MOUNTEDON))
+			return (EBUSY);
+	}
+	return (0);
+}
+
+/*
+ * Build hash lists of net addresses and hang them off the mount point.
+ * Called by ufs_mount() to set up the lists of export addresses.
+ *
+ * With argp->ex_addrlen == 0 the request describes the default
+ * export: nep->ne_defexported is filled in and MNT_DEFEXPORTED is
+ * set on mp.  Otherwise a netcred is allocated with the address (and
+ * the optional mask) copied in from user space behind it, and it is
+ * entered into the radix tree for the address family, which is
+ * attached on first use.
+ *
+ * Returns 0 on success; EPERM if a default export already exists or
+ * the address could not be added (e.g. it is already present);
+ * EINVAL if the address family is larger than AF_MAX; ENOBUFS if no
+ * radix tree can be attached for the family; or the error returned
+ * by copyin().
+ */
+static int
+vfs_hang_addrlist(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	register struct netcred *np;
+	register struct radix_node_head *rnh;
+	register int i;
+	struct radix_node *rn;
+	struct sockaddr *saddr, *smask = 0;
+	struct domain *dom;
+	int error;
+
+	if (argp->ex_addrlen == 0) {
+		if (mp->mnt_flag & MNT_DEFEXPORTED)
+			return (EPERM);
+		np = &nep->ne_defexported;
+		np->netc_exflags = argp->ex_flags;
+		np->netc_anon = argp->ex_anon;
+		np->netc_anon.cr_ref = 1;
+		mp->mnt_flag |= MNT_DEFEXPORTED;
+		return (0);
+	}
+	/* One allocation holds the netcred, the address and the mask. */
+	i = sizeof(struct netcred) + argp->ex_addrlen + argp->ex_masklen;
+	np = (struct netcred *)malloc(i, M_NETADDR, M_WAITOK);
+	bzero((caddr_t)np, i);
+	saddr = (struct sockaddr *)(np + 1);
+	if (error = copyin(argp->ex_addr, (caddr_t)saddr, argp->ex_addrlen))
+		goto out;
+	if (saddr->sa_len > argp->ex_addrlen)
+		saddr->sa_len = argp->ex_addrlen;
+	if (argp->ex_masklen) {
+		smask = (struct sockaddr *)((caddr_t)saddr + argp->ex_addrlen);
+		/* The mask comes from ex_mask, not from ex_addr. */
+		error = copyin(argp->ex_mask, (caddr_t)smask, argp->ex_masklen);
+		if (error)
+			goto out;
+		if (smask->sa_len > argp->ex_masklen)
+			smask->sa_len = argp->ex_masklen;
+	}
+	i = saddr->sa_family;
+	/* ne_rtable[] is only walked up to AF_MAX; reject anything else. */
+	if (i < 0 || i > AF_MAX) {
+		error = EINVAL;
+		goto out;
+	}
+	if ((rnh = nep->ne_rtable[i]) == 0) {
+		/*
+		 * Seems silly to initialize every AF when most are not
+		 * used, do so on demand here
+		 */
+		for (dom = domains; dom; dom = dom->dom_next)
+			if (dom->dom_family == i && dom->dom_rtattach) {
+				dom->dom_rtattach((void **)&nep->ne_rtable[i],
+					dom->dom_rtoffset);
+				break;
+			}
+		if ((rnh = nep->ne_rtable[i]) == 0) {
+			error = ENOBUFS;
+			goto out;
+		}
+	}
+	rn = (*rnh->rnh_addaddr)((caddr_t)saddr, (caddr_t)smask, rnh,
+		np->netc_rnodes);
+	if (rn == 0 || np != (struct netcred *)rn) { /* already exists */
+		error = EPERM;
+		goto out;
+	}
+	np->netc_exflags = argp->ex_flags;
+	np->netc_anon = argp->ex_anon;
+	np->netc_anon.cr_ref = 1;
+	return (0);
+out:
+	free(np, M_NETADDR);
+	return (error);
+}
+
+/*
+ * Tree-walk callback used by vfs_free_addrlist(): delete one entry
+ * from the radix tree passed in w and free its storage.  Always
+ * returns 0.
+ */
+/* ARGSUSED */
+static int
+vfs_free_netcred(rn, w)
+	struct radix_node *rn;
+	caddr_t w;
+{
+	register struct radix_node_head *head;
+
+	head = (struct radix_node_head *)w;
+	(*head->rnh_deladdr)(rn->rn_key, rn->rn_mask, head);
+	free((caddr_t)rn, M_NETADDR);
+	return (0);
+}
+
+/*
+ * Release every export address table hanging off nep: for each
+ * address family with a radix tree, all entries are freed through
+ * vfs_free_netcred(), the tree head itself is freed, and the table
+ * slot is cleared.
+ */
+static void
+vfs_free_addrlist(nep)
+	struct netexport *nep;
+{
+	register int af;
+	register struct radix_node_head *rnh;
+
+	for (af = 0; af <= AF_MAX; af++) {
+		rnh = nep->ne_rtable[af];
+		if (rnh == 0)
+			continue;
+		(*rnh->rnh_walktree)(rnh, vfs_free_netcred, (caddr_t)rnh);
+		free((caddr_t)rnh, M_RTABLE);
+		nep->ne_rtable[af] = 0;
+	}
+}
+
+/*
+ * Apply an export request to a mount point.
+ *
+ * MNT_DELEXPORT in argp->ex_flags drops all existing export
+ * addresses and clears MNT_EXPORTED and MNT_DEFEXPORTED on mp.
+ * MNT_EXPORTED adds the address described by argp and, on success,
+ * marks mp as exported.  Returns 0 or the error from
+ * vfs_hang_addrlist().
+ */
+int
+vfs_export(mp, nep, argp)
+	struct mount *mp;
+	struct netexport *nep;
+	struct export_args *argp;
+{
+	int error;
+
+	if (argp->ex_flags & MNT_DELEXPORT) {
+		vfs_free_addrlist(nep);
+		mp->mnt_flag &= ~(MNT_EXPORTED | MNT_DEFEXPORTED);
+	}
+	if (!(argp->ex_flags & MNT_EXPORTED))
+		return (0);
+	error = vfs_hang_addrlist(mp, nep, argp);
+	if (error == 0)
+		mp->mnt_flag |= MNT_EXPORTED;
+	return (error);
+}
+
+/*
+ * Find the export credentials that apply to the address in nam.
+ *
+ * Returns NULL when mp is not exported.  Otherwise the address is
+ * looked up in the radix tree for its family; a match whose node is
+ * flagged RNF_ROOT does not count.  Without a match (or without an
+ * address) the default export entry is returned if MNT_DEFEXPORTED
+ * is set, and NULL if it is not.
+ */
+struct netcred *
+vfs_export_lookup(mp, nep, nam)
+	register struct mount *mp;
+	struct netexport *nep;
+	struct mbuf *nam;
+{
+	register struct netcred *np;
+	register struct radix_node_head *rnh;
+	struct sockaddr *saddr;
+
+	if (!(mp->mnt_flag & MNT_EXPORTED))
+		return (NULL);
+	np = NULL;
+	/* The explicit export list takes precedence over the default. */
+	if (nam != NULL) {
+		saddr = mtod(nam, struct sockaddr *);
+		rnh = nep->ne_rtable[saddr->sa_family];
+		if (rnh != NULL) {
+			np = (struct netcred *)
+			    (*rnh->rnh_matchaddr)((caddr_t)saddr, rnh);
+			if (np != NULL &&
+			    (np->netc_rnodes->rn_flags & RNF_ROOT))
+				np = NULL;
+		}
+	}
+	if (np == NULL && (mp->mnt_flag & MNT_DEFEXPORTED))
+		np = &nep->ne_defexported;
+	return (np);
+}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
new file mode 100644
index 0000000..345c7a7
--- /dev/null
+++ b/sys/kern/vfs_extattr.c
@@ -0,0 +1,2107 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/filedesc.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+#include <sys/sysctl.h>
+
+static int change_dir __P((struct nameidata *ndp, struct proc *p));
+
+/*
+ * Virtual File System System Calls
+ */
+
+/*
+ * Mount a file system.
+ *
+ * Only the super user may call this.  uap->path names the directory
+ * to be covered (or, with MNT_UPDATE, the root of the mounted
+ * filesystem to be updated), uap->type indexes vfssw[], uap->flags
+ * carries the MNT_* flags and uap->data is handed to VFS_MOUNT()
+ * unchanged.
+ *
+ * Returns 0 on success or an errno value.  Every error return made
+ * after the lookup releases the vnode that namei() returned locked,
+ * including the one taken when vinvalbuf() fails.
+ */
+struct mount_args {
+	int	type;
+	char	*path;
+	int	flags;
+	caddr_t	data;
+};
+/* ARGSUSED */
+mount(p, uap, retval)
+	struct proc *p;
+	register struct mount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	register struct mount *mp;
+	int error, flag;
+	struct nameidata nd;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/*
+	 * Get vnode to be covered
+	 */
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	if (uap->flags & MNT_UPDATE) {
+		if ((vp->v_flag & VROOT) == 0) {
+			vput(vp);
+			return (EINVAL);
+		}
+		mp = vp->v_mount;
+		/* Saved so the flags can be restored if the update fails. */
+		flag = mp->mnt_flag;
+		/*
+		 * We only allow the filesystem to be reloaded if it
+		 * is currently mounted read-only.
+		 */
+		if ((uap->flags & MNT_RELOAD) &&
+		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
+			vput(vp);
+			return (EOPNOTSUPP);	/* Needs translation */
+		}
+		mp->mnt_flag |=
+		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
+		VOP_UNLOCK(vp);
+		goto update;
+	}
+	if (error = vinvalbuf(vp, V_SAVE, p->p_ucred, p, 0, 0)) {
+		/* Release the locked vnode like the other error paths. */
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_type != VDIR) {
+		vput(vp);
+		return (ENOTDIR);
+	}
+	if ((u_long)uap->type > MOUNT_MAXTYPE || vfssw[uap->type] == NULL) {
+		vput(vp);
+		return (ENODEV);
+	}
+
+	/*
+	 * Allocate and initialize the file system.
+	 */
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+		M_MOUNT, M_WAITOK);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = vfssw[uap->type];
+	if (error = vfs_lock(mp)) {
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (error);
+	}
+	if (vp->v_mountedhere != NULL) {
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+		return (EBUSY);
+	}
+	vp->v_mountedhere = mp;
+	mp->mnt_vnodecovered = vp;
+update:
+	/*
+	 * Set the mount level flags.
+	 */
+	if (uap->flags & MNT_RDONLY)
+		mp->mnt_flag |= MNT_RDONLY;
+	else if (mp->mnt_flag & MNT_RDONLY)
+		mp->mnt_flag |= MNT_WANTRDWR;
+	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
+	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC);
+	/*
+	 * Mount the filesystem.
+	 */
+	error = VFS_MOUNT(mp, uap->path, uap->data, &nd, p);
+	if (mp->mnt_flag & MNT_UPDATE) {
+		vrele(vp);
+		if (mp->mnt_flag & MNT_WANTRDWR)
+			mp->mnt_flag &= ~MNT_RDONLY;
+		mp->mnt_flag &=~
+		    (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_WANTRDWR);
+		if (error)
+			mp->mnt_flag = flag;
+		return (error);
+	}
+	/*
+	 * Put the new filesystem on the mount list after root.
+	 */
+	cache_purge(vp);
+	if (!error) {
+		TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+		VOP_UNLOCK(vp);
+		vfs_unlock(mp);
+		error = VFS_START(mp, 0, p);
+	} else {
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vfs_unlock(mp);
+		free((caddr_t)mp, M_MOUNT);
+		vput(vp);
+	}
+	return (error);
+}
+
+/*
+ * Unmount a file system.
+ *
+ * Note: unmount takes a path to the vnode mounted on as argument,
+ * not special file (as before).
+ *
+ * The path must name the root vnode of a mounted filesystem (EINVAL
+ * otherwise).  Unless the filesystem is flagged MNT_USER the caller
+ * has to pass the suser() check.  The work itself is done by
+ * dounmount(), whose result is returned.
+ */
+struct unmount_args {
+	char	*path;
+	int	flags;
+};
+/* ARGSUSED */
+unmount(p, uap, retval)
+	struct proc *p;
+	register struct unmount_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct mount *mp;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	/* Privilege check; skipped for user mounts. */
+	if ((vp->v_mount->mnt_flag & MNT_USER) == 0) {
+		error = suser(p->p_ucred, &p->p_acflag);
+		if (error) {
+			vput(vp);
+			return (error);
+		}
+	}
+
+	/* Only the root of a filesystem can be unmounted. */
+	if (!(vp->v_flag & VROOT)) {
+		vput(vp);
+		return (EINVAL);
+	}
+	mp = vp->v_mount;
+	vput(vp);
+	return (dounmount(mp, uap->flags, p));
+}
+
+/*
+ * Do the actual file system unmount.
+ *
+ * The mount point is marked busy and MNT_UNMOUNT, locked, synced
+ * and handed to VFS_UNMOUNT().  With MNT_FORCE in flags the unmount
+ * is attempted even if the sync fails.  On success the mount point
+ * is taken off the mount list, detached from the vnode it covered
+ * and freed.  On failure it is only unlocked and stays mounted.
+ *
+ * Returns 0, EBUSY when the mount point cannot be marked busy, or
+ * the error from vfs_lock(), VFS_SYNC() or VFS_UNMOUNT().
+ */
+dounmount(mp, flags, p)
+	register struct mount *mp;
+	int flags;
+	struct proc *p;
+{
+	struct vnode *coveredvp;
+	int error;
+
+	coveredvp = mp->mnt_vnodecovered;
+	if (vfs_busy(mp))
+		return (EBUSY);
+	mp->mnt_flag |= MNT_UNMOUNT;
+	if (error = vfs_lock(mp)) {
+		/*
+		 * Undo the two steps above; otherwise the mount point
+		 * would stay busy and marked as being unmounted.
+		 */
+		mp->mnt_flag &= ~MNT_UNMOUNT;
+		vfs_unbusy(mp);
+		return (error);
+	}
+
+	mp->mnt_flag &=~ MNT_ASYNC;
+	vnode_pager_umount(mp);	/* release cached vnodes */
+	cache_purgevfs(mp);	/* remove cache entries for this file sys */
+	if ((error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0 ||
+	    (flags & MNT_FORCE))
+		error = VFS_UNMOUNT(mp, flags, p);
+	mp->mnt_flag &= ~MNT_UNMOUNT;
+	vfs_unbusy(mp);
+	if (error) {
+		vfs_unlock(mp);
+	} else {
+		TAILQ_REMOVE(&mountlist, mp, mnt_list);
+		/*
+		 * Detach from the covered vnode before the reference
+		 * to it is given up, not after.
+		 */
+		mp->mnt_vnodecovered->v_mountedhere = (struct mount *)0;
+		vrele(coveredvp);
+		vfs_unlock(mp);
+		if (mp->mnt_vnodelist.lh_first != NULL)
+			panic("unmount: dangling vnode");
+		free((caddr_t)mp, M_MOUNT);
+	}
+	return (error);
+}
+
+/*
+ * Start a sync of every mounted filesystem that is writable and
+ * neither locked nor busy.
+ *
+ * MNT_ASYNC is switched off around the VFS_SYNC() call and restored
+ * afterwards.  Errors from VFS_SYNC() are ignored; the call always
+ * returns 0.  With DIAGNOSTIC, setting syncprt also prints buffer
+ * statistics through vfs_bufstats().
+ */
+#ifdef DIAGNOSTIC
+int syncprt = 0;
+struct ctldebug debug0 = { "syncprt", &syncprt };
+#endif
+
+struct sync_args {
+	int	dummy;
+};
+/* ARGSUSED */
+sync(p, uap, retval)
+	struct proc *p;
+	struct sync_args *uap;
+	int *retval;
+{
+	register struct mount *mp, *nmp;
+	int wasasync;
+
+	for (mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+		nmp = mp->mnt_list.tqe_next;
+		/*
+		 * The lock check below is to avoid races with mount
+		 * and unmount.
+		 */
+		if (mp->mnt_flag & (MNT_MLOCK|MNT_RDONLY|MNT_MPBUSY))
+			continue;
+		if (vfs_busy(mp))
+			continue;
+		wasasync = mp->mnt_flag & MNT_ASYNC;
+		mp->mnt_flag &= ~MNT_ASYNC;
+		VFS_SYNC(mp, MNT_NOWAIT, p->p_ucred, p);
+		if (wasasync)
+			mp->mnt_flag |= MNT_ASYNC;
+		vfs_unbusy(mp);
+	}
+#ifdef DIAGNOSTIC
+	if (syncprt)
+		vfs_bufstats();
+#endif /* DIAGNOSTIC */
+	return (0);
+}
+
+/*
+ * Change filesystem quotas.
+ *
+ * The path is looked up only to identify the mount it resides on; cmd,
+ * uid and arg are then passed to that filesystem's VFS_QUOTACTL, whose
+ * result is returned.
+ */
+struct quotactl_args {
+	char *path;
+	int cmd;
+	int uid;
+	caddr_t arg;
+};
+/* ARGSUSED */
+quotactl(p, uap, retval)
+	struct proc *p;
+	register struct quotactl_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct mount *fsmount;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	/* Only the mount is of interest; the vnode reference is dropped. */
+	fsmount = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);
+	error = VFS_QUOTACTL(fsmount, uap->cmd, uap->uid, uap->arg, p);
+	return (error);
+}
+
+/*
+ * Get filesystem statistics for the filesystem holding a path.
+ *
+ * Resolves uap->path, has the owning filesystem refresh the statfs
+ * record kept in its mount structure (VFS_STATFS), sets f_flags to the
+ * mount flags masked with MNT_VISFLAGMASK and copies the record out to
+ * uap->buf.
+ */
+struct statfs_args {
+	char *path;
+	struct statfs *buf;
+};
+/* ARGSUSED */
+statfs(p, uap, retval)
+	struct proc *p;
+	register struct statfs_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct mount *mp;
+	register struct statfs *sfs;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	/* Only the mount is needed; drop the vnode straight away. */
+	mp = nd.ni_vp->v_mount;
+	vrele(nd.ni_vp);
+	sfs = &mp->mnt_stat;
+	error = VFS_STATFS(mp, sfs, p);
+	if (error == 0) {
+		sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+		error = copyout((caddr_t)sfs, (caddr_t)uap->buf, sizeof(*sfs));
+	}
+	return (error);
+}
+
+/*
+ * Get filesystem statistics for the filesystem holding an open file.
+ *
+ * Same as statfs, except that the mount is found through the vnode
+ * behind descriptor uap->fd instead of through a pathname lookup.
+ */
+struct fstatfs_args {
+	int fd;
+	struct statfs *buf;
+};
+/* ARGSUSED */
+fstatfs(p, uap, retval)
+	struct proc *p;
+	register struct fstatfs_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct mount *mp;
+	register struct statfs *sfs;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	mp = ((struct vnode *)fp->f_data)->v_mount;
+	sfs = &mp->mnt_stat;
+	error = VFS_STATFS(mp, sfs, p);
+	if (error == 0) {
+		sfs->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+		error = copyout((caddr_t)sfs, (caddr_t)uap->buf, sizeof(*sfs));
+	}
+	return (error);
+}
+
+/*
+ * Get statistics on all filesystems.
+ *
+ * Copies one struct statfs per mount on mountlist into uap->buf until
+ * bufsize / sizeof(struct statfs) entries have been written.  *retval
+ * is the number of mounts counted, limited to that capacity when a
+ * buffer was supplied; with a NULL buf nothing is copied and the plain
+ * count is returned.  A copyout failure aborts the call with that error.
+ */
+struct getfsstat_args {
+ struct statfs *buf;
+ long bufsize;
+ int flags;
+};
+getfsstat(p, uap, retval)
+ struct proc *p;
+ register struct getfsstat_args *uap;
+ int *retval;
+{
+ register struct mount *mp, *nmp;
+ register struct statfs *sp;
+ caddr_t sfsp;
+ long count, maxcount, error;
+
+ maxcount = uap->bufsize / sizeof(struct statfs); /* whole entries that fit */
+ sfsp = (caddr_t)uap->buf;
+ for (count = 0, mp = mountlist.tqh_first; mp != NULL; mp = nmp) {
+ nmp = mp->mnt_list.tqe_next;
+ if (sfsp && count < maxcount &&
+ ((mp->mnt_flag & MNT_MLOCK) == 0)) {
+ sp = &mp->mnt_stat;
+ /*
+ * If MNT_NOWAIT is specified, do not refresh the
+ * fsstat cache. MNT_WAIT overrides MNT_NOWAIT.
+ */
+ if (((uap->flags & MNT_NOWAIT) == 0 ||
+ (uap->flags & MNT_WAIT)) &&
+ (error = VFS_STATFS(mp, sp, p)))
+ continue; /* failed refresh: mount is neither copied nor counted */
+ sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
+ if (error = copyout((caddr_t)sp, sfsp, sizeof(*sp)))
+ return (error);
+ sfsp += sizeof(*sp);
+ }
+ count++; /* also reached for mounts the test above skipped */
+ }
+ if (sfsp && count > maxcount)
+ *retval = maxcount;
+ else
+ *retval = count;
+ return (0);
+}
+
+/*
+ * Change current working directory to a given file descriptor.
+ *
+ * The descriptor must refer to a directory (else ENOTDIR) on which the
+ * caller has VEXEC access.  On success a new reference to the vnode
+ * replaces fd_cdir and the previous working directory is released.
+ */
+struct fchdir_args {
+	int fd;
+};
+/* ARGSUSED */
+fchdir(p, uap, retval)
+	struct proc *p;
+	struct fchdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	register struct vnode *dvp;
+	struct file *fp;
+	int error;
+
+	error = getvnode(fdp, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	dvp = (struct vnode *)fp->f_data;
+	/* The type and permission checks are made with the vnode locked. */
+	VOP_LOCK(dvp);
+	if (dvp->v_type == VDIR)
+		error = VOP_ACCESS(dvp, VEXEC, p->p_ucred, p);
+	else
+		error = ENOTDIR;
+	VOP_UNLOCK(dvp);
+	if (error == 0) {
+		VREF(dvp);
+		vrele(fdp->fd_cdir);
+		fdp->fd_cdir = dvp;
+	}
+	return (error);
+}
+
+/*
+ * Change current working directory (``.'').
+ *
+ * change_dir() performs the lookup and the directory / search
+ * permission checks.  On success the vnode it leaves in nd.ni_vp
+ * becomes fd_cdir and the old working directory is released.
+ */
+struct chdir_args {
+	char *path;
+};
+/* ARGSUSED */
+chdir(p, uap, retval)
+	struct proc *p;
+	struct chdir_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = change_dir(&nd, p);
+	if (error == 0) {
+		vrele(fdp->fd_cdir);
+		fdp->fd_cdir = nd.ni_vp;
+	}
+	return (error);
+}
+
+/*
+ * Change notion of root (``/'') directory.
+ *
+ * Requires suser().  change_dir() validates the new root; on success
+ * it replaces fd_rdir.  fd_rdir may be NULL, in which case there is no
+ * old reference to release.
+ */
+struct chroot_args {
+	char *path;
+};
+/* ARGSUSED */
+chroot(p, uap, retval)
+	struct proc *p;
+	struct chroot_args *uap;
+	int *retval;
+{
+	register struct filedesc *fdp = p->p_fd;
+	struct nameidata nd;
+	int error;
+
+	/* The privilege check comes before any pathname lookup. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error != 0)
+		return (error);
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = change_dir(&nd, p);
+	if (error != 0)
+		return (error);
+	if (fdp->fd_rdir != NULL)
+		vrele(fdp->fd_rdir);
+	fdp->fd_rdir = nd.ni_vp;
+	return (0);
+}
+
+/*
+ * Common routine for chroot and chdir.
+ *
+ * Looks up ndp and requires the result to be a directory (else ENOTDIR)
+ * on which the caller has VEXEC access.  The vnode is unlocked in every
+ * case.  On error it is also released; on success the caller keeps the
+ * reference in ndp->ni_vp.
+ */
+static int
+change_dir(ndp, p)
+	register struct nameidata *ndp;
+	struct proc *p;
+{
+	struct vnode *dvp;
+	int error;
+
+	error = namei(ndp);
+	if (error != 0)
+		return (error);
+	dvp = ndp->ni_vp;
+	if (dvp->v_type == VDIR)
+		error = VOP_ACCESS(dvp, VEXEC, p->p_ucred, p);
+	else
+		error = ENOTDIR;
+	VOP_UNLOCK(dvp);
+	if (error != 0)
+		vrele(dvp);
+	return (error);
+}
+
+/*
+ * Check permissions, allocate an open file structure,
+ * and call the device open routine if any.
+ *
+ * On success *retval is the index of the new descriptor.  If vn_open()
+ * fails, the file structure is freed and the descriptor slot cleared,
+ * except when the failure is ENODEV or ENXIO and p_dupfd has become
+ * non-negative during the open (see the XXX fdopen notes), in which
+ * case dupfdopen() is tried instead.  ERESTART from the open is
+ * reported as EINTR.  O_EXLOCK / O_SHLOCK request an advisory lock via
+ * VOP_ADVLOCK before the descriptor is returned.
+ */
+struct open_args {
+ char *path;
+ int flags;
+ int mode;
+};
+open(p, uap, retval)
+ struct proc *p;
+ register struct open_args *uap;
+ int *retval;
+{
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ register struct vnode *vp;
+ int flags, cmode;
+ struct file *nfp;
+ int type, indx, error;
+ struct flock lf;
+ struct nameidata nd;
+ extern struct fileops vnops;
+
+ if (error = falloc(p, &nfp, &indx))
+ return (error);
+ fp = nfp;
+ flags = FFLAGS(uap->flags);
+ cmode = ((uap->mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; /* apply fd_cmask, never S_ISTXT */
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ p->p_dupfd = -indx - 1; /* XXX check for fdopen */
+ if (error = vn_open(&nd, flags, cmode)) {
+ ffree(fp);
+ if ((error == ENODEV || error == ENXIO) &&
+ p->p_dupfd >= 0 && /* XXX from fdopen */
+ (error =
+ dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) {
+ *retval = indx;
+ return (0);
+ }
+ if (error == ERESTART)
+ error = EINTR;
+ fdp->fd_ofiles[indx] = NULL; /* give the descriptor slot back */
+ return (error);
+ }
+ p->p_dupfd = 0;
+ vp = nd.ni_vp;
+ fp->f_flag = flags & FMASK;
+ fp->f_type = DTYPE_VNODE;
+ fp->f_ops = &vnops;
+ fp->f_data = (caddr_t)vp;
+ if (flags & (O_EXLOCK | O_SHLOCK)) {
+ lf.l_whence = SEEK_SET;
+ lf.l_start = 0;
+ lf.l_len = 0;
+ if (flags & O_EXLOCK)
+ lf.l_type = F_WRLCK;
+ else
+ lf.l_type = F_RDLCK;
+ type = F_FLOCK;
+ if ((flags & FNONBLOCK) == 0)
+ type |= F_WAIT; /* wait for the lock unless opened non-blocking */
+ VOP_UNLOCK(vp); /* vnode is unlocked around the VOP_ADVLOCK call */
+ if (error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) {
+ (void) vn_close(vp, fp->f_flag, fp->f_cred, p);
+ ffree(fp);
+ fdp->fd_ofiles[indx] = NULL;
+ return (error);
+ }
+ VOP_LOCK(vp);
+ fp->f_flag |= FHASLOCK;
+ }
+ VOP_UNLOCK(vp);
+ *retval = indx;
+ return (0);
+}
+
+#ifdef COMPAT_43
+/*
+ * Create a file.
+ *
+ * Compatibility entry point implemented as open() with the flags
+ * O_WRONLY | O_CREAT | O_TRUNC and the caller's path and mode.
+ */
+struct ocreat_args {
+	char *path;
+	int mode;
+};
+ocreat(p, uap, retval)
+	struct proc *p;
+	register struct ocreat_args *uap;
+	int *retval;
+{
+	struct open_args nuap;
+
+	nuap.flags = O_WRONLY | O_CREAT | O_TRUNC;
+	nuap.mode = uap->mode;
+	nuap.path = uap->path;
+	return (open(p, &nuap, retval));
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Create a special file.
+ *
+ * Requires suser().  The S_IFMT bits of uap->mode select the vnode type
+ * (VCHR, VBLK, or VBAD when all type bits are set); any other type is
+ * EINVAL.  Fails with EEXIST when the name already exists.  The
+ * permission bits are filtered through the process fd_cmask.
+ */
+struct mknod_args {
+ char *path;
+ int mode;
+ int dev;
+};
+/* ARGSUSED */
+mknod(p, uap, retval)
+ struct proc *p;
+ register struct mknod_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ if (error = suser(p->p_ucred, &p->p_acflag))
+ return (error);
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp; /* non-NULL means the name already exists */
+ if (vp != NULL)
+ error = EEXIST;
+ else {
+ VATTR_NULL(&vattr);
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ vattr.va_rdev = uap->dev;
+
+ switch (uap->mode & S_IFMT) {
+ case S_IFMT: /* used by badsect to flag bad sectors */
+ vattr.va_type = VBAD;
+ break;
+ case S_IFCHR:
+ vattr.va_type = VCHR;
+ break;
+ case S_IFBLK:
+ vattr.va_type = VBLK;
+ break;
+ default:
+ error = EINVAL;
+ break;
+ }
+ }
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp); /* parent and target are the same vnode */
+ else
+ vput(nd.ni_dvp);
+ if (vp)
+ vrele(vp);
+ }
+ return (error);
+}
+
+/*
+ * Create named pipe.
+ *
+ * Returns EOPNOTSUPP when the kernel is built without FIFO.  Otherwise
+ * fails with EEXIST if the name already exists, else creates a VFIFO
+ * node through VOP_MKNOD with the mode filtered through fd_cmask.
+ */
+struct mkfifo_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkfifo(p, uap, retval)
+ struct proc *p;
+ register struct mkfifo_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+#ifndef FIFO
+ return (EOPNOTSUPP);
+#else
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ if (nd.ni_vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp); /* parent and target are the same vnode */
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VFIFO;
+ vattr.va_mode = (uap->mode & ALLPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ return (VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr));
+#endif /* FIFO */
+}
+
+/*
+ * Make a hard file link.
+ *
+ * uap->path names the existing file, uap->link the new name.  Linking
+ * to a directory requires suser().  The same nameidata is reused for
+ * the second lookup, switched to CREATE / LOCKPARENT and pointed at
+ * uap->link.  Fails with EEXIST when the new name already exists.
+ */
+struct link_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+link(p, uap, retval)
+ struct proc *p;
+ register struct link_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct nameidata nd;
+ int error;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp; /* the existing file; released at the end on every path */
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ nd.ni_dirp = uap->link;
+ if ((error = namei(&nd)) == 0) {
+ if (nd.ni_vp != NULL)
+ error = EEXIST;
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp,
+ p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp,
+ p, p->p_ucred, LEASE_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ }
+ }
+ }
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Make a symbolic link.
+ *
+ * uap->path is the link contents, copied from user space into a
+ * temporary MAXPATHLEN buffer; uap->link is the name to create.  Fails
+ * with EEXIST when that name already exists.  The temporary buffer is
+ * freed on every exit path.
+ */
+struct symlink_args {
+ char *path;
+ char *link;
+};
+/* ARGSUSED */
+symlink(p, uap, retval)
+ struct proc *p;
+ register struct symlink_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ char *path;
+ int error;
+ struct nameidata nd;
+
+ MALLOC(path, char *, MAXPATHLEN, M_NAMEI, M_WAITOK);
+ if (error = copyinstr(uap->path, path, MAXPATHLEN, NULL))
+ goto out;
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->link, p);
+ if (error = namei(&nd))
+ goto out;
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path);
+out:
+ FREE(path, M_NAMEI);
+ return (error);
+}
+
+/*
+ * Delete a name from the filesystem.
+ *
+ * Removing a directory this way requires suser().  The root of a
+ * mounted filesystem (VROOT) cannot be removed and yields EBUSY.
+ * When a check fails the pending operation is aborted and both the
+ * parent and the target vnode are released.
+ */
+struct unlink_args {
+ char *path;
+};
+/* ARGSUSED */
+unlink(p, uap, retval)
+ struct proc *p;
+ struct unlink_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+
+ if (vp->v_type != VDIR ||
+ (error = suser(p->p_ucred, &p->p_acflag)) == 0) {
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+ else
+ (void)vnode_pager_uncache(vp); /* result deliberately ignored */
+ }
+
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+/*
+ * Reposition read/write file offset.
+ *
+ * whence selects the base: L_INCR adds to the current offset, L_XTND
+ * adds to the file size obtained from VOP_GETATTR, L_SET is absolute;
+ * any other value is EINVAL.  EBADF for an invalid descriptor, ESPIPE
+ * for a descriptor that is not a vnode.  The resulting offset is
+ * stored through retval as an off_t.
+ */
+struct lseek_args {
+ int fd;
+ int pad;
+ off_t offset;
+ int whence;
+};
+lseek(p, uap, retval)
+ struct proc *p;
+ register struct lseek_args *uap;
+ int *retval;
+{
+ struct ucred *cred = p->p_ucred;
+ register struct filedesc *fdp = p->p_fd;
+ register struct file *fp;
+ struct vattr vattr;
+ int error;
+
+ if ((u_int)uap->fd >= fdp->fd_nfiles ||
+ (fp = fdp->fd_ofiles[uap->fd]) == NULL)
+ return (EBADF);
+ if (fp->f_type != DTYPE_VNODE)
+ return (ESPIPE);
+ switch (uap->whence) {
+ case L_INCR:
+ fp->f_offset += uap->offset;
+ break;
+ case L_XTND:
+ if (error =
+ VOP_GETATTR((struct vnode *)fp->f_data, &vattr, cred, p))
+ return (error);
+ fp->f_offset = uap->offset + vattr.va_size;
+ break;
+ case L_SET:
+ fp->f_offset = uap->offset;
+ break;
+ default:
+ return (EINVAL);
+ }
+ *(off_t *)retval = fp->f_offset; /* retval is written as an off_t, not an int */
+ return (0);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Reposition read/write file offset.
+ *
+ * Compatibility entry point taking a long offset.  The arguments are
+ * repacked into a struct lseek_args and passed to lseek(); the off_t
+ * result is then narrowed to a long and stored through retval.
+ */
+struct olseek_args {
+	int fd;
+	long offset;
+	int whence;
+};
+olseek(p, uap, retval)
+	struct proc *p;
+	register struct olseek_args *uap;
+	int *retval;
+{
+	struct lseek_args nuap;
+	off_t qret;
+	int error;
+
+	nuap.fd = uap->fd;
+	nuap.offset = uap->offset;
+	nuap.whence = uap->whence;
+	/* lseek() stores an off_t through its third argument on success. */
+	error = lseek(p, &nuap, &qret);
+	/*
+	 * qret is only written when lseek() succeeds, so it must not be
+	 * read on the error paths.
+	 */
+	if (error == 0)
+		*(long *)retval = qret;
+	return (error);
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Check access permissions.
+ *
+ * The check is made with the real ids: cr_uid and cr_groups[0] of the
+ * process credential are temporarily overwritten with p_ruid / p_rgid
+ * and put back before returning, on success and failure alike.
+ * R_OK, W_OK and X_OK in uap->flags map to VREAD, VWRITE and VEXEC;
+ * a write check additionally goes through vn_writechk().
+ */
+struct access_args {
+ char *path;
+ int flags;
+};
+access(p, uap, retval)
+ struct proc *p;
+ register struct access_args *uap;
+ int *retval;
+{
+ register struct ucred *cred = p->p_ucred;
+ register struct vnode *vp;
+ int error, flags, t_gid, t_uid;
+ struct nameidata nd;
+
+ t_uid = cred->cr_uid; /* saved so the credential can be restored at out1 */
+ t_gid = cred->cr_groups[0];
+ cred->cr_uid = p->p_cred->p_ruid;
+ cred->cr_groups[0] = p->p_cred->p_rgid;
+ NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ goto out1;
+ vp = nd.ni_vp;
+
+ /* Flags == 0 means only check for existence. */
+ if (uap->flags) {
+ flags = 0;
+ if (uap->flags & R_OK)
+ flags |= VREAD;
+ if (uap->flags & W_OK)
+ flags |= VWRITE;
+ if (uap->flags & X_OK)
+ flags |= VEXEC;
+ if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
+ error = VOP_ACCESS(vp, flags, cred, p);
+ }
+ vput(vp);
+out1:
+ cred->cr_uid = t_uid;
+ cred->cr_groups[0] = t_gid;
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Get file status; this version follows links.
+ *
+ * Compatibility entry point: the status is gathered with vn_stat(),
+ * converted to the old layout by cvtstat() and copied out to uap->ub.
+ */
+struct ostat_args {
+	char *path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+ostat(p, uap, retval)
+	struct proc *p;
+	register struct ostat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat newsb;
+	struct ostat oldsb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &newsb, p);
+	vput(nd.ni_vp);
+	if (error == 0) {
+		cvtstat(&newsb, &oldsb);
+		error = copyout((caddr_t)&oldsb, (caddr_t)uap->ub,
+		    sizeof (oldsb));
+	}
+	return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ *
+ * Compatibility entry point: identical to ostat except that the lookup
+ * is made with NOFOLLOW.
+ */
+struct olstat_args {
+	char *path;
+	struct ostat *ub;
+};
+/* ARGSUSED */
+olstat(p, uap, retval)
+	struct proc *p;
+	register struct olstat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat newsb;
+	struct ostat oldsb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &newsb, p);
+	vput(nd.ni_vp);
+	if (error == 0) {
+		cvtstat(&newsb, &oldsb);
+		error = copyout((caddr_t)&oldsb, (caddr_t)uap->ub,
+		    sizeof (oldsb));
+	}
+	return (error);
+}
+
+/*
+ * Convert from a new (struct stat) to an old (struct ostat) structure.
+ *
+ * Every field is copied across by name.  A size that is not below
+ * 2^32 is reported as -2 instead of being copied.
+ */
+cvtstat(st, ost)
+	struct stat *st;
+	struct ostat *ost;
+{
+
+	/* Identity of the file. */
+	ost->st_dev = st->st_dev;
+	ost->st_ino = st->st_ino;
+	ost->st_rdev = st->st_rdev;
+	ost->st_gen = st->st_gen;
+
+	/* Type, permissions and ownership. */
+	ost->st_mode = st->st_mode;
+	ost->st_nlink = st->st_nlink;
+	ost->st_uid = st->st_uid;
+	ost->st_gid = st->st_gid;
+	ost->st_flags = st->st_flags;
+
+	/* Size information. */
+	if (st->st_size >= (quad_t)1 << 32)
+		ost->st_size = -2;
+	else
+		ost->st_size = st->st_size;
+	ost->st_blksize = st->st_blksize;
+	ost->st_blocks = st->st_blocks;
+
+	/* Time stamps. */
+	ost->st_atime = st->st_atime;
+	ost->st_mtime = st->st_mtime;
+	ost->st_ctime = st->st_ctime;
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Get file status; this version follows links.
+ *
+ * The status is gathered with vn_stat() on the locked vnode and copied
+ * out to uap->ub.
+ */
+struct stat_args {
+	char *path;
+	struct stat *ub;
+};
+/* ARGSUSED */
+stat(p, uap, retval)
+	struct proc *p;
+	register struct stat_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct stat sb;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	error = vn_stat(nd.ni_vp, &sb, p);
+	vput(nd.ni_vp);
+	if (error == 0)
+		error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+	return (error);
+}
+
+/*
+ * Get file status; this version does not follow links.
+ *
+ * Both the target and its parent directory are looked up locked.  For
+ * anything but a symbolic link the parent is released at once and the
+ * target's own status is returned.  For a symbolic link the reply is
+ * built from the parent directory's status, patched as described below.
+ */
+struct lstat_args {
+ char *path;
+ struct stat *ub;
+};
+/* ARGSUSED */
+lstat(p, uap, retval)
+ struct proc *p;
+ register struct lstat_args *uap;
+ int *retval;
+{
+ int error;
+ struct vnode *vp, *dvp;
+ struct stat sb, sb1;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | LOCKPARENT, UIO_USERSPACE,
+ uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ /*
+ * For symbolic links, always return the attributes of its
+ * containing directory, except for the file type (forced to
+ * S_IFLNK), link count, size and block count, which come from
+ * the link itself.
+ */
+ vp = nd.ni_vp;
+ dvp = nd.ni_dvp;
+ if (vp->v_type != VLNK) {
+ if (dvp == vp)
+ vrele(dvp); /* same vnode: drop only the extra reference */
+ else
+ vput(dvp);
+ error = vn_stat(vp, &sb, p);
+ vput(vp);
+ if (error)
+ return (error);
+ } else {
+ error = vn_stat(dvp, &sb, p); /* sb starts out as the directory's status */
+ vput(dvp);
+ if (error) {
+ vput(vp);
+ return (error);
+ }
+ error = vn_stat(vp, &sb1, p); /* sb1 is the link's own status */
+ vput(vp);
+ if (error)
+ return (error);
+ sb.st_mode &= ~S_IFDIR;
+ sb.st_mode |= S_IFLNK;
+ sb.st_nlink = sb1.st_nlink;
+ sb.st_size = sb1.st_size;
+ sb.st_blocks = sb1.st_blocks;
+ }
+ error = copyout((caddr_t)&sb, (caddr_t)uap->ub, sizeof (sb));
+ return (error);
+}
+
+/*
+ * Get configurable pathname variables.
+ *
+ * Looks up uap->path and asks the filesystem, through VOP_PATHCONF,
+ * for the variable selected by uap->name; the value is stored through
+ * retval by that call.
+ */
+struct pathconf_args {
+	char *path;
+	int name;
+};
+/* ARGSUSED */
+pathconf(p, uap, retval)
+	struct proc *p;
+	register struct pathconf_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error == 0) {
+		error = VOP_PATHCONF(nd.ni_vp, uap->name, retval);
+		vput(nd.ni_vp);
+	}
+	return (error);
+}
+
+/*
+ * Return target name of a symbolic link.
+ *
+ * Looks up uap->path without following a trailing link and reads the
+ * link contents into the user buffer uap->buf (at most uap->count
+ * bytes) with VOP_READLINK.  *retval is the number of bytes
+ * transferred.  Returns EINVAL when the name is not a symbolic link.
+ */
+struct readlink_args {
+	char *path;
+	char *buf;
+	int count;
+};
+/* ARGSUSED */
+readlink(p, uap, retval)
+	struct proc *p;
+	register struct readlink_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct iovec aiov;
+	struct uio auio;
+	int error;
+	struct nameidata nd;
+
+	NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+	if (error = namei(&nd))
+		return (error);
+	vp = nd.ni_vp;
+	/*
+	 * Bail out before touching auio: it is only set up for symbolic
+	 * links, so its residual count must not be read on this path.
+	 */
+	if (vp->v_type != VLNK) {
+		vput(vp);
+		return (EINVAL);
+	}
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_offset = 0;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	error = VOP_READLINK(vp, &auio, p->p_ucred);
+	vput(vp);
+	/* Bytes transferred = requested count minus what is left over. */
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+
+/*
+ * Change flags of a file given a path name.
+ *
+ * Fails with EROFS when the file lives on a read-only mount; otherwise
+ * only va_flags is handed to VOP_SETATTR.
+ */
+struct chflags_args {
+	char *path;
+	int flags;
+};
+/* ARGSUSED */
+chflags(p, uap, retval)
+	struct proc *p;
+	register struct chflags_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr va;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change flags of a file given a file descriptor.
+ *
+ * Fails with EROFS when the file lives on a read-only mount; otherwise
+ * only va_flags is handed to VOP_SETATTR.  The vnode belongs to the
+ * open file, so it is unlocked but not released afterwards.
+ */
+struct fchflags_args {
+	int fd;
+	int flags;
+};
+/* ARGSUSED */
+fchflags(p, uap, retval)
+	struct proc *p;
+	register struct fchflags_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_flags = uap->flags;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Change mode of a file given path name.
+ *
+ * Fails with EROFS on a read-only mount; otherwise the ALLPERMS bits
+ * of uap->mode are handed to VOP_SETATTR as va_mode.
+ */
+struct chmod_args {
+	char *path;
+	int mode;
+};
+/* ARGSUSED */
+chmod(p, uap, retval)
+	struct proc *p;
+	register struct chmod_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr va;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Change mode of a file given a file descriptor.
+ *
+ * Fails with EROFS on a read-only mount; otherwise the ALLPERMS bits
+ * of uap->mode are handed to VOP_SETATTR as va_mode.  The vnode belongs
+ * to the open file, so it is unlocked but not released afterwards.
+ */
+struct fchmod_args {
+	int fd;
+	int mode;
+};
+/* ARGSUSED */
+fchmod(p, uap, retval)
+	struct proc *p;
+	register struct fchmod_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_mode = uap->mode & ALLPERMS;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set ownership given a path name.
+ *
+ * Fails with EROFS on a read-only mount; otherwise uap->uid and
+ * uap->gid are handed to VOP_SETATTR as va_uid / va_gid.
+ */
+struct chown_args {
+	char *path;
+	int uid;
+	int gid;
+};
+/* ARGSUSED */
+chown(p, uap, retval)
+	struct proc *p;
+	register struct chown_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct vattr va;
+	register struct vnode *vp;
+	int error;
+
+	NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+	error = namei(&nd);
+	if (error != 0)
+		return (error);
+	vp = nd.ni_vp;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_uid = uap->uid;
+		va.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Set ownership given a file descriptor.
+ *
+ * Fails with EROFS on a read-only mount; otherwise uap->uid and
+ * uap->gid are handed to VOP_SETATTR as va_uid / va_gid.  The vnode
+ * belongs to the open file, so it is unlocked but not released.
+ */
+struct fchown_args {
+	int fd;
+	int uid;
+	int gid;
+};
+/* ARGSUSED */
+fchown(p, uap, retval)
+	struct proc *p;
+	register struct fchown_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	struct vnode *vp;
+	struct vattr va;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error != 0)
+		return (error);
+	vp = (struct vnode *)fp->f_data;
+	LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+	VOP_LOCK(vp);
+	if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) {
+		VATTR_NULL(&va);
+		va.va_uid = uap->uid;
+		va.va_gid = uap->gid;
+		error = VOP_SETATTR(vp, &va, p->p_ucred, p);
+	} else
+		error = EROFS;
+	VOP_UNLOCK(vp);
+	return (error);
+}
+
+/*
+ * Set the access and modification times of a file.
+ *
+ * With uap->tptr NULL both times are set to the current time and
+ * VA_UTIMES_NULL is flagged in the attributes; otherwise two timevals
+ * (access time first, then modification time) are copied in from user
+ * space.  Fails with EROFS on a read-only mount.
+ */
+struct utimes_args {
+ char *path;
+ struct timeval *tptr;
+};
+/* ARGSUSED */
+utimes(p, uap, retval)
+ struct proc *p;
+ register struct utimes_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct timeval tv[2];
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ VATTR_NULL(&vattr);
+ if (uap->tptr == NULL) {
+ microtime(&tv[0]);
+ tv[1] = tv[0];
+ vattr.va_vaflags |= VA_UTIMES_NULL;
+ } else if (error = copyin((caddr_t)uap->tptr, (caddr_t)tv, sizeof (tv)))
+ return (error);
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_mount->mnt_flag & MNT_RDONLY)
+ error = EROFS;
+ else {
+ vattr.va_atime.ts_sec = tv[0].tv_sec;
+ vattr.va_atime.ts_nsec = tv[0].tv_usec * 1000; /* microseconds to nanoseconds */
+ vattr.va_mtime.ts_sec = tv[1].tv_sec;
+ vattr.va_mtime.ts_nsec = tv[1].tv_usec * 1000;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given its path name.
+ *
+ * Directories are refused with EISDIR.  Otherwise the file must pass
+ * vn_writechk() and the caller must have VWRITE access before the new
+ * length is handed to VOP_SETATTR as va_size.
+ */
+struct truncate_args {
+ char *path;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+truncate(p, uap, retval)
+ struct proc *p;
+ register struct truncate_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0 &&
+ (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, p->p_ucred, p);
+ }
+ vput(vp);
+ return (error);
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ *
+ * The descriptor must be open for writing (FWRITE), else EINVAL.
+ * Directories are refused with EISDIR.  No VOP_ACCESS check is made
+ * here; the attribute change is made with the credential stored in
+ * the file structure (fp->f_cred).
+ */
+struct ftruncate_args {
+ int fd;
+ int pad;
+ off_t length;
+};
+/* ARGSUSED */
+ftruncate(p, uap, retval)
+ struct proc *p;
+ register struct ftruncate_args *uap;
+ int *retval;
+{
+ struct vattr vattr;
+ struct vnode *vp;
+ struct file *fp;
+ int error;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FWRITE) == 0)
+ return (EINVAL);
+ vp = (struct vnode *)fp->f_data;
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ VOP_LOCK(vp);
+ if (vp->v_type == VDIR)
+ error = EISDIR;
+ else if ((error = vn_writechk(vp)) == 0) {
+ VATTR_NULL(&vattr);
+ vattr.va_size = uap->length;
+ error = VOP_SETATTR(vp, &vattr, fp->f_cred, p);
+ }
+ VOP_UNLOCK(vp);
+ return (error);
+}
+
+#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
+/*
+ * Truncate a file given its path name.
+ *
+ * Compatibility entry point taking a long length; the arguments are
+ * repacked into a struct truncate_args and passed to truncate().
+ */
+struct otruncate_args {
+	char *path;
+	long length;
+};
+/* ARGSUSED */
+otruncate(p, uap, retval)
+	struct proc *p;
+	register struct otruncate_args *uap;
+	int *retval;
+{
+	struct truncate_args newargs;
+
+	/* The long length is widened on assignment to the off_t field. */
+	newargs.length = uap->length;
+	newargs.path = uap->path;
+	return (truncate(p, &newargs, retval));
+}
+
+/*
+ * Truncate a file given a file descriptor.
+ *
+ * Compatibility entry point taking a long length; the arguments are
+ * repacked into a struct ftruncate_args and passed to ftruncate().
+ */
+struct oftruncate_args {
+	int fd;
+	long length;
+};
+/* ARGSUSED */
+oftruncate(p, uap, retval)
+	struct proc *p;
+	register struct oftruncate_args *uap;
+	int *retval;
+{
+	struct ftruncate_args newargs;
+
+	/* The long length is widened on assignment to the off_t field. */
+	newargs.length = uap->length;
+	newargs.fd = uap->fd;
+	return (ftruncate(p, &newargs, retval));
+}
+#endif /* COMPAT_43 || COMPAT_SUNOS */
+
+/*
+ * Sync an open file.
+ *
+ * Calls VOP_FSYNC with MNT_WAIT on the vnode behind the descriptor,
+ * using the credential stored in the file structure.  The vnode is
+ * locked for the duration of the call.
+ */
+struct fsync_args {
+	int fd;
+};
+/* ARGSUSED */
+fsync(p, uap, retval)
+	struct proc *p;
+	struct fsync_args *uap;
+	int *retval;
+{
+	struct file *fp;
+	register struct vnode *vp;
+	int error;
+
+	error = getvnode(p->p_fd, uap->fd, &fp);
+	if (error == 0) {
+		vp = (struct vnode *)fp->f_data;
+		VOP_LOCK(vp);
+		error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
+		VOP_UNLOCK(vp);
+	}
+	return (error);
+}
+
+/*
+ * Rename files. Source and destination must either both be directories,
+ * or both not be directories. If target is a directory, it must be empty.
+ *
+ * Two lookups are made: the source with DELETE / WANTPARENT and the
+ * target with RENAME / LOCKPARENT | LOCKLEAF, both with SAVESTART.
+ * Inside this function error == -1 is a private marker meaning "source
+ * and target are the same directory entry": it takes the abort/cleanup
+ * path and is turned into a return value of 0.
+ */
+struct rename_args {
+ char *from;
+ char *to;
+};
+/* ARGSUSED */
+rename(p, uap, retval)
+ struct proc *p;
+ register struct rename_args *uap;
+ int *retval;
+{
+ register struct vnode *tvp, *fvp, *tdvp;
+ struct nameidata fromnd, tond;
+ int error;
+
+ NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+ uap->from, p);
+ if (error = namei(&fromnd))
+ return (error);
+ fvp = fromnd.ni_vp;
+ NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART,
+ UIO_USERSPACE, uap->to, p);
+ if (error = namei(&tond)) {
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1; /* only the source-side lookup state is left to clean up */
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp; /* NULL when the target name does not exist yet */
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ }
+ }
+ if (fvp == tdvp)
+ error = EINVAL; /* source is the directory the target would live in */
+ /*
+ * If source is the same as the destination (that is the
+ * same inode number with the same name in the same directory),
+ * then there is nothing to do.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1;
+out:
+ if (!error) {
+ LEASE_CHECK(tdvp, p, p->p_ucred, LEASE_WRITE);
+ if (fromnd.ni_dvp != tdvp)
+ LEASE_CHECK(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ if (tvp)
+ LEASE_CHECK(tvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ if (fromnd.ni_startdir)
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (error == -1)
+ return (0);
+ return (error);
+}
+
+/*
+ * Make a directory file.
+ *
+ * Fails with EEXIST when the name already exists.  The ACCESSPERMS
+ * bits of uap->mode are filtered through the process fd_cmask.  On
+ * success the new vnode returned by VOP_MKDIR is released with vput().
+ */
+struct mkdir_args {
+ char *path;
+ int mode;
+};
+/* ARGSUSED */
+mkdir(p, uap, retval)
+ struct proc *p;
+ register struct mkdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp; /* non-NULL means the name already exists */
+ if (vp != NULL) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ return (EEXIST);
+ }
+ VATTR_NULL(&vattr);
+ vattr.va_type = VDIR;
+ vattr.va_mode = (uap->mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+ if (!error)
+ vput(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * Remove a directory file.
+ *
+ * Fails with ENOTDIR when the name is not a directory, EINVAL when it
+ * is the same vnode as its parent (rmdir of "."), and EBUSY when it is
+ * the root of a mounted filesystem.  When a check fails the pending
+ * operation is aborted and both vnodes are released.
+ */
+struct rmdir_args {
+ char *path;
+};
+/* ARGSUSED */
+rmdir(p, uap, retval)
+ struct proc *p;
+ struct rmdir_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ if (!error) {
+ LEASE_CHECK(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
+ LEASE_CHECK(vp, p, p->p_ucred, LEASE_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp); /* parent and target are the same vnode */
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * COMPAT_43 variant.  Reads up to uap->count bytes of entries from the
+ * directory open on uap->fd into uap->buf, copies the file offset at
+ * which the read started out to uap->basep and reports the number of
+ * bytes transferred through *retval.  Except on the non-little-endian
+ * path taken when mnt_maxsymlinklen <= 0, the entries are first read into
+ * a kernel buffer so that the d_type/d_namlen bytes of each record can be
+ * rewritten before the buffer is copied out with uiomove().
+ *
+ * Returns EBADF if the descriptor is not open for reading, EINVAL if it
+ * is not a directory, EIO if a record whose d_reclen is not positive is
+ * found, or the error from getvnode(), VOP_READDIR(), uiomove() or
+ * copyout().
+ *
+ * NOTE(review): uap->count is used unbounded as the MALLOC() size --
+ * confirm whether an upper limit is wanted.
+ */
+struct ogetdirentries_args {
+ int fd;
+ char *buf;
+ u_int count;
+ long *basep;
+};
+ogetdirentries(p, uap, retval)
+ struct proc *p;
+ register struct ogetdirentries_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct file *fp;
+ struct uio auio, kuio;
+ struct iovec aiov, kiov;
+ struct dirent *dp, *edp;
+ caddr_t dirbuf;
+ int error, readcnt;
+ long loff;
+
+ if (error = getvnode(p->p_fd, uap->fd, &fp))
+ return (error);
+ if ((fp->f_flag & FREAD) == 0)
+ return (EBADF);
+ vp = (struct vnode *)fp->f_data;
+ if (vp->v_type != VDIR)
+ return (EINVAL);
+ /* Describe the caller's buffer as a single-segment user-space read. */
+ aiov.iov_base = uap->buf;
+ aiov.iov_len = uap->count;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_rw = UIO_READ;
+ auio.uio_segflg = UIO_USERSPACE;
+ auio.uio_procp = p;
+ auio.uio_resid = uap->count;
+ VOP_LOCK(vp);
+ /* Remember the starting offset; it is what gets written to *basep. */
+ loff = auio.uio_offset = fp->f_offset;
+# if (BYTE_ORDER != LITTLE_ENDIAN)
+ if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+ error = VOP_READDIR(vp, &auio, fp->f_cred);
+ fp->f_offset = auio.uio_offset;
+ } else
+# endif
+ {
+ /* Same request, redirected into a temporary kernel buffer. */
+ kuio = auio;
+ kuio.uio_iov = &kiov;
+ kuio.uio_segflg = UIO_SYSSPACE;
+ kiov.iov_len = uap->count;
+ MALLOC(dirbuf, caddr_t, uap->count, M_TEMP, M_WAITOK);
+ kiov.iov_base = dirbuf;
+ error = VOP_READDIR(vp, &kuio, fp->f_cred);
+ fp->f_offset = kuio.uio_offset;
+ if (error == 0) {
+ readcnt = uap->count - kuio.uio_resid;
+ edp = (struct dirent *)&dirbuf[readcnt];
+ for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+# if (BYTE_ORDER == LITTLE_ENDIAN)
+ /*
+ * The expected low byte of
+ * dp->d_namlen is our dp->d_type.
+ * The high MBZ byte of dp->d_namlen
+ * is our dp->d_namlen.
+ */
+ dp->d_type = dp->d_namlen;
+ dp->d_namlen = 0;
+# else
+ /*
+ * The dp->d_type is the high byte
+ * of the expected dp->d_namlen,
+ * so must be zero'ed.
+ */
+ dp->d_type = 0;
+# endif
+ /* A non-positive record length would never advance. */
+ if (dp->d_reclen > 0) {
+ dp = (struct dirent *)
+ ((char *)dp + dp->d_reclen);
+ } else {
+ error = EIO;
+ break;
+ }
+ }
+ /* Copy out only if the walk reached the end of the data. */
+ if (dp >= edp)
+ error = uiomove(dirbuf, readcnt, &auio);
+ }
+ FREE(dirbuf, M_TEMP);
+ }
+ VOP_UNLOCK(vp);
+ if (error)
+ return (error);
+ error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+ *retval = uap->count - auio.uio_resid;
+ return (error);
+}
+#endif
+
+/*
+ * Read a block of directory entries in a file system independent format.
+ *
+ * Reads up to uap->count bytes of entries from the directory open on
+ * uap->fd into uap->buf with VOP_READDIR(), copies the file offset at
+ * which the read started out to uap->basep and reports the number of
+ * bytes transferred through *retval.
+ *
+ * If the read transferred nothing, the open file may be switched to
+ * another directory and the read retried from offset 0:
+ *   - with UNION configured, a union-layer vnode is replaced by the
+ *     vnode returned from union_lowervp();
+ *   - the root (VROOT) of a MNT_UNION mount is replaced by the vnode the
+ *     mount covers.
+ *
+ * Returns EBADF if the descriptor is not open for reading, EINVAL if the
+ * vnode is not a directory, or the error from getvnode(), VOP_READDIR(),
+ * VOP_OPEN(), vn_close() or copyout().
+ */
+struct getdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+getdirentries(p, uap, retval)
+	struct proc *p;
+	register struct getdirentries_args *uap;
+	int *retval;
+{
+	register struct vnode *vp;
+	struct file *fp;
+	struct uio auio;
+	struct iovec aiov;
+	long loff;
+	int error;
+
+	if (error = getvnode(p->p_fd, uap->fd, &fp))
+		return (error);
+	if ((fp->f_flag & FREAD) == 0)
+		return (EBADF);
+	vp = (struct vnode *)fp->f_data;
+unionread:
+	if (vp->v_type != VDIR)
+		return (EINVAL);
+	/* Describe the caller's buffer as a single-segment user-space read. */
+	aiov.iov_base = uap->buf;
+	aiov.iov_len = uap->count;
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_procp = p;
+	auio.uio_resid = uap->count;
+	VOP_LOCK(vp);
+	/* Remember the starting offset; it is what gets written to *basep. */
+	loff = auio.uio_offset = fp->f_offset;
+	error = VOP_READDIR(vp, &auio, fp->f_cred);
+	fp->f_offset = auio.uio_offset;
+	VOP_UNLOCK(vp);
+	if (error)
+		return (error);
+
+#ifdef UNION
+{
+	extern int (**union_vnodeop_p)();
+	extern struct vnode *union_lowervp __P((struct vnode *));
+
+	/* Nothing was read from a union-layer vnode: try the lower one. */
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_op == union_vnodeop_p)) {
+		struct vnode *tvp = vp;
+
+		vp = union_lowervp(vp);
+		if (vp != NULLVP) {
+			VOP_LOCK(vp);
+			/*
+			 * The open operation takes a credential and a
+			 * process as well as the vnode and mode; pass the
+			 * same pair that vn_close() is given below instead
+			 * of leaving them unspecified.
+			 */
+			error = VOP_OPEN(vp, FREAD, fp->f_cred, p);
+			VOP_UNLOCK(vp);
+
+			if (error) {
+				vrele(vp);
+				return (error);
+			}
+			/* Re-point the open file, then close the old vnode. */
+			fp->f_data = (caddr_t) vp;
+			fp->f_offset = 0;
+			error = vn_close(tvp, FREAD, fp->f_cred, p);
+			if (error)
+				return (error);
+			goto unionread;
+		}
+	}
+}
+#endif
+
+	/* Nothing was read from the root of a union mount: read what it covers. */
+	if ((uap->count == auio.uio_resid) &&
+	    (vp->v_flag & VROOT) &&
+	    (vp->v_mount->mnt_flag & MNT_UNION)) {
+		struct vnode *tvp = vp;
+		vp = vp->v_mount->mnt_vnodecovered;
+		VREF(vp);
+		fp->f_data = (caddr_t) vp;
+		fp->f_offset = 0;
+		vrele(tvp);
+		goto unionread;
+	}
+	error = copyout((caddr_t)&loff, (caddr_t)uap->basep, sizeof(long));
+	*retval = uap->count - auio.uio_resid;
+	return (error);
+}
+
+/*
+ * Set the mode mask for creation of filesystem nodes.
+ *
+ * The mask in force before the call is handed back through *retval; the
+ * new mask is uap->newmask with every bit outside ALLPERMS discarded.
+ * Always returns 0.
+ */
+struct umask_args {
+	int	newmask;
+};
+mode_t				/* XXX */
+umask(p, uap, retval)
+	struct proc *p;
+	struct umask_args *uap;
+	int *retval;
+{
+	register struct filedesc *fd_tab = p->p_fd;
+
+	/* Report the old mask before it is overwritten. */
+	*retval = fd_tab->fd_cmask;
+	fd_tab->fd_cmask = uap->newmask & ALLPERMS;
+	return (0);
+}
+
+/*
+ * Void all references to file by ripping underlying filesystem
+ * away from vnode.
+ *
+ * Only character and block device nodes are accepted (EINVAL otherwise),
+ * and the caller must either own the node according to VOP_GETATTR() or
+ * pass the suser() check.  vgoneall() is invoked only when the vnode has
+ * more than one use or is flagged VALIASED.
+ *
+ * Returns 0 or the error from namei(), VOP_GETATTR() or suser().
+ * retval is not used.
+ */
+struct revoke_args {
+ char *path;
+};
+/* ARGSUSED */
+revoke(p, uap, retval)
+ struct proc *p;
+ register struct revoke_args *uap;
+ int *retval;
+{
+ register struct vnode *vp;
+ struct vattr vattr;
+ int error;
+ struct nameidata nd;
+
+ NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, p);
+ if (error = namei(&nd))
+ return (error);
+ vp = nd.ni_vp;
+ if (vp->v_type != VCHR && vp->v_type != VBLK) {
+ error = EINVAL;
+ goto out;
+ }
+ if (error = VOP_GETATTR(vp, &vattr, p->p_ucred, p))
+ goto out;
+ /* suser() is consulted only when the caller is not the owner. */
+ if (p->p_ucred->cr_uid != vattr.va_uid &&
+ (error = suser(p->p_ucred, &p->p_acflag)))
+ goto out;
+ if (vp->v_usecount > 1 || (vp->v_flag & VALIASED))
+ vgoneall(vp);
+out:
+ /* Every path after a successful namei() releases the vnode here. */
+ vrele(vp);
+ return (error);
+}
+
+/*
+ * Convert a user file descriptor to a kernel file entry.
+ *
+ * On success the entry found at index fd of fdp's open-file table is
+ * stored through fpp and 0 is returned.  EBADF is returned when fd is
+ * outside the table or the slot is empty, EINVAL when the entry is not
+ * of type DTYPE_VNODE.
+ */
+getvnode(fdp, fd, fpp)
+	struct filedesc *fdp;
+	struct file **fpp;
+	int fd;
+{
+	struct file *entry;
+
+	/* The unsigned compare also rejects negative descriptors. */
+	if ((u_int)fd >= fdp->fd_nfiles)
+		return (EBADF);
+	entry = fdp->fd_ofiles[fd];
+	if (entry == NULL)
+		return (EBADF);
+	if (entry->f_type != DTYPE_VNODE)
+		return (EINVAL);
+	*fpp = entry;
+	return (0);
+}
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
new file mode 100644
index 0000000..2fe39eb
--- /dev/null
+++ b/sys/kern/vfs_mount.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)vfs_conf.c 8.8 (Berkeley) 3/31/94
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+#ifdef FFS
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * This specifies the filesystem used to mount the root.
+ * This specification should be done by /etc/config.
+ *
+ * mountroot is only defined here when FFS is configured; it then points
+ * at ffs_mountroot.
+ */
+int (*mountroot)() = ffs_mountroot;
+#endif
+
+/*
+ * These define the root filesystem and device.
+ * Both are defined here without an initializer.
+ */
+struct mount *rootfs;
+struct vnode *rootvnode;
+
+/*
+ * Set up the filesystem operations for vnodes.
+ * The types are defined in mount.h.
+ *
+ * Each filesystem gets an XXX_VFSOPS macro: the address of its vfsops
+ * structure when the matching kernel option is defined, NULL otherwise.
+ * The macros are used to fill in the vfssw[] table below.  Note that the
+ * UFS slot is controlled by the FFS option.
+ */
+#ifdef FFS
+extern struct vfsops ufs_vfsops;
+#define UFS_VFSOPS &ufs_vfsops
+#else
+#define UFS_VFSOPS NULL
+#endif
+
+#ifdef LFS
+extern struct vfsops lfs_vfsops;
+#define LFS_VFSOPS &lfs_vfsops
+#else
+#define LFS_VFSOPS NULL
+#endif
+
+#ifdef MFS
+extern struct vfsops mfs_vfsops;
+#define MFS_VFSOPS &mfs_vfsops
+#else
+#define MFS_VFSOPS NULL
+#endif
+
+#ifdef NFS
+extern struct vfsops nfs_vfsops;
+#define NFS_VFSOPS &nfs_vfsops
+#else
+#define NFS_VFSOPS NULL
+#endif
+
+#ifdef FDESC
+extern struct vfsops fdesc_vfsops;
+#define FDESC_VFSOPS &fdesc_vfsops
+#else
+#define FDESC_VFSOPS NULL
+#endif
+
+#ifdef PORTAL
+extern struct vfsops portal_vfsops;
+#define PORTAL_VFSOPS &portal_vfsops
+#else
+#define PORTAL_VFSOPS NULL
+#endif
+
+#ifdef NULLFS
+extern struct vfsops null_vfsops;
+#define NULL_VFSOPS &null_vfsops
+#else
+#define NULL_VFSOPS NULL
+#endif
+
+#ifdef UMAPFS
+extern struct vfsops umap_vfsops;
+#define UMAP_VFSOPS &umap_vfsops
+#else
+#define UMAP_VFSOPS NULL
+#endif
+
+#ifdef KERNFS
+extern struct vfsops kernfs_vfsops;
+#define KERNFS_VFSOPS &kernfs_vfsops
+#else
+#define KERNFS_VFSOPS NULL
+#endif
+
+#ifdef PROCFS
+extern struct vfsops procfs_vfsops;
+#define PROCFS_VFSOPS &procfs_vfsops
+#else
+#define PROCFS_VFSOPS NULL
+#endif
+
+#ifdef AFS
+extern struct vfsops afs_vfsops;
+#define AFS_VFSOPS &afs_vfsops
+#else
+#define AFS_VFSOPS NULL
+#endif
+
+#ifdef CD9660
+extern struct vfsops cd9660_vfsops;
+#define CD9660_VFSOPS &cd9660_vfsops
+#else
+#define CD9660_VFSOPS NULL
+#endif
+
+#ifdef UNION
+extern struct vfsops union_vfsops;
+#define UNION_VFSOPS &union_vfsops
+#else
+#define UNION_VFSOPS NULL
+#endif
+
+/*
+ * Filesystem switch table.  The position of each entry is the MOUNT_*
+ * type number given in the comment beside it, so entries must not be
+ * reordered.  A slot is NULL when the type has no vfsops here (types 0,
+ * 4 and 6) or when its XXX_VFSOPS macro expanded to NULL.  The table
+ * ends with an extra 0 entry.
+ */
+struct vfsops *vfssw[] = {
+ NULL, /* 0 = MOUNT_NONE */
+ UFS_VFSOPS, /* 1 = MOUNT_UFS */
+ NFS_VFSOPS, /* 2 = MOUNT_NFS */
+ MFS_VFSOPS, /* 3 = MOUNT_MFS */
+ NULL, /* 4 = MOUNT_PC */
+ LFS_VFSOPS, /* 5 = MOUNT_LFS */
+ NULL, /* 6 = MOUNT_LOFS */
+ FDESC_VFSOPS, /* 7 = MOUNT_FDESC */
+ PORTAL_VFSOPS, /* 8 = MOUNT_PORTAL */
+ NULL_VFSOPS, /* 9 = MOUNT_NULL */
+ UMAP_VFSOPS, /* 10 = MOUNT_UMAP */
+ KERNFS_VFSOPS, /* 11 = MOUNT_KERNFS */
+ PROCFS_VFSOPS, /* 12 = MOUNT_PROCFS */
+ AFS_VFSOPS, /* 13 = MOUNT_AFS */
+ CD9660_VFSOPS, /* 14 = MOUNT_CD9660 */
+ UNION_VFSOPS, /* 15 = MOUNT_UNION */
+ 0
+};
+
+
+/*
+ *
+ * vfs_opv_descs enumerates the list of vnode classes, each with its own
+ * vnode operation vector. It is consulted at system boot to build operation
+ * vectors. It is NULL terminated.
+ *
+ */
+/*
+ * Descriptors are declared for every vnode class, whether or not the
+ * corresponding filesystem is configured; only the table below decides
+ * which ones are actually referenced.
+ */
+extern struct vnodeopv_desc ffs_vnodeop_opv_desc;
+extern struct vnodeopv_desc ffs_specop_opv_desc;
+extern struct vnodeopv_desc ffs_fifoop_opv_desc;
+extern struct vnodeopv_desc lfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc lfs_specop_opv_desc;
+extern struct vnodeopv_desc lfs_fifoop_opv_desc;
+extern struct vnodeopv_desc mfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc dead_vnodeop_opv_desc;
+extern struct vnodeopv_desc fifo_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_vnodeop_opv_desc;
+extern struct vnodeopv_desc nfsv2_vnodeop_opv_desc;
+extern struct vnodeopv_desc spec_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc;
+extern struct vnodeopv_desc fdesc_vnodeop_opv_desc;
+extern struct vnodeopv_desc portal_vnodeop_opv_desc;
+extern struct vnodeopv_desc null_vnodeop_opv_desc;
+extern struct vnodeopv_desc umap_vnodeop_opv_desc;
+extern struct vnodeopv_desc kernfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc procfs_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_vnodeop_opv_desc;
+extern struct vnodeopv_desc cd9660_specop_opv_desc;
+extern struct vnodeopv_desc cd9660_fifoop_opv_desc;
+extern struct vnodeopv_desc union_vnodeop_opv_desc;
+
+/*
+ * NULL-terminated list of the operation-vector descriptors in use.
+ * The ffs, dead and spec entries are listed unconditionally; every other
+ * entry is guarded by its filesystem option, and each *_fifoop / fifo_*
+ * entry additionally requires FIFO.
+ * NOTE(review): the ffs entries are not guarded by FFS, unlike
+ * UFS_VFSOPS above -- confirm a kernel without FFS is expected to link.
+ */
+struct vnodeopv_desc *vfs_opv_descs[] = {
+ &ffs_vnodeop_opv_desc,
+ &ffs_specop_opv_desc,
+#ifdef FIFO
+ &ffs_fifoop_opv_desc,
+#endif
+ &dead_vnodeop_opv_desc,
+#ifdef FIFO
+ &fifo_vnodeop_opv_desc,
+#endif
+ &spec_vnodeop_opv_desc,
+#ifdef LFS
+ &lfs_vnodeop_opv_desc,
+ &lfs_specop_opv_desc,
+#ifdef FIFO
+ &lfs_fifoop_opv_desc,
+#endif
+#endif
+#ifdef MFS
+ &mfs_vnodeop_opv_desc,
+#endif
+#ifdef NFS
+ &nfsv2_vnodeop_opv_desc,
+ &spec_nfsv2nodeop_opv_desc,
+#ifdef FIFO
+ &fifo_nfsv2nodeop_opv_desc,
+#endif
+#endif
+#ifdef FDESC
+ &fdesc_vnodeop_opv_desc,
+#endif
+#ifdef PORTAL
+ &portal_vnodeop_opv_desc,
+#endif
+#ifdef NULLFS
+ &null_vnodeop_opv_desc,
+#endif
+#ifdef UMAPFS
+ &umap_vnodeop_opv_desc,
+#endif
+#ifdef KERNFS
+ &kernfs_vnodeop_opv_desc,
+#endif
+#ifdef PROCFS
+ &procfs_vnodeop_opv_desc,
+#endif
+#ifdef CD9660
+ &cd9660_vnodeop_opv_desc,
+ &cd9660_specop_opv_desc,
+#ifdef FIFO
+ &cd9660_fifoop_opv_desc,
+#endif
+#endif
+#ifdef UNION
+ &union_vnodeop_opv_desc,
+#endif
+ NULL
+};
diff --git a/sys/kern/vnode_if.pl b/sys/kern/vnode_if.pl
new file mode 100644
index 0000000..e190fa0
--- /dev/null
+++ b/sys/kern/vnode_if.pl
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ name = $1;
+ uname = toupper(name);
+
+ # Get the function arguments.
+ for (c1 = 0;; ++c1) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ a[c1] = $0;
+ }
+
+ # Print out the vop_F_args structure.
+ printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n",
+ name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%sa_%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("};\n");
+
+ # Print out extern declaration.
+ printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+ # Print out inline struct.
+ printf("static inline int %s(", uname);
+ sep = ", ";
+ for (c2 = 0; c2 < c1; ++c2) {
+ if (c2 == c1 - 1)
+ sep = ")\n";
+ c3 = split(a[c2], t);
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("%s%s", substr(t[c3], beg, end - beg), sep);
+ }
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ if (t[2] ~ "WILLRELE")
+ c4 = 3;
+ else
+ c4 = 2;
+ for (; c4 < c3; ++c4)
+ printf("%s ", t[c4]);
+ beg = match(t[c3], "[^*]");
+ printf("%s%s\n",
+ substr(t[c4], 0, beg - 1), substr(t[c4], beg));
+ }
+ printf("{\n\tstruct %s_args a;\n\n", name);
+ printf("\ta.a_desc = VDESC(%s);\n", name);
+ for (c2 = 0; c2 < c1; ++c2) {
+ c3 = split(a[c2], t);
+ printf("\t");
+ beg = match(t[c3], "[^*]");
+ end = match(t[c3], ";");
+ printf("a.a_%s = %s\n",
+ substr(t[c3], beg, end - beg), substr(t[c3], beg));
+ }
+ c1 = split(a[0], t);
+ beg = match(t[c1], "[^*]");
+ end = match(t[c1], ";");
+ printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+ substr(t[c1], beg, end - beg), name);
+ }' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ sub (/^[ \t]*/, "", s);
+ sub (/[ \t]*$/, "", s);
+ return s;
+ }
+
+ function read_args() {
+ numargs = 0;
+ while (getline ln) {
+ if (ln ~ /}/) {
+ break;
+ };
+
+ # Delete comments, if any.
+ gsub (/\/\*.*\*\//, "", ln);
+
+ # Delete leading/trailing space.
+ ln = kill_surrounding_ws(ln);
+
+ # Pick off direction.
+ if (1 == sub(/^INOUT[ \t]+/, "", ln))
+ dir = "INOUT";
+ else if (1 == sub(/^IN[ \t]+/, "", ln))
+ dir = "IN";
+ else if (1 == sub(/^OUT[ \t]+/, "", ln))
+ dir = "OUT";
+ else
+ bail("No IN/OUT direction for \"" ln "\".");
+
+ # check for "WILLRELE"
+ if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+ rele = "WILLRELE";
+ } else {
+ rele = "WONTRELE";
+ };
+
+ # kill trailing ;
+ if (1 != sub (/;$/, "", ln)) {
+ bail("Missing end-of-line ; in \"" ln "\".");
+ };
+
+ # pick off variable name
+ if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+ bail("Missing var name \"a_foo\" in \"" ln "\".");
+ };
+ arg = substr (ln, i);
+ # Want to <<substr(ln, i) = "";>>, but nawk cannot.
+ # Hack around this.
+ ln = substr(ln, 1, i-1);
+
+ # what is left must be type
+ # (put clean it up some)
+ type = ln;
+ gsub (/[ \t]+/, " ", type); # condense whitespace
+ type = kill_surrounding_ws(type);
+
+ # (boy this was easier in Perl)
+
+ numargs++;
+ dirs[numargs] = dir;
+ reles[numargs] = rele;
+ types[numargs] = type;
+ args[numargs] = arg;
+ };
+ }
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags
+ releflags = "";
+ vpnum = 0;
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags);
+ print "\tVDESC_NO_OFFSET";
+ print "};";
+ }
+
+ function find_arg_with_type (type) {
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == type) {
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET";
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") {
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments
+ read_args();
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets();
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+#
+# The here-document below appends hand-written args structures and inline
+# wrappers for the two operations to $HEADER.  Both wrappers take only a
+# struct buf pointer and dispatch on the vnode found in its b_vp member.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments.
+ for (;;) {
+ if (getline <= 0)
+ exit
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+
diff --git a/sys/netinet/tcp_reass.c b/sys/netinet/tcp_reass.c
new file mode 100644
index 0000000..2dd1d74
--- /dev/null
+++ b/sys/netinet/tcp_reass.c
@@ -0,0 +1,1647 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_input.c 8.5 (Berkeley) 4/10/94
+ */
+
+#ifndef TUBA_INCLUDE
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/errno.h>
+
+#include <net/if.h>
+#include <net/route.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+#include <netinet/tcp_debug.h>
+
+/*
+ * NOTE(review): presumably the number of duplicate ACKs that triggers a
+ * fast retransmit -- confirm against its use in tcp_input().
+ */
+int tcprexmtthresh = 3;
+/* NOTE(review): looks like a saved header copy for tracing -- confirm. */
+struct tcpiphdr tcp_saveti;
+/* The pcb tcp_input() tries first when locating a segment's pcb. */
+struct inpcb *tcp_last_inpcb = &tcb;
+
+extern u_long sb_max;
+
+#endif /* TUBA_INCLUDE */
+/*
+ * 24 * 24 * 60 * 60 is the number of seconds in 24 days; the product is
+ * that interval scaled by PR_SLOWHZ.
+ * NOTE(review): presumably PR_SLOWHZ is slow-timer ticks per second, so
+ * this is 24 days in ticks -- confirm.
+ */
+#define TCP_PAWS_IDLE (24 * 24 * 60 * 60 * PR_SLOWHZ)
+
+/*
+ * for modulo comparisons of timestamps
+ *
+ * The difference is taken first and then tested as an int, so the result
+ * depends on its sign and not on the absolute magnitudes of a and b.
+ */
+#define TSTMP_LT(a,b) ((int)((a)-(b)) < 0)
+#define TSTMP_GEQ(a,b) ((int)((a)-(b)) >= 0)
+
+
+/*
+ * Insert segment ti into reassembly queue of tcp with
+ * control block tp.  Return TH_FIN if reassembly now includes
+ * a segment with FIN.  The macro form does the common case inline
+ * (segment is the next to be received on an established connection,
+ * and the queue is empty), avoiding linkage into and removal
+ * from the queue and repetition of various conversions.
+ * Set DELACK for segments received in order, but ack immediately
+ * when segments are out of order (so fast retransmit can work).
+ *
+ * Arguments:
+ *	tp	TCP control block
+ *	ti	header of the arriving segment
+ *	m	mbuf chain holding the segment
+ *	so	socket whose receive buffer takes in-order data
+ *	flags	lvalue that receives the result (TH_FIN or 0)
+ *
+ * The macro expands to a brace-enclosed block and evaluates tp and ti
+ * several times, so arguments must not have side effects.  Every use of
+ * an argument is parenthesized so that an expression argument expands
+ * as intended.
+ */
+#define	TCP_REASS(tp, ti, m, so, flags) { \
+	if ((ti)->ti_seq == (tp)->rcv_nxt && \
+	    (tp)->seg_next == (struct tcpiphdr *)(tp) && \
+	    (tp)->t_state == TCPS_ESTABLISHED) { \
+		(tp)->t_flags |= TF_DELACK; \
+		(tp)->rcv_nxt += (ti)->ti_len; \
+		(flags) = (ti)->ti_flags & TH_FIN; \
+		tcpstat.tcps_rcvpack++;\
+		tcpstat.tcps_rcvbyte += (ti)->ti_len;\
+		sbappend(&(so)->so_rcv, (m)); \
+		sorwakeup(so); \
+	} else { \
+		(flags) = tcp_reass((tp), (ti), (m)); \
+		(tp)->t_flags |= TF_ACKNOW; \
+	} \
+}
+#ifndef TUBA_INCLUDE
+
+/*
+ * Queue segment ti (carried in mbuf chain m) on tp's reassembly list,
+ * trimming or discarding data that overlaps what is already queued, then
+ * hand every queued segment that is now in sequence to the socket's
+ * receive buffer and wake up the reader.
+ *
+ * Calling with ti == 0 skips the insertion and only performs the
+ * delivery step.
+ *
+ * Returns the TH_FIN bit of the last segment delivered, or 0 when nothing
+ * was delivered (including when the new segment was a complete duplicate
+ * and its mbufs were freed).
+ */
+int
+tcp_reass(tp, ti, m)
+ register struct tcpcb *tp;
+ register struct tcpiphdr *ti;
+ struct mbuf *m;
+{
+ register struct tcpiphdr *q;
+ struct socket *so = tp->t_inpcb->inp_socket;
+ int flags;
+
+ /*
+ * Call with ti==0 after become established to
+ * force pre-ESTABLISHED data up to user socket.
+ */
+ if (ti == 0)
+ goto present;
+
+ /*
+ * Find a segment which begins after this one does.
+ */
+ for (q = tp->seg_next; q != (struct tcpiphdr *)tp;
+ q = (struct tcpiphdr *)q->ti_next)
+ if (SEQ_GT(q->ti_seq, ti->ti_seq))
+ break;
+
+ /*
+ * If there is a preceding segment, it may provide some of
+ * our data already. If so, drop the data from the incoming
+ * segment. If it provides all of our data, drop us.
+ */
+ if ((struct tcpiphdr *)q->ti_prev != (struct tcpiphdr *)tp) {
+ register int i;
+ q = (struct tcpiphdr *)q->ti_prev;
+ /* conversion to int (in i) handles seq wraparound */
+ i = q->ti_seq + q->ti_len - ti->ti_seq;
+ if (i > 0) {
+ if (i >= ti->ti_len) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ m_freem(m);
+ return (0);
+ }
+ /* Partial overlap: drop the first i bytes of the new data. */
+ m_adj(m, i);
+ ti->ti_len -= i;
+ ti->ti_seq += i;
+ }
+ /* Step forward again to the first segment after the new one. */
+ q = (struct tcpiphdr *)(q->ti_next);
+ }
+ tcpstat.tcps_rcvoopack++;
+ tcpstat.tcps_rcvoobyte += ti->ti_len;
+ /* Remember which mbuf chain belongs to this queue entry. */
+ REASS_MBUF(ti) = m; /* XXX */
+
+ /*
+ * While we overlap succeeding segments trim them or,
+ * if they are completely covered, dequeue them.
+ */
+ while (q != (struct tcpiphdr *)tp) {
+ register int i = (ti->ti_seq + ti->ti_len) - q->ti_seq;
+ if (i <= 0)
+ break;
+ if (i < q->ti_len) {
+ q->ti_seq += i;
+ q->ti_len -= i;
+ m_adj(REASS_MBUF(q), i);
+ break;
+ }
+ /* Fully covered: advance first, then unlink and free the old entry. */
+ q = (struct tcpiphdr *)q->ti_next;
+ m = REASS_MBUF((struct tcpiphdr *)q->ti_prev);
+ remque(q->ti_prev);
+ m_freem(m);
+ }
+
+ /*
+ * Stick new segment in its place.
+ */
+ insque(ti, q->ti_prev);
+
+present:
+ /*
+ * Present data to user, advancing rcv_nxt through
+ * completed sequence space.
+ */
+ if (TCPS_HAVERCVDSYN(tp->t_state) == 0)
+ return (0);
+ ti = tp->seg_next;
+ /* Nothing queued, or the first queued segment is not the next expected. */
+ if (ti == (struct tcpiphdr *)tp || ti->ti_seq != tp->rcv_nxt)
+ return (0);
+ /* Data is held back while the connection is still in SYN_RECEIVED. */
+ if (tp->t_state == TCPS_SYN_RECEIVED && ti->ti_len)
+ return (0);
+ do {
+ tp->rcv_nxt += ti->ti_len;
+ flags = ti->ti_flags & TH_FIN;
+ remque(ti);
+ m = REASS_MBUF(ti);
+ ti = (struct tcpiphdr *)ti->ti_next;
+ /* Discard the data if the socket can no longer receive. */
+ if (so->so_state & SS_CANTRCVMORE)
+ m_freem(m);
+ else
+ sbappend(&so->so_rcv, m);
+ } while (ti != (struct tcpiphdr *)tp && ti->ti_seq == tp->rcv_nxt);
+ sorwakeup(so);
+ return (flags);
+}
+
+/*
+ * TCP input routine, follows pages 65-76 of the
+ * protocol specification dated September, 1981 very closely.
+ *
+ * m is the mbuf chain holding the received packet, starting with the
+ * IP header; iphlen is the length of that IP header and is used here
+ * only to decide whether IP options have to be stripped first.
+ *
+ * Apart from the two m_pullup() failure returns, every exit path
+ * below either frees m, appends it to the socket receive buffer, or
+ * hands it on to tcp_respond() or TCP_REASS().
+ */
+void
+tcp_input(m, iphlen)
+ register struct mbuf *m;
+ int iphlen;
+{
+ register struct tcpiphdr *ti;
+ register struct inpcb *inp;
+ caddr_t optp = NULL;
+ int optlen;
+ int len, tlen, off;
+ register struct tcpcb *tp = 0;
+ register int tiflags;
+ struct socket *so;
+ int todrop, acked, ourfinisacked, needoutput = 0;
+ short ostate;
+ struct in_addr laddr;
+ int dropsocket = 0;
+ int iss = 0;
+ u_long tiwin, ts_val, ts_ecr;
+ int ts_present = 0;
+
+ tcpstat.tcps_rcvtotal++;
+ /*
+ * Get IP and TCP header together in first mbuf.
+ * Note: IP leaves IP header in first mbuf.
+ */
+ ti = mtod(m, struct tcpiphdr *);
+ if (iphlen > sizeof (struct ip))
+ ip_stripoptions(m, (struct mbuf *)0);
+ if (m->m_len < sizeof (struct tcpiphdr)) {
+ if ((m = m_pullup(m, sizeof (struct tcpiphdr))) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+
+ /*
+ * Checksum extended TCP header and data.
+ */
+ tlen = ((struct ip *)ti)->ip_len;
+ len = sizeof (struct ip) + tlen;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_len = (u_short)tlen;
+ HTONS(ti->ti_len);
+ if (ti->ti_sum = in_cksum(m, len)) {
+ tcpstat.tcps_rcvbadsum++;
+ goto drop;
+ }
+ /*
+ * NOTE(review): this #endif has no opener inside this function;
+ * presumably it closes an #ifndef TUBA_INCLUDE placed earlier in
+ * the file -- confirm before moving code across it.
+ */
+#endif /* TUBA_INCLUDE */
+
+ /*
+ * Check that TCP offset makes sense,
+ * pull out TCP options and adjust length. XXX
+ */
+ off = ti->ti_off << 2;
+ if (off < sizeof (struct tcphdr) || off > tlen) {
+ tcpstat.tcps_rcvbadoff++;
+ goto drop;
+ }
+ tlen -= off;
+ ti->ti_len = tlen;
+ if (off > sizeof (struct tcphdr)) {
+ if (m->m_len < sizeof(struct ip) + off) {
+ if ((m = m_pullup(m, sizeof (struct ip) + off)) == 0) {
+ tcpstat.tcps_rcvshort++;
+ return;
+ }
+ ti = mtod(m, struct tcpiphdr *);
+ }
+ optlen = off - sizeof (struct tcphdr);
+ optp = mtod(m, caddr_t) + sizeof (struct tcpiphdr);
+ /*
+ * Do quick retrieval of timestamp options ("options
+ * prediction?"). If timestamp is the only option and it's
+ * formatted as recommended in RFC 1323 appendix A, we
+ * quickly get the values now and not bother calling
+ * tcp_dooptions(), etc.
+ */
+ if ((optlen == TCPOLEN_TSTAMP_APPA ||
+ (optlen > TCPOLEN_TSTAMP_APPA &&
+ optp[TCPOLEN_TSTAMP_APPA] == TCPOPT_EOL)) &&
+ *(u_long *)optp == htonl(TCPOPT_TSTAMP_HDR) &&
+ (ti->ti_flags & TH_SYN) == 0) {
+ ts_present = 1;
+ ts_val = ntohl(*(u_long *)(optp + 4));
+ ts_ecr = ntohl(*(u_long *)(optp + 8));
+ optp = NULL; /* we've parsed the options */
+ }
+ }
+ tiflags = ti->ti_flags;
+
+ /*
+ * Convert TCP protocol specific fields to host format.
+ */
+ NTOHL(ti->ti_seq);
+ NTOHL(ti->ti_ack);
+ NTOHS(ti->ti_win);
+ NTOHS(ti->ti_urp);
+
+ /*
+ * Locate pcb for segment.
+ *
+ * tcp_last_inpcb serves as a one-entry cache: the full
+ * in_pcblookup() is done only when the segment's address/port
+ * four-tuple does not match the pcb remembered there.
+ */
+findpcb:
+ inp = tcp_last_inpcb;
+ if (inp->inp_lport != ti->ti_dport ||
+ inp->inp_fport != ti->ti_sport ||
+ inp->inp_faddr.s_addr != ti->ti_src.s_addr ||
+ inp->inp_laddr.s_addr != ti->ti_dst.s_addr) {
+ inp = in_pcblookup(&tcb, ti->ti_src, ti->ti_sport,
+ ti->ti_dst, ti->ti_dport, INPLOOKUP_WILDCARD);
+ if (inp)
+ tcp_last_inpcb = inp;
+ ++tcpstat.tcps_pcbcachemiss;
+ }
+
+ /*
+ * If the state is CLOSED (i.e., TCB does not exist) then
+ * all data in the incoming segment is discarded.
+ * If the TCB exists but is in CLOSED state, it is embryonic,
+ * but should either do a listen or a connect soon.
+ */
+ if (inp == 0)
+ goto dropwithreset;
+ tp = intotcpcb(inp);
+ if (tp == 0)
+ goto dropwithreset;
+ if (tp->t_state == TCPS_CLOSED)
+ goto drop;
+
+ /* Unscale the window into a 32-bit value. */
+ if ((tiflags & TH_SYN) == 0)
+ tiwin = ti->ti_win << tp->snd_scale;
+ else
+ tiwin = ti->ti_win;
+
+ so = inp->inp_socket;
+ if (so->so_options & (SO_DEBUG|SO_ACCEPTCONN)) {
+ /*
+ * NOTE(review): ostate is assigned only here, yet the
+ * TCPS_CLOSED check above already jumps to ``drop'', which
+ * passes ostate to tcp_trace() when SO_DEBUG is set; on
+ * that path ostate is still unassigned.
+ */
+ if (so->so_options & SO_DEBUG) {
+ ostate = tp->t_state;
+ tcp_saveti = *ti;
+ }
+ if (so->so_options & SO_ACCEPTCONN) {
+ so = sonewconn(so, 0);
+ if (so == 0)
+ goto drop;
+ /*
+ * This is ugly, but ....
+ *
+ * Mark socket as temporary until we're
+ * committed to keeping it. The code at
+ * ``drop'' and ``dropwithreset'' check the
+ * flag dropsocket to see if the temporary
+ * socket created here should be discarded.
+ * We mark the socket as discardable until
+ * we're committed to it below in TCPS_LISTEN.
+ */
+ dropsocket++;
+ inp = (struct inpcb *)so->so_pcb;
+ inp->inp_laddr = ti->ti_dst;
+ inp->inp_lport = ti->ti_dport;
+#if BSD>=43
+ inp->inp_options = ip_srcroute();
+#endif
+ tp = intotcpcb(inp);
+ tp->t_state = TCPS_LISTEN;
+
+ /* Compute proper scaling value from buffer space
+ */
+ while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
+ TCP_MAXWIN << tp->request_r_scale < so->so_rcv.sb_hiwat)
+ tp->request_r_scale++;
+ }
+ }
+
+ /*
+ * Segment received on connection.
+ * Reset idle time and keep-alive timer.
+ */
+ tp->t_idle = 0;
+ tp->t_timer[TCPT_KEEP] = tcp_keepidle;
+
+ /*
+ * Process options if not in LISTEN state,
+ * else do it below (after getting remote address).
+ */
+ if (optp && tp->t_state != TCPS_LISTEN)
+ tcp_dooptions(tp, optp, optlen, ti,
+ &ts_present, &ts_val, &ts_ecr);
+
+ /*
+ * Header prediction: check for the two common cases
+ * of a uni-directional data xfer. If the packet has
+ * no control flags, is in-sequence, the window didn't
+ * change and we're not retransmitting, it's a
+ * candidate. If the length is zero and the ack moved
+ * forward, we're the sender side of the xfer. Just
+ * free the data acked & wake any higher level process
+ * that was blocked waiting for space. If the length
+ * is non-zero and the ack didn't move, we're the
+ * receiver side. If we're getting packets in-order
+ * (the reassembly queue is empty), add the data to
+ * the socket buffer and note that we need a delayed ack.
+ */
+ if (tp->t_state == TCPS_ESTABLISHED &&
+ (tiflags & (TH_SYN|TH_FIN|TH_RST|TH_URG|TH_ACK)) == TH_ACK &&
+ (!ts_present || TSTMP_GEQ(ts_val, tp->ts_recent)) &&
+ ti->ti_seq == tp->rcv_nxt &&
+ tiwin && tiwin == tp->snd_wnd &&
+ tp->snd_nxt == tp->snd_max) {
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record the timestamp.
+ */
+ if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+ SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len)) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = ts_val;
+ }
+
+ if (ti->ti_len == 0) {
+ if (SEQ_GT(ti->ti_ack, tp->snd_una) &&
+ SEQ_LEQ(ti->ti_ack, tp->snd_max) &&
+ tp->snd_cwnd >= tp->snd_wnd) {
+ /*
+ * this is a pure ack for outstanding data.
+ */
+ ++tcpstat.tcps_predack;
+ if (ts_present)
+ tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+ else if (tp->t_rtt &&
+ SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp, tp->t_rtt);
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+ sbdrop(&so->so_snd, acked);
+ tp->snd_una = ti->ti_ack;
+ m_freem(m);
+
+ /*
+ * If all outstanding data are acked, stop
+ * retransmit timer, otherwise restart timer
+ * using current (possibly backed-off) value.
+ * If process is waiting for space,
+ * wakeup/selwakeup/signal. If data
+ * are ready to send, let tcp_output
+ * decide between more output or persist.
+ */
+ if (tp->snd_una == tp->snd_max)
+ tp->t_timer[TCPT_REXMT] = 0;
+ else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ if (so->so_snd.sb_cc)
+ (void) tcp_output(tp);
+ return;
+ }
+ } else if (ti->ti_ack == tp->snd_una &&
+ tp->seg_next == (struct tcpiphdr *)tp &&
+ ti->ti_len <= sbspace(&so->so_rcv)) {
+ /*
+ * this is a pure, in-sequence data packet
+ * with nothing on the reassembly queue and
+ * we have enough buffer space to take it.
+ */
+ ++tcpstat.tcps_preddat;
+ tp->rcv_nxt += ti->ti_len;
+ tcpstat.tcps_rcvpack++;
+ tcpstat.tcps_rcvbyte += ti->ti_len;
+ /*
+ * Drop TCP, IP headers and TCP options then add data
+ * to socket buffer.
+ * (off - sizeof(struct tcphdr) is the TCP option length,
+ * so the sum below is combined header plus options.)
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ sbappend(&so->so_rcv, m);
+ sorwakeup(so);
+ tp->t_flags |= TF_DELACK;
+ return;
+ }
+ }
+
+ /*
+ * Drop TCP, IP headers and TCP options.
+ */
+ m->m_data += sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+ m->m_len -= sizeof(struct tcpiphdr)+off-sizeof(struct tcphdr);
+
+ /*
+ * Calculate amount of space in receive window,
+ * and then do TCP input processing.
+ * Receive window is amount of space in rcv queue,
+ * but not less than advertised window.
+ */
+ { int win;
+
+ win = sbspace(&so->so_rcv);
+ if (win < 0)
+ win = 0;
+ tp->rcv_wnd = max(win, (int)(tp->rcv_adv - tp->rcv_nxt));
+ }
+
+ switch (tp->t_state) {
+
+ /*
+ * If the state is LISTEN then ignore segment if it contains an RST.
+ * If the segment contains an ACK then it is bad and send a RST.
+ * If it does not contain a SYN then it is not interesting; drop it.
+ * Don't bother responding if the destination was a broadcast.
+ * Otherwise initialize tp->rcv_nxt, and tp->irs, select an initial
+ * tp->iss, and send a segment:
+ * <SEQ=ISS><ACK=RCV_NXT><CTL=SYN,ACK>
+ * Also initialize tp->snd_nxt to tp->iss+1 and tp->snd_una to tp->iss.
+ * Fill in remote peer address fields if not previously specified.
+ * Enter SYN_RECEIVED state, and process any other fields of this
+ * segment in this state.
+ */
+ case TCPS_LISTEN: {
+ struct mbuf *am;
+ register struct sockaddr_in *sin;
+
+ if (tiflags & TH_RST)
+ goto drop;
+ if (tiflags & TH_ACK)
+ goto dropwithreset;
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ /*
+ * RFC1122 4.2.3.10, p. 104: discard bcast/mcast SYN
+ * in_broadcast() should never return true on a received
+ * packet with M_BCAST not set.
+ */
+ if (m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ti->ti_dst.s_addr))
+ goto drop;
+ am = m_get(M_DONTWAIT, MT_SONAME); /* XXX */
+ if (am == NULL)
+ goto drop;
+ am->m_len = sizeof (struct sockaddr_in);
+ sin = mtod(am, struct sockaddr_in *);
+ sin->sin_family = AF_INET;
+ sin->sin_len = sizeof(*sin);
+ sin->sin_addr = ti->ti_src;
+ sin->sin_port = ti->ti_sport;
+ bzero((caddr_t)sin->sin_zero, sizeof(sin->sin_zero));
+ /* Remember the local address so it can be restored on failure. */
+ laddr = inp->inp_laddr;
+ if (inp->inp_laddr.s_addr == INADDR_ANY)
+ inp->inp_laddr = ti->ti_dst;
+ if (in_pcbconnect(inp, am)) {
+ inp->inp_laddr = laddr;
+ (void) m_free(am);
+ goto drop;
+ }
+ (void) m_free(am);
+ tp->t_template = tcp_template(tp);
+ if (tp->t_template == 0) {
+ tp = tcp_drop(tp, ENOBUFS);
+ dropsocket = 0; /* socket is already gone */
+ goto drop;
+ }
+ if (optp)
+ tcp_dooptions(tp, optp, optlen, ti,
+ &ts_present, &ts_val, &ts_ecr);
+ /*
+ * iss is non-zero only when set by the TIME_WAIT restart
+ * further down, just before it jumps back to findpcb.
+ */
+ if (iss)
+ tp->iss = iss;
+ else
+ tp->iss = tcp_iss;
+ tcp_iss += TCP_ISSINCR/2;
+ tp->irs = ti->ti_seq;
+ tcp_sendseqinit(tp);
+ tcp_rcvseqinit(tp);
+ tp->t_flags |= TF_ACKNOW;
+ tp->t_state = TCPS_SYN_RECEIVED;
+ tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
+ dropsocket = 0; /* committed to socket */
+ tcpstat.tcps_accepts++;
+ goto trimthenstep6;
+ }
+
+ /*
+ * If the state is SYN_SENT:
+ * if seg contains an ACK, but not for our SYN, drop the input.
+ * if seg contains a RST, then drop the connection.
+ * if seg does not contain SYN, then drop it.
+ * Otherwise this is an acceptable SYN segment
+ * initialize tp->rcv_nxt and tp->irs
+ * if seg contains ack then advance tp->snd_una
+ * if SYN has been acked change to ESTABLISHED else SYN_RCVD state
+ * arrange for segment to be acked (eventually)
+ * continue processing rest of data/controls, beginning with URG
+ */
+ case TCPS_SYN_SENT:
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LEQ(ti->ti_ack, tp->iss) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max)))
+ goto dropwithreset;
+ if (tiflags & TH_RST) {
+ if (tiflags & TH_ACK)
+ tp = tcp_drop(tp, ECONNREFUSED);
+ goto drop;
+ }
+ if ((tiflags & TH_SYN) == 0)
+ goto drop;
+ if (tiflags & TH_ACK) {
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+ }
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->irs = ti->ti_seq;
+ tcp_rcvseqinit(tp);
+ tp->t_flags |= TF_ACKNOW;
+ if (tiflags & TH_ACK && SEQ_GT(tp->snd_una, tp->iss)) {
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_state = TCPS_ESTABLISHED;
+ /* Do window scaling on this connection? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ (void) tcp_reass(tp, (struct tcpiphdr *)0,
+ (struct mbuf *)0);
+ /*
+ * if we didn't have to retransmit the SYN,
+ * use its rtt as our initial srtt & rtt var.
+ */
+ if (tp->t_rtt)
+ tcp_xmit_timer(tp, tp->t_rtt);
+ } else
+ tp->t_state = TCPS_SYN_RECEIVED;
+
+trimthenstep6:
+ /*
+ * Advance ti->ti_seq to correspond to first data byte.
+ * If data, trim to stay within window,
+ * dropping FIN if necessary.
+ */
+ ti->ti_seq++;
+ if (ti->ti_len > tp->rcv_wnd) {
+ todrop = ti->ti_len - tp->rcv_wnd;
+ m_adj(m, -todrop);
+ ti->ti_len = tp->rcv_wnd;
+ tiflags &= ~TH_FIN;
+ tcpstat.tcps_rcvpackafterwin++;
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ }
+ tp->snd_wl1 = ti->ti_seq - 1;
+ tp->rcv_up = ti->ti_seq;
+ goto step6;
+ }
+
+ /*
+ * States other than LISTEN or SYN_SENT.
+ * First check timestamp, if present.
+ * Then check that at least some bytes of segment are within
+ * receive window. If segment begins before rcv_nxt,
+ * drop leading data (and SYN); if nothing left, just ack.
+ *
+ * RFC 1323 PAWS: If we have a timestamp reply on this segment
+ * and it's less than ts_recent, drop it.
+ */
+ if (ts_present && (tiflags & TH_RST) == 0 && tp->ts_recent &&
+ TSTMP_LT(ts_val, tp->ts_recent)) {
+
+ /* Check to see if ts_recent is over 24 days old. */
+ if ((int)(tcp_now - tp->ts_recent_age) > TCP_PAWS_IDLE) {
+ /*
+ * Invalidate ts_recent. If this segment updates
+ * ts_recent, the age will be reset later and ts_recent
+ * will get a valid value. If it does not, setting
+ * ts_recent to zero will at least satisfy the
+ * requirement that zero be placed in the timestamp
+ * echo reply when ts_recent isn't valid. The
+ * age isn't reset until we get a valid ts_recent
+ * because we don't want out-of-order segments to be
+ * dropped when ts_recent is old.
+ */
+ tp->ts_recent = 0;
+ } else {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ tcpstat.tcps_pawsdrop++;
+ goto dropafterack;
+ }
+ }
+
+ todrop = tp->rcv_nxt - ti->ti_seq;
+ if (todrop > 0) {
+ if (tiflags & TH_SYN) {
+ tiflags &= ~TH_SYN;
+ ti->ti_seq++;
+ if (ti->ti_urp > 1)
+ ti->ti_urp--;
+ else
+ tiflags &= ~TH_URG;
+ todrop--;
+ }
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvduppack++;
+ tcpstat.tcps_rcvdupbyte += ti->ti_len;
+ /*
+ * If segment is just one to the left of the window,
+ * check two special cases:
+ * 1. Don't toss RST in response to 4.2-style keepalive.
+ * 2. If the only thing to drop is a FIN, we can drop
+ * it, but check the ACK or we will get into FIN
+ * wars if our FINs crossed (both CLOSING).
+ * In either case, send ACK to resynchronize,
+ * but keep on processing for RST or ACK.
+ */
+ if ((tiflags & TH_FIN && todrop == ti->ti_len + 1)
+#ifdef TCP_COMPAT_42
+ || (tiflags & TH_RST && ti->ti_seq == tp->rcv_nxt - 1)
+#endif
+ ) {
+ todrop = ti->ti_len;
+ tiflags &= ~TH_FIN;
+ tp->t_flags |= TF_ACKNOW;
+ } else {
+ /*
+ * Handle the case when a bound socket connects
+ * to itself. Allow packets with a SYN and
+ * an ACK to continue with the processing.
+ */
+ if (todrop != 0 || (tiflags & TH_ACK) == 0)
+ goto dropafterack;
+ }
+ } else {
+ tcpstat.tcps_rcvpartduppack++;
+ tcpstat.tcps_rcvpartdupbyte += todrop;
+ }
+ m_adj(m, todrop);
+ ti->ti_seq += todrop;
+ ti->ti_len -= todrop;
+ if (ti->ti_urp > todrop)
+ ti->ti_urp -= todrop;
+ else {
+ tiflags &= ~TH_URG;
+ ti->ti_urp = 0;
+ }
+ }
+
+ /*
+ * If new data are received on a connection after the
+ * user processes are gone, then RST the other end.
+ */
+ if ((so->so_state & SS_NOFDREF) &&
+ tp->t_state > TCPS_CLOSE_WAIT && ti->ti_len) {
+ tp = tcp_close(tp);
+ tcpstat.tcps_rcvafterclose++;
+ goto dropwithreset;
+ }
+
+ /*
+ * If segment ends after window, drop trailing data
+ * (and PUSH and FIN); if nothing left, just ACK.
+ */
+ todrop = (ti->ti_seq+ti->ti_len) - (tp->rcv_nxt+tp->rcv_wnd);
+ if (todrop > 0) {
+ tcpstat.tcps_rcvpackafterwin++;
+ if (todrop >= ti->ti_len) {
+ tcpstat.tcps_rcvbyteafterwin += ti->ti_len;
+ /*
+ * If a new connection request is received
+ * while in TIME_WAIT, drop the old connection
+ * and start over if the sequence numbers
+ * are above the previous ones.
+ * The iss chosen here is picked up by the
+ * TCPS_LISTEN case above once the pcb lookup
+ * has been redone.
+ */
+ if (tiflags & TH_SYN &&
+ tp->t_state == TCPS_TIME_WAIT &&
+ SEQ_GT(ti->ti_seq, tp->rcv_nxt)) {
+ iss = tp->rcv_nxt + TCP_ISSINCR;
+ tp = tcp_close(tp);
+ goto findpcb;
+ }
+ /*
+ * If window is closed can only take segments at
+ * window edge, and have to drop data and PUSH from
+ * incoming segments. Continue processing, but
+ * remember to ack. Otherwise, drop segment
+ * and ack.
+ */
+ if (tp->rcv_wnd == 0 && ti->ti_seq == tp->rcv_nxt) {
+ tp->t_flags |= TF_ACKNOW;
+ tcpstat.tcps_rcvwinprobe++;
+ } else
+ goto dropafterack;
+ } else
+ tcpstat.tcps_rcvbyteafterwin += todrop;
+ m_adj(m, -todrop);
+ ti->ti_len -= todrop;
+ tiflags &= ~(TH_PUSH|TH_FIN);
+ }
+
+ /*
+ * If last ACK falls within this segment's sequence numbers,
+ * record its timestamp.
+ */
+ if (ts_present && SEQ_LEQ(ti->ti_seq, tp->last_ack_sent) &&
+ SEQ_LT(tp->last_ack_sent, ti->ti_seq + ti->ti_len +
+ ((tiflags & (TH_SYN|TH_FIN)) != 0))) {
+ tp->ts_recent_age = tcp_now;
+ tp->ts_recent = ts_val;
+ }
+
+ /*
+ * If the RST bit is set examine the state:
+ * SYN_RECEIVED STATE:
+ * If passive open, return to LISTEN state.
+ * If active open, inform user that connection was refused.
+ * ESTABLISHED, FIN_WAIT_1, FIN_WAIT2, CLOSE_WAIT STATES:
+ * Inform user that connection was reset, and close tcb.
+ * CLOSING, LAST_ACK, TIME_WAIT STATES
+ * Close the tcb.
+ */
+ if (tiflags&TH_RST) switch (tp->t_state) {
+
+ case TCPS_SYN_RECEIVED:
+ so->so_error = ECONNREFUSED;
+ goto close;
+
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ so->so_error = ECONNRESET;
+ close:
+ tp->t_state = TCPS_CLOSED;
+ tcpstat.tcps_drops++;
+ tp = tcp_close(tp);
+ goto drop;
+
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+ tp = tcp_close(tp);
+ goto drop;
+ }
+
+ /*
+ * If a SYN is in the window, then this is an
+ * error and we send an RST and drop the connection.
+ */
+ if (tiflags & TH_SYN) {
+ tp = tcp_drop(tp, ECONNRESET);
+ goto dropwithreset;
+ }
+
+ /*
+ * If the ACK bit is off we drop the segment and return.
+ */
+ if ((tiflags & TH_ACK) == 0)
+ goto drop;
+
+ /*
+ * Ack processing.
+ */
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED state if the ack ACKs our SYN then enter
+ * ESTABLISHED state and continue processing, otherwise
+ * send an RST.
+ */
+ case TCPS_SYN_RECEIVED:
+ if (SEQ_GT(tp->snd_una, ti->ti_ack) ||
+ SEQ_GT(ti->ti_ack, tp->snd_max))
+ goto dropwithreset;
+ tcpstat.tcps_connects++;
+ soisconnected(so);
+ tp->t_state = TCPS_ESTABLISHED;
+ /* Do window scaling? */
+ if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) ==
+ (TF_RCVD_SCALE|TF_REQ_SCALE)) {
+ tp->snd_scale = tp->requested_s_scale;
+ tp->rcv_scale = tp->request_r_scale;
+ }
+ (void) tcp_reass(tp, (struct tcpiphdr *)0, (struct mbuf *)0);
+ tp->snd_wl1 = ti->ti_seq - 1;
+ /* fall into ... */
+
+ /*
+ * In ESTABLISHED state: drop duplicate ACKs; ACK out of range
+ * ACKs. If the ack is in the range
+ * tp->snd_una < ti->ti_ack <= tp->snd_max
+ * then advance tp->snd_una to ti->ti_ack and drop
+ * data from the retransmission queue. If this ACK reflects
+ * more up to date window information we update our window information.
+ */
+ case TCPS_ESTABLISHED:
+ case TCPS_FIN_WAIT_1:
+ case TCPS_FIN_WAIT_2:
+ case TCPS_CLOSE_WAIT:
+ case TCPS_CLOSING:
+ case TCPS_LAST_ACK:
+ case TCPS_TIME_WAIT:
+
+ if (SEQ_LEQ(ti->ti_ack, tp->snd_una)) {
+ if (ti->ti_len == 0 && tiwin == tp->snd_wnd) {
+ tcpstat.tcps_rcvdupack++;
+ /*
+ * If we have outstanding data (other than
+ * a window probe), this is a completely
+ * duplicate ack (ie, window info didn't
+ * change), the ack is the biggest we've
+ * seen and we've seen exactly our rexmt
+ * threshold of them, assume a packet
+ * has been dropped and retransmit it.
+ * Kludge snd_nxt & the congestion
+ * window so we send only this one
+ * packet.
+ *
+ * We know we're losing at the current
+ * window size so do congestion avoidance
+ * (set ssthresh to half the current window
+ * and pull our congestion window back to
+ * the new ssthresh).
+ *
+ * Dup acks mean that packets have left the
+ * network (they're now cached at the receiver)
+ * so bump cwnd by the amount in the receiver
+ * to keep a constant cwnd packets in the
+ * network.
+ */
+ if (tp->t_timer[TCPT_REXMT] == 0 ||
+ ti->ti_ack != tp->snd_una)
+ tp->t_dupacks = 0;
+ else if (++tp->t_dupacks == tcprexmtthresh) {
+ tcp_seq onxt = tp->snd_nxt;
+ u_int win =
+ min(tp->snd_wnd, tp->snd_cwnd) / 2 /
+ tp->t_maxseg;
+
+ if (win < 2)
+ win = 2;
+ tp->snd_ssthresh = win * tp->t_maxseg;
+ tp->t_timer[TCPT_REXMT] = 0;
+ tp->t_rtt = 0;
+ tp->snd_nxt = ti->ti_ack;
+ tp->snd_cwnd = tp->t_maxseg;
+ (void) tcp_output(tp);
+ tp->snd_cwnd = tp->snd_ssthresh +
+ tp->t_maxseg * tp->t_dupacks;
+ if (SEQ_GT(onxt, tp->snd_nxt))
+ tp->snd_nxt = onxt;
+ goto drop;
+ } else if (tp->t_dupacks > tcprexmtthresh) {
+ tp->snd_cwnd += tp->t_maxseg;
+ (void) tcp_output(tp);
+ goto drop;
+ }
+ } else
+ tp->t_dupacks = 0;
+ break;
+ }
+ /*
+ * If the congestion window was inflated to account
+ * for the other side's cached packets, retract it.
+ */
+ if (tp->t_dupacks > tcprexmtthresh &&
+ tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->t_dupacks = 0;
+ if (SEQ_GT(ti->ti_ack, tp->snd_max)) {
+ tcpstat.tcps_rcvacktoomuch++;
+ goto dropafterack;
+ }
+ acked = ti->ti_ack - tp->snd_una;
+ tcpstat.tcps_rcvackpack++;
+ tcpstat.tcps_rcvackbyte += acked;
+
+ /*
+ * If we have a timestamp reply, update smoothed
+ * round trip time. If no timestamp is present but
+ * transmit timer is running and timed sequence
+ * number was acked, update smoothed round trip time.
+ * Since we now have an rtt measurement, cancel the
+ * timer backoff (cf., Phil Karn's retransmit alg.).
+ * Recompute the initial retransmit timer.
+ */
+ if (ts_present)
+ tcp_xmit_timer(tp, tcp_now-ts_ecr+1);
+ else if (tp->t_rtt && SEQ_GT(ti->ti_ack, tp->t_rtseq))
+ tcp_xmit_timer(tp,tp->t_rtt);
+
+ /*
+ * If all outstanding data is acked, stop retransmit
+ * timer and remember to restart (more output or persist).
+ * If there is more data to be acked, restart retransmit
+ * timer, using current (possibly backed-off) value.
+ */
+ if (ti->ti_ack == tp->snd_max) {
+ tp->t_timer[TCPT_REXMT] = 0;
+ needoutput = 1;
+ } else if (tp->t_timer[TCPT_PERSIST] == 0)
+ tp->t_timer[TCPT_REXMT] = tp->t_rxtcur;
+ /*
+ * When new data is acked, open the congestion window.
+ * If the window gives us less than ssthresh packets
+ * in flight, open exponentially (maxseg per packet).
+ * Otherwise open linearly: maxseg per window
+ * (maxseg^2 / cwnd per packet), plus a constant
+ * fraction of a packet (maxseg/8) to help larger windows
+ * open quickly enough.
+ */
+ {
+ register u_int cw = tp->snd_cwnd;
+ register u_int incr = tp->t_maxseg;
+
+ if (cw > tp->snd_ssthresh)
+ incr = incr * incr / cw + incr / 8;
+ tp->snd_cwnd = min(cw + incr, TCP_MAXWIN<<tp->snd_scale);
+ }
+ /*
+ * More acked than is queued in the send buffer is taken to
+ * mean that our FIN was acknowledged as well.
+ */
+ if (acked > so->so_snd.sb_cc) {
+ tp->snd_wnd -= so->so_snd.sb_cc;
+ sbdrop(&so->so_snd, (int)so->so_snd.sb_cc);
+ ourfinisacked = 1;
+ } else {
+ sbdrop(&so->so_snd, acked);
+ tp->snd_wnd -= acked;
+ ourfinisacked = 0;
+ }
+ if (so->so_snd.sb_flags & SB_NOTIFY)
+ sowwakeup(so);
+ tp->snd_una = ti->ti_ack;
+ if (SEQ_LT(tp->snd_nxt, tp->snd_una))
+ tp->snd_nxt = tp->snd_una;
+
+ switch (tp->t_state) {
+
+ /*
+ * In FIN_WAIT_1 STATE in addition to the processing
+ * for the ESTABLISHED state if our FIN is now acknowledged
+ * then enter FIN_WAIT_2.
+ */
+ case TCPS_FIN_WAIT_1:
+ if (ourfinisacked) {
+ /*
+ * If we can't receive any more
+ * data, then closing user can proceed.
+ * Starting the timer is contrary to the
+ * specification, but if we don't get a FIN
+ * we'll hang forever.
+ */
+ if (so->so_state & SS_CANTRCVMORE) {
+ soisdisconnected(so);
+ tp->t_timer[TCPT_2MSL] = tcp_maxidle;
+ }
+ tp->t_state = TCPS_FIN_WAIT_2;
+ }
+ break;
+
+ /*
+ * In CLOSING STATE in addition to the processing for
+ * the ESTABLISHED state if the ACK acknowledges our FIN
+ * then enter the TIME-WAIT state, otherwise ignore
+ * the segment.
+ */
+ case TCPS_CLOSING:
+ if (ourfinisacked) {
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ }
+ break;
+
+ /*
+ * In LAST_ACK, we may still be waiting for data to drain
+ * and/or to be acked, as well as for the ack of our FIN.
+ * If our FIN is now acknowledged, delete the TCB,
+ * enter the closed state and return.
+ */
+ case TCPS_LAST_ACK:
+ if (ourfinisacked) {
+ tp = tcp_close(tp);
+ goto drop;
+ }
+ break;
+
+ /*
+ * In TIME_WAIT state the only thing that should arrive
+ * is a retransmission of the remote FIN. Acknowledge
+ * it and restart the finack timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ goto dropafterack;
+ }
+ }
+
+step6:
+ /*
+ * Update window information.
+ * Don't look at window if no ACK: TAC's send garbage on first SYN.
+ */
+ if ((tiflags & TH_ACK) &&
+ (SEQ_LT(tp->snd_wl1, ti->ti_seq) || tp->snd_wl1 == ti->ti_seq &&
+ (SEQ_LT(tp->snd_wl2, ti->ti_ack) ||
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd))) {
+ /* keep track of pure window updates */
+ if (ti->ti_len == 0 &&
+ tp->snd_wl2 == ti->ti_ack && tiwin > tp->snd_wnd)
+ tcpstat.tcps_rcvwinupd++;
+ tp->snd_wnd = tiwin;
+ tp->snd_wl1 = ti->ti_seq;
+ tp->snd_wl2 = ti->ti_ack;
+ if (tp->snd_wnd > tp->max_sndwnd)
+ tp->max_sndwnd = tp->snd_wnd;
+ needoutput = 1;
+ }
+
+ /*
+ * Process segments with URG.
+ */
+ if ((tiflags & TH_URG) && ti->ti_urp &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ /*
+ * This is a kludge, but if we receive and accept
+ * random urgent pointers, we'll crash in
+ * soreceive. It's hard to imagine someone
+ * actually wanting to send this much urgent data.
+ */
+ if (ti->ti_urp + so->so_rcv.sb_cc > sb_max) {
+ ti->ti_urp = 0; /* XXX */
+ tiflags &= ~TH_URG; /* XXX */
+ goto dodata; /* XXX */
+ }
+ /*
+ * If this segment advances the known urgent pointer,
+ * then mark the data stream. This should not happen
+ * in CLOSE_WAIT, CLOSING, LAST_ACK or TIME_WAIT STATES since
+ * a FIN has been received from the remote side.
+ * In these states we ignore the URG.
+ *
+ * According to RFC961 (Assigned Protocols),
+ * the urgent pointer points to the last octet
+ * of urgent data. We continue, however,
+ * to consider it to indicate the first octet
+ * of data past the urgent section as the original
+ * spec states (in one of two places).
+ */
+ if (SEQ_GT(ti->ti_seq+ti->ti_urp, tp->rcv_up)) {
+ tp->rcv_up = ti->ti_seq + ti->ti_urp;
+ so->so_oobmark = so->so_rcv.sb_cc +
+ (tp->rcv_up - tp->rcv_nxt) - 1;
+ if (so->so_oobmark == 0)
+ so->so_state |= SS_RCVATMARK;
+ sohasoutofband(so);
+ tp->t_oobflags &= ~(TCPOOB_HAVEDATA | TCPOOB_HADDATA);
+ }
+ /*
+ * Remove out of band data so doesn't get presented to user.
+ * This can happen independent of advancing the URG pointer,
+ * but if two URG's are pending at once, some out-of-band
+ * data may creep in... ick.
+ */
+ if (ti->ti_urp <= ti->ti_len
+#ifdef SO_OOBINLINE
+ && (so->so_options & SO_OOBINLINE) == 0
+#endif
+ )
+ tcp_pulloutofband(so, ti, m);
+ } else
+ /*
+ * If no out of band data is expected,
+ * pull receive urgent pointer along
+ * with the receive window.
+ */
+ if (SEQ_GT(tp->rcv_nxt, tp->rcv_up))
+ tp->rcv_up = tp->rcv_nxt;
+dodata: /* XXX */
+
+ /*
+ * Process the segment text, merging it into the TCP sequencing queue,
+ * and arranging for acknowledgment of receipt if necessary.
+ * This process logically involves adjusting tp->rcv_wnd as data
+ * is presented to the user (this happens in tcp_usrreq.c,
+ * case PRU_RCVD). If a FIN has already been received on this
+ * connection then we just ignore the text.
+ */
+ if ((ti->ti_len || (tiflags&TH_FIN)) &&
+ TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ TCP_REASS(tp, ti, m, so, tiflags);
+ /*
+ * Note the amount of data that peer has sent into
+ * our window, in order to estimate the sender's
+ * buffer size.
+ * NOTE(review): len is not read again anywhere below in
+ * this function, so this estimate is currently unused.
+ */
+ len = so->so_rcv.sb_hiwat - (tp->rcv_adv - tp->rcv_nxt);
+ } else {
+ m_freem(m);
+ tiflags &= ~TH_FIN;
+ }
+
+ /*
+ * If FIN is received ACK the FIN and let the user know
+ * that the connection is closing.
+ */
+ if (tiflags & TH_FIN) {
+ if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
+ socantrcvmore(so);
+ tp->t_flags |= TF_ACKNOW;
+ tp->rcv_nxt++;
+ }
+ switch (tp->t_state) {
+
+ /*
+ * In SYN_RECEIVED and ESTABLISHED STATES
+ * enter the CLOSE_WAIT state.
+ */
+ case TCPS_SYN_RECEIVED:
+ case TCPS_ESTABLISHED:
+ tp->t_state = TCPS_CLOSE_WAIT;
+ break;
+
+ /*
+ * If still in FIN_WAIT_1 STATE FIN has not been acked so
+ * enter the CLOSING state.
+ */
+ case TCPS_FIN_WAIT_1:
+ tp->t_state = TCPS_CLOSING;
+ break;
+
+ /*
+ * In FIN_WAIT_2 state enter the TIME_WAIT state,
+ * starting the time-wait timer, turning off the other
+ * standard timers.
+ */
+ case TCPS_FIN_WAIT_2:
+ tp->t_state = TCPS_TIME_WAIT;
+ tcp_canceltimers(tp);
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ soisdisconnected(so);
+ break;
+
+ /*
+ * In TIME_WAIT state restart the 2 MSL time_wait timer.
+ */
+ case TCPS_TIME_WAIT:
+ tp->t_timer[TCPT_2MSL] = 2 * TCPTV_MSL;
+ break;
+ }
+ }
+ if (so->so_options & SO_DEBUG)
+ tcp_trace(TA_INPUT, ostate, tp, &tcp_saveti, 0);
+
+ /*
+ * Return any desired output.
+ */
+ if (needoutput || (tp->t_flags & TF_ACKNOW))
+ (void) tcp_output(tp);
+ return;
+
+dropafterack:
+ /*
+ * Generate an ACK dropping incoming segment if it occupies
+ * sequence space, where the ACK reflects our state.
+ */
+ if (tiflags & TH_RST)
+ goto drop;
+ m_freem(m);
+ tp->t_flags |= TF_ACKNOW;
+ (void) tcp_output(tp);
+ return;
+
+dropwithreset:
+ /*
+ * Generate a RST, dropping incoming segment.
+ * Make ACK acceptable to originator of segment.
+ * Don't bother to respond if destination was broadcast/multicast.
+ */
+ if ((tiflags & TH_RST) || m->m_flags & (M_BCAST|M_MCAST) ||
+ IN_MULTICAST(ti->ti_dst.s_addr))
+ goto drop;
+ if (tiflags & TH_ACK)
+ tcp_respond(tp, ti, m, (tcp_seq)0, ti->ti_ack, TH_RST);
+ else {
+ if (tiflags & TH_SYN)
+ ti->ti_len++;
+ tcp_respond(tp, ti, m, ti->ti_seq+ti->ti_len, (tcp_seq)0,
+ TH_RST|TH_ACK);
+ }
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+
+drop:
+ /*
+ * Drop space held by incoming segment and return.
+ * tp may be 0 here: several paths above reach this label after
+ * assigning it the result of tcp_close() or tcp_drop().
+ */
+ if (tp && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
+ tcp_trace(TA_DROP, ostate, tp, &tcp_saveti, 0);
+ m_freem(m);
+ /* destroy temporarily created socket */
+ if (dropsocket)
+ (void) soabort(so);
+ return;
+#ifndef TUBA_INCLUDE
+}
+
+/*
+ * Parse the TCP options carried by a received segment.
+ *
+ *	tp	control block that records what the peer offered
+ *	cp	first byte of the option area
+ *	cnt	number of bytes in the option area
+ *	ti	header of the segment; only ti_flags is consulted here,
+ *		to tell SYN segments from all others
+ *	ts_present, ts_val, ts_ecr
+ *		written only when a timestamp option of the expected
+ *		length is found; values are stored in host byte order
+ *
+ * The MSS and window scale options are honoured on SYN segments only.
+ * Options of an unknown kind or with an unexpected length are skipped.
+ * Parsing stops at EOL, at the end of the option area, or at the first
+ * option whose length octet is missing or cannot be valid.
+ */
+void
+tcp_dooptions(tp, cp, cnt, ti, ts_present, ts_val, ts_ecr)
+	struct tcpcb *tp;
+	u_char *cp;
+	int cnt;
+	struct tcpiphdr *ti;
+	int *ts_present;
+	u_long *ts_val, *ts_ecr;
+{
+	u_short mss;
+	int opt, optlen;
+
+	for (; cnt > 0; cnt -= optlen, cp += optlen) {
+		opt = cp[0];
+		if (opt == TCPOPT_EOL)
+			break;
+		if (opt == TCPOPT_NOP)
+			optlen = 1;
+		else {
+			/*
+			 * Every other option carries a length octet that
+			 * counts the kind and length octets themselves.
+			 * Make sure that octet really lies inside the
+			 * option area, that it is at least 2, and that
+			 * the option does not claim to extend past the
+			 * end of the area; otherwise the bcopy() calls
+			 * below would read beyond the options.
+			 */
+			if (cnt < 2)
+				break;
+			optlen = cp[1];
+			if (optlen < 2 || optlen > cnt)
+				break;
+		}
+		switch (opt) {
+
+		default:
+			continue;
+
+		case TCPOPT_MAXSEG:
+			if (optlen != TCPOLEN_MAXSEG)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;
+			/* bcopy: the option data need not be aligned. */
+			bcopy((char *) cp + 2, (char *) &mss, sizeof(mss));
+			NTOHS(mss);
+			(void) tcp_mss(tp, mss);	/* sets t_maxseg */
+			break;
+
+		case TCPOPT_WINDOW:
+			if (optlen != TCPOLEN_WINDOW)
+				continue;
+			if (!(ti->ti_flags & TH_SYN))
+				continue;
+			tp->t_flags |= TF_RCVD_SCALE;
+			/* Clamp the peer's shift count to the supported maximum. */
+			tp->requested_s_scale = min(cp[2], TCP_MAX_WINSHIFT);
+			break;
+
+		case TCPOPT_TIMESTAMP:
+			if (optlen != TCPOLEN_TIMESTAMP)
+				continue;
+			*ts_present = 1;
+			bcopy((char *)cp + 2, (char *) ts_val, sizeof(*ts_val));
+			NTOHL(*ts_val);
+			bcopy((char *)cp + 6, (char *) ts_ecr, sizeof(*ts_ecr));
+			NTOHL(*ts_ecr);
+
+			/*
+			 * A timestamp received in a SYN makes
+			 * it ok to send timestamp requests and replies.
+			 */
+			if (ti->ti_flags & TH_SYN) {
+				tp->t_flags |= TF_RCVD_TSTMP;
+				tp->ts_recent = *ts_val;
+				tp->ts_recent_age = tcp_now;
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * Extract the out-of-band byte addressed by ti->ti_urp from the
+ * mbuf chain m so that it is not delivered with the normal data.
+ * The byte is saved in the connection's t_iobc, TCPOOB_HAVEDATA is
+ * set, and the mbuf holding it is shortened by one.  ti->ti_len is
+ * left alone, so the byte still counts for sequencing purposes.
+ * Panics if the chain is shorter than the urgent offset.
+ */
+void
+tcp_pulloutofband(so, ti, m)
+	struct socket *so;
+	struct tcpiphdr *ti;
+	register struct mbuf *m;
+{
+	/* Zero-based offset of the urgent byte from the start of m. */
+	int off = ti->ti_urp - 1;
+
+	if (off >= 0) {
+		do {
+			if (off < m->m_len) {
+				/* The byte lives in this mbuf. */
+				struct tcpcb *tp = sototcpcb(so);
+				char *oob = mtod(m, caddr_t) + off;
+
+				tp->t_iobc = *oob;
+				tp->t_oobflags |= TCPOOB_HAVEDATA;
+				/* Close the gap left by the removed byte. */
+				bcopy(oob + 1, oob,
+				    (unsigned)(m->m_len - off - 1));
+				m->m_len--;
+				return;
+			}
+			/* Not here: step over this mbuf. */
+			off -= m->m_len;
+			m = m->m_next;
+		} while (m != 0 && off >= 0);
+	}
+	panic("tcp_pulloutofband");
+}
+
+/*
+ * Collect new round-trip time estimate
+ * and update averages and current timeout.
+ *
+ * tp:	connection whose t_srtt, t_rttvar and t_rxtcur are updated;
+ *	t_rtt and t_rxtshift are reset to 0 and t_softerror is cleared.
+ * rtt:	the new round-trip sample.  When an estimate already exists
+ *	it is folded in as (rtt - 1); for the very first sample it is
+ *	used unadjusted.
+ * Each call is also counted in tcpstat.tcps_rttupdated.
+ */
+void
+tcp_xmit_timer(tp, rtt)
+ register struct tcpcb *tp;
+ short rtt;
+{
+ register short delta;
+
+ tcpstat.tcps_rttupdated++;
+ if (tp->t_srtt != 0) {
+ /*
+ * srtt is stored as fixed point with 3 bits after the
+ * binary point (i.e., scaled by 8). The following magic
+ * is equivalent to the smoothing algorithm in rfc793 with
+ * an alpha of .875 (srtt = rtt/8 + srtt*7/8 in fixed
+ * point). Adjust rtt to origin 0.
+ */
+ delta = rtt - 1 - (tp->t_srtt >> TCP_RTT_SHIFT);
+ /* never let the estimate reach 0: 0 means "no estimate yet" */
+ if ((tp->t_srtt += delta) <= 0)
+ tp->t_srtt = 1;
+ /*
+ * We accumulate a smoothed rtt variance (actually, a
+ * smoothed mean difference), then set the retransmit
+ * timer to smoothed rtt + 4 times the smoothed variance.
+ * rttvar is stored as fixed point with 2 bits after the
+ * binary point (scaled by 4). The following is
+ * equivalent to rfc793 smoothing with an alpha of .75
+ * (rttvar = rttvar*3/4 + |delta| / 4). This replaces
+ * rfc793's wired-in beta.
+ */
+ if (delta < 0)
+ delta = -delta;
+ delta -= (tp->t_rttvar >> TCP_RTTVAR_SHIFT);
+ if ((tp->t_rttvar += delta) <= 0)
+ tp->t_rttvar = 1;
+ } else {
+ /*
+ * No rtt measurement yet - use the unsmoothed rtt.
+ * Set the variance to half the rtt (so our first
+ * retransmit happens at 3*rtt).
+ */
+ tp->t_srtt = rtt << TCP_RTT_SHIFT;
+ tp->t_rttvar = rtt << (TCP_RTTVAR_SHIFT - 1);
+ }
+ /* the sample has been consumed; also forget any retransmit backoff */
+ tp->t_rtt = 0;
+ tp->t_rxtshift = 0;
+
+ /*
+ * the retransmit should happen at rtt + 4 * rttvar.
+ * Because of the way we do the smoothing, srtt and rttvar
+ * will each average +1/2 tick of bias. When we compute
+ * the retransmit timer, we want 1/2 tick of rounding and
+ * 1 extra tick because of +-1/2 tick uncertainty in the
+ * firing of the timer. The bias will give us exactly the
+ * 1.5 tick we need. But, because the bias is
+ * statistical, we have to test that we don't drop below
+ * the minimum feasible timer (which is 2 ticks).
+ */
+ TCPT_RANGESET(tp->t_rxtcur, TCP_REXMTVAL(tp),
+ tp->t_rttmin, TCPTV_REXMTMAX);
+
+ /*
+ * We received an ack for a packet that wasn't retransmitted;
+ * it is probably safe to discard any error indications we've
+ * received recently. This isn't quite right, but close enough
+ * for now (a route might have failed after we sent a segment,
+ * and the return path might not be symmetrical).
+ */
+ tp->t_softerror = 0;
+}
+
+/*
+ * Determine a reasonable value for maxseg size.
+ * If the route is known, check route for mtu.
+ * If none, use an mss that can be handled on the outgoing
+ * interface without forcing IP to fragment; if bigger than
+ * an mbuf cluster (MCLBYTES), round down to nearest multiple of MCLBYTES
+ * to utilize large mbufs. If no route is found, route has no mtu,
+ * or the destination isn't local, use a default, hopefully conservative
+ * size (usually 512 or the default IP max size, but no more than the mtu
+ * of the interface), as we can't discover anything about intervening
+ * gateways or networks. We also initialize the congestion/slow start
+ * window to be a single segment if the destination isn't local.
+ * While looking at the routing entry, we also initialize other path-dependent
+ * parameters from pre-set or cached values in the routing entry.
+ *
+ * tp:    connection being configured.
+ * offer: mss offered by the peer, or 0 if there is no offer.
+ * Returns the computed mss, or tcp_mssdflt when no route can be
+ * obtained (in which case tp is left untouched).
+ */
+int
+tcp_mss(tp, offer)
+ register struct tcpcb *tp;
+ u_int offer;
+{
+ struct route *ro;
+ register struct rtentry *rt;
+ struct ifnet *ifp;
+ register int rtt, mss;
+ u_long bufsize;
+ struct inpcb *inp;
+ struct socket *so;
+ extern int tcp_mssdflt;
+
+ inp = tp->t_inpcb;
+ ro = &inp->inp_route;
+
+ if ((rt = ro->ro_rt) == (struct rtentry *)0) {
+ /* No route yet, so try to acquire one */
+ if (inp->inp_faddr.s_addr != INADDR_ANY) {
+ ro->ro_dst.sa_family = AF_INET;
+ ro->ro_dst.sa_len = sizeof(ro->ro_dst);
+ ((struct sockaddr_in *) &ro->ro_dst)->sin_addr =
+ inp->inp_faddr;
+ rtalloc(ro);
+ }
+ if ((rt = ro->ro_rt) == (struct rtentry *)0)
+ return (tcp_mssdflt);
+ }
+ ifp = rt->rt_ifp;
+ so = inp->inp_socket;
+
+#ifdef RTV_MTU /* if route characteristics exist ... */
+ /*
+ * While we're here, check if there's an initial rtt
+ * or rttvar. Convert from the route-table units
+ * to scaled multiples of the slow timeout timer.
+ */
+ if (tp->t_srtt == 0 && (rtt = rt->rt_rmx.rmx_rtt)) {
+ /*
+ * XXX the lock bit for RTT indicates that the value
+ * is also a minimum value; this is subject to time.
+ */
+ if (rt->rt_rmx.rmx_locks & RTV_RTT)
+ tp->t_rttmin = rtt / (RTM_RTTUNIT / PR_SLOWHZ);
+ tp->t_srtt = rtt / (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+ if (rt->rt_rmx.rmx_rttvar)
+ tp->t_rttvar = rt->rt_rmx.rmx_rttvar /
+ (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+ else
+ /* default variation is +- 1 rtt */
+ tp->t_rttvar =
+ tp->t_srtt * TCP_RTTVAR_SCALE / TCP_RTT_SCALE;
+ TCPT_RANGESET(tp->t_rxtcur,
+ ((tp->t_srtt >> 2) + tp->t_rttvar) >> 1,
+ tp->t_rttmin, TCPTV_REXMTMAX);
+ }
+ /*
+ * if there's an mtu associated with the route, use it
+ */
+ if (rt->rt_rmx.rmx_mtu)
+ mss = rt->rt_rmx.rmx_mtu - sizeof(struct tcpiphdr);
+ else
+#endif /* RTV_MTU */
+ {
+ /* no route mtu: derive the mss from the interface mtu */
+ mss = ifp->if_mtu - sizeof(struct tcpiphdr);
+#if (MCLBYTES & (MCLBYTES - 1)) == 0
+ /* MCLBYTES is a power of two: round down with a mask */
+ if (mss > MCLBYTES)
+ mss &= ~(MCLBYTES-1);
+#else
+ if (mss > MCLBYTES)
+ mss = mss / MCLBYTES * MCLBYTES;
+#endif
+ if (!in_localaddr(inp->inp_faddr))
+ mss = min(mss, tcp_mssdflt);
+ }
+ /*
+ * The current mss, t_maxseg, is initialized to the default value.
+ * If we compute a smaller value, reduce the current mss.
+ * If we compute a larger value, return it for use in sending
+ * a max seg size option, but don't store it for use
+ * unless we received an offer at least that large from peer.
+ * However, do not accept offers under 32 bytes.
+ */
+ if (offer)
+ mss = min(mss, offer);
+ mss = max(mss, 32); /* sanity */
+ if (mss < tp->t_maxseg || offer != 0) {
+ /*
+ * If there's a pipesize, change the socket buffer
+ * to that size. Make the socket buffers an integral
+ * number of mss units; if the mss is larger than
+ * the socket buffer, decrease the mss.
+ */
+#ifdef RTV_SPIPE
+ if ((bufsize = rt->rt_rmx.rmx_sendpipe) == 0)
+#endif
+ bufsize = so->so_snd.sb_hiwat;
+ if (bufsize < mss)
+ mss = bufsize;
+ else {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_snd, bufsize);
+ }
+ tp->t_maxseg = mss;
+
+#ifdef RTV_RPIPE
+ if ((bufsize = rt->rt_rmx.rmx_recvpipe) == 0)
+#endif
+ bufsize = so->so_rcv.sb_hiwat;
+ if (bufsize > mss) {
+ bufsize = roundup(bufsize, mss);
+ if (bufsize > sb_max)
+ bufsize = sb_max;
+ (void)sbreserve(&so->so_rcv, bufsize);
+ }
+ }
+ /*
+ * NOTE(review): the congestion window is set to one segment
+ * unconditionally here, although the header comment speaks only
+ * of non-local destinations -- confirm which is intended.
+ */
+ tp->snd_cwnd = mss;
+
+#ifdef RTV_SSTHRESH
+ if (rt->rt_rmx.rmx_ssthresh) {
+ /*
+ * There's some sort of gateway or interface
+ * buffer limit on the path. Use this to set
+ * the slow start threshold, but set the
+ * threshold to no less than 2*mss.
+ */
+ tp->snd_ssthresh = max(2 * mss, rt->rt_rmx.rmx_ssthresh);
+ }
+#endif /* RTV_SSTHRESH */
+ return (mss);
+}
+#endif /* TUBA_INCLUDE */
diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c
new file mode 100644
index 0000000..8edb853
--- /dev/null
+++ b/sys/netinet/tcp_timewait.c
@@ -0,0 +1,445 @@
+/*
+ * Copyright (c) 1982, 1986, 1988, 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)tcp_subr.c 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/protosw.h>
+#include <sys/errno.h>
+
+#include <net/route.h>
+#include <net/if.h>
+
+#include <netinet/in.h>
+#include <netinet/in_systm.h>
+#include <netinet/ip.h>
+#include <netinet/in_pcb.h>
+#include <netinet/ip_var.h>
+#include <netinet/ip_icmp.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <netinet/tcpip.h>
+
+/* patchable/settable parameters for tcp */
+/* initial t_maxseg of every new control block (see tcp_newtcpcb) */
+int tcp_mssdflt = TCP_MSS;
+/* scaled by PR_SLOWHZ to seed t_rttvar in tcp_newtcpcb */
+int tcp_rttdflt = TCPTV_SRTTDFLT / PR_SLOWHZ;
+/* non-zero: new connections request window scaling and timestamps */
+int tcp_do_rfc1323 = 1;
+
+/* defined elsewhere; tcp_close resets it when the cached pcb goes away */
+extern struct inpcb *tcp_last_inpcb;
+
+/*
+ * One-time TCP setup: empty pcb list, initial send sequence number,
+ * and the protocol header size bookkeeping.
+ */
+void
+tcp_init()
+{
+
+	/* the pcb list head points at itself when the list is empty */
+	tcb.inp_prev = &tcb;
+	tcb.inp_next = &tcb;
+	tcp_iss = 1;		/* wrong */
+
+	/* make sure a full tcp/ip header is accounted for */
+	if (sizeof(struct tcpiphdr) > max_protohdr)
+		max_protohdr = sizeof(struct tcpiphdr);
+	/* link header plus tcp/ip header must fit in one header mbuf */
+	if (MHLEN < max_linkhdr + sizeof(struct tcpiphdr))
+		panic("tcp_init");
+}
+
+/*
+ * Build (or refresh) the skeletal tcp/ip header used when sending on
+ * connection tp.  If the connection has no template yet, an mbuf is
+ * allocated without waiting to hold one; the addresses and ports are
+ * then taken from the inpcb and all remaining fields are cleared.
+ * Returns the header, or 0 if no mbuf could be obtained.  The
+ * result is not stored into tp->t_template here.
+ */
+struct tcpiphdr *
+tcp_template(tp)
+	struct tcpcb *tp;
+{
+	register struct inpcb *inp = tp->t_inpcb;
+	register struct tcpiphdr *hdr = tp->t_template;
+
+	if (hdr == 0) {
+		register struct mbuf *m;
+
+		m = m_get(M_DONTWAIT, MT_HEADER);
+		if (m == NULL)
+			return (0);
+		m->m_len = sizeof (struct tcpiphdr);
+		hdr = mtod(m, struct tcpiphdr *);
+	}
+
+	/* ip overlay part */
+	hdr->ti_next = 0;
+	hdr->ti_prev = 0;
+	hdr->ti_x1 = 0;
+	hdr->ti_pr = IPPROTO_TCP;
+	hdr->ti_len = htons(sizeof (struct tcpiphdr) - sizeof (struct ip));
+	hdr->ti_src = inp->inp_laddr;
+	hdr->ti_dst = inp->inp_faddr;
+
+	/* tcp header part */
+	hdr->ti_sport = inp->inp_lport;
+	hdr->ti_dport = inp->inp_fport;
+	hdr->ti_seq = 0;
+	hdr->ti_ack = 0;
+	hdr->ti_x2 = 0;
+	hdr->ti_off = 5;
+	hdr->ti_flags = 0;
+	hdr->ti_win = 0;
+	hdr->ti_sum = 0;
+	hdr->ti_urp = 0;
+	return (hdr);
+}
+
+/*
+ * Send a single message to the TCP at address specified by
+ * the given TCP/IP header. If m == 0, then we make a copy
+ * of the tcpiphdr at ti and send directly to the addressed host.
+ * This is used to force keep alive messages out using the TCP
+ * template for a connection tp->t_template. If flags are given
+ * then we send a message back to the TCP which originated the
+ * segment ti, and discard the mbuf containing it and any other
+ * attached mbufs.
+ *
+ * In any case the ack and sequence number of the transmitted
+ * segment are as specified by the parameters.
+ *
+ * Note that the code selects between the two modes on m, not on
+ * flags: when m == 0 the flags argument is overwritten with TH_ACK.
+ * tp may be null; the advertised window is then 0 and no cached
+ * route is handed to ip_output.  If no mbuf can be allocated in the
+ * m == 0 case the function returns without sending anything.
+ */
+void
+tcp_respond(tp, ti, m, ack, seq, flags)
+ struct tcpcb *tp;
+ register struct tcpiphdr *ti;
+ register struct mbuf *m;
+ tcp_seq ack, seq;
+ int flags;
+{
+ register int tlen;
+ int win = 0;
+ struct route *ro = 0;
+
+ if (tp) {
+ win = sbspace(&tp->t_inpcb->inp_socket->so_rcv);
+ ro = &tp->t_inpcb->inp_route;
+ }
+ if (m == 0) {
+ m = m_gethdr(M_DONTWAIT, MT_HEADER);
+ if (m == NULL)
+ return;
+#ifdef TCP_COMPAT_42
+ tlen = 1;
+#else
+ tlen = 0;
+#endif
+ /* leave room in front for a link-level header */
+ m->m_data += max_linkhdr;
+ *mtod(m, struct tcpiphdr *) = *ti;
+ ti = mtod(m, struct tcpiphdr *);
+ flags = TH_ACK;
+ } else {
+ /* reuse the received mbuf: keep only the header, in place */
+ m_freem(m->m_next);
+ m->m_next = 0;
+ m->m_data = (caddr_t)ti;
+ m->m_len = sizeof (struct tcpiphdr);
+ tlen = 0;
+ /* reply goes back to the sender: swap addresses and ports */
+#define xchg(a,b,type) { type t; t=a; a=b; b=t; }
+ xchg(ti->ti_dst.s_addr, ti->ti_src.s_addr, u_long);
+ xchg(ti->ti_dport, ti->ti_sport, u_short);
+#undef xchg
+ }
+ ti->ti_len = htons((u_short)(sizeof (struct tcphdr) + tlen));
+ tlen += sizeof (struct tcpiphdr);
+ m->m_len = tlen;
+ m->m_pkthdr.len = tlen;
+ m->m_pkthdr.rcvif = (struct ifnet *) 0;
+ ti->ti_next = ti->ti_prev = 0;
+ ti->ti_x1 = 0;
+ ti->ti_seq = htonl(seq);
+ ti->ti_ack = htonl(ack);
+ ti->ti_x2 = 0;
+ ti->ti_off = sizeof (struct tcphdr) >> 2;
+ ti->ti_flags = flags;
+ if (tp)
+ ti->ti_win = htons((u_short) (win >> tp->rcv_scale));
+ else
+ ti->ti_win = htons((u_short)win);
+ ti->ti_urp = 0;
+ /* the checksum field must be zero while the sum is computed */
+ ti->ti_sum = 0;
+ ti->ti_sum = in_cksum(m, tlen);
+ ((struct ip *)ti)->ip_len = tlen;
+ ((struct ip *)ti)->ip_ttl = ip_defttl;
+ (void) ip_output(m, NULL, ro, 0, NULL);
+}
+
+/*
+ * Allocate and initialise a TCP control block for the protocol
+ * control block inp.  The block starts zeroed with an empty
+ * reassembly queue and is linked to inp in both directions.
+ * Allocation does not wait; a null pointer is returned on failure.
+ */
+struct tcpcb *
+tcp_newtcpcb(inp)
+	struct inpcb *inp;
+{
+	register struct tcpcb *newtp;
+
+	newtp = malloc(sizeof(*newtp), M_PCB, M_NOWAIT);
+	if (newtp == NULL)
+		return ((struct tcpcb *)0);
+	bzero((char *) newtp, sizeof(struct tcpcb));
+
+	/* empty reassembly queue: both links point back at the tcpcb */
+	newtp->seg_prev = (struct tcpiphdr *)newtp;
+	newtp->seg_next = (struct tcpiphdr *)newtp;
+	newtp->t_maxseg = tcp_mssdflt;
+
+	if (tcp_do_rfc1323)
+		newtp->t_flags = (TF_REQ_SCALE|TF_REQ_TSTMP);
+	else
+		newtp->t_flags = 0;
+	newtp->t_inpcb = inp;
+
+	/*
+	 * srtt starts at TCPTV_SRTTBASE (0), which marks "no rtt
+	 * estimate yet".  rttvar is chosen so that srtt + 2 * rttvar
+	 * yields a reasonable initial retransmit time.
+	 */
+	newtp->t_srtt = TCPTV_SRTTBASE;
+	newtp->t_rttvar = (tcp_rttdflt * PR_SLOWHZ) << 2;
+	newtp->t_rttmin = TCPTV_MIN;
+	TCPT_RANGESET(newtp->t_rxtcur,
+	    ((TCPTV_SRTTBASE >> 2) + (TCPTV_SRTTDFLT << 2)) >> 1,
+	    TCPTV_MIN, TCPTV_REXMTMAX);
+
+	/* both windows start wide open */
+	newtp->snd_cwnd = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+	newtp->snd_ssthresh = TCP_MAXWIN << TCP_MAX_WINSHIFT;
+
+	inp->inp_ip.ip_ttl = ip_defttl;
+	inp->inp_ppcb = (caddr_t)newtp;
+	return (newtp);
+}
+
+/*
+ * Abort a connection and report errno on its socket.
+ * When a SYN has been received from the peer the state is set to
+ * CLOSED and tcp_output is invoked (which sends the RST); otherwise
+ * the drop is only counted.  A timeout is reported as the pending
+ * soft error when there is one.  Returns the result of tcp_close.
+ */
+struct tcpcb *
+tcp_drop(tp, errno)
+	register struct tcpcb *tp;
+	int errno;
+{
+	struct socket *so = tp->t_inpcb->inp_socket;
+
+	if (!TCPS_HAVERCVDSYN(tp->t_state))
+		tcpstat.tcps_conndrops++;
+	else {
+		tp->t_state = TCPS_CLOSED;
+		(void) tcp_output(tp);
+		tcpstat.tcps_drops++;
+	}
+	/* prefer the more specific soft error over a bare timeout */
+	if (tp->t_softerror && errno == ETIMEDOUT)
+		so->so_error = tp->t_softerror;
+	else
+		so->so_error = errno;
+	return (tcp_close(tp));
+}
+
+/*
+ * Close a TCP control block:
+ *	discard all space held by the tcp
+ *	discard internet protocol block
+ *	wake up any sleepers
+ *
+ * When RTV_RTT is defined and enough data was sent, the measured
+ * rtt, rttvar and ssthresh are first saved in the routing entry,
+ * skipping any metric whose lock bit is set.
+ * tp is freed; the return value is always a null tcpcb pointer.
+ */
+struct tcpcb *
+tcp_close(tp)
+	register struct tcpcb *tp;
+{
+	register struct tcpiphdr *t;
+	struct inpcb *inp = tp->t_inpcb;
+	struct socket *so = inp->inp_socket;
+	register struct mbuf *m;
+#ifdef RTV_RTT
+	register struct rtentry *rt;
+
+	/*
+	 * If we sent enough data to get some meaningful characteristics,
+	 * save them in the routing entry.  'Enough' is arbitrarily
+	 * defined as the sendpipesize (default 4K) * 16.  This would
+	 * give us 16 rtt samples assuming we only get one sample per
+	 * window (the usual case on a long haul net).  16 samples is
+	 * enough for the srtt filter to converge to within 5% of the correct
+	 * value; fewer samples and we could save a very bogus rtt.
+	 *
+	 * Don't update the default route's characteristics and don't
+	 * update anything that the user "locked".
+	 */
+	if (SEQ_LT(tp->iss + so->so_snd.sb_hiwat * 16, tp->snd_max) &&
+	    (rt = inp->inp_route.ro_rt) &&
+	    ((struct sockaddr_in *)rt_key(rt))->sin_addr.s_addr != INADDR_ANY) {
+		register u_long i;
+
+		if ((rt->rt_rmx.rmx_locks & RTV_RTT) == 0) {
+			i = tp->t_srtt *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTT_SCALE));
+			if (rt->rt_rmx.rmx_rtt && i)
+				/*
+				 * filter this update to half the old & half
+				 * the new values, converting scale.
+				 * See route.h and tcp_var.h for a
+				 * description of the scaling constants.
+				 */
+				rt->rt_rmx.rmx_rtt =
+				    (rt->rt_rmx.rmx_rtt + i) / 2;
+			else
+				rt->rt_rmx.rmx_rtt = i;
+		}
+		if ((rt->rt_rmx.rmx_locks & RTV_RTTVAR) == 0) {
+			i = tp->t_rttvar *
+			    (RTM_RTTUNIT / (PR_SLOWHZ * TCP_RTTVAR_SCALE));
+			if (rt->rt_rmx.rmx_rttvar && i)
+				rt->rt_rmx.rmx_rttvar =
+				    (rt->rt_rmx.rmx_rttvar + i) / 2;
+			else
+				rt->rt_rmx.rmx_rttvar = i;
+		}
+		/*
+		 * update the pipelimit (ssthresh) if it has been updated
+		 * already or if a pipesize was specified & the threshold
+		 * got below half the pipesize.  I.e., wait for bad news
+		 * before we start updating, then update on both good
+		 * and bad news.
+		 *
+		 * The two alternatives are grouped explicitly: without
+		 * the inner parentheses "&&" binds tighter than "||",
+		 * so the pipesize test alone could update a locked
+		 * ssthresh, using whatever value i held from the
+		 * rtt/rttvar computations above.
+		 */
+		if ((rt->rt_rmx.rmx_locks & RTV_SSTHRESH) == 0 &&
+		    (i = tp->snd_ssthresh) != 0 &&
+		    (rt->rt_rmx.rmx_ssthresh ||
+		     i < (rt->rt_rmx.rmx_sendpipe / 2))) {
+			/*
+			 * convert the limit from user data bytes to
+			 * packets then to packet data bytes.
+			 */
+			i = (i + tp->t_maxseg / 2) / tp->t_maxseg;
+			if (i < 2)
+				i = 2;
+			i *= (u_long)(tp->t_maxseg + sizeof (struct tcpiphdr));
+			if (rt->rt_rmx.rmx_ssthresh)
+				rt->rt_rmx.rmx_ssthresh =
+				    (rt->rt_rmx.rmx_ssthresh + i) / 2;
+			else
+				rt->rt_rmx.rmx_ssthresh = i;
+		}
+	}
+#endif /* RTV_RTT */
+	/* free the reassembly queue, if any */
+	t = tp->seg_next;
+	while (t != (struct tcpiphdr *)tp) {
+		/* step forward first, then unlink and free the one behind */
+		t = (struct tcpiphdr *)t->ti_next;
+		m = REASS_MBUF((struct tcpiphdr *)t->ti_prev);
+		remque(t->ti_prev);
+		m_freem(m);
+	}
+	if (tp->t_template)
+		(void) m_free(dtom(tp->t_template));
+	free(tp, M_PCB);
+	inp->inp_ppcb = 0;
+	soisdisconnected(so);
+	/* clobber input pcb cache if we're closing the cached connection */
+	if (inp == tcp_last_inpcb)
+		tcp_last_inpcb = &tcb;
+	in_pcbdetach(inp);
+	tcpstat.tcps_closed++;
+	return ((struct tcpcb *)0);
+}
+
+/*
+ * Drain hook for TCP.  The body is empty: nothing is released here.
+ */
+void
+tcp_drain()
+{
+
+}
+
+/*
+ * Deliver an asynchronous error to the user of a tcp connection.
+ * Normally the error is only remembered as a soft error and the
+ * user is woken up (nothing more happens until soft errors can be
+ * selected for).
+ */
+void
+tcp_notify(inp, error)
+	struct inpcb *inp;
+	int error;
+{
+	register struct tcpcb *tp = (struct tcpcb *)inp->inp_ppcb;
+	register struct socket *so = inp->inp_socket;
+	int unreach;
+
+	unreach = (error == EHOSTUNREACH || error == ENETUNREACH ||
+	    error == EHOSTDOWN);
+
+	/*
+	 * An established connection ignores reachability errors
+	 * altogether.
+	 */
+	if (tp->t_state == TCPS_ESTABLISHED && unreach)
+		return;
+
+	/*
+	 * A connection that has not completed, has already
+	 * retransmitted several times and now sees a second error
+	 * gives up at once rather than waiting a long time for a
+	 * connection that can never complete.  Everything else is
+	 * recorded as a soft error.
+	 */
+	if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&
+	    tp->t_softerror)
+		so->so_error = error;
+	else
+		tp->t_softerror = error;
+
+	wakeup((caddr_t) &so->so_timeo);
+	sorwakeup(so);
+	sowwakeup(so);
+}
+
+/*
+ * Control input for TCP: map a PRC_* command to a notification on
+ * the affected connections.
+ *
+ *	cmd	PRC_* command code
+ *	sa	address the command refers to
+ *	ip	if non-null, an ip header followed by a tcp header; its
+ *		source address and the tcp ports narrow the notification.
+ *		If null, the notification is issued with zero
+ *		ports and a zero local address.
+ *
+ * PRC_QUENCH is routed to tcp_quench, everything else to tcp_notify.
+ * Commands that are neither redirects nor mapped to an error in
+ * inetctlerrmap are ignored.
+ */
+void
+tcp_ctlinput(cmd, sa, ip)
+	int cmd;
+	struct sockaddr *sa;
+	register struct ip *ip;
+{
+	register struct tcphdr *th;
+	extern struct in_addr zeroin_addr;
+	extern u_char inetctlerrmap[];
+	void (*notify) __P((struct inpcb *, int)) = tcp_notify;
+
+	/*
+	 * Valid commands are 0 .. PRC_NCMDS - 1, so cmd == PRC_NCMDS
+	 * must be rejected as well before inetctlerrmap is indexed
+	 * (the table is assumed to hold PRC_NCMDS entries).
+	 */
+	if (cmd == PRC_QUENCH)
+		notify = tcp_quench;
+	else if (!PRC_IS_REDIRECT(cmd) &&
+	    ((unsigned)cmd >= PRC_NCMDS || inetctlerrmap[cmd] == 0))
+		return;
+	if (ip) {
+		/* the tcp header follows the (variable length) ip header */
+		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));
+		in_pcbnotify(&tcb, sa, th->th_dport, ip->ip_src, th->th_sport,
+		    cmd, notify);
+	} else
+		in_pcbnotify(&tcb, sa, 0, zeroin_addr, 0, cmd, notify);
+}
+
+/*
+ * Source quench handler: shrink the congestion window to a single
+ * segment.  It is opened again gradually as the connection proceeds.
+ * The errno argument is not used.
+ */
+void
+tcp_quench(inp, errno)
+	struct inpcb *inp;
+	int errno;
+{
+	struct tcpcb *tp;
+
+	tp = intotcpcb(inp);
+	if (tp == 0)
+		return;
+	tp->snd_cwnd = tp->t_maxseg;
+}
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
new file mode 100644
index 0000000..5778f7d
--- /dev/null
+++ b/sys/nfs/nfs_common.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+/* rpc_call and rpc_vers are stored verbatim into each RPC header */
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+/* transaction id of the last request built; nfsm_rpchead never uses 0 */
+static u_long nfs_xid = 0;
+/* presumably indexed by the NFS wire file type -- TODO confirm */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ *
+ * vp:     vnode the request is for, or null; when its mount has
+ *         NFSMNT_NQNFS set, lease request words are added.
+ * procid: procedure number, used only for the lease decision.
+ * bposp:  receives the current build position.
+ * Returns the mbuf in mb; allocation uses M_WAIT.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ /*
+ * NOTE(review): mb, mb2 and bpos are presumably referenced by name
+ * inside the nfsm_build macro -- do not rename without checking.
+ */
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2;
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ /* two words: the lease flag and the lease term */
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ /* a single zero word: no lease requested */
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * cr:        credentials supplying the uid (and, for RPCAUTH_UNIX, groups).
+ * nqnfs:     non-zero selects NQNFS_PROG/NQNFS_VER1, else NFS_PROG/NFS_VER2.
+ * auth_len:  byte length of the authorization data.
+ * auth_str:  authorization bytes, copied only for RPCAUTH_NQNFS.
+ * mrest, mrest_len: rest of the request, chained after the header, and
+ *            its length (added into the packet header length).
+ * mbp:       receives the last mbuf of the header part.
+ * xidp:      receives the transaction id, already in xdr form.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED;
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ /* 10 words = 8 header words built below + 2 verifier words */
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ /* a transaction id of 0 is never used: skip it on wrap-around */
+ if (++nfs_xid == 0)
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ /* five fixed words precede the group list; the rest are groups */
+ grpsiz = (auth_len >> 2) - 5;
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ siz = auth_len;
+ /* copy the authorization string, adding mbufs as they fill up */
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ /* zero-fill up to the rounded length */
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ /* the verifier: RPCAUTH_NULL with zero length */
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * mrep, dpos: current mbuf and position within it; both are advanced
+ *             past the copied data and its padding.
+ * uiop:       destination; iovecs, uio_offset and uio_resid are updated.
+ * siz:        number of data bytes to copy; the source is then advanced
+ *             by a further nfsm_rndup(siz) - siz bytes.
+ * Returns 0, EFBIG when the uio runs out of iovecs, EBADRPC when the
+ * mbuf chain is too short, or the result of nfs_adv.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ /* bytes remaining in the current mbuf from the current position */
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
+ rem = nfsm_rndup(siz)-siz;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ /* step over exhausted (or empty) mbufs */
+ while (len == 0) {
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ /* NOTE(review): the result of copyout is not checked */
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ /* iovec used up: go to the next one; otherwise trim it */
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ /* skip the padding that rounds the data up */
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * uiop: source; iovecs, uio_offset and uio_resid are updated.
+ * mq:   on entry the mbuf to append to, on return the last mbuf used.
+ * siz:  number of data bytes to copy; zero bytes are appended to pad
+ *       the data to nfsm_rndup(siz).
+ * bpos: receives the position just past the last byte written.
+ * Returns 0, or EINVAL when the uio runs out of iovecs.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ /* clflg: attach a cluster to every mbuf allocated below */
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz;
+ /* mp is the mbuf being filled, mp2 the current tail of the chain */
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EINVAL);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) {
+ /* current mbuf is full: append a fresh one */
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ /* NOTE(review): the result of copyin is not checked */
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ /* iovec used up: go to the next one; otherwise trim it */
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ /* pad with zero bytes, in a new mbuf if they do not fit */
+ if (rem > M_TRAILINGSPACE(mp)) {
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * mdp, dposp: current mbuf and position; advanced past the siz bytes.
+ * left:       bytes remaining in the current mbuf from *dposp.
+ * cp2:        receives the address of the contiguous siz bytes.
+ * Returns 0, or EBADRPC when the chain holds too little data.
+ * Panics when the bytes straddle mbufs and siz exceeds MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ /* nothing left in this mbuf: move on to one that holds data */
+ while (left == 0) {
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) {
+ /* easy case: already contiguous in the current mbuf */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ /*
+ * Insert a new mbuf after the current one and gather the
+ * siz bytes into it: first the tail of the current mbuf,
+ * then the front of the following ones.
+ */
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left;
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ /* continue from the mbuf that supplied the last bytes */
+ *mdp = mp2;
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Move the dissect position forward by offs bytes.
+ * "left" is the number of unread bytes in *mdp on entry. On success *mdp
+ * and *dposp describe the new position and 0 is returned; EBADRPC is
+ * returned (leaving *mdp and *dposp untouched) if the chain runs out.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur;
+	register int avail;
+
+	/* Swallow whole mbufs until the target offset lies within "cur" */
+	for (cur = *mdp, avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		if ((cur = cur->m_next) == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ * Appends the length word (txdr_unsigned(siz)) followed by the siz bytes
+ * at cp to the chain whose last mbuf is *mb and whose build position is
+ * *bpos. New mbufs are chained on as needed. On return *mb is the last
+ * mbuf used and *bpos points just past the data in it. Always returns 0.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ /* putsize stays set until the length word has been written */
+ putsize = 1;
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ /* Room in the current mbuf: length word first, then string bytes */
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) {
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ /* m_len temporarily holds the capacity of m1 */
+ m1->m_len = NFSMSIZ(m1);
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) {
+ /* Final piece: len is the string tail rounded up to a word */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ /* zero the last word first so the pad bytes end up null */
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ /* real length: data (plus pad) and the length word if it went here */
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ * Stores the txdr_unsigned() encodings of the RPC/NFS constants in their
+ * globals, clears the async daemon table, initializes the nfsnode table
+ * and server structures, sets up the nqnfs server state, and finally
+ * makes the request list empty and calls nfs_timer().
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ /* Encoded forms of the RPC and NFS protocol constants */
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS);
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ * Guarded by nqnfsstarttime so it is skipped once that has been set.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ /* both th_head links point back at the head itself */
+ nqthead.th_head[0] = &nqthead;
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ * (the list head is linked to itself in both directions)
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ * copy the attributes to *vaper
+ *
+ * vpp - vnode whose cache is loaded; *vpp is replaced when a new device
+ * node turns out to alias an existing vnode
+ * mdp, dposp - current mbuf and position in the reply, advanced past
+ * the attributes on success
+ * Returns 0, the error from nfsm_disct(), or EOPNOTSUPP for a new fifo
+ * node when FIFO is not configured.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ /* t1 = bytes not yet consumed in the current mbuf */
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos;
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ /* for these two types, take the type from the mode bits instead */
+ if (vtyp == VNON || vtyp == VREG)
+ vtyp = IFTOVT(vmode);
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ /* a character device with rdev of all ones is treated as a fifo */
+ if (vtyp == VCHR && rdev == 0xffffffff)
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ * (np is first unlinked from its hash chain)
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ * (np is put back at the head of its hash chain)
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ /* Fill in the cached vattr kept in the nfsnode */
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ /* only the seconds of ctime are kept; the usec slot supplies va_gen */
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec);
+ vap->va_filerev = 0;
+ }
+ /*
+ * Reconcile the received size with the local n_size. For a regular
+ * file with NMODIFIED set the larger of the two wins; otherwise
+ * n_size simply follows the received size.
+ */
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ /* stamp the cache with the time it was loaded */
+ np->n_attrstamp = time.tv_sec;
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ /* with NCHG set, the nfsnode's own access/update times are reported */
+ if (np->n_flag & NCHG) {
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Hand back the cached attributes of vp if they may still be used.
+ * On a hit the cache is copied to *vaper and 0 is returned; on a miss
+ * ENOENT is returned and *vaper is left alone.
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *cached;
+	int stale;
+
+	/*
+	 * NQLOOKLEASE mounts ask NQNFS_CKCACHABLE() (and need a non-zero
+	 * stamp); every other mount ages the stamp against NFS_ATTRTIMEO().
+	 */
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE)
+		stale = (!NQNFS_CKCACHABLE(vp, NQL_READ) ||
+		    np->n_attrstamp == 0);
+	else
+		stale = ((time.tv_sec - np->n_attrstamp) >=
+		    NFS_ATTRTIMEO(np));
+	if (stale) {
+		nfsstats.attrcache_misses++;
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+
+	/*
+	 * Bring the cached size and n_size into agreement. Only a locally
+	 * modified regular file may keep an n_size larger than the cache;
+	 * in every other case n_size follows the cached value.
+	 */
+	cached = &np->n_vattr;
+	if (cached->va_size != np->n_size) {
+		if (cached->va_type == VREG && (np->n_flag & NMODIFIED) &&
+		    cached->va_size < np->n_size)
+			cached->va_size = np->n_size;
+		else
+			np->n_size = cached->va_size;
+		if (cached->va_type == VREG)
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	bcopy((caddr_t)cached, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+	if ((np->n_flag & NMODIFIED) == 0) {
+		np->n_size = vaper->va_size;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	} else if (np->n_size > vaper->va_size)
+	if (np->n_size > vaper->va_size)
+		vaper->va_size = np->n_size;
+#endif
+	/* Nothing more to do unless the node carries its own times */
+	if ((np->n_flag & NCHG) == 0)
+		return (0);
+	if (np->n_flag & NACC) {
+		vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+		vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ * The len-byte name is copied out of the request (*mdp, *dposp, both
+ * advanced past the name and its padding), the starting directory is
+ * obtained from the file handle fhp, and lookup() is called.
+ * Returns 0 on success, else an error: EBADRPC if the name runs off the
+ * mbuf chain, EINVAL if the name contains a null or '/' or the lookup
+ * hits a symbolic link, ENOTDIR if fhp is not a directory, or whatever
+ * nfs_adv(), nfsrv_fhtovp() or lookup() return.
+ * The name buffer is freed before returning unless SAVENAME or SAVESTART
+ * was requested, in which case HASBUF is set and it is left allocated.
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ /* one extra byte for the terminating null */
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to cnp->cn_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ /* rem = bytes left in the current mbuf */
+ rem = mtod(md, caddr_t) + md->m_len - fromcp;
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ /* nulls and slashes are not allowed inside the name */
+ if (*fromcp == '\0' || *fromcp == '/') {
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp;
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ /* len becomes the number of pad bytes that follow the name */
+ len = nfsm_rndup(len)-len;
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ /* NOCROSSMOUNT is always set; RDONLY as well for a read-only export */
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ * (the buffer is then left allocated and flagged with HASBUF)
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+out:
+ /* reached on every error and on success without a saved name */
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (error);
+}
+
+/*
+ * Tail-only variant of m_adj(): drop len bytes from the end of the
+ * chain mp and overwrite the last nul bytes of what remains with nulls,
+ * so that the data is null filled out to a long boundary.
+ */
+void
+nfsm_adj(mp, len, nul)
+	struct mbuf *mp;
+	register int len;
+	int nul;
+{
+	register struct mbuf *cur;
+	register int keep, n;
+	register char *zp;
+
+	/* First pass: total the chain and stop on its final mbuf */
+	keep = 0;
+	for (cur = mp; ; cur = cur->m_next) {
+		keep += cur->m_len;
+		if (cur->m_next == (struct mbuf *)0)
+			break;
+	}
+	if (cur->m_len > len) {
+		/* The trim comes entirely out of the final mbuf */
+		cur->m_len -= len;
+	} else {
+		/*
+		 * The chain must shrink to "keep" bytes. Walk from the
+		 * front to the mbuf holding the last byte that survives
+		 * and cut it off there.
+		 */
+		keep -= len;
+		if (keep < 0)
+			keep = 0;
+		cur = mp;
+		while (cur->m_len < keep) {
+			keep -= cur->m_len;
+			cur = cur->m_next;
+		}
+		cur->m_len = keep;
+	}
+	/* Null out the tail end of the (now) last mbuf with data */
+	if (nul > 0) {
+		zp = mtod(cur, caddr_t) + cur->m_len - nul;
+		for (n = nul; n > 0; n--)
+			*zp++ = '\0';
+	}
+	/* Whatever follows holds no data any more (nothing, if we trimmed
+	 * only the final mbuf) */
+	while ((cur = cur->m_next) != (struct mbuf *)0)
+		cur->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ *	- look up fsid in mount list (if not found ret error)
+ *	- get vp and export rights by calling VFS_FHTOVP()
+ *	- for MNT_EXKERB exports map cred through the socket's uid hash;
+ *	  a uid with no mapping fails with NQNFS_AUTHERR
+ *	- otherwise if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ *	- if not lockflag unlock it with VOP_UNLOCK()
+ *
+ * Whenever the group list in cred is replaced, cr_ngroups is replaced
+ * with it, so that groups of the original credential lying beyond the
+ * end of the new (possibly shorter) list are no longer honoured.
+ *
+ * Returns 0 with *vpp and *rdonlyp set, ESTALE if the fsid is not
+ * mounted, the error from VFS_FHTOVP(), or NQNFS_AUTHERR.
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS;
+			    i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			/* the count must follow the list that was copied */
+			cred->cr_ngroups = i;
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		/* drop any groups the caller had beyond the anon list */
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address held in the mbuf nam names the same host
+ * as haddr, interpreting both according to family. Returns 1 only on
+ * a definite match and 0 in every other case, including families that
+ * are not handled here.
+ * For AF_INET the host is kept directly in haddr as the 4 byte internet
+ * address rather than in an address mbuf.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	if (family == AF_INET) {
+		sin = mtod(nam, struct sockaddr_in *);
+		return (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		return (theirs->siso_family == AF_ISO &&
+		    theirs->siso_nlen > 0 &&
+		    theirs->siso_nlen == ours->siso_nlen &&
+		    SAME_ISOADDR(theirs, ours));
+	}
+#endif /* ISO */
+	return (0);
+}
diff --git a/sys/nfs/nfs_common.h b/sys/nfs/nfs_common.h
new file mode 100644
index 0000000..879db36
--- /dev/null
+++ b/sys/nfs/nfs_common.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+#define NFSMINOFF(m) \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat
+#define NFSMADV(m, s) (m)->m_data += (s)
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+/*
+ * nfsm_build(a,c,s): reserve s bytes at the build position bpos of mbuf
+ * mb and point a (cast with c) at them. If s does not fit in the trailing
+ * space of mb a new mbuf is chained on and becomes mb; in that case s must
+ * not exceed MLEN or the macro panics. Uses mb, mb2 and bpos.
+ */
+#define nfsm_build(a,c,s) \
+ { if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+
+/*
+ * nfsm_dissect(a,c,s): point a (cast with c) at the next s bytes at dpos
+ * in mbuf md and step dpos past them. When the bytes are not all in md,
+ * nfsm_disct() is called to make them contiguous; if that fails mrep is
+ * freed and control goes to nfsmout with error set.
+ * Uses t1, cp2, md, dpos, mrep and error.
+ */
+#define nfsm_dissect(a,c,s) \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+
+/*
+ * nfsm_fhtom(v): append the NFSX_FH byte file handle stored in the
+ * nfsnode of vnode v to the message being built. Uses cp.
+ */
+#define nfsm_fhtom(v) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+/*
+ * nfsm_srvfhtom(f): append the NFSX_FH byte file handle that f points
+ * to. Uses cp.
+ */
+#define nfsm_srvfhtom(f) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+
+/*
+ * nfsm_mtofh(d,v): dissect a file handle, get its nfsnode with nfs_nget()
+ * on the mount of vnode d, set v to that node's vnode and then load the
+ * attributes that follow the handle. On failure mrep is freed and control
+ * goes to nfsmout.
+ */
+#define nfsm_mtofh(d,v) \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); \
+ }
+
+/*
+ * nfsm_loadattr(v,a): run nfs_loadattrcache() on the attributes at the
+ * current dissect position, passing a through as its last argument (the
+ * optional struct vattr to copy them to). v is reassigned
+ * afterwards because nfs_loadattrcache() may substitute another vnode.
+ * On failure mrep is freed and control goes to nfsmout.
+ */
+#define nfsm_loadattr(v,a) \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; }
+
+/*
+ * nfsm_strsiz(s,m): dissect a length word into s; if it is larger than m
+ * free mrep, set error to EBADRPC and go to nfsmout.
+ */
+#define nfsm_strsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+
+/*
+ * nfsm_srvstrsiz(s,m): server side variant. Lengths greater than m and
+ * lengths <= 0 both set error to EBADRPC, which is then handed to
+ * nfsm_reply(0).
+ */
+#define nfsm_srvstrsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } }
+
+/*
+ * nfsm_mtouio(p,s): if s > 0, move s bytes from the reply to the uio p
+ * with nfsm_mbuftouio(); on failure free mrep and go to nfsmout.
+ */
+#define nfsm_mtouio(p,s) \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+/*
+ * nfsm_uiotom(p,s): move s bytes from the uio p into the request with
+ * nfsm_uiotombuf(); on failure free mreq and go to nfsmout.
+ */
+#define nfsm_uiotom(p,s) \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+/*
+ * nfsm_reqhead(v,a,s): start a request with nfsm_reqh(), leaving mreq and
+ * mb pointing at the mbuf it returns and bpos set through its last
+ * argument.
+ */
+#define nfsm_reqhead(v,a,s) \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+/*
+ * nfsm_reqdone: free the reply mrep and define the nfsmout label that the
+ * other macros jump to.
+ */
+#define nfsm_reqdone m_freem(mrep); \
+ nfsmout:
+
+/* nfsm_rndup(a): round a up to the next multiple of 4 */
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+
+/*
+ * nfsm_request(v, t, p, c): hand mreq to nfs_request(), which returns the
+ * reply through mrep, md and dpos; on error go to nfsmout (nothing is
+ * freed here).
+ */
+#define nfsm_request(v, t, p, c) \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+
+/*
+ * nfsm_strtom(a,s,m): append the s byte string a, preceded by its length
+ * word, to the request. A string longer than m frees mreq and fails with
+ * ENAMETOOLONG. When the length word plus the padded string fits in the
+ * trailing space of mb it is built in place (the last word is zeroed first
+ * so that the pad bytes are null); otherwise nfsm_strtmbuf() does the work.
+ * Uses t2, tl, mb and bpos.
+ */
+#define nfsm_strtom(a,s,m) \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+/*
+ * nfsm_srvdone: define the nfsmout label and return error from the
+ * enclosing function.
+ */
+#define nfsm_srvdone \
+ nfsmout: \
+ return(error)
+
+/*
+ * nfsm_reply(s): record error in nfsd->nd_repstat and build the reply
+ * header with nfs_rephead(), asking for s bytes when there is no error and
+ * for 0 bytes when there is. The request mrep is then freed and mreq is
+ * set from *mrq. If error is set the enclosing function returns 0 at this
+ * point.
+ */
+#define nfsm_reply(s) \
+ { \
+ nfsd->nd_repstat = error; \
+ if (error) \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); \
+ mreq = *mrq; \
+ if (error) \
+ return(0); \
+ }
+
+/*
+ * nfsm_adv(s): skip s bytes of the message being dissected, calling
+ * nfs_adv() when they extend past the current mbuf; on failure free mrep
+ * and go to nfsmout. Uses t1, md and dpos.
+ */
+#define nfsm_adv(s) \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+/*
+ * nfsm_srvmtofh(f): dissect NFSX_FH bytes from the request and copy them
+ * to the file handle f points to. Uses tl.
+ */
+#define nfsm_srvmtofh(f) \
+ nfsm_dissect(tl, u_long *, NFSX_FH); \
+ bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH)
+
+/*
+ * nfsm_clget: make sure there is room at the fill pointer bp. When bp has
+ * reached the end pointer be, a new mbuf is obtained with MGET/MCLGET,
+ * its length set to its full size, chained on after mp2, and bp/be are
+ * reset to span it. If the mbuf being left is mb, its length is first
+ * brought up to date with the bytes written since bpos. tl is always left
+ * pointing at bp. Uses bp, be, mp, mp2, mb, bpos and tl.
+ */
+#define nfsm_clget \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+
+/*
+ * nfsm_srvfillattr: fill the attribute structure *fp from the vattr *vap.
+ * The nfs layout is used when nfsd->nd_nqlflag is NQL_NOVAL and the nqnfs
+ * layout otherwise. In both layouts a VFIFO is given an rdev of
+ * 0xffffffff. In the nfs layout the usec part of ctime carries va_gen.
+ * Uses fp, vap and nfsd.
+ */
+#define nfsm_srvfillattr \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+ } else { \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
new file mode 100644
index 0000000..261fd42
--- /dev/null
+++ b/sys/nfsclient/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons)
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The timeout is one tenth of the time since n_mtime, limited to the range
+ * NFS_MINATTRTIMO to NFS_MAXATTRTIMO; a node with NMODIFIED set always
+ * gets NFS_MINATTRTIMO. Note that np is evaluated several times.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next;
+ struct nfssvc_sock *ns_prev;
+ int ns_flag;
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ];
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
new file mode 100644
index 0000000..177a278
--- /dev/null
+++ b/sys/nfsclient/nfs_bio.c
@@ -0,0 +1,799 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_bio.c 8.5 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/resourcevar.h>
+#include <sys/proc.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/trace.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsnode.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+struct buf *incore(), *nfs_getcacheblk(); /* forward declarations (no prototypes) */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON]; /* non-null slot: that nfsiod is idle, see nfs_asyncio() */
+extern int nfs_numasync; /* number of async daemons; 0 means no async I/O */
+
+/*
+ * Vnode op for read using bio
+ * Any similarity to readip() is purely coincidental
+ *
+ * Reads from vp into uio through the buffer cache, one cache block per
+ * loop iteration, for regular files, symbolic links and directories.
+ * ioflag is unused here (see the lint hack below); cred is handed to the
+ * attribute, lease and read rpcs.
+ * Returns 0 or an errno; EINTR is returned whenever nfs_getcacheblk()
+ * comes back without a buffer.
+ */
+nfs_bioread(vp, uio, ioflag, cred)
+ register struct vnode *vp;
+ register struct uio *uio;
+ int ioflag;
+ struct ucred *cred;
+{
+ register struct nfsnode *np = VTONFS(vp);
+ register int biosize, diff;
+ struct buf *bp, *rabp;
+ struct vattr vattr;
+ struct proc *p;
+ struct nfsmount *nmp;
+ daddr_t lbn, bn, rabn;
+ caddr_t baddr;
+ int got_buf, nra, error = 0, n, on, not_readin;
+
+#ifdef lint
+ ioflag = ioflag;
+#endif /* lint */
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_READ)
+ panic("nfs_read mode");
+#endif
+ if (uio->uio_resid == 0)
+ return (0);
+ if (uio->uio_offset < 0 && vp->v_type != VDIR)
+ return (EINVAL);
+ nmp = VFSTONFS(vp->v_mount);
+ biosize = nmp->nm_rsize; /* cache block size is the mount's read size */
+ p = uio->uio_procp;
+ /*
+ * For nfs, cache consistency can only be maintained approximately.
+ * Although RFC1094 does not specify the criteria, the following is
+ * believed to be compatible with the reference port.
+ * For nqnfs, full cache consistency is maintained within the loop.
+ * For nfs:
+ * If the file's modify time on the server has changed since the
+ * last read rpc or you have written to the file,
+ * you may have lost data cache consistency with the
+ * server, so flush all of the file's data out of the cache.
+ * Then force a getattr rpc to ensure that you have up to date
+ * attributes.
+ * The mount flag NFSMNT_MYWRITE says "Assume that my writes are
+ * the ones changing the modify time".
+ * NB: This implies that cache data can be read when up to
+ * NFS_ATTRTIMEO seconds out of date. If you find that you need current
+ * attributes this could be forced by setting n_attrstamp to 0 before
+ * the VOP_GETATTR() call.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0 && vp->v_type != VLNK) {
+ if (np->n_flag & NMODIFIED) {
+ if ((nmp->nm_flag & NFSMNT_MYWRITE) == 0 ||
+ vp->v_type != VREG) {
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ np->n_attrstamp = 0; /* force the getattr below to go to the server */
+ np->n_direofoffset = 0;
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ } else {
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.ts_sec) {
+ np->n_direofoffset = 0;
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ }
+ }
+ }
+ do {
+
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_READ, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE) ||
+ ((np->n_flag & NMODIFIED) && vp->v_type == VDIR)) {
+ if (vp->v_type == VDIR) {
+ np->n_direofoffset = 0;
+ cache_purge(vp);
+ }
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ } else if (vp->v_type == VDIR && (np->n_flag & NMODIFIED)) {
+ np->n_direofoffset = 0;
+ cache_purge(vp);
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE) { /* non-cachable: bypass the buffer cache, do the rpc directly */
+ switch (vp->v_type) {
+ case VREG:
+ error = nfs_readrpc(vp, uio, cred);
+ break;
+ case VLNK:
+ error = nfs_readlinkrpc(vp, uio, cred);
+ break;
+ case VDIR:
+ error = nfs_readdirrpc(vp, uio, cred);
+ break;
+ };
+ return (error);
+ }
+ baddr = (caddr_t)0;
+ switch (vp->v_type) {
+ case VREG:
+ nfsstats.biocache_reads++;
+ lbn = uio->uio_offset / biosize;
+ on = uio->uio_offset & (biosize-1); /* byte offset within the block (mask form needs biosize to be a power of two) */
+ bn = lbn * (biosize / DEV_BSIZE); /* block number in DEV_BSIZE units */
+ not_readin = 1; /* cleared below if the block has to be read in */
+
+ /*
+ * Start the read ahead(s), as required.
+ * Only done when this read follows on from the last block
+ * read (v_lastr) and the blocks lie within the file size.
+ */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ lbn == vp->v_lastr + 1) {
+ for (nra = 0; nra < nmp->nm_readahead &&
+ (lbn + 1 + nra) * biosize < np->n_size; nra++) {
+ rabn = (lbn + 1 + nra) * (biosize / DEV_BSIZE);
+ if (!incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, biosize, p);
+ if (!rabp)
+ return (EINTR);
+ if ((rabp->b_flags & (B_DELWRI | B_DONE)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ if (nfs_asyncio(rabp, cred)) { /* no nfsiod available: drop the read ahead */
+ rabp->b_flags |= B_INVAL;
+ brelse(rabp);
+ }
+ }
+ }
+ }
+ }
+
+ /*
+ * If the block is in the cache and has the required data
+ * in a valid region, just copy it out.
+ * Otherwise, get the block and write back/read in,
+ * as required.
+ */
+ if ((bp = incore(vp, bn)) &&
+ (bp->b_flags & (B_BUSY | B_WRITEINPROG)) ==
+ (B_BUSY | B_WRITEINPROG))
+ got_buf = 0; /* busy with a write in progress: look at it without owning it */
+ else {
+again:
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ got_buf = 1;
+ if ((bp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+ bp->b_flags |= B_READ;
+ not_readin = 0;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ }
+ n = min((unsigned)(biosize - on), uio->uio_resid);
+ diff = np->n_size - uio->uio_offset; /* bytes left before end of file */
+ if (diff < n)
+ n = diff;
+ if (not_readin && n > 0) {
+ if (on < bp->b_validoff || (on + n) > bp->b_validend) {
+ if (!got_buf) {
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ got_buf = 1;
+ }
+ bp->b_flags |= B_INVAL; /* wanted range is not all valid: toss this copy and re-read */
+ if (bp->b_dirtyend > 0) {
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfsbioread");
+ if (VOP_BWRITE(bp) == EINTR)
+ return (EINTR);
+ } else
+ brelse(bp);
+ goto again;
+ }
+ }
+ vp->v_lastr = lbn;
+ diff = (on >= bp->b_validend) ? 0 : (bp->b_validend - on);
+ if (diff < n)
+ n = diff;
+ break;
+ case VLNK:
+ nfsstats.biocache_readlinks++;
+ bp = nfs_getcacheblk(vp, (daddr_t)0, NFS_MAXPATHLEN, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_DONE) == 0) {
+ bp->b_flags |= B_READ;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+ n = min(uio->uio_resid, NFS_MAXPATHLEN - bp->b_resid);
+ got_buf = 1;
+ on = 0;
+ break;
+ case VDIR:
+ nfsstats.biocache_readdirs++;
+ bn = (daddr_t)uio->uio_offset; /* directory blocks are keyed by the offset itself */
+ bp = nfs_getcacheblk(vp, bn, NFS_DIRBLKSIZ, p);
+ if (!bp)
+ return (EINTR);
+ if ((bp->b_flags & B_DONE) == 0) {
+ bp->b_flags |= B_READ;
+ if (error = nfs_doio(bp, cred, p)) {
+ brelse(bp);
+ return (error);
+ }
+ }
+
+ /*
+ * If not eof and read aheads are enabled, start one.
+ * (You need the current block first, so that you have the
+ * directory offset cookie of the next block.)
+ */
+ rabn = bp->b_blkno; /* offset cookie that nfs_doio() saved in b_blkno */
+ if (nfs_numasync > 0 && nmp->nm_readahead > 0 &&
+ rabn != 0 && rabn != np->n_direofoffset &&
+ !incore(vp, rabn)) {
+ rabp = nfs_getcacheblk(vp, rabn, NFS_DIRBLKSIZ, p);
+ if (rabp) {
+ if ((rabp->b_flags & (B_DONE | B_DELWRI)) == 0) {
+ rabp->b_flags |= (B_READ | B_ASYNC);
+ if (nfs_asyncio(rabp, cred)) {
+ rabp->b_flags |= B_INVAL;
+ brelse(rabp);
+ }
+ }
+ }
+ }
+ on = 0;
+ n = min(uio->uio_resid, NFS_DIRBLKSIZ - bp->b_resid);
+ got_buf = 1;
+ break;
+ };
+
+ if (n > 0) {
+ if (!baddr)
+ baddr = bp->b_data;
+ error = uiomove(baddr + on, (int)n, uio);
+ }
+ switch (vp->v_type) {
+ case VREG:
+ if (n + on == biosize || uio->uio_offset == np->n_size)
+ bp->b_flags |= B_AGE;
+ break;
+ case VLNK:
+ n = 0; /* a link is read in a single pass: ends the loop */
+ break;
+ case VDIR:
+ uio->uio_offset = bp->b_blkno; /* continue from the next offset cookie */
+ break;
+ };
+ if (got_buf)
+ brelse(bp);
+ } while (error == 0 && uio->uio_resid > 0 && n > 0);
+ return (error);
+}
+
+/*
+ * Vnode op for write using bio
+ *
+ * Copies the data described by ap->a_uio into buffer cache blocks of the
+ * regular file ap->a_vp and records the written range as dirty. Each
+ * block is then written synchronously (IO_SYNC or a non-cachable lease),
+ * started asynchronously (the write reached the end of the block on a
+ * non-NQNFS mount) or left as a delayed write.
+ * Returns 0 or an errno: EIO if the vnode is not VREG, EFBIG (after
+ * posting SIGXFSZ) if the write would pass RLIMIT_FSIZE, EINTR if a cache
+ * block could not be obtained or a bwrite was interrupted.
+ */
+nfs_write(ap)
+ struct vop_write_args /* {
+ struct vnode *a_vp;
+ struct uio *a_uio;
+ int a_ioflag;
+ struct ucred *a_cred;
+ } */ *ap;
+{
+ register int biosize;
+ register struct uio *uio = ap->a_uio;
+ struct proc *p = uio->uio_procp;
+ register struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ register struct ucred *cred = ap->a_cred;
+ int ioflag = ap->a_ioflag;
+ struct buf *bp;
+ struct vattr vattr;
+ struct nfsmount *nmp;
+ daddr_t lbn, bn;
+ int n, on, error = 0;
+
+#ifdef DIAGNOSTIC
+ if (uio->uio_rw != UIO_WRITE)
+ panic("nfs_write mode");
+ if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc)
+ panic("nfs_write proc");
+#endif
+ if (vp->v_type != VREG)
+ return (EIO);
+ if (np->n_flag & NWRITEERR) { /* report (once) the error saved by an earlier failed write rpc */
+ np->n_flag &= ~NWRITEERR;
+ return (np->n_error);
+ }
+ if (ioflag & (IO_APPEND | IO_SYNC)) {
+ if (np->n_flag & NMODIFIED) {
+ np->n_attrstamp = 0;
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ }
+ if (ioflag & IO_APPEND) {
+ np->n_attrstamp = 0; /* force fresh attributes so n_size is current */
+ if (error = VOP_GETATTR(vp, &vattr, cred, p))
+ return (error);
+ uio->uio_offset = np->n_size;
+ }
+ }
+ nmp = VFSTONFS(vp->v_mount);
+ if (uio->uio_offset < 0)
+ return (EINVAL);
+ if (uio->uio_resid == 0)
+ return (0);
+ /*
+ * Maybe this should be above the vnode op call, but so long as
+ * file servers have no limits, i don't think it matters
+ */
+ if (p && uio->uio_offset + uio->uio_resid >
+ p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
+ psignal(p, SIGXFSZ);
+ return (EFBIG);
+ }
+ /*
+ * I use nm_rsize, not nm_wsize so that all buffer cache blocks
+ * will be the same size within a filesystem. nfs_writerpc will
+ * still use nm_wsize when sizing the rpc's.
+ */
+ biosize = nmp->nm_rsize;
+ do {
+
+ /*
+ * Check for a valid write lease.
+ * If non-cachable, just do the rpc
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ }
+ }
+ if (np->n_flag & NQNFSNONCACHE)
+ return (nfs_writerpc(vp, uio, cred, ioflag));
+ nfsstats.biocache_writes++;
+ lbn = uio->uio_offset / biosize;
+ on = uio->uio_offset & (biosize-1); /* byte offset within the block (mask form needs biosize to be a power of two) */
+ n = min((unsigned)(biosize - on), uio->uio_resid);
+ bn = lbn * (biosize / DEV_BSIZE);
+again:
+ bp = nfs_getcacheblk(vp, bn, biosize, p);
+ if (!bp)
+ return (EINTR);
+ if (bp->b_wcred == NOCRED) { /* remember who is writing, for the later write rpc */
+ crhold(cred);
+ bp->b_wcred = cred;
+ }
+ np->n_flag |= NMODIFIED;
+ if (uio->uio_offset + n > np->n_size) { /* the write extends the file */
+ np->n_size = uio->uio_offset + n;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ }
+
+ /*
+ * If the new write will leave a contiguous dirty
+ * area, just update the b_dirtyoff and b_dirtyend,
+ * otherwise force a write rpc of the old dirty area.
+ */
+ if (bp->b_dirtyend > 0 &&
+ (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) {
+ bp->b_proc = p;
+ if (VOP_BWRITE(bp) == EINTR)
+ return (EINTR);
+ goto again;
+ }
+
+ /*
+ * Check for valid write lease and get one as required.
+ * In case getblk() and/or bwrite() delayed us.
+ */
+ if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKINVALID(vp, np, NQL_WRITE)) {
+ do {
+ error = nqnfs_getlease(vp, NQL_WRITE, cred, p);
+ } while (error == NQNFS_EXPIRED);
+ if (error) {
+ brelse(bp);
+ return (error);
+ }
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ brelse(bp);
+ if (error = nfs_vinvalbuf(vp, V_SAVE, cred, p, 1))
+ return (error);
+ np->n_brev = np->n_lrev;
+ goto again;
+ }
+ }
+ if (error = uiomove((char *)bp->b_data + on, n, uio)) {
+ bp->b_flags |= B_ERROR;
+ brelse(bp);
+ return (error);
+ }
+ if (bp->b_dirtyend > 0) { /* merge with the existing dirty range */
+ bp->b_dirtyoff = min(on, bp->b_dirtyoff);
+ bp->b_dirtyend = max((on + n), bp->b_dirtyend);
+ } else {
+ bp->b_dirtyoff = on;
+ bp->b_dirtyend = on + n;
+ }
+#ifndef notdef
+ if (bp->b_validend == 0 || bp->b_validend < bp->b_dirtyoff ||
+ bp->b_validoff > bp->b_dirtyend) { /* no valid range, or one disjoint from the dirty range */
+ bp->b_validoff = bp->b_dirtyoff;
+ bp->b_validend = bp->b_dirtyend;
+ } else {
+ bp->b_validoff = min(bp->b_validoff, bp->b_dirtyoff);
+ bp->b_validend = max(bp->b_validend, bp->b_dirtyend);
+ }
+#else
+ bp->b_validoff = bp->b_dirtyoff;
+ bp->b_validend = bp->b_dirtyend;
+#endif
+ if (ioflag & IO_APPEND)
+ bp->b_flags |= B_APPENDWRITE; /* nfs_doio() passes IO_APPEND to the write rpc for this */
+
+ /*
+ * If the lease is non-cachable or IO_SYNC do bwrite().
+ */
+ if ((np->n_flag & NQNFSNONCACHE) || (ioflag & IO_SYNC)) {
+ bp->b_proc = p;
+ if (error = VOP_BWRITE(bp))
+ return (error);
+ } else if ((n + on) == biosize &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0) { /* wrote up to the end of the block: start the write now */
+ bp->b_proc = (struct proc *)0;
+ bawrite(bp);
+ } else
+ bdwrite(bp);
+ } while (uio->uio_resid > 0 && n > 0);
+ return (0);
+}
+
+/*
+ * Fetch the nfs cache block (bn, size) of vp, marked busy.
+ * getblk() supplies a new block when the wanted one is not in the cache.
+ * On a mount with NFSMNT_INT set the wait for the block can be broken by
+ * a signal; when nfs_sigintr() reports one, NULL is returned instead of
+ * a buffer.
+ */
+struct buf *
+nfs_getcacheblk(vp, bn, size, p)
+	struct vnode *vp;
+	daddr_t bn;
+	int size;
+	struct proc *p;
+{
+	struct nfsmount *mnt = VFSTONFS(vp->v_mount);
+	register struct buf *blk;
+
+	/* Mount is not interruptible: one plain getblk() does it. */
+	if ((mnt->nm_flag & NFSMNT_INT) == 0)
+		return (getblk(vp, bn, size, 0, 0));
+
+	/*
+	 * First try catches signals; every retry after that sleeps for at
+	 * most two seconds so that the signal check gets re-run.
+	 */
+	for (blk = getblk(vp, bn, size, PCATCH, 0);
+	    blk == (struct buf *)0;
+	    blk = getblk(vp, bn, size, 0, 2 * hz)) {
+		if (nfs_sigintr(mnt, (struct nfsreq *)0, p))
+			return ((struct buf *)0);
+	}
+	return (blk);
+}
+
+/*
+ * Write out and invalidate the buffers of vp by way of vinvalbuf().
+ * Only one process flushes a given nfsnode at a time (NFLUSHINPROG);
+ * anybody else sets NFLUSHWANT and sleeps until the flusher is done.
+ * intrflg asks for the waits to be interruptible, which is honoured only
+ * on a mount with NFSMNT_INT set.
+ * Returns 0 once the flush is complete, or EINTR when nfs_sigintr()
+ * reports a signal.
+ */
+nfs_vinvalbuf(vp, flags, cred, p, intrflg)
+	struct vnode *vp;
+	int flags;
+	struct ucred *cred;
+	struct proc *p;
+	int intrflg;
+{
+	struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	register struct nfsnode *np = VTONFS(vp);
+	int error = 0, slpflag, slptimeo, rv;
+
+	/* Signals are only looked at on an interruptible mount. */
+	if ((nmp->nm_flag & NFSMNT_INT) == 0)
+		intrflg = 0;
+	slpflag = intrflg ? PCATCH : 0;
+	slptimeo = intrflg ? 2 * hz : 0;
+
+	/*
+	 * Somebody else may be part way through a flush: wait it out.
+	 */
+	for (;;) {
+		if ((np->n_flag & NFLUSHINPROG) == 0)
+			break;
+		np->n_flag |= NFLUSHWANT;
+		error = tsleep((caddr_t)&np->n_flag, PRIBIO + 2, "nfsvinval",
+		    slptimeo);
+		if (error && intrflg &&
+		    nfs_sigintr(nmp, (struct nfsreq *)0, p))
+			return (EINTR);
+	}
+
+	/*
+	 * Our turn. The first attempt uses slpflag and no timeout, the
+	 * retries use the timeout and no slpflag.
+	 */
+	np->n_flag |= NFLUSHINPROG;
+	for (error = vinvalbuf(vp, flags, cred, p, slpflag, 0); error;
+	    error = vinvalbuf(vp, flags, cred, p, 0, slptimeo)) {
+		if (intrflg && nfs_sigintr(nmp, (struct nfsreq *)0, p)) {
+			/* Giving up: NMODIFIED is left as it was. */
+			np->n_flag &= ~NFLUSHINPROG;
+			rv = EINTR;
+			goto wake;
+		}
+	}
+	np->n_flag &= ~(NMODIFIED | NFLUSHINPROG);
+	rv = 0;
+wake:
+	/* Let go of anyone who queued up behind this flush. */
+	if (np->n_flag & NFLUSHWANT) {
+		np->n_flag &= ~NFLUSHWANT;
+		wakeup((caddr_t)&np->n_flag);
+	}
+	return (rv);
+}
+
+/*
+ * Hand a buffer to an nfsiod for asynchronous I/O.
+ * EIO is returned when no nfsiod is idle (or none exist at all), so that
+ * async requests do not pile up behind nfsiods that are all hung on a
+ * dead server; otherwise the buffer is queued on nfs_bufq, the chosen
+ * nfsiod is woken and 0 is returned.
+ */
+nfs_asyncio(bp, cred)
+	register struct buf *bp;
+	struct ucred *cred;
+{
+	register int slot;
+	struct ucred **credp;
+
+	if (nfs_numasync == 0)
+		return (EIO);
+
+	/* Pick the first nfsiod that has announced itself as idle. */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON; slot++)
+		if (nfs_iodwant[slot])
+			break;
+	if (slot == NFS_MAXASYNCDAEMON)
+		return (EIO);
+
+	/*
+	 * Attach the credential for the direction of the transfer, unless
+	 * the buffer already carries one.
+	 */
+	credp = (bp->b_flags & B_READ) ? &bp->b_rcred : &bp->b_wcred;
+	if (*credp == NOCRED && cred != NOCRED) {
+		crhold(cred);
+		*credp = cred;
+	}
+
+	TAILQ_INSERT_TAIL(&nfs_bufq, bp, b_freelist);
+	nfs_iodwant[slot] = (struct proc *)0;
+	wakeup((caddr_t)&nfs_iodwant[slot]);
+	return (0);
+}
+
+/*
+ * Do an I/O operation to/from a cache block. This may be called
+ * synchronously or from an nfsiod.
+ *
+ * bp is the buffer to fill (B_READ set) or whose dirty region is to be
+ * pushed to the server. cr is the credential handed to the rpc routines
+ * and p the process the I/O is done for; p may be null (it is tested
+ * before use below).
+ * The buffer is always completed with biodone(). Returns 0 or the errno
+ * from the rpc.
+ */
+int
+nfs_doio(bp, cr, p)
+	register struct buf *bp;
+	struct ucred *cr;	/* was "struct cred": no such type, callers pass a ucred */
+	struct proc *p;
+{
+	register struct uio *uiop;
+	register struct vnode *vp;
+	struct nfsnode *np;
+	struct nfsmount *nmp;
+	int error = 0, diff, len;	/* error stays 0 if no rpc is issued */
+	struct uio uio;
+	struct iovec io;
+
+	vp = bp->b_vp;
+	np = VTONFS(vp);
+	nmp = VFSTONFS(vp->v_mount);
+	uiop = &uio;
+	uiop->uio_iov = &io;
+	uiop->uio_iovcnt = 1;
+	uiop->uio_segflg = UIO_SYSSPACE;
+	uiop->uio_procp = p;
+
+	/*
+	 * Historically, paging was done with physio, but no more.
+	 */
+	if (bp->b_flags & B_PHYS)
+		panic("doio phys");
+	if (bp->b_flags & B_READ) {
+		io.iov_len = uiop->uio_resid = bp->b_bcount;
+		io.iov_base = bp->b_data;
+		uiop->uio_rw = UIO_READ;
+		switch (vp->v_type) {
+		case VREG:
+			uiop->uio_offset = bp->b_blkno * DEV_BSIZE;
+			nfsstats.read_bios++;
+			error = nfs_readrpc(vp, uiop, cr);
+			if (!error) {
+				bp->b_validoff = 0;
+				if (uiop->uio_resid) {
+					/*
+					 * If len > 0, there is a hole in the file and
+					 * no writes after the hole have been pushed to
+					 * the server yet.
+					 * Just zero fill the rest of the valid area.
+					 */
+					diff = bp->b_bcount - uiop->uio_resid;
+					len = np->n_size - (bp->b_blkno * DEV_BSIZE
+					    + diff);
+					if (len > 0) {
+						len = min(len, uiop->uio_resid);
+						bzero((char *)bp->b_data + diff, len);
+						bp->b_validend = diff + len;
+					} else
+						bp->b_validend = diff;
+				} else
+					bp->b_validend = bp->b_bcount;
+			}
+			/*
+			 * The file is running as program text and has changed
+			 * underneath the process: kill the process.
+			 */
+			if (p && (vp->v_flag & VTEXT) &&
+			    (((nmp->nm_flag & NFSMNT_NQNFS) &&
+			      np->n_lrev != np->n_brev) ||
+			     (!(nmp->nm_flag & NFSMNT_NQNFS) &&
+			      np->n_mtime != np->n_vattr.va_mtime.ts_sec))) {
+				uprintf("Process killed due to text file modification\n");
+				psignal(p, SIGKILL);
+				p->p_flag |= P_NOSWAP;
+			}
+			break;
+		case VLNK:
+			uiop->uio_offset = 0;
+			nfsstats.readlink_bios++;
+			error = nfs_readlinkrpc(vp, uiop, cr);
+			break;
+		case VDIR:
+			uiop->uio_offset = bp->b_lblkno;
+			nfsstats.readdir_bios++;
+			if (nmp->nm_flag & NFSMNT_NQNFS)
+				error = nfs_readdirlookrpc(vp, uiop, cr);
+			else
+				error = nfs_readdirrpc(vp, uiop, cr);
+			/*
+			 * Save offset cookie in b_blkno.
+			 */
+			bp->b_blkno = uiop->uio_offset;
+			break;
+		default:
+			/* No rpc for other vnode types; error is still 0. */
+			break;
+		};
+		if (error) {
+			bp->b_flags |= B_ERROR;
+			bp->b_error = error;
+		}
+	} else {
+		/* Only the dirty region of the block goes to the server. */
+		io.iov_len = uiop->uio_resid = bp->b_dirtyend
+		    - bp->b_dirtyoff;
+		uiop->uio_offset = (bp->b_blkno * DEV_BSIZE)
+		    + bp->b_dirtyoff;
+		io.iov_base = (char *)bp->b_data + bp->b_dirtyoff;
+		uiop->uio_rw = UIO_WRITE;
+		nfsstats.write_bios++;
+		if (bp->b_flags & B_APPENDWRITE)
+			error = nfs_writerpc(vp, uiop, cr, IO_APPEND);
+		else
+			error = nfs_writerpc(vp, uiop, cr, 0);
+		bp->b_flags &= ~(B_WRITEINPROG | B_APPENDWRITE);
+
+		/*
+		 * For an interrupted write, the buffer is still valid and the
+		 * write hasn't been pushed to the server yet, so we can't set
+		 * B_ERROR and report the interruption by setting B_EINTR. For
+		 * the B_ASYNC case, B_EINTR is not relevant, so the rpc attempt
+		 * is essentially a noop.
+		 */
+		if (error == EINTR) {
+			bp->b_flags &= ~B_INVAL;
+			bp->b_flags |= B_DELWRI;
+
+			/*
+			 * Since for the B_ASYNC case, nfs_bwrite() has reassigned the
+			 * buffer to the clean list, we have to reassign it back to the
+			 * dirty one. Ugh.
+			 */
+			if (bp->b_flags & B_ASYNC)
+				reassignbuf(bp, vp);
+			else
+				bp->b_flags |= B_EINTR;
+		} else {
+			if (error) {
+				/* Saved on the nfsnode so nfs_write() can report it. */
+				bp->b_flags |= B_ERROR;
+				bp->b_error = np->n_error = error;
+				np->n_flag |= NWRITEERR;
+			}
+			bp->b_dirtyoff = bp->b_dirtyend = 0;
+		}
+	}
+	bp->b_resid = uiop->uio_resid;
+	biodone(bp);
+	return (error);
+}
diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c
new file mode 100644
index 0000000..5d86b42
--- /dev/null
+++ b/sys/nfsclient/nfs_nfsiod.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])();
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head; /* head of the nfsd list; NFSD_CHECKSLP is tested on it */
+extern int nqsrv_writeslack;
+extern int nfsrtton;
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock; /* the slots nfssvc_addsock() uses for UDP and (ISO) CLTP sockets */
+int nuidhash_max = NFS_MAXUIDHASH; /* cap on nfsuid entries per socket, see nfssvc() */
+static int nfs_numnfsd = 0; /* bumped in nfssvc_nfsd() when an nfsd is set up */
+int nfsd_waiting = 0; /* number of nfsds asleep waiting for work */
+static int notstarted = 1;
+static int modify_flag = 0;
+static struct nfsdrt nfsdrt;
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define TRUE 1
+#define FALSE 0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * getfh system call: return the file handle of a file.
+ * Restricted to the super user. uap->fname names the file (a user space
+ * path, with symbolic links followed) and the handle, made up of the
+ * file system id of the mount and the fid from VFS_VPTOFH(), is copied
+ * out to uap->fhp.
+ * Returns 0 or the errno of whichever step failed.
+ */
+struct getfh_args {
+	char *fname;
+	fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct vnode *target;
+	fhandle_t handle;
+	int error;
+
+	/* Root only. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	target = nd.ni_vp;
+
+	/* Start from an all-zero handle, then fill in fsid and fid. */
+	bzero((caddr_t)&handle, sizeof(handle));
+	handle.fh_fsid = target->v_mount->mnt_stat.f_fsid;
+	error = VFS_VPTOFH(target, &handle.fh_fid);
+	vput(target);	/* the vnode is released whether or not that worked */
+	if (error == 0)
+		error = copyout((caddr_t)&handle, (caddr_t)uap->fhp,
+		    sizeof (handle));
+	return (error);
+}
+
+static struct nfssvc_sock nfssvc_sockhead; /* head of the ns_next/ns_prev list of server sockets; its ns_flag carries SLP_INIT/SLP_WANTINIT */
+
+/*
+ * Nfs server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - remains in the kernel as an nfsiod (NFSSVC_BIOD)
+ * - hands a mount point to nqnfs_clientd() (NFSSVC_MNTD)
+ * - adds a socket to the selection list (NFSSVC_ADDSOCK)
+ * - remains in the kernel as an nfsd (anything else); on the way in,
+ *   NFSSVC_AUTHIN caches the supplied uid to credential mapping on the
+ *   nfsd's socket and NFSSVC_AUTHINFAIL marks the nfsd NFSD_AUTHFAIL
+ *
+ * Super user only. uap->argp points at the user space argument structure
+ * for the chosen operation. Returns 0 or an errno; EINTR and ERESTART
+ * from the operation itself are turned into 0.
+ */
+struct nfssvc_args {
+	int flag;
+	caddr_t argp;
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/* Hold off while the socket list is flagged SLP_INIT. */
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+		    ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		/* The path has to be the root of a mounted file system. */
+		if ((nd.ni_vp->v_flag & VROOT) == 0)
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+		    (uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+		    uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+		    sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+		    MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+		    (nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+				/*
+				 * Nope, so we will.
+				 */
+				if (slp->ns_numuids < nuidhash_max) {
+					slp->ns_numuids++;
+					nuidp = (struct nfsuid *)
+					    malloc(sizeof (struct nfsuid), M_NFSUID,
+						M_WAITOK);
+				} else
+					nuidp = (struct nfsuid *)0;
+				/*
+				 * SLP_VALID is tested again because it was
+				 * last checked before the M_WAITOK malloc.
+				 */
+				if ((slp->ns_flag & SLP_VALID) == 0) {
+					if (nuidp)
+						free((caddr_t)nuidp, M_NFSUID);
+				} else {
+					if (nuidp == (struct nfsuid *)0) {
+						/*
+						 * Table is full: recycle the
+						 * least recently used entry.
+						 * Take it off the LRU list and
+						 * off its hash chain. An entry
+						 * at the front of its chain has
+						 * no nu_hprev, so the bucket
+						 * head itself has to be moved
+						 * on or it would keep pointing
+						 * at the reused entry.
+						 */
+						nuidp = slp->ns_lruprev;
+						remque(nuidp);
+						if (nuidp->nu_hprev)
+							nuidp->nu_hprev->nu_hnext =
+							    nuidp->nu_hnext;
+						else
+							slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] =
+							    nuidp->nu_hnext;
+						if (nuidp->nu_hnext)
+							nuidp->nu_hnext->nu_hprev =
+							    nuidp->nu_hprev;
+					}
+					nuidp->nu_cr = nsd->nsd_cr;
+					if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+						nuidp->nu_cr.cr_ngroups = NGROUPS;
+					nuidp->nu_cr.cr_ref = 1;
+					nuidp->nu_uid = nsd->nsd_uid;
+					/* Front of the LRU list and of its hash chain. */
+					insque(nuidp, (struct nfsuid *)slp);
+					nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+					if (nuidp->nu_hnext = *nuh)
+						nuidp->nu_hnext->nu_hprev = nuidp;
+					nuidp->nu_hprev = (struct nfsuid *)0;
+					*nuh = nuidp;
+				}
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	if (error == EINTR || error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ *
+ * fp is the file the socket was passed in on and mynam an address mbuf
+ * for it (may be null). mynam is freed on every error return, otherwise
+ * it is kept in ns_nam.
+ * Returns 0, EPERM if the UDP (or CLTP) slot is already in service, or
+ * the error from soreserve().
+ */
+nfssvc_addsock(fp, mynam)
+ struct file *fp;
+ struct mbuf *mynam;
+{
+ register struct mbuf *m;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ struct nfssvc_sock *tslp;
+ int error, s;
+
+ so = (struct socket *)fp->f_data;
+ tslp = (struct nfssvc_sock *)0;
+ /*
+ * Add it to the list, as required.
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+ tslp = nfs_udpsock; /* UDP always uses this one slot */
+ if (tslp->ns_flag & SLP_VALID) { /* slot already in service */
+ m_freem(mynam);
+ return (EPERM);
+ }
+#ifdef ISO
+ } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ tslp = nfs_cltpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#endif /* ISO */
+ }
+ if (so->so_type == SOCK_STREAM)
+ siz = NFS_MAXPACKET + sizeof (u_long); /* extra u_long is presumably for the RPC record mark - confirm */
+ else
+ siz = NFS_MAXPACKET;
+ if (error = soreserve(so, siz, siz)) {
+ m_freem(mynam);
+ return (error);
+ }
+
+ /*
+ * Set protocol specific options { for now TCP only } and
+ * reserve some space. For datagram sockets, this can get called
+ * repeatedly for the same socket, but that isn't harmful.
+ */
+ if (so->so_type == SOCK_STREAM) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1; /* option value: on */
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_domain->dom_family == AF_INET &&
+ so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ so->so_rcv.sb_flags &= ~SB_NOINTR;
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_flags &= ~SB_NOINTR;
+ so->so_snd.sb_timeo = 0;
+ if (tslp)
+ slp = tslp;
+ else {
+ slp = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+ slp->ns_prev = nfssvc_sockhead.ns_prev; /* link in at the tail of the nfssvc_sockhead list */
+ slp->ns_prev->ns_next = slp;
+ slp->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = slp;
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp; /* empty uid LRU list: points back at itself */
+ }
+ slp->ns_so = so;
+ slp->ns_nam = mynam;
+ fp->f_count++; /* hold a reference on the file */
+ slp->ns_fp = fp;
+ s = splnet(); /* raise spl while the upcall is installed and the socket marked valid */
+ so->so_upcallarg = (caddr_t)slp;
+ so->so_upcall = nfsrv_rcv;
+ slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+ nfsrv_wakenfsd(slp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ *
+ * nsd - server args; nsd_nfsd holds this daemon's struct nfsd (allocated
+ * here on the first call when it is null). The nsd_uid, nsd_haddr,
+ * nsd_authlen and nsd_authstr fields are filled in before an
+ * ENEEDAUTH return.
+ * argp - user address that *nsd is copied out to before an ENEEDAUTH return.
+ * p - calling process, recorded in nd_procp.
+ *
+ * Returns ENEEDAUTH (leaving the nfsd state in place, with NFSD_REQINPROG
+ * still set so the next call resumes the same request) or, after freeing
+ * the nfsd state at "done", the error that ended the loop.
+ */
+nfssvc_nfsd(nsd, argp, p)
+ struct nfsd_srvargs *nsd;
+ caddr_t argp;
+ struct proc *p;
+{
+ register struct mbuf *m, *nam2;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ register int *solockp;
+ struct nfsd *nd = nsd->nsd_nfsd;
+ struct mbuf *mreq, *nam;
+ struct timeval starttime;
+ struct nfsuid *uidp;
+ int error, cacherep, s;
+ int sotype;
+
+ s = splnet();
+ /* First call for this daemon: allocate its nfsd state and queue it. */
+ if (nd == (struct nfsd *)0) {
+ nsd->nsd_nfsd = nd = (struct nfsd *)
+ malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+ bzero((caddr_t)nd, sizeof (struct nfsd));
+ nd->nd_procp = p;
+ nd->nd_cr.cr_ref = 1;
+ insque(nd, &nfsd_head);
+ nd->nd_nqlflag = NQL_NOVAL;
+ nfs_numnfsd++;
+ }
+ /*
+ * Loop getting rpc requests until SIGKILL.
+ */
+ for (;;) {
+ if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+ /*
+ * No socket assigned and no socket flagged for service:
+ * sleep until woken. A non-zero tsleep() result ends the daemon.
+ */
+ while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+ nd->nd_flag |= NFSD_WAITING;
+ nfsd_waiting++;
+ error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+ nfsd_waiting--;
+ if (error)
+ goto done;
+ }
+ /*
+ * Still unassigned: scan the socket list for a valid socket with
+ * SLP_DOREC set, claim it and take a reference on it.
+ */
+ if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+ slp = nfssvc_sockhead.ns_next;
+ while (slp != &nfssvc_sockhead) {
+ if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+ == (SLP_VALID | SLP_DOREC)) {
+ slp->ns_flag &= ~SLP_DOREC;
+ slp->ns_sref++;
+ nd->nd_slp = slp;
+ break;
+ }
+ slp = slp->ns_next;
+ }
+ /* Whole list scanned without a match: nothing left to check. */
+ if (slp == &nfssvc_sockhead)
+ nfsd_head.nd_flag &= ~NFSD_CHECKSLP;
+ }
+ if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+ continue;
+ /*
+ * NOTE(review): error is only assigned here when SLP_VALID is set;
+ * for an invalid socket the test after this block relies on its
+ * second clause.
+ */
+ if (slp->ns_flag & SLP_VALID) {
+ if (slp->ns_flag & SLP_DISCONN)
+ nfsrv_zapsock(slp);
+ else if (slp->ns_flag & SLP_NEEDQ) {
+ slp->ns_flag &= ~SLP_NEEDQ;
+ (void) nfs_sndlock(&slp->ns_solock,
+ (struct nfsreq *)0);
+ nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+ M_WAIT);
+ nfs_sndunlock(&slp->ns_solock);
+ }
+ error = nfsrv_dorec(slp, nd);
+ nd->nd_flag |= NFSD_REQINPROG;
+ }
+ } else {
+ /* Re-entered with a request already in progress. */
+ error = 0;
+ slp = nd->nd_slp;
+ }
+ /* No request obtained or socket gone: drop it and look again. */
+ if (error || (slp->ns_flag & SLP_VALID) == 0) {
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nfsrv_slpderef(slp);
+ continue;
+ }
+ /* Restore the saved spl while the request is processed. */
+ splx(s);
+ so = slp->ns_so;
+ sotype = so->so_type;
+ starttime = time;
+ /* The send lock is only taken for protocols with PR_CONNREQUIRED. */
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &slp->ns_solock;
+ else
+ solockp = (int *)0;
+ /*
+ * nam == nam2 for connectionless protocols such as UDP
+ * nam2 == NULL for connection based protocols to disable
+ * recent request caching.
+ */
+ if (nam2 = nd->nd_nam) {
+ nam = nam2;
+ cacherep = RC_CHECKIT;
+ } else {
+ nam = slp->ns_nam;
+ cacherep = RC_DOIT;
+ }
+
+ /*
+ * Check to see if authorization is needed.
+ */
+ if (nd->nd_flag & NFSD_NEEDAUTH) {
+ static int logauth = 0;
+
+ nd->nd_flag &= ~NFSD_NEEDAUTH;
+ /*
+ * Check for a mapping already installed.
+ */
+ uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+ while (uidp) {
+ if (uidp->nu_uid == nd->nd_cr.cr_uid)
+ break;
+ uidp = uidp->nu_hnext;
+ }
+ if (!uidp) {
+ /*
+ * No mapping: pass the uid, peer address and auth string out
+ * to user space. If either copyout fails the request is dropped.
+ */
+ nsd->nsd_uid = nd->nd_cr.cr_uid;
+ if (nam2 && logauth++ == 0)
+ log(LOG_WARNING, "Kerberized NFS using UDP\n");
+ nsd->nsd_haddr =
+ mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ nsd->nsd_authlen = nd->nd_authlen;
+ if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+ nd->nd_authlen) == 0 &&
+ copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+ return (ENEEDAUTH);
+ cacherep = RC_DROPIT;
+ }
+ }
+ if (cacherep == RC_CHECKIT)
+ cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+ /*
+ * Check for just starting up for NQNFS and send
+ * fake "try again later" replies to the NQNFS clients.
+ */
+ if (notstarted && nqnfsstarttime <= time.tv_sec) {
+ if (modify_flag) {
+ nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+ modify_flag = 0;
+ } else
+ notstarted = 0;
+ }
+ if (notstarted) {
+ if (nd->nd_nqlflag == NQL_NOVAL)
+ cacherep = RC_DROPIT;
+ else if (nd->nd_procnum != NFSPROC_WRITE) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_TRYLATER;
+ cacherep = RC_DOIT;
+ } else
+ modify_flag = 1;
+ } else if (nd->nd_flag & NFSD_AUTHFAIL) {
+ nd->nd_flag &= ~NFSD_AUTHFAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_AUTHERR;
+ cacherep = RC_DOIT;
+ }
+
+ switch (cacherep) {
+ case RC_DOIT:
+ /* Dispatch through the procedure table, indexed by nd_procnum. */
+ error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+ nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+ nam, &mreq);
+ if (nd->nd_cr.cr_ref != 1) {
+ printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+ panic("nfssvc cref");
+ }
+ if (error) {
+ if (nd->nd_procnum != NQNFSPROC_VACATED)
+ nfsstats.srv_errs++;
+ if (nam2) {
+ nfsrv_updatecache(nam2, nd, FALSE, mreq);
+ m_freem(nam2);
+ }
+ break;
+ }
+ nfsstats.srvrpccnt[nd->nd_procnum]++;
+ if (nam2)
+ nfsrv_updatecache(nam2, nd, TRUE, mreq);
+ nd->nd_mrep = (struct mbuf *)0;
+ /* Success: fall through and send the reply built in mreq. */
+ case RC_REPLY:
+ /* Total the reply chain length and sanity-check it. */
+ m = mreq;
+ siz = 0;
+ while (m) {
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = mreq;
+ m->m_pkthdr.len = siz;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ /* High bit set, reply length in the low bits. */
+ *mtod(m, u_long *) = htonl(0x80000000 | siz);
+ }
+ if (solockp)
+ (void) nfs_sndlock(solockp, (struct nfsreq *)0);
+ /* Socket may have been invalidated meanwhile: treat that as EPIPE. */
+ if (slp->ns_flag & SLP_VALID)
+ error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+ else {
+ error = EPIPE;
+ m_freem(m);
+ }
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ if (nam2)
+ MFREE(nam2, m);
+ if (nd->nd_mrep)
+ m_freem(nd->nd_mrep);
+ if (error == EPIPE)
+ nfsrv_zapsock(slp);
+ if (solockp)
+ nfs_sndunlock(solockp);
+ /* Interrupted send: release the socket and exit at splnet. */
+ if (error == EINTR || error == ERESTART) {
+ nfsrv_slpderef(slp);
+ s = splnet();
+ goto done;
+ }
+ break;
+ case RC_DROPIT:
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ m_freem(nd->nd_mrep);
+ m_freem(nam2);
+ break;
+ };
+ s = splnet();
+ /*
+ * Try to pull the next request from the same socket; a non-zero
+ * result releases the socket so another can be picked.
+ */
+ if (nfsrv_dorec(slp, nd)) {
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nfsrv_slpderef(slp);
+ }
+ }
+done:
+ /* Daemon exit: dequeue and free the nfsd; the last one out reinitializes. */
+ remque(nd);
+ splx(s);
+ free((caddr_t)nd, M_NFSD);
+ nsd->nsd_nfsd = (struct nfsd *)0;
+ if (--nfs_numnfsd == 0)
+ nfsrv_init(TRUE); /* Reinitialize everything */
+ return (error);
+}
+
+/*
+ * Asynchronous I/O daemon for client nfs.
+ * Performs the read-ahead and write-behind operations queued on nfs_bufq.
+ *
+ * Claims the first free slot in nfs_asyncdaemon[] (returns EBUSY when
+ * every slot is taken), then alternates between sleeping while the queue
+ * is empty and draining it. It returns only after the sleep reports an
+ * error; the queue is drained once more and the slot released first.
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int slot, myiod = -1;
+	int error = 0;
+
+	/* Locate and claim the first unused daemon slot. */
+	for (slot = 0; slot < NFS_MAXASYNCDAEMON && myiod == -1; slot++) {
+		if (nfs_asyncdaemon[slot] == 0) {
+			nfs_asyncdaemon[slot]++;
+			myiod = slot;
+		}
+	}
+	if (myiod < 0)
+		return (EBUSY);
+	nfs_numasync++;
+
+	do {
+		/* Idle until work shows up or the sleep is interrupted. */
+		while (error == 0 && nfs_bufq.tqh_first == NULL) {
+			nfs_iodwant[myiod] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[myiod],
+			    PWAIT | PCATCH, "nfsidl", 0);
+		}
+
+		/* Service every queued buffer, always taking the head. */
+		for (bp = nfs_bufq.tqh_first; bp != NULL;
+		    bp = nfs_bufq.tqh_first) {
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			(void) nfs_doio(bp,
+			    (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred,
+			    (struct proc *)0);
+		}
+	} while (error == 0);
+
+	/* Interrupted: give the slot back and report why. */
+	nfs_asyncdaemon[myiod] = 0;
+	nfs_numasync--;
+	return (error);
+}
+
+/*
+ * Shut down the socket associated with an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ *
+ * The SLP_ALLFLAGS bits of ns_flag are cleared first. If a file is still
+ * attached, the upcall is removed, the socket is shut down and closed,
+ * the peer name and the raw/record mbuf chains are freed, and every
+ * cached nfsuid entry is released (LRU ring and hash buckets reset).
+ * The nfssvc_sock itself is left allocated and linked.
+ */
+nfsrv_zapsock(slp)
+	register struct nfssvc_sock *slp;
+{
+	register struct nfsuid *cur, *next;
+	register int bucket;
+	struct socket *so;
+	struct file *fp;
+	struct mbuf *m;
+
+	slp->ns_flag &= ~SLP_ALLFLAGS;
+	fp = slp->ns_fp;
+	if (fp != (struct file *)0) {
+		/* Detach the file first so a second call finds nothing to do. */
+		slp->ns_fp = (struct file *)0;
+		so = slp->ns_so;
+		so->so_upcall = NULL;
+		soshutdown(so, 2);
+		closef(fp, (struct proc *)0);
+
+		/* Release the mbufs hanging off the structure. */
+		if (slp->ns_nam)
+			MFREE(slp->ns_nam, m);
+		m_freem(slp->ns_raw);
+		m_freem(slp->ns_rec);
+
+		/* Free the uid entries; the ring ends back at slp itself. */
+		for (cur = slp->ns_lrunext; cur != (struct nfsuid *)slp;
+		    cur = next) {
+			next = cur->nu_lrunext;
+			free((caddr_t)cur, M_NFSUID);
+		}
+		slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+		for (bucket = 0; bucket < NUIDHASHSIZ; bucket++)
+			slp->ns_uidh[bucket] = (struct nfsuid *)0;
+	}
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it.
+ *
+ * Waits (in 2 second slices, polling nfs_sigintr()) until the mount has
+ * NFSMNT_WAITAUTH set, publishes a freshly allocated RPCAUTH_MAXSIZ
+ * buffer through nm_authstr and *auth_str together with the uid, wakes
+ * the sleeper on nm_authstr, and then waits for NFSMNT_HASAUTH.
+ *
+ * On success *auth_type and *auth_len are filled in from the mount and
+ * the buffer in *auth_str is left allocated for the caller. On failure
+ * (signal, or NFSMNT_AUTHERR which yields EAUTH) the buffer is freed.
+ * Returns 0 or the error.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+	struct ucred *cred;
+	int *auth_type;
+	char **auth_str;
+	int *auth_len;
+{
+	char *authbuf;
+	int error = 0;
+
+	/* Queue up behind any other request already using the helper. */
+	while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
+		nmp->nm_flag |= NFSMNT_WANTAUTH;
+		(void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+		    "nfsauth1", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+		if (error != 0) {
+			nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+			return (error);
+		}
+	}
+
+	/* Our turn: hand the request over and wake the waiter. */
+	nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+	authbuf = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+	*auth_str = authbuf;
+	nmp->nm_authstr = authbuf;
+	nmp->nm_authuid = cred->cr_uid;
+	wakeup((caddr_t)&nmp->nm_authstr);
+
+	/*
+	 * And wait for mount_nfs to do its stuff.
+	 */
+	while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
+		(void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+		    "nfsauth2", 2 * hz);
+		error = nfs_sigintr(nmp, rep, rep->r_procp);
+	}
+	if (nmp->nm_flag & NFSMNT_AUTHERR) {
+		nmp->nm_flag &= ~NFSMNT_AUTHERR;
+		error = EAUTH;
+	}
+	if (error == 0) {
+		*auth_type = nmp->nm_authtype;
+		*auth_len = nmp->nm_authlen;
+	} else
+		free((caddr_t)*auth_str, M_TEMP);
+
+	/* Re-arm for the next request and let a queued one proceed. */
+	nmp->nm_flag &= ~NFSMNT_HASAUTH;
+	nmp->nm_flag |= NFSMNT_WAITAUTH;
+	if (nmp->nm_flag & NFSMNT_WANTAUTH) {
+		nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+		wakeup((caddr_t)&nmp->nm_authtype);
+	}
+	return (error);
+}
+
+/*
+ * Dereference a server socket structure: drop one reference and, once
+ * the count reaches zero on a structure that is no longer SLP_VALID,
+ * unlink it from the socket list and free it.
+ */
+void
+nfsrv_slpderef(slp)
+	register struct nfssvc_sock *slp;
+{
+	slp->ns_sref--;
+	if (slp->ns_sref != 0 || (slp->ns_flag & SLP_VALID) != 0)
+		return;
+
+	/* Last reference to a dead socket: splice it out and release it. */
+	slp->ns_prev->ns_next = slp->ns_next;
+	slp->ns_next->ns_prev = slp->ns_prev;
+	free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * Initialize the data structures for the server.
+ * Handshake with any new nfsds starting up to avoid any chance of
+ * corruption: SLP_INIT is held in the list head for the duration, and a
+ * waiter that set SLP_WANTINIT is woken at the end.
+ *
+ * terminating - non-zero when the last nfsd is exiting; every socket
+ * structure on the list is then shut down (if still valid), unlinked and
+ * freed, and the server cache is cleaned, before the fresh UDP and CLTP
+ * placeholders are allocated.
+ */
+void
+nfsrv_init(terminating)
+	int terminating;
+{
+	register struct nfssvc_sock *slp;
+	struct nfssvc_sock *nslp;
+
+	if (nfssvc_sockhead.ns_flag & SLP_INIT)
+		panic("nfsd init");
+	nfssvc_sockhead.ns_flag |= SLP_INIT;
+
+	if (terminating) {
+		for (slp = nfssvc_sockhead.ns_next; slp != &nfssvc_sockhead;
+		    slp = nslp) {
+			if (slp->ns_flag & SLP_VALID)
+				nfsrv_zapsock(slp);
+			slp->ns_next->ns_prev = slp->ns_prev;
+			slp->ns_prev->ns_next = slp->ns_next;
+			/* Pick up the successor before the entry is freed. */
+			nslp = slp->ns_next;
+			free((caddr_t)slp, M_NFSSVC);
+		}
+		nfsrv_cleancache();	/* And clear out server cache */
+	}
+
+	/* Allocate zeroed placeholder entries for the two datagram sockets. */
+	nfs_udpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+	nfs_cltpsock = (struct nfssvc_sock *)
+	    malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+	bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+
+	/* Forward links: head -> udp -> cltp -> head. */
+	nfssvc_sockhead.ns_next = nfs_udpsock;
+	nfs_udpsock->ns_next = nfs_cltpsock;
+	nfs_cltpsock->ns_next = &nfssvc_sockhead;
+	/* Backward links: head -> cltp -> udp -> head. */
+	nfssvc_sockhead.ns_prev = nfs_cltpsock;
+	nfs_cltpsock->ns_prev = nfs_udpsock;
+	nfs_udpsock->ns_prev = &nfssvc_sockhead;
+
+	/* Empty uid LRU rings point back at their own structure. */
+	nfs_udpsock->ns_lrunext = nfs_udpsock->ns_lruprev =
+	    (struct nfsuid *)nfs_udpsock;
+	nfs_cltpsock->ns_lrunext = nfs_cltpsock->ns_lruprev =
+	    (struct nfsuid *)nfs_cltpsock;
+
+	/* Empty nfsd queue. */
+	nfsd_head.nd_next = nfsd_head.nd_prev = &nfsd_head;
+	nfsd_head.nd_flag = 0;
+
+	nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+	if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) {
+		nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+		wakeup((caddr_t)&nfssvc_sockhead);
+	}
+}
+
+/*
+ * Add an entry to the server monitor log.
+ *
+ * Fills the slot at nfsdrt.pos with the cache disposition, transport and
+ * NQNFS flags, the procedure number, the client's IPv4 address
+ * (INADDR_ANY for any other address family), the elapsed time since
+ * *startp in microseconds and the current time, then advances the
+ * position modulo NFSRTTLOGSIZ.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+	struct timeval *startp;
+	int sotype;
+	register struct nfsd *nd;
+	struct mbuf *nam;
+	int cacherep;
+{
+	register struct drt *rt = &nfsdrt.drt[nfsdrt.pos];
+
+	/* Base flag reflects what the request cache decided. */
+	switch (cacherep) {
+	case RC_DOIT:
+		rt->flag = 0;
+		break;
+	case RC_REPLY:
+		rt->flag = DRT_CACHEREPLY;
+		break;
+	default:
+		rt->flag = DRT_CACHEDROP;
+		break;
+	}
+	if (sotype == SOCK_STREAM)
+		rt->flag |= DRT_TCP;
+	if (nd->nd_nqlflag != NQL_NOVAL)
+		rt->flag |= DRT_NQNFS;
+
+	rt->proc = nd->nd_procnum;
+	if (mtod(nam, struct sockaddr *)->sa_family != AF_INET)
+		rt->ipadr = INADDR_ANY;
+	else
+		rt->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+
+	rt->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+	    (time.tv_usec - startp->tv_usec);
+	rt->tstamp = time;
+
+	nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
diff --git a/sys/nfsclient/nfs_node.c b/sys/nfsclient/nfs_node.c
new file mode 100644
index 0000000..032bdef
--- /dev/null
+++ b/sys/nfsclient/nfs_node.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_node.c 8.2 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+
+/* nfsnode hash table: array of chain heads, set up by nfs_nhinit(). */
+struct nfsnode **nheadhashtbl;
+/* Mask filled in by hashinit() and applied to a file handle sum. */
+u_long nheadhash;
+/* Reduce a file handle byte sum to an index into nheadhashtbl. */
+#define NFSNOHASH(fhsum) ((fhsum)&nheadhash)
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Initialize the nfsnode hash table.
+ * Allocates the table through hashinit(), sized from desiredvnodes, and
+ * records the resulting mask in nheadhash. Outside of lint builds a
+ * diagnostic is printed when sizeof(struct nfsnode) is not a power of two.
+ */
+nfs_nhinit()
+{
+
+#ifndef lint
+	/* x & (x - 1) is non-zero exactly when x is not a power of two. */
+	if ((sizeof(struct nfsnode) - 1) & sizeof(struct nfsnode))
+		/* sizeof yields an unsigned size type; cast it to match %d. */
+		printf("nfs_nhinit: bad size %d\n",
+		    (int)sizeof(struct nfsnode));
+#endif /* not lint */
+	nheadhashtbl = hashinit(desiredvnodes, M_NFSNODE, &nheadhash);
+}
+
+/*
+ * Compute an entry in the NFS hash table structure.
+ * The NFSX_FH bytes of the file handle are summed and the sum is reduced
+ * with NFSNOHASH(); the address of the selected chain head is returned.
+ */
+struct nfsnode **
+nfs_hash(fhp)
+	register nfsv2fh_t *fhp;
+{
+	register u_char *cp = &fhp->fh_bytes[0];
+	register u_long total = 0;
+	int idx;
+
+	for (idx = 0; idx < NFSX_FH; idx++)
+		total += cp[idx];
+	return (&nheadhashtbl[NFSNOHASH(total)]);
+}
+
+/*
+ * Look up a vnode/nfsnode by file handle.
+ * Callers must check for mount points!!
+ * In all cases, a pointer to a
+ * nfsnode structure is returned.
+ *
+ * mntp - mount the node must belong to.
+ * fhp - file handle (NFSX_FH bytes) identifying the node.
+ * npp - receives the nfsnode; set to 0 when getnewvnode() fails.
+ *
+ * Returns 0 on success, or the getnewvnode() error.
+ */
+nfs_nget(mntp, fhp, npp)
+ struct mount *mntp;
+ register nfsv2fh_t *fhp;
+ struct nfsnode **npp;
+{
+ register struct nfsnode *np, *nq, **nhpp;
+ register struct vnode *vp;
+ extern int (**nfsv2_vnodeop_p)();
+ struct vnode *nvp;
+ int error;
+
+ nhpp = nfs_hash(fhp);
+loop:
+ /* Search the hash chain for a node with this mount and file handle. */
+ for (np = *nhpp; np; np = np->n_forw) {
+ if (mntp != NFSTOV(np)->v_mount ||
+ bcmp((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH))
+ continue;
+ vp = NFSTOV(np);
+ /* vget() failed: restart the scan from the chain head. */
+ if (vget(vp, 1))
+ goto loop;
+ *npp = np;
+ return(0);
+ }
+ /*
+ * Not found: create a new vnode/nfsnode pair.
+ * NOTE(review): getnewvnode() and MALLOC(M_WAITOK) can presumably sleep,
+ * and the chain is not rescanned afterwards, so a concurrent nfs_nget()
+ * for the same handle might insert a duplicate -- confirm.
+ */
+ if (error = getnewvnode(VT_NFS, mntp, nfsv2_vnodeop_p, &nvp)) {
+ *npp = 0;
+ return (error);
+ }
+ vp = nvp;
+ MALLOC(np, struct nfsnode *, sizeof *np, M_NFSNODE, M_WAITOK);
+ vp->v_data = np;
+ np->n_vnode = vp;
+ /*
+ * Insert the nfsnode in the hash queue for its new file handle
+ */
+ np->n_flag = 0;
+ /* Push at the chain head; n_back points at the pointer that references us. */
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ bcopy((caddr_t)fhp, (caddr_t)&np->n_fh, NFSX_FH);
+ np->n_attrstamp = 0;
+ np->n_direofoffset = 0;
+ np->n_sillyrename = (struct sillyrename *)0;
+ np->n_size = 0;
+ np->n_mtime = 0;
+ /* The lease-related fields are only initialized on NQNFS mounts. */
+ if (VFSTONFS(mntp)->nm_flag & NFSMNT_NQNFS) {
+ np->n_brev = 0;
+ np->n_lrev = 0;
+ np->n_expiry = (time_t)0;
+ np->n_tnext = (struct nfsnode *)0;
+ }
+ *npp = np;
+ return (0);
+}
+
+/*
+ * Vnode inactive operation for nfs.
+ * If a silly-rename record is attached to the node, the vnode's buffers
+ * are invalidated, the renamed file is removed and the record's
+ * credential and directory vnode are released. All n_flag bits except
+ * the modified/flush/NQNFS state bits are then cleared. Always returns 0.
+ */
+nfs_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct nfsnode *np = VTONFS(ap->a_vp);
+	register struct sillyrename *sp;
+	struct proc *p = curproc;	/* XXX */
+	extern int prtactive;
+
+	if (prtactive && ap->a_vp->v_usecount != 0)
+		vprint("nfs_inactive: pushing active", ap->a_vp);
+
+	/* Detach the silly-rename record before acting on it. */
+	sp = np->n_sillyrename;
+	np->n_sillyrename = (struct sillyrename *)0;
+	if (sp != (struct sillyrename *)0) {
+		/*
+		 * Remove the silly file that was rename'd earlier
+		 */
+		(void) nfs_vinvalbuf(ap->a_vp, 0, sp->s_cred, p, 1);
+		nfs_removeit(sp);
+		crfree(sp->s_cred);
+		vrele(sp->s_dvp);
+#ifdef SILLYSEPARATE
+		free((caddr_t)sp, M_NFSREQ);
+#endif
+	}
+
+	/* Keep only the state bits that must survive inactivation. */
+	np->n_flag &= (NMODIFIED | NFLUSHINPROG | NFLUSHWANT | NQNFSEVICTED |
+	    NQNFSNONCACHE | NQNFSWRITE);
+	return (0);
+}
+
+/*
+ * Reclaim an nfsnode so that it can be used for other purposes.
+ * The node is unlinked from its hash chain and, on an NQNFS mount, from
+ * the mount's timer queue when it is on it; the name cache is purged
+ * and the nfsnode storage is freed. Always returns 0.
+ */
+nfs_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+	register struct nfsnode *succ;
+	struct nfsnode *tnext, *tprev;
+	extern int prtactive;
+
+	if (prtactive && vp->v_usecount != 0)
+		vprint("nfs_reclaim: pushing active", vp);
+
+	/*
+	 * Remove the nfsnode from its hash chain.
+	 */
+	succ = np->n_forw;
+	if (succ != (struct nfsnode *)0)
+		succ->n_back = np->n_back;
+	*np->n_back = succ;
+
+	/*
+	 * For nqnfs, take it off the timer queue as required.
+	 * The mount structure itself serves as the queue's end marker.
+	 */
+	if ((nmp->nm_flag & NFSMNT_NQNFS) && np->n_tnext) {
+		tnext = np->n_tnext;
+		tprev = np->n_tprev;
+		if (tnext == (struct nfsnode *)nmp)
+			nmp->nm_tprev = tprev;
+		else
+			tnext->n_tprev = tprev;
+		if (tprev == (struct nfsnode *)nmp)
+			nmp->nm_tnext = tnext;
+		else
+			tprev->n_tnext = tnext;
+	}
+
+	cache_purge(vp);
+	FREE(vp->v_data, M_NFSNODE);
+	vp->v_data = (void *)0;
+	return (0);
+}
+
+/*
+ * Lock an nfsnode.
+ * No lock is actually taken: the call only waits for VXLOCK to clear
+ * and then returns ENOENT if the vnode's tag has become VT_NON, else 0.
+ */
+nfs_lock(ap)
+	struct vop_lock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	/*
+	 * Ugh, another place where interruptible mounts will get hung.
+	 * If you make this sleep interruptible, then you have to fix all
+	 * the VOP_LOCK() calls to expect interruptibility.
+	 */
+	while ((vp->v_flag & VXLOCK) != 0) {
+		vp->v_flag |= VXWANT;
+		sleep((caddr_t)vp, PINOD);
+	}
+	return (vp->v_tag == VT_NON ? ENOENT : 0);
+}
+
+/*
+ * Unlock an nfsnode.
+ * Nothing to undo, since nfs_lock() takes no lock; always returns 0.
+ */
+/* ARGSUSED */
+nfs_unlock(ap)
+	struct vop_unlock_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return 0;
+}
+
+/*
+ * Check for a locked nfsnode.
+ * Always reports "not locked" (0).
+ */
+/* ARGSUSED */
+nfs_islocked(ap)
+	struct vop_islocked_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	return 0;
+}
+
+/*
+ * Nfs abort op, called after namei() when a CREATE/DELETE isn't actually
+ * done. The only work is to release the pathname buffer when the
+ * component name has HASBUF set without SAVESTART. Always returns 0.
+ */
+/* ARGSUSED */
+int
+nfs_abortop(ap)
+	struct vop_abortop_args /* {
+		struct vnode *a_dvp;
+		struct componentname *a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+
+	if ((cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF)
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+	return (0);
+}
diff --git a/sys/nfsclient/nfs_socket.c b/sys/nfsclient/nfs_socket.c
new file mode 100644
index 0000000..cf88ed3
--- /dev/null
+++ b/sys/nfsclient/nfs_socket.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write - A+4D
+ * other - nm_timeo
+ *
+ * NFS_RTO(n, t): n is the nfsmount, t the timer class (0 = default,
+ * 1..4 select slot t - 1 of nm_srtt[]/nm_sdrtt[]). The t argument is
+ * parenthesized at every use so that any expression may be passed.
+ * NFS_SRTT(r)/NFS_SDRTT(r): the smoothed rtt / deviation slot for the
+ * request's procedure, as mapped by proct[]. They index with
+ * proct[...] - 1, so they are only meaningful for procedures whose
+ * proct[] entry is non-zero.
+ */
+#define	NFS_RTO(n, t) \
+	((t) == 0 ? (n)->nm_timeo : \
+	 ((t) < 3 ? \
+	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+#define	NFS_SRTT(r)	(r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1]
+#define	NFS_SDRTT(r)	(r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1]
+/*
+ * External data, mostly RPC constants in XDR form
+ */
+extern u_long rpc_reply, rpc_msgdenied, rpc_mismatch, rpc_vers, rpc_auth_unix,
+ rpc_msgaccepted, rpc_call, rpc_autherr, rpc_rejectedcred,
+ rpc_auth_kerb;
+extern u_long nfs_prog, nfs_vers, nqnfs_prog, nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ * The table is declared with ELAST entries. Entry i presumably
+ * corresponds to errno value i + 1 (the first entry is NFSERR_PERM, the
+ * second NFSERR_NOENT) -- confirm against the code that indexes it.
+ */
+static int nfsrv_errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR,
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO,
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO,
+ NFSERR_IO,
+};
+
+/*
+ * Defines which timer to use for the procnum.
+ * 0 - default
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ * Indexed by RPC procedure number. A non-zero value, minus one, is the
+ * nm_srtt[]/nm_sdrtt[] slot picked by NFS_SRTT()/NFS_SDRTT(); the value
+ * itself is what NFS_RTO() expects as its timer argument.
+ */
+static int proct[NFS_NPROCS] = {
+ 0, 1, 0, 0, 2, 3, 3, 0, 4, 0, 0, 0, 0, 0, 0, 0, 3, 0, 3, 0, 0, 0, 0,
+};
+
+/*
+ * There is a congestion window for outstanding rpcs maintained per mount
+ * point. The cwnd size is adjusted in roughly the way that:
+ * Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ * SIGCOMM '88". ACM, August 1988.
+ * describes for TCP. The cwnd size is chopped in half on a retransmit timeout
+ * and incremented by 1/cwnd when each rpc reply is received and a full cwnd
+ * of rpcs is in progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+/* Scale factor applied to nm_cwnd and nm_sent. */
+#define NFS_CWNDSCALE 256
+/* Upper bound of the window; nfs_connect() starts at half of this. */
+#define NFS_MAXCWND (NFS_CWNDSCALE * 32)
+/* Doubling multipliers, 2 through 256; presumably indexed by timeout count. */
+static int nfs_backoff[8] = { 2, 4, 8, 16, 32, 64, 128, 256, };
+/* Forward declarations of routines used in this file. */
+int nfs_sbwait();
+void nfs_disconnect(), nfs_realign(), nfsrv_wakenfsd(), nfs_sndunlock();
+void nfs_rcvunlock(), nqnfs_serverd(), nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+/* Non-zero makes the nfsd loop record each request through nfsd_rt(). */
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+/* Head of the queue of nfsd structures (see insque() in nfssvc_nfsd). */
+struct nfsd nfsd_head;
+
+/* Server-side handlers, one per RPC procedure, referenced by nfsrv_procs[]. */
+int nfsrv_null(),
+ nfsrv_getattr(),
+ nfsrv_setattr(),
+ nfsrv_lookup(),
+ nfsrv_readlink(),
+ nfsrv_read(),
+ nfsrv_write(),
+ nfsrv_create(),
+ nfsrv_remove(),
+ nfsrv_rename(),
+ nfsrv_link(),
+ nfsrv_symlink(),
+ nfsrv_mkdir(),
+ nfsrv_rmdir(),
+ nfsrv_readdir(),
+ nfsrv_statfs(),
+ nfsrv_noop(),
+ nqnfsrv_readdirlook(),
+ nqnfsrv_getlease(),
+ nqnfsrv_vacated(),
+ nqnfsrv_access();
+
+/*
+ * Dispatch table indexed by RPC procedure number (nd_procnum in the nfsd
+ * service loop). Slots 3, 7 and 21 are routed to nfsrv_noop.
+ */
+int (*nfsrv_procs[NFS_NPROCS])() = {
+ nfsrv_null,
+ nfsrv_getattr,
+ nfsrv_setattr,
+ nfsrv_noop,
+ nfsrv_lookup,
+ nfsrv_readlink,
+ nfsrv_read,
+ nfsrv_noop,
+ nfsrv_write,
+ nfsrv_create,
+ nfsrv_remove,
+ nfsrv_rename,
+ nfsrv_link,
+ nfsrv_symlink,
+ nfsrv_mkdir,
+ nfsrv_rmdir,
+ nfsrv_readdir,
+ nfsrv_statfs,
+ nqnfsrv_readdirlook,
+ nqnfsrv_getlease,
+ nqnfsrv_vacated,
+ nfsrv_noop,
+ nqnfsrv_access,
+};
+
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * nmp - mount point; nm_nam supplies the server address and nm_sotype /
+ *       nm_soproto the socket type and protocol. On success nm_so holds
+ *       the new socket and the per-mount rtt/cwnd state is reset.
+ * rep - request on whose behalf the connect runs, or null; when non-null
+ *       nfs_sigintr() is polled while the connect is pending.
+ *
+ * Returns 0 on success. On any failure the socket is torn down through
+ * nfs_disconnect() and the error is returned.
+ */
+nfs_connect(nmp, rep)
+	register struct nfsmount *nmp;
+	struct nfsreq *rep;
+{
+	register struct socket *so;
+	int s, error, rcvreserve, sndreserve;
+	struct sockaddr *saddr;
+	struct sockaddr_in *sin;
+	struct mbuf *m;
+	u_short tport;
+
+	nmp->nm_so = (struct socket *)0;
+	saddr = mtod(nmp->nm_nam, struct sockaddr *);
+	if (error = socreate(saddr->sa_family,
+	    &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+		goto bad;
+	so = nmp->nm_so;
+	nmp->nm_soflags = so->so_proto->pr_flags;
+
+	/*
+	 * Some servers require that the client port be a reserved port number.
+	 * Try ports downward from IPPORT_RESERVED - 1 while the bind reports
+	 * EADDRINUSE, giving up at IPPORT_RESERVED / 2.
+	 */
+	if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+		MGET(m, M_WAIT, MT_SONAME);
+		sin = mtod(m, struct sockaddr_in *);
+		sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = INADDR_ANY;
+		tport = IPPORT_RESERVED - 1;
+		sin->sin_port = htons(tport);
+		while ((error = sobind(so, m)) == EADDRINUSE &&
+		    --tport > IPPORT_RESERVED / 2)
+			sin->sin_port = htons(tport);
+		m_freem(m);
+		if (error)
+			goto bad;
+	}
+
+	/*
+	 * Protocols that do not require connections may be optionally left
+	 * unconnected for servers that reply from a port other than NFS_PORT.
+	 */
+	if (nmp->nm_flag & NFSMNT_NOCONN) {
+		if (nmp->nm_soflags & PR_CONNREQUIRED) {
+			error = ENOTCONN;
+			goto bad;
+		}
+	} else {
+		if (error = soconnect(so, nmp->nm_nam))
+			goto bad;
+
+		/*
+		 * Wait for the connection to complete. Cribbed from the
+		 * connect system call but with the wait timing out so
+		 * that interruptible mounts don't hang here for a long time.
+		 */
+		s = splnet();
+		while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+			(void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+			    "nfscon", 2 * hz);
+			if ((so->so_state & SS_ISCONNECTING) &&
+			    so->so_error == 0 && rep &&
+			    (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+				so->so_state &= ~SS_ISCONNECTING;
+				splx(s);
+				goto bad;
+			}
+		}
+		if (so->so_error) {
+			error = so->so_error;
+			so->so_error = 0;
+			splx(s);
+			goto bad;
+		}
+		splx(s);
+	}
+
+	/* Soft and interruptible mounts get 5 second socket buffer timeouts. */
+	if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+		so->so_rcv.sb_timeo = (5 * hz);
+		so->so_snd.sb_timeo = (5 * hz);
+	} else {
+		so->so_rcv.sb_timeo = 0;
+		so->so_snd.sb_timeo = 0;
+	}
+
+	/* Size the socket buffers from the mount's read/write sizes. */
+	if (nmp->nm_sotype == SOCK_DGRAM) {
+		sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+		rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+	} else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+	} else {
+		if (nmp->nm_sotype != SOCK_STREAM)
+			panic("nfscon sotype");
+		if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+		}
+		/*
+		 * TCP_NODELAY is an IPPROTO_TCP level option, so only set it
+		 * on an AF_INET TCP socket (the same test the server side
+		 * applies); a protocol in another address family may carry
+		 * the same protocol number.
+		 */
+		if (saddr->sa_family == AF_INET &&
+		    so->so_proto->pr_protocol == IPPROTO_TCP) {
+			MGET(m, M_WAIT, MT_SOOPTS);
+			*mtod(m, int *) = 1;
+			m->m_len = sizeof(int);
+			sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+		}
+		/* Stream records carry an extra u_long record mark. */
+		sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+		    * 2;
+		rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+		    * 2;
+	}
+	if (error = soreserve(so, sndreserve, rcvreserve))
+		goto bad;
+	so->so_rcv.sb_flags |= SB_NOINTR;
+	so->so_snd.sb_flags |= SB_NOINTR;
+
+	/* Initialize other non-zero congestion variables */
+	nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
+	    nmp->nm_srtt[4] = (NFS_TIMEO << 3);
+	nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+	    nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
+	nmp->nm_cwnd = NFS_MAXCWND / 2;	/* Initial send window */
+	nmp->nm_sent = 0;
+	nmp->nm_timeouts = 0;
+	return (0);
+
+bad:
+	nfs_disconnect(nmp);
+	return (error);
+}
+
+/*
+ * Reconnect routine:
+ * Called when a connection is broken on a reliable protocol.
+ * - clean up the old socket
+ * - nfs_connect() again, retrying after a sleep on lbolt for as long as
+ *   it fails
+ * - set R_MUSTRESEND for all outstanding requests on mount point
+ * Returns EINTR if a connect attempt fails with EINTR or ERESTART,
+ * otherwise 0 once connected.
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsreq *rp;
+	register struct nfsmount *nmp = rep->r_nmp;
+	int error;
+
+	nfs_disconnect(nmp);
+	for (;;) {
+		error = nfs_connect(nmp, rep);
+		if (error == 0)
+			break;
+		if (error == EINTR || error == ERESTART)
+			return (EINTR);
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+	}
+
+	/*
+	 * Walk the outstanding request list and mark every request that
+	 * belongs to this mount for retransmission on the new socket.
+	 */
+	for (rp = nfsreqh.r_next; rp != &nfsreqh; rp = rp->r_next) {
+		if (rp->r_nmp == nmp)
+			rp->r_flags |= R_MUSTRESEND;
+	}
+	return (0);
+}
+
+/*
+ * NFS disconnect. Clean up and unlink.
+ * If the mount point has a socket, nm_so is cleared first and the socket
+ * is then shut down in both directions and closed.  Does nothing when
+ * there is no socket.
+ */
+void
+nfs_disconnect(nmp)
+	register struct nfsmount *nmp;
+{
+	register struct socket *oldso = nmp->nm_so;
+
+	if (oldso == (struct socket *)0)
+		return;
+	nmp->nm_so = (struct socket *)0;
+	soshutdown(oldso, 2);
+	soclose(oldso);
+}
+
+/*
+ * This is the nfs send routine. For connection based socket types, it
+ * must be called with an nfs_sndlock() on the socket.
+ * "rep == NULL" indicates that it has been called from a server.
+ *
+ * so  - socket to send on; for the client side it is replaced by the
+ *	 mount point's current nm_so
+ * nam - destination address, passed to sosend() only when the protocol is
+ *	 not connection based and the socket is not connected
+ * top - mbuf chain to send; always consumed (freed here on the early
+ *	 returns, otherwise handed to sosend())
+ * rep - client request, or NULL on the server side
+ *
+ * For the client side:
+ * - return EINTR if the RPC is terminated, 0 otherwise
+ * - set R_MUSTRESEND if the send fails for any reason
+ * For the server side:
+ * - return EINTR or ERESTART if interrupted by a signal
+ * - return EPIPE if a connection is lost for connection based sockets (TCP...)
+ * Any sosend() error other than EINTR, ERESTART, EWOULDBLOCK and EPIPE is
+ * logged and then reported to the caller as success.
+ */
+nfs_send(so, nam, top, rep)
+	register struct socket *so;
+	struct mbuf *nam;
+	register struct mbuf *top;
+	struct nfsreq *rep;
+{
+	struct mbuf *to;
+	int error, prflags, sndflags;
+
+	if (rep == NULL) {
+		prflags = so->so_proto->pr_flags;
+	} else {
+		if (rep->r_flags & R_SOFTTERM) {
+			m_freem(top);
+			return (EINTR);
+		}
+		so = rep->r_nmp->nm_so;
+		if (so == NULL) {
+			/* No socket right now; let the request be resent. */
+			rep->r_flags |= R_MUSTRESEND;
+			m_freem(top);
+			return (0);
+		}
+		rep->r_flags &= ~R_MUSTRESEND;
+		prflags = rep->r_nmp->nm_soflags;
+	}
+
+	/* Only supply an address when the socket cannot find the peer itself. */
+	to = nam;
+	if ((prflags & PR_CONNREQUIRED) || (so->so_state & SS_ISCONNECTED))
+		to = (struct mbuf *)0;
+	sndflags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;
+
+	error = sosend(so, to, (struct uio *)0, top,
+		(struct mbuf *)0, sndflags);
+	if (error == 0)
+		return (0);
+
+	if (rep == NULL) {
+		log(LOG_INFO, "nfsd send error %d\n", error);
+	} else {
+		log(LOG_INFO, "nfs send error %d for server %s\n",error,
+			rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+		/*
+		 * Deal with errors for the client side.
+		 */
+		if (rep->r_flags & R_SOFTTERM)
+			error = EINTR;
+		else
+			rep->r_flags |= R_MUSTRESEND;
+	}
+
+	/*
+	 * Handle any recoverable (soft) socket errors here. (???)
+	 * Only the four errors below are passed back to the caller.
+	 */
+	if (error == EINTR || error == ERESTART ||
+	    error == EWOULDBLOCK || error == EPIPE)
+		return (error);
+	return (0);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ *     small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ * rep   - request being waited on; supplies the mount point and socket
+ * aname - cleared on entry; for an unconnected datagram socket it is
+ *	   passed to soreceive() to receive the sender's address
+ * mp    - cleared on entry; receives the mbuf chain read from the socket,
+ *	   and is reset to NULL if an error is returned
+ * Returns 0 or an errno value.
+ *
+ * The send lock is taken while checking for a needed reconnect/resend and
+ * is always released again before returning, including when the reconnect
+ * attempted after a receive error fails.
+ */
+nfs_receive(rep, aname, mp)
+	register struct nfsreq *rep;
+	struct mbuf **aname;
+	struct mbuf **mp;
+{
+	register struct socket *so;
+	struct uio auio;
+	struct iovec aio;
+	register struct mbuf *m;
+	struct mbuf *control;
+	u_long len;
+	struct mbuf **getnam;
+	int error, sotype, rcvflg;
+	struct proc *p = curproc;	/* XXX */
+
+	/*
+	 * Set up arguments for soreceive()
+	 */
+	*mp = (struct mbuf *)0;
+	*aname = (struct mbuf *)0;
+	sotype = rep->r_nmp->nm_sotype;
+
+	/*
+	 * For reliable protocols, lock against other senders/receivers
+	 * in case a reconnect is necessary.
+	 * For SOCK_STREAM, first get the Record Mark to find out how much
+	 * more there is to get.
+	 * We must lock the socket against other receivers
+	 * until we have an entire rpc request/reply.
+	 */
+	if (sotype != SOCK_DGRAM) {
+		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+			return (error);
+tryagain:
+		/*
+		 * Check for fatal errors and resending request.
+		 * The send lock is held whenever control is at this label.
+		 */
+		/*
+		 * Ugh: If a reconnect attempt just happened, nm_so
+		 * would have changed. NULL indicates a failed
+		 * attempt that has essentially shut down this
+		 * mount point.
+		 */
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+			nfs_sndunlock(&rep->r_nmp->nm_flag);
+			return (EINTR);
+		}
+		if ((so = rep->r_nmp->nm_so) == NULL) {
+			if (error = nfs_reconnect(rep)) {
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+				return (error);
+			}
+			goto tryagain;
+		}
+		while (rep->r_flags & R_MUSTRESEND) {
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			nfsstats.rpcretries++;
+			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+				if (error == EINTR || error == ERESTART ||
+				    (error = nfs_reconnect(rep))) {
+					nfs_sndunlock(&rep->r_nmp->nm_flag);
+					return (error);
+				}
+				goto tryagain;
+			}
+		}
+		nfs_sndunlock(&rep->r_nmp->nm_flag);
+		if (sotype == SOCK_STREAM) {
+			/* First read just the record mark into len. */
+			aio.iov_base = (caddr_t) &len;
+			aio.iov_len = sizeof(u_long);
+			auio.uio_iov = &aio;
+			auio.uio_iovcnt = 1;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = UIO_READ;
+			auio.uio_offset = 0;
+			auio.uio_resid = sizeof(u_long);
+			auio.uio_procp = p;
+			do {
+				rcvflg = MSG_WAITALL;
+				error = soreceive(so, (struct mbuf **)0, &auio,
+					(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+				if (error == EWOULDBLOCK && rep) {
+					if (rep->r_flags & R_SOFTTERM)
+						return (EINTR);
+				}
+			} while (error == EWOULDBLOCK);
+			if (!error && auio.uio_resid > 0) {
+				log(LOG_INFO,
+					"short receive (%d/%d) from nfs server %s\n",
+					sizeof(u_long) - auio.uio_resid,
+					sizeof(u_long),
+					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+				error = EPIPE;
+			}
+			if (error)
+				goto errout;
+			len = ntohl(len) & ~0x80000000;
+			/*
+			 * This is SERIOUS! We are out of sync with the sender
+			 * and forcing a disconnect/reconnect is all I can do.
+			 */
+			if (len > NFS_MAXPACKET) {
+				log(LOG_ERR, "%s (%d) from nfs server %s\n",
+					"impossible packet length",
+					len,
+					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+				error = EFBIG;
+				goto errout;
+			}
+			/*
+			 * Now read the record body; once started, keep going
+			 * even across EINTR/ERESTART so the stream stays in
+			 * sync.
+			 */
+			auio.uio_resid = len;
+			do {
+				rcvflg = MSG_WAITALL;
+				error = soreceive(so, (struct mbuf **)0,
+					&auio, mp, (struct mbuf **)0, &rcvflg);
+			} while (error == EWOULDBLOCK || error == EINTR ||
+				error == ERESTART);
+			if (!error && auio.uio_resid > 0) {
+				log(LOG_INFO,
+					"short receive (%d/%d) from nfs server %s\n",
+					len - auio.uio_resid, len,
+					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+				error = EPIPE;
+			}
+		} else {
+			/*
+			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
+			 * and soreceive() will return when it has either a
+			 * control msg or a data msg.
+			 * We have no use for control msg., but must grab them
+			 * and then throw them away so we know what is going
+			 * on.
+			 */
+			auio.uio_resid = len = 100000000; /* Anything Big */
+			auio.uio_procp = p;
+			do {
+				rcvflg = 0;
+				error = soreceive(so, (struct mbuf **)0,
+					&auio, mp, &control, &rcvflg);
+				if (control)
+					m_freem(control);
+				if (error == EWOULDBLOCK && rep) {
+					if (rep->r_flags & R_SOFTTERM)
+						return (EINTR);
+				}
+			} while (error == EWOULDBLOCK ||
+				(!error && *mp == NULL && control));
+			if ((rcvflg & MSG_EOR) == 0)
+				printf("Egad!!\n");
+			if (!error && *mp == NULL)
+				error = EPIPE;
+			len -= auio.uio_resid;
+		}
+errout:
+		if (error && error != EINTR && error != ERESTART) {
+			m_freem(*mp);
+			*mp = (struct mbuf *)0;
+			if (error != EPIPE)
+				log(LOG_INFO,
+					"receive error %d from nfs server %s\n",
+					error,
+					rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+			if (!error) {
+				error = nfs_reconnect(rep);
+				if (!error)
+					goto tryagain;
+				/*
+				 * The reconnect failed while we hold the send
+				 * lock; drop it as the other failure paths
+				 * above do, rather than returning with it
+				 * held.
+				 */
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+			}
+		}
+	} else {
+		if ((so = rep->r_nmp->nm_so) == NULL)
+			return (EACCES);
+		if (so->so_state & SS_ISCONNECTED)
+			getnam = (struct mbuf **)0;
+		else
+			getnam = aname;
+		auio.uio_resid = len = 1000000;
+		auio.uio_procp = p;
+		do {
+			rcvflg = 0;
+			error = soreceive(so, getnam, &auio, mp,
+				(struct mbuf **)0, &rcvflg);
+			if (error == EWOULDBLOCK &&
+			    (rep->r_flags & R_SOFTTERM))
+				return (EINTR);
+		} while (error == EWOULDBLOCK);
+		len -= auio.uio_resid;
+	}
+	if (error) {
+		m_freem(*mp);
+		*mp = (struct mbuf *)0;
+	}
+	/*
+	 * Search for any mbufs that are not a multiple of 4 bytes long
+	 * or with m_data not longword aligned.
+	 * These could cause pointer alignment problems, so copy them to
+	 * well aligned mbufs.
+	 */
+	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+	return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep is the request the caller is waiting on. Each pass of the loop
+ * takes the receive lock, pulls one message off the socket with
+ * nfs_receive(), and attaches it to whichever request on the nfsreqh list
+ * has a matching xid and no reply yet, updating that mount point's
+ * congestion window and round trip estimates as it does so.
+ * Returns 0 once myrep->r_mrep is set, or after one pass that did not end
+ * in a fatal error when R_GETONEREP is set in myrep->r_flags; otherwise
+ * returns the error from nfs_rcvlock() or a non-ignorable error from
+ * nfs_receive().
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+ struct nfsreq *myrep;
+{
+ register struct nfsreq *rep;
+ register struct nfsmount *nmp = myrep->r_nmp;
+ register long t1;
+ struct mbuf *mrep, *nam, *md;
+ u_long rxid, *tl;
+ caddr_t dpos, cp2; /* NOTE(review): cp2 is never named directly below; presumably used inside nfsm_dissect -- confirm */
+ int error;
+
+ /*
+ * Loop around until we get our own reply
+ */
+ for (;;) {
+ /*
+ * Lock against other receivers so that I don't get stuck in
+ * sbwait() after someone else has received my reply for me.
+ * Also necessary for connection based protocols to avoid
+ * race conditions during a reconnect.
+ */
+ if (error = nfs_rcvlock(myrep))
+ return (error);
+ /* Already received, bye bye */
+ if (myrep->r_mrep != NULL) {
+ nfs_rcvunlock(&nmp->nm_flag);
+ return (0);
+ }
+ /*
+ * Get the next Rpc reply off the socket
+ */
+ error = nfs_receive(myrep, &nam, &mrep);
+ nfs_rcvunlock(&nmp->nm_flag);
+ if (error) {
+
+ /*
+ * Ignore routing errors on connectionless protocols??
+ */
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+ nmp->nm_so->so_error = 0;
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+ return (error);
+ }
+ if (nam)
+ m_freem(nam);
+
+ /*
+ * Get the xid and check that it is an rpc reply
+ */
+ md = mrep;
+ dpos = mtod(md, caddr_t);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ rxid = *tl++;
+ if (*tl != rpc_reply) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (nqnfs_callback(nmp, mrep, md, dpos))
+ nfsstats.rpcinvalid++;
+ } else {
+ nfsstats.rpcinvalid++;
+ m_freem(mrep);
+ }
+nfsmout: /* NOTE(review): no goto in this function names this label; presumably the failure target of nfsm_dissect -- confirm */
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+
+ /*
+ * Loop through the request list to match up the reply
+ * Iff no match, just drop the datagram
+ */
+ rep = nfsreqh.r_next;
+ while (rep != &nfsreqh) {
+ if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+ /* Found it.. */
+ rep->r_mrep = mrep;
+ rep->r_md = md;
+ rep->r_dpos = dpos;
+ if (nfsrtton) {
+ struct rttl *rt;
+
+ rt = &nfsrtt.rttl[nfsrtt.pos];
+ rt->proc = rep->r_procnum;
+ rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+ rt->sent = nmp->nm_sent;
+ rt->cwnd = nmp->nm_cwnd;
+ rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+ rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+ rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+ rt->tstamp = time;
+ if (rep->r_flags & R_TIMING)
+ rt->rtt = rep->r_rtt;
+ else
+ rt->rtt = 1000000;
+ nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ; /* log is a ring of NFSRTTLOGSIZ entries */
+ }
+ /*
+ * Update congestion window.
+ * Do the additive increase of
+ * one rpc/rtt.
+ */
+ if (nmp->nm_cwnd <= nmp->nm_sent) {
+ nmp->nm_cwnd +=
+ (NFS_CWNDSCALE * NFS_CWNDSCALE +
+ (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+ if (nmp->nm_cwnd > NFS_MAXCWND)
+ nmp->nm_cwnd = NFS_MAXCWND;
+ }
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ /*
+ * Update rtt using a gain of 0.125 on the mean
+ * and a gain of 0.25 on the deviation.
+ */
+ if (rep->r_flags & R_TIMING) {
+ /*
+ * Since the timer resolution of
+ * NFS_HZ is so coarse, it can often
+ * result in r_rtt == 0. Since
+ * r_rtt == N means that the actual
+ * rtt is between N+dt and N+2-dt ticks,
+ * add 1.
+ */
+ t1 = rep->r_rtt + 1;
+ t1 -= (NFS_SRTT(rep) >> 3);
+ NFS_SRTT(rep) += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= (NFS_SDRTT(rep) >> 2);
+ NFS_SDRTT(rep) += t1;
+ }
+ nmp->nm_timeouts = 0;
+ break;
+ }
+ rep = rep->r_next;
+ }
+ /*
+ * If not matched to a request, drop it.
+ * If it's mine, get out.
+ */
+ if (rep == &nfsreqh) {
+ nfsstats.rpcunexpected++;
+ m_freem(mrep);
+ } else if (rep == myrep) {
+ if (rep->r_mrep == NULL)
+ panic("nfsreply nil");
+ return (0);
+ }
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ }
+}
+
+/*
+ * nfs_request - goes something like this
+ *	- fill in request struct
+ *	- links it into list
+ *	- calls nfs_send() for first transmit
+ *	- calls nfs_reply() to wait for the reply
+ *	- break down rpc header and return with nfs reply pointed to
+ *	  by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ * vp      - vnode the request is for; its mount selects the nfsmount
+ * mrest   - mbuf chain holding the procedure arguments
+ * procnum - NFS procedure number
+ * procp   - process on whose behalf the request is made
+ * cred    - credentials used to build the RPC authentication
+ * mrp, mdp, dposp - on success receive the reply chain, the mbuf being
+ *	     parsed and the parse position within it
+ * Returns 0 on success, otherwise an errno value; on every return the
+ * request structure and the request mbuf chain have been released,
+ * including when parsing of the reply header fails part way through.
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+	struct vnode *vp;
+	struct mbuf *mrest;
+	int procnum;
+	struct proc *procp;
+	struct ucred *cred;
+	struct mbuf **mrp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+{
+	register struct mbuf *m, *mrep;
+	register struct nfsreq *rep;
+	register u_long *tl;
+	register int i;
+	struct nfsmount *nmp;
+	struct mbuf *md, *mheadend;
+	struct nfsreq *reph;
+	struct nfsnode *np;
+	time_t reqtime, waituntil;
+	caddr_t dpos, cp2;
+	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+	u_long xid;
+	u_quad_t frev;
+	char *auth_str;
+
+	nmp = VFSTONFS(vp->v_mount);
+	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+	rep->r_nmp = nmp;
+	rep->r_vp = vp;
+	rep->r_procp = procp;
+	rep->r_procnum = procnum;
+	/* Total length of the argument chain, needed for the RPC header. */
+	i = 0;
+	m = mrest;
+	while (m) {
+		i += m->m_len;
+		m = m->m_next;
+	}
+	mrest_len = i;
+
+	/*
+	 * Get the RPC header with authorization.
+	 */
+kerbauth:
+	auth_str = (char *)0;
+	if (nmp->nm_flag & NFSMNT_KERB) {
+		if (failed_auth) {
+			error = nfs_getauth(nmp, rep, cred, &auth_type,
+				&auth_str, &auth_len);
+			if (error) {
+				free((caddr_t)rep, M_NFSREQ);
+				m_freem(mrest);
+				return (error);
+			}
+		} else {
+			auth_type = RPCAUTH_UNIX;
+			auth_len = 5 * NFSX_UNSIGNED;
+		}
+	} else {
+		auth_type = RPCAUTH_UNIX;
+		if (cred->cr_ngroups < 1)
+			panic("nfsreq nogrps");
+		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+			5 * NFSX_UNSIGNED;
+	}
+	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+	     auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+	if (auth_str)
+		free(auth_str, M_TEMP);
+
+	/*
+	 * For stream protocols, insert a Sun RPC Record Mark.
+	 */
+	if (nmp->nm_sotype == SOCK_STREAM) {
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 |
+			 (m->m_pkthdr.len - NFSX_UNSIGNED));
+	}
+	rep->r_mreq = m;
+	rep->r_xid = xid;
+tryagain:
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		rep->r_retry = nmp->nm_retry;
+	else
+		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
+	rep->r_rtt = rep->r_rexmit = 0;
+	if (proct[procnum] > 0)
+		rep->r_flags = R_TIMING;
+	else
+		rep->r_flags = 0;
+	rep->r_mrep = NULL;
+
+	/*
+	 * Do the client side RPC.
+	 */
+	nfsstats.rpcrequests++;
+	/*
+	 * Chain request into list of outstanding requests. Be sure
+	 * to put it LAST so timer finds oldest requests first.
+	 */
+	s = splsoftclock();
+	reph = &nfsreqh;
+	reph->r_prev->r_next = rep;
+	rep->r_prev = reph->r_prev;
+	reph->r_prev = rep;
+	rep->r_next = reph;
+
+	/* Get send time for nqnfs */
+	reqtime = time.tv_sec;
+
+	/*
+	 * If backing off another request or avoiding congestion, don't
+	 * send this one now but let timer do it. If not timing a request,
+	 * do it now.
+	 */
+	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		nmp->nm_sent < nmp->nm_cwnd)) {
+		splx(s);
+		if (nmp->nm_soflags & PR_CONNREQUIRED)
+			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (!error) {
+			/* Send a copy; the original is kept for retransmits. */
+			m = m_copym(m, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+			if (nmp->nm_soflags & PR_CONNREQUIRED)
+				nfs_sndunlock(&nmp->nm_flag);
+		}
+		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+			nmp->nm_sent += NFS_CWNDSCALE;
+			rep->r_flags |= R_SENT;
+		}
+	} else {
+		splx(s);
+		rep->r_rtt = -1;	/* tells nfs_timer() it is still unsent */
+	}
+
+	/*
+	 * Wait for the reply from our send or the timer's.
+	 */
+	if (!error || error == EPIPE)
+		error = nfs_reply(rep);
+
+	/*
+	 * RPC done, unlink the request.
+	 */
+	s = splsoftclock();
+	rep->r_prev->r_next = rep->r_next;
+	rep->r_next->r_prev = rep->r_prev;
+	splx(s);
+
+	/*
+	 * Decrement the outstanding request count.
+	 */
+	if (rep->r_flags & R_SENT) {
+		rep->r_flags &= ~R_SENT;	/* paranoia */
+		nmp->nm_sent -= NFS_CWNDSCALE;
+	}
+
+	/*
+	 * If there was a successful reply and a tprintf msg.
+	 * tprintf a response.
+	 */
+	if (!error && (rep->r_flags & R_TPRINTFMSG))
+		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+		    "is alive again");
+	mrep = rep->r_mrep;
+	md = rep->r_md;
+	dpos = rep->r_dpos;
+	if (error) {
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * break down the rpc header and check if ok
+	 * (the nfsm_dissect/nfsm_adv macros bail out to nfsmout when the
+	 * reply is too short)
+	 */
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	if (*tl++ == rpc_msgdenied) {
+		if (*tl == rpc_mismatch)
+			error = EOPNOTSUPP;
+		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+			if (*tl == rpc_rejectedcred && failed_auth == 0) {
+				/*
+				 * Retry once with fresh credentials: detach
+				 * the arguments from the old header so only
+				 * the header is freed.
+				 */
+				failed_auth++;
+				mheadend->m_next = (struct mbuf *)0;
+				m_freem(mrep);
+				m_freem(rep->r_mreq);
+				goto kerbauth;
+			} else
+				error = EAUTH;
+		} else
+			error = EACCES;
+		m_freem(mrep);
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * skip over the auth_verf, someday we may want to cache auth_short's
+	 * for nfs_reqhead(), but for now just dump it
+	 */
+	if (*++tl != 0) {
+		i = nfsm_rndup(fxdr_unsigned(long, *tl));
+		nfsm_adv(i);
+	}
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+	/* 0 == ok */
+	if (*tl == 0) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			error = fxdr_unsigned(int, *tl);
+			m_freem(mrep);
+			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			    error == NQNFS_TRYLATER) {
+				error = 0;
+				waituntil = time.tv_sec + trylater_delay;
+				while (time.tv_sec < waituntil)
+					(void) tsleep((caddr_t)&lbolt,
+						PSOCK, "nqnfstry", 0);
+				trylater_delay *= nfs_backoff[trylater_cnt];
+				if (trylater_cnt < 7)
+					trylater_cnt++;
+				goto tryagain;
+			}
+
+			/*
+			 * If the File Handle was stale, invalidate the
+			 * lookup cache, just in case.
+			 */
+			if (error == ESTALE)
+				cache_purge(vp);
+			m_freem(rep->r_mreq);
+			free((caddr_t)rep, M_NFSREQ);
+			return (error);
+		}
+
+		/*
+		 * For nqnfs, get any lease in reply
+		 */
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			if (*tl) {
+				np = VTONFS(vp);
+				nqlflag = fxdr_unsigned(int, *tl);
+				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+				cachable = fxdr_unsigned(int, *tl++);
+				reqtime += fxdr_unsigned(int, *tl++);
+				if (reqtime > time.tv_sec) {
+				    fxdr_hyper(tl, &frev);
+				    nqnfs_clientlease(nmp, np, nqlflag,
+					cachable, reqtime, frev);
+				}
+			}
+		}
+		*mrp = mrep;
+		*mdp = md;
+		*dposp = dpos;
+		m_freem(rep->r_mreq);
+		FREE((caddr_t)rep, M_NFSREQ);
+		return (0);
+	}
+	m_freem(mrep);
+	error = EPROTONOSUPPORT;
+nfsmout:
+	/*
+	 * Reached either from just above or from a failed nfsm_dissect /
+	 * nfsm_adv (which has already disposed of mrep).  In both cases the
+	 * request and its mbuf chain are still ours and must be released
+	 * here, otherwise they are leaked.
+	 */
+	m_freem(rep->r_mreq);
+	free((caddr_t)rep, M_NFSREQ);
+	return (error);
+}
+
+/*
+ * Generate the rpc reply header
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ *
+ * nd supplies the xid to echo (nd_retxid) and the nqnfs lease fields.
+ * err selects the kind of reply: ERPCMISMATCH and NQNFS_AUTHERR produce a
+ * denied reply; anything else produces an accepted reply whose status word
+ * is chosen by the switch below. In the default case a further word
+ * (0, or err mapped through nfsrv_errmap[]) is appended unless err is
+ * VNOVAL.
+ * cache and frev are only used when a lease is piggybacked on the reply.
+ * On return *mrq is the head mbuf and *mbp / *bposp are set from the
+ * locals mb and bpos. Always returns 0.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+ int siz;
+ struct nfsd *nd;
+ int err;
+ int cache;
+ u_quad_t *frev;
+ struct mbuf **mrq;
+ struct mbuf **mbp;
+ caddr_t *bposp;
+{
+ register u_long *tl;
+ register struct mbuf *mreq;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2; /* NOTE(review): mb2 is never named directly below; presumably used inside nfsm_build -- confirm */
+
+ MGETHDR(mreq, M_WAIT, MT_DATA);
+ mb = mreq;
+ /*
+ * If this is a big reply, use a cluster else
+ * try and leave leading space for the lower level headers.
+ */
+ siz += RPC_REPLYSIZ;
+ if (siz >= MINCLSIZE) {
+ MCLGET(mreq, M_WAIT);
+ } else
+ mreq->m_data += max_hdr;
+ tl = mtod(mreq, u_long *);
+ mreq->m_len = 6*NFSX_UNSIGNED; /* six words are reserved up front for the fixed part of the header */
+ bpos = ((caddr_t)tl)+mreq->m_len;
+ *tl++ = nd->nd_retxid;
+ *tl++ = rpc_reply;
+ if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+ *tl++ = rpc_msgdenied;
+ if (err == NQNFS_AUTHERR) {
+ *tl++ = rpc_autherr;
+ *tl = rpc_rejectedcred;
+ mreq->m_len -= NFSX_UNSIGNED; /* this reply fills only five of the six reserved words */
+ bpos -= NFSX_UNSIGNED;
+ } else {
+ *tl++ = rpc_mismatch;
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(2);
+ }
+ } else {
+ *tl++ = rpc_msgaccepted;
+ *tl++ = 0;
+ *tl++ = 0;
+ switch (err) {
+ case EPROGUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROGUNAVAIL);
+ break;
+ case EPROGMISMATCH:
+ *tl = txdr_unsigned(RPC_PROGMISMATCH);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(2);
+ *tl = txdr_unsigned(2); /* someday 3 */
+ break;
+ case EPROCUNAVAIL:
+ *tl = txdr_unsigned(RPC_PROCUNAVAIL);
+ break;
+ default:
+ *tl = 0;
+ if (err != VNOVAL) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ if (err)
+ *tl = txdr_unsigned(nfsrv_errmap[err - 1]); /* NOTE(review): err is not range-checked against the size of nfsrv_errmap[] here -- confirm callers bound it */
+ else
+ *tl = 0;
+ }
+ break;
+ };
+ }
+
+ /*
+ * For nqnfs, piggyback lease as requested.
+ */
+ if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+ if (nd->nd_nqlflag) {
+ nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nd->nd_nqlflag);
+ *tl++ = txdr_unsigned(cache);
+ *tl++ = txdr_unsigned(nd->nd_duration);
+ txdr_hyper(frev, tl);
+ } else {
+ if (nd->nd_nqlflag != 0) /* cannot be true: this is the else arm of if (nd->nd_nqlflag) */
+ panic("nqreph");
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ *mrq = mreq;
+ *mbp = mb;
+ *bposp = bpos;
+ if (err != 0 && err != VNOVAL)
+ nfsstats.srvrpc_errs++;
+ return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ *
+ * arg is unused. The scan runs at splnet() and the routine re-arms itself
+ * with timeout() for hz / NFS_HZ ticks later.
+ * Requests that already have a reply or are marked R_SOFTTERM are skipped.
+ * A request whose process has a pending interrupting signal, or that has
+ * used up its retries, is marked R_SOFTTERM. Only SOCK_DGRAM requests are
+ * actually retransmitted here; for other socket types only r_rexmit is
+ * advanced.
+ * The nqnfs server timer is called at most once per second from here.
+ */
+void
+nfs_timer(arg)
+ void *arg;
+{
+ register struct nfsreq *rep;
+ register struct mbuf *m;
+ register struct socket *so;
+ register struct nfsmount *nmp;
+ register int timeo;
+ static long lasttime = 0; /* time.tv_sec at the last nqnfs_serverd() call */
+ int s, error;
+
+ s = splnet();
+ for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+ nmp = rep->r_nmp;
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+ continue;
+ if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (rep->r_rtt >= 0) { /* r_rtt is set to -1 (below and in nfs_request) while a send is still pending */
+ rep->r_rtt++;
+ if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+ timeo = nmp->nm_timeo;
+ else
+ timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ if (nmp->nm_timeouts > 0)
+ timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+ if (rep->r_rtt <= timeo)
+ continue;
+ if (nmp->nm_timeouts < 8) /* nm_timeouts is capped at 8 and indexes nfs_backoff[] above as nm_timeouts - 1 */
+ nmp->nm_timeouts++;
+ }
+ /*
+ * Check for server not responding
+ */
+ if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+ rep->r_rexmit > nmp->nm_deadthresh) {
+ nfs_msg(rep->r_procp,
+ nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "not responding");
+ rep->r_flags |= R_TPRINTFMSG;
+ }
+ if (rep->r_rexmit >= rep->r_retry) { /* too many */
+ nfsstats.rpctimeouts++;
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ if (nmp->nm_sotype != SOCK_DGRAM) {
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ continue; /* nothing is retransmitted from here for non-datagram sockets */
+ }
+ if ((so = nmp->nm_so) == NULL)
+ continue;
+
+ /*
+ * If there is enough space and the window allows..
+ * Resend it
+ * Set r_rtt to -1 in case we fail to send it now.
+ */
+ rep->r_rtt = -1;
+ if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd) &&
+ (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ (struct mbuf *)0, (struct mbuf *)0);
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ nmp->nm_nam, (struct mbuf *)0);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ } else {
+ /*
+ * Iff first send, start timing
+ * else turn timing off, backoff timer
+ * and divide congestion window by 2.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE)
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
+ rep->r_rtt = 0;
+ }
+ }
+ }
+
+ /*
+ * Call the nqnfs server timer once a second to handle leases.
+ */
+ if (lasttime != time.tv_sec) {
+ lasttime = time.tv_sec;
+ nqnfs_serverd();
+ }
+ splx(s);
+ timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Test for a termination condition pending on the process.
+ * This is used for NFSMNT_INT mounts.
+ *
+ * Returns EINTR when rep is given and already marked R_SOFTTERM, or when
+ * the mount is NFSMNT_INT and process p has a signal from NFSINT_SIGMASK
+ * pending that is neither masked nor ignored.  Returns 0 otherwise; rep
+ * and p may each be NULL.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+	if ((nmp->nm_flag & NFSMNT_INT) == 0)
+		return (0);
+	if (p == NULL || p->p_siglist == 0)
+		return (0);
+	/* Pending, not blocked, not ignored, and one NFS cares about. */
+	if ((p->p_siglist & ~p->p_sigmask & ~p->p_sigignore &
+	    NFSINT_SIGMASK) == 0)
+		return (0);
+	return (EINTR);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ *
+ * flagp - word holding the NFSMNT_SNDLOCK / NFSMNT_WANTSND bits
+ * rep   - request on whose behalf the lock is taken, or NULL when there
+ *	   is none; with NULL the wait cannot be interrupted
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() reports that the
+ * request should be abandoned while waiting.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		/*
+		 * rep may be NULL (see above), so it must not be
+		 * dereferenced unconditionally when the lock is contended.
+		 */
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+			slptimeo);
+		/*
+		 * After the first interruptible sleep, switch to timed
+		 * sleeps so the signal check above is repeated regularly.
+		 */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Unlock the stream socket for others.
+ * Clears NFSMNT_SNDLOCK in *flagp and, if somebody registered interest
+ * with NFSMNT_WANTSND, clears that too and wakes the sleepers on flagp.
+ * Panics if the lock is not held.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+	*flagp &= ~NFSMNT_SNDLOCK;
+	if ((*flagp & NFSMNT_WANTSND) == 0)
+		return;
+	*flagp &= ~NFSMNT_WANTSND;
+	wakeup((caddr_t)flagp);
+}
+
+/*
+ * Lock the mount point of rep against other receivers.
+ * Sleeps on the mount's nm_flag word while NFSMNT_RCVLOCK is set there.
+ * On an NFSMNT_INT mount the first sleep is interruptible (PCATCH) and
+ * later ones are timed at 2 * hz, so nfs_sigintr() is polled regularly.
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() says to give up.
+ */
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *flagp = &rep->r_nmp->nm_flag;
+	int slpflag, slptimeo = 0;
+
+	slpflag = (*flagp & NFSMNT_INT) ? PCATCH : 0;
+	for (;;) {
+		if ((*flagp & NFSMNT_RCVLOCK) == 0)
+			break;
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsrcvlk",
+			slptimeo);
+		if (slpflag != PCATCH)
+			continue;
+		slpflag = 0;
+		slptimeo = 2 * hz;
+	}
+	*flagp |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock taken by nfs_rcvlock().
+ * Clears NFSMNT_RCVLOCK in *flagp and, if NFSMNT_WANTRCV is set, clears
+ * it as well and wakes the sleepers on flagp.
+ * Panics if the lock is not held.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+	*flagp &= ~NFSMNT_RCVLOCK;
+	if ((*flagp & NFSMNT_WANTRCV) == 0)
+		return;
+	*flagp &= ~NFSMNT_WANTRCV;
+	wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ *
+ * m is the head of the chain (may be NULL, in which case nothing is done).
+ * The chain is walked until the first mbuf whose length or data address is
+ * not a multiple of 4; from that mbuf onwards all data is packed down into
+ * the same mbufs, reusing them in order, and any mbufs left over at the
+ * end get m_len = 0. The function returns after that one compaction pass.
+ * hsiz: when the first rewritten mbuf originally held no more than hsiz
+ * bytes and has room for more than hsiz, only hsiz bytes are put in it.
+ * No mbufs are allocated or freed by the code visible here.
+ */
+void
+nfs_realign(m, hsiz)
+ register struct mbuf *m;
+ int hsiz;
+{
+ register struct mbuf *m2;
+ register int siz, mlen, olen;
+ register caddr_t tcp, fcp; /* tcp: copy destination ("to"), fcp: copy source ("from") */
+ struct mbuf *mnew;
+
+ while (m) {
+ /*
+ * This never happens for UDP, rarely happens for TCP
+ * but frequently happens for iso transport.
+ */
+ if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ if ((int)fcp & 0x3) {
+ m->m_flags &= ~M_PKTHDR;
+ if (m->m_flags & M_EXT)
+ m->m_data = m->m_ext.ext_buf +
+ ((m->m_ext.ext_size - olen) & ~0x3);
+ else
+ m->m_data = m->m_dat;
+ }
+ m->m_len = 0;
+ tcp = mtod(m, caddr_t);
+ mnew = m; /* mnew: mbuf currently being filled */
+ m2 = m->m_next; /* m2: next mbuf to fill once mnew is full */
+
+ /*
+ * If possible, only put the first invariant part
+ * of the RPC header in the first mbuf.
+ */
+ mlen = M_TRAILINGSPACE(m);
+ if (olen <= hsiz && mlen > hsiz)
+ mlen = hsiz;
+
+ /*
+ * Loop through the mbuf list consolidating data.
+ */
+ while (m) {
+ while (olen > 0) {
+ if (mlen == 0) {
+ m2->m_flags &= ~M_PKTHDR;
+ if (m2->m_flags & M_EXT)
+ m2->m_data = m2->m_ext.ext_buf;
+ else
+ m2->m_data = m2->m_dat;
+ m2->m_len = 0;
+ mlen = M_TRAILINGSPACE(m2);
+ tcp = mtod(m2, caddr_t);
+ mnew = m2;
+ m2 = m2->m_next;
+ }
+ siz = min(mlen, olen);
+ if (tcp != fcp)
+ bcopy(fcp, tcp, siz);
+ mnew->m_len += siz;
+ mlen -= siz;
+ olen -= siz;
+ tcp += siz;
+ fcp += siz;
+ }
+ m = m->m_next;
+ if (m) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ }
+ }
+
+ /*
+ * Finally, set m_len == 0 for any trailing mbufs that have
+ * been copied out of.
+ */
+ while (m2) {
+ m2->m_len = 0;
+ m2 = m2->m_next;
+ }
+ return;
+ }
+ m = m->m_next;
+ }
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ *
+ * Does nothing unless SLP_VALID is set in ns_flag.
+ * SOCK_STREAM: received mbufs are appended to the raw queue
+ * (ns_raw/ns_rawend, byte count in ns_cc) and nfsrv_getstream() is called
+ * to carve records out of it.
+ * Other socket types: each soreceive() result is realigned and appended to
+ * the record queue (ns_rec/ns_recend), with the address mbuf linked in
+ * front of the data when soreceive() returned one.
+ * SLP_NEEDQ is set when the receive is deferred or would block, and
+ * SLP_DISCONN on other receive errors (for non-stream sockets only when
+ * the protocol is PR_CONNREQUIRED). When waitflag is M_DONTWAIT and there
+ * are queued records or either flag is set, nfsrv_wakenfsd() is called.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+ struct socket *so;
+ caddr_t arg;
+ int waitflag;
+{
+ register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+ register struct mbuf *m;
+ struct mbuf *mp, *nam;
+ struct uio auio;
+ int flags, error;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+#ifdef notdef
+ /*
+ * Define this to test for nfsds handling this under heavy load.
+ */
+ if (waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ }
+#endif
+ auio.uio_procp = NULL;
+ if (so->so_type == SOCK_STREAM) {
+ /*
+ * If there are already records on the queue, defer soreceive()
+ * to an nfsd so that there is feedback to the TCP layer that
+ * the nfs servers are heavily loaded.
+ */
+ if (slp->ns_rec && waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ;
+ goto dorecs;
+ }
+
+ /*
+ * Do soreceive().
+ */
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+ if (error || mp == (struct mbuf *)0) {
+ if (error == EWOULDBLOCK)
+ slp->ns_flag |= SLP_NEEDQ;
+ else
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ m = mp;
+ if (slp->ns_rawend) {
+ slp->ns_rawend->m_next = m;
+ slp->ns_cc += 1000000000 - auio.uio_resid; /* bytes just received = requested - residual */
+ } else {
+ slp->ns_raw = m;
+ slp->ns_cc = 1000000000 - auio.uio_resid;
+ }
+ while (m->m_next)
+ m = m->m_next;
+ slp->ns_rawend = m;
+
+ /*
+ * Now try and parse record(s) out of the raw stream data.
+ */
+ if (error = nfsrv_getstream(slp, waitflag)) {
+ if (error == EPERM)
+ slp->ns_flag |= SLP_DISCONN;
+ else
+ slp->ns_flag |= SLP_NEEDQ;
+ }
+ } else {
+ do {
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp,
+ (struct mbuf **)0, &flags);
+ if (mp) {
+ nfs_realign(mp, 10 * NFSX_UNSIGNED);
+ if (nam) {
+ m = nam;
+ m->m_next = mp;
+ } else
+ m = mp;
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = m;
+ else
+ slp->ns_rec = m;
+ slp->ns_recend = m;
+ m->m_nextpkt = (struct mbuf *)0;
+ }
+ if (error) {
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+ && error != EWOULDBLOCK) {
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ }
+ } while (mp);
+ }
+
+ /*
+ * Now try and process the request records, non-blocking.
+ */
+dorecs:
+ if (waitflag == M_DONTWAIT &&
+ (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+ nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ *
+ * Works on the raw queue in slp (ns_raw, byte count ns_cc). ns_reclen is
+ * the length of the record being assembled; 0 means the next
+ * NFSX_UNSIGNED bytes on the queue are a record mark. Each complete
+ * record is realigned and appended to ns_rec/ns_recend, and the loop then
+ * looks for another.
+ * Returns 0 when the queue does not hold a complete record mark or record,
+ * EPERM when a record mark gives a length outside
+ * NFS_MINPACKET..NFS_MAXPACKET, and EWOULDBLOCK when m_copym() fails while
+ * splitting an mbuf at a record boundary.
+ * SLP_GETSTREAM is set in ns_flag for the duration of the call and cleared
+ * on every return; finding it already set on entry is a panic.
+ */
+nfsrv_getstream(slp, waitflag)
+ register struct nfssvc_sock *slp;
+ int waitflag;
+{
+ register struct mbuf *m;
+ register char *cp1, *cp2;
+ register int len;
+ struct mbuf *om, *m2, *recm;
+ u_long recmark;
+
+ if (slp->ns_flag & SLP_GETSTREAM)
+ panic("nfs getstream");
+ slp->ns_flag |= SLP_GETSTREAM;
+ for (;;) {
+ if (slp->ns_reclen == 0) {
+ if (slp->ns_cc < NFSX_UNSIGNED) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ m = slp->ns_raw;
+ if (m->m_len >= NFSX_UNSIGNED) {
+ bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+ m->m_data += NFSX_UNSIGNED;
+ m->m_len -= NFSX_UNSIGNED;
+ } else {
+ cp1 = (caddr_t)&recmark; /* record mark straddles mbufs: gather it a byte at a time */
+ cp2 = mtod(m, caddr_t);
+ while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+ while (m->m_len == 0) {
+ m = m->m_next;
+ cp2 = mtod(m, caddr_t);
+ }
+ *cp1++ = *cp2++;
+ m->m_data++;
+ m->m_len--;
+ }
+ }
+ slp->ns_cc -= NFSX_UNSIGNED;
+ slp->ns_reclen = ntohl(recmark) & ~0x80000000; /* top bit of the mark is masked off and not otherwise examined here */
+ if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EPERM);
+ }
+ }
+
+ /*
+ * Now get the record part.
+ */
+ if (slp->ns_cc == slp->ns_reclen) {
+ recm = slp->ns_raw; /* the whole raw queue is exactly one record */
+ slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+ slp->ns_cc = slp->ns_reclen = 0;
+ } else if (slp->ns_cc > slp->ns_reclen) {
+ len = 0;
+ m = slp->ns_raw;
+ om = (struct mbuf *)0;
+ while (len < slp->ns_reclen) {
+ if ((len + m->m_len) > slp->ns_reclen) {
+ m2 = m_copym(m, 0, slp->ns_reclen - len,
+ waitflag);
+ if (m2) {
+ if (om) {
+ om->m_next = m2;
+ recm = slp->ns_raw;
+ } else
+ recm = m2;
+ m->m_data += slp->ns_reclen - len;
+ m->m_len -= slp->ns_reclen - len;
+ len = slp->ns_reclen;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EWOULDBLOCK);
+ }
+ } else if ((len + m->m_len) == slp->ns_reclen) {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ recm = slp->ns_raw;
+ om->m_next = (struct mbuf *)0;
+ } else {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ }
+ }
+ slp->ns_raw = m;
+ slp->ns_cc -= len;
+ slp->ns_reclen = 0;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM; /* record not complete yet; ns_reclen is kept for the next call */
+ return (0);
+ }
+ nfs_realign(recm, 10 * NFSX_UNSIGNED);
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = recm;
+ else
+ slp->ns_rec = recm;
+ slp->ns_recend = recm;
+ }
+}
+
+/*
+ * Dequeue the next request record from a server socket and parse its
+ * RPC header into the nfsd structure.
+ * Returns ENOBUFS when the socket is not SLP_VALID or no record is
+ * queued, otherwise whatever nfs_getreq() returns.  If nfs_getreq()
+ * fails, the address mbuf (nd_nam) is released here.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec;
+	int error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return (ENOBUFS);
+	rec = slp->ns_rec;
+	if (rec == (struct mbuf *)0)
+		return (ENOBUFS);
+
+	/* Unlink the record from the head of the queue. */
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec == (struct mbuf *)0)
+		slp->ns_recend = (struct mbuf *)0;
+	else
+		rec->m_nextpkt = (struct mbuf *)0;
+
+	/* A leading MT_SONAME mbuf holds the sender's address. */
+	if (rec->m_type != MT_SONAME) {
+		nd->nd_nam = (struct mbuf *)0;
+		nd->nd_mrep = rec;
+	} else {
+		nd->nd_nam = rec;
+		nd->nd_mrep = rec->m_next;
+		rec->m_next = (struct mbuf *)0;
+	}
+	nd->nd_md = nd->nd_mrep;
+	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+	error = nfs_getreq(nd, TRUE);
+	if (error)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ *
+ * "has_header" is non-zero when the message still starts with the xid
+ * and message-type words; 10 header words are then dissected instead
+ * of 8.
+ * Returns 0 on success. Protocol-level rejections (bad RPC version,
+ * program, program version, procedure or auth flavour) also return 0,
+ * with nd_repstat set and nd_procnum forced to NFSPROC_NOOP.
+ * Requests with an out-of-range length field free the mbuf chain and
+ * return EBADRPC.
+ * On full success nd_md/nd_dpos are updated to point past the
+ * credential, the verifier and (for nqnfs) the piggybacked lease
+ * request.
+ * NOTE(review): the nfsm_* macros use the locals tl, md, dpos, mrep,
+ * t1, cp2 and error by name and presumably jump to nfsmout on failure;
+ * confirm against nfsm_subs.h before renaming anything here.
+ */
+nfs_getreq(nd, has_header)
+ register struct nfsd *nd;
+ int has_header;
+{
+ register int len, i;
+ register u_long *tl;
+ register long t1;
+ struct uio uio;
+ struct iovec iov;
+ caddr_t dpos, cp2;
+ u_long nfsvers, auth_type;
+ int error = 0, nqnfs = 0;
+ struct mbuf *mrep, *md;
+
+ mrep = nd->nd_mrep;
+ md = nd->nd_md;
+ dpos = nd->nd_dpos;
+ if (has_header) {
+ nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+ nd->nd_retxid = *tl++; /* xid is kept as received, not byte-swapped */
+ if (*tl++ != rpc_call) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ } else {
+ nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+ }
+ nd->nd_repstat = 0;
+ if (*tl++ != rpc_vers) {
+ nd->nd_repstat = ERPCMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nfsvers = nfs_vers;
+ if (*tl != nfs_prog) {
+ if (*tl == nqnfs_prog) {
+ nqnfs++;
+ nfsvers = nqnfs_vers;
+ } else {
+ nd->nd_repstat = EPROGUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ }
+ tl++;
+ if (*tl++ != nfsvers) {
+ nd->nd_repstat = EPROGMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+ if (nd->nd_procnum == NFSPROC_NULL) /* null procedure: credentials are not examined */
+ return (0);
+ if (nd->nd_procnum >= NFS_NPROCS ||
+ (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+ (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+ nd->nd_repstat = EPROCUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ auth_type = *tl++;
+ len = fxdr_unsigned(int, *tl++); /* length of the credential body */
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+
+ /*
+ * Handle auth_unix or auth_kerb.
+ */
+ if (auth_type == rpc_auth_unix) {
+ len = fxdr_unsigned(int, *++tl); /* skip one word (stamp), read host name length */
+ if (len < 0 || len > NFS_MAXNAMLEN) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_adv(nfsm_rndup(len)); /* the host name itself is ignored */
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+ len = fxdr_unsigned(int, *tl); /* number of supplementary groups */
+ if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED); /* groups plus the two verifier words */
+ for (i = 1; i <= len; i++) /* slot 0 is left alone; groups beyond NGROUPS are dropped */
+ if (i < NGROUPS)
+ nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+ else
+ tl++;
+ nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+ } else if (auth_type == rpc_auth_kerb) {
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_authlen = fxdr_unsigned(int, *tl);
+ uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+ if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) { /* auth string must fit inside the credential */
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ uio.uio_offset = 0;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ iov.iov_base = (caddr_t)nd->nd_authstr;
+ iov.iov_len = RPCAUTH_MAXSIZ;
+ nfsm_mtouio(&uio, uio.uio_resid);
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ nd->nd_flag |= NFSD_NEEDAUTH;
+ }
+
+ /*
+ * Do we have any use for the verifier.
+ * According to the "Remote Procedure Call Protocol Spec." it
+ * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+ * For now, just skip over it
+ */
+ len = fxdr_unsigned(int, *++tl); /* skip the verifier flavour, read its length */
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ if (len > 0) {
+ nfsm_adv(nfsm_rndup(len));
+ }
+
+ /*
+ * For nqnfs, get piggybacked lease request.
+ */
+ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+ if (nd->nd_nqlflag) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_duration = fxdr_unsigned(int, *tl);
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ } else {
+ nd->nd_nqlflag = NQL_NOVAL;
+ nd->nd_duration = NQ_MINLEASE;
+ }
+ nd->nd_md = md; /* hand the advanced parse position back to the caller */
+ nd->nd_dpos = dpos;
+ return (0);
+nfsmout:
+ return (error);
+}
+
+/*
+ * Hand a server socket with pending work to an idle nfsd.
+ * The nfsd list is scanned for the first daemon marked NFSD_WAITING;
+ * that daemon gets a reference to the socket and is woken up.
+ * SIDE EFFECT: when no daemon is waiting, the socket is flagged
+ * SLP_DOREC and NFSD_CHECKSLP is set on the list head so that a
+ * running nfsd will go look for the work in the nfssvc_sock list.
+ * Sockets that are not SLP_VALID are ignored.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *nd;
+
+	if (!(slp->ns_flag & SLP_VALID))
+		return;
+	for (nd = nfsd_head.nd_next; nd != (struct nfsd *)&nfsd_head;
+	    nd = nd->nd_next) {
+		if ((nd->nd_flag & NFSD_WAITING) == 0)
+			continue;
+		nd->nd_flag &= ~NFSD_WAITING;
+		if (nd->nd_slp)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		nd->nd_slp = slp;
+		wakeup((caddr_t)nd);
+		return;
+	}
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Print "nfs server <server>: <msg>" through tprintf().
+ * A tprintf session is opened on process p when p is non-null;
+ * otherwise a NULL session is passed to tprintf().
+ */
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t session;
+
+	session = NULL;
+	if (p)
+		session = tprintf_open(p);
+	tprintf(session, "nfs server %s: %s\n", server, msg);
+	tprintf_close(session);
+}
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
new file mode 100644
index 0000000..5778f7d
--- /dev/null
+++ b/sys/nfsclient/nfs_subs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ * All of these are filled in by nfs_init() using txdr_unsigned().
+ */
+u_long nfs_procids[NFS_NPROCS]; /* nfs_procids[i] holds procedure number i in xdr form */
+u_long nfs_xdrneg1; /* -1 in xdr form */
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+static u_long nfs_xid = 0; /* last transaction id issued by nfsm_rpchead(); 0 is never used */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON }; /* presumably indexed by the NFS file type -- confirm against nfstov_type() */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ *
+ * What is actually built here is the first mbuf of the request body:
+ * it is empty unless vp lives on an NFSMNT_NQNFS mount, in which case
+ * it starts with the lease request (flag and lease term, or a single
+ * zero word when no lease is needed).
+ * Returns the mbuf; *bposp is set to the position where the caller
+ * continues building.
+ * NOTE(review): nfsm_build presumably uses the locals mb, mb2 and bpos
+ * by name -- confirm in nfsm_subs.h before renaming them.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2;
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0; /* a zero word tells the server no lease is requested */
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * The header consists of xid, call, RPC version, program, program
+ * version and procedure, followed by the credential (RPCAUTH_UNIX built
+ * from cr, or RPCAUTH_NQNFS built from cr_uid plus auth_str) and a null
+ * verifier. "mrest" (mrest_len bytes) is linked on behind the header.
+ * *mbp is set to the last header mbuf and *xidp to the xid in xdr form.
+ * NOTE(review): nfsm_build presumably uses the locals mb, mb2 and bpos
+ * by name -- confirm in nfsm_subs.h before renaming them.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED; /* room for the uid and string length words */
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ if (++nfs_xid == 0) /* xid 0 is skipped when the counter wraps */
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ grpsiz = (auth_len >> 2) - 5; /* words left after stamp, host, uid, gid and count */
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ siz = auth_len;
+ while (siz > 0) { /* copy auth_str, adding mbufs as the current one fills */
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) { /* zero-fill the padding after the string */
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(RPCAUTH_NULL); /* verifier: flavour RPCAUTH_NULL, length 0 */
+ *tl = 0;
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * Copies siz bytes starting at *dpos in mbuf *mrep into the iovecs of
+ * uiop, then skips the nfsm_rndup(siz) - siz bytes that follow.
+ * *mrep and *dpos are updated to the new position; the uio's iovec
+ * pointer, count, offset and resid are advanced as data is copied.
+ * Returns EFBIG when the uio runs out of iovecs, EBADRPC when the mbuf
+ * chain ends early, otherwise 0 or the result of nfs_adv().
+ * NOTE(review): the return value of copyout() is ignored.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp; /* bytes left in the current mbuf */
+ rem = nfsm_rndup(siz)-siz; /* trailing bytes to skip after the data */
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount that goes into this iovec */
+ while (left > 0) {
+ while (len == 0) { /* current mbuf exhausted: step to the next one */
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec completely filled: move to the next */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ if (rem > 0) {
+ if (len < rem) /* the skipped bytes run into the next mbuf */
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * Appends siz bytes from uiop to the chain ending at *mq, allocating
+ * further mbufs (clusters when siz > MLEN) as needed, then appends
+ * nfsm_rndup(siz) - siz zero bytes.
+ * On return *mq is the last mbuf and *bpos the position after the
+ * data; the uio's iovec pointer, count, offset and resid are advanced.
+ * Returns EINVAL when the uio runs out of iovecs, otherwise 0.
+ * NOTE(review): the return value of copyin() is ignored.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz; /* zero bytes to append after the data */
+ mp = mp2 = *mq; /* mp: mbuf being filled, mp2: current tail of the chain */
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EINVAL);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left; /* amount taken from this iovec */
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) { /* current mbuf is full: link a fresh one on */
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ if (uiop->uio_iov->iov_len <= siz) { /* iovec fully consumed: move to the next */
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) { /* no room for the zero fill: use a new mbuf */
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * "left" is the number of bytes remaining in *mdp at *dposp. On
+ * success *cp2 points at siz contiguous bytes and *mdp / *dposp are
+ * moved past them. When the bytes straddle mbufs, a new mbuf is
+ * inserted after the current one and the data is gathered into it.
+ * Returns EBADRPC when the chain holds fewer than siz bytes; panics
+ * when a straddling request is larger than MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ while (left == 0) { /* skip to the next mbuf that holds data */
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) { /* easy case: already contiguous */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next; /* insert the gather mbuf right after mp */
+ mp->m_next = mp2;
+ mp->m_len -= left; /* the tail of mp moves into the new mbuf */
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ *mdp = mp2; /* parsing continues in the mbuf the last bytes came from */
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Move the parse position forward in an mbuf chain.
+ * "left" is the number of bytes still available in *mdp and "offs" the
+ * distance to skip.  Whole mbufs are stepped over until the remaining
+ * offset falls inside one; *mdp is then set to that mbuf and *dposp to
+ * the remaining offset from the start of its data.
+ * Returns EBADRPC if the chain ends first, otherwise 0.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *mp = *mdp;
+	register int avail;
+
+	for (avail = left; avail < offs; avail = mp->m_len) {
+		offs -= avail;
+		if ((mp = mp->m_next) == NULL)
+			return (EBADRPC);
+	}
+	*mdp = mp;
+	*dposp = mtod(mp, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ *
+ * Writes the length word for siz followed by the siz bytes at cp,
+ * starting in whatever trailing space *mb still has at *bpos and
+ * continuing in newly allocated mbufs (clusters when more than MLEN
+ * bytes remain). The final mbuf's length is rounded with nfsm_rndup().
+ * On return *mb is the last mbuf allocated and *bpos the position
+ * after the data. Always returns 0.
+ * NOTE(review): assumes the string does not fit in the trailing space
+ * of *mb, so that at least one new mbuf is allocated (m1 is otherwise
+ * used uninitialized) -- confirm against the nfsm_strtom macro.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ putsize = 1; /* the length word still has to be written */
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) { /* fill the rest of this mbuf with string data */
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ m1->m_len = NFSMSIZ(m1); /* used as "capacity" until the real length is set below */
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) { /* last piece: round up and pre-zero the final word */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ *
+ * Converts the constant RPC/NFS protocol values to xdr form, clears the
+ * async daemon table, initializes the nfsnode table, the server data
+ * structures and request cache, sets up the nqnfs server state (only
+ * while nqnfsstarttime is still 0), and finally initializes the
+ * request list and calls nfs_timer().
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS); /* note: the "kerb" variable holds the RPCAUTH_NQNFS flavour */
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ nqthead.th_head[0] = &nqthead; /* empty list: both links point back at the head */
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ * copy the attributes to *vaper
+ *
+ * The attributes are read at *dposp in mbuf *mdp; both are advanced
+ * past them. For a vnode that is still VNON the type and vnode ops
+ * are set up here; a device vnode that aliases an existing one is
+ * replaced and *vpp is updated to the aliased vnode.
+ * Returns 0 on success, the error from nfsm_disct(), or EOPNOTSUPP
+ * for a fifo when the kernel is built without FIFO.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos; /* bytes left in the current mbuf */
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ if (vtyp == VNON || vtyp == VREG) /* derive the type from the mode bits instead */
+ vtyp = IFTOVT(vmode);
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ if (vtyp == VCHR && rdev == 0xffffffff) /* a character device with rdev of all ones is treated as a fifo */
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ */
+ if (nq = np->n_forw) /* unlink np from its hash chain */
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp) /* reinsert np at the head of its hash chain */
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec); /* the usec word of ctime is stored as the generation number */
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) { /* locally modified file: keep the larger of the two sizes */
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size;
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ np->n_attrstamp = time.tv_sec; /* time at which the cache was loaded */
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) { /* report the locally recorded access/update times instead */
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Return the cached attributes of a vnode if they are still usable.
+ * On NFSMNT_NQLOOKLEASE mounts the cache counts as a miss when
+ * NQNFS_CKCACHABLE() fails for a read lease or the attribute stamp is
+ * 0; on other mounts it is a miss once the stamp is NFS_ATTRTIMEO()
+ * seconds old or older.
+ * On a hit the cached and local file sizes are reconciled, the
+ * attributes are copied to *vaper (with locally recorded access and
+ * update times substituted when NCHG is set) and 0 is returned.
+ * On a miss ENOENT is returned.
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *cache = &np->n_vattr;
+	int miss;
+
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE)
+		miss = (!NQNFS_CKCACHABLE(vp, NQL_READ) ||
+		    np->n_attrstamp == 0);
+	else
+		miss = ((time.tv_sec - np->n_attrstamp) >= NFS_ATTRTIMEO(np));
+	if (miss) {
+		nfsstats.attrcache_misses++;
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+
+	/* Reconcile the cached size with the size kept in the nfsnode. */
+	if (cache->va_size != np->n_size) {
+		if (cache->va_type != VREG) {
+			np->n_size = cache->va_size;
+		} else {
+			if ((np->n_flag & NMODIFIED) &&
+			    cache->va_size < np->n_size)
+				cache->va_size = np->n_size;
+			else
+				np->n_size = cache->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		}
+	}
+	bcopy((caddr_t)cache, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+	if ((np->n_flag & NMODIFIED) == 0) {
+		np->n_size = vaper->va_size;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	} else if (np->n_size > vaper->va_size)
+	if (np->n_size > vaper->va_size)
+		vaper->va_size = np->n_size;
+#endif
+	if ((np->n_flag & NCHG) == 0)
+		return (0);
+	if (np->n_flag & NACC) {
+		vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+		vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ * The name (len bytes, no '\0' or '/' allowed) is copied from the mbuf
+ * chain at *dposp into a freshly allocated path buffer, and *mdp and
+ * *dposp are advanced past the name and its padding. The starting
+ * directory comes from the file handle via nfsrv_fhtovp().
+ * Returns 0 on success, otherwise EBADRPC (chain too short), EINVAL
+ * (illegal character or symbolic link found), ENOTDIR, or the error
+ * from nfs_adv(), nfsrv_fhtovp() or lookup().
+ * The path buffer is kept (HASBUF set) only on success with SAVENAME
+ * or SAVESTART requested; on every other path it is freed at "out".
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to cnp->cn_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp; /* bytes left in the current mbuf */
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) {
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || *fromcp == '/') {
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp; /* name hash is the sum of the bytes */
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len; /* len now counts the padding bytes after the name */
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0);
+ }
+out: /* also reached on success (error == 0) when the name need not be saved */
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (error);
+}
+
+/*
+ * A fiddled version of m_adj() that ensures null fill to a long
+ * boundary and only trims off the back end
+ *
+ * Removes "len" bytes from the end of the chain mp, then overwrites
+ * the last "nul" bytes of what remains with '\0'. Emptied mbufs stay
+ * on the chain with m_len set to 0; nothing is freed.
+ */
+void
+nfsm_adj(mp, len, nul)
+ struct mbuf *mp;
+ register int len;
+ int nul;
+{
+ register struct mbuf *m;
+ register int count, i;
+ register char *cp;
+
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ count = 0;
+ m = mp;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len > len) { /* the trim fits entirely inside the last mbuf */
+ m->m_len -= len;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ return;
+ }
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ for (m = mp; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ break;
+ }
+ count -= m->m_len;
+ }
+ while (m = m->m_next) /* everything after the new last mbuf becomes empty */
+ m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * - look up fsid in mount list (if not found ret error)
+ * - get vp and export rights by calling VFS_FHTOVP()
+ * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ * - if not lockflag unlock it with VOP_UNLOCK()
+ *
+ * For MNT_EXKERB exports the credential is instead replaced by the one
+ * registered for cred->cr_uid in the socket's uid hash list.
+ * Whenever the group list of "cred" is replaced, cr_ngroups is set to
+ * the number of groups copied (at most NGROUPS), so that no group
+ * supplied by the client survives the substitution.
+ *
+ * Returns 0 with *vpp and *rdonlyp set, ESTALE when the file system is
+ * not mounted, the error from VFS_FHTOVP(), or NQNFS_AUTHERR when a
+ * MNT_EXKERB export has no mapping for the uid (the vnode is released
+ * with vput() in that case).
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS;
+			    i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			/* The group count must follow the group list. */
+			cred->cr_ngroups = i;
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		/*
+		 * Without this the caller-supplied count (and any
+		 * caller-supplied groups past credanon's) stays in force.
+		 */
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in mbuf "nam" names the same host as the
+ * stored address "haddr", for the given address family.
+ * Returns TRUE (1) only on a positive match; unknown families and any
+ * doubtful case yield FALSE (0).
+ * AF_INET is special-cased: the stored side is just the 4 byte
+ * "struct in_addr" value, so no address mbuf has to be kept for it.
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	if (family == AF_INET) {
+		register struct sockaddr_in *sin;
+
+		sin = mtod(nam, struct sockaddr_in *);
+		return (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *got, *want;
+
+		got = mtod(nam, struct sockaddr_iso *);
+		want = mtod(haddr->had_nam, struct sockaddr_iso *);
+		return (got->siso_family == AF_ISO &&
+		    got->siso_nlen > 0 &&
+		    got->siso_nlen == want->siso_nlen &&
+		    SAME_ISOADDR(got, want));
+	}
+#endif /* ISO */
+	return (0);
+}
diff --git a/sys/nfsclient/nfs_vfsops.c b/sys/nfsclient/nfs_vfsops.c
new file mode 100644
index 0000000..1f18676
--- /dev/null
+++ b/sys/nfsclient/nfs_vfsops.c
@@ -0,0 +1,740 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vfsops.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/conf.h>
+#include <sys/ioctl.h>
+#include <sys/signal.h>
+#include <sys/proc.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/systm.h>
+
+#include <net/if.h>
+#include <net/route.h>
+#include <netinet/in.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsdiskless.h>
+#include <nfs/nqnfs.h>
+
+/*
+ * nfs vfs operations.
+ * The initializer is positional: the entries are matched, in order, to
+ * the members of struct vfsops (declared elsewhere).
+ */
+struct vfsops nfs_vfsops = {
+ nfs_mount,
+ nfs_start, /* no-op, always returns 0 */
+ nfs_unmount,
+ nfs_root,
+ nfs_quotactl, /* not supported, returns EOPNOTSUPP */
+ nfs_statfs,
+ nfs_sync,
+ nfs_vget, /* not supported, returns EOPNOTSUPP */
+ nfs_fhtovp, /* always returns EINVAL */
+ nfs_vptofh, /* always returns EINVAL */
+ nfs_init,
+};
+
+/*
+ * This structure must be filled in by a primary bootstrap or bootstrap
+ * server for a diskless/dataless machine. It is initialized below just
+ * to ensure that it is allocated to initialized data (.data not .bss).
+ * nfs_mountroot() takes its interface, gateway, swap and root settings
+ * from it.
+ */
+struct nfs_diskless nfs_diskless = { 0 };
+
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers;
+void nfs_disconnect __P((struct nfsmount *));
+void nfsargs_ntoh __P((struct nfs_args *)); /* net to host byte order, in place */
+static struct mount *nfs_mountdiskless __P((char *, char *, int,
+ struct sockaddr_in *, struct nfs_args *, register struct vnode **));
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * nfs statfs call
+ * Sends an NFSPROC_STATFS request for the mount's root file handle
+ * (nm_fh) and translates the reply into *sbp.
+ *
+ *	mp	the nfs mount being queried
+ *	sbp	statfs buffer to fill in
+ *	p	process the request is issued for
+ *
+ * f_files and f_ffree are only taken from the reply on nqnfs mounts;
+ * otherwise they are set to 0.  Returns the value left in error: the
+ * nfs_nget() failure, or 0 when nothing changed it afterwards.
+ * NOTE(review): the nfsm_* macros are defined elsewhere; they presumably
+ * use the locals declared here (bpos, dpos, mreq, mrep, ...) by name and
+ * may set error and skip ahead to nfsm_reqdone on failure -- confirm
+ * before renaming a local or reordering statements.
+ */
+int
+nfs_statfs(mp, sbp, p)
+ struct mount *mp;
+ register struct statfs *sbp;
+ struct proc *p;
+{
+ register struct vnode *vp;
+ register struct nfsv2_statfs *sfp;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ struct ucred *cred;
+ struct nfsnode *np;
+
+ nmp = VFSTONFS(mp);
+ isnq = (nmp->nm_flag & NFSMNT_NQNFS); /* non-zero on an nqnfs mount */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np)) /* node for the remote root */
+ return (error);
+ vp = NFSTOV(np);
+ nfsstats.rpccnt[NFSPROC_STATFS]++;
+ cred = crget(); /* credential used for this request, released by crfree() below */
+ cred->cr_ngroups = 1;
+ nfsm_reqhead(vp, NFSPROC_STATFS, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_STATFS, p, cred);
+ nfsm_dissect(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq)); /* reply size depends on isnq */
+ sbp->f_type = MOUNT_NFS;
+ sbp->f_flags = nmp->nm_flag;
+ sbp->f_iosize = NFS_MAXDGRAMDATA;
+ sbp->f_bsize = fxdr_unsigned(long, sfp->sf_bsize);
+ sbp->f_blocks = fxdr_unsigned(long, sfp->sf_blocks);
+ sbp->f_bfree = fxdr_unsigned(long, sfp->sf_bfree);
+ sbp->f_bavail = fxdr_unsigned(long, sfp->sf_bavail);
+ if (isnq) {
+ sbp->f_files = fxdr_unsigned(long, sfp->sf_files);
+ sbp->f_ffree = fxdr_unsigned(long, sfp->sf_ffree);
+ } else {
+ sbp->f_files = 0;
+ sbp->f_ffree = 0;
+ }
+ if (sbp != &mp->mnt_stat) { /* caller's buffer is not the mount's own copy */
+ bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+ bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+ }
+ nfsm_reqdone;
+ vrele(vp); /* drop the reference obtained through nfs_nget() above */
+ crfree(cred);
+ return (error);
+}
+
+/*
+ * Mount the root file system over nfs on a diskless/dataless machine.
+ * Everything needed is read from the nfs_diskless structure, which some
+ * primary bootstrap must have filled in properly beforehand.
+ * The steps are:
+ *	- do enough of "ifconfig" (SIOCAIFADDR through ifioctl()) for the
+ *	  system to be able to talk to the server
+ *	- if nfs_diskless.mygateway is filled in, install that address as
+ *	  the default gateway
+ *	- if swdevt[0].sw_dev == NODEV, hand craft the swap nfs vnode
+ *	  hanging off a fake mount point
+ *	- build the rootfs mount point; mountnfs() does the rest.
+ * Any failure along the way panics.  Returns 0.
+ */
+int
+nfs_mountroot()
+{
+	register struct nfs_diskless *nd = &nfs_diskless;
+	register struct mount *mp;
+	struct proc *p = curproc;	/* XXX */
+	struct socket *so;
+	struct vnode *vp;
+	int error, i;
+
+	/*
+	 * XXX the arp code will wedge if the interface is initialized
+	 * while time is still zero, so make it non-zero first.
+	 */
+	if (time.tv_sec == 0)
+		time.tv_sec = 1;
+
+#ifdef notyet
+	/* Set up swap credentials. */
+	proc0.p_ucred->cr_uid = ntohl(nd->swap_ucred.cr_uid);
+	proc0.p_ucred->cr_gid = ntohl(nd->swap_ucred.cr_gid);
+	proc0.p_ucred->cr_ngroups = ntohs(nd->swap_ucred.cr_ngroups);
+	if (proc0.p_ucred->cr_ngroups > NGROUPS)
+		proc0.p_ucred->cr_ngroups = NGROUPS;
+	for (i = 0; i < proc0.p_ucred->cr_ngroups; i++)
+		proc0.p_ucred->cr_groups[i] = ntohl(nd->swap_ucred.cr_groups[i]);
+#endif
+
+	/*
+	 * Configure the critical net interface, ifconfig(8) style, through
+	 * a datagram socket that is closed again right away.
+	 */
+	error = socreate(nd->myif.ifra_addr.sa_family, &so, SOCK_DGRAM, 0);
+	if (error)
+		panic("nfs_mountroot: socreate: %d", error);
+	error = ifioctl(so, SIOCAIFADDR, (caddr_t)&nd->myif, p);
+	if (error)
+		panic("nfs_mountroot: SIOCAIFADDR: %d", error);
+	soclose(so);
+
+	/*
+	 * A gateway with a non-zero sin_len counts as filled in and becomes
+	 * the default route: destination and mask are all zeroes apart from
+	 * the destination's family and length.
+	 */
+	if (nd->mygateway.sin_len != 0) {
+		struct sockaddr_in dst, netmask;
+
+		bzero((caddr_t)&netmask, sizeof(netmask));
+		bzero((caddr_t)&dst, sizeof(dst));
+		dst.sin_family = AF_INET;
+		dst.sin_len = sizeof(dst);
+		error = rtrequest(RTM_ADD, (struct sockaddr *)&dst,
+		    (struct sockaddr *)&nd->mygateway,
+		    (struct sockaddr *)&netmask,
+		    RTF_UP | RTF_GATEWAY, (struct rtentry **)0);
+		if (error)
+			panic("nfs_mountroot: RTM_ADD: %d", error);
+	}
+
+	/*
+	 * Swap goes to a block device unless swdevt[0].sw_dev == NODEV.
+	 * In the NODEV case swap lives on an nfs node, and a fake mount
+	 * point is created just for the swap vnode so that the swap file
+	 * can be on a different server from the rootfs.
+	 */
+	if (swdevt[0].sw_dev != NODEV) {
+		if (bdevvp(swapdev, &swapdev_vp))
+			panic("nfs_mountroot: can't setup swapdev_vp");
+	} else {
+		nd->swap_args.fh = (nfsv2fh_t *)nd->swap_fh;
+		(void) nfs_mountdiskless(nd->swap_hostnam, "/swap", 0,
+		    &nd->swap_saddr, &nd->swap_args, &vp);
+
+		/*
+		 * The swap file is not the root dir of a file system, so
+		 * the vnode is hacked into a regular file.
+		 */
+		vp->v_type = VREG;
+		vp->v_flag = 0;
+		swapdev_vp = vp;
+		VREF(vp);
+		swdevt[0].sw_vp = vp;
+		swdevt[0].sw_nblks = ntohl(nd->swap_nblks);
+	}
+
+	/*
+	 * Create the rootfs mount point and put it on the mount list.
+	 */
+	nd->root_args.fh = (nfsv2fh_t *)nd->root_fh;
+	mp = nfs_mountdiskless(nd->root_hostnam, "/", MNT_RDONLY,
+	    &nd->root_saddr, &nd->root_args, &vp);
+
+	if (vfs_lock(mp))
+		panic("nfs_mountroot: vfs_lock");
+	TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list);
+	mp->mnt_flag |= MNT_ROOTFS;
+	mp->mnt_vnodecovered = NULLVP;
+	vfs_unlock(mp);
+	rootvp = vp;
+
+	/*
+	 * Not really an nfs issue, but it is much easier to set hostname
+	 * here and then let the "/etc/rc.xxx" files mount the right /var
+	 * based upon its preset value.  The copy is forcibly terminated and
+	 * hostnamelen is the offset of its first NUL.
+	 */
+	bcopy(nd->my_hostnam, hostname, MAXHOSTNAMELEN);
+	hostname[MAXHOSTNAMELEN - 1] = '\0';
+	i = 0;
+	while (i < MAXHOSTNAMELEN && hostname[i] != '\0')
+		i++;
+	hostnamelen = i;
+	inittodr(ntohl(nd->root_time));
+	return (0);
+}
+
+/*
+ * Internal version of mount system call for diskless setup.
+ *
+ *	path		handed to mountnfs() as the mounted-from (host) name
+ *	which		handed to mountnfs() as the mounted-on name ("/" or
+ *			"/swap" for the callers in this file); also used to
+ *			label the panic messages
+ *	mountflag	initial value of mnt_flag
+ *	sin		server address; sin_len bytes of it are copied into
+ *			a freshly allocated mbuf
+ *	args		nfs_args whose integer fields are still in network
+ *			byte order; converted in place by nfsargs_ntoh()
+ *	vpp		receives the vnode handed back by mountnfs()
+ *
+ * Returns the newly allocated mount structure.  Every failure panics.
+ */
+static struct mount *
+nfs_mountdiskless(path, which, mountflag, sin, args, vpp)
+	char *path;
+	char *which;
+	int mountflag;
+	struct sockaddr_in *sin;
+	struct nfs_args *args;
+	register struct vnode **vpp;
+{
+	register struct mount *mp;
+	register struct mbuf *m;
+	register int error;
+
+	mp = (struct mount *)malloc((u_long)sizeof(struct mount),
+	    M_MOUNT, M_NOWAIT);
+	if (mp == NULL)
+		panic("nfs_mountroot: %s mount malloc", which);
+	bzero((char *)mp, (u_long)sizeof(struct mount));
+	mp->mnt_op = &nfs_vfsops;
+	mp->mnt_flag = mountflag;
+
+	/*
+	 * The address is copied into the data area of one plain mbuf,
+	 * which has room for MLEN bytes.  sin_len comes from the
+	 * bootstrap, so refuse a length that would overrun the mbuf.
+	 */
+	if (sin->sin_len > MLEN)
+		panic("nfs_mountroot: %s mount address length %d", which,
+		    sin->sin_len);
+	MGET(m, MT_SONAME, M_DONTWAIT);
+	if (m == NULL)
+		panic("nfs_mountroot: %s mount mbuf", which);
+	bcopy((caddr_t)sin, mtod(m, caddr_t), sin->sin_len);
+	m->m_len = sin->sin_len;
+	nfsargs_ntoh(args);
+	error = mountnfs(args, mp, m, which, path, vpp);
+	if (error)
+		panic("nfs_mountroot: mount %s on %s: %d", path, which, error);
+
+	return (mp);
+}
+
+/*
+ * Convert the integer fields of the nfs_args structure from net byte order
+ * to host byte order. Called by nfs_mountroot() above.
+ * Each field listed below is converted in place with NTOHL().  The other
+ * members that nfs_mount() uses (fh, addr, addrlen, hostname) are not
+ * touched here.
+ */
+void
+nfsargs_ntoh(nfsp)
+ register struct nfs_args *nfsp;
+{
+
+ NTOHL(nfsp->sotype);
+ NTOHL(nfsp->proto);
+ NTOHL(nfsp->flags);
+ NTOHL(nfsp->wsize);
+ NTOHL(nfsp->rsize);
+ NTOHL(nfsp->timeo);
+ NTOHL(nfsp->retrans);
+ NTOHL(nfsp->maxgrouplist);
+ NTOHL(nfsp->readahead);
+ NTOHL(nfsp->leaseterm);
+ NTOHL(nfsp->deadthresh);
+}
+
+/*
+ * VFS Operations.
+ *
+ * mount system call
+ * Copies the nfs_args, the root file handle, the mount point path and the
+ * host name in from user space, builds the server address mbuf with
+ * sockargs() and lets mountnfs() do the real work.
+ * It seems a bit dumb to copyinstr() the strings here only to bcopy()
+ * them again in mountnfs(), but this way every copy-in error is detected
+ * before sockargs() allocates an mbuf, which would otherwise have to be
+ * released on a later error.
+ * Returns 0 or an error number.
+ */
+/* ARGSUSED */
+int
+nfs_mount(mp, path, data, ndp, p)
+	struct mount *mp;
+	char *path;
+	caddr_t data;
+	struct nameidata *ndp;
+	struct proc *p;
+{
+	struct nfs_args args;
+	nfsv2fh_t rootfh;
+	char onbuf[MNAMELEN], frombuf[MNAMELEN];
+	struct mbuf *nam;
+	struct vnode *vp;
+	u_int len;
+	int error;
+
+	error = copyin(data, (caddr_t)&args, sizeof (struct nfs_args));
+	if (error)
+		return (error);
+	error = copyin((caddr_t)args.fh, (caddr_t)&rootfh, sizeof (nfsv2fh_t));
+	if (error)
+		return (error);
+
+	/* Mount point name, with the rest of the buffer cleared. */
+	error = copyinstr(path, onbuf, MNAMELEN-1, &len);
+	if (error)
+		return (error);
+	bzero(&onbuf[len], MNAMELEN - len);
+
+	/* Server host name, treated the same way. */
+	error = copyinstr(args.hostname, frombuf, MNAMELEN-1, &len);
+	if (error)
+		return (error);
+	bzero(&frombuf[len], MNAMELEN - len);
+
+	/* sockargs() call must be after above copyin() calls */
+	error = sockargs(&nam, (caddr_t)args.addr, args.addrlen, MT_SONAME);
+	if (error)
+		return (error);
+
+	/* From here on args refers to the kernel copy of the file handle. */
+	args.fh = &rootfh;
+	return (mountnfs(&args, mp, nam, onbuf, frombuf, &vp));
+}
+
+/*
+ * Common code for mount and mountroot.
+ *
+ *	argp	mount arguments, integer fields in host byte order; the
+ *		file handle argp->fh points at is bcopy()ed into nm_fh
+ *	mp	mount structure being set up
+ *	nam	mbuf holding the server address; it is kept in nm_nam on
+ *		success and freed here on error or for MNT_UPDATE
+ *	pth	mounted-on name, MNAMELEN bytes copied to f_mntonname
+ *	hst	mounted-from name, MNAMELEN bytes copied to f_mntfromname
+ *	vpp	receives the vnode of the remote root
+ *
+ * Returns 0 or an error number.  An MNT_UPDATE request currently changes
+ * nothing and returns 0.
+ */
+int
+mountnfs(argp, mp, nam, pth, hst, vpp)
+	register struct nfs_args *argp;
+	register struct mount *mp;
+	struct mbuf *nam;
+	char *pth, *hst;
+	struct vnode **vpp;
+{
+	register struct nfsmount *nmp;
+	struct nfsnode *rootnp;
+	int error;
+
+	if (mp->mnt_flag & MNT_UPDATE) {
+		nmp = VFSTONFS(mp);
+		/* update paths, file handles, etc, here XXX */
+		m_freem(nam);
+		return (0);
+	}
+
+	MALLOC(nmp, struct nfsmount *, sizeof (struct nfsmount),
+	    M_NFSMNT, M_WAITOK);
+	bzero((caddr_t)nmp, sizeof (struct nfsmount));
+	mp->mnt_data = (qaddr_t)nmp;
+	getnewfsid(mp, MOUNT_NFS);
+	nmp->nm_mountp = mp;
+	nmp->nm_flag = argp->flags;
+
+	/* NFSMNT_NQNFS and NFSMNT_MYWRITE may not be combined. */
+	if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_MYWRITE)) ==
+	    (NFSMNT_NQNFS | NFSMNT_MYWRITE)) {
+		error = EPERM;
+		goto bad;
+	}
+
+	/*
+	 * mnt_maxsymlinklen has to be non-zero for nqnfs so that the
+	 * COMPAT_43 routines know that the d_type field in directories
+	 * is being set (and can zero it for unsuspecting binaries).
+	 */
+	if (nmp->nm_flag & NFSMNT_NQNFS)
+		mp->mnt_maxsymlinklen = 1;
+
+	/* Defaults; the caller's options are applied on top further down. */
+	nmp->nm_timeo = NFS_TIMEO;
+	nmp->nm_retry = NFS_RETRANS;
+	nmp->nm_wsize = NFS_WSIZE;
+	nmp->nm_rsize = NFS_RSIZE;
+	nmp->nm_numgrps = NFS_MAXGRPS;
+	nmp->nm_readahead = NFS_DEFRAHEAD;
+	nmp->nm_leaseterm = NQ_DEFLEASE;
+	nmp->nm_deadthresh = NQ_DEADTHRESH;
+	nmp->nm_tnext = (struct nfsnode *)nmp;
+	nmp->nm_tprev = (struct nfsnode *)nmp;
+	nmp->nm_inprog = NULLVP;
+	bcopy((caddr_t)argp->fh, (caddr_t)&nmp->nm_fh, sizeof(nfsv2fh_t));
+	mp->mnt_stat.f_type = MOUNT_NFS;
+	bcopy(hst, mp->mnt_stat.f_mntfromname, MNAMELEN);
+	bcopy(pth, mp->mnt_stat.f_mntonname, MNAMELEN);
+	nmp->nm_nam = nam;
+
+	/* Timeout: scaled by NFS_HZ / 10, rounded, then clamped. */
+	if ((argp->flags & NFSMNT_TIMEO) && argp->timeo > 0) {
+		nmp->nm_timeo = (argp->timeo * NFS_HZ + 5) / 10;
+		if (nmp->nm_timeo < NFS_MINTIMEO)
+			nmp->nm_timeo = NFS_MINTIMEO;
+		else if (nmp->nm_timeo > NFS_MAXTIMEO)
+			nmp->nm_timeo = NFS_MAXTIMEO;
+	}
+
+	/* Retransmit count, capped at NFS_MAXREXMIT. */
+	if ((argp->flags & NFSMNT_RETRANS) && argp->retrans > 1) {
+		nmp->nm_retry = argp->retrans;
+		if (nmp->nm_retry > NFS_MAXREXMIT)
+			nmp->nm_retry = NFS_MAXREXMIT;
+	}
+
+	/* Write size: rounded down to a multiple of 512, then clamped. */
+	if ((argp->flags & NFSMNT_WSIZE) && argp->wsize > 0) {
+		nmp->nm_wsize = argp->wsize & ~0x1ff;
+		if (nmp->nm_wsize <= 0)
+			nmp->nm_wsize = 512;
+		else if (nmp->nm_wsize > NFS_MAXDATA)
+			nmp->nm_wsize = NFS_MAXDATA;
+	}
+	if (nmp->nm_wsize > MAXBSIZE)
+		nmp->nm_wsize = MAXBSIZE;
+
+	/* Read size: same treatment as the write size. */
+	if ((argp->flags & NFSMNT_RSIZE) && argp->rsize > 0) {
+		nmp->nm_rsize = argp->rsize & ~0x1ff;
+		if (nmp->nm_rsize <= 0)
+			nmp->nm_rsize = 512;
+		else if (nmp->nm_rsize > NFS_MAXDATA)
+			nmp->nm_rsize = NFS_MAXDATA;
+	}
+	if (nmp->nm_rsize > MAXBSIZE)
+		nmp->nm_rsize = MAXBSIZE;
+
+	/* The remaining options are only accepted when within range. */
+	if ((argp->flags & NFSMNT_MAXGRPS) && argp->maxgrouplist >= 0 &&
+	    argp->maxgrouplist <= NFS_MAXGRPS) {
+		nmp->nm_numgrps = argp->maxgrouplist;
+	}
+	if ((argp->flags & NFSMNT_READAHEAD) && argp->readahead >= 0 &&
+	    argp->readahead <= NFS_MAXRAHEAD) {
+		nmp->nm_readahead = argp->readahead;
+	}
+	if ((argp->flags & NFSMNT_LEASETERM) && argp->leaseterm >= 2 &&
+	    argp->leaseterm <= NQ_MAXLEASE) {
+		nmp->nm_leaseterm = argp->leaseterm;
+	}
+	if ((argp->flags & NFSMNT_DEADTHRESH) && argp->deadthresh >= 1 &&
+	    argp->deadthresh <= NQ_NEVERDEAD) {
+		nmp->nm_deadthresh = argp->deadthresh;
+	}
+
+	/* Set up the sockets and per-host congestion */
+	nmp->nm_sotype = argp->sotype;
+	nmp->nm_soproto = argp->proto;
+
+	/*
+	 * Only datagram sockets are connected now.  For connection based
+	 * sockets (TCP,...) the connect is deferred until the first
+	 * request, in case the server is not responding.
+	 */
+	if (nmp->nm_sotype == SOCK_DGRAM) {
+		error = nfs_connect(nmp, (struct nfsreq *)0);
+		if (error)
+			goto bad;
+	}
+
+	/*
+	 * This is silly, but f_iosize has to be set so that vinifod()
+	 * works.  An nfs_statfs() is not wanted here, since it could get
+	 * stuck on a dead server while a lock on the mount point is held.
+	 */
+	mp->mnt_stat.f_iosize = NFS_MAXDGRAMDATA;
+
+	/*
+	 * A reference count is needed on the nfsnode representing the
+	 * remote root.  If this object is not persistent, then backward
+	 * traversals of the mount point (i.e. "..") will not work if the
+	 * nfsnode gets flushed out of the cache.  Ufs does not have this
+	 * problem, because one can identify root inodes by their
+	 * number == ROOTINO (2).
+	 */
+	error = nfs_nget(mp, &nmp->nm_fh, &rootnp);
+	if (error)
+		goto bad;
+	*vpp = NFSTOV(rootnp);
+
+	return (0);
+
+bad:
+	nfs_disconnect(nmp);
+	free((caddr_t)nmp, M_NFSMNT);
+	m_freem(nam);
+	return (error);
+}
+
+/*
+ * unmount system call
+ *
+ *	mp		the nfs mount to take down
+ *	mntflags	MNT_FORCE requests a forced unmount
+ *	p		not used in this routine
+ *
+ * Returns 0 on success; EINVAL for a forced unmount when doforce is clear
+ * or the mount is the root file system; EBUSY when the root vnode has
+ * more than two references; otherwise the error from nfs_nget() or
+ * vflush().
+ */
+int
+nfs_unmount(mp, mntflags, p)
+ struct mount *mp;
+ int mntflags;
+ struct proc *p;
+{
+ register struct nfsmount *nmp;
+ struct nfsnode *np;
+ struct vnode *vp;
+ int error, flags = 0;
+ extern int doforce;
+
+ if (mntflags & MNT_FORCE) {
+ if (!doforce || (mp->mnt_flag & MNT_ROOTFS))
+ return (EINVAL);
+ flags |= FORCECLOSE; /* passed on to vflush() below */
+ }
+ nmp = VFSTONFS(mp);
+ /*
+ * Goes something like this..
+ * - Check for activity on the root vnode (other than ourselves).
+ * - Call vflush() to clear out vnodes for this file system,
+ * except for the root vnode.
+ * - Decrement reference on the vnode representing remote root.
+ * - Close the socket
+ * - Free up the data structures
+ */
+ /*
+ * We need to decrement the ref. count on the nfsnode representing
+ * the remote root. See comment in mountnfs(). The VFS unmount()
+ * has done vput on this vnode, otherwise we would get deadlock!
+ */
+ if (error = nfs_nget(mp, &nmp->nm_fh, &np))
+ return(error);
+ vp = NFSTOV(np);
+ if (vp->v_usecount > 2) { /* presumably: more than mountnfs()'s reference plus the one just taken */
+ vput(vp);
+ return (EBUSY);
+ }
+
+ /*
+ * Must handshake with nqnfs_clientd() if it is active.
+ * NFSMNT_DISMINPROG is set first, then we sleep on lbolt until
+ * nm_inprog has gone back to NULLVP.
+ */
+ nmp->nm_flag |= NFSMNT_DISMINPROG;
+ while (nmp->nm_inprog != NULLVP)
+ (void) tsleep((caddr_t)&lbolt, PSOCK, "nfsdism", 0);
+ if (error = vflush(mp, vp, flags)) {
+ vput(vp);
+ nmp->nm_flag &= ~NFSMNT_DISMINPROG; /* unmount abandoned, undo the flag */
+ return (error);
+ }
+
+ /*
+ * We are now committed to the unmount.
+ * For NQNFS, let the server daemon free the nfsmount structure.
+ * (The test below also covers NFSMNT_KERB mounts.)
+ */
+ if (nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB))
+ nmp->nm_flag |= NFSMNT_DISMNT;
+
+ /*
+ * There are two reference counts to get rid of here.
+ */
+ vrele(vp);
+ vrele(vp);
+ vgone(vp);
+ nfs_disconnect(nmp);
+ m_freem(nmp->nm_nam);
+
+ if ((nmp->nm_flag & (NFSMNT_NQNFS | NFSMNT_KERB)) == 0) /* otherwise the structure is left allocated, see above */
+ free((caddr_t)nmp, M_NFSMNT);
+ return (0);
+}
+
+/*
+ * Return root of a filesystem.
+ * Looks up the nfsnode for the mount's root file handle (nm_fh), marks
+ * its vnode as a directory with VROOT set and hands it back through vpp.
+ * Returns 0, or the error from nfs_nget() (*vpp is then left untouched).
+ */
+int
+nfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct nfsmount *nmp = VFSTONFS(mp);
+	struct nfsnode *np;
+	register struct vnode *rvp;
+	int error;
+
+	error = nfs_nget(mp, &nmp->nm_fh, &np);
+	if (error)
+		return (error);
+
+	rvp = NFSTOV(np);
+	rvp->v_type = VDIR;
+	rvp->v_flag = VROOT;
+	*vpp = rvp;
+	return (0);
+}
+
+extern int syncprt;
+
+/*
+ * Flush out the buffer cache.
+ * Walks the mount's vnode list and VOP_FSYNC()s every vnode that is not
+ * locked and has dirty buffers.  The walk starts over from the head of
+ * the list whenever a vnode turns out to belong to another mount or
+ * vget() fails.
+ * Returns 0, or the error of the last VOP_FSYNC() that failed.
+ */
+/* ARGSUSED */
+int
+nfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	register struct vnode *vp;
+	int error, allerror = 0;
+
+	/*
+	 * Force stale buffer cache information to be flushed.
+	 */
+restart:
+	vp = mp->mnt_vnodelist.lh_first;
+	while (vp != NULL) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto restart;
+		if (!VOP_ISLOCKED(vp) && vp->v_dirtyblkhd.lh_first != NULL) {
+			if (vget(vp, 1))
+				goto restart;
+			error = VOP_FSYNC(vp, cred, waitfor, p);
+			if (error)
+				allerror = error;
+			vput(vp);
+		}
+		/* The successor is only read once this vnode is done with. */
+		vp = vp->v_mntvnodes.le_next;
+	}
+	return (allerror);
+}
+
+/*
+ * NFS flat namespace lookup.
+ * Currently unsupported.
+ * None of the arguments is looked at; always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_vget(mp, ino, vpp)
+ struct mount *mp;
+ ino_t ino;
+ struct vnode **vpp;
+{
+
+ return (EOPNOTSUPP);
+}
+
+/*
+ * At this point, this should never happen
+ * File handle to vnode pointer: none of the arguments is looked at and
+ * EINVAL is always returned.
+ */
+/* ARGSUSED */
+int
+nfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+ register struct mount *mp;
+ struct fid *fhp;
+ struct mbuf *nam;
+ struct vnode **vpp;
+ int *exflagsp;
+ struct ucred **credanonp;
+{
+
+ return (EINVAL);
+}
+
+/*
+ * Vnode pointer to File handle, should never happen either
+ * Neither argument is looked at; always returns EINVAL.
+ */
+/* ARGSUSED */
+int
+nfs_vptofh(vp, fhp)
+ struct vnode *vp;
+ struct fid *fhp;
+{
+
+ return (EINVAL);
+}
+
+/*
+ * Vfs start routine, a no-op.
+ * None of the arguments is looked at; always returns 0.
+ */
+/* ARGSUSED */
+int
+nfs_start(mp, flags, p)
+ struct mount *mp;
+ int flags;
+ struct proc *p;
+{
+
+ return (0);
+}
+
+/*
+ * Do operations associated with quotas, not supported
+ * None of the arguments is looked at; always returns EOPNOTSUPP.
+ */
+/* ARGSUSED */
+int
+nfs_quotactl(mp, cmd, uid, arg, p)
+ struct mount *mp;
+ int cmd;
+ uid_t uid;
+ caddr_t arg;
+ struct proc *p;
+{
+
+ return (EOPNOTSUPP);
+}
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
new file mode 100644
index 0000000..a909b48
--- /dev/null
+++ b/sys/nfsclient/nfs_vnops.c
@@ -0,0 +1,2539 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_vnops.c 8.5 (Berkeley) 2/13/94
+ */
+
+/*
+ * vnode op calls for sun nfs version 2
+ */
+
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/buf.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/conf.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/map.h>
+#include <sys/dirent.h>
+
+#include <vm/vm.h>
+
+#include <miscfs/specfs/specdev.h>
+#include <miscfs/fifofs/fifo.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsmount.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Global vfs data structures for nfs
+ * nfsv2_vnodeop_entries pairs each vnode operation descriptor with the
+ * routine that implements it; the first entry supplies the default
+ * (vn_default_error) and a NULL/NULL pair ends the list.
+ * nfsv2_vnodeop_opv_desc ties the table to the nfsv2_vnodeop_p pointer.
+ */
+int (**nfsv2_vnodeop_p)();
+struct vnodeopv_entry_desc nfsv2_vnodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, nfs_lookup }, /* lookup */
+ { &vop_create_desc, nfs_create }, /* create */
+ { &vop_mknod_desc, nfs_mknod }, /* mknod */
+ { &vop_open_desc, nfs_open }, /* open */
+ { &vop_close_desc, nfs_close }, /* close */
+ { &vop_access_desc, nfs_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfs_read }, /* read */
+ { &vop_write_desc, nfs_write }, /* write */
+ { &vop_ioctl_desc, nfs_ioctl }, /* ioctl */
+ { &vop_select_desc, nfs_select }, /* select */
+ { &vop_mmap_desc, nfs_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, nfs_seek }, /* seek */
+ { &vop_remove_desc, nfs_remove }, /* remove */
+ { &vop_link_desc, nfs_link }, /* link */
+ { &vop_rename_desc, nfs_rename }, /* rename */
+ { &vop_mkdir_desc, nfs_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, nfs_rmdir }, /* rmdir */
+ { &vop_symlink_desc, nfs_symlink }, /* symlink */
+ { &vop_readdir_desc, nfs_readdir }, /* readdir */
+ { &vop_readlink_desc, nfs_readlink }, /* readlink */
+ { &vop_abortop_desc, nfs_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, nfs_bmap }, /* bmap */
+ { &vop_strategy_desc, nfs_strategy }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, nfs_pathconf }, /* pathconf */
+ { &vop_advlock_desc, nfs_advlock }, /* advlock */
+ { &vop_blkatoff_desc, nfs_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, nfs_valloc }, /* valloc */
+ { &vop_reallocblks_desc, nfs_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, nfs_vfree }, /* vfree */
+ { &vop_truncate_desc, nfs_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc nfsv2_vnodeop_opv_desc =
+ { &nfsv2_vnodeop_p, nfsv2_vnodeop_entries };
+
+/*
+ * Special device vnode ops
+ * Same layout as nfsv2_vnodeop_entries.  Most operations go to the spec_*
+ * routines; close, access, read and write use nfsspec_* routines, and
+ * getattr, setattr, fsync, inactive, reclaim, lock, unlock, print,
+ * islocked and update use the nfs_* routines.
+ */
+int (**spec_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc spec_nfsv2nodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, spec_lookup }, /* lookup */
+ { &vop_create_desc, spec_create }, /* create */
+ { &vop_mknod_desc, spec_mknod }, /* mknod */
+ { &vop_open_desc, spec_open }, /* open */
+ { &vop_close_desc, nfsspec_close }, /* close */
+ { &vop_access_desc, nfsspec_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfsspec_read }, /* read */
+ { &vop_write_desc, nfsspec_write }, /* write */
+ { &vop_ioctl_desc, spec_ioctl }, /* ioctl */
+ { &vop_select_desc, spec_select }, /* select */
+ { &vop_mmap_desc, spec_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, spec_seek }, /* seek */
+ { &vop_remove_desc, spec_remove }, /* remove */
+ { &vop_link_desc, spec_link }, /* link */
+ { &vop_rename_desc, spec_rename }, /* rename */
+ { &vop_mkdir_desc, spec_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, spec_rmdir }, /* rmdir */
+ { &vop_symlink_desc, spec_symlink }, /* symlink */
+ { &vop_readdir_desc, spec_readdir }, /* readdir */
+ { &vop_readlink_desc, spec_readlink }, /* readlink */
+ { &vop_abortop_desc, spec_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, spec_bmap }, /* bmap */
+ { &vop_strategy_desc, spec_strategy }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, spec_pathconf }, /* pathconf */
+ { &vop_advlock_desc, spec_advlock }, /* advlock */
+ { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, spec_valloc }, /* valloc */
+ { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, spec_vfree }, /* vfree */
+ { &vop_truncate_desc, spec_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL }
+};
+struct vnodeopv_desc spec_nfsv2nodeop_opv_desc =
+ { &spec_nfsv2nodeop_p, spec_nfsv2nodeop_entries };
+
+#ifdef FIFO /* fifo vnode ops table, only compiled when FIFO is defined */
+int (**fifo_nfsv2nodeop_p)();
+struct vnodeopv_entry_desc fifo_nfsv2nodeop_entries[] = {
+ { &vop_default_desc, vn_default_error },
+ { &vop_lookup_desc, fifo_lookup }, /* lookup */
+ { &vop_create_desc, fifo_create }, /* create */
+ { &vop_mknod_desc, fifo_mknod }, /* mknod */
+ { &vop_open_desc, fifo_open }, /* open */
+ { &vop_close_desc, nfsfifo_close }, /* close */
+ { &vop_access_desc, nfsspec_access }, /* access */
+ { &vop_getattr_desc, nfs_getattr }, /* getattr */
+ { &vop_setattr_desc, nfs_setattr }, /* setattr */
+ { &vop_read_desc, nfsfifo_read }, /* read */
+ { &vop_write_desc, nfsfifo_write }, /* write */
+ { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */
+ { &vop_select_desc, fifo_select }, /* select */
+ { &vop_mmap_desc, fifo_mmap }, /* mmap */
+ { &vop_fsync_desc, nfs_fsync }, /* fsync */
+ { &vop_seek_desc, fifo_seek }, /* seek */
+ { &vop_remove_desc, fifo_remove }, /* remove */
+ { &vop_link_desc, fifo_link }, /* link */
+ { &vop_rename_desc, fifo_rename }, /* rename */
+ { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */
+ { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */
+ { &vop_symlink_desc, fifo_symlink }, /* symlink */
+ { &vop_readdir_desc, fifo_readdir }, /* readdir */
+ { &vop_readlink_desc, fifo_readlink }, /* readlink */
+ { &vop_abortop_desc, fifo_abortop }, /* abortop */
+ { &vop_inactive_desc, nfs_inactive }, /* inactive */
+ { &vop_reclaim_desc, nfs_reclaim }, /* reclaim */
+ { &vop_lock_desc, nfs_lock }, /* lock */
+ { &vop_unlock_desc, nfs_unlock }, /* unlock */
+ { &vop_bmap_desc, fifo_bmap }, /* bmap */
+ { &vop_strategy_desc, fifo_badop }, /* strategy */
+ { &vop_print_desc, nfs_print }, /* print */
+ { &vop_islocked_desc, nfs_islocked }, /* islocked */
+ { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */
+ { &vop_advlock_desc, fifo_advlock }, /* advlock */
+ { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */
+ { &vop_valloc_desc, fifo_valloc }, /* valloc */
+ { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */
+ { &vop_vfree_desc, fifo_vfree }, /* vfree */
+ { &vop_truncate_desc, fifo_truncate }, /* truncate */
+ { &vop_update_desc, nfs_update }, /* update */
+ { &vop_bwrite_desc, vn_bwrite },
+ { (struct vnodeop_desc*)NULL, (int(*)())NULL } /* end of table */
+};
+struct vnodeopv_desc fifo_nfsv2nodeop_opv_desc =
+ { &fifo_nfsv2nodeop_p, fifo_nfsv2nodeop_entries };
+#endif /* FIFO */
+
+void nqnfs_clientlease();
+
+/*
+ * Global variables
+ * The extern objects are defined elsewhere; nfs_iodwant, nfs_numasync and
+ * DIRHDSIZ are defined here.
+ */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_prog, nfs_vers, nfs_true, nfs_false;
+extern char nfsiobuf[MAXPHYS+NBPG];
+struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+int nfs_numasync = 0;
+#define DIRHDSIZ (sizeof (struct dirent) - (MAXNAMLEN + 1)) /* struct dirent size less MAXNAMLEN + 1 bytes; presumably the part in front of the name -- confirm */
+
+/*
+ * nfs null call from vfs.
+ * Builds an NFSPROC_NULL request with no arguments (size 0) and issues
+ * it for vp on behalf of procp with the credentials in cred.
+ * Returns the value left in error, which starts out as 0.
+ * NOTE(review): the nfsm_* macros are defined elsewhere; they presumably
+ * use the locals declared here by name and set error on failure --
+ * confirm before renaming a local.
+ */
+int
+nfs_null(vp, cred, procp)
+ struct vnode *vp;
+ struct ucred *cred;
+ struct proc *procp;
+{
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb;
+
+ nfsm_reqhead(vp, NFSPROC_NULL, 0);
+ nfsm_request(vp, NFSPROC_NULL, procp, cred);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs access vnode op.
+ * On a plain nfs mount the check is handed to nfsspec_access().
+ * On an nqnfs mount the access rpc is used: the request carries the file
+ * handle followed by three words saying whether read, write and execute
+ * access are wanted.  If file modes are changed on the server, accesses
+ * might still fail later.
+ * Returns 0 or an error number.
+ */
+int
+nfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register u_long *tl;
+	register caddr_t cp;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/*
+	 * Without nqnfs there is no access rpc, and one is stuck emulating
+	 * ufs_access() locally using the vattr.  That may not be correct,
+	 * since the server may apply other access criteria such as
+	 * client uid-->server uid mapping that we do not know about, but
+	 * it is better than just returning anything that is lying about
+	 * in the cache.
+	 */
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0)
+		return (nfsspec_access(ap));
+
+	nfsstats.rpccnt[NQNFSPROC_ACCESS]++;
+	nfsm_reqhead(vp, NQNFSPROC_ACCESS, NFSX_FH + 3 * NFSX_UNSIGNED);
+	nfsm_fhtom(vp);
+	nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+	/* One boolean word each for read, write and execute, in that order. */
+	*tl++ = (ap->a_mode & VREAD) ? nfs_true : nfs_false;
+	*tl++ = (ap->a_mode & VWRITE) ? nfs_true : nfs_false;
+	*tl = (ap->a_mode & VEXEC) ? nfs_true : nfs_false;
+	nfsm_request(vp, NQNFSPROC_ACCESS, ap->a_p, ap->a_cred);
+	nfsm_reqdone;
+	return (error);
+}
+
+/*
+ * nfs open vnode op
+ * Check to see if the type is ok
+ * and that deletion is not in progress.
+ * For paged in text files, you will need to flush the page cache
+ * if consistency is lost.
+ *
+ * Returns EACCES unless the vnode is a regular file, a directory or a
+ * symbolic link.  Otherwise returns 0, or the error from the lease,
+ * buffer-invalidation or getattr step that stopped the open.
+ */
+/* ARGSUSED */
+int
+nfs_open(ap)
+ struct vop_open_args /* {
+ struct vnode *a_vp;
+ int a_mode;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ struct nfsnode *np = VTONFS(vp);
+ struct nfsmount *nmp = VFSTONFS(vp->v_mount);
+ struct vattr vattr;
+ int error;
+
+ if (vp->v_type != VREG && vp->v_type != VDIR && vp->v_type != VLNK)
+ return (EACCES);
+ /* The consistency checks below run only when VTEXT is set on the vnode. */
+ if (vp->v_flag & VTEXT) {
+ /*
+ * Get a valid lease. If cached data is stale, flush it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (NQNFS_CKINVALID(vp, np, NQL_READ)) {
+ /* Retry for as long as the lease request reports NQNFS_EXPIRED. */
+ do {
+ error = nqnfs_getlease(vp, NQL_READ, ap->a_cred, ap->a_p);
+ } while (error == NQNFS_EXPIRED);
+ if (error)
+ return (error);
+ /*
+ * Flush when the two revision fields differ or caching is off.
+ * Only EINTR from the flush aborts the open; other errors are
+ * not acted on here.
+ */
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NQNFSNONCACHE)) {
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ (void) vnode_pager_uncache(vp);
+ np->n_brev = np->n_lrev;
+ }
+ }
+ } else {
+ /*
+ * Plain nfs: with local modifications, flush, drop the cached
+ * attributes and directory EOF offset, then refetch the
+ * attributes and remember the modify time.
+ */
+ if (np->n_flag & NMODIFIED) {
+ if ((error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+ ap->a_p, 1)) == EINTR)
+ return (error);
+ (void) vnode_pager_uncache(vp);
+ np->n_attrstamp = 0;
+ np->n_direofoffset = 0;
+ if (error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p))
+ return (error);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ } else {
+ /*
+ * No local modifications: flush only if the modify time
+ * differs from the one remembered in the nfsnode.
+ */
+ if (error = VOP_GETATTR(vp, &vattr, ap->a_cred, ap->a_p))
+ return (error);
+ if (np->n_mtime != vattr.va_mtime.ts_sec) {
+ np->n_direofoffset = 0;
+ if ((error = nfs_vinvalbuf(vp, V_SAVE,
+ ap->a_cred, ap->a_p, 1)) == EINTR)
+ return (error);
+ (void) vnode_pager_uncache(vp);
+ np->n_mtime = vattr.va_mtime.ts_sec;
+ }
+ }
+ }
+ } else if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ np->n_attrstamp = 0; /* For Open/Close consistency */
+ return (0);
+}
+
+/*
+ * nfs close vnode op
+ * Only regular files need work.  On a plain nfs (non-nqnfs) mount a
+ * locally modified file has its buffer cache entries written back and
+ * invalidated and its cached attributes dropped.  A write error that
+ * was recorded earlier on the nfsnode is reported here, once, and
+ * takes precedence over the result of the flush.
+ */
+/* ARGSUSED */
+int
+nfs_close(ap)
+	struct vop_close_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	int error = 0;
+
+	/* Nothing to do for anything but a regular file. */
+	if (vp->v_type != VREG)
+		return (error);
+
+	if ((np->n_flag & NMODIFIED) &&
+	    (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) == 0) {
+		error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 1);
+		np->n_attrstamp = 0;
+	}
+
+	/* Hand back a deferred write error and clear the flag. */
+	if (np->n_flag & NWRITEERR) {
+		np->n_flag &= ~NWRITEERR;
+		error = np->n_error;
+	}
+	return (error);
+}
+
+/*
+ * nfs getattr call from vfs.
+ * Fills ap->a_vap from the attribute cache when nfs_getattrcache()
+ * succeeds; otherwise issues an NFSPROC_GETATTR rpc and loads the reply
+ * into the vnode and ap->a_vap.
+ * Returns 0 on success, otherwise the error left by the rpc macros.
+ */
+int
+nfs_getattr(ap)
+ struct vop_getattr_args /* {
+ struct vnode *a_vp;
+ struct vattr *a_vap;
+ struct ucred *a_cred;
+ struct proc *a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register caddr_t cp;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /*
+ * Update local times for special files.
+ * (A pending access or update flag also marks the node as changed.)
+ */
+ if (np->n_flag & (NACC | NUPD))
+ np->n_flag |= NCHG;
+ /*
+ * First look in the cache.
+ */
+ if (nfs_getattrcache(vp, ap->a_vap) == 0)
+ return (0);
+ /* Cache miss: ask the server, sending only the file handle. */
+ nfsstats.rpccnt[NFSPROC_GETATTR]++;
+ nfsm_reqhead(vp, NFSPROC_GETATTR, NFSX_FH);
+ nfsm_fhtom(vp);
+ nfsm_request(vp, NFSPROC_GETATTR, ap->a_p, ap->a_cred);
+ nfsm_loadattr(vp, ap->a_vap);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs setattr call.
+ * Sends the attributes in ap->a_vap to the server with an
+ * NFSPROC_SETATTR rpc and loads the attributes returned in the reply.
+ *
+ * When the size or either time is being set, modified buffers are
+ * flushed first.  When the size is being set, the locally cached size
+ * is updated before the rpc and put back if the rpc fails.
+ *
+ * Returns 0 on success, otherwise an error code.
+ */
+int
+nfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct nfsv2_sattr *sp;
+	register caddr_t cp;
+	register long t1;
+	caddr_t bpos, dpos, cp2;
+	u_long *tl;
+	int error = 0, isnq;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *vap = ap->a_vap;
+	u_quad_t frev, tsize = 0;
+
+	if (vap->va_size != VNOVAL || vap->va_mtime.ts_sec != VNOVAL ||
+	    vap->va_atime.ts_sec != VNOVAL) {
+		if (vap->va_size != VNOVAL) {
+			if (np->n_flag & NMODIFIED) {
+				/*
+				 * Truncating to zero discards the buffers;
+				 * any other size writes them back first.
+				 */
+				if (vap->va_size == 0)
+					error = nfs_vinvalbuf(vp, 0, ap->a_cred,
+					    ap->a_p, 1);
+				else
+					error = nfs_vinvalbuf(vp, V_SAVE,
+					    ap->a_cred, ap->a_p, 1);
+				if (error)
+					return (error);
+			}
+			/* Remember the old size so a failed rpc can undo this. */
+			tsize = np->n_size;
+			np->n_size = np->n_vattr.va_size = vap->va_size;
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+		} else if ((np->n_flag & NMODIFIED) &&
+		    (error = nfs_vinvalbuf(vp, V_SAVE, ap->a_cred,
+		    ap->a_p, 1)) == EINTR)
+			return (error);
+	}
+	nfsstats.rpccnt[NFSPROC_SETATTR]++;
+	isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+	nfsm_reqhead(vp, NFSPROC_SETATTR, NFSX_FH+NFSX_SATTR(isnq));
+	nfsm_fhtom(vp);
+	nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+	/* Fields the caller left unset go out as VNOVAL. */
+	if (vap->va_mode == (u_short)-1)
+		sp->sa_mode = VNOVAL;
+	else
+		sp->sa_mode = vtonfs_mode(vp->v_type, vap->va_mode);
+	if (vap->va_uid == (uid_t)-1)
+		sp->sa_uid = VNOVAL;
+	else
+		sp->sa_uid = txdr_unsigned(vap->va_uid);
+	if (vap->va_gid == (gid_t)-1)
+		sp->sa_gid = VNOVAL;
+	else
+		sp->sa_gid = txdr_unsigned(vap->va_gid);
+	if (isnq) {
+		txdr_hyper(&vap->va_size, &sp->sa_nqsize);
+		txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+		txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+		sp->sa_nqflags = txdr_unsigned(vap->va_flags);
+		sp->sa_nqrdev = VNOVAL;
+	} else {
+		sp->sa_nfssize = txdr_unsigned(vap->va_size);
+		txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+		txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+	}
+	nfsm_request(vp, NFSPROC_SETATTR, ap->a_p, ap->a_cred);
+	nfsm_loadattr(vp, (struct vattr *)0);
+	/* With a cachable nqnfs write lease the reply carries a revision. */
+	if ((VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS) &&
+	    NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+		nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+		fxdr_hyper(tl, &frev);
+		if (frev > np->n_brev)
+			np->n_brev = frev;
+	}
+	nfsm_reqdone;
+	/*
+	 * Undo the local size change if the rpc failed.  tsize only holds
+	 * the previous size when a size change was requested, so the
+	 * restore must be limited to that case; otherwise a failed
+	 * chmod/chown/utimes would overwrite the cached size.
+	 */
+	if (error && vap->va_size != VNOVAL) {
+		np->n_size = np->n_vattr.va_size = tsize;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	return (error);
+}
+
+/*
+ * nfs lookup call, one step at a time...
+ * First look in cache
+ * If not found, unlock the directory nfsnode and do the rpc
+ *
+ * On success *ap->a_vpp holds the vnode found and 0 is returned.
+ * Returns ENOTDIR if a_dvp is not a directory, EJUSTRETURN when the
+ * last component of a CREATE or RENAME does not exist, EISDIR when a
+ * RENAME lookup resolves to the directory itself, and otherwise the
+ * error from the rpc or from obtaining the vnode.
+ */
+int
+nfs_lookup(ap)
+ struct vop_lookup_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct componentname *cnp = ap->a_cnp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vnode **vpp = ap->a_vpp;
+ register int flags = cnp->cn_flags;
+ register struct vnode *vdp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ struct nfsmount *nmp;
+ caddr_t bpos, dpos, cp2;
+ time_t reqtime;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vnode *newvp;
+ long len;
+ nfsv2fh_t *fhp;
+ struct nfsnode *np;
+ int lockparent, wantparent, error = 0;
+ int nqlflag, cachable;
+ u_quad_t frev;
+
+ *vpp = NULL;
+ if (dvp->v_type != VDIR)
+ return (ENOTDIR);
+ lockparent = flags & LOCKPARENT;
+ wantparent = flags & (LOCKPARENT|WANTPARENT);
+ nmp = VFSTONFS(dvp->v_mount);
+ np = VTONFS(dvp);
+ /*
+ * Name cache probe.  This branch is taken when cache_lookup() returns
+ * a non-zero value other than ENOENT, in which case *vpp is used as
+ * the cached vnode.
+ */
+ if ((error = cache_lookup(dvp, vpp, cnp)) && error != ENOENT) {
+ struct vattr vattr;
+ int vpid;
+
+ vdp = *vpp;
+ /* Remember v_id so a change while getting the vnode is noticed. */
+ vpid = vdp->v_id;
+ /*
+ * See the comment starting `Step through' in ufs/ufs_lookup.c
+ * for an explanation of the locking protocol
+ */
+ if (dvp == vdp) {
+ VREF(vdp);
+ error = 0;
+ } else
+ error = vget(vdp, 1);
+ if (!error) {
+ if (vpid == vdp->v_id) {
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ /* nqnfs without lookup leases: accept the cached entry. */
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) == 0) {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ } else if (NQNFS_CKCACHABLE(dvp, NQL_READ)) {
+ /*
+ * Directory lease is cachable: a revision mismatch or
+ * local modification invalidates what is cached for the
+ * directory; otherwise the cached entry is a hit.
+ */
+ if (np->n_lrev != np->n_brev ||
+ (np->n_flag & NMODIFIED)) {
+ np->n_direofoffset = 0;
+ cache_purge(dvp);
+ error = nfs_vinvalbuf(dvp, 0,
+ cnp->cn_cred, cnp->cn_proc,
+ 1);
+ /*
+ * NOTE(review): this return does not vrele(vdp), so the
+ * reference taken by VREF()/vget() above stays held --
+ * confirm whether that is intended.
+ */
+ if (error == EINTR)
+ return (error);
+ np->n_brev = np->n_lrev;
+ } else {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ }
+ /*
+ * Plain nfs: the entry is a hit if the attributes can be fetched
+ * and the change time matches the one saved in the nfsnode.
+ */
+ } else if (!VOP_GETATTR(vdp, &vattr, cnp->cn_cred, cnp->cn_proc) &&
+ vattr.va_ctime.ts_sec == VTONFS(vdp)->n_ctime) {
+ nfsstats.lookupcache_hits++;
+ if (cnp->cn_nameiop != LOOKUP &&
+ (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+ /* Cached entry could not be validated: drop it. */
+ cache_purge(vdp);
+ }
+ vrele(vdp);
+ }
+ *vpp = NULLVP;
+ }
+ /* Cache miss (or rejected entry): go to the server. */
+ error = 0;
+ nfsstats.lookupcache_misses++;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = cnp->cn_namelen;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+
+ /*
+ * For nqnfs optionally piggyback a getlease request for the name
+ * being looked up.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+ ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))))
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ else
+ *tl = 0;
+ }
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ reqtime = time.tv_sec;
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+ /*
+ * Explicit nfsmout label instead of nfsm_reqdone: control also falls
+ * through to here on success, so the error handling below is guarded
+ * by "if (error)".
+ */
+nfsmout:
+ if (error) {
+ /* A missing last component is not an error for CREATE/RENAME. */
+ if ((cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME) &&
+ (flags & ISLASTCN) && error == ENOENT)
+ error = EJUSTRETURN;
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ return (error);
+ }
+ /*
+ * nqnfs reply: a non-zero first word is the lease flag and is followed
+ * by the cachable flag, a duration added to reqtime, and a revision.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ if (*tl) {
+ nqlflag = fxdr_unsigned(int, *tl);
+ nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+ cachable = fxdr_unsigned(int, *tl++);
+ reqtime += fxdr_unsigned(int, *tl++);
+ fxdr_hyper(tl, &frev);
+ } else
+ nqlflag = 0;
+ }
+ nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+
+ /*
+ * Handle RENAME case...
+ */
+ if (cnp->cn_nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
+ /* The returned handle is the directory's own handle. */
+ if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+ m_freem(mrep);
+ return (EISDIR);
+ }
+ if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ if (error =
+ nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+ vrele(newvp);
+ m_freem(mrep);
+ return (error);
+ }
+ *vpp = newvp;
+ m_freem(mrep);
+ cnp->cn_flags |= SAVENAME;
+ return (0);
+ }
+
+ /* Same handle as the directory: return dvp itself with a new reference. */
+ if (!bcmp(np->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) {
+ VREF(dvp);
+ newvp = dvp;
+ } else {
+ if (error = nfs_nget(dvp->v_mount, fhp, &np)) {
+ m_freem(mrep);
+ return (error);
+ }
+ newvp = NFSTOV(np);
+ }
+ if (error = nfs_loadattrcache(&newvp, &md, &dpos, (struct vattr *)0)) {
+ vrele(newvp);
+ m_freem(mrep);
+ return (error);
+ }
+ m_freem(mrep);
+ *vpp = newvp;
+ if (cnp->cn_nameiop != LOOKUP && (flags & ISLASTCN))
+ cnp->cn_flags |= SAVENAME;
+ /*
+ * Enter the result in the name cache unless this is the last component
+ * of a DELETE.  Plain nfs records the change time used to validate the
+ * entry later; nqnfs records the lease if one was granted and has not
+ * already expired.
+ */
+ if ((cnp->cn_flags & MAKEENTRY) &&
+ (cnp->cn_nameiop != DELETE || !(flags & ISLASTCN))) {
+ if ((nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ np->n_ctime = np->n_vattr.va_ctime.ts_sec;
+ else if (nqlflag && reqtime > time.tv_sec)
+ nqnfs_clientlease(nmp, np, nqlflag, cachable, reqtime,
+ frev);
+ cache_enter(dvp, *vpp, cnp);
+ }
+ return (0);
+}
+
+/*
+ * nfs read call.
+ * Only regular files can be read this way; anything else gets EPERM.
+ * The transfer itself is done by nfs_bioread().
+ */
+int
+nfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	if (vp->v_type == VREG)
+		return (nfs_bioread(vp, ap->a_uio, ap->a_ioflag, ap->a_cred));
+	return (EPERM);
+}
+
+/*
+ * nfs readlink call
+ * Only symbolic links qualify; anything else gets EPERM.
+ * The link contents are read through nfs_bioread() with no I/O flags.
+ */
+int
+nfs_readlink(ap)
+	struct vop_readlink_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+
+	if (vp->v_type == VLNK)
+		return (nfs_bioread(vp, ap->a_uio, 0, ap->a_cred));
+	return (EPERM);
+}
+
+/*
+ * Do a readlink rpc.
+ * Called by nfs_doio() from below the buffer cache.
+ * Sends the file handle of vp in an NFSPROC_READLINK request and copies
+ * the returned string (length limited by NFS_MAXPATHLEN) into uiop.
+ * Returns 0 on success, otherwise the error left by the rpc macros.
+ */
+int
+nfs_readlinkrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+
+ nfsstats.rpccnt[NFSPROC_READLINK]++;
+ nfsm_reqhead(vp, NFSPROC_READLINK, NFSX_FH);
+ nfsm_fhtom(vp);
+ /* The request is issued on behalf of the process recorded in the uio. */
+ nfsm_request(vp, NFSPROC_READLINK, uiop->uio_procp, cred);
+ nfsm_strsiz(len, NFS_MAXPATHLEN);
+ nfsm_mtouio(uiop, len);
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * nfs read rpc call
+ * Ditto above
+ * Reads uiop->uio_resid bytes starting at uiop->uio_offset with one
+ * NFSPROC_READ rpc per chunk of at most nm_rsize bytes, copying each
+ * reply into uiop.  Stops early when the server returns less than was
+ * asked for.
+ * Returns EFBIG if the range extends past 0xffffffff on a non-nqnfs
+ * mount, 0 on success, otherwise the error left by the rpc macros.
+ */
+int
+nfs_readrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ long len, retlen, tsiz;
+
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid;
+ /* Without nqnfs the offset is sent as a single unsigned word. */
+ if (uiop->uio_offset + tsiz > 0xffffffff &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_READ]++;
+ len = (tsiz > nmp->nm_rsize) ? nmp->nm_rsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_READ, NFSX_FH+NFSX_UNSIGNED*3);
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED*3);
+ /*
+ * Three argument words.  nqnfs: the offset via txdr_hyper() at tl,
+ * then the length in the third word.  nfs: offset, length, and a
+ * zero third word.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ txdr_hyper(&uiop->uio_offset, tl);
+ *(tl + 2) = txdr_unsigned(len);
+ } else {
+ *tl++ = txdr_unsigned(uiop->uio_offset);
+ *tl++ = txdr_unsigned(len);
+ *tl = 0;
+ }
+ nfsm_request(vp, NFSPROC_READ, uiop->uio_procp, cred);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ nfsm_strsiz(retlen, nmp->nm_rsize);
+ nfsm_mtouio(uiop, retlen);
+ m_freem(mrep);
+ /* A short read ends the loop. */
+ if (retlen < len)
+ tsiz = 0;
+ else
+ tsiz -= len;
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * nfs write call
+ * Writes uiop->uio_resid bytes from uiop starting at uiop->uio_offset
+ * with one NFSPROC_WRITE rpc per chunk of at most nm_wsize bytes.
+ * Returns EFBIG if the range extends past 0xffffffff on a non-nqnfs
+ * mount, 0 on success, otherwise the error left by the rpc macros; on
+ * error uiop->uio_resid is set to the byte count not yet written.
+ */
+int
+nfs_writerpc(vp, uiop, cred, ioflags)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+ int ioflags;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nfsmount *nmp;
+ struct nfsnode *np = VTONFS(vp);
+ u_quad_t frev;
+ long len, tsiz;
+
+ nmp = VFSTONFS(vp->v_mount);
+ tsiz = uiop->uio_resid;
+ /* Without nqnfs the offset is sent as a single unsigned word. */
+ if (uiop->uio_offset + tsiz > 0xffffffff &&
+ (nmp->nm_flag & NFSMNT_NQNFS) == 0)
+ return (EFBIG);
+ while (tsiz > 0) {
+ nfsstats.rpccnt[NFSPROC_WRITE]++;
+ len = (tsiz > nmp->nm_wsize) ? nmp->nm_wsize : tsiz;
+ nfsm_reqhead(vp, NFSPROC_WRITE,
+ NFSX_FH+NFSX_UNSIGNED*4+nfsm_rndup(len));
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED * 4);
+ /*
+ * Four argument words, the last of which is the data length.
+ * nqnfs: the offset via txdr_hyper() at tl, then an append flag.
+ * nfs: only the second word (offset) is assigned here; the first
+ * and third words are left as built.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ txdr_hyper(&uiop->uio_offset, tl);
+ tl += 2;
+ if (ioflags & IO_APPEND)
+ *tl++ = txdr_unsigned(1);
+ else
+ *tl++ = 0;
+ } else {
+ *++tl = txdr_unsigned(uiop->uio_offset);
+ tl += 2;
+ }
+ *tl = txdr_unsigned(len);
+ nfsm_uiotom(uiop, len);
+ nfsm_request(vp, NFSPROC_WRITE, uiop->uio_procp, cred);
+ nfsm_loadattr(vp, (struct vattr *)0);
+ /*
+ * NFSMNT_MYWRITE: adopt the modify time just loaded as our own.
+ * Otherwise, with a cachable nqnfs write lease, the reply carries
+ * a revision that may advance n_brev.
+ */
+ if (nmp->nm_flag & NFSMNT_MYWRITE)
+ VTONFS(vp)->n_mtime = VTONFS(vp)->n_vattr.va_mtime.ts_sec;
+ else if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+ NQNFS_CKCACHABLE(vp, NQL_WRITE)) {
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ fxdr_hyper(tl, &frev);
+ if (frev > np->n_brev)
+ np->n_brev = frev;
+ }
+ m_freem(mrep);
+ tsiz -= len;
+ }
+nfsmout:
+ /* Report how much was left unwritten when the failure happened. */
+ if (error)
+ uiop->uio_resid = tsiz;
+ return (error);
+}
+
+/*
+ * nfs mknod call
+ * This is a kludge. Use a create rpc but with the IFMT bits of the mode
+ * set to specify the file type and the size field for rdev.
+ *
+ * Character and block devices are supported, and fifos when FIFO is
+ * defined; any other type aborts the operation with EOPNOTSUPP.
+ * Returns 0 on success, otherwise an error code.
+ */
+/* ARGSUSED */
+int
+nfs_mknod(ap)
+ struct vop_mknod_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ struct vnode *newvp;
+ struct vattr vattr;
+ char *cp2;
+ caddr_t bpos, dpos;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ u_long rdev;
+
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ /* rdev is kept already converted for the wire. */
+ if (vap->va_type == VCHR || vap->va_type == VBLK)
+ rdev = txdr_unsigned(vap->va_rdev);
+#ifdef FIFO
+ else if (vap->va_type == VFIFO)
+ rdev = 0xffffffff;
+#endif /* FIFO */
+ else {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (EOPNOTSUPP);
+ }
+ /* The directory's attributes supply the group id for the new node. */
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ nfsm_reqhead(dvp, NFSPROC_CREATE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ /*
+ * nqnfs has a dedicated rdev field; plain nfs carries rdev in the size
+ * field.
+ * NOTE(review): sa_nqsize is not assigned in the nqnfs branch --
+ * confirm that the server ignores it for device nodes.
+ */
+ if (isnq) {
+ sp->sa_nqrdev = rdev;
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = rdev;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, newvp);
+ nfsm_reqdone;
+ if (!error && (cnp->cn_flags & MAKEENTRY))
+ cache_enter(dvp, newvp, cnp);
+ /* Success or failure: free the pathname buffer, mark dvp stale, drop it. */
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs file create call
+ * Creates the name in ap->a_cnp inside directory ap->a_dvp with an
+ * NFSPROC_CREATE rpc and stores the new vnode through ap->a_vpp.
+ * Returns 0 on success, otherwise an error code.
+ */
+int
+nfs_create(ap)
+ struct vop_create_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+
+ /* The directory's attributes supply the group id for the new file. */
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ nfsstats.rpccnt[NFSPROC_CREATE]++;
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsm_reqhead(dvp, NFSPROC_CREATE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(vap->va_type, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ /* The new file is created with size zero in both protocol variants. */
+ if (isnq) {
+ u_quad_t qval = 0;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ sp->sa_nqrdev = -1;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = 0;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_CREATE, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *ap->a_vpp);
+ nfsm_reqdone;
+ if (!error && (cnp->cn_flags & MAKEENTRY))
+ cache_enter(dvp, *ap->a_vpp, cnp);
+ /* Success or failure: free the pathname buffer, mark dvp stale, drop it. */
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs file remove call
+ * To try and make nfs semantics closer to ufs semantics, a file that has
+ * other processes using the vnode is renamed instead of removed and then
+ * removed later on the last close.
+ * - If v_usecount > 1
+ *	  If a rename is not already in the works
+ *	     call nfs_sillyrename() to set it up
+ *     else
+ *	  do the remove rpc
+ *
+ * The references on a_dvp and a_vp are released on every return path.
+ * Returns 0 on success (ENOENT from the rpc is mapped to success),
+ * otherwise an error code.
+ */
+int
+nfs_remove(ap)
+	struct vop_remove_args /* {
+		struct vnodeop_desc *a_desc;
+		struct vnode * a_dvp;
+		struct vnode * a_vp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct vnode *dvp = ap->a_dvp;
+	register struct componentname *cnp = ap->a_cnp;
+	register struct nfsnode *np = VTONFS(vp);
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	if (vp->v_usecount > 1) {
+		if (!np->n_sillyrename)
+			error = nfs_sillyrename(dvp, vp, cnp);
+	} else {
+		/*
+		 * Purge the name cache so that the chance of a lookup for
+		 * the name succeeding while the remove is in progress is
+		 * minimized. Without node locking it can still happen, such
+		 * that an I/O op returns ESTALE, but since you get this if
+		 * another host removes the file..
+		 */
+		cache_purge(vp);
+		/*
+		 * Throw away biocache buffers. Mainly to avoid
+		 * unnecessary delayed writes.
+		 */
+		error = nfs_vinvalbuf(vp, 0, cnp->cn_cred, cnp->cn_proc, 1);
+		if (error == EINTR) {
+			/*
+			 * Interrupted before the rpc was sent.  Give up, but
+			 * release exactly what the normal exit releases so
+			 * that neither the pathname buffer nor the two vnode
+			 * references are leaked.
+			 */
+			FREE(cnp->cn_pnbuf, M_NAMEI);
+			vrele(dvp);
+			vrele(vp);
+			return (error);
+		}
+		/* Do the rpc */
+		nfsstats.rpccnt[NFSPROC_REMOVE]++;
+		nfsm_reqhead(dvp, NFSPROC_REMOVE,
+			NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+		nfsm_fhtom(dvp);
+		nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+		nfsm_request(dvp, NFSPROC_REMOVE, cnp->cn_proc, cnp->cn_cred);
+		nfsm_reqdone;
+		FREE(cnp->cn_pnbuf, M_NAMEI);
+		VTONFS(dvp)->n_flag |= NMODIFIED;
+		VTONFS(dvp)->n_attrstamp = 0;
+		/*
+		 * Kludge City: If the first reply to the remove rpc is lost..
+		 *   the reply to the retransmitted request will be ENOENT
+		 *   since the file was in fact removed
+		 *   Therefore, we cheat and return success.
+		 */
+		if (error == ENOENT)
+			error = 0;
+	}
+	/* Force the file's attributes to be refetched. */
+	np->n_attrstamp = 0;
+	vrele(dvp);
+	vrele(vp);
+	return (error);
+}
+
+/*
+ * nfs file remove rpc called from nfs_inactive
+ * Removes the name recorded in the sillyrename structure sp
+ * (s_name/s_namlen) from directory sp->s_dvp using credential
+ * sp->s_cred, with no process (NULL) attached to the request.
+ * Returns 0 on success, otherwise the error left by the rpc macros.
+ */
+int
+nfs_removeit(sp)
+ register struct sillyrename *sp;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_REMOVE]++;
+ nfsm_reqhead(sp->s_dvp, NFSPROC_REMOVE,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(sp->s_namlen));
+ nfsm_fhtom(sp->s_dvp);
+ nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+ nfsm_request(sp->s_dvp, NFSPROC_REMOVE, NULL, sp->s_cred);
+ nfsm_reqdone;
+ /* The directory changed: mark it modified and drop cached attributes. */
+ VTONFS(sp->s_dvp)->n_flag |= NMODIFIED;
+ VTONFS(sp->s_dvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs file rename call
+ * Renames a_fcnp in directory a_fdvp to a_tcnp in directory a_tdvp
+ * with an NFSPROC_RENAME rpc.  All four vnodes passed in are released
+ * before returning, whether or not the rpc was attempted.
+ * Returns EXDEV for a cross-mount rename, 0 on success (ENOENT from
+ * the rpc is mapped to success), otherwise an error code.
+ */
+int
+nfs_rename(ap)
+	struct vop_rename_args  /* {
+		struct vnode *a_fdvp;
+		struct vnode *a_fvp;
+		struct componentname *a_fcnp;
+		struct vnode *a_tdvp;
+		struct vnode *a_tvp;
+		struct componentname *a_tcnp;
+	} */ *ap;
+{
+	register struct vnode *fvp = ap->a_fvp;
+	register struct vnode *tvp = ap->a_tvp;
+	register struct vnode *fdvp = ap->a_fdvp;
+	register struct vnode *tdvp = ap->a_tdvp;
+	register struct componentname *tcnp = ap->a_tcnp;
+	register struct componentname *fcnp = ap->a_fcnp;
+	register u_long *tl;
+	register caddr_t cp;
+	register long t2;
+	caddr_t bpos, dpos;
+	int error = 0;
+	struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+	/* Check for cross-device rename */
+	if ((fvp->v_mount != tdvp->v_mount) ||
+	    (tvp && (fvp->v_mount != tvp->v_mount))) {
+		error = EXDEV;
+		goto out;
+	}
+
+
+	nfsstats.rpccnt[NFSPROC_RENAME]++;
+	/*
+	 * The request holds two (file handle, name) pairs, so the size
+	 * passed here must account for both the source and target names.
+	 */
+	nfsm_reqhead(fdvp, NFSPROC_RENAME,
+		(NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(fcnp->cn_namelen)+
+		nfsm_rndup(tcnp->cn_namelen));
+	nfsm_fhtom(fdvp);
+	nfsm_strtom(fcnp->cn_nameptr, fcnp->cn_namelen, NFS_MAXNAMLEN);
+	nfsm_fhtom(tdvp);
+	nfsm_strtom(tcnp->cn_nameptr, tcnp->cn_namelen, NFS_MAXNAMLEN);
+	/* or fcnp->cn_cred? */
+	nfsm_request(fdvp, NFSPROC_RENAME, tcnp->cn_proc, tcnp->cn_cred);
+	nfsm_reqdone;
+	/* Both directories changed: mark them and drop cached attributes. */
+	VTONFS(fdvp)->n_flag |= NMODIFIED;
+	VTONFS(fdvp)->n_attrstamp = 0;
+	VTONFS(tdvp)->n_flag |= NMODIFIED;
+	VTONFS(tdvp)->n_attrstamp = 0;
+	if (fvp->v_type == VDIR) {
+		if (tvp != NULL && tvp->v_type == VDIR)
+			cache_purge(tdvp);
+		cache_purge(fdvp);
+	}
+out:
+	if (tdvp == tvp)
+		vrele(tdvp);
+	else
+		vput(tdvp);
+	if (tvp)
+		vput(tvp);
+	vrele(fdvp);
+	vrele(fvp);
+	/*
+	 * Kludge: Map ENOENT => 0 assuming that it is a reply to a retry.
+	 */
+	if (error == ENOENT)
+		error = 0;
+	return (error);
+}
+
+/*
+ * nfs file rename rpc called from nfs_remove() above
+ * Renames scnp's name to the name held in sp (s_name/s_namlen); source
+ * and target are both in directory sdvp.  Frees scnp's pathname buffer
+ * whether or not the rpc succeeds.
+ * Returns 0 on success, otherwise the error left by the rpc macros.
+ */
+int
+nfs_renameit(sdvp, scnp, sp)
+ struct vnode *sdvp;
+ struct componentname *scnp;
+ register struct sillyrename *sp;
+{
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_RENAME]++;
+ nfsm_reqhead(sdvp, NFSPROC_RENAME,
+ (NFSX_FH+NFSX_UNSIGNED)*2+nfsm_rndup(scnp->cn_namelen)+
+ nfsm_rndup(sp->s_namlen));
+ /* From: (sdvp, original name).  To: (sdvp, name from sp). */
+ nfsm_fhtom(sdvp);
+ nfsm_strtom(scnp->cn_nameptr, scnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_fhtom(sdvp);
+ nfsm_strtom(sp->s_name, sp->s_namlen, NFS_MAXNAMLEN);
+ nfsm_request(sdvp, NFSPROC_RENAME, scnp->cn_proc, scnp->cn_cred);
+ nfsm_reqdone;
+ FREE(scnp->cn_pnbuf, M_NAMEI);
+ VTONFS(sdvp)->n_flag |= NMODIFIED;
+ VTONFS(sdvp)->n_attrstamp = 0;
+ return (error);
+}
+
+/*
+ * nfs hard link create call
+ * Sends an NFSPROC_LINK rpc carrying the handles of a_tdvp and a_vp
+ * followed by the name in a_cnp.
+ * Returns EXDEV when the two vnodes are on different mounts, 0 on
+ * success (EEXIST from the rpc is mapped to success), otherwise an
+ * error code.
+ */
+int
+nfs_link(ap)
+ struct vop_link_args /* {
+ struct vnode *a_vp;
+ struct vnode *a_tdvp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *tdvp = ap->a_tdvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /* Cross-mount link: release vp and refuse. */
+ if (vp->v_mount != tdvp->v_mount) {
+ /*VOP_ABORTOP(vp, cnp);*/
+ if (tdvp == vp)
+ vrele(vp);
+ else
+ vput(vp);
+ return (EXDEV);
+ }
+
+ nfsstats.rpccnt[NFSPROC_LINK]++;
+ nfsm_reqhead(tdvp, NFSPROC_LINK,
+ NFSX_FH*2+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(tdvp);
+ nfsm_fhtom(vp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(tdvp, NFSPROC_LINK, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ /* Drop the cached attributes of both vnodes. */
+ VTONFS(tdvp)->n_attrstamp = 0;
+ VTONFS(tdvp)->n_flag |= NMODIFIED;
+ VTONFS(vp)->n_attrstamp = 0;
+ vrele(vp);
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs symbolic link create call
+ * Creates the name in a_cnp inside directory a_dvp as a symbolic link
+ * to the string a_target with an NFSPROC_SYMLINK rpc.  Nothing is
+ * stored through a_vpp.
+ * Returns 0 on success (EEXIST from the rpc is mapped to success),
+ * otherwise an error code.
+ */
+/* start here */
+int
+nfs_symlink(ap)
+ struct vop_symlink_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ char *a_target;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int slen, error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ nfsstats.rpccnt[NFSPROC_SYMLINK]++;
+ slen = strlen(ap->a_target);
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsm_reqhead(dvp, NFSPROC_SYMLINK, NFSX_FH+2*NFSX_UNSIGNED+
+ nfsm_rndup(cnp->cn_namelen)+nfsm_rndup(slen)+NFSX_SATTR(isnq));
+ /* Arguments: directory handle, link name, target path, attributes. */
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_strtom(ap->a_target, slen, NFS_MAXPATHLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(VLNK, vap->va_mode);
+ /* Owner and group both come from the caller's credential. */
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(cnp->cn_cred->cr_gid);
+ /*
+ * The size is sent as -1.
+ * NOTE(review): sa_nqrdev is not assigned in the nqnfs branch --
+ * confirm that the server ignores it for symbolic links.
+ */
+ if (isnq) {
+ quad_t qval = -1;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = -1;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_SYMLINK, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ vrele(dvp);
+ /*
+ * Kludge: Map EEXIST => 0 assuming that it is a reply to a retry.
+ */
+ if (error == EEXIST)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs make dir call
+ * Creates directory a_cnp inside a_dvp with an NFSPROC_MKDIR rpc and
+ * stores the new vnode through a_vpp.  If the server answers EEXIST the
+ * name is looked up once; when it turns out to be a directory the call
+ * is treated as having succeeded (the reply is assumed to belong to a
+ * retransmitted request).
+ * Returns 0 on success, otherwise an error code.
+ */
+int
+nfs_mkdir(ap)
+ struct vop_mkdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode **a_vpp;
+ struct componentname *a_cnp;
+ struct vattr *a_vap;
+ } */ *ap;
+{
+ register struct vnode *dvp = ap->a_dvp;
+ register struct vattr *vap = ap->a_vap;
+ register struct componentname *cnp = ap->a_cnp;
+ register struct vnode **vpp = ap->a_vpp;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ register int len;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, firsttry = 1, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct vattr vattr;
+
+ /* The parent's attributes supply the group id for the new directory. */
+ if (error = VOP_GETATTR(dvp, &vattr, cnp->cn_cred, cnp->cn_proc)) {
+ VOP_ABORTOP(dvp, cnp);
+ vput(dvp);
+ return (error);
+ }
+ len = cnp->cn_namelen;
+ isnq = (VFSTONFS(dvp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsstats.rpccnt[NFSPROC_MKDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_MKDIR,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len)+NFSX_SATTR(isnq));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_build(sp, struct nfsv2_sattr *, NFSX_SATTR(isnq));
+ sp->sa_mode = vtonfs_mode(VDIR, vap->va_mode);
+ sp->sa_uid = txdr_unsigned(cnp->cn_cred->cr_uid);
+ sp->sa_gid = txdr_unsigned(vattr.va_gid);
+ /*
+ * The size is sent as -1.
+ * NOTE(review): sa_nqrdev is not assigned in the nqnfs branch --
+ * confirm that the server ignores it for directories.
+ */
+ if (isnq) {
+ quad_t qval = -1;
+
+ txdr_hyper(&qval, &sp->sa_nqsize);
+ sp->sa_nqflags = 0;
+ txdr_nqtime(&vap->va_atime, &sp->sa_nqatime);
+ txdr_nqtime(&vap->va_mtime, &sp->sa_nqmtime);
+ } else {
+ sp->sa_nfssize = -1;
+ txdr_nfstime(&vap->va_atime, &sp->sa_nfsatime);
+ txdr_nfstime(&vap->va_mtime, &sp->sa_nfsmtime);
+ }
+ nfsm_request(dvp, NFSPROC_MKDIR, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *vpp);
+ nfsm_reqdone;
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ /*
+ * Kludge: Map EEXIST => 0 assuming that you have a reply to a retry
+ * if we can succeed in looking up the directory.
+ * "firsttry" is necessary since the macros may "goto nfsmout" which
+ * is above the if on errors. (Ugh)
+ */
+ if (error == EEXIST && firsttry) {
+ firsttry = 0;
+ error = 0;
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ *vpp = NULL;
+ nfsm_reqhead(dvp, NFSPROC_LOOKUP,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, len, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_LOOKUP, cnp->cn_proc, cnp->cn_cred);
+ nfsm_mtofh(dvp, *vpp);
+ /* Something other than a directory already has the name. */
+ if ((*vpp)->v_type != VDIR) {
+ vput(*vpp);
+ error = EEXIST;
+ }
+ m_freem(mrep);
+ }
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ vrele(dvp);
+ return (error);
+}
+
+/*
+ * nfs remove directory call
+ * Removes the name in a_cnp from directory a_dvp with an NFSPROC_RMDIR
+ * rpc, then purges the name cache for both vnodes and releases them.
+ * Returns EINVAL if a_dvp and a_vp are the same vnode, 0 on success
+ * (ENOENT from the rpc is mapped to success), otherwise an error code.
+ */
+int
+nfs_rmdir(ap)
+ struct vop_rmdir_args /* {
+ struct vnode *a_dvp;
+ struct vnode *a_vp;
+ struct componentname *a_cnp;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct vnode *dvp = ap->a_dvp;
+ register struct componentname *cnp = ap->a_cnp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t2;
+ caddr_t bpos, dpos;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+
+ /*
+ * Directory and target are the same vnode: drop both references
+ * (one for a_dvp, one for a_vp) and refuse.
+ */
+ if (dvp == vp) {
+ vrele(dvp);
+ vrele(dvp);
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ return (EINVAL);
+ }
+ nfsstats.rpccnt[NFSPROC_RMDIR]++;
+ nfsm_reqhead(dvp, NFSPROC_RMDIR,
+ NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(cnp->cn_namelen));
+ nfsm_fhtom(dvp);
+ nfsm_strtom(cnp->cn_nameptr, cnp->cn_namelen, NFS_MAXNAMLEN);
+ nfsm_request(dvp, NFSPROC_RMDIR, cnp->cn_proc, cnp->cn_cred);
+ nfsm_reqdone;
+ FREE(cnp->cn_pnbuf, M_NAMEI);
+ VTONFS(dvp)->n_flag |= NMODIFIED;
+ VTONFS(dvp)->n_attrstamp = 0;
+ cache_purge(dvp);
+ cache_purge(vp);
+ vrele(vp);
+ vrele(dvp);
+ /*
+ * Kludge: Map ENOENT => 0 assuming that you have a reply to a retry.
+ */
+ if (error == ENOENT)
+ error = 0;
+ return (error);
+}
+
+/*
+ * nfs readdir call
+ * Although cookie is defined as opaque, I translate it to/from net byte
+ * order so that it looks more sensible. This appears consistent with the
+ * Ultrix implementation of NFS.
+ *
+ * Only directories can be read this way; anything else gets EPERM.
+ * A read that starts at the remembered end-of-directory offset of an
+ * unmodified directory returns at once with nothing transferred, as
+ * long as the cached information is still believed to be valid.
+ * Everything else is handled by nfs_bioread().
+ */
+int
+nfs_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	register struct uio *uio = ap->a_uio;
+	int tresid, error;
+	struct vattr vattr;
+
+	if (vp->v_type != VDIR)
+		return (EPERM);
+
+	/*
+	 * EOF offset cache.  nqnfs trusts it while the read lease is
+	 * cachable; plain nfs trusts it while the directory's modify time
+	 * still matches the one remembered in the nfsnode.
+	 */
+	if (uio->uio_offset != 0 && uio->uio_offset == np->n_direofoffset &&
+	    (np->n_flag & NMODIFIED) == 0) {
+		int eofhit;
+
+		if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS)
+			eofhit = NQNFS_CKCACHABLE(vp, NQL_READ) ? 1 : 0;
+		else
+			eofhit = (VOP_GETATTR(vp, &vattr, ap->a_cred,
+			    uio->uio_procp) == 0 &&
+			    np->n_mtime == vattr.va_mtime.ts_sec) ? 1 : 0;
+		if (eofhit) {
+			nfsstats.direofcache_hits++;
+			return (0);
+		}
+	}
+
+	/*
+	 * Let nfs_bioread() do the real work, then count a miss when the
+	 * read succeeded but transferred nothing.
+	 */
+	tresid = uio->uio_resid;
+	error = nfs_bioread(vp, uio, 0, ap->a_cred);
+	if (error == 0 && uio->uio_resid == tresid)
+		nfsstats.direofcache_misses++;
+	return (error);
+}
+
+/*
+ * Readdir rpc call.
+ * Called from below the buffer cache by nfs_doio().
+ *
+ * Issues NFSPROC_READDIR requests while the server reports more entries
+ * and at least NFS_DIRBLKSIZ bytes remain in uiop.  Each entry of a reply
+ * is converted in place, inside the reply data, to a struct dirent
+ * (d_fileno, d_namlen, d_type = DT_UNKNOWN, d_reclen); the converted run
+ * is then copied to uiop with nfsm_mtouio() starting from the position
+ * saved in md2/dpos2, and uio_offset is set to the last offset cookie.
+ * An entry whose name length is <= 0 or > NFS_MAXNAMLEN ends the call
+ * with EBADRPC.  When the server signals EOF the final cookie is cached
+ * in the nfsnode's n_direofoffset.  Finally the last record's d_reclen is
+ * grown so the data returned ends on a multiple of NFS_DIRBLKSIZ.
+ * Returns 0 or an error set here or by the nfsm_* macros.
+ */
+int
+nfs_readdirrpc(vp, uiop, cred)
+ register struct vnode *vp;
+ struct uio *uiop;
+ struct ucred *cred;
+{
+ register long len;
+ register struct dirent *dp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ long tlen, lastlen;
+ caddr_t bpos, dpos, cp2;
+ int error = 0;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct mbuf *md2;
+ caddr_t dpos2;
+ int siz; /* bytes of converted entries in the current reply */
+ int more_dirs = 1; /* nonzero while more entries are expected */
+ u_long off, savoff; /* offset cookie after the current / previous entry */
+ struct dirent *savdp;
+ struct nfsmount *nmp;
+ struct nfsnode *np = VTONFS(vp);
+ long tresid; /* uio_resid on entry */
+
+ nmp = VFSTONFS(vp->v_mount);
+ tresid = uiop->uio_resid;
+ /*
+ * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+ * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+ * The stopping criteria is EOF or buffer full.
+ */
+ while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+ nfsstats.rpccnt[NFSPROC_READDIR]++;
+ nfsm_reqhead(vp, NFSPROC_READDIR,
+ NFSX_FH + 2 * NFSX_UNSIGNED);
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+ off = (u_long)uiop->uio_offset;
+ *tl++ = txdr_unsigned(off);
+ *tl = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+ nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+ nfsm_request(vp, NFSPROC_READDIR, uiop->uio_procp, cred);
+ siz = 0;
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = fxdr_unsigned(int, *tl);
+
+ /* Save the position so that we can do nfsm_mtouio() later */
+ dpos2 = dpos;
+ md2 = md;
+
+ /* loop thru the dir entries, doctoring them to 4bsd form */
+#ifdef lint
+ dp = (struct dirent *)0;
+#endif /* lint */
+ while (more_dirs && siz < uiop->uio_resid) {
+ savoff = off; /* Hold onto offset and dp */
+ savdp = dp;
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ dp = (struct dirent *)tl; /* entry is rewritten in place in the reply data */
+ dp->d_fileno = fxdr_unsigned(u_long, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len <= 0 || len > NFS_MAXNAMLEN) {
+ error = EBADRPC;
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ dp->d_namlen = (u_char)len;
+ dp->d_type = DT_UNKNOWN;
+ nfsm_adv(len); /* Point past name */
+ tlen = nfsm_rndup(len);
+ /*
+ * This should not be necessary, but some servers have
+ * broken XDR such that these bytes are not null filled.
+ */
+ if (tlen != len) {
+ *dpos = '\0'; /* Null-terminate */
+ nfsm_adv(tlen - len);
+ len = tlen;
+ }
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ off = fxdr_unsigned(u_long, *tl);
+ *tl++ = 0; /* Ensures null termination of name */
+ more_dirs = fxdr_unsigned(int, *tl);
+ dp->d_reclen = len + 4 * NFSX_UNSIGNED;
+ siz += dp->d_reclen;
+ }
+ /*
+ * If at end of rpc data, get the eof boolean
+ */
+ if (!more_dirs) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+ /*
+ * If at EOF, cache directory offset
+ */
+ if (!more_dirs)
+ np->n_direofoffset = off;
+ }
+ /*
+ * If there is too much to fit in the data buffer, use savoff and
+ * savdp to trim off the last record.
+ * --> we are not at eof
+ */
+ if (siz > uiop->uio_resid) {
+ off = savoff;
+ siz -= dp->d_reclen;
+ dp = savdp;
+ more_dirs = 0; /* Paranoia */
+ }
+ if (siz > 0) {
+ lastlen = dp->d_reclen; /* remembered for the padding step after the loop */
+ md = md2;
+ dpos = dpos2;
+ nfsm_mtouio(uiop, siz);
+ uiop->uio_offset = (off_t)off;
+ } else
+ more_dirs = 0; /* Ugh, never happens, but in case.. */
+ m_freem(mrep);
+ }
+ /*
+ * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+ * by increasing d_reclen for the last record.
+ */
+ if (uiop->uio_resid < tresid) {
+ len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+ if (len > 0) {
+ dp = (struct dirent *)
+ (uiop->uio_iov->iov_base - lastlen); /* start of the last record copied out */
+ dp->d_reclen += len;
+ uiop->uio_iov->iov_base += len;
+ uiop->uio_iov->iov_len -= len;
+ uiop->uio_resid -= len;
+ }
+ }
+nfsmout:
+ return (error);
+}
+
+/*
+ * Nqnfs readdir_and_lookup RPC. Used in place of nfs_readdirrpc().
+ *
+ * Issues NQNFSPROC_READDIRLOOK requests while the server reports more
+ * entries and at least NFS_DIRBLKSIZ bytes remain in uiop.  For every
+ * entry the reply carries four words of lease data (decoded only when
+ * the mount has NFSMNT_NQLOOKLEASE), a file handle, attributes, the file
+ * number and the name.  The nfsnode for the handle is obtained with
+ * nfs_nget() (or is the directory itself when the handles match) and its
+ * attributes are loaded with nfs_loadattrcache().  If the entry fits, a
+ * struct dirent is built directly in the single iovec of uiop (the
+ * function panics unless uio_iovcnt is 1), a lease is recorded when one
+ * was granted and has not expired, and names of at most NCHNAMLEN bytes
+ * are entered in the name cache.  Name lengths <= 0 or > NFS_MAXNAMLEN
+ * end the call with EBADRPC.  At EOF the last cookie is cached in
+ * n_direofoffset, and the last record is padded out to a multiple of
+ * NFS_DIRBLKSIZ.
+ * NOTE(review): when nfs_nget() fails, nothing visible here assigns np,
+ * yet NFSTOV(np) is still evaluated and the error is then overwritten by
+ * the nfs_loadattrcache() result - confirm nfs_nget()'s failure contract.
+ */
+int
+nfs_readdirlookrpc(vp, uiop, cred)
+ struct vnode *vp;
+ register struct uio *uiop;
+ struct ucred *cred;
+{
+ register int len;
+ register struct dirent *dp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos, dpos, cp2;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ struct nameidata nami, *ndp = &nami;
+ struct componentname *cnp = &ndp->ni_cnd;
+ u_long off, endoff, fileno;
+ time_t reqtime, ltime;
+ struct nfsmount *nmp;
+ struct nfsnode *np;
+ struct vnode *newvp;
+ nfsv2fh_t *fhp;
+ u_quad_t frev;
+ int error = 0, tlen, more_dirs = 1, tresid, doit, bigenough, i;
+ int cachable;
+
+ if (uiop->uio_iovcnt != 1)
+ panic("nfs rdirlook");
+ nmp = VFSTONFS(vp->v_mount);
+ tresid = uiop->uio_resid;
+ ndp->ni_dvp = vp;
+ newvp = NULLVP;
+ /*
+ * Loop around doing readdir rpc's of size uio_resid or nm_rsize,
+ * whichever is smaller, truncated to a multiple of NFS_DIRBLKSIZ.
+ * The stopping criteria is EOF or buffer full.
+ */
+ while (more_dirs && uiop->uio_resid >= NFS_DIRBLKSIZ) {
+ nfsstats.rpccnt[NQNFSPROC_READDIRLOOK]++;
+ nfsm_reqhead(vp, NQNFSPROC_READDIRLOOK,
+ NFSX_FH + 3 * NFSX_UNSIGNED);
+ nfsm_fhtom(vp);
+ nfsm_build(tl, u_long *, 3 * NFSX_UNSIGNED);
+ off = (u_long)uiop->uio_offset;
+ *tl++ = txdr_unsigned(off);
+ *tl++ = txdr_unsigned(((uiop->uio_resid > nmp->nm_rsize) ?
+ nmp->nm_rsize : uiop->uio_resid) & ~(NFS_DIRBLKSIZ-1));
+ if (nmp->nm_flag & NFSMNT_NQLOOKLEASE)
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ else
+ *tl = 0;
+ reqtime = time.tv_sec; /* lease expiry is computed relative to this */
+ nfsm_request(vp, NQNFSPROC_READDIRLOOK, uiop->uio_procp, cred);
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = fxdr_unsigned(int, *tl);
+
+ /* loop thru the dir entries, doctoring them to 4bsd form */
+ bigenough = 1; /* cleared once an entry no longer fits in uiop */
+ while (more_dirs && bigenough) {
+ doit = 1; /* cleared if the node or its attributes cannot be loaded */
+ nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+ if (nmp->nm_flag & NFSMNT_NQLOOKLEASE) {
+ cachable = fxdr_unsigned(int, *tl++);
+ ltime = reqtime + fxdr_unsigned(int, *tl++);
+ fxdr_hyper(tl, &frev);
+ }
+ nfsm_dissect(fhp, nfsv2fh_t *, NFSX_FH);
+ if (!bcmp(VTONFS(vp)->n_fh.fh_bytes, (caddr_t)fhp, NFSX_FH)) { /* entry is the directory itself */
+ VREF(vp);
+ newvp = vp;
+ np = VTONFS(vp);
+ } else {
+ if (error = nfs_nget(vp->v_mount, fhp, &np))
+ doit = 0;
+ newvp = NFSTOV(np); /* NOTE(review): evaluated even when nfs_nget() failed */
+ }
+ if (error = nfs_loadattrcache(&newvp, &md, &dpos,
+ (struct vattr *)0))
+ doit = 0;
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ fileno = fxdr_unsigned(u_long, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len <= 0 || len > NFS_MAXNAMLEN) {
+ error = EBADRPC;
+ m_freem(mrep);
+ goto nfsmout;
+ }
+ tlen = (len + 4) & ~0x3; /* name length plus 1 to 4 pad bytes, a multiple of 4 */
+ if ((tlen + DIRHDSIZ) > uiop->uio_resid)
+ bigenough = 0;
+ if (bigenough && doit) {
+ dp = (struct dirent *)uiop->uio_iov->iov_base; /* build the dirent directly in the caller's buffer */
+ dp->d_fileno = fileno;
+ dp->d_namlen = len;
+ dp->d_reclen = tlen + DIRHDSIZ;
+ dp->d_type =
+ IFTODT(VTTOIF(np->n_vattr.va_type));
+ uiop->uio_resid -= DIRHDSIZ;
+ uiop->uio_iov->iov_base += DIRHDSIZ;
+ uiop->uio_iov->iov_len -= DIRHDSIZ;
+ cnp->cn_nameptr = uiop->uio_iov->iov_base;
+ cnp->cn_namelen = len;
+ ndp->ni_vp = newvp;
+ nfsm_mtouio(uiop, len);
+ cp = uiop->uio_iov->iov_base;
+ tlen -= len; /* now the number of pad bytes to zero */
+ for (i = 0; i < tlen; i++)
+ *cp++ = '\0';
+ uiop->uio_iov->iov_base += tlen;
+ uiop->uio_iov->iov_len -= tlen;
+ uiop->uio_resid -= tlen;
+ cnp->cn_hash = 0;
+ for (cp = cnp->cn_nameptr, i = 1; i <= len; i++, cp++)
+ cnp->cn_hash += (unsigned char)*cp * i;
+ if ((nmp->nm_flag & NFSMNT_NQLOOKLEASE) &&
+ ltime > time.tv_sec)
+ nqnfs_clientlease(nmp, np, NQL_READ,
+ cachable, ltime, frev);
+ if (cnp->cn_namelen <= NCHNAMLEN)
+ cache_enter(ndp->ni_dvp, ndp->ni_vp, cnp);
+ } else {
+ nfsm_adv(nfsm_rndup(len)); /* skip the name without copying it */
+ }
+ if (newvp != NULLVP) {
+ vrele(newvp);
+ newvp = NULLVP;
+ }
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ if (bigenough)
+ endoff = off = fxdr_unsigned(u_long, *tl++);
+ else
+ endoff = fxdr_unsigned(u_long, *tl++); /* off keeps the cookie of the last entry that fit */
+ more_dirs = fxdr_unsigned(int, *tl);
+ }
+ /*
+ * If at end of rpc data, get the eof boolean
+ */
+ if (!more_dirs) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ more_dirs = (fxdr_unsigned(int, *tl) == 0);
+
+ /*
+ * If at EOF, cache directory offset
+ */
+ if (!more_dirs)
+ VTONFS(vp)->n_direofoffset = endoff;
+ }
+ if (uiop->uio_resid < tresid)
+ uiop->uio_offset = (off_t)off;
+ else
+ more_dirs = 0; /* nothing copied out so far: stop looping */
+ m_freem(mrep);
+ }
+ /*
+ * Fill last record, iff any, out to a multiple of NFS_DIRBLKSIZ
+ * by increasing d_reclen for the last record.
+ */
+ if (uiop->uio_resid < tresid) {
+ len = uiop->uio_resid & (NFS_DIRBLKSIZ - 1);
+ if (len > 0) {
+ dp->d_reclen += len; /* dp still points at the last dirent built above */
+ uiop->uio_iov->iov_base += len;
+ uiop->uio_iov->iov_len -= len;
+ uiop->uio_resid -= len;
+ }
+ }
+nfsmout:
+ if (newvp != NULLVP)
+ vrele(newvp);
+ return (error);
+}
+static char hextoasc[] = "0123456789abcdef"; /* hex digit for a 4-bit value */
+
+/*
+ * Silly rename. To make the NFS filesystem that is stateless look a little
+ * more like the "ufs" a remove of an active vnode is translated to a rename
+ * to a funny looking filename that is removed by nfs_inactive on the
+ * nfsnode. There is the potential for another process on a different client
+ * to create the same funny name between the time nfs_lookitup() fails and
+ * the time nfs_renameit() completes, but...
+ *
+ * The name is ".nfsAxxxx4.4", where xxxx is replaced by the low 16 bits
+ * of the calling process id in hex.  While nfs_lookitup() reports that
+ * the name exists (returns 0), the character at index 4 (initially 'A')
+ * is incremented; EINVAL is returned once it passes 'z'.  On success the
+ * file handle is looked up again into np->n_fh, np->n_sillyrename is set
+ * and 0 is returned.  On failure the directory reference and the
+ * duplicated credential are dropped (and sp freed under SILLYSEPARATE).
+ */
+int
+nfs_sillyrename(dvp, vp, cnp)
+ struct vnode *dvp, *vp;
+ struct componentname *cnp;
+{
+ register struct nfsnode *np;
+ register struct sillyrename *sp;
+ int error;
+ short pid;
+
+ cache_purge(dvp);
+ np = VTONFS(vp);
+#ifdef SILLYSEPARATE
+ MALLOC(sp, struct sillyrename *, sizeof (struct sillyrename),
+ M_NFSREQ, M_WAITOK);
+#else
+ sp = &np->n_silly;
+#endif
+ sp->s_cred = crdup(cnp->cn_cred);
+ sp->s_dvp = dvp;
+ VREF(dvp); /* reference held through sp->s_dvp; dropped at "bad" on failure */
+
+ /* Fudge together a funny name */
+ pid = cnp->cn_proc->p_pid;
+ bcopy(".nfsAxxxx4.4", sp->s_name, 13); /* 12 characters plus the terminating NUL */
+ sp->s_namlen = 12;
+ sp->s_name[8] = hextoasc[pid & 0xf];
+ sp->s_name[7] = hextoasc[(pid >> 4) & 0xf];
+ sp->s_name[6] = hextoasc[(pid >> 8) & 0xf];
+ sp->s_name[5] = hextoasc[(pid >> 12) & 0xf];
+
+ /* Try lookitups until we get one that isn't there */
+ while (nfs_lookitup(sp, (nfsv2fh_t *)0, cnp->cn_proc) == 0) {
+ sp->s_name[4]++;
+ if (sp->s_name[4] > 'z') {
+ error = EINVAL;
+ goto bad;
+ }
+ }
+ if (error = nfs_renameit(dvp, cnp, sp))
+ goto bad;
+ nfs_lookitup(sp, &np->n_fh, cnp->cn_proc); /* return value deliberately not checked */
+ np->n_sillyrename = sp;
+ return (0);
+bad:
+ vrele(sp->s_dvp);
+ crfree(sp->s_cred);
+#ifdef SILLYSEPARATE
+ free((caddr_t)sp, M_NFSREQ);
+#endif
+ return (error);
+}
+
+/*
+ * Look up a file name for silly rename stuff.
+ * Just like nfs_lookup() except that it doesn't load returned values
+ * into the nfsnode table.
+ * If fhp != NULL it copies the returned file handle out
+ *
+ * Sends NFSPROC_LOOKUP for sp->s_name in directory sp->s_dvp using the
+ * credential sp->s_cred on behalf of procp.  On NQNFS mounts one extra
+ * zero word is placed in the request before the file handle, and one
+ * word is skipped in the reply before the returned handle is copied.
+ * Returns the error left by the nfsm_* macros; nfs_sillyrename() treats
+ * a return of 0 as "the name exists".
+ */
+int
+nfs_lookitup(sp, fhp, procp)
+ register struct sillyrename *sp;
+ nfsv2fh_t *fhp;
+ struct proc *procp;
+{
+ register struct vnode *vp = sp->s_dvp;
+ register u_long *tl;
+ register caddr_t cp;
+ register long t1, t2;
+ caddr_t bpos, dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *mreq, *mrep, *md, *mb, *mb2;
+ long len;
+
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ nfsstats.rpccnt[NFSPROC_LOOKUP]++;
+ len = sp->s_namlen;
+ nfsm_reqhead(vp, NFSPROC_LOOKUP, NFSX_FH+NFSX_UNSIGNED+nfsm_rndup(len));
+ if (isnq) {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0; /* presumably "no lease requested" - confirm against the NQNFS protocol */
+ }
+ nfsm_fhtom(vp);
+ nfsm_strtom(sp->s_name, len, NFS_MAXNAMLEN);
+ nfsm_request(vp, NFSPROC_LOOKUP, procp, sp->s_cred);
+ if (fhp != NULL) {
+ if (isnq)
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); /* skip the extra NQNFS reply word */
+ nfsm_dissect(cp, caddr_t, NFSX_FH);
+ bcopy(cp, (caddr_t)fhp, NFSX_FH);
+ }
+ nfsm_reqdone;
+ return (error);
+}
+
+/*
+ * Kludge City..
+ * - nfs_bmap() is essentially a no-op that does no translation
+ * - nfs_strategy() fakes physical I/O with nfs_readrpc/nfs_writerpc
+ *   after mapping the physical addresses into Kernel Virtual space in
+ *   the nfsiobuf area.
+ *   (Maybe the process's page mapping could be used, but Kernel Write
+ *   might not be enabled, copyout() would do a lot more work than
+ *   bcopy(), and it currently happens in the context of the swapper
+ *   process (2).)
+ *
+ * The vnode maps to itself, and the logical block number is merely
+ * scaled by the number of device blocks in one mount I/O-size block.
+ * Either output pointer may be NULL, in which case it is not written.
+ * Always returns 0.
+ */
+int
+nfs_bmap(ap)
+	struct vop_bmap_args /* {
+		struct vnode *a_vp;
+		daddr_t  a_bn;
+		struct vnode **a_vpp;
+		daddr_t *a_bnp;
+		int *a_runp;
+	} */ *ap;
+{
+	register struct vnode *filevp = ap->a_vp;
+
+	if (ap->a_bnp != NULL)
+		*ap->a_bnp =
+		    ap->a_bn * btodb(filevp->v_mount->mnt_stat.f_iosize);
+	if (ap->a_vpp != NULL)
+		*ap->a_vpp = filevp;
+	return (0);
+}
+
+/*
+ * Strategy routine.
+ * For async requests when nfsiod(s) are running, queue the request by
+ * calling nfs_asyncio(), otherwise just call nfs_doio() to do the
+ * request.
+ *
+ * Panics on B_PHYS buffers.  Returns 0 when the buffer was queued,
+ * otherwise whatever nfs_doio() returns.
+ */
+int
+nfs_strategy(ap)
+	struct vop_strategy_args *ap;
+{
+	register struct buf *bp = ap->a_bp;
+	struct ucred *cr;
+	struct proc *p;
+	int async;
+
+	if (bp->b_flags & B_PHYS)
+		panic("nfs physio");
+
+	/* Async I/O has no process context; reads and writes carry their own creds. */
+	async = (bp->b_flags & B_ASYNC) != 0;
+	p = async ? (struct proc *)0 : curproc;	/* XXX */
+	cr = (bp->b_flags & B_READ) ? bp->b_rcred : bp->b_wcred;
+
+	/*
+	 * An asynchronous op is handed to an i/o daemon when one will
+	 * take it; in every other case do the I/O ourselves.
+	 */
+	if (async && nfs_asyncio(bp, NOCRED) == 0)
+		return (0);
+	return (nfs_doio(bp, cr, p));
+}
+
+/*
+ * Mmap a file
+ *
+ * NB Currently unsupported: every request is refused with EINVAL and
+ * none of the arguments is examined.
+ */
+/* ARGSUSED */
+int
+nfs_mmap(ap)
+	struct vop_mmap_args /* {
+		struct vnode *a_vp;
+		int  a_fflags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * Flush all the blocks associated with a vnode.
+ * Walk through the buffer pool and push any dirty pages
+ * associated with the vnode.
+ *
+ * Each buffer on the vnode's dirty list that is not busy is marked busy
+ * and written asynchronously with VOP_BWRITE(), after which the scan
+ * restarts from the head of the list.  Busy buffers are skipped unless
+ * a_waitfor is MNT_WAIT, in which case the caller sleeps on the buffer
+ * and then rescans.  With MNT_WAIT the function also waits for
+ * v_numoutput to drain and rescans if the dirty list is not empty.
+ * On NFSMNT_INT mounts the sleeps use PCATCH; if a sleep returns an
+ * error and nfs_sigintr() reports an interrupt, EINTR is returned,
+ * otherwise later sleeps drop PCATCH and use a 2 * hz timeout.
+ * A write error recorded on the nfsnode (NWRITEERR) is returned and
+ * cleared.
+ * NOTE(review): when NWRITEERR is not set, the value returned is the
+ * result of the most recent tsleep(), which may be non-zero - confirm
+ * whether that is intended.
+ */
+/* ARGSUSED */
+int
+nfs_fsync(ap)
+ struct vop_fsync_args /* {
+ struct vnodeop_desc *a_desc;
+ struct vnode * a_vp;
+ struct ucred * a_cred;
+ int a_waitfor;
+ struct proc * a_p;
+ } */ *ap;
+{
+ register struct vnode *vp = ap->a_vp;
+ register struct nfsnode *np = VTONFS(vp);
+ register struct buf *bp;
+ struct buf *nbp;
+ struct nfsmount *nmp;
+ int s, error = 0, slptimeo = 0, slpflag = 0;
+
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_INT)
+ slpflag = PCATCH;
+loop:
+ s = splbio();
+ for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) {
+ nbp = bp->b_vnbufs.le_next; /* fetched before bp can be taken off the list */
+ if (bp->b_flags & B_BUSY) {
+ if (ap->a_waitfor != MNT_WAIT)
+ continue;
+ bp->b_flags |= B_WANTED;
+ error = tsleep((caddr_t)bp, slpflag | (PRIBIO + 1),
+ "nfsfsync", slptimeo);
+ splx(s);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+ return (EINTR);
+ if (slpflag == PCATCH) { /* stop catching signals, poll with a timeout instead */
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ goto loop;
+ }
+ if ((bp->b_flags & B_DELWRI) == 0)
+ panic("nfs_fsync: not dirty");
+ bremfree(bp);
+ bp->b_flags |= B_BUSY;
+ splx(s);
+ bp->b_flags |= B_ASYNC;
+ VOP_BWRITE(bp);
+ goto loop; /* rescan from the list head after every write */
+ }
+ splx(s);
+ if (ap->a_waitfor == MNT_WAIT) {
+ while (vp->v_numoutput) {
+ vp->v_flag |= VBWAIT;
+ error = tsleep((caddr_t)&vp->v_numoutput,
+ slpflag | (PRIBIO + 1), "nfsfsync", slptimeo);
+ if (error) {
+ if (nfs_sigintr(nmp, (struct nfsreq *)0, ap->a_p))
+ return (EINTR);
+ if (slpflag == PCATCH) {
+ slpflag = 0;
+ slptimeo = 2 * hz;
+ }
+ }
+ }
+ if (vp->v_dirtyblkhd.lh_first) { /* buffers were dirtied while we waited */
+#ifdef DIAGNOSTIC
+ vprint("nfs_fsync: dirty", vp);
+#endif
+ goto loop;
+ }
+ }
+ if (np->n_flag & NWRITEERR) {
+ error = np->n_error;
+ np->n_flag &= ~NWRITEERR;
+ }
+ return (error);
+}
+
+/*
+ * Return POSIX pathconf information applicable to nfs.
+ *
+ * Currently the NFS protocol does not support getting such
+ * information from the remote server, so every query fails with
+ * EINVAL and *a_retval is left untouched.
+ *
+ * The return type is spelled out rather than left as implicit int,
+ * matching every other vnode operation here.
+ */
+/* ARGSUSED */
+int
+nfs_pathconf(ap)
+	struct vop_pathconf_args /* {
+		struct vnode *a_vp;
+		int a_name;
+		int *a_retval;
+	} */ *ap;
+{
+
+	return (EINVAL);
+}
+
+/*
+ * NFS advisory byte-level locks.
+ * Currently unsupported: the request is not examined and EOPNOTSUPP is
+ * returned unconditionally.
+ */
+int
+nfs_advlock(ap)
+	struct vop_advlock_args /* {
+		struct vnode *a_vp;
+		caddr_t  a_id;
+		int  a_op;
+		struct flock *a_fl;
+		int  a_flags;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * Print out the contents of an nfsnode.
+ *
+ * Prints the file id and file system id from the cached attributes,
+ * followed by fifo state for VFIFO vnodes when FIFO is configured.
+ * Always returns 0; the function is declared int, so it must not run
+ * off the end without a value.
+ */
+int
+nfs_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+
+	printf("tag VT_NFS, fileid %d fsid 0x%x",
+		np->n_vattr.va_fileid, np->n_vattr.va_fsid);
+#ifdef FIFO
+	if (vp->v_type == VFIFO)
+		fifo_printinfo(vp);
+#endif /* FIFO */
+	printf("\n");
+	return (0);
+}
+
+/*
+ * NFS directory offset lookup.
+ * Currently unsupported: returns EOPNOTSUPP without touching *a_res or
+ * *a_bpp.
+ */
+int
+nfs_blkatoff(ap)
+	struct vop_blkatoff_args /* {
+		struct vnode *a_vp;
+		off_t a_offset;
+		char **a_res;
+		struct buf **a_bpp;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace allocation.
+ * Currently unsupported: returns EOPNOTSUPP and never stores to *a_vpp.
+ */
+int
+nfs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS flat namespace free.
+ * Currently unsupported: the arguments are ignored and EOPNOTSUPP is
+ * returned.
+ */
+int
+nfs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS file truncation.
+ * Not implemented: logs a reminder on the console and fails with
+ * EOPNOTSUPP.  The intended implementation is to use nfs_setattr.
+ */
+int
+nfs_truncate(ap)
+	struct vop_truncate_args /* {
+		struct vnode *a_vp;
+		off_t a_length;
+		int a_flags;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	printf("nfs_truncate: need to implement!!");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * NFS update.
+ * Not implemented: logs a reminder on the console and fails with
+ * EOPNOTSUPP.  The intended implementation is to use nfs_setattr.
+ */
+int
+nfs_update(ap)
+	struct vop_update_args /* {
+		struct vnode *a_vp;
+		struct timeval *a_ta;
+		struct timeval *a_tm;
+		int a_waitfor;
+	} */ *ap;
+{
+
+	printf("nfs_update: need to implement!!");
+	return (EOPNOTSUPP);
+}
+
+/*
+ * nfs special file access vnode op.
+ * Essentially just get vattr and then imitate iaccess() since the device is
+ * local to the client.
+ *
+ * Returns 0 for uid 0 without fetching attributes, the VOP_GETATTR()
+ * error if the attributes cannot be obtained, and otherwise 0 or EACCES
+ * depending on whether every requested mode bit is granted.
+ */
+int
+nfsspec_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int  a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct ucred *cred = ap->a_cred;
+	register gid_t *grp;
+	register int n;
+	struct vattr va;
+	mode_t want = ap->a_mode;
+	int error;
+
+	/* The super-user is never refused. */
+	if (cred->cr_uid == 0)
+		return (0);
+
+	error = VOP_GETATTR(ap->a_vp, &va, cred, ap->a_p);
+	if (error)
+		return (error);
+
+	/*
+	 * Exactly one of the owner, group and other permission sets is
+	 * consulted.  The requested bits start in the owner position and
+	 * are shifted down once for a group match, twice for "other".
+	 */
+	if (cred->cr_uid != va.va_uid) {
+		want >>= 3;
+		grp = cred->cr_groups;
+		for (n = 0; n < cred->cr_ngroups; n++, grp++)
+			if (va.va_gid == *grp)
+				break;
+		if (n >= cred->cr_ngroups)	/* not a member of the group */
+			want >>= 3;
+	}
+	if ((va.va_mode & want) != want)
+		return (EACCES);
+	return (0);
+}
+
+/*
+ * Read wrapper for special devices.
+ * Records the access time on the nfsnode and flags it NACC, then passes
+ * the unchanged argument block to the spec vnode op for read.
+ */
+int
+nfsspec_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *devnp = VTONFS(ap->a_vp);
+
+	/* Note that the device has been accessed, and when. */
+	devnp->n_atim = time;
+	devnp->n_flag |= NACC;
+
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for special devices.
+ * Records the modification time on the nfsnode and flags it NUPD, then
+ * passes the unchanged argument block to the spec vnode op for write.
+ */
+int
+nfsspec_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	register struct nfsnode *devnp = VTONFS(ap->a_vp);
+
+	/* Note that the device has been updated, and when. */
+	devnp->n_mtim = time;
+	devnp->n_flag |= NUPD;
+
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for special devices.
+ *
+ * Update the times on the nfsnode then do device close.
+ * When access or update is pending the node is flagged NCHG; if this is
+ * also the only use of the vnode and the mount is writable, the saved
+ * times are pushed with VOP_SETATTR(), whose result is ignored.  The
+ * return value is that of the spec close op.
+ */
+int
+nfsspec_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr vattr;
+	int push;
+
+	push = 0;
+	if (np->n_flag & (NACC | NUPD)) {
+		np->n_flag |= NCHG;
+		push = (vp->v_usecount == 1 &&
+		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0);
+	}
+	if (push) {
+		VATTR_NULL(&vattr);
+		if (np->n_flag & NACC) {
+			vattr.va_atime.ts_sec = np->n_atim.tv_sec;
+			vattr.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+		}
+		if (np->n_flag & NUPD) {
+			vattr.va_mtime.ts_sec = np->n_mtim.tv_sec;
+			vattr.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+		}
+		(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+	}
+	return (VOCALL(spec_vnodeop_p, VOFFSET(vop_close), ap));
+}
+
+#ifdef FIFO
+/*
+ * Read wrapper for fifos.
+ * Records the access time on the nfsnode and flags it NACC, then passes
+ * the unchanged argument block to the fifo vnode op for read.
+ */
+int
+nfsfifo_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *fifonp = VTONFS(ap->a_vp);
+
+	/* Note that the fifo has been accessed, and when. */
+	fifonp->n_atim = time;
+	fifonp->n_flag |= NACC;
+
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_read), ap));
+}
+
+/*
+ * Write wrapper for fifos.
+ * Records the modification time on the nfsnode and flags it NUPD, then
+ * passes the unchanged argument block to the fifo vnode op for write.
+ */
+int
+nfsfifo_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int  a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct nfsnode *fifonp = VTONFS(ap->a_vp);
+
+	/* Note that the fifo has been updated, and when. */
+	fifonp->n_mtim = time;
+	fifonp->n_flag |= NUPD;
+
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_write), ap));
+}
+
+/*
+ * Close wrapper for fifos.
+ *
+ * Update the times on the nfsnode then do fifo close.
+ * Pending access/update times are refreshed to the current time and the
+ * node is flagged NCHG; if this is also the only use of the vnode and
+ * the mount is writable, the times are pushed with VOP_SETATTR(), whose
+ * result is ignored.  The return value is that of the fifo close op.
+ */
+int
+nfsfifo_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int  a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	extern int (**fifo_vnodeop_p)();
+	register struct vnode *vp = ap->a_vp;
+	register struct nfsnode *np = VTONFS(vp);
+	struct vattr vattr;
+	int push;
+
+	push = 0;
+	if (np->n_flag & (NACC | NUPD)) {
+		if (np->n_flag & NACC)
+			np->n_atim = time;
+		if (np->n_flag & NUPD)
+			np->n_mtim = time;
+		np->n_flag |= NCHG;
+		push = (vp->v_usecount == 1 &&
+		    (vp->v_mount->mnt_flag & MNT_RDONLY) == 0);
+	}
+	if (push) {
+		VATTR_NULL(&vattr);
+		if (np->n_flag & NACC) {
+			vattr.va_atime.ts_sec = np->n_atim.tv_sec;
+			vattr.va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+		}
+		if (np->n_flag & NUPD) {
+			vattr.va_mtime.ts_sec = np->n_mtim.tv_sec;
+			vattr.va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+		}
+		(void)VOP_SETATTR(vp, &vattr, ap->a_cred, ap->a_p);
+	}
+	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
+}
+#endif /* FIFO */
diff --git a/sys/nfsclient/nfsargs.h b/sys/nfsclient/nfsargs.h
new file mode 100644
index 0000000..261fd42
--- /dev/null
+++ b/sys/nfsclient/nfsargs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ * Timeouts are expressed in NFS_HZ ticks unless a comment says seconds.
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60 /* Upper bound applied by NFS_ATTRTIMEO() */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* a modulo nfs_asyncdaemons */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The value is one tenth of the seconds elapsed since (np)->n_mtime,
+ * clamped to the range NFS_MINATTRTIMO..NFS_MAXATTRTIMO, and is always
+ * NFS_MINATTRTIMO while NMODIFIED is set in (np)->n_flag.
+ * np is expanded several times, so pass an expression with no side
+ * effects.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ * NOTE(review): fields marked "(ret)" are presumably filled in on return
+ * from the call, and each structure presumably pairs with one of the
+ * NFSSVC_ flags - confirm against the nfssvc() implementation.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ * All members are plain event counters.  The client vnode operations
+ * increment rpccnt[procedure number] before issuing each RPC, and
+ * nfs_readdir() counts directory-EOF cache hits and misses in
+ * direofcache_hits / direofcache_misses.
+ * NOTE(review): the srv* members are presumably maintained by the
+ * server side - confirm against the nfsd code.
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS]; /* indexed by RPC procedure number */
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS]; /* presumably the per-procedure server counterpart of rpccnt */
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ * Each value is a distinct single bit; 0x001 and 0x020 are not assigned
+ * here.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ * True when e is none of EINTR, ERESTART and EWOULDBLOCK and the flag
+ * word s does not have PR_CONNREQUIRED set.
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ * r_next and r_prev link the element into that list.
+ * NOTE(review): r_mreq/r_mrep are presumably the request and reply mbuf
+ * chains, with r_md/r_dpos the parse position in the reply - confirm
+ * against the request code.
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats; /* NOTE(review): defines the object (not extern) in every file that includes this header with KERNEL defined */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ * NUIDHASH() masks the uid with NUIDHASHSIZ - 1, which yields a valid
+ * bucket index only while NUIDHASHSIZ stays a power of two.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ * Holds either an inet address or an mbuf pointer; for an nfsuid the
+ * NU_INETADDR bit of nu_flag presumably tells which - confirm.
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+struct nfssvc_sock { /* one per socket that requires service by the nfsd */
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next;
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_ bits, see below */
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* presumably uid hash chain heads indexed by NUIDHASH() - confirm */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff /* covers every SLP_ bit above plus the unassigned 0x80 */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ * nd_next and nd_prev link the structures into a list; nd_flag holds
+ * the NFSD_ bits defined after the structure.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md; /* presumably the mbuf that nd_dpos points into - confirm */
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfsdiskless.h b/sys/nfsclient/nfsdiskless.h
new file mode 100644
index 0000000..74e6b7b
--- /dev/null
+++ b/sys/nfsclient/nfsdiskless.h
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsdiskless.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Structure that must be initialized for a diskless nfs client.
+ * This structure is used by nfs_mountroot() to set up the root and swap
+ * vnodes plus do a partial ifconfig(8) and route(8) so that the critical net
+ * interface can communicate with the server.
+ * The primary bootstrap is expected to fill in the appropriate fields before
+ * starting vmunix. Whether or not the swap area is nfs mounted is determined
+ * by the value in swdevt[0]. (equal to NODEV --> swap over nfs)
+ * Currently only works for AF_INET protocols.
+ * NB: All fields are stored in net byte order to avoid hassles with
+ * client/server byte ordering differences.
+ */
+struct nfs_diskless {
+ struct ifaliasreq myif; /* Default interface */
+ struct sockaddr_in mygateway; /* Default gateway */
+ struct nfs_args swap_args; /* Mount args for swap file */
+ u_char swap_fh[NFS_FHSIZE]; /* Swap file's file handle */
+ struct sockaddr_in swap_saddr; /* Address of swap server */
+ char swap_hostnam[MNAMELEN]; /* Host name for mount pt */
+ int swap_nblks; /* Size of server swap file */
+ struct ucred swap_ucred; /* Swap credentials */
+ struct nfs_args root_args; /* Mount args for root fs */
+ u_char root_fh[NFS_FHSIZE]; /* File handle of root dir */
+ struct sockaddr_in root_saddr; /* Address of root server */
+ char root_hostnam[MNAMELEN]; /* Host name for mount pt */
+ long root_time; /* Timestamp of root fs */
+ char my_hostnam[MAXHOSTNAMELEN]; /* Client host name */
+};
diff --git a/sys/nfsclient/nfsm_subs.h b/sys/nfsclient/nfsm_subs.h
new file mode 100644
index 0000000..879db36
--- /dev/null
+++ b/sys/nfsclient/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ */
+extern struct mbuf *nfsm_reqh();
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+#define NFSMINOFF(m) \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat
+#define NFSMADV(m, s) (m)->m_data += (s)
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ */
+
+#define nfsm_build(a,c,s) \
+ { if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+
+#define nfsm_dissect(a,c,s) \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+
+#define nfsm_fhtom(v) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+#define nfsm_srvfhtom(f) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+
+#define nfsm_mtofh(d,v) \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); \
+ }
+
+#define nfsm_loadattr(v,a) \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; }
+
+#define nfsm_strsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+
+#define nfsm_srvstrsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } }
+
+#define nfsm_mtouio(p,s) \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_uiotom(p,s) \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_reqhead(v,a,s) \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone m_freem(mrep); \
+ nfsmout:
+
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+
+#define nfsm_request(v, t, p, c) \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+
+#define nfsm_strtom(a,s,m) \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvdone \
+ nfsmout: \
+ return(error)
+
+#define nfsm_reply(s) \
+ { \
+ nfsd->nd_repstat = error; \
+ if (error) \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); \
+ mreq = *mrq; \
+ if (error) \
+ return(0); \
+ }
+
+#define nfsm_adv(s) \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvmtofh(f) \
+ nfsm_dissect(tl, u_long *, NFSX_FH); \
+ bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH)
+
+#define nfsm_clget \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+
+#define nfsm_srvfillattr \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+ } else { \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfsclient/nfsmount.h b/sys/nfsclient/nfsmount.h
new file mode 100644
index 0000000..4d74acb
--- /dev/null
+++ b/sys/nfsclient/nfsmount.h
@@ -0,0 +1,127 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsmount.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Mount structure.
+ * One allocated on every NFS mount.
+ * Holds NFS specific information for mount.
+ */
+struct nfsmount {
+ int nm_flag; /* Flags for soft/hard... */
+ struct mount *nm_mountp; /* Vfs structure for this filesystem */
+ int nm_numgrps; /* Max. size of groupslist */
+ nfsv2fh_t nm_fh; /* File handle of root dir */
+ struct socket *nm_so; /* Rpc socket */
+ int nm_sotype; /* Type of socket */
+ int nm_soproto; /* and protocol */
+ int nm_soflags; /* pr_flags for socket protocol */
+ struct mbuf *nm_nam; /* Addr of server */
+ int nm_timeo; /* Init timer for NFSMNT_DUMBTIMR */
+ int nm_retry; /* Max retries */
+ int nm_srtt[4]; /* Timers for rpcs */
+ int nm_sdrtt[4];
+ int nm_sent; /* Request send count */
+ int nm_cwnd; /* Request send window */
+ int nm_timeouts; /* Request timeouts */
+ int nm_deadthresh; /* Threshold of timeouts-->dead server*/
+ int nm_rsize; /* Max size of read rpc */
+ int nm_wsize; /* Max size of write rpc */
+ int nm_readahead; /* Num. of blocks to readahead */
+ int nm_leaseterm; /* Term (sec) for NQNFS lease */
+ struct nfsnode *nm_tnext; /* Head of lease timer queue */
+ struct nfsnode *nm_tprev;
+ struct vnode *nm_inprog; /* Vnode in prog by nqnfs_clientd() */
+ uid_t nm_authuid; /* Uid for authenticator */
+ int nm_authtype; /* Authenticator type */
+ int nm_authlen; /* and length */
+ char *nm_authstr; /* Authenticator string */
+};
+
+#ifdef KERNEL
+/*
+ * Convert mount ptr to nfsmount ptr.
+ */
+#define VFSTONFS(mp) ((struct nfsmount *)((mp)->mnt_data))
+#endif /* KERNEL */
+
+/*
+ * Prototypes for NFS mount operations
+ */
+int nfs_mount __P((
+ struct mount *mp,
+ char *path,
+ caddr_t data,
+ struct nameidata *ndp,
+ struct proc *p));
+int nfs_start __P((
+ struct mount *mp,
+ int flags,
+ struct proc *p));
+int nfs_unmount __P((
+ struct mount *mp,
+ int mntflags,
+ struct proc *p));
+int nfs_root __P((
+ struct mount *mp,
+ struct vnode **vpp));
+int nfs_quotactl __P((
+ struct mount *mp,
+ int cmds,
+ uid_t uid,
+ caddr_t arg,
+ struct proc *p));
+int nfs_statfs __P((
+ struct mount *mp,
+ struct statfs *sbp,
+ struct proc *p));
+int nfs_sync __P((
+ struct mount *mp,
+ int waitfor,
+ struct ucred *cred,
+ struct proc *p));
+int nfs_fhtovp __P((
+ struct mount *mp,
+ struct fid *fhp,
+ struct mbuf *nam,
+ struct vnode **vpp,
+ int *exflagsp,
+ struct ucred **credanonp));
+int nfs_vptofh __P((
+ struct vnode *vp,
+ struct fid *fhp));
+int nfs_init __P(());
diff --git a/sys/nfsclient/nfsnode.h b/sys/nfsclient/nfsnode.h
new file mode 100644
index 0000000..f5fee5b
--- /dev/null
+++ b/sys/nfsclient/nfsnode.h
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsnode.h 8.4 (Berkeley) 2/13/94
+ */
+
+/*
+ * Silly rename structure that hangs off the nfsnode until the name
+ * can be removed by nfs_inactive()
+ */
+struct sillyrename {
+ struct ucred *s_cred;
+ struct vnode *s_dvp;
+ long s_namlen;
+ char s_name[20];
+};
+
+/*
+ * The nfsnode is the nfs equivalent to ufs's inode. Any similarity
+ * is purely coincidental.
+ * There is a unique nfsnode allocated for each active file,
+ * each current directory, each mounted-on file, text file, and the root.
+ * An nfsnode is 'named' by its file handle. (nget/nfs_node.c)
+ */
+
+struct nfsnode {
+ struct nfsnode *n_forw; /* hash, forward */
+ struct nfsnode **n_back; /* hash, backward */
+ nfsv2fh_t n_fh; /* NFS File Handle */
+ long n_flag; /* Flag for locking.. */
+ struct vnode *n_vnode; /* vnode associated with this node */
+ struct vattr n_vattr; /* Vnode attribute cache */
+ time_t n_attrstamp; /* Time stamp for cached attributes */
+ struct sillyrename *n_sillyrename; /* Ptr to silly rename struct */
+ u_quad_t n_size; /* Current size of file */
+ int n_error; /* Save write error value */
+ u_long n_direofoffset; /* Dir. EOF offset cache */
+ time_t n_mtime; /* Prev modify time. */
+ time_t n_ctime; /* Prev create time. */
+ u_quad_t n_brev; /* Modify rev when cached */
+ u_quad_t n_lrev; /* Modify rev for lease */
+ time_t n_expiry; /* Lease expiry time */
+ struct nfsnode *n_tnext; /* Nqnfs timer chain */
+ struct nfsnode *n_tprev;
+ long spare1; /* To 8 byte boundary */
+ struct sillyrename n_silly; /* Silly rename struct */
+ struct timeval n_atim; /* Special file times */
+ struct timeval n_mtim;
+};
+
+/*
+ * Flags for n_flag
+ */
+#define NFLUSHWANT 0x0001 /* Want wakeup from a flush in prog. */
+#define NFLUSHINPROG 0x0002 /* Avoid multiple calls to vinvalbuf() */
+#define NMODIFIED 0x0004 /* Might have a modified buffer in bio */
+#define NWRITEERR 0x0008 /* Flag write errors so close will know */
+#define NQNFSNONCACHE 0x0020 /* Non-cachable lease */
+#define NQNFSWRITE 0x0040 /* Write lease */
+#define NQNFSEVICTED 0x0080 /* Has been evicted */
+#define NACC 0x0100 /* Special file accessed */
+#define NUPD 0x0200 /* Special file updated */
+#define NCHG 0x0400 /* Special file times changed */
+
+/*
+ * Convert between nfsnode pointers and vnode pointers
+ */
+#define VTONFS(vp) ((struct nfsnode *)(vp)->v_data)
+#define NFSTOV(np) ((struct vnode *)(np)->n_vnode)
+
+/*
+ * Queue head for nfsiod's
+ */
+TAILQ_HEAD(nfsbufs, buf) nfs_bufq;
+
+#ifdef KERNEL
+/*
+ * Prototypes for NFS vnode operations
+ */
+int nfs_lookup __P((struct vop_lookup_args *));
+int nfs_create __P((struct vop_create_args *));
+int nfs_mknod __P((struct vop_mknod_args *));
+int nfs_open __P((struct vop_open_args *));
+int nfs_close __P((struct vop_close_args *));
+int nfsspec_close __P((struct vop_close_args *));
+#ifdef FIFO
+int nfsfifo_close __P((struct vop_close_args *));
+#endif
+int nfs_access __P((struct vop_access_args *));
+int nfsspec_access __P((struct vop_access_args *));
+int nfs_getattr __P((struct vop_getattr_args *));
+int nfs_setattr __P((struct vop_setattr_args *));
+int nfs_read __P((struct vop_read_args *));
+int nfs_write __P((struct vop_write_args *));
+int nfsspec_read __P((struct vop_read_args *));
+int nfsspec_write __P((struct vop_write_args *));
+#ifdef FIFO
+int nfsfifo_read __P((struct vop_read_args *));
+int nfsfifo_write __P((struct vop_write_args *));
+#endif
+#define nfs_ioctl ((int (*) __P((struct vop_ioctl_args *)))enoioctl)
+#define nfs_select ((int (*) __P((struct vop_select_args *)))seltrue)
+int nfs_mmap __P((struct vop_mmap_args *));
+int nfs_fsync __P((struct vop_fsync_args *));
+#define nfs_seek ((int (*) __P((struct vop_seek_args *)))nullop)
+int nfs_remove __P((struct vop_remove_args *));
+int nfs_link __P((struct vop_link_args *));
+int nfs_rename __P((struct vop_rename_args *));
+int nfs_mkdir __P((struct vop_mkdir_args *));
+int nfs_rmdir __P((struct vop_rmdir_args *));
+int nfs_symlink __P((struct vop_symlink_args *));
+int nfs_readdir __P((struct vop_readdir_args *));
+int nfs_readlink __P((struct vop_readlink_args *));
+int nfs_abortop __P((struct vop_abortop_args *));
+int nfs_inactive __P((struct vop_inactive_args *));
+int nfs_reclaim __P((struct vop_reclaim_args *));
+int nfs_lock __P((struct vop_lock_args *));
+int nfs_unlock __P((struct vop_unlock_args *));
+int nfs_bmap __P((struct vop_bmap_args *));
+int nfs_strategy __P((struct vop_strategy_args *));
+int nfs_print __P((struct vop_print_args *));
+int nfs_islocked __P((struct vop_islocked_args *));
+int nfs_pathconf __P((struct vop_pathconf_args *));
+int nfs_advlock __P((struct vop_advlock_args *));
+int nfs_blkatoff __P((struct vop_blkatoff_args *));
+int nfs_vget __P((struct mount *, ino_t, struct vnode **));
+int nfs_valloc __P((struct vop_valloc_args *));
+#define nfs_reallocblks \
+ ((int (*) __P((struct vop_reallocblks_args *)))eopnotsupp)
+int nfs_vfree __P((struct vop_vfree_args *));
+int nfs_truncate __P((struct vop_truncate_args *));
+int nfs_update __P((struct vop_update_args *));
+int nfs_bwrite __P((struct vop_bwrite_args *));
+#endif /* KERNEL */
diff --git a/sys/nfsclient/nfsstats.h b/sys/nfsclient/nfsstats.h
new file mode 100644
index 0000000..261fd42
--- /dev/null
+++ b/sys/nfsclient/nfsstats.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons)
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next;
+ struct nfssvc_sock *ns_prev;
+ int ns_flag;
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ];
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsserver/nfs.h b/sys/nfsserver/nfs.h
new file mode 100644
index 0000000..261fd42
--- /dev/null
+++ b/sys/nfsserver/nfs.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ */
+
+#define NFS_MAXIOVEC 34 /* Max. iovecs handed to one write pass */
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60 /* Upper bound used by NFS_ATTRTIMEO */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runnable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* Reduce a modulo nfs_asyncdaemons */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ * The result is one tenth of the seconds elapsed since n_mtime, clamped to
+ * the range NFS_MINATTRTIMO .. NFS_MAXATTRTIMO. A node with NMODIFIED set
+ * in n_flag always gets NFS_MINATTRTIMO.
+ * The argument np is expanded several times, so it must be free of side
+ * effects.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ */
+/*
+ * Describes a socket handed to the kernel for servicing.
+ * NOTE(review): presumably the argument used with NFSSVC_ADDSOCK; confirm
+ * in the nfssvc() implementation.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+/*
+ * Server side argument block: carries a uid to credential mapping for a
+ * client address, plus an authenticator string that is marked (ret), i.e.
+ * filled in on return.
+ * NOTE(review): presumably used with NFSSVC_NFSD / NFSSVC_AUTHIN; confirm
+ * in the nfssvc() implementation.
+ */
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+/*
+ * Client side argument block: identifies a mount by directory path and
+ * carries an authenticator for one effective uid.
+ * NOTE(review): presumably used with NFSSVC_MNTD / NFSSVC_GOTAUTH; confirm
+ * in the nfssvc() implementation.
+ */
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ * Every member is a plain int counter. Nothing in this header shows where
+ * the counters are incremented; the grouping comments below are inferred
+ * from the member names only and should be confirmed at the update sites.
+ */
+struct nfsstats {
+ /* presumably client side cache hit/miss counters */
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ /* presumably client side buffer cache / physical I/O counters */
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ /* one slot per NFS procedure (NFS_NPROCS entries) */
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ /* one slot per NFS procedure (NFS_NPROCS entries) */
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ /* presumably server request cache counters */
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ /* presumably server side nqnfs lease counters */
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ * The macro is true when e is none of EINTR, ERESTART or EWOULDBLOCK and
+ * s does not have the PR_CONNREQUIRED bit set.
+ * NOTE(review): s is presumably the protocol's flag word and e the socket
+ * error; confirm at the call sites.
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ * The uncommented members below are not described anywhere in this header;
+ * the notes marked "presumably" are inferred from the names and from the
+ * similarly named members of struct nfsd, and need confirming.
+ */
+struct nfsreq {
+ struct nfsreq *r_next; /* list links */
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq; /* presumably the request mbuf list */
+ struct mbuf *r_mrep; /* presumably the reply mbuf list */
+ struct mbuf *r_md; /* presumably current mbuf while parsing */
+ caddr_t r_dpos; /* presumably parse position, cf. nd_dpos */
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid; /* presumably the RPC xid, cf. nd_retxid */
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats;
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ */
+/*
+ * NUIDHASH() keeps the low order bits of uid by masking with
+ * NUIDHASHSIZ - 1, so NUIDHASHSIZ must remain a power of two for the
+ * result to cover every slot of an array of NUIDHASHSIZ entries.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ * Holds a host address either as a u_long or as a pointer to an mbuf.
+ * NOTE(review): in struct nfsuid the NU_INETADDR bit of nu_flag presumably
+ * says which member is valid; confirm where nu_flag is tested.
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+/*
+ * One uid to credential mapping entry. Entries are linked on two lists at
+ * once: an lru list (nu_lrunext/nu_lruprev) and a uid hash list
+ * (nu_hnext/nu_hprev).
+ */
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev;
+ struct nfsuid *nu_hnext; /* uid hash list links */
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+/*
+ * State kept for one socket that is serviced by the nfsd.
+ * The first two members have the same types and order as nu_lrunext and
+ * nu_lruprev in struct nfsuid; NOTE(review): that is presumably why
+ * ns_lrunext must stay first (so this struct can act as the lru list
+ * head) -- confirm against the list handling code.
+ */
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev;
+ struct nfssvc_sock *ns_next; /* Links to other nfssvc_sock structs */
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_ bits, see below */
+ u_long ns_sref;
+ struct file *ns_fp;
+ struct socket *ns_so;
+ int ns_solock;
+ struct mbuf *ns_nam;
+ int ns_cc;
+ struct mbuf *ns_raw;
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec;
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* One nfsuid pointer per hash slot */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20
+#define SLP_WANTINIT 0x40
+
+#define SLP_ALLFLAGS 0xff
+
+/*
+ * One of these structures is allocated for each nfsd.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev; /* Back link paired with nd_next */
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ /* NOTE(review): nd_md is presumably the mbuf in nd_mrep being parsed */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/nfsserver/nfs_serv.c b/sys/nfsserver/nfs_serv.c
new file mode 100644
index 0000000..f31b96e
--- /dev/null
+++ b/sys/nfsserver/nfs_serv.c
@@ -0,0 +1,1908 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_serv.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * nfs version 2 server calls to vnode ops
+ * - these routines generally have 3 phases
+ * 1 - break down and validate rpc request in mbuf list
+ * 2 - do the vnode ops for the request
+ * (surprisingly ?? many are very similar to syscalls in vfs_syscalls.c)
+ * 3 - build the rpc reply in an mbuf list
+ * nb:
+ * - do not mix the phases, since the nfsm_?? macros can return failures
+ * on a bad rpc or similar and do not do any vrele() or vput()'s
+ *
+ * - the nfsm_reply() macro generates an nfs rpc reply with the nfs
+ * error number iff error != 0 whereas
+ * returning an error from the server function implies a fatal error
+ * such as a badly constructed rpc request that should be dropped without
+ * a reply.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/namei.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/mbuf.h>
+#include <sys/dirent.h>
+#include <sys/stat.h>
+
+#include <vm/vm.h>
+
+#include <nfs/nfsv2.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nqnfs.h>
+
+/* Defs */
+#define TRUE 1
+#define FALSE 0
+
+/* Global vars */
+extern u_long nfs_procids[NFS_NPROCS];
+extern u_long nfs_xdrneg1;
+extern u_long nfs_false, nfs_true;
+nfstype nfs_type[9] = { NFNON, NFREG, NFDIR, NFBLK, NFCHR, NFLNK, NFNON,
+ NFCHR, NFNON };
+
+/*
+ * nqnfs access service
+ * Reads a file handle followed by three XDR words from the request, maps
+ * the handle to a locked vnode and asks nfsrv_access() whether the
+ * credentials allow the requested mode. Each of the three words that
+ * equals nfs_true adds VREAD, VWRITE and VEXEC to the mode, in that order.
+ * The reply carries no data beyond what nfsm_reply() builds; the outcome
+ * is conveyed through error.
+ *
+ * nfsd per daemon state; nd_slp and nd_procp are used here
+ * mrep, md, dpos request mbuf list and parse position (not referenced by
+ * name here; presumably consumed by the nfsm_ macros)
+ * cred credentials of the caller
+ * nam client address, handed to nfsrv_fhtovp()
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nqnfsrv_access(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, mode = 0;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ /* Phase 1: pull the file handle and the three flag words off the request */
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 3 * NFSX_UNSIGNED);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ /* Phase 2: translate the flag words into a vnode access mode */
+ if (*tl++ == nfs_true)
+ mode |= VREAD;
+ if (*tl++ == nfs_true)
+ mode |= VWRITE;
+ if (*tl == nfs_true)
+ mode |= VEXEC;
+ error = nfsrv_access(vp, mode, cred, rdonly, nfsd->nd_procp);
+ vput(vp);
+ /* Phase 3: reply without a body */
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs getattr service
+ * Maps the file handle in the request to a locked vnode, fetches its
+ * attributes with VOP_GETATTR() and returns them as an nfsv2_fattr whose
+ * size depends on whether the request is an nqnfs one
+ * (nd_nqlflag != NQL_NOVAL).
+ *
+ * nfsd per daemon state; nd_slp, nd_procp and nd_nqlflag are
+ * used here
+ * mrep, md, dpos request mbuf list and parse position (not referenced by
+ * name here; presumably consumed by the nfsm_ macros)
+ * cred credentials of the caller
+ * nam client address, handed to nfsrv_fhtovp()
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_getattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ /* The vnode is released before the reply is built; only va is needed */
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs setattr service
+ * Reads a file handle and an nfsv2_sattr from the request, maps the handle
+ * to a locked vnode, applies the requested attributes with VOP_SETATTR()
+ * and returns the resulting attributes fetched with VOP_GETATTR(). For
+ * nqnfs requests (nd_nqlflag != NQL_NOVAL) the reply additionally carries
+ * the file revision, va_filerev, taken from that same VOP_GETATTR().
+ *
+ * Fields of the sattr that hold nfs_xdrneg1 are left unset in the plain
+ * nfs case; in the nqnfs case size, times and flags are always copied.
+ *
+ * nfsd per daemon state; nd_slp, nd_procp and nd_nqlflag are
+ * used here
+ * mrep, md, dpos request mbuf list and parse position (not referenced by
+ * name here; presumably consumed by the nfsm_ macros)
+ * cred credentials of the caller
+ * nam client address, handed to nfsrv_fhtovp()
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_setattr(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register struct nfsv2_fattr *fp;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_WRITE);
+ VATTR_NULL(vap);
+ /*
+ * Nah nah nah nah na nah
+ * There is a bug in the Sun client that puts 0xffff in the mode
+ * field of sattr when it should put in 0xffffffff. The u_short
+ * doesn't sign extend.
+ * --> check the low order 2 bytes for 0xffff
+ */
+ if ((fxdr_unsigned(int, sp->sa_mode) & 0xffff) != 0xffff)
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ if (sp->sa_uid != nfs_xdrneg1)
+ vap->va_uid = fxdr_unsigned(uid_t, sp->sa_uid);
+ if (sp->sa_gid != nfs_xdrneg1)
+ vap->va_gid = fxdr_unsigned(gid_t, sp->sa_gid);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ if (sp->sa_nfssize != nfs_xdrneg1)
+ vap->va_size = fxdr_unsigned(u_quad_t, sp->sa_nfssize);
+ if (sp->sa_nfsatime.nfs_sec != nfs_xdrneg1) {
+#ifdef notyet
+ fxdr_nfstime(&sp->sa_nfsatime, &vap->va_atime);
+#else
+ vap->va_atime.ts_sec =
+ fxdr_unsigned(long, sp->sa_nfsatime.nfs_sec);
+ vap->va_atime.ts_nsec = 0;
+#endif
+ }
+ if (sp->sa_nfsmtime.nfs_sec != nfs_xdrneg1)
+ fxdr_nfstime(&sp->sa_nfsmtime, &vap->va_mtime);
+ } else {
+ fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+ fxdr_nqtime(&sp->sa_nqatime, &vap->va_atime);
+ fxdr_nqtime(&sp->sa_nqmtime, &vap->va_mtime);
+ vap->va_flags = fxdr_unsigned(u_long, sp->sa_nqflags);
+ }
+
+ /*
+ * If the size is being changed write access is required, otherwise
+ * just check for a read only file system.
+ * A va_size of all ones means "size not being changed".
+ */
+ if (vap->va_size == ((u_quad_t)((quad_t) -1))) {
+ if (rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) {
+ error = EROFS;
+ goto out;
+ }
+ } else {
+ if (vp->v_type == VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if (error = nfsrv_access(vp, VWRITE, cred, rdonly,
+ nfsd->nd_procp))
+ goto out;
+ }
+ if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* Re-read the attributes so the reply reflects what was actually set */
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+out:
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 2*NFSX_UNSIGNED);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ /*
+ * Return the file revision from the attributes just fetched, as
+ * nfsrv_write does. This used to send a local variable that was
+ * never assigned, i.e. uninitialized stack contents.
+ */
+ txdr_hyper(&vap->va_filerev, tl);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * nfs lookup rpc
+ * Looks up a name relative to the directory given by the file handle in
+ * the request and returns the file handle and attributes of the result.
+ * For nqnfs requests (nd_nqlflag != NQL_NOVAL) a leading word gives a
+ * requested lease duration; when it is non-zero a read lease is requested
+ * via nqsrv_getlease() and described in the reply, otherwise a single zero
+ * word is sent in its place.
+ *
+ * nfsd per daemon state; nd_slp, nd_procp and nd_nqlflag are
+ * used here
+ * mrep request mbuf list (not referenced by name here;
+ * presumably consumed by the nfsm_ macros)
+ * md, dpos parse position, passed by address to nfs_namei()
+ * cred credentials of the caller
+ * nam client address
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_lookup(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct nameidata nd;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, duration2, cache2, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vattr va, *vap = &va;
+ u_quad_t frev, frev2;
+
+ fhp = &nfh.fh_generic;
+ duration2 = 0;
+ /* nqnfs requests carry the wanted lease duration ahead of the handle */
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ duration2 = fxdr_unsigned(int, *tl);
+ }
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags = LOCKLEAF | SAVESTART;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ /* SAVESTART was set above, so the start directory and path buffer are dropped here */
+ nqsrv_getl(nd.ni_startdir, NQL_READ);
+ vrele(nd.ni_startdir);
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ vp = nd.ni_vp;
+ /* Reuse the handle buffer to build the handle of the file that was found */
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* cache2 and frev2 are only written, and only read below, when duration2 != 0 */
+ if (duration2)
+ (void) nqsrv_getlease(vp, &duration2, NQL_READ, nfsd,
+ nam, &cache2, &frev2, cred);
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH + NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL) + 5*NFSX_UNSIGNED);
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ if (duration2) {
+ /* lease type, cache flag, duration, then a two word revision */
+ nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(NQL_READ);
+ *tl++ = txdr_unsigned(cache2);
+ *tl++ = txdr_unsigned(duration2);
+ txdr_hyper(&frev2, tl);
+ } else {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs readlink service
+ * Preallocates a chain of cluster mbufs totalling NFS_MAXPATHLEN bytes,
+ * reads the symbolic link named by the file handle straight into that
+ * chain with VOP_READLINK(), trims the unused tail and appends the chain
+ * to the reply after a length word. A vnode that is not a VLNK yields
+ * EINVAL.
+ *
+ * nfsd per daemon state; nd_slp is used here
+ * mrep, md, dpos request mbuf list and parse position (not referenced by
+ * name here; presumably consumed by the nfsm_ macros)
+ * cred credentials of the caller
+ * nam client address, handed to nfsrv_fhtovp()
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_readlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct iovec iv[(NFS_MAXPATHLEN+MLEN-1)/MLEN];
+ register struct iovec *ivp = iv;
+ register struct mbuf *mp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, tlen, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mp2, *mp3, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ len = 0;
+ i = 0;
+ /*
+ * Build the data chain: mp3 is its head, mp2 its current tail, and
+ * each mbuf gets one iovec entry. The last mbuf is shortened so the
+ * total is exactly NFS_MAXPATHLEN.
+ */
+ while (len < NFS_MAXPATHLEN) {
+ MGET(mp, M_WAIT, MT_DATA);
+ MCLGET(mp, M_WAIT);
+ mp->m_len = NFSMSIZ(mp);
+ if (len == 0)
+ mp3 = mp2 = mp;
+ else {
+ mp2->m_next = mp;
+ mp2 = mp;
+ }
+ if ((len+mp->m_len) > NFS_MAXPATHLEN) {
+ mp->m_len = NFS_MAXPATHLEN-len;
+ len = NFS_MAXPATHLEN;
+ } else
+ len += mp->m_len;
+ ivp->iov_base = mtod(mp, caddr_t);
+ ivp->iov_len = mp->m_len;
+ i++;
+ ivp++;
+ }
+ uiop->uio_iov = iv;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = 0;
+ uiop->uio_resid = len;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly)) {
+ m_freem(mp3);
+ nfsm_reply(0);
+ }
+ if (vp->v_type != VLNK) {
+ error = EINVAL;
+ goto out;
+ }
+ nqsrv_getl(vp, NQL_READ);
+ error = VOP_READLINK(vp, uiop, cred);
+out:
+ vput(vp);
+ /* On failure the preallocated chain is not part of the reply; free it */
+ if (error)
+ m_freem(mp3);
+ nfsm_reply(NFSX_UNSIGNED);
+ /* Trim the chain down to the bytes actually read, rounded up by nfsm_rndup */
+ if (uiop->uio_resid > 0) {
+ len -= uiop->uio_resid;
+ tlen = nfsm_rndup(len);
+ nfsm_adj(mp3, NFS_MAXPATHLEN-tlen, tlen-len);
+ }
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = txdr_unsigned(len);
+ /* Hang the link data off the end of the reply */
+ mb->m_next = mp3;
+ nfsm_srvdone;
+}
+
+/*
+ * nfs read service
+ * Reads up to the requested count from a regular file, starting at the
+ * requested offset, directly into the reply mbuf chain and returns the
+ * file attributes followed by the data length. The offset is one word for
+ * plain nfs and a two word hyper for nqnfs requests. The count is cut
+ * down to what lies before end of file. A vnode that is not VREG yields
+ * EISDIR for directories and EACCES otherwise. Access is granted if
+ * either the VREAD or the VEXEC check succeeds.
+ *
+ * nfsd per daemon state; nd_slp, nd_procp and nd_nqlflag are
+ * used here
+ * mrep, md, dpos request mbuf list and parse position (not referenced by
+ * name here; presumably consumed by the nfsm_ macros)
+ * cred credentials of the caller
+ * nam client address, handed to nfsrv_fhtovp()
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_read(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct iovec *iv;
+ struct iovec *iv2;
+ register struct mbuf *m;
+ register struct nfsv2_fattr *fp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, i, cnt, len, left, siz, tlen;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct mbuf *m2;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ struct vattr va, *vap = &va;
+ off_t off;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ off = (off_t)fxdr_unsigned(u_long, *tl);
+ } else {
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ fxdr_hyper(tl, &off);
+ }
+ nfsm_srvstrsiz(cnt, NFS_MAXDATA);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type != VREG) {
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_READ);
+ /* The VEXEC check is only made when the VREAD check has failed */
+ if ((error = nfsrv_access(vp, VREAD, cred, rdonly, nfsd->nd_procp)) &&
+ (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp))) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ if (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ /* Clamp the count to end of file, rounding the remainder up with nfsm_rndup */
+ if (off >= vap->va_size)
+ cnt = 0;
+ else if ((off + cnt) > vap->va_size)
+ cnt = nfsm_rndup(vap->va_size - off);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL)+NFSX_UNSIGNED+nfsm_rndup(cnt));
+ /* fp and tl are reserved now and filled in after the read has completed */
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ len = left = cnt;
+ if (cnt > 0) {
+ /*
+ * Generate the mbuf list with the uio_iov ref. to it.
+ */
+ i = 0;
+ m = m2 = mb;
+ MALLOC(iv, struct iovec *,
+ ((NFS_MAXDATA+MLEN-1)/MLEN) * sizeof (struct iovec),
+ M_TEMP, M_WAITOK);
+ /* iv walks the array while iv2 keeps its start for the uio and the FREE */
+ iv2 = iv;
+ while (left > 0) {
+ /* First use whatever room is left in the current mbuf ... */
+ siz = min(M_TRAILINGSPACE(m), left);
+ if (siz > 0) {
+ m->m_len += siz;
+ iv->iov_base = bpos;
+ iv->iov_len = siz;
+ iv++;
+ i++;
+ left -= siz;
+ }
+ /* ... then chain on a fresh cluster mbuf for the remainder */
+ if (left > 0) {
+ MGET(m, M_WAIT, MT_DATA);
+ MCLGET(m, M_WAIT);
+ m->m_len = 0;
+ m2->m_next = m;
+ m2 = m;
+ bpos = mtod(m, caddr_t);
+ }
+ }
+ /*
+ * NOTE(review): uio_procp is not assigned here, unlike in
+ * nfsrv_readlink and nfsrv_write; confirm that VOP_READ never
+ * consults it.
+ */
+ uiop->uio_iov = iv2;
+ uiop->uio_iovcnt = i;
+ uiop->uio_offset = off;
+ uiop->uio_resid = cnt;
+ uiop->uio_rw = UIO_READ;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ error = VOP_READ(vp, uiop, IO_NODELOCKED, cred);
+ off = uiop->uio_offset;
+ FREE((caddr_t)iv2, M_TEMP);
+ /* Attributes are fetched again after the read for the reply */
+ if (error || (error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp))) {
+ m_freem(mreq);
+ vput(vp);
+ nfsm_reply(0);
+ }
+ } else
+ uiop->uio_resid = 0;
+ vput(vp);
+ nfsm_srvfillattr;
+ /* Shrink the reply if fewer bytes than planned were read */
+ len -= uiop->uio_resid;
+ tlen = nfsm_rndup(len);
+ if (cnt != tlen || tlen != len)
+ nfsm_adj(mb, cnt-tlen, tlen-len);
+ *tl = txdr_unsigned(len);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs write service
+ * Writes the data carried in the request mbuf chain to a regular file
+ * with IO_SYNC | IO_NODELOCKED, at most NFS_MAXIOVEC mbufs per VOP_WRITE()
+ * call, and returns the file attributes afterwards. nqnfs requests may
+ * also ask for IO_APPEND and get va_filerev appended to the reply.
+ * A length that is not in 1 .. NFS_MAXDATA, or a chain that ends before
+ * len bytes were found, yields EBADRPC. A vnode that is not VREG yields
+ * EISDIR for directories and EACCES otherwise.
+ *
+ * nfsd per daemon state; nd_slp, nd_procp and nd_nqlflag are
+ * used here
+ * mrep request mbuf list (not referenced by name here)
+ * md, dpos current request mbuf and position in it; the write
+ * data starts at dpos
+ * cred credentials of the caller
+ * nam client address, handed to nfsrv_fhtovp()
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_write(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct iovec *ivp;
+ register struct mbuf *mp;
+ register struct nfsv2_fattr *fp;
+ struct iovec iv[NFS_MAXIOVEC];
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, siz, len, xfer;
+ int ioflags = IO_SYNC | IO_NODELOCKED;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io, *uiop = &io;
+ off_t off;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ /*
+ * Four words follow the handle. Plain nfs: the offset is the second
+ * word and the length the fourth. nqnfs: a two word offset, an append
+ * flag, then the length.
+ */
+ nfsm_dissect(tl, u_long *, 4 * NFSX_UNSIGNED);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ off = (off_t)fxdr_unsigned(u_long, *++tl);
+ tl += 2;
+ } else {
+ fxdr_hyper(tl, &off);
+ tl += 2;
+ if (fxdr_unsigned(u_long, *tl++))
+ ioflags |= IO_APPEND;
+ }
+ len = fxdr_unsigned(long, *tl);
+ if (len > NFS_MAXDATA || len <= 0) {
+ error = EBADRPC;
+ nfsm_reply(0);
+ }
+ /*
+ * Find the mbuf holding the first data byte: the next mbuf when dpos
+ * sits at the very end of md, otherwise md itself with the already
+ * parsed bytes trimmed off its front.
+ */
+ if (dpos == (mtod(md, caddr_t)+md->m_len)) {
+ mp = md->m_next;
+ if (mp == NULL) {
+ error = EBADRPC;
+ nfsm_reply(0);
+ }
+ } else {
+ mp = md;
+ siz = dpos-mtod(mp, caddr_t);
+ mp->m_len -= siz;
+ NFSMADV(mp, siz);
+ }
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type != VREG) {
+ error = (vp->v_type == VDIR) ? EISDIR : EACCES;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_WRITE);
+ if (error = nfsrv_access(vp, VWRITE, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ uiop->uio_resid = 0;
+ uiop->uio_rw = UIO_WRITE;
+ uiop->uio_segflg = UIO_SYSSPACE;
+ uiop->uio_procp = (struct proc *)0;
+ /*
+ * Do up to NFS_MAXIOVEC mbufs of write each iteration of the
+ * loop until done.
+ * The loop also stops if a pass leaves uio_resid non-zero.
+ */
+ while (len > 0 && uiop->uio_resid == 0) {
+ ivp = iv;
+ siz = 0;
+ uiop->uio_iov = ivp;
+ uiop->uio_iovcnt = 0;
+ uiop->uio_offset = off;
+ /* One iovec per mbuf; the last one is cut to the bytes still owed */
+ while (len > 0 && uiop->uio_iovcnt < NFS_MAXIOVEC && mp != NULL) {
+ ivp->iov_base = mtod(mp, caddr_t);
+ if (len < mp->m_len)
+ ivp->iov_len = xfer = len;
+ else
+ ivp->iov_len = xfer = mp->m_len;
+#ifdef notdef
+ /* Not Yet .. */
+ if (M_HASCL(mp) && (((u_long)ivp->iov_base) & CLOFSET) == 0)
+ ivp->iov_op = NULL; /* what should it be ?? */
+ else
+ ivp->iov_op = NULL;
+#endif
+ uiop->uio_iovcnt++;
+ ivp++;
+ len -= xfer;
+ siz += xfer;
+ mp = mp->m_next;
+ }
+ /* The chain ran out before len bytes were seen: malformed request */
+ if (len > 0 && mp == NULL) {
+ error = EBADRPC;
+ vput(vp);
+ nfsm_reply(0);
+ }
+ uiop->uio_resid = siz;
+ if (error = VOP_WRITE(vp, uiop, ioflags, cred)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ off = uiop->uio_offset;
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ if (nfsd->nd_nqlflag != NQL_NOVAL) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ txdr_hyper(&vap->va_filerev, tl);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * nfs create service
+ * now does a truncate to 0 length via. setattr if it already exists
+ *
+ * Looks the name up with CREATE semantics. If nothing exists yet, the
+ * type encoded in sa_mode decides what is made: VREG and VSOCK go through
+ * VOP_CREATE(); VCHR, VBLK and VFIFO go through VOP_MKNOD() followed by a
+ * fresh lookup(); any other type yields ENXIO. If the name already exists
+ * and the request supplies a size other than -1, write access is checked
+ * and the size is applied with VOP_SETATTR(). The reply carries the file
+ * handle and attributes of the resulting vnode.
+ *
+ * nfsd per daemon state; nd_slp, nd_procp and nd_nqlflag are
+ * used here
+ * mrep request mbuf list (not referenced by name here)
+ * md, dpos parse position, passed by address to nfs_namei()
+ * cred credentials of the caller
+ * nam client address
+ * mrq not referenced by name here; presumably where the
+ * nfsm_ macros store the reply
+ */
+nfsrv_create(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct nfsv2_fattr *fp;
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_sattr *sp;
+ register u_long *tl;
+ struct nameidata nd;
+ register caddr_t cp;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdev, cache, len, tsize;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ /* cn_nameiop is cleared first because the nfsmout code below tests it */
+ nd.ni_cnd.cn_nameiop = 0;
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | SAVESTART;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ VATTR_NULL(vap);
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ /*
+ * Iff doesn't exist, create it
+ * otherwise just truncate to 0 length
+ * should I set the mode too ??
+ */
+ if (nd.ni_vp == NULL) {
+ vap->va_type = IFTOVT(fxdr_unsigned(u_long, sp->sa_mode));
+ if (vap->va_type == VNON)
+ vap->va_type = VREG;
+ vap->va_mode = nfstov_mode(sp->sa_mode);
+ /* Plain nfs has no rdev field; the device number comes from the size field */
+ if (nfsd->nd_nqlflag == NQL_NOVAL)
+ rdev = fxdr_unsigned(long, sp->sa_nfssize);
+ else
+ rdev = fxdr_unsigned(long, sp->sa_nqrdev);
+ if (vap->va_type == VREG || vap->va_type == VSOCK) {
+ vrele(nd.ni_startdir);
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+ nfsm_reply(0);
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ } else if (vap->va_type == VCHR || vap->va_type == VBLK ||
+ vap->va_type == VFIFO) {
+ /* A character device with rdev of all ones is treated as a fifo */
+ if (vap->va_type == VCHR && rdev == 0xffffffff)
+ vap->va_type = VFIFO;
+ if (vap->va_type == VFIFO) {
+ /* Without FIFO compiled in, fifos are refused with ENXIO */
+#ifndef FIFO
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ error = ENXIO;
+ goto out;
+#endif /* FIFO */
+ } else if (error = suser(cred, (u_short *)0)) {
+ /* Device nodes require suser() to succeed */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ goto out;
+ } else
+ vap->va_rdev = (dev_t)rdev;
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap)) {
+ vrele(nd.ni_startdir);
+ nfsm_reply(0);
+ }
+ /* Look the new node up again to obtain its vnode for the reply */
+ nd.ni_cnd.cn_nameiop = LOOKUP;
+ nd.ni_cnd.cn_flags &= ~(LOCKPARENT | SAVESTART);
+ nd.ni_cnd.cn_proc = nfsd->nd_procp;
+ nd.ni_cnd.cn_cred = nfsd->nd_procp->p_ucred;
+ if (error = lookup(&nd)) {
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+ }
+ FREE(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ if (nd.ni_cnd.cn_flags & ISSYMLINK) {
+ /*
+ * NOTE(review): VOP_ABORTOP is handed nd.ni_dvp after
+ * the vrele() just above it; confirm this ordering is
+ * intended.
+ */
+ vrele(nd.ni_dvp);
+ vput(nd.ni_vp);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ error = EINVAL;
+ nfsm_reply(0);
+ }
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ vput(nd.ni_dvp);
+ error = ENXIO;
+ goto out;
+ }
+ vp = nd.ni_vp;
+ } else {
+ /* The name already exists: drop the lookup state and maybe resize */
+ vrele(nd.ni_startdir);
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ vp = nd.ni_vp;
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ /*
+ * NOTE(review): as above, VOP_ABORTOP runs after nd.ni_dvp has
+ * been released; confirm this ordering is intended.
+ */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nfsd->nd_nqlflag == NQL_NOVAL) {
+ tsize = fxdr_unsigned(long, sp->sa_nfssize);
+ if (tsize != -1)
+ vap->va_size = (u_quad_t)tsize;
+ else
+ vap->va_size = -1;
+ } else
+ fxdr_hyper(&sp->sa_nqsize, &vap->va_size);
+ if (vap->va_size != -1) {
+ if (error = nfsrv_access(vp, VWRITE, cred,
+ (nd.ni_cnd.cn_flags & RDONLY), nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ nqsrv_getl(vp, NQL_WRITE);
+ if (error = VOP_SETATTR(vp, vap, cred, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ }
+ }
+ /* Build the handle of the resulting vnode in the buffer used for the request */
+ bzero((caddr_t)fhp, sizeof(nfh));
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ return (error);
+ /*
+ * Cleanup for the nfsmout label, which is not jumped to by name in
+ * this function (presumably the nfsm_ macros do so on a parse failure).
+ * NOTE(review): if that happens before nfs_namei() has run, cn_flags,
+ * ni_dvp and ni_vp have not been assigned in this function, yet they
+ * are read below; confirm this path is safe.
+ */
+nfsmout:
+ if (nd.ni_cnd.cn_nameiop || nd.ni_cnd.cn_flags)
+ vrele(nd.ni_startdir);
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vput(nd.ni_vp);
+ return (error);
+
+ /* Error exit used once the parent directory has already been released */
+out:
+ vrele(nd.ni_startdir);
+ free(nd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+}
+
+/*
+ * nfs remove service
+ *
+ * Decodes a directory file handle and an entry name from the request,
+ * looks the name up with nameiop DELETE (LOCKPARENT | LOCKLEAF) and
+ * removes the entry with VOP_REMOVE().
+ *
+ * Refused before VOP_REMOVE() is called:
+ * - a directory, unless suser() succeeds for cred;
+ * - a vnode flagged VROOT, which gets EBUSY.
+ * On refusal the operation is cancelled with VOP_ABORTOP() and the parent
+ * and leaf vnodes are released here. After VOP_REMOVE() this function
+ * releases nothing itself.
+ *
+ * NOTE(review): the nfsm_* macros are defined outside this section. They
+ * presumably use the locals tl, t1, cp2, mb, mreq, bpos, cache and frev
+ * by name and can leave the function early -- confirm in nfsm_subs.h
+ * before renaming or deleting any of those locals.
+ */
+nfsrv_remove(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct nameidata nd;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp); /* directory file handle from the request */
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN); /* length of the entry name */
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0); /* presumably does not fall through when error is set -- TODO confirm */
+ vp = nd.ni_vp;
+ if (vp->v_type == VDIR &&
+ (error = suser(cred, (u_short *)0)))
+ goto out; /* directories may only be removed this way when suser() allows it */
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT) {
+ error = EBUSY;
+ goto out;
+ }
+ if (vp->v_flag & VTEXT)
+ (void) vnode_pager_uncache(vp); /* result deliberately ignored */
+out:
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE); /* presumably NQNFS write-lease handling -- defined elsewhere */
+ nqsrv_getl(vp, NQL_WRITE);
+ error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp); /* parent and leaf are the same vnode: vput(vp) below does the unlock */
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs rename service
+ *
+ * Decodes (from-directory handle, from-name) and (to-directory handle,
+ * to-name) from the request, looks both names up and calls VOP_RENAME().
+ *
+ * The source is looked up with DELETE / WANTPARENT | SAVESTART, the target
+ * with RENAME / LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART. Because both
+ * lookups use SAVESTART, each one leaves a start directory reference and a
+ * pathname buffer that this function releases itself.
+ *
+ * Rejected before VOP_RENAME() is called:
+ * - directory onto non-directory (EISDIR) and the reverse (ENOTDIR);
+ * - target or source directory with something mounted on it (EXDEV, EBUSY);
+ * - source and target directory on different mounts (EXDEV);
+ * - source vnode identical to the target directory (EINVAL).
+ * Renaming an entry onto itself (same vnode, same directory, same name) is
+ * a no-op: it is flagged internally with error == -1, the lookups are
+ * cancelled, and success is reported to the client.
+ *
+ * Returns the value left in error; when error is non-zero at an
+ * nfsm_reply() call that macro presumably returns on its own -- the nfsm_*
+ * macros are defined outside this section and use the locals tl, t1, cp2,
+ * mb, mreq, bpos, cache and frev by name.
+ */
+nfsrv_rename(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len, len2;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct nameidata fromnd, tond;
+ struct vnode *fvp, *tvp, *tdvp;
+ nfsv2fh_t fnfh, tnfh;
+ fhandle_t *ffhp, *tfhp;
+ u_quad_t frev;
+ uid_t saved_uid;
+
+ ffhp = &fnfh.fh_generic;
+ tfhp = &tnfh.fh_generic;
+ /*
+ * The nfsmout cleanup decides what to release by testing cn_nameiop
+ * and cn_flags of both lookups. Clear all four fields up front so
+ * that it never reads uninitialized stack contents when it is
+ * reached before a lookup has been set up.
+ */
+ fromnd.ni_cnd.cn_nameiop = 0;
+ fromnd.ni_cnd.cn_flags = 0;
+ tond.ni_cnd.cn_nameiop = 0;
+ tond.ni_cnd.cn_flags = 0;
+ nfsm_srvmtofh(ffhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ /*
+ * Remember our original uid so that we can reset cr_uid before
+ * the second nfs_namei() call, in case it is remapped.
+ */
+ saved_uid = cred->cr_uid;
+ fromnd.ni_cnd.cn_cred = cred;
+ fromnd.ni_cnd.cn_nameiop = DELETE;
+ fromnd.ni_cnd.cn_flags = WANTPARENT | SAVESTART;
+ if (error = nfs_namei(&fromnd, ffhp, len, nfsd->nd_slp, nam, &md,
+ &dpos, nfsd->nd_procp))
+ nfsm_reply(0);
+ fvp = fromnd.ni_vp;
+ nfsm_srvmtofh(tfhp);
+ nfsm_strsiz(len2, NFS_MAXNAMLEN);
+ cred->cr_uid = saved_uid;
+ tond.ni_cnd.cn_cred = cred;
+ tond.ni_cnd.cn_nameiop = RENAME;
+ tond.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART;
+ if (error = nfs_namei(&tond, tfhp, len2, nfsd->nd_slp, nam, &md,
+ &dpos, nfsd->nd_procp)) {
+ /* Target lookup failed: only the source lookup needs undoing. */
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ goto out1;
+ }
+ tdvp = tond.ni_dvp;
+ tvp = tond.ni_vp;
+ if (tvp != NULL) {
+ if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+ error = EISDIR;
+ goto out;
+ } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ if (tvp->v_type == VDIR && tvp->v_mountedhere) {
+ error = EXDEV;
+ goto out;
+ }
+ }
+ if (fvp->v_type == VDIR && fvp->v_mountedhere) {
+ error = EBUSY;
+ goto out;
+ }
+ if (fvp->v_mount != tdvp->v_mount) {
+ error = EXDEV;
+ goto out;
+ }
+ if (fvp == tdvp)
+ error = EINVAL;
+ /*
+ * If source is the same as the destination (that is the
+ * same vnode with the same name in the same directory),
+ * then there is nothing to do.
+ */
+ if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+ fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+ !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+ fromnd.ni_cnd.cn_namelen))
+ error = -1;
+out:
+ if (!error) {
+ nqsrv_getl(fromnd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(tdvp, NQL_WRITE);
+ if (tvp)
+ nqsrv_getl(tvp, NQL_WRITE);
+ error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+ tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+ } else {
+ VOP_ABORTOP(tond.ni_dvp, &tond.ni_cnd);
+ if (tdvp == tvp)
+ vrele(tdvp);
+ else
+ vput(tdvp);
+ if (tvp)
+ vput(tvp);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ /*
+ * -1 only marks the "rename onto itself" no-op above. It is
+ * not an error number and must not reach the client as the
+ * reply status, so report success instead.
+ */
+ if (error == -1)
+ error = 0;
+ }
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+out1:
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ nfsm_reply(0);
+ return (error);
+
+nfsmout:
+ /*
+ * No visible goto targets this label; it is presumably reached from
+ * inside the nfsm_* macros when decoding the request fails. Undo
+ * whichever lookups had been set up by then.
+ */
+ if (tond.ni_cnd.cn_nameiop || tond.ni_cnd.cn_flags) {
+ vrele(tond.ni_startdir);
+ FREE(tond.ni_cnd.cn_pnbuf, M_NAMEI);
+ }
+ if (fromnd.ni_cnd.cn_nameiop || fromnd.ni_cnd.cn_flags) {
+ vrele(fromnd.ni_startdir);
+ FREE(fromnd.ni_cnd.cn_pnbuf, M_NAMEI);
+ VOP_ABORTOP(fromnd.ni_dvp, &fromnd.ni_cnd);
+ vrele(fromnd.ni_dvp);
+ vrele(fvp);
+ }
+ return (error);
+}
+
+/*
+ * nfs link service
+ *
+ * Decodes the file handle of an existing file (fhp), the file handle of
+ * the directory that is to receive the new name (dfhp) and the new name,
+ * then creates the hard link with VOP_LINK().
+ *
+ * Checks made here:
+ * - linking a directory requires suser() to succeed for cred;
+ * - the new name must not exist yet (EEXIST);
+ * - file and target directory must be on the same mount (EXDEV).
+ * vp, obtained from nfsrv_fhtovp(), is released with vrele() at out1 on
+ * every path that gets past that call.
+ *
+ * NOTE(review): the nfsm_* macros are defined outside this section and
+ * presumably use the locals tl, t1, cp2, mb, mreq, bpos, cache and frev by
+ * name -- confirm in nfsm_subs.h before renaming any of them.
+ */
+nfsrv_link(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct nameidata nd;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp, *xp;
+ nfsv2fh_t nfh, dnfh;
+ fhandle_t *fhp, *dfhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ dfhp = &dnfh.fh_generic;
+ nfsm_srvmtofh(fhp); /* handle of the existing file */
+ nfsm_srvmtofh(dfhp); /* handle of the target directory */
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ if (error = nfsrv_fhtovp(fhp, FALSE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ if (vp->v_type == VDIR && (error = suser(cred, (u_short *)0)))
+ goto out1;
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, dfhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ goto out1;
+ xp = nd.ni_vp;
+ if (xp != NULL) {
+ error = EEXIST;
+ goto out;
+ }
+ xp = nd.ni_dvp; /* from here on xp is the target directory */
+ if (vp->v_mount != xp->v_mount)
+ error = EXDEV;
+out:
+ if (!error) {
+ nqsrv_getl(vp, NQL_WRITE);
+ nqsrv_getl(xp, NQL_WRITE);
+ error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp); /* the lookup did not ask for LOCKLEAF, so vrele rather than vput */
+ }
+out1:
+ vrele(vp);
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs symbolic link service
+ *
+ * Decodes a directory file handle, the name of the link to create, the
+ * link contents and a set-attributes structure, then calls VOP_SYMLINK().
+ *
+ * The link contents (len2 bytes) are copied out of the request into a
+ * temporary buffer of len2 + 1 bytes and NUL terminated; that buffer is
+ * freed on both the out and the nfsmout exit. Of the decoded attributes
+ * only sa_mode is used. An existing entry with the same name yields
+ * EEXIST.
+ *
+ * NOTE(review): no visible goto targets nfsmout; it is presumably reached
+ * from inside the nfsm_* macros, which are defined outside this section
+ * and use the locals tl, t1, cp2, mb, mreq, bpos, cache and frev by name.
+ */
+nfsrv_symlink(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ struct nameidata nd;
+ register struct vattr *vap = &va;
+ register u_long *tl;
+ register long t1;
+ struct nfsv2_sattr *sp;
+ caddr_t bpos;
+ struct uio io;
+ struct iovec iv;
+ int error = 0, cache, len, len2;
+ char *pathcp, *cp2;
+ struct mbuf *mb, *mreq;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ pathcp = (char *)0; /* lets both exit paths tell whether the buffer was allocated */
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ goto out;
+ nfsm_strsiz(len2, NFS_MAXPATHLEN); /* length of the link contents */
+ MALLOC(pathcp, caddr_t, len2 + 1, M_TEMP, M_WAITOK); /* + 1 for the terminating NUL */
+ iv.iov_base = pathcp;
+ iv.iov_len = len2;
+ io.uio_resid = len2;
+ io.uio_offset = 0;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ nfsm_mtouio(&io, len2); /* copies the link contents from the request into pathcp */
+ nfsm_dissect(sp, struct nfsv2_sattr *, NFSX_SATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ *(pathcp + len2) = '\0';
+ if (nd.ni_vp) {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(nd.ni_vp);
+ error = EEXIST;
+ goto out;
+ }
+ VATTR_NULL(vap);
+ vap->va_mode = fxdr_unsigned(u_short, sp->sa_mode);
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap, pathcp);
+out:
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+ nfsm_reply(0);
+ return (error);
+nfsmout:
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ if (pathcp)
+ FREE(pathcp, M_TEMP);
+ return (error);
+}
+
+/*
+ * nfs mkdir service
+ *
+ * Decodes a parent directory file handle, the new directory's name and
+ * its mode, creates the directory with VOP_MKDIR() and replies with the
+ * new directory's file handle and attributes.
+ *
+ * An existing entry with the same name yields EEXIST. After a successful
+ * VOP_MKDIR() the new vnode (nd.ni_vp) is released here with vput() once
+ * its file handle and attributes have been fetched.
+ *
+ * NOTE(review): no visible goto targets nfsmout; it is presumably reached
+ * from inside the nfsm_* macros, which are defined outside this section
+ * and use the locals tl, t1, cp, cp2, mb, mb2, mreq, bpos, cache, frev, fp
+ * and vap by name -- confirm in nfsm_subs.h.
+ */
+nfsrv_mkdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ struct vattr va;
+ register struct vattr *vap = &va;
+ register struct nfsv2_fattr *fp;
+ struct nameidata nd;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = CREATE;
+ nd.ni_cnd.cn_flags = LOCKPARENT;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED); /* one word: the requested mode */
+ VATTR_NULL(vap);
+ vap->va_type = VDIR;
+ vap->va_mode = nfstov_mode(*tl++);
+ vp = nd.ni_vp;
+ if (vp != NULL) { /* the name already exists */
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ vrele(vp);
+ error = EEXIST;
+ nfsm_reply(0);
+ }
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ if (error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, vap))
+ nfsm_reply(0);
+ vp = nd.ni_vp; /* the newly created directory */
+ bzero((caddr_t)fhp, sizeof(nfh)); /* the request's handle buffer is reused for the reply handle */
+ fhp->fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+ if (error = VFS_VPTOFH(vp, &fhp->fh_fid)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ error = VOP_GETATTR(vp, vap, cred, nfsd->nd_procp);
+ vput(vp);
+ nfsm_reply(NFSX_FH+NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfhtom(fhp);
+ nfsm_build(fp, struct nfsv2_fattr *, NFSX_FATTR(nfsd->nd_nqlflag != NQL_NOVAL));
+ nfsm_srvfillattr;
+ return (error);
+nfsmout:
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp);
+ else
+ vput(nd.ni_dvp);
+ if (nd.ni_vp)
+ vrele(nd.ni_vp);
+ return (error);
+}
+
+/*
+ * nfs rmdir service
+ *
+ * Decodes a parent directory file handle and a name, looks the name up
+ * with nameiop DELETE (LOCKPARENT | LOCKLEAF) and removes the directory
+ * with VOP_RMDIR().
+ *
+ * Refused before VOP_RMDIR() is called:
+ * - the name is not a directory (ENOTDIR);
+ * - the name resolves to its own parent, i.e. "." (EINVAL);
+ * - the vnode is flagged VROOT (EBUSY).
+ * On refusal the operation is cancelled with VOP_ABORTOP() and the parent
+ * and leaf vnodes are released here.
+ *
+ * NOTE(review): the nfsm_* macros are defined outside this section and
+ * presumably use the locals tl, t1, cp2, mb, mreq, bpos, cache and frev by
+ * name -- confirm in nfsm_subs.h before renaming any of them.
+ */
+nfsrv_rmdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, cache, len;
+ char *cp2;
+ struct mbuf *mb, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct nameidata nd;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_srvstrsiz(len, NFS_MAXNAMLEN);
+ nd.ni_cnd.cn_cred = cred;
+ nd.ni_cnd.cn_nameiop = DELETE;
+ nd.ni_cnd.cn_flags = LOCKPARENT | LOCKLEAF;
+ if (error = nfs_namei(&nd, fhp, len, nfsd->nd_slp, nam, &md, &dpos,
+ nfsd->nd_procp))
+ nfsm_reply(0);
+ vp = nd.ni_vp;
+ if (vp->v_type != VDIR) {
+ error = ENOTDIR;
+ goto out;
+ }
+ /*
+ * No rmdir "." please.
+ */
+ if (nd.ni_dvp == vp) {
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * The root of a mounted filesystem cannot be deleted.
+ */
+ if (vp->v_flag & VROOT)
+ error = EBUSY;
+out:
+ if (!error) {
+ nqsrv_getl(nd.ni_dvp, NQL_WRITE);
+ nqsrv_getl(vp, NQL_WRITE);
+ error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+ } else {
+ VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
+ if (nd.ni_dvp == nd.ni_vp)
+ vrele(nd.ni_dvp); /* same vnode as vp: vput(vp) below does the unlock */
+ else
+ vput(nd.ni_dvp);
+ vput(vp);
+ }
+ nfsm_reply(0);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs readdir service
+ * - mallocs what it thinks is enough to read
+ * count rounded up to a multiple of NFS_DIRBLKSIZ <= NFS_MAXREADDIR
+ * - calls VOP_READDIR()
+ * - loops around building the reply
+ * if the output generated exceeds count break out of loop
+ * The nfsm_clget macro is used here so that the reply will be packed
+ * tightly in mbuf clusters.
+ * - it only knows that it has encountered eof when the VOP_READDIR()
+ * reads nothing
+ * - as such one readdir rpc will return eof false although you are there
+ * and then the next will return eof
+ * - it trims out records with d_fileno == 0
+ * this doesn't matter for Unix clients, but such records might confuse
+ * clients for other operating systems.
+ * NB: It is tempting to set eof to true if the VOP_READDIR() reads less
+ * than requested, but this may not apply to all filesystems. For
+ * example, client NFS does not { although it is never remote mounted
+ * anyhow }
+ * The alternate call nqnfsrv_readdirlook() does lookups as well.
+ * PS: The NFS protocol spec. does not clarify what the "count" byte
+ * argument is a count of.. just name strings and file id's or the
+ * entire reply rpc or ...
+ * I tried just file name and id sizes and it confused the Sun client,
+ * so I am using the full rpc size now. The "paranoia.." comment refers
+ * to including the status longwords that are not a part of the dir.
+ * "entry" structures, but are in the rpc.
+ */
+struct flrep { /* per-entry block that nqnfsrv_readdirlook() copies verbatim into its reply */
+ u_long fl_cachable; /* cache value from nqsrv_getlease(); only set when a lease was requested */
+ u_long fl_duration; /* lease duration, or 0 when the request asked for none */
+ u_long fl_frev[2]; /* filled by txdr_hyper() from nqsrv_getlease()'s frev; only set with a lease */
+ nfsv2fh_t fl_nfh; /* file handle of the directory entry */
+ u_long fl_fattr[NFSX_NQFATTR / sizeof (u_long)]; /* attribute area, filled through nfsm_srvfillattr */
+};
+
+nfsrv_readdir(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register char *bp, *be; /* current write position and end of space in the reply mbuf */
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf; /* rbuf: temporary buffer VOP_READDIR() fills */
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ int len, nlen, rem, xfer, tsiz, i, error = 0;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache;
+ u_quad_t frev;
+ u_long on, off, toff;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED); /* two words: offset and count */
+ toff = fxdr_unsigned(u_long, *tl++); /* directory offset the client asked for */
+ off = (toff & ~(NFS_DIRBLKSIZ-1)); /* toff rounded down to a NFS_DIRBLKSIZ boundary: where the read starts */
+ on = (toff & (NFS_DIRBLKSIZ-1)); /* position of toff inside that block */
+ cnt = fxdr_unsigned(int, *tl); /* client's byte budget for the reply */
+ siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1)); /* cnt rounded up to a multiple of NFS_DIRBLKSIZ */
+ if (cnt > NFS_MAXREADDIR) /* NOTE(review): cnt comes from the request and is not checked for <= 0 here -- confirm that is safe */
+ siz = NFS_MAXREADDIR;
+ fullsiz = siz; /* size of every read attempt; siz itself is reduced to the bytes actually read */
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ VOP_UNLOCK(vp); /* the reference is kept; every later exit uses vrele(), not vput() */
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ error = VOP_READDIR(vp, &io, cred);
+ off = (off_t)io.uio_offset; /* where a following read would continue */
+ if (error) {
+ vrele(vp);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(0);
+ }
+ if (io.uio_resid < fullsiz) /* something was read, so eof has not been seen */
+ eofflag = 0;
+ else
+ eofflag = 1;
+ if (io.uio_resid) {
+ siz -= io.uio_resid; /* siz is now the number of bytes actually read */
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ */
+ if (siz == 0) {
+ vrele(vp);
+ nfsm_reply(2*NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = nfs_false; /* no entry follows */
+ *tl = nfs_true; /* eof */
+ FREE((caddr_t)rbuf, M_TEMP);
+ return (0);
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ while (cpos < cend && dp->d_fileno == 0) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ if (cpos >= cend) { /* only empty records from offset on: read the next chunk */
+ toff = off;
+ siz = fullsiz;
+ on = 0;
+ goto again;
+ }
+
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ len = 3*NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(siz);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend) {
+ if (dp->d_fileno != 0) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen; /* pad bytes needed after the name */
+ len += (4*NFSX_UNSIGNED + nlen + rem);
+ if (len > cnt) { /* this entry would exceed the client's budget: stop, and do not report eof */
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget; /* presumably points tl at bp, moving to a new mbuf when bp reaches be -- defined elsewhere */
+ *tl = nfs_true; /* an entry follows */
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp; /* only what still fits in the current mbuf */
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to a long boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ toff += dp->d_reclen;
+ *tl = txdr_unsigned(toff); /* offset at which the client can resume after this entry */
+ bp += NFSX_UNSIGNED;
+ } else
+ toff += dp->d_reclen;
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ vrele(vp);
+ nfsm_clget;
+ *tl = nfs_false; /* no more entries in this reply */
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ if (mp != mb) { /* set the length of the last mbuf written */
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE(rbuf, M_TEMP);
+ nfsm_srvdone;
+}
+
+nqnfsrv_readdirlook(nfsd, mrep, md, dpos, cred, nam, mrq) /*
+ * Readdir variant that also does a lookup per entry: each record in the
+ * reply additionally carries a struct flrep (lease information, file
+ * handle and attributes of the entry). Entries whose vnode, file handle
+ * or attributes cannot be obtained are skipped.
+ */
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register char *bp, *be; /* current write position and end of space in the reply mbuf */
+ register struct mbuf *mp;
+ register struct dirent *dp;
+ register caddr_t cp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ struct mbuf *mb, *mb2, *mreq, *mp2;
+ char *cpos, *cend, *cp2, *rbuf; /* rbuf: temporary buffer VOP_READDIR() fills */
+ struct vnode *vp, *nvp; /* vp: the directory; nvp: the entry currently being looked up */
+ struct flrep fl;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct uio io;
+ struct iovec iv;
+ struct vattr va, *vap = &va;
+ struct nfsv2_fattr *fp;
+ int len, nlen, rem, xfer, tsiz, i, error = 0, duration2, cache2;
+ int siz, cnt, fullsiz, eofflag, rdonly, cache;
+ u_quad_t frev, frev2;
+ u_long on, off, toff;
+
+ fhp = &nfh.fh_generic;
+ nfsm_srvmtofh(fhp);
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED); /* three words: offset, count, lease duration */
+ toff = fxdr_unsigned(u_long, *tl++); /* directory offset the client asked for */
+ off = (toff & ~(NFS_DIRBLKSIZ-1)); /* toff rounded down to a NFS_DIRBLKSIZ boundary: where the read starts */
+ on = (toff & (NFS_DIRBLKSIZ-1)); /* position of toff inside that block */
+ cnt = fxdr_unsigned(int, *tl++); /* client's byte budget for the reply */
+ duration2 = fxdr_unsigned(int, *tl); /* non-zero: get a lease for every entry */
+ siz = ((cnt+NFS_DIRBLKSIZ-1) & ~(NFS_DIRBLKSIZ-1)); /* cnt rounded up to a multiple of NFS_DIRBLKSIZ */
+ if (cnt > NFS_MAXREADDIR) /* NOTE(review): cnt comes from the request and is not checked for <= 0 here -- confirm that is safe */
+ siz = NFS_MAXREADDIR;
+ fullsiz = siz; /* size of every read attempt; siz itself is reduced to the bytes actually read */
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ nqsrv_getl(vp, NQL_READ);
+ if (error = nfsrv_access(vp, VEXEC, cred, rdonly, nfsd->nd_procp)) {
+ vput(vp);
+ nfsm_reply(0);
+ }
+ VOP_UNLOCK(vp); /* the reference is kept; every later exit uses vrele(), not vput() */
+ MALLOC(rbuf, caddr_t, siz, M_TEMP, M_WAITOK);
+again:
+ iv.iov_base = rbuf;
+ iv.iov_len = fullsiz;
+ io.uio_iov = &iv;
+ io.uio_iovcnt = 1;
+ io.uio_offset = (off_t)off;
+ io.uio_resid = fullsiz;
+ io.uio_segflg = UIO_SYSSPACE;
+ io.uio_rw = UIO_READ;
+ io.uio_procp = (struct proc *)0;
+ error = VOP_READDIR(vp, &io, cred);
+ off = (u_long)io.uio_offset; /* where a following read would continue */
+ if (error) {
+ vrele(vp);
+ free((caddr_t)rbuf, M_TEMP);
+ nfsm_reply(0);
+ }
+ if (io.uio_resid < fullsiz) /* something was read, so eof has not been seen */
+ eofflag = 0;
+ else
+ eofflag = 1;
+ if (io.uio_resid) {
+ siz -= io.uio_resid; /* siz is now the number of bytes actually read */
+
+ /*
+ * If nothing read, return eof
+ * rpc reply
+ */
+ if (siz == 0) {
+ vrele(vp);
+ nfsm_reply(2 * NFSX_UNSIGNED);
+ nfsm_build(tl, u_long *, 2 * NFSX_UNSIGNED);
+ *tl++ = nfs_false; /* no entry follows */
+ *tl = nfs_true; /* eof */
+ FREE((caddr_t)rbuf, M_TEMP);
+ return (0);
+ }
+ }
+
+ /*
+ * Check for degenerate cases of nothing useful read.
+ * If so go try again
+ */
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ while (cpos < cend && dp->d_fileno == 0) {
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ if (cpos >= cend) { /* only empty records from offset on: read the next chunk */
+ toff = off;
+ siz = fullsiz;
+ on = 0;
+ goto again;
+ }
+
+ cpos = rbuf + on;
+ cend = rbuf + siz;
+ dp = (struct dirent *)cpos;
+ len = 3 * NFSX_UNSIGNED; /* paranoia, probably can be 0 */
+ nfsm_reply(siz);
+ mp = mp2 = mb;
+ bp = bpos;
+ be = bp + M_TRAILINGSPACE(mp);
+
+ /* Loop through the records and build reply */
+ while (cpos < cend) {
+ if (dp->d_fileno != 0) {
+ nlen = dp->d_namlen;
+ rem = nfsm_rndup(nlen)-nlen; /* pad bytes needed after the name */
+
+ /*
+ * For readdir_and_lookup get the vnode using
+ * the file number.
+ */
+ if (VFS_VGET(vp->v_mount, dp->d_fileno, &nvp))
+ goto invalid;
+ bzero((caddr_t)&fl.fl_nfh, sizeof (nfsv2fh_t));
+ fl.fl_nfh.fh_generic.fh_fsid =
+ nvp->v_mount->mnt_stat.f_fsid;
+ if (VFS_VPTOFH(nvp, &fl.fl_nfh.fh_generic.fh_fid)) {
+ vput(nvp);
+ goto invalid;
+ }
+ if (duration2) {
+ (void) nqsrv_getlease(nvp, &duration2, NQL_READ,
+ nfsd, nam, &cache2, &frev2, cred);
+ fl.fl_duration = txdr_unsigned(duration2);
+ fl.fl_cachable = txdr_unsigned(cache2);
+ txdr_hyper(&frev2, fl.fl_frev);
+ } else
+ fl.fl_duration = 0; /* NOTE(review): fl_cachable and fl_frev are not assigned on this path, yet the whole struct is copied into the reply below */
+ if (VOP_GETATTR(nvp, vap, cred, nfsd->nd_procp)) {
+ vput(nvp);
+ goto invalid;
+ }
+ vput(nvp);
+ fp = (struct nfsv2_fattr *)&fl.fl_fattr;
+ nfsm_srvfillattr; /* presumably converts *vap into *fp -- defined elsewhere */
+ len += (4*NFSX_UNSIGNED + nlen + rem + NFSX_FH
+ + NFSX_NQFATTR);
+ if (len > cnt) { /* this entry would exceed the client's budget: stop, and do not report eof */
+ eofflag = 0;
+ break;
+ }
+ /*
+ * Build the directory record xdr from
+ * the dirent entry.
+ */
+ nfsm_clget; /* presumably points tl at bp, moving to a new mbuf when bp reaches be -- defined elsewhere */
+ *tl = nfs_true; /* an entry follows */
+ bp += NFSX_UNSIGNED;
+
+ /*
+ * For readdir_and_lookup copy the stuff out.
+ */
+ xfer = sizeof (struct flrep);
+ cp = (caddr_t)&fl;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp; /* only what still fits in the current mbuf */
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ nfsm_clget;
+ *tl = txdr_unsigned(dp->d_fileno);
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ *tl = txdr_unsigned(nlen);
+ bp += NFSX_UNSIGNED;
+
+ /* And loop around copying the name */
+ xfer = nlen;
+ cp = dp->d_name;
+ while (xfer > 0) {
+ nfsm_clget;
+ if ((bp+xfer) > be)
+ tsiz = be-bp;
+ else
+ tsiz = xfer;
+ bcopy(cp, bp, tsiz);
+ bp += tsiz;
+ xfer -= tsiz;
+ if (xfer > 0)
+ cp += tsiz;
+ }
+ /* And null pad to a long boundary */
+ for (i = 0; i < rem; i++)
+ *bp++ = '\0';
+ nfsm_clget;
+
+ /* Finish off the record */
+ toff += dp->d_reclen;
+ *tl = txdr_unsigned(toff); /* offset at which the client can resume after this entry */
+ bp += NFSX_UNSIGNED;
+ } else
+invalid: /* also the landing point for entries skipped above: just step past the record */
+ toff += dp->d_reclen;
+ cpos += dp->d_reclen;
+ dp = (struct dirent *)cpos;
+ }
+ vrele(vp);
+ nfsm_clget;
+ *tl = nfs_false; /* no more entries in this reply */
+ bp += NFSX_UNSIGNED;
+ nfsm_clget;
+ if (eofflag)
+ *tl = nfs_true;
+ else
+ *tl = nfs_false;
+ bp += NFSX_UNSIGNED;
+ if (mp != mb) { /* set the length of the last mbuf written */
+ if (bp < be)
+ mp->m_len = bp - mtod(mp, caddr_t);
+ } else
+ mp->m_len += bp - bpos;
+ FREE(rbuf, M_TEMP);
+ nfsm_srvdone;
+}
+
+/*
+ * nfs statfs service
+ *
+ * Decodes a file handle, converts it to a vnode and calls VFS_STATFS() on
+ * that vnode's mount. The reply carries NFS_MAXDGRAMDATA as the transfer
+ * size followed by the block size, total blocks, free blocks and available
+ * blocks; the file counts (f_files, f_ffree) are added only when
+ * nfsd->nd_nqlflag differs from NQL_NOVAL.
+ *
+ * NOTE(review): the nfsm_* macros are defined outside this section and
+ * presumably use the locals tl, t1, cp2, mb, mb2, mreq, bpos, cache and
+ * frev by name -- confirm in nfsm_subs.h before renaming any of them.
+ */
+nfsrv_statfs(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ register struct statfs *sf;
+ register struct nfsv2_statfs *sfp;
+ register u_long *tl;
+ register long t1;
+ caddr_t bpos;
+ int error = 0, rdonly, cache, isnq;
+ char *cp2;
+ struct mbuf *mb, *mb2, *mreq;
+ struct vnode *vp;
+ nfsv2fh_t nfh;
+ fhandle_t *fhp;
+ struct statfs statfs;
+ u_quad_t frev;
+
+ fhp = &nfh.fh_generic;
+ isnq = (nfsd->nd_nqlflag != NQL_NOVAL); /* selects the larger reply layout below */
+ nfsm_srvmtofh(fhp);
+ if (error = nfsrv_fhtovp(fhp, TRUE, &vp, cred, nfsd->nd_slp, nam, &rdonly))
+ nfsm_reply(0);
+ sf = &statfs;
+ error = VFS_STATFS(vp->v_mount, sf, nfsd->nd_procp);
+ vput(vp); /* the vnode was only needed to find the mount */
+ nfsm_reply(NFSX_STATFS(isnq));
+ nfsm_build(sfp, struct nfsv2_statfs *, NFSX_STATFS(isnq));
+ sfp->sf_tsize = txdr_unsigned(NFS_MAXDGRAMDATA);
+ sfp->sf_bsize = txdr_unsigned(sf->f_bsize);
+ sfp->sf_blocks = txdr_unsigned(sf->f_blocks);
+ sfp->sf_bfree = txdr_unsigned(sf->f_bfree);
+ sfp->sf_bavail = txdr_unsigned(sf->f_bavail);
+ if (isnq) {
+ sfp->sf_files = txdr_unsigned(sf->f_files);
+ sfp->sf_ffree = txdr_unsigned(sf->f_ffree);
+ }
+ nfsm_srvdone;
+}
+
+/*
+ * Null operation, used by clients to ping server
+ *
+ * Decodes nothing from the request; it only builds a reply through
+ * nfsm_reply(0) with error preset to VNOVAL.
+ * NOTE(review): VNOVAL is presumably a marker that the reply builder
+ * treats specially rather than an error number -- confirm in the code that
+ * nfsm_reply() expands to.
+ */
+/* ARGSUSED */
+nfsrv_null(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ caddr_t bpos;
+ int error = VNOVAL, cache;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ nfsm_reply(0);
+ return (error);
+}
+
+/*
+ * No operation, used for obsolete procedures
+ *
+ * Replies with the status already stored in nfsd->nd_repstat when that is
+ * non-zero, and with EPROCUNAVAIL otherwise. Nothing is decoded from the
+ * request.
+ */
+/* ARGSUSED */
+nfsrv_noop(nfsd, mrep, md, dpos, cred, nam, mrq)
+ struct nfsd *nfsd;
+ struct mbuf *mrep, *md;
+ caddr_t dpos;
+ struct ucred *cred;
+ struct mbuf *nam, **mrq;
+{
+ caddr_t bpos;
+ int error, cache;
+ struct mbuf *mb, *mreq;
+ u_quad_t frev;
+
+ if (nfsd->nd_repstat)
+ error = nfsd->nd_repstat;
+ else
+ error = EPROCUNAVAIL;
+ nfsm_reply(0);
+ return (error);
+}
+
+/*
+ * Access check for a vnode that was reached through an NFS file handle,
+ * i.e. one that may stand for a file some Unix client already has open.
+ *
+ * vn_writechk() plus VOP_ACCESS() alone would not do, because:
+ * 1 - for writes, a read-only export (rdonly) has to be honoured in
+ * addition to MNT_RDONLY on the mount;
+ * 2 - the owner of the file is let through regardless of the mode bits,
+ * so that a process that chmods a file after opening it keeps
+ * working. This opens a security hole, but it is a small one next
+ * to those the nfs server already has.
+ *
+ * Returns 0 when access is granted, otherwise EROFS, ETXTBSY, or the
+ * error from VOP_GETATTR() / VOP_ACCESS().
+ */
+nfsrv_access(vp, flags, cred, rdonly, p)
+ register struct vnode *vp;
+ int flags;
+ register struct ucred *cred;
+ int rdonly;
+ struct proc *p;
+{
+ struct vattr va;
+ int rv;
+
+ if (flags & VWRITE) {
+ /*
+ * vn_writechk() with the export's rdonly flag added: no
+ * writing to regular files, directories or symbolic links
+ * of a read-only file system. Sockets and block or
+ * character devices that live there stay writable.
+ */
+ if ((rdonly || (vp->v_mount->mnt_flag & MNT_RDONLY)) &&
+ (vp->v_type == VREG || vp->v_type == VDIR ||
+ vp->v_type == VLNK))
+ return (EROFS);
+ /*
+ * Shared text attached to the vnode gets one attempt at
+ * being freed; writing is refused if that attempt fails.
+ */
+ if ((vp->v_flag & VTEXT) && !vnode_pager_uncache(vp))
+ return (ETXTBSY);
+ }
+ rv = VOP_GETATTR(vp, &va, cred, p);
+ if (rv)
+ return (rv);
+ rv = VOP_ACCESS(vp, flags, cred, p);
+ /* The owner passes even when VOP_ACCESS() said no. */
+ if (rv == 0 || cred->cr_uid == va.va_uid)
+ return (0);
+ return (rv);
+}
diff --git a/sys/nfsserver/nfs_srvcache.c b/sys/nfsserver/nfs_srvcache.c
new file mode 100644
index 0000000..63d8bb7
--- /dev/null
+++ b/sys/nfsserver/nfs_srvcache.c
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_srvcache.c 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Reference: Chet Juszczak, "Improving the Performance and Correctness
+ * of an NFS Server", in Proc. Winter 1989 USENIX Conference,
+ * pages 53-63. San Diego, February 1989.
+ */
+#include <sys/param.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mbuf.h>
+#include <sys/malloc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/nfsm_subs.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nqnfs.h>
+
+long numnfsrvcache, desirednfsrvcache = NFSRVCACHESIZ; /* entries currently allocated, and the count at which nfsrv_getcache() starts recycling instead of allocating */
+
+#define NFSRCHASH(xid) (((xid) + ((xid) >> 24)) & rheadhash) /* hash-table index for an rpc xid */
+static struct nfsrvcache *nfsrvlruhead, **nfsrvlrutail = &nfsrvlruhead; /* LRU list: recycling takes the head; the tail pointer addresses the last rc_next link */
+static struct nfsrvcache **rheadhtbl; /* hash table, allocated by nfsrv_initcache() */
+static u_long rheadhash; /* mask used by NFSRCHASH, filled in by hashinit() */
+
+#define TRUE 1
+#define FALSE 0
+
+#define NETFAMILY(rp) \
+ (((rp)->rc_flag & RC_INETADDR) ? AF_INET : AF_ISO) /* address family recorded for a cache entry */
+
+/*
+ * Static array that defines which nfs rpc's are nonidempotent
+ *
+ * Indexed by rpc procedure number: nfsrv_updatecache() looks up
+ * nd->nd_procnum here and saves a reply in the cache only for procedures
+ * marked TRUE.
+ * NOTE(review): if the indices follow the NFS version 2 procedure numbers
+ * of RFC 1094, the TRUE entries are SETATTR (2) and WRITE through RMDIR
+ * (8-15) -- confirm against nfsv2.h.
+ */
+int nonidempotent[NFS_NPROCS] = {
+ FALSE, /* 0 */
+ FALSE, /* 1 */
+ TRUE, /* 2 */
+ FALSE, /* 3 */
+ FALSE, /* 4 */
+ FALSE, /* 5 */
+ FALSE, /* 6 */
+ FALSE, /* 7 */
+ TRUE, /* 8 */
+ TRUE, /* 9 */
+ TRUE, /* 10 */
+ TRUE, /* 11 */
+ TRUE, /* 12 */
+ TRUE, /* 13 */
+ TRUE, /* 14 */
+ TRUE, /* 15 */
+ FALSE, /* 16 */
+ FALSE, /* 17 */
+ FALSE, /* 18 */
+ FALSE, /* 19 */
+ FALSE, /* 20 */
+ FALSE, /* 21 */
+ FALSE, /* 22 */
+};
+
+/* True iff the rpc reply is an nfs status ONLY!
+ *
+ * Indexed by rpc procedure number (nd->nd_procnum). For a TRUE entry
+ * nfsrv_updatecache() stores just nd_repstat in the cache entry
+ * (RC_REPSTATUS); for a FALSE entry it stores a copy of the whole reply
+ * mbuf chain (RC_REPMBUF).
+ * NOTE(review): if the indices follow RFC 1094, entries 10-13 and 15 are
+ * REMOVE, RENAME, LINK, SYMLINK and RMDIR; entry 22 lies beyond the NFS
+ * version 2 procedures -- confirm against nfsv2.h.
+ */
+static int repliesstatus[NFS_NPROCS] = {
+ FALSE, /* 0 */
+ FALSE, /* 1 */
+ FALSE, /* 2 */
+ FALSE, /* 3 */
+ FALSE, /* 4 */
+ FALSE, /* 5 */
+ FALSE, /* 6 */
+ FALSE, /* 7 */
+ FALSE, /* 8 */
+ FALSE, /* 9 */
+ TRUE, /* 10 */
+ TRUE, /* 11 */
+ TRUE, /* 12 */
+ TRUE, /* 13 */
+ FALSE, /* 14 */
+ TRUE, /* 15 */
+ FALSE, /* 16 */
+ FALSE, /* 17 */
+ FALSE, /* 18 */
+ FALSE, /* 19 */
+ FALSE, /* 20 */
+ FALSE, /* 21 */
+ TRUE, /* 22 */
+};
+
+/*
+ * Initialize the server request cache list
+ *
+ * Allocates the hash table through hashinit(), sized from
+ * desirednfsrvcache, and stores the table in rheadhtbl; hashinit() also
+ * fills in rheadhash, which NFSRCHASH() uses as its mask.
+ */
+nfsrv_initcache()
+{
+
+ rheadhtbl = hashinit(desirednfsrvcache, M_NFSD, &rheadhash);
+}
+
+/*
+ * Look for the request in the cache
+ * If found then
+ * return action and optionally reply
+ * else
+ * insert it in the cache
+ *
+ * The rules are as follows:
+ * - if in progress, return DROP request
+ * - if completed within DELAY of the current time, return DROP it
+ * - if completed a longer time ago return REPLY if the reply was cached or
+ * return DOIT
+ * Update/add new request at end of lru list
+ *
+ * NOTE(review): the code below contains no time comparison, so the
+ * "within DELAY" rule does not appear to be implemented here; a completed
+ * entry is answered from the cache (RC_REPLY) or redone (RC_DOIT)
+ * regardless of its age.
+ *
+ * A request matches an entry when xid, procedure number and client address
+ * all agree. Requests with nd_nqlflag != NQL_NOVAL bypass the cache and
+ * always get RC_DOIT. For RC_REPLY the reply is returned through *repp.
+ * Entries are guarded by the RC_LOCKED / RC_WANTED flag pair with
+ * tsleep() / wakeup() on the entry's address.
+ */
+nfsrv_getcache(nam, nd, repp)
+ struct mbuf *nam;
+ register struct nfsd *nd;
+ struct mbuf **repp;
+{
+ register struct nfsrvcache *rp, *rq, **rpp;
+ struct mbuf *mb;
+ struct sockaddr_in *saddr;
+ caddr_t bpos;
+ int ret;
+
+ if (nd->nd_nqlflag != NQL_NOVAL)
+ return (RC_DOIT);
+ rpp = &rheadhtbl[NFSRCHASH(nd->nd_retxid)]; /* hash chain this xid belongs to */
+loop:
+ for (rp = *rpp; rp; rp = rp->rc_forw) {
+ if (nd->nd_retxid == rp->rc_xid && nd->nd_procnum == rp->rc_proc &&
+ netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+ if ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ goto loop; /* rescan from the start of the chain after sleeping */
+ }
+ rp->rc_flag |= RC_LOCKED;
+ /* If not at end of LRU chain, move it there */
+ if (rp->rc_next) {
+ /* remove from LRU chain */
+ *rp->rc_prev = rp->rc_next;
+ rp->rc_next->rc_prev = rp->rc_prev;
+ /* and replace at end of it */
+ rp->rc_next = NULL;
+ rp->rc_prev = nfsrvlrutail;
+ *nfsrvlrutail = rp;
+ nfsrvlrutail = &rp->rc_next;
+ }
+ if (rp->rc_state == RC_UNUSED)
+ panic("nfsrv cache");
+ if (rp->rc_state == RC_INPROG) {
+ nfsstats.srvcache_inproghits++;
+ ret = RC_DROPIT;
+ } else if (rp->rc_flag & RC_REPSTATUS) { /* only a status was saved: rebuild the reply header from it */
+ nfsstats.srvcache_nonidemdonehits++;
+ nfs_rephead(0, nd, rp->rc_status,
+ 0, (u_quad_t *)0, repp, &mb, &bpos);
+ ret = RC_REPLY;
+ } else if (rp->rc_flag & RC_REPMBUF) { /* a full reply was saved: hand back a copy */
+ nfsstats.srvcache_nonidemdonehits++;
+ *repp = m_copym(rp->rc_reply, 0, M_COPYALL,
+ M_WAIT);
+ ret = RC_REPLY;
+ } else { /* nothing was saved: the rpc is simply done again */
+ nfsstats.srvcache_idemdonehits++;
+ rp->rc_state = RC_INPROG;
+ ret = RC_DOIT;
+ }
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return (ret);
+ }
+ }
+ nfsstats.srvcache_misses++;
+ if (numnfsrvcache < desirednfsrvcache) { /* still below the limit: allocate a fresh entry */
+ rp = (struct nfsrvcache *)malloc((u_long)sizeof *rp,
+ M_NFSD, M_WAITOK);
+ bzero((char *)rp, sizeof *rp);
+ numnfsrvcache++;
+ rp->rc_flag = RC_LOCKED;
+ } else { /* at the limit: recycle the entry at the head of the LRU list */
+ rp = nfsrvlruhead;
+ while ((rp->rc_flag & RC_LOCKED) != 0) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ rp = nfsrvlruhead; /* the head may have changed while asleep */
+ }
+ rp->rc_flag |= RC_LOCKED;
+ /* remove from hash chain */
+ if (rq = rp->rc_forw)
+ rq->rc_back = rp->rc_back;
+ *rp->rc_back = rq;
+ /* remove from LRU chain */
+ *rp->rc_prev = rp->rc_next;
+ rp->rc_next->rc_prev = rp->rc_prev; /* NOTE(review): unlike the guarded splice above, this assumes the head has a successor, i.e. the list holds at least two entries -- confirm */
+ if (rp->rc_flag & RC_REPMBUF)
+ m_freem(rp->rc_reply);
+ if (rp->rc_flag & RC_NAM)
+ MFREE(rp->rc_nam, mb); /* NOTE(review): presumably frees a single mbuf, while rc_nam comes from m_copym(..., M_COPYALL, ...) -- confirm the address never spans a chain */
+ rp->rc_flag &= (RC_LOCKED | RC_WANTED); /* drop every flag of the old request */
+ }
+ /* place at end of LRU list */
+ rp->rc_next = NULL;
+ rp->rc_prev = nfsrvlrutail;
+ *nfsrvlrutail = rp;
+ nfsrvlrutail = &rp->rc_next;
+ rp->rc_state = RC_INPROG;
+ rp->rc_xid = nd->nd_retxid;
+ saddr = mtod(nam, struct sockaddr_in *);
+ switch (saddr->sin_family) {
+ case AF_INET: /* internet clients: only the address word is kept */
+ rp->rc_flag |= RC_INETADDR;
+ rp->rc_inetaddr = saddr->sin_addr.s_addr;
+ break;
+ case AF_ISO:
+ default: /* every other family: keep a copy of the whole address mbuf */
+ rp->rc_flag |= RC_NAM;
+ rp->rc_nam = m_copym(nam, 0, M_COPYALL, M_WAIT);
+ break;
+ };
+ rp->rc_proc = nd->nd_procnum;
+ /* insert into hash chain */
+ if (rq = *rpp)
+ rq->rc_back = &rp->rc_forw;
+ rp->rc_forw = rq;
+ rp->rc_back = rpp;
+ *rpp = rp;
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return (RC_DOIT);
+}
+
+/*
+ * Record the outcome of a finished rpc in its request cache entry.
+ *
+ * The entry is found by xid, procedure number and client address, marked
+ * RC_DONE and, when repvalid is set and the procedure is nonidempotent,
+ * given either the reply status (procedures whose reply is a status only)
+ * or a copy of the reply mbuf chain repmbuf. Requests with
+ * nd_nqlflag != NQL_NOVAL are not cached, and nothing happens when no
+ * entry matches.
+ */
+void
+nfsrv_updatecache(nam, nd, repvalid, repmbuf)
+ struct mbuf *nam;
+ register struct nfsd *nd;
+ int repvalid;
+ struct mbuf *repmbuf;
+{
+ register struct nfsrvcache *rp;
+
+ if (nd->nd_nqlflag != NQL_NOVAL)
+ return;
+ rp = rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+ while (rp != NULL) {
+ /* Step over entries that belong to some other request. */
+ if (nd->nd_retxid != rp->rc_xid || nd->nd_procnum != rp->rc_proc ||
+ !netaddr_match(NETFAMILY(rp), &rp->rc_haddr, nam)) {
+ rp = rp->rc_forw;
+ continue;
+ }
+ /*
+ * Somebody else holds the entry: wait for it, then search the
+ * hash chain again from its start.
+ */
+ if (rp->rc_flag & RC_LOCKED) {
+ rp->rc_flag |= RC_WANTED;
+ (void) tsleep((caddr_t)rp, PZERO-1, "nfsrc", 0);
+ rp = rheadhtbl[NFSRCHASH(nd->nd_retxid)];
+ continue;
+ }
+ rp->rc_flag |= RC_LOCKED;
+ rp->rc_state = RC_DONE;
+ /*
+ * Only a valid reply to a nonidempotent rpc is worth keeping,
+ * as a bare status when that is all the reply consists of.
+ */
+ if (repvalid && nonidempotent[nd->nd_procnum]) {
+ if (!repliesstatus[nd->nd_procnum]) {
+ rp->rc_reply = m_copym(repmbuf,
+ 0, M_COPYALL, M_WAIT);
+ rp->rc_flag |= RC_REPMBUF;
+ } else {
+ rp->rc_status = nd->nd_repstat;
+ rp->rc_flag |= RC_REPSTATUS;
+ }
+ }
+ rp->rc_flag &= ~RC_LOCKED;
+ if (rp->rc_flag & RC_WANTED) {
+ rp->rc_flag &= ~RC_WANTED;
+ wakeup((caddr_t)rp);
+ }
+ return;
+ }
+}
+
+/*
+ * Clean out the cache. Called when the last nfsd terminates.
+ *
+ * Walks the LRU list, which links every cache entry, and frees each entry
+ * together with whatever it still owns: the saved reply chain when
+ * RC_REPMBUF is set and the saved client address when RC_NAM is set (both
+ * were created with m_copym() by nfsrv_updatecache() / nfsrv_getcache()).
+ * Afterwards the hash table is cleared, the LRU list is reset to empty
+ * and the entry count is zeroed. The hash table itself is kept.
+ */
+void
+nfsrv_cleancache()
+{
+ register struct nfsrvcache *rp, *nextrp;
+
+ for (rp = nfsrvlruhead; rp; rp = nextrp) {
+ nextrp = rp->rc_next; /* fetch before rp is freed */
+ /* Release the mbufs the entry owns, or they are leaked. */
+ if (rp->rc_flag & RC_REPMBUF)
+ m_freem(rp->rc_reply);
+ if (rp->rc_flag & RC_NAM)
+ m_freem(rp->rc_nam);
+ free(rp, M_NFSD);
+ }
+ bzero((char *)rheadhtbl, (rheadhash + 1) * sizeof(void *));
+ nfsrvlruhead = NULL;
+ nfsrvlrutail = &nfsrvlruhead;
+ numnfsrvcache = 0;
+}
diff --git a/sys/nfsserver/nfs_srvsock.c b/sys/nfsserver/nfs_srvsock.c
new file mode 100644
index 0000000..cf88ed3
--- /dev/null
+++ b/sys/nfsserver/nfs_srvsock.c
@@ -0,0 +1,1990 @@
+/*
+ * Copyright (c) 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_socket.c 8.3 (Berkeley) 1/12/94
+ */
+
+/*
+ * Socket operations for use by nfs
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/proc.h>
+#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/mbuf.h>
+#include <sys/vnode.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/syslog.h>
+#include <sys/tprintf.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfsrtt.h>
+#include <nfs/nqnfs.h>
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Estimate rto for an nfs rpc sent via an unreliable datagram.
+ * Use the mean and mean deviation of rtt for the appropriate type of rpc
+ * for the frequent rpcs and a default for the others.
+ * The justification for doing "other" this way is that these rpcs
+ * happen so infrequently that timer est. would probably be stale.
+ * Also, since many of these rpcs are
+ * non-idempotent, a conservative timeout is desired.
+ * getattr, lookup - A+2D
+ * read, write     - A+4D
+ * other           - nm_timeo
+ *
+ * "t" is the timer class from proct[] (0 = default, 1..4 = timed rpcs).
+ * The smoothed mean is kept scaled by 8 in nm_srtt[] and the smoothed
+ * deviation scaled by 4 in nm_sdrtt[] (see the update code in
+ * nfs_reply()), hence the rounding shifts below.
+ *
+ * Every use of a macro argument is parenthesised so that an expression
+ * argument such as "a ? b : c" still indexes element (t) - 1, and the
+ * accessor macros expand to a single parenthesised lvalue.
+ */
+#define	NFS_RTO(n, t) \
+	((t) == 0 ? (n)->nm_timeo : \
+	 ((t) < 3 ? \
+	  (((((n)->nm_srtt[(t) - 1] + 3) >> 2) + (n)->nm_sdrtt[(t) - 1] + 1) >> 1) : \
+	  ((((n)->nm_srtt[(t) - 1] + 7) >> 3) + (n)->nm_sdrtt[(t) - 1] + 1)))
+/* Scaled mean / deviation slot for request r; only valid when proct[] > 0. */
+#define	NFS_SRTT(r)	((r)->r_nmp->nm_srtt[proct[(r)->r_procnum] - 1])
+#define	NFS_SDRTT(r)	((r)->r_nmp->nm_sdrtt[proct[(r)->r_procnum] - 1])
+/*
+ * Data defined in other files: RPC protocol constants (mostly already in
+ * XDR form), program/version numbers, and server-wide state.
+ */
+extern u_long rpc_reply;
+extern u_long rpc_msgdenied;
+extern u_long rpc_mismatch;
+extern u_long rpc_vers;
+extern u_long rpc_auth_unix;
+extern u_long rpc_msgaccepted;
+extern u_long rpc_call;
+extern u_long rpc_autherr;
+extern u_long rpc_rejectedcred;
+extern u_long rpc_auth_kerb;
+extern u_long nfs_prog;
+extern u_long nfs_vers;
+extern u_long nqnfs_prog;
+extern u_long nqnfs_vers;
+extern time_t nqnfsstarttime;
+extern int nonidempotent[NFS_NPROCS];
+
+/*
+ * Maps errno values to nfs error numbers.
+ * Use NFSERR_IO as the catch all for ones not specifically defined in
+ * RFC 1094.
+ *
+ * The table is positional: nfs_rephead() looks up nfsrv_errmap[err - 1],
+ * so slot 0 corresponds to errno 1.  Each row below holds five
+ * consecutive errno values; the range is noted at the end of the row.
+ * The initializer supplies 81 entries.
+ */
+static int nfsrv_errmap[ELAST] = {
+ NFSERR_PERM, NFSERR_NOENT, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 1-5 */
+ NFSERR_NXIO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 6-10 */
+ NFSERR_IO, NFSERR_IO, NFSERR_ACCES, NFSERR_IO, NFSERR_IO, /* errno 11-15 */
+ NFSERR_IO, NFSERR_EXIST, NFSERR_IO, NFSERR_NODEV, NFSERR_NOTDIR, /* errno 16-20 */
+ NFSERR_ISDIR, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 21-25 */
+ NFSERR_IO, NFSERR_FBIG, NFSERR_NOSPC, NFSERR_IO, NFSERR_ROFS, /* errno 26-30 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 31-35 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 36-40 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 41-45 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 46-50 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 51-55 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 56-60 */
+ NFSERR_IO, NFSERR_IO, NFSERR_NAMETOL, NFSERR_IO, NFSERR_IO, /* errno 61-65 */
+ NFSERR_NOTEMPTY, NFSERR_IO, NFSERR_IO, NFSERR_DQUOT, NFSERR_STALE, /* errno 66-70 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 71-75 */
+ NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, NFSERR_IO, /* errno 76-80 */
+ NFSERR_IO, /* errno 81 */
+};
+
+/*
+ * Selects the round trip timer class for each procedure number.
+ * 0 - default (not timed)
+ * 1 - getattr
+ * 2 - lookup
+ * 3 - read
+ * 4 - write
+ * The name beside each slot is the handler stored at the same index of
+ * nfsrv_procs[] further down in this file.
+ */
+static int proct[NFS_NPROCS] = {
+	0,	/*  0: nfsrv_null */
+	1,	/*  1: nfsrv_getattr */
+	0,	/*  2: nfsrv_setattr */
+	0,	/*  3: nfsrv_noop */
+	2,	/*  4: nfsrv_lookup */
+	3,	/*  5: nfsrv_readlink */
+	3,	/*  6: nfsrv_read */
+	0,	/*  7: nfsrv_noop */
+	4,	/*  8: nfsrv_write */
+	0,	/*  9: nfsrv_create */
+	0,	/* 10: nfsrv_remove */
+	0,	/* 11: nfsrv_rename */
+	0,	/* 12: nfsrv_link */
+	0,	/* 13: nfsrv_symlink */
+	0,	/* 14: nfsrv_mkdir */
+	0,	/* 15: nfsrv_rmdir */
+	3,	/* 16: nfsrv_readdir */
+	0,	/* 17: nfsrv_statfs */
+	3,	/* 18: nqnfsrv_readdirlook */
+	0,	/* 19: nqnfsrv_getlease */
+	0,	/* 20: nqnfsrv_vacated */
+	0,	/* 21: nfsrv_noop */
+	0,	/* 22: nqnfsrv_access */
+};
+
+/*
+ * Each mount point keeps a congestion window (cwnd) limiting the number
+ * of outstanding rpcs.  Its size is adjusted roughly the way
+ *	Van Jacobson, Congestion avoidance and Control, In "Proceedings of
+ *	SIGCOMM '88". ACM, August 1988.
+ * describes for TCP: it is halved on a retransmit timeout and grown by
+ * 1/cwnd for every rpc reply received while a full window of rpcs is in
+ * progress.
+ * (The sent count and cwnd are scaled for integer arith.)
+ * Variants of "slow start" were tried and were found to be too much of a
+ * performance hit (ave. rtt 3 times larger),
+ * I suspect due to the large rtt that nfs rpcs have.
+ */
+#define	NFS_CWNDSCALE	256
+#define	NFS_MAXCWND	(NFS_CWNDSCALE * 32)
+
+/* Multipliers applied to a timeout as consecutive timeouts accumulate. */
+static int nfs_backoff[8] = {
+	2, 4, 8, 16, 32, 64, 128, 256,
+};
+
+/* Routines referenced before their definition or defined elsewhere. */
+int nfs_sbwait();
+void nfs_disconnect();
+void nfs_realign();
+void nfsrv_wakenfsd();
+void nfs_sndunlock();
+void nfs_rcvunlock();
+void nqnfs_serverd();
+void nqnfs_clientlease();
+struct mbuf *nfsm_rpchead();
+
+/* Round trip time logging: on/off switch and the log itself. */
+int nfsrtton = 0;
+struct nfsrtt nfsrtt;
+struct nfsd nfsd_head;
+
+/* Server side rpc handlers, defined elsewhere. */
+int nfsrv_null();
+int nfsrv_getattr();
+int nfsrv_setattr();
+int nfsrv_lookup();
+int nfsrv_readlink();
+int nfsrv_read();
+int nfsrv_write();
+int nfsrv_create();
+int nfsrv_remove();
+int nfsrv_rename();
+int nfsrv_link();
+int nfsrv_symlink();
+int nfsrv_mkdir();
+int nfsrv_rmdir();
+int nfsrv_readdir();
+int nfsrv_statfs();
+int nfsrv_noop();
+int nqnfsrv_readdirlook();
+int nqnfsrv_getlease();
+int nqnfsrv_vacated();
+int nqnfsrv_access();
+
+/*
+ * Handler table with one slot per procedure number; slots with no
+ * real handler hold nfsrv_noop.
+ */
+int (*nfsrv_procs[NFS_NPROCS])() = {
+	nfsrv_null,		/*  0 */
+	nfsrv_getattr,		/*  1 */
+	nfsrv_setattr,		/*  2 */
+	nfsrv_noop,		/*  3 */
+	nfsrv_lookup,		/*  4 */
+	nfsrv_readlink,		/*  5 */
+	nfsrv_read,		/*  6 */
+	nfsrv_noop,		/*  7 */
+	nfsrv_write,		/*  8 */
+	nfsrv_create,		/*  9 */
+	nfsrv_remove,		/* 10 */
+	nfsrv_rename,		/* 11 */
+	nfsrv_link,		/* 12 */
+	nfsrv_symlink,		/* 13 */
+	nfsrv_mkdir,		/* 14 */
+	nfsrv_rmdir,		/* 15 */
+	nfsrv_readdir,		/* 16 */
+	nfsrv_statfs,		/* 17 */
+	nqnfsrv_readdirlook,	/* 18 */
+	nqnfsrv_getlease,	/* 19 */
+	nqnfsrv_vacated,	/* 20 */
+	nfsrv_noop,		/* 21 */
+	nqnfsrv_access,		/* 22 */
+};
+
+/*
+ * Sentinel of the list of outstanding client requests.  The list is
+ * doubly linked through r_next/r_prev and circular: nfs_request() links
+ * new requests in just before this head, and the scans in this file stop
+ * when they arrive back at &nfsreqh.
+ */
+struct nfsreq nfsreqh;
+
+/*
+ * Initialize sockets and congestion for a new NFS connection.
+ * We do not free the sockaddr if error.
+ *
+ * nmp	mount point; nm_nam holds the server address, nm_sotype and
+ *	nm_soproto select the socket.  nm_so and nm_soflags are set here.
+ * rep	request on whose behalf we connect, or NULL; when non-NULL the
+ *	wait for connection completion is abandoned if nfs_sigintr()
+ *	reports a pending interrupt for it.
+ *
+ * Returns 0 on success.  On any failure the socket is torn down through
+ * nfs_disconnect() and the error is returned.
+ */
+nfs_connect(nmp, rep)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+{
+ register struct socket *so;
+ int s, error, rcvreserve, sndreserve;
+ struct sockaddr *saddr;
+ struct sockaddr_in *sin;
+ struct mbuf *m;
+ u_short tport;
+
+ /* Clear nm_so first so the "bad" path never closes a stale socket. */
+ nmp->nm_so = (struct socket *)0;
+ saddr = mtod(nmp->nm_nam, struct sockaddr *);
+ if (error = socreate(saddr->sa_family,
+ &nmp->nm_so, nmp->nm_sotype, nmp->nm_soproto))
+ goto bad;
+ so = nmp->nm_so;
+ nmp->nm_soflags = so->so_proto->pr_flags;
+
+ /*
+ * Some servers require that the client port be a reserved port number.
+ * Try ports downwards from IPPORT_RESERVED - 1, giving up once the
+ * candidate reaches IPPORT_RESERVED / 2 or sobind() fails with
+ * anything other than EADDRINUSE.
+ */
+ if (saddr->sa_family == AF_INET && (nmp->nm_flag & NFSMNT_RESVPORT)) {
+ MGET(m, M_WAIT, MT_SONAME);
+ sin = mtod(m, struct sockaddr_in *);
+ sin->sin_len = m->m_len = sizeof (struct sockaddr_in);
+ sin->sin_family = AF_INET;
+ sin->sin_addr.s_addr = INADDR_ANY;
+ tport = IPPORT_RESERVED - 1;
+ sin->sin_port = htons(tport);
+ while ((error = sobind(so, m)) == EADDRINUSE &&
+ --tport > IPPORT_RESERVED / 2)
+ sin->sin_port = htons(tport);
+ m_freem(m);
+ if (error)
+ goto bad;
+ }
+
+ /*
+ * Protocols that do not require connections may be optionally left
+ * unconnected for servers that reply from a port other than NFS_PORT.
+ */
+ if (nmp->nm_flag & NFSMNT_NOCONN) {
+ /* "No connect" makes no sense for a connection based protocol. */
+ if (nmp->nm_soflags & PR_CONNREQUIRED) {
+ error = ENOTCONN;
+ goto bad;
+ }
+ } else {
+ if (error = soconnect(so, nmp->nm_nam))
+ goto bad;
+
+ /*
+ * Wait for the connection to complete. Cribbed from the
+ * connect system call but with the wait timing out so
+ * that interruptible mounts don't hang here for a long time.
+ */
+ s = splnet();
+ while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
+ /* Wake at least every two seconds to re-check for interrupts. */
+ (void) tsleep((caddr_t)&so->so_timeo, PSOCK,
+ "nfscon", 2 * hz);
+ if ((so->so_state & SS_ISCONNECTING) &&
+ so->so_error == 0 && rep &&
+ (error = nfs_sigintr(nmp, rep, rep->r_procp))) {
+ so->so_state &= ~SS_ISCONNECTING;
+ splx(s);
+ goto bad;
+ }
+ }
+ /* Pick up and clear any error the connect attempt left behind. */
+ if (so->so_error) {
+ error = so->so_error;
+ so->so_error = 0;
+ splx(s);
+ goto bad;
+ }
+ splx(s);
+ }
+ /*
+ * Soft or interruptible mounts get a five second socket buffer
+ * timeout; all others use a timeout value of 0.
+ */
+ if (nmp->nm_flag & (NFSMNT_SOFT | NFSMNT_INT)) {
+ so->so_rcv.sb_timeo = (5 * hz);
+ so->so_snd.sb_timeo = (5 * hz);
+ } else {
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_timeo = 0;
+ }
+ /*
+ * Size the socket buffers from the mount's read/write sizes plus
+ * header room: one packet for datagrams, two for the other types
+ * (streams also allow for the record mark word).
+ */
+ if (nmp->nm_sotype == SOCK_DGRAM) {
+ sndreserve = nmp->nm_wsize + NFS_MAXPKTHDR;
+ rcvreserve = nmp->nm_rsize + NFS_MAXPKTHDR;
+ } else if (nmp->nm_sotype == SOCK_SEQPACKET) {
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR) * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR) * 2;
+ } else {
+ if (nmp->nm_sotype != SOCK_STREAM)
+ panic("nfscon sotype");
+ /* Turn on SO_KEEPALIVE for connection based protocols. */
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ /* Turn on TCP_NODELAY when running over TCP. */
+ if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ sndreserve = (nmp->nm_wsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ rcvreserve = (nmp->nm_rsize + NFS_MAXPKTHDR + sizeof (u_long))
+ * 2;
+ }
+ if (error = soreserve(so, sndreserve, rcvreserve))
+ goto bad;
+ so->so_rcv.sb_flags |= SB_NOINTR;
+ so->so_snd.sb_flags |= SB_NOINTR;
+
+ /*
+ * Initialize other non-zero congestion variables
+ *
+ * NOTE(review): the timer classes in proct[] run 1..4 and are used as
+ * index (class - 1), i.e. slots 0..3, yet slot 4 of both arrays is
+ * written here -- confirm that nm_srtt[] and nm_sdrtt[] really have
+ * five elements.
+ */
+ nmp->nm_srtt[0] = nmp->nm_srtt[1] = nmp->nm_srtt[2] = nmp->nm_srtt[3] =
+ nmp->nm_srtt[4] = (NFS_TIMEO << 3);
+ nmp->nm_sdrtt[0] = nmp->nm_sdrtt[1] = nmp->nm_sdrtt[2] =
+ nmp->nm_sdrtt[3] = nmp->nm_sdrtt[4] = 0;
+ nmp->nm_cwnd = NFS_MAXCWND / 2; /* Initial send window */
+ nmp->nm_sent = 0;
+ nmp->nm_timeouts = 0;
+ return (0);
+
+bad:
+ /* nfs_disconnect() is a no-op when no socket was created. */
+ nfs_disconnect(nmp);
+ return (error);
+}
+
+/*
+ * Re-establish a broken connection on a reliable protocol.
+ *	- tear down the old socket
+ *	- keep calling nfs_connect() until it succeeds, pausing on lbolt
+ *	  between attempts
+ *	- flag every outstanding request on this mount point with
+ *	  R_MUSTRESEND
+ * Returns 0 once reconnected, or EINTR when a connect attempt fails with
+ * EINTR or ERESTART.
+ * If this fails the mount point is DEAD!
+ * nb: Must be called with the nfs_sndlock() set on the mount point.
+ */
+nfs_reconnect(rep)
+	register struct nfsreq *rep;
+{
+	register struct nfsreq *req;
+	register struct nfsmount *nmp = rep->r_nmp;
+	int error;
+
+	nfs_disconnect(nmp);
+	for (;;) {
+		error = nfs_connect(nmp, rep);
+		if (error == 0)
+			break;
+		if (error == EINTR || error == ERESTART)
+			return (EINTR);
+		(void) tsleep((caddr_t)&lbolt, PSOCK, "nfscon", 0);
+	}
+
+	/*
+	 * Whatever was in flight on the old socket is lost; have every
+	 * request that belongs to this mount point sent again.
+	 */
+	for (req = nfsreqh.r_next; req != &nfsreqh; req = req->r_next) {
+		if (req->r_nmp == nmp)
+			req->r_flags |= R_MUSTRESEND;
+	}
+	return (0);
+}
+
+/*
+ * NFS disconnect: detach the socket from the mount point, then shut it
+ * down and close it.  Does nothing when the mount has no socket.
+ */
+void
+nfs_disconnect(nmp)
+	register struct nfsmount *nmp;
+{
+	register struct socket *so = nmp->nm_so;
+
+	if (so == NULL)
+		return;
+
+	/* Unhook the socket from the mount before tearing it down. */
+	nmp->nm_so = (struct socket *)0;
+	soshutdown(so, 2);
+	soclose(so);
+}
+
+/*
+ * The nfs send routine.  For connection based socket types it must be
+ * called with an nfs_sndlock() on the socket.
+ *
+ *	so	socket to send on (server side); for a client request the
+ *		mount's nm_so is used instead
+ *	nam	destination address, used only for unconnected sockets
+ *	top	mbuf chain to send; always consumed
+ *	rep	client request, or NULL when called from a server
+ *
+ * For the client side:
+ * - return EINTR if the RPC is terminated, 0 otherwise
+ * - set R_MUSTRESEND if the send fails for any reason
+ * - do any cleanup required by recoverable socket errors (???)
+ * For the server side:
+ * - return EINTR or ERESTART if interrupted by a signal
+ * - return EPIPE if a connection is lost for connection based sockets (TCP...)
+ * - do any cleanup required by recoverable socket errors (???)
+ */
+nfs_send(so, nam, top, rep)
+	register struct socket *so;
+	struct mbuf *nam;
+	register struct mbuf *top;
+	struct nfsreq *rep;
+{
+	struct mbuf *dest;
+	int rv, prflags, sflags;
+
+	if (rep == NULL) {
+		prflags = so->so_proto->pr_flags;
+	} else {
+		/* A terminated request is simply dropped. */
+		if (rep->r_flags & R_SOFTTERM) {
+			m_freem(top);
+			return (EINTR);
+		}
+		/* No socket right now: leave it for a later resend. */
+		so = rep->r_nmp->nm_so;
+		if (so == NULL) {
+			rep->r_flags |= R_MUSTRESEND;
+			m_freem(top);
+			return (0);
+		}
+		rep->r_flags &= ~R_MUSTRESEND;
+		prflags = rep->r_nmp->nm_soflags;
+	}
+
+	/* Only an unconnected, connectionless socket needs an address. */
+	dest = ((prflags & PR_CONNREQUIRED) ||
+	    (so->so_state & SS_ISCONNECTED)) ? (struct mbuf *)0 : nam;
+	sflags = (so->so_type == SOCK_SEQPACKET) ? MSG_EOR : 0;
+
+	rv = sosend(so, dest, (struct uio *)0, top,
+	    (struct mbuf *)0, sflags);
+	if (rv == 0)
+		return (0);
+
+	if (rep == NULL) {
+		log(LOG_INFO, "nfsd send error %d\n", rv);
+	} else {
+		log(LOG_INFO, "nfs send error %d for server %s\n",rv,
+		    rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+		/*
+		 * Client side: a terminated request reports EINTR, any
+		 * other failure schedules a resend.
+		 */
+		if (rep->r_flags & R_SOFTTERM)
+			rv = EINTR;
+		else
+			rep->r_flags |= R_MUSTRESEND;
+	}
+
+	/*
+	 * Only these four errors are passed back to the caller; every
+	 * other one is treated as recoverable (soft) and reported as
+	 * success. (???)
+	 */
+	if (rv == EINTR || rv == ERESTART ||
+	    rv == EWOULDBLOCK || rv == EPIPE)
+		return (rv);
+	return (0);
+}
+
+/*
+ * Receive a Sun RPC Request/Reply. For SOCK_DGRAM, the work is all
+ * done by soreceive(), but for SOCK_STREAM we must deal with the Record
+ * Mark and consolidate the data into a new mbuf list.
+ * nb: Sometimes TCP passes the data up to soreceive() in long lists of
+ *     small mbufs.
+ * For SOCK_STREAM we must be very careful to read an entire record once
+ * we have read any of it, even if the system call has been interrupted.
+ *
+ *	rep	request we are receiving for; supplies the mount point
+ *	aname	out: sender address for unconnected datagram sockets,
+ *		otherwise left NULL
+ *	mp	out: received mbuf chain, NULL on error
+ *
+ * Returns 0 or an errno value.  For reliable transports a receive error
+ * other than EINTR/ERESTART triggers a reconnect and another attempt.
+ * The send lock is taken and released internally; it is never left held
+ * on return.
+ */
+nfs_receive(rep, aname, mp)
+	register struct nfsreq *rep;
+	struct mbuf **aname;
+	struct mbuf **mp;
+{
+	register struct socket *so;
+	struct uio auio;
+	struct iovec aio;
+	register struct mbuf *m;
+	struct mbuf *control;
+	u_long len;
+	struct mbuf **getnam;
+	int error, sotype, rcvflg;
+	struct proc *p = curproc;	/* XXX */
+
+	/*
+	 * Set up arguments for soreceive()
+	 */
+	*mp = (struct mbuf *)0;
+	*aname = (struct mbuf *)0;
+	sotype = rep->r_nmp->nm_sotype;
+
+	/*
+	 * For reliable protocols, lock against other senders/receivers
+	 * in case a reconnect is necessary.
+	 * For SOCK_STREAM, first get the Record Mark to find out how much
+	 * more there is to get.
+	 * We must lock the socket against other receivers
+	 * until we have an entire rpc request/reply.
+	 */
+	if (sotype != SOCK_DGRAM) {
+		if (error = nfs_sndlock(&rep->r_nmp->nm_flag, rep))
+			return (error);
+tryagain:
+		/*
+		 * The send lock is held whenever control reaches this label.
+		 *
+		 * Check for fatal errors and resending request.
+		 */
+		/*
+		 * Ugh: If a reconnect attempt just happened, nm_so
+		 * would have changed. NULL indicates a failed
+		 * attempt that has essentially shut down this
+		 * mount point.
+		 */
+		if (rep->r_mrep || (rep->r_flags & R_SOFTTERM)) {
+			nfs_sndunlock(&rep->r_nmp->nm_flag);
+			return (EINTR);
+		}
+		if ((so = rep->r_nmp->nm_so) == NULL) {
+			if (error = nfs_reconnect(rep)) {
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+				return (error);
+			}
+			goto tryagain;
+		}
+		/* Retransmit (a copy of) the request for as long as asked. */
+		while (rep->r_flags & R_MUSTRESEND) {
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			nfsstats.rpcretries++;
+			if (error = nfs_send(so, rep->r_nmp->nm_nam, m, rep)) {
+				if (error == EINTR || error == ERESTART ||
+				    (error = nfs_reconnect(rep))) {
+					nfs_sndunlock(&rep->r_nmp->nm_flag);
+					return (error);
+				}
+				goto tryagain;
+			}
+		}
+		nfs_sndunlock(&rep->r_nmp->nm_flag);
+		if (sotype == SOCK_STREAM) {
+			/* Read the record mark into len. */
+			aio.iov_base = (caddr_t) &len;
+			aio.iov_len = sizeof(u_long);
+			auio.uio_iov = &aio;
+			auio.uio_iovcnt = 1;
+			auio.uio_segflg = UIO_SYSSPACE;
+			auio.uio_rw = UIO_READ;
+			auio.uio_offset = 0;
+			auio.uio_resid = sizeof(u_long);
+			auio.uio_procp = p;
+			do {
+			   rcvflg = MSG_WAITALL;
+			   error = soreceive(so, (struct mbuf **)0, &auio,
+				(struct mbuf **)0, (struct mbuf **)0, &rcvflg);
+			   if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			   }
+			} while (error == EWOULDBLOCK);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				 "short receive (%d/%d) from nfs server %s\n",
+				 sizeof(u_long) - auio.uio_resid,
+				 sizeof(u_long),
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+			if (error)
+				goto errout;
+			/* Strip the top bit of the mark to get the length. */
+			len = ntohl(len) & ~0x80000000;
+			/*
+			 * This is SERIOUS! We are out of sync with the sender
+			 * and forcing a disconnect/reconnect is all I can do.
+			 */
+			if (len > NFS_MAXPACKET) {
+			    log(LOG_ERR, "%s (%d) from nfs server %s\n",
+				"impossible packet length",
+				len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EFBIG;
+			    goto errout;
+			}
+			/*
+			 * Having read the mark we must read the whole
+			 * record, so interruptions are retried here.
+			 */
+			auio.uio_resid = len;
+			do {
+			    rcvflg = MSG_WAITALL;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, (struct mbuf **)0, &rcvflg);
+			} while (error == EWOULDBLOCK || error == EINTR ||
+				 error == ERESTART);
+			if (!error && auio.uio_resid > 0) {
+			    log(LOG_INFO,
+				"short receive (%d/%d) from nfs server %s\n",
+				len - auio.uio_resid, len,
+				rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			    error = EPIPE;
+			}
+		} else {
+			/*
+			 * NB: Since uio_resid is big, MSG_WAITALL is ignored
+			 * and soreceive() will return when it has either a
+			 * control msg or a data msg.
+			 * We have no use for control msg., but must grab them
+			 * and then throw them away so we know what is going
+			 * on.
+			 */
+			auio.uio_resid = len = 100000000; /* Anything Big */
+			auio.uio_procp = p;
+			do {
+			    rcvflg = 0;
+			    error =  soreceive(so, (struct mbuf **)0,
+				&auio, mp, &control, &rcvflg);
+			    if (control)
+				m_freem(control);
+			    if (error == EWOULDBLOCK && rep) {
+				if (rep->r_flags & R_SOFTTERM)
+					return (EINTR);
+			    }
+			} while (error == EWOULDBLOCK ||
+				 (!error && *mp == NULL && control));
+			if ((rcvflg & MSG_EOR) == 0)
+				printf("Egad!!\n");
+			if (!error && *mp == NULL)
+				error = EPIPE;
+			len -= auio.uio_resid;
+		}
+errout:
+		if (error && error != EINTR && error != ERESTART) {
+			m_freem(*mp);
+			*mp = (struct mbuf *)0;
+			if (error != EPIPE)
+				log(LOG_INFO,
+				    "receive error %d from nfs server %s\n",
+				    error,
+				 rep->r_nmp->nm_mountp->mnt_stat.f_mntfromname);
+			/*
+			 * Reconnect under the send lock and start over.
+			 * "tryagain" expects the lock to be held; if the
+			 * reconnect fails we are about to return, so the
+			 * lock just taken has to be given back or every
+			 * later sender on this mount would block on it.
+			 */
+			error = nfs_sndlock(&rep->r_nmp->nm_flag, rep);
+			if (!error) {
+				error = nfs_reconnect(rep);
+				if (!error)
+					goto tryagain;
+				nfs_sndunlock(&rep->r_nmp->nm_flag);
+			}
+		}
+	} else {
+		/* Datagram socket: a single soreceive() yields one message. */
+		if ((so = rep->r_nmp->nm_so) == NULL)
+			return (EACCES);
+		/* The sender's address is only wanted on unconnected sockets. */
+		if (so->so_state & SS_ISCONNECTED)
+			getnam = (struct mbuf **)0;
+		else
+			getnam = aname;
+		auio.uio_resid = len = 1000000;
+		auio.uio_procp = p;
+		do {
+			rcvflg = 0;
+			error =  soreceive(so, getnam, &auio, mp,
+				(struct mbuf **)0, &rcvflg);
+			if (error == EWOULDBLOCK &&
+			    (rep->r_flags & R_SOFTTERM))
+				return (EINTR);
+		} while (error == EWOULDBLOCK);
+		len -= auio.uio_resid;
+	}
+	if (error) {
+		m_freem(*mp);
+		*mp = (struct mbuf *)0;
+	}
+	/*
+	 * Search for any mbufs that are not a multiple of 4 bytes long
+	 * or with m_data not longword aligned.
+	 * These could cause pointer alignment problems, so copy them to
+	 * well aligned mbufs.
+	 */
+	nfs_realign(*mp, 5 * NFSX_UNSIGNED);
+	return (error);
+}
+
+/*
+ * Implement receipt of reply on a socket.
+ * We must search through the list of received datagrams matching them
+ * with outstanding requests using the xid, until ours is found.
+ *
+ * myrep is the request whose reply the caller is waiting for.  Replies
+ * that belong to other outstanding requests are attached to those
+ * requests (r_mrep/r_md/r_dpos) on the way.  Returns 0 once myrep has its
+ * reply -- or, when R_GETONEREP is set on myrep, after one pass of the
+ * loop whether or not it was ours -- and an errno value when the receive
+ * lock cannot be taken or the receive fails with an error that is not
+ * ignored.
+ */
+/* ARGSUSED */
+nfs_reply(myrep)
+ struct nfsreq *myrep;
+{
+ register struct nfsreq *rep;
+ register struct nfsmount *nmp = myrep->r_nmp;
+ register long t1;
+ struct mbuf *mrep, *nam, *md;
+ u_long rxid, *tl;
+ caddr_t dpos, cp2;
+ int error;
+
+ /*
+ * Loop around until we get our own reply
+ */
+ for (;;) {
+ /*
+ * Lock against other receivers so that I don't get stuck in
+ * sbwait() after someone else has received my reply for me.
+ * Also necessary for connection based protocols to avoid
+ * race conditions during a reconnect.
+ */
+ if (error = nfs_rcvlock(myrep))
+ return (error);
+ /* Already received, bye bye */
+ if (myrep->r_mrep != NULL) {
+ nfs_rcvunlock(&nmp->nm_flag);
+ return (0);
+ }
+ /*
+ * Get the next Rpc reply off the socket
+ */
+ error = nfs_receive(myrep, &nam, &mrep);
+ nfs_rcvunlock(&nmp->nm_flag);
+ if (error) {
+
+ /*
+ * Ignore routing errors on connectionless protocols??
+ *
+ * NOTE(review): nm_so is dereferenced without a NULL
+ * check, while nfs_receive() returns EACCES for a
+ * datagram mount whose nm_so is NULL -- confirm that
+ * NFSIGNORE_SOERROR() cannot be true in that case.
+ */
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error)) {
+ nmp->nm_so->so_error = 0;
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+ return (error);
+ }
+ /* The sender's address is of no interest here. */
+ if (nam)
+ m_freem(nam);
+
+ /*
+ * Get the xid and check that it is an rpc reply
+ */
+ md = mrep;
+ dpos = mtod(md, caddr_t);
+ nfsm_dissect(tl, u_long *, 2*NFSX_UNSIGNED);
+ rxid = *tl++;
+ if (*tl != rpc_reply) {
+ /*
+ * Not a reply: NQNFS mounts hand the message to
+ * nqnfs_callback(), all others count it as invalid and
+ * drop it.
+ */
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ if (nqnfs_callback(nmp, mrep, md, dpos))
+ nfsstats.rpcinvalid++;
+ } else {
+ nfsstats.rpcinvalid++;
+ m_freem(mrep);
+ }
+ /*
+ * No explicit goto in this function targets nfsmout; it is
+ * presumably the failure target of nfsm_dissect() above --
+ * confirm against the macro definition.
+ */
+nfsmout:
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ continue;
+ }
+
+ /*
+ * Loop through the request list to match up the reply
+ * Iff no match, just drop the datagram
+ */
+ rep = nfsreqh.r_next;
+ while (rep != &nfsreqh) {
+ if (rep->r_mrep == NULL && rxid == rep->r_xid) {
+ /* Found it.. */
+ rep->r_mrep = mrep;
+ rep->r_md = md;
+ rep->r_dpos = dpos;
+ /*
+ * Optional rtt logging into the circular nfsrtt log.
+ *
+ * NOTE(review): when proct[] is 0 for this procedure
+ * the two array reads below use index -1 -- confirm
+ * whether that can happen with nfsrtton set.
+ */
+ if (nfsrtton) {
+ struct rttl *rt;
+
+ rt = &nfsrtt.rttl[nfsrtt.pos];
+ rt->proc = rep->r_procnum;
+ rt->rto = NFS_RTO(nmp, proct[rep->r_procnum]);
+ rt->sent = nmp->nm_sent;
+ rt->cwnd = nmp->nm_cwnd;
+ rt->srtt = nmp->nm_srtt[proct[rep->r_procnum] - 1];
+ rt->sdrtt = nmp->nm_sdrtt[proct[rep->r_procnum] - 1];
+ rt->fsid = nmp->nm_mountp->mnt_stat.f_fsid;
+ rt->tstamp = time;
+ if (rep->r_flags & R_TIMING)
+ rt->rtt = rep->r_rtt;
+ else
+ rt->rtt = 1000000;
+ nfsrtt.pos = (nfsrtt.pos + 1) % NFSRTTLOGSIZ;
+ }
+ /*
+ * Update congestion window.
+ * Do the additive increase of
+ * one rpc/rtt.
+ * (Only while the window is full, and never beyond
+ * NFS_MAXCWND.)
+ */
+ if (nmp->nm_cwnd <= nmp->nm_sent) {
+ nmp->nm_cwnd +=
+ (NFS_CWNDSCALE * NFS_CWNDSCALE +
+ (nmp->nm_cwnd >> 1)) / nmp->nm_cwnd;
+ if (nmp->nm_cwnd > NFS_MAXCWND)
+ nmp->nm_cwnd = NFS_MAXCWND;
+ }
+ /*
+ * NOTE(review): nm_sent is decremented here without testing
+ * R_SENT first, unlike the matching code in nfs_request()
+ * -- confirm this is intended.
+ */
+ rep->r_flags &= ~R_SENT;
+ nmp->nm_sent -= NFS_CWNDSCALE;
+ /*
+ * Update rtt using a gain of 0.125 on the mean
+ * and a gain of 0.25 on the deviation.
+ */
+ if (rep->r_flags & R_TIMING) {
+ /*
+ * Since the timer resolution of
+ * NFS_HZ is so coarse, it can often
+ * result in r_rtt == 0. Since
+ * r_rtt == N means that the actual
+ * rtt is between N+dt and N+2-dt ticks,
+ * add 1.
+ */
+ t1 = rep->r_rtt + 1;
+ t1 -= (NFS_SRTT(rep) >> 3);
+ NFS_SRTT(rep) += t1;
+ if (t1 < 0)
+ t1 = -t1;
+ t1 -= (NFS_SDRTT(rep) >> 2);
+ NFS_SDRTT(rep) += t1;
+ }
+ nmp->nm_timeouts = 0;
+ break;
+ }
+ rep = rep->r_next;
+ }
+ /*
+ * If not matched to a request, drop it.
+ * If it's mine, get out.
+ * (A reply matched to somebody else's request stays attached to
+ * that request and we go round again.)
+ */
+ if (rep == &nfsreqh) {
+ nfsstats.rpcunexpected++;
+ m_freem(mrep);
+ } else if (rep == myrep) {
+ if (rep->r_mrep == NULL)
+ panic("nfsreply nil");
+ return (0);
+ }
+ if (myrep->r_flags & R_GETONEREP)
+ return (0);
+ }
+}
+
+/*
+ * nfs_request - goes something like this
+ *	- fill in request struct
+ *	- links it into list
+ *	- calls nfs_send() for first transmit
+ *	- calls nfs_reply() to wait for the reply
+ *	- break down rpc header and return with nfs reply pointed to
+ *	  by mrep or error
+ * nb: always frees up mreq mbuf list
+ *
+ *	vp	vnode the rpc is for; its mount supplies the nfsmount
+ *	mrest	mbuf chain with the procedure arguments; consumed
+ *	procnum	procedure number
+ *	procp	process the request is made for
+ *	cred	credentials used to build the rpc header
+ *	mrp, mdp, dposp
+ *		out: the reply chain, and the mbuf and position within
+ *		it at which the procedure results start (success only)
+ *
+ * Returns 0 with the three out parameters set, or an errno value / nfs
+ * error code with the reply already freed.
+ *
+ * The master copy of the request lives in rep->r_mreq for the whole
+ * call; every transmission sends a fresh copy of it, because nfs_send()
+ * consumes the chain it is given.
+ */
+nfs_request(vp, mrest, procnum, procp, cred, mrp, mdp, dposp)
+	struct vnode *vp;
+	struct mbuf *mrest;
+	int procnum;
+	struct proc *procp;
+	struct ucred *cred;
+	struct mbuf **mrp;
+	struct mbuf **mdp;
+	caddr_t *dposp;
+{
+	register struct mbuf *m, *mrep;
+	register struct nfsreq *rep;
+	register u_long *tl;
+	register int i;
+	struct nfsmount *nmp;
+	struct mbuf *md, *mheadend;
+	struct nfsreq *reph;
+	struct nfsnode *np;
+	time_t reqtime, waituntil;
+	caddr_t dpos, cp2;
+	int t1, nqlflag, cachable, s, error = 0, mrest_len, auth_len, auth_type;
+	int trylater_delay = NQ_TRYLATERDEL, trylater_cnt = 0, failed_auth = 0;
+	u_long xid;
+	u_quad_t frev;
+	char *auth_str;
+
+	nmp = VFSTONFS(vp->v_mount);
+	MALLOC(rep, struct nfsreq *, sizeof(struct nfsreq), M_NFSREQ, M_WAITOK);
+	rep->r_nmp = nmp;
+	rep->r_vp = vp;
+	rep->r_procp = procp;
+	rep->r_procnum = procnum;
+	/* Total length of the argument chain, needed for the rpc header. */
+	i = 0;
+	m = mrest;
+	while (m) {
+		i += m->m_len;
+		m = m->m_next;
+	}
+	mrest_len = i;
+
+	/*
+	 * Get the RPC header with authorization.
+	 * On a Kerberos mount the first attempt goes out with AUTH_UNIX;
+	 * nfs_getauth() is only consulted after the server has rejected
+	 * the credentials once (failed_auth).
+	 */
+kerbauth:
+	auth_str = (char *)0;
+	if (nmp->nm_flag & NFSMNT_KERB) {
+		if (failed_auth) {
+			error = nfs_getauth(nmp, rep, cred, &auth_type,
+				&auth_str, &auth_len);
+			if (error) {
+				free((caddr_t)rep, M_NFSREQ);
+				m_freem(mrest);
+				return (error);
+			}
+		} else {
+			auth_type = RPCAUTH_UNIX;
+			auth_len = 5 * NFSX_UNSIGNED;
+		}
+	} else {
+		auth_type = RPCAUTH_UNIX;
+		if (cred->cr_ngroups < 1)
+			panic("nfsreq nogrps");
+		/* Five fixed words plus one per group, capped at nm_numgrps. */
+		auth_len = ((((cred->cr_ngroups - 1) > nmp->nm_numgrps) ?
+			nmp->nm_numgrps : (cred->cr_ngroups - 1)) << 2) +
+			5 * NFSX_UNSIGNED;
+	}
+	m = nfsm_rpchead(cred, (nmp->nm_flag & NFSMNT_NQNFS), procnum,
+	     auth_type, auth_len, auth_str, mrest, mrest_len, &mheadend, &xid);
+	if (auth_str)
+		free(auth_str, M_TEMP);
+
+	/*
+	 * For stream protocols, insert a Sun RPC Record Mark.
+	 */
+	if (nmp->nm_sotype == SOCK_STREAM) {
+		M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+		*mtod(m, u_long *) = htonl(0x80000000 |
+			 (m->m_pkthdr.len - NFSX_UNSIGNED));
+	}
+	rep->r_mreq = m;
+	rep->r_xid = xid;
+tryagain:
+	if (nmp->nm_flag & NFSMNT_SOFT)
+		rep->r_retry = nmp->nm_retry;
+	else
+		rep->r_retry = NFS_MAXREXMIT + 1;	/* past clip limit */
+	rep->r_rtt = rep->r_rexmit = 0;
+	if (proct[procnum] > 0)
+		rep->r_flags = R_TIMING;
+	else
+		rep->r_flags = 0;
+	rep->r_mrep = NULL;
+
+	/*
+	 * Do the client side RPC.
+	 */
+	nfsstats.rpcrequests++;
+	/*
+	 * Chain request into list of outstanding requests. Be sure
+	 * to put it LAST so timer finds oldest requests first.
+	 */
+	s = splsoftclock();
+	reph = &nfsreqh;
+	reph->r_prev->r_next = rep;
+	rep->r_prev = reph->r_prev;
+	reph->r_prev = rep;
+	rep->r_next = reph;
+
+	/* Get send time for nqnfs */
+	reqtime = time.tv_sec;
+
+	/*
+	 * If backing off another request or avoiding congestion, don't
+	 * send this one now but let timer do it. If not timing a request,
+	 * do it now.
+	 */
+	if (nmp->nm_so && (nmp->nm_sotype != SOCK_DGRAM ||
+		(nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+		nmp->nm_sent < nmp->nm_cwnd)) {
+		splx(s);
+		if (nmp->nm_soflags & PR_CONNREQUIRED)
+			error = nfs_sndlock(&nmp->nm_flag, rep);
+		if (!error) {
+			/*
+			 * Always copy from the master request.  Copying
+			 * from "m" would be wrong on a "tryagain" pass:
+			 * by then m refers to the previous copy, which
+			 * nfs_send() has already consumed.
+			 */
+			m = m_copym(rep->r_mreq, 0, M_COPYALL, M_WAIT);
+			error = nfs_send(nmp->nm_so, nmp->nm_nam, m, rep);
+			if (nmp->nm_soflags & PR_CONNREQUIRED)
+				nfs_sndunlock(&nmp->nm_flag);
+		}
+		if (!error && (rep->r_flags & R_MUSTRESEND) == 0) {
+			nmp->nm_sent += NFS_CWNDSCALE;
+			rep->r_flags |= R_SENT;
+		}
+	} else {
+		splx(s);
+		/* A negative r_rtt marks the request as not yet sent. */
+		rep->r_rtt = -1;
+	}
+
+	/*
+	 * Wait for the reply from our send or the timer's.
+	 */
+	if (!error || error == EPIPE)
+		error = nfs_reply(rep);
+
+	/*
+	 * RPC done, unlink the request.
+	 */
+	s = splsoftclock();
+	rep->r_prev->r_next = rep->r_next;
+	rep->r_next->r_prev = rep->r_prev;
+	splx(s);
+
+	/*
+	 * Decrement the outstanding request count.
+	 */
+	if (rep->r_flags & R_SENT) {
+		rep->r_flags &= ~R_SENT;	/* paranoia */
+		nmp->nm_sent -= NFS_CWNDSCALE;
+	}
+
+	/*
+	 * If there was a successful reply and a tprintf msg.
+	 * tprintf a response.
+	 */
+	if (!error && (rep->r_flags & R_TPRINTFMSG))
+		nfs_msg(rep->r_procp, nmp->nm_mountp->mnt_stat.f_mntfromname,
+		    "is alive again");
+	mrep = rep->r_mrep;
+	md = rep->r_md;
+	dpos = rep->r_dpos;
+	if (error) {
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * break down the rpc header and check if ok
+	 */
+	nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+	if (*tl++ == rpc_msgdenied) {
+		if (*tl == rpc_mismatch)
+			error = EOPNOTSUPP;
+		else if ((nmp->nm_flag & NFSMNT_KERB) && *tl++ == rpc_autherr) {
+			if (*tl == rpc_rejectedcred && failed_auth == 0) {
+				/*
+				 * First rejection on a Kerberos mount:
+				 * detach the arguments from the old
+				 * header, drop the header and the reply,
+				 * and rebuild with real credentials.
+				 */
+				failed_auth++;
+				mheadend->m_next = (struct mbuf *)0;
+				m_freem(mrep);
+				m_freem(rep->r_mreq);
+				goto kerbauth;
+			} else
+				error = EAUTH;
+		} else
+			error = EACCES;
+		m_freem(mrep);
+		m_freem(rep->r_mreq);
+		free((caddr_t)rep, M_NFSREQ);
+		return (error);
+	}
+
+	/*
+	 * skip over the auth_verf, someday we may want to cache auth_short's
+	 * for nfs_reqhead(), but for now just dump it
+	 */
+	if (*++tl != 0) {
+		i = nfsm_rndup(fxdr_unsigned(long, *tl));
+		nfsm_adv(i);
+	}
+	nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+	/* 0 == ok */
+	if (*tl == 0) {
+		nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+		if (*tl != 0) {
+			error = fxdr_unsigned(int, *tl);
+			m_freem(mrep);
+			if ((nmp->nm_flag & NFSMNT_NQNFS) &&
+			    error == NQNFS_TRYLATER) {
+				/*
+				 * The server asked us to come back later:
+				 * sleep out the current delay, lengthen
+				 * it for next time and resend.
+				 */
+				error = 0;
+				waituntil = time.tv_sec + trylater_delay;
+				while (time.tv_sec < waituntil)
+					(void) tsleep((caddr_t)&lbolt,
+						PSOCK, "nqnfstry", 0);
+				trylater_delay *= nfs_backoff[trylater_cnt];
+				if (trylater_cnt < 7)
+					trylater_cnt++;
+				goto tryagain;
+			}
+
+			/*
+			 * If the File Handle was stale, invalidate the
+			 * lookup cache, just in case.
+			 */
+			if (error == ESTALE)
+				cache_purge(vp);
+			m_freem(rep->r_mreq);
+			free((caddr_t)rep, M_NFSREQ);
+			return (error);
+		}
+
+		/*
+		 * For nqnfs, get any lease in reply
+		 */
+		if (nmp->nm_flag & NFSMNT_NQNFS) {
+			nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+			if (*tl) {
+				np = VTONFS(vp);
+				nqlflag = fxdr_unsigned(int, *tl);
+				nfsm_dissect(tl, u_long *, 4*NFSX_UNSIGNED);
+				cachable = fxdr_unsigned(int, *tl++);
+				reqtime += fxdr_unsigned(int, *tl++);
+				/* Only record a lease that has not yet expired. */
+				if (reqtime > time.tv_sec) {
+				    fxdr_hyper(tl, &frev);
+				    nqnfs_clientlease(nmp, np, nqlflag,
+					cachable, reqtime, frev);
+				}
+			}
+		}
+		*mrp = mrep;
+		*mdp = md;
+		*dposp = dpos;
+		m_freem(rep->r_mreq);
+		FREE((caddr_t)rep, M_NFSREQ);
+		return (0);
+	}
+	m_freem(mrep);
+	error = EPROTONOSUPPORT;
+	/*
+	 * Common exit, also reached from the nfsm_dissect()/nfsm_adv()
+	 * macros when the reply cannot be parsed.  Every such use sits
+	 * after the request has been unlinked and before it is freed, so
+	 * the request and its master mbuf chain are released here rather
+	 * than leaked.  The reply itself is deliberately left alone on
+	 * that path.
+	 * NOTE(review): confirm that the macros' failure path frees mrep.
+	 */
+nfsmout:
+	m_freem(rep->r_mreq);
+	free((caddr_t)rep, M_NFSREQ);
+	return (error);
+}
+
+/*
+ * Generate the rpc reply header
+ * siz arg. is used to decide if adding a cluster is worthwhile
+ *
+ *	siz	expected size of the reply body
+ *	nd	per-request state: xid to echo, nqnfs lease flag, duration
+ *	err	0, an errno value, one of the rpc level pseudo errors
+ *		handled explicitly below, or VNOVAL to omit the nfs
+ *		status word
+ *	cache	lease "cachable" value for nqnfs
+ *	frev	file revision for nqnfs
+ *	mrq	out: head of the reply chain
+ *	mbp	out: mbuf being filled
+ *	bposp	out: position within it at which to continue building
+ *
+ * Always returns 0.
+ */
+nfs_rephead(siz, nd, err, cache, frev, mrq, mbp, bposp)
+	int siz;
+	struct nfsd *nd;
+	int err;
+	int cache;
+	u_quad_t *frev;
+	struct mbuf **mrq;
+	struct mbuf **mbp;
+	caddr_t *bposp;
+{
+	register u_long *tl;
+	register struct mbuf *mreq;
+	caddr_t bpos;
+	struct mbuf *mb, *mb2;
+
+	MGETHDR(mreq, M_WAIT, MT_DATA);
+	mb = mreq;
+	/*
+	 * If this is a big reply, use a cluster else
+	 * try and leave leading space for the lower level headers.
+	 */
+	siz += RPC_REPLYSIZ;
+	if (siz >= MINCLSIZE) {
+		MCLGET(mreq, M_WAIT);
+	} else
+		mreq->m_data += max_hdr;
+	tl = mtod(mreq, u_long *);
+	/* Six words are claimed up front; the AUTHERR case uses only five. */
+	mreq->m_len = 6*NFSX_UNSIGNED;
+	bpos = ((caddr_t)tl)+mreq->m_len;
+	*tl++ = nd->nd_retxid;
+	*tl++ = rpc_reply;
+	if (err == ERPCMISMATCH || err == NQNFS_AUTHERR) {
+		*tl++ = rpc_msgdenied;
+		if (err == NQNFS_AUTHERR) {
+			*tl++ = rpc_autherr;
+			*tl = rpc_rejectedcred;
+			mreq->m_len -= NFSX_UNSIGNED;
+			bpos -= NFSX_UNSIGNED;
+		} else {
+			*tl++ = rpc_mismatch;
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);
+		}
+	} else {
+		*tl++ = rpc_msgaccepted;
+		*tl++ = 0;
+		*tl++ = 0;
+		switch (err) {
+		case EPROGUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROGUNAVAIL);
+			break;
+		case EPROGMISMATCH:
+			*tl = txdr_unsigned(RPC_PROGMISMATCH);
+			nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(2);
+			*tl = txdr_unsigned(2);	/* someday 3 */
+			break;
+		case EPROCUNAVAIL:
+			*tl = txdr_unsigned(RPC_PROCUNAVAIL);
+			break;
+		default:
+			*tl = 0;
+			if (err != VNOVAL) {
+				nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+				/*
+				 * nfsrv_errmap[] has ELAST slots, one for
+				 * each errno 1..ELAST.  Anything outside
+				 * that range must not be used as an index;
+				 * it is reported as NFSERR_IO, the table's
+				 * own catch all.
+				 * NOTE(review): protocol level codes above
+				 * ELAST are therefore not passed through
+				 * -- confirm no caller relies on that.
+				 */
+				if (err == 0)
+					*tl = 0;
+				else if (err > 0 && err <= ELAST)
+					*tl = txdr_unsigned(nfsrv_errmap[err - 1]);
+				else
+					*tl = txdr_unsigned(NFSERR_IO);
+			}
+			break;
+		}
+	}
+
+	/*
+	 * For nqnfs, piggyback lease as requested.
+	 */
+	if (nd->nd_nqlflag != NQL_NOVAL && err == 0) {
+		if (nd->nd_nqlflag) {
+			nfsm_build(tl, u_long *, 5*NFSX_UNSIGNED);
+			*tl++ = txdr_unsigned(nd->nd_nqlflag);
+			*tl++ = txdr_unsigned(cache);
+			*tl++ = txdr_unsigned(nd->nd_duration);
+			txdr_hyper(frev, tl);
+		} else {
+			/* Cannot fire: this branch is only taken when it is 0. */
+			if (nd->nd_nqlflag != 0)
+				panic("nqreph");
+			nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+			*tl = 0;
+		}
+	}
+	*mrq = mreq;
+	*mbp = mb;
+	*bposp = bpos;
+	if (err != 0 && err != VNOVAL)
+		nfsstats.srvrpc_errs++;
+	return (0);
+}
+
+/*
+ * Nfs timer routine
+ * Scan the nfsreq list and retransmit any requests that have timed out
+ * To avoid retransmission attempts on STREAM sockets (in the future) make
+ * sure to set the r_retry field to 0 (implies nm_retry == 0).
+ *
+ * The whole scan runs at splnet(). Once per second (by time.tv_sec) it
+ * also calls nqnfs_serverd(), and before returning it re-arms itself with
+ * timeout() for hz / NFS_HZ ticks later. The arg parameter is unused.
+ */
+void
+nfs_timer(arg)
+ void *arg;
+{
+ register struct nfsreq *rep;
+ register struct mbuf *m;
+ register struct socket *so;
+ register struct nfsmount *nmp;
+ register int timeo;
+ static long lasttime = 0;
+ int s, error;
+
+ s = splnet();
+ for (rep = nfsreqh.r_next; rep != &nfsreqh; rep = rep->r_next) {
+ nmp = rep->r_nmp;
+ /* Nothing to do for requests already answered or terminated. */
+ if (rep->r_mrep || (rep->r_flags & R_SOFTTERM))
+ continue;
+ if (nfs_sigintr(nmp, rep, rep->r_procp)) {
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ /*
+ * r_rtt counts timer ticks since the last successful send.
+ * A negative value (set below when a resend was attempted) skips
+ * the timeout test, so the send is retried on this tick.
+ */
+ if (rep->r_rtt >= 0) {
+ rep->r_rtt++;
+ if (nmp->nm_flag & NFSMNT_DUMBTIMR)
+ timeo = nmp->nm_timeo;
+ else
+ timeo = NFS_RTO(nmp, proct[rep->r_procnum]);
+ if (nmp->nm_timeouts > 0)
+ timeo *= nfs_backoff[nmp->nm_timeouts - 1];
+ if (rep->r_rtt <= timeo)
+ continue;
+ /* nm_timeouts indexes nfs_backoff above; it is capped at 8. */
+ if (nmp->nm_timeouts < 8)
+ nmp->nm_timeouts++;
+ }
+ /*
+ * Check for server not responding
+ */
+ if ((rep->r_flags & R_TPRINTFMSG) == 0 &&
+ rep->r_rexmit > nmp->nm_deadthresh) {
+ nfs_msg(rep->r_procp,
+ nmp->nm_mountp->mnt_stat.f_mntfromname,
+ "not responding");
+ rep->r_flags |= R_TPRINTFMSG;
+ }
+ if (rep->r_rexmit >= rep->r_retry) { /* too many */
+ nfsstats.rpctimeouts++;
+ rep->r_flags |= R_SOFTTERM;
+ continue;
+ }
+ /* Non-datagram sockets: only count the timeout, never resend here. */
+ if (nmp->nm_sotype != SOCK_DGRAM) {
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ continue;
+ }
+ if ((so = nmp->nm_so) == NULL)
+ continue;
+
+ /*
+ * If there is enough space and the window allows..
+ * Resend it
+ * Set r_rtt to -1 in case we fail to send it now.
+ */
+ rep->r_rtt = -1;
+ if (sbspace(&so->so_snd) >= rep->r_mreq->m_pkthdr.len &&
+ ((nmp->nm_flag & NFSMNT_DUMBTIMR) ||
+ (rep->r_flags & R_SENT) ||
+ nmp->nm_sent < nmp->nm_cwnd) &&
+ (m = m_copym(rep->r_mreq, 0, M_COPYALL, M_DONTWAIT))){
+ /* Without NFSMNT_NOCONN no address is passed; with it, nm_nam is. */
+ if ((nmp->nm_flag & NFSMNT_NOCONN) == 0)
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ (struct mbuf *)0, (struct mbuf *)0);
+ else
+ error = (*so->so_proto->pr_usrreq)(so, PRU_SEND, m,
+ nmp->nm_nam, (struct mbuf *)0);
+ if (error) {
+ if (NFSIGNORE_SOERROR(nmp->nm_soflags, error))
+ so->so_error = 0;
+ } else {
+ /*
+ * Iff first send, start timing
+ * else turn timing off, backoff timer
+ * and divide congestion window by 2.
+ */
+ if (rep->r_flags & R_SENT) {
+ rep->r_flags &= ~R_TIMING;
+ if (++rep->r_rexmit > NFS_MAXREXMIT)
+ rep->r_rexmit = NFS_MAXREXMIT;
+ nmp->nm_cwnd >>= 1;
+ if (nmp->nm_cwnd < NFS_CWNDSCALE)
+ nmp->nm_cwnd = NFS_CWNDSCALE;
+ nfsstats.rpcretries++;
+ } else {
+ rep->r_flags |= R_SENT;
+ nmp->nm_sent += NFS_CWNDSCALE;
+ }
+ rep->r_rtt = 0;
+ }
+ }
+ }
+
+ /*
+ * Call the nqnfs server timer once a second to handle leases.
+ */
+ if (lasttime != time.tv_sec) {
+ lasttime = time.tv_sec;
+ nqnfs_serverd();
+ }
+ splx(s);
+ timeout(nfs_timer, (void *)0, hz / NFS_HZ);
+}
+
+/*
+ * Decide whether an nfs operation should be abandoned.
+ * Returns EINTR when the request (if one is given) has already been
+ * soft-terminated, or when the mount is interruptible (NFSMNT_INT) and the
+ * process has a pending signal in NFSINT_SIGMASK that is neither masked
+ * nor ignored. Returns 0 in every other case.
+ */
+nfs_sigintr(nmp, rep, p)
+	struct nfsmount *nmp;
+	struct nfsreq *rep;
+	register struct proc *p;
+{
+
+	/* A terminated request wins regardless of the mount flags. */
+	if (rep != NULL && (rep->r_flags & R_SOFTTERM) != 0)
+		return (EINTR);
+	if ((nmp->nm_flag & NFSMNT_INT) == 0)
+		return (0);
+	if (p == NULL || p->p_siglist == 0)
+		return (0);
+	/* Pending, not blocked, not ignored, and one we care about. */
+	if ((p->p_siglist & ~p->p_sigmask & ~p->p_sigignore &
+	    NFSINT_SIGMASK) != 0)
+		return (EINTR);
+	return (0);
+}
+
+/*
+ * Lock a socket against others.
+ * Necessary for STREAM sockets to ensure you get an entire rpc request/reply
+ * and also to avoid race conditions between the processes with nfs requests
+ * in progress when a reconnect is necessary.
+ *
+ * flagp points at the flag word holding NFSMNT_SNDLOCK / NFSMNT_WANTSND.
+ * rep is the request on whose behalf the lock is taken and may be null;
+ * without a request there is no process to interrupt, so the wait is then
+ * never abandoned.
+ * Returns 0 with the lock held, or EINTR if nfs_sigintr() reports that the
+ * request should be given up while waiting.
+ */
+nfs_sndlock(flagp, rep)
+	register int *flagp;
+	struct nfsreq *rep;
+{
+	struct proc *p;
+	int slpflag = 0, slptimeo = 0;
+
+	if (rep) {
+		p = rep->r_procp;
+		if (rep->r_nmp->nm_flag & NFSMNT_INT)
+			slpflag = PCATCH;
+	} else
+		p = (struct proc *)0;
+	while (*flagp & NFSMNT_SNDLOCK) {
+		/*
+		 * Only consult nfs_sigintr() when a request was supplied;
+		 * dereferencing a null rep here would fault as soon as the
+		 * lock is contended.
+		 */
+		if (rep && nfs_sigintr(rep->r_nmp, rep, p))
+			return (EINTR);
+		*flagp |= NFSMNT_WANTSND;
+		(void) tsleep((caddr_t)flagp, slpflag | (PZERO - 1), "nfsndlck",
+			slptimeo);
+		/*
+		 * After the first interruptible sleep, fall back to timed
+		 * sleeps so the condition above is re-checked every 2 * hz.
+		 */
+		if (slpflag == PCATCH) {
+			slpflag = 0;
+			slptimeo = 2 * hz;
+		}
+	}
+	*flagp |= NFSMNT_SNDLOCK;
+	return (0);
+}
+
+/*
+ * Release the send lock taken by nfs_sndlock() and wake any sleeper that
+ * registered interest through NFSMNT_WANTSND.
+ * Panics if the lock is not currently held.
+ */
+void
+nfs_sndunlock(flagp)
+	register int *flagp;
+{
+	register int had_waiter;
+
+	if (!(*flagp & NFSMNT_SNDLOCK))
+		panic("nfs sndunlock");
+	had_waiter = *flagp & NFSMNT_WANTSND;
+	/* Drop the lock and the "wanted" mark together. */
+	*flagp &= ~(NFSMNT_SNDLOCK | NFSMNT_WANTSND);
+	if (had_waiter)
+		wakeup((caddr_t)flagp);
+}
+
+/*
+ * Take the receive lock (NFSMNT_RCVLOCK) of the mount that rep belongs to.
+ * Sleeps while another holder has it. Returns 0 with the lock held, or
+ * EINTR if nfs_sigintr() says the request should be abandoned.
+ */
+nfs_rcvlock(rep)
+	register struct nfsreq *rep;
+{
+	register int *nmflagp = &rep->r_nmp->nm_flag;
+	int catchflag, timo = 0;
+
+	catchflag = (*nmflagp & NFSMNT_INT) ? PCATCH : 0;
+	for (;;) {
+		if ((*nmflagp & NFSMNT_RCVLOCK) == 0)
+			break;
+		if (nfs_sigintr(rep->r_nmp, rep, rep->r_procp))
+			return (EINTR);
+		*nmflagp |= NFSMNT_WANTRCV;
+		(void) tsleep((caddr_t)nmflagp, catchflag | (PZERO - 1),
+		    "nfsrcvlk", timo);
+		/*
+		 * One interruptible sleep only; afterwards poll with a
+		 * 2 * hz timeout so the interrupt test above is repeated.
+		 */
+		if (catchflag == PCATCH) {
+			catchflag = 0;
+			timo = 2 * hz;
+		}
+	}
+	*nmflagp |= NFSMNT_RCVLOCK;
+	return (0);
+}
+
+/*
+ * Release the receive lock taken by nfs_rcvlock() and wake any sleeper
+ * that set NFSMNT_WANTRCV. Panics if the lock is not currently held.
+ */
+void
+nfs_rcvunlock(flagp)
+	register int *flagp;
+{
+	register int had_waiter;
+
+	if (!(*flagp & NFSMNT_RCVLOCK))
+		panic("nfs rcvunlock");
+	had_waiter = *flagp & NFSMNT_WANTRCV;
+	/* Drop the lock and the "wanted" mark together. */
+	*flagp &= ~(NFSMNT_RCVLOCK | NFSMNT_WANTRCV);
+	if (had_waiter)
+		wakeup((caddr_t)flagp);
+}
+
+/*
+ * Check for badly aligned mbuf data areas and
+ * realign data in an mbuf list by copying the data areas up, as required.
+ *
+ * m is the head of the chain. hsiz is the size of the leading header part
+ * that should, when possible, be the only thing left in the first
+ * rewritten mbuf.
+ * The chain is scanned for the first mbuf whose length or data address is
+ * not a multiple of 4. From there on the rest of the chain is compacted in
+ * place, reusing the existing mbufs, and the function returns.
+ */
+void
+nfs_realign(m, hsiz)
+ register struct mbuf *m;
+ int hsiz;
+{
+ register struct mbuf *m2;
+ register int siz, mlen, olen;
+ register caddr_t tcp, fcp;
+ struct mbuf *mnew;
+
+ while (m) {
+ /*
+ * This never happens for UDP, rarely happens for TCP
+ * but frequently happens for iso transport.
+ */
+ if ((m->m_len & 0x3) || (mtod(m, int) & 0x3)) {
+ /* olen/fcp: bytes still to copy from the source mbuf and where. */
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ /*
+ * Misaligned start: move the data pointer to an aligned spot
+ * in the same buffer before copying into it.
+ */
+ if ((int)fcp & 0x3) {
+ m->m_flags &= ~M_PKTHDR;
+ if (m->m_flags & M_EXT)
+ m->m_data = m->m_ext.ext_buf +
+ ((m->m_ext.ext_size - olen) & ~0x3);
+ else
+ m->m_data = m->m_dat;
+ }
+ m->m_len = 0;
+ /* tcp/mnew: write position and the mbuf currently being filled. */
+ tcp = mtod(m, caddr_t);
+ mnew = m;
+ /* m2: next mbuf to be reused as a destination. */
+ m2 = m->m_next;
+
+ /*
+ * If possible, only put the first invariant part
+ * of the RPC header in the first mbuf.
+ */
+ mlen = M_TRAILINGSPACE(m);
+ if (olen <= hsiz && mlen > hsiz)
+ mlen = hsiz;
+
+ /*
+ * Loop through the mbuf list consolidating data.
+ */
+ while (m) {
+ while (olen > 0) {
+ /* Destination full: reset the next mbuf and continue into it. */
+ if (mlen == 0) {
+ m2->m_flags &= ~M_PKTHDR;
+ if (m2->m_flags & M_EXT)
+ m2->m_data = m2->m_ext.ext_buf;
+ else
+ m2->m_data = m2->m_dat;
+ m2->m_len = 0;
+ mlen = M_TRAILINGSPACE(m2);
+ tcp = mtod(m2, caddr_t);
+ mnew = m2;
+ m2 = m2->m_next;
+ }
+ siz = min(mlen, olen);
+ /* Source and destination coincide when nothing has moved yet. */
+ if (tcp != fcp)
+ bcopy(fcp, tcp, siz);
+ mnew->m_len += siz;
+ mlen -= siz;
+ olen -= siz;
+ tcp += siz;
+ fcp += siz;
+ }
+ m = m->m_next;
+ if (m) {
+ olen = m->m_len;
+ fcp = mtod(m, caddr_t);
+ }
+ }
+
+ /*
+ * Finally, set m_len == 0 for any trailing mbufs that have
+ * been copied out of.
+ */
+ while (m2) {
+ m2->m_len = 0;
+ m2 = m2->m_next;
+ }
+ return;
+ }
+ m = m->m_next;
+ }
+}
+
+/*
+ * Socket upcall routine for the nfsd sockets.
+ * The caddr_t arg is a pointer to the "struct nfssvc_sock".
+ * Essentially do as much as possible non-blocking, else punt and it will
+ * be called with M_WAIT from an nfsd.
+ *
+ * Stream sockets: received data is appended to the raw list
+ * (ns_raw/ns_rawend, byte count ns_cc) and nfsrv_getstream() is asked to
+ * carve records out of it.
+ * Other sockets: each received packet becomes one record on the
+ * ns_rec/ns_recend queue, with the sender's address mbuf (if any) linked
+ * in front of the data.
+ * Problems are reported by setting SLP_NEEDQ or SLP_DISCONN in ns_flag.
+ */
+void
+nfsrv_rcv(so, arg, waitflag)
+ struct socket *so;
+ caddr_t arg;
+ int waitflag;
+{
+ register struct nfssvc_sock *slp = (struct nfssvc_sock *)arg;
+ register struct mbuf *m;
+ struct mbuf *mp, *nam;
+ struct uio auio;
+ int flags, error;
+
+ if ((slp->ns_flag & SLP_VALID) == 0)
+ return;
+#ifdef notdef
+ /*
+ * Define this to test for nfsds handling this under heavy load.
+ */
+ if (waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ; goto dorecs;
+ }
+#endif
+ auio.uio_procp = NULL;
+ if (so->so_type == SOCK_STREAM) {
+ /*
+ * If there are already records on the queue, defer soreceive()
+ * to an nfsd so that there is feedback to the TCP layer that
+ * the nfs servers are heavily loaded.
+ */
+ if (slp->ns_rec && waitflag == M_DONTWAIT) {
+ slp->ns_flag |= SLP_NEEDQ;
+ goto dorecs;
+ }
+
+ /*
+ * Do soreceive().
+ */
+ /* Ask for far more than can be queued; the difference is what arrived. */
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp, (struct mbuf **)0, &flags);
+ if (error || mp == (struct mbuf *)0) {
+ if (error == EWOULDBLOCK)
+ slp->ns_flag |= SLP_NEEDQ;
+ else
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ m = mp;
+ /* Append to the raw byte stream and account for the new bytes. */
+ if (slp->ns_rawend) {
+ slp->ns_rawend->m_next = m;
+ slp->ns_cc += 1000000000 - auio.uio_resid;
+ } else {
+ slp->ns_raw = m;
+ slp->ns_cc = 1000000000 - auio.uio_resid;
+ }
+ while (m->m_next)
+ m = m->m_next;
+ slp->ns_rawend = m;
+
+ /*
+ * Now try and parse record(s) out of the raw stream data.
+ */
+ if (error = nfsrv_getstream(slp, waitflag)) {
+ if (error == EPERM)
+ slp->ns_flag |= SLP_DISCONN;
+ else
+ slp->ns_flag |= SLP_NEEDQ;
+ }
+ } else {
+ /* Drain the socket: one soreceive() per packet until none is left. */
+ do {
+ auio.uio_resid = 1000000000;
+ flags = MSG_DONTWAIT;
+ error = soreceive(so, &nam, &auio, &mp,
+ (struct mbuf **)0, &flags);
+ if (mp) {
+ nfs_realign(mp, 10 * NFSX_UNSIGNED);
+ /* Keep the sender's address as the first mbuf of the record. */
+ if (nam) {
+ m = nam;
+ m->m_next = mp;
+ } else
+ m = mp;
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = m;
+ else
+ slp->ns_rec = m;
+ slp->ns_recend = m;
+ m->m_nextpkt = (struct mbuf *)0;
+ }
+ if (error) {
+ if ((so->so_proto->pr_flags & PR_CONNREQUIRED)
+ && error != EWOULDBLOCK) {
+ slp->ns_flag |= SLP_DISCONN;
+ goto dorecs;
+ }
+ }
+ } while (mp);
+ }
+
+ /*
+ * Now try and process the request records, non-blocking.
+ */
+dorecs:
+ /* Only the non-blocking caller hands the work to an nfsd. */
+ if (waitflag == M_DONTWAIT &&
+ (slp->ns_rec || (slp->ns_flag & (SLP_NEEDQ | SLP_DISCONN))))
+ nfsrv_wakenfsd(slp);
+}
+
+/*
+ * Try and extract an RPC request from the mbuf data list received on a
+ * stream socket. The "waitflag" argument indicates whether or not it
+ * can sleep.
+ *
+ * Works on the raw list (ns_raw, ns_rawend, ns_cc) and keeps the length of
+ * the record currently being assembled in ns_reclen (0 means the next four
+ * bytes are a record mark). Complete records are appended to the
+ * ns_rec/ns_recend queue.
+ * Returns 0 once no further complete record is available, EPERM if a
+ * record length is outside NFS_MINPACKET..NFS_MAXPACKET, or EWOULDBLOCK
+ * if m_copym() could not split an mbuf.
+ * SLP_GETSTREAM guards against re-entry and is cleared on every return.
+ */
+nfsrv_getstream(slp, waitflag)
+ register struct nfssvc_sock *slp;
+ int waitflag;
+{
+ register struct mbuf *m;
+ register char *cp1, *cp2;
+ register int len;
+ struct mbuf *om, *m2, *recm;
+ u_long recmark;
+
+ if (slp->ns_flag & SLP_GETSTREAM)
+ panic("nfs getstream");
+ slp->ns_flag |= SLP_GETSTREAM;
+ for (;;) {
+ if (slp->ns_reclen == 0) {
+ /* Need a full 4-byte record mark before anything else. */
+ if (slp->ns_cc < NFSX_UNSIGNED) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ m = slp->ns_raw;
+ if (m->m_len >= NFSX_UNSIGNED) {
+ bcopy(mtod(m, caddr_t), (caddr_t)&recmark, NFSX_UNSIGNED);
+ m->m_data += NFSX_UNSIGNED;
+ m->m_len -= NFSX_UNSIGNED;
+ } else {
+ /* The mark straddles mbufs: gather it one byte at a time. */
+ cp1 = (caddr_t)&recmark;
+ cp2 = mtod(m, caddr_t);
+ while (cp1 < ((caddr_t)&recmark) + NFSX_UNSIGNED) {
+ while (m->m_len == 0) {
+ m = m->m_next;
+ cp2 = mtod(m, caddr_t);
+ }
+ *cp1++ = *cp2++;
+ m->m_data++;
+ m->m_len--;
+ }
+ }
+ slp->ns_cc -= NFSX_UNSIGNED;
+ /*
+ * The top bit is masked off before the value is used as a length
+ * (presumably the record-marking "last fragment" bit - confirm).
+ */
+ slp->ns_reclen = ntohl(recmark) & ~0x80000000;
+ if (slp->ns_reclen < NFS_MINPACKET || slp->ns_reclen > NFS_MAXPACKET) {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EPERM);
+ }
+ }
+
+ /*
+ * Now get the record part.
+ */
+ if (slp->ns_cc == slp->ns_reclen) {
+ /* The raw list is exactly one record: take it whole. */
+ recm = slp->ns_raw;
+ slp->ns_raw = slp->ns_rawend = (struct mbuf *)0;
+ slp->ns_cc = slp->ns_reclen = 0;
+ } else if (slp->ns_cc > slp->ns_reclen) {
+ /* More than one record is queued: cut the first one off the front. */
+ len = 0;
+ m = slp->ns_raw;
+ om = (struct mbuf *)0;
+ while (len < slp->ns_reclen) {
+ if ((len + m->m_len) > slp->ns_reclen) {
+ /*
+ * The record ends inside this mbuf: copy its head for the
+ * record and trim that part off the original.
+ */
+ m2 = m_copym(m, 0, slp->ns_reclen - len,
+ waitflag);
+ if (m2) {
+ if (om) {
+ om->m_next = m2;
+ recm = slp->ns_raw;
+ } else
+ recm = m2;
+ m->m_data += slp->ns_reclen - len;
+ m->m_len -= slp->ns_reclen - len;
+ len = slp->ns_reclen;
+ } else {
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (EWOULDBLOCK);
+ }
+ } else if ((len + m->m_len) == slp->ns_reclen) {
+ /* The record ends exactly at this mbuf boundary: unlink here. */
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ recm = slp->ns_raw;
+ om->m_next = (struct mbuf *)0;
+ } else {
+ om = m;
+ len += m->m_len;
+ m = m->m_next;
+ }
+ }
+ slp->ns_raw = m;
+ slp->ns_cc -= len;
+ slp->ns_reclen = 0;
+ } else {
+ /* The record is not complete yet; wait for more data. */
+ slp->ns_flag &= ~SLP_GETSTREAM;
+ return (0);
+ }
+ nfs_realign(recm, 10 * NFSX_UNSIGNED);
+ if (slp->ns_recend)
+ slp->ns_recend->m_nextpkt = recm;
+ else
+ slp->ns_rec = recm;
+ slp->ns_recend = recm;
+ }
+}
+
+/*
+ * Take the first queued request record off slp and parse its RPC header
+ * into nd by way of nfs_getreq().
+ * Returns ENOBUFS when the socket is no longer valid or nothing is queued,
+ * otherwise whatever nfs_getreq() returns. When parsing fails the address
+ * mbuf (nd_nam) is released here.
+ */
+nfsrv_dorec(slp, nd)
+	register struct nfssvc_sock *slp;
+	register struct nfsd *nd;
+{
+	register struct mbuf *rec;
+	int error;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return (ENOBUFS);
+	rec = slp->ns_rec;
+	if (rec == (struct mbuf *)0)
+		return (ENOBUFS);
+
+	/* Unlink the record from the head of the queue. */
+	slp->ns_rec = rec->m_nextpkt;
+	if (slp->ns_rec != (struct mbuf *)0)
+		rec->m_nextpkt = (struct mbuf *)0;
+	else
+		slp->ns_recend = (struct mbuf *)0;
+
+	/* A leading MT_SONAME mbuf is the sender's address, not data. */
+	if (rec->m_type == MT_SONAME) {
+		nd->nd_nam = rec;
+		nd->nd_mrep = rec->m_next;
+		rec->m_next = (struct mbuf *)0;
+	} else {
+		nd->nd_nam = (struct mbuf *)0;
+		nd->nd_mrep = rec;
+	}
+	nd->nd_md = nd->nd_mrep;
+	nd->nd_dpos = mtod(nd->nd_md, caddr_t);
+
+	error = nfs_getreq(nd, TRUE);
+	if (error)
+		m_freem(nd->nd_nam);
+	return (error);
+}
+
+/*
+ * Parse an RPC request
+ * - verify it
+ * - fill in the cred struct.
+ *
+ * nd carries the request mbufs and position in (nd_mrep, nd_md, nd_dpos)
+ * and receives the results: nd_retxid, nd_procnum, nd_repstat, nd_cr,
+ * the authenticator for rpc_auth_kerb requests, and the nqnfs lease
+ * fields. has_header says whether the xid and message-type words are
+ * still in front of the rpc version word.
+ * Protocol-level rejections (bad rpc version, program, version or
+ * procedure) return 0 with nd_repstat set and nd_procnum forced to
+ * NFSPROC_NOOP. Malformed requests free mrep and return EBADRPC.
+ * The nfsm_* macros evidently jump to nfsmout with error set on failure.
+ */
+nfs_getreq(nd, has_header)
+ register struct nfsd *nd;
+ int has_header;
+{
+ register int len, i;
+ register u_long *tl;
+ register long t1;
+ struct uio uio;
+ struct iovec iov;
+ caddr_t dpos, cp2;
+ u_long nfsvers, auth_type;
+ int error = 0, nqnfs = 0;
+ struct mbuf *mrep, *md;
+
+ mrep = nd->nd_mrep;
+ md = nd->nd_md;
+ dpos = nd->nd_dpos;
+ if (has_header) {
+ /* xid, message type, then the eight words common to both cases. */
+ nfsm_dissect(tl, u_long *, 10*NFSX_UNSIGNED);
+ nd->nd_retxid = *tl++;
+ if (*tl++ != rpc_call) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ } else {
+ nfsm_dissect(tl, u_long *, 8*NFSX_UNSIGNED);
+ }
+ nd->nd_repstat = 0;
+ if (*tl++ != rpc_vers) {
+ nd->nd_repstat = ERPCMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ /* Accept either the nfs or the nqnfs program, each with its own version. */
+ nfsvers = nfs_vers;
+ if (*tl != nfs_prog) {
+ if (*tl == nqnfs_prog) {
+ nqnfs++;
+ nfsvers = nqnfs_vers;
+ } else {
+ nd->nd_repstat = EPROGUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ }
+ tl++;
+ if (*tl++ != nfsvers) {
+ nd->nd_repstat = EPROGMISMATCH;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ nd->nd_procnum = fxdr_unsigned(u_long, *tl++);
+ /* The null procedure needs no credentials at all. */
+ if (nd->nd_procnum == NFSPROC_NULL)
+ return (0);
+ if (nd->nd_procnum >= NFS_NPROCS ||
+ (!nqnfs && nd->nd_procnum > NFSPROC_STATFS) ||
+ (*tl != rpc_auth_unix && *tl != rpc_auth_kerb)) {
+ nd->nd_repstat = EPROCUNAVAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ return (0);
+ }
+ auth_type = *tl++;
+ /* Length of the credential body. */
+ len = fxdr_unsigned(int, *tl++);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+
+ /*
+ * Handle auth_unix or auth_kerb.
+ */
+ if (auth_type == rpc_auth_unix) {
+ /* Step over one word (not examined), then read the name length. */
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > NFS_MAXNAMLEN) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ nfsm_adv(nfsm_rndup(len));
+ nfsm_dissect(tl, u_long *, 3*NFSX_UNSIGNED);
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_cr.cr_gid = fxdr_unsigned(gid_t, *tl++);
+ len = fxdr_unsigned(int, *tl);
+ if (len < 0 || len > RPCAUTH_UNIXGIDS) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ /* The group list plus the two words that follow it. */
+ nfsm_dissect(tl, u_long *, (len + 2)*NFSX_UNSIGNED);
+ /* Groups are stored from slot 1; any beyond NGROUPS are dropped. */
+ for (i = 1; i <= len; i++)
+ if (i < NGROUPS)
+ nd->nd_cr.cr_groups[i] = fxdr_unsigned(gid_t, *tl++);
+ else
+ tl++;
+ nd->nd_cr.cr_ngroups = (len >= NGROUPS) ? NGROUPS : (len + 1);
+ } else if (auth_type == rpc_auth_kerb) {
+ nd->nd_cr.cr_uid = fxdr_unsigned(uid_t, *tl++);
+ nd->nd_authlen = fxdr_unsigned(int, *tl);
+ uio.uio_resid = nfsm_rndup(nd->nd_authlen);
+ /* The string must fit in the credential after its two leading words. */
+ if (uio.uio_resid > (len - 2 * NFSX_UNSIGNED)) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ uio.uio_offset = 0;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_segflg = UIO_SYSSPACE;
+ iov.iov_base = (caddr_t)nd->nd_authstr;
+ iov.iov_len = RPCAUTH_MAXSIZ;
+ nfsm_mtouio(&uio, uio.uio_resid);
+ nfsm_dissect(tl, u_long *, 2 * NFSX_UNSIGNED);
+ nd->nd_flag |= NFSD_NEEDAUTH;
+ }
+
+ /*
+ * Do we have any use for the verifier.
+ * According to the "Remote Procedure Call Protocol Spec." it
+ * should be AUTH_NULL, but some clients make it AUTH_UNIX?
+ * For now, just skip over it
+ */
+ len = fxdr_unsigned(int, *++tl);
+ if (len < 0 || len > RPCAUTH_MAXSIZ) {
+ m_freem(mrep);
+ return (EBADRPC);
+ }
+ if (len > 0) {
+ nfsm_adv(nfsm_rndup(len));
+ }
+
+ /*
+ * For nqnfs, get piggybacked lease request.
+ */
+ if (nqnfs && nd->nd_procnum != NQNFSPROC_EVICTED) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_nqlflag = fxdr_unsigned(int, *tl);
+ if (nd->nd_nqlflag) {
+ nfsm_dissect(tl, u_long *, NFSX_UNSIGNED);
+ nd->nd_duration = fxdr_unsigned(int, *tl);
+ } else
+ nd->nd_duration = NQ_MINLEASE;
+ } else {
+ nd->nd_nqlflag = NQL_NOVAL;
+ nd->nd_duration = NQ_MINLEASE;
+ }
+ /* Hand the advanced parse position back for the procedure's arguments. */
+ nd->nd_md = md;
+ nd->nd_dpos = dpos;
+ return (0);
+nfsmout:
+ return (error);
+}
+
+/*
+ * Hand slp to an idle nfsd: the first daemon on the nfsd list marked
+ * NFSD_WAITING gets a reference to slp and is woken.
+ * If no daemon is waiting, slp is flagged SLP_DOREC and NFSD_CHECKSLP is
+ * set on the list head so that a running nfsd goes looking for the work
+ * in the nfssvc_sock list.
+ */
+void
+nfsrv_wakenfsd(slp)
+	struct nfssvc_sock *slp;
+{
+	register struct nfsd *waiter;
+
+	if ((slp->ns_flag & SLP_VALID) == 0)
+		return;
+	for (waiter = nfsd_head.nd_next;
+	    waiter != (struct nfsd *)&nfsd_head;
+	    waiter = waiter->nd_next) {
+		if ((waiter->nd_flag & NFSD_WAITING) == 0)
+			continue;
+		waiter->nd_flag &= ~NFSD_WAITING;
+		/* A waiting daemon must not already own a socket. */
+		if (waiter->nd_slp)
+			panic("nfsd wakeup");
+		slp->ns_sref++;
+		waiter->nd_slp = slp;
+		wakeup((caddr_t)waiter);
+		return;
+	}
+	slp->ns_flag |= SLP_DOREC;
+	nfsd_head.nd_flag |= NFSD_CHECKSLP;
+}
+
+/*
+ * Report "nfs server <server>: <msg>" through tprintf(). The tprintf
+ * handle is opened for process p when one is given and is NULL otherwise.
+ */
+nfs_msg(p, server, msg)
+	struct proc *p;
+	char *server, *msg;
+{
+	tpr_t tpr = p ? tprintf_open(p) : NULL;
+
+	tprintf(tpr, "nfs server %s: %s\n", server, msg);
+	tprintf_close(tpr);
+}
diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c
new file mode 100644
index 0000000..5778f7d
--- /dev/null
+++ b/sys/nfsserver/nfs_srvsubs.c
@@ -0,0 +1,1130 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_subs.c 8.3 (Berkeley) 1/4/94
+ */
+
+/*
+ * These functions support the macros and help fiddle mbuf chains for
+ * the nfs op functions. They do things like create the rpc header and
+ * copy data between mbuf chains and uio lists.
+ */
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/namei.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nfs.h>
+#include <nfs/xdr_subs.h>
+#include <nfs/nfsm_subs.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+#include <miscfs/specfs/specdev.h>
+
+#include <netinet/in.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+
+#define TRUE 1
+#define FALSE 0
+
+/*
+ * Data items converted to xdr at startup, since they are constant
+ * This is kinda hokey, but may save a little time doing byte swaps
+ * (nfs_init() fills all of them in with txdr_unsigned())
+ */
+u_long nfs_procids[NFS_NPROCS];
+u_long nfs_xdrneg1;
+u_long rpc_call, rpc_vers, rpc_reply, rpc_msgdenied, rpc_autherr,
+ rpc_mismatch, rpc_auth_unix, rpc_msgaccepted, rpc_rejectedcred,
+ rpc_auth_kerb;
+u_long nfs_vers, nfs_prog, nfs_true, nfs_false;
+
+/* And other global data */
+/* Last transaction id handed out by nfsm_rpchead(); 0 is never used. */
+static u_long nfs_xid = 0;
+/* Table of vnode types (presumably indexed by NFS file type - confirm). */
+enum vtype ntov_type[7] = { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VNON };
+/* Defined elsewhere; nfs_init() clears nfs_iodwant and initializes nfsreqh. */
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern struct nfsreq nfsreqh;
+extern int nqnfs_piggy[NFS_NPROCS];
+extern struct nfsrtt nfsrtt;
+/* nqnfs server state, set up by nfs_init() the first time through. */
+extern time_t nqnfsstarttime;
+extern u_long nqnfs_prog, nqnfs_vers;
+extern int nqsrv_clockskew;
+extern int nqsrv_writeslack;
+extern int nqsrv_maxlease;
+
+/*
+ * Create the header for an rpc request packet
+ * The hsiz is the size of the rest of the nfs request header.
+ * (just used to decide if a cluster is a good idea)
+ *
+ * vp may be null. When it is set and its mount has NFSMNT_NQNFS, a lease
+ * request is placed at the front: the lease flag plus the mount's lease
+ * term, or a single zero word when no lease is needed for procid.
+ * Returns the new mbuf and stores the current build position in *bposp.
+ */
+struct mbuf *
+nfsm_reqh(vp, procid, hsiz, bposp)
+ struct vnode *vp;
+ u_long procid;
+ int hsiz;
+ caddr_t *bposp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ struct mbuf *mb2;
+ struct nfsmount *nmp;
+ int nqflag;
+
+ MGET(mb, M_WAIT, MT_DATA);
+ if (hsiz >= MINCLSIZE)
+ MCLGET(mb, M_WAIT);
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * For NQNFS, add lease request.
+ */
+ if (vp) {
+ nmp = VFSTONFS(vp->v_mount);
+ if (nmp->nm_flag & NFSMNT_NQNFS) {
+ nqflag = NQNFS_NEEDLEASE(vp, procid);
+ if (nqflag) {
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(nqflag);
+ *tl = txdr_unsigned(nmp->nm_leaseterm);
+ } else {
+ nfsm_build(tl, u_long *, NFSX_UNSIGNED);
+ *tl = 0;
+ }
+ }
+ }
+ /* Finally, return values */
+ *bposp = bpos;
+ return (mb);
+}
+
+/*
+ * Build the RPC header and fill in the authorization info.
+ * The authorization string argument is only used when the credentials
+ * come from outside of the kernel.
+ * Returns the head of the mbuf list.
+ *
+ * cr supplies uid and groups; nqnfs selects the NQNFS program/version
+ * instead of NFS; auth_type is RPCAUTH_UNIX or RPCAUTH_NQNFS with
+ * auth_len / auth_str describing the credential; mrest (mrest_len bytes)
+ * is the already built remainder of the request, chained on after the
+ * header. *mbp receives the last header mbuf and *xidp the transaction id
+ * in XDR form.
+ */
+struct mbuf *
+nfsm_rpchead(cr, nqnfs, procid, auth_type, auth_len, auth_str, mrest,
+ mrest_len, mbp, xidp)
+ register struct ucred *cr;
+ int nqnfs;
+ int procid;
+ int auth_type;
+ int auth_len;
+ char *auth_str;
+ struct mbuf *mrest;
+ int mrest_len;
+ struct mbuf **mbp;
+ u_long *xidp;
+{
+ register struct mbuf *mb;
+ register u_long *tl;
+ register caddr_t bpos;
+ register int i;
+ struct mbuf *mreq, *mb2;
+ int siz, grpsiz, authsiz;
+
+ /* For RPCAUTH_NQNFS the credential also carries a uid and a length word. */
+ authsiz = nfsm_rndup(auth_len);
+ if (auth_type == RPCAUTH_NQNFS)
+ authsiz += 2 * NFSX_UNSIGNED;
+ MGETHDR(mb, M_WAIT, MT_DATA);
+ /* The header is the credential plus ten fixed XDR words. */
+ if ((authsiz + 10*NFSX_UNSIGNED) >= MINCLSIZE) {
+ MCLGET(mb, M_WAIT);
+ } else if ((authsiz + 10*NFSX_UNSIGNED) < MHLEN) {
+ MH_ALIGN(mb, authsiz + 10*NFSX_UNSIGNED);
+ } else {
+ MH_ALIGN(mb, 8*NFSX_UNSIGNED);
+ }
+ mb->m_len = 0;
+ mreq = mb;
+ bpos = mtod(mb, caddr_t);
+
+ /*
+ * First the RPC header.
+ */
+ nfsm_build(tl, u_long *, 8*NFSX_UNSIGNED);
+ /* Zero is never used as a transaction id. */
+ if (++nfs_xid == 0)
+ nfs_xid++;
+ *tl++ = *xidp = txdr_unsigned(nfs_xid);
+ *tl++ = rpc_call;
+ *tl++ = rpc_vers;
+ if (nqnfs) {
+ *tl++ = txdr_unsigned(NQNFS_PROG);
+ *tl++ = txdr_unsigned(NQNFS_VER1);
+ } else {
+ *tl++ = txdr_unsigned(NFS_PROG);
+ *tl++ = txdr_unsigned(NFS_VER2);
+ }
+ *tl++ = txdr_unsigned(procid);
+
+ /*
+ * And then the authorization cred.
+ */
+ *tl++ = txdr_unsigned(auth_type);
+ *tl = txdr_unsigned(authsiz);
+ switch (auth_type) {
+ case RPCAUTH_UNIX:
+ nfsm_build(tl, u_long *, auth_len);
+ *tl++ = 0; /* stamp ?? */
+ *tl++ = 0; /* NULL hostname */
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl++ = txdr_unsigned(cr->cr_groups[0]);
+ /* Five words are fixed; whatever is left in auth_len is group ids. */
+ grpsiz = (auth_len >> 2) - 5;
+ *tl++ = txdr_unsigned(grpsiz);
+ for (i = 1; i <= grpsiz; i++)
+ *tl++ = txdr_unsigned(cr->cr_groups[i]);
+ break;
+ case RPCAUTH_NQNFS:
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(cr->cr_uid);
+ *tl = txdr_unsigned(auth_len);
+ /* Copy the authenticator string, adding mbufs as they fill up. */
+ siz = auth_len;
+ while (siz > 0) {
+ if (M_TRAILINGSPACE(mb) == 0) {
+ MGET(mb2, M_WAIT, MT_DATA);
+ if (siz >= MINCLSIZE)
+ MCLGET(mb2, M_WAIT);
+ mb->m_next = mb2;
+ mb = mb2;
+ mb->m_len = 0;
+ bpos = mtod(mb, caddr_t);
+ }
+ i = min(siz, M_TRAILINGSPACE(mb));
+ bcopy(auth_str, bpos, i);
+ mb->m_len += i;
+ auth_str += i;
+ bpos += i;
+ siz -= i;
+ }
+ /* Zero-fill up to the rounded length computed by nfsm_rndup(). */
+ if ((siz = (nfsm_rndup(auth_len) - auth_len)) > 0) {
+ for (i = 0; i < siz; i++)
+ *bpos++ = '\0';
+ mb->m_len += siz;
+ }
+ break;
+ };
+ /* The verifier is always RPCAUTH_NULL with a zero length. */
+ nfsm_build(tl, u_long *, 2*NFSX_UNSIGNED);
+ *tl++ = txdr_unsigned(RPCAUTH_NULL);
+ *tl = 0;
+ mb->m_next = mrest;
+ mreq->m_pkthdr.len = authsiz + 10*NFSX_UNSIGNED + mrest_len;
+ mreq->m_pkthdr.rcvif = (struct ifnet *)0;
+ *mbp = mb;
+ return (mreq);
+}
+
+/*
+ * copies mbuf chain to the uio scatter/gather list
+ *
+ * Copies siz bytes starting at *dpos inside mbuf *mrep into the iovecs of
+ * uiop, updating the uio and its iovecs as it goes. Afterwards the padding
+ * up to nfsm_rndup(siz) is skipped, and *mrep / *dpos are left at the new
+ * position in the chain.
+ * Returns 0, EFBIG if the uio runs out of iovecs, EBADRPC if the chain
+ * runs out of data, or the error from nfs_adv() while skipping the pad.
+ */
+nfsm_mbuftouio(mrep, uiop, siz, dpos)
+ struct mbuf **mrep;
+ register struct uio *uiop;
+ int siz;
+ caddr_t *dpos;
+{
+ register char *mbufcp, *uiocp;
+ register int xfer, left, len;
+ register struct mbuf *mp;
+ long uiosiz, rem;
+ int error = 0;
+
+ mp = *mrep;
+ mbufcp = *dpos;
+ /* len: bytes left in the current mbuf; rem: pad bytes after the data. */
+ len = mtod(mp, caddr_t)+mp->m_len-mbufcp;
+ rem = nfsm_rndup(siz)-siz;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EFBIG);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ /* Fill the current iovec, moving along the mbuf chain as needed. */
+ while (left > 0) {
+ while (len == 0) {
+ mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ mbufcp = mtod(mp, caddr_t);
+ len = mp->m_len;
+ }
+ xfer = (left > len) ? len : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (mbufcp, uiocp, xfer);
+ else
+#endif
+ /* NOTE(review): the return value of copyout() is not checked. */
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(mbufcp, uiocp, xfer);
+ else
+ copyout(mbufcp, uiocp, xfer);
+ left -= xfer;
+ len -= xfer;
+ mbufcp += xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ /* Either the iovec is used up, or it is shortened by what was copied. */
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ *dpos = mbufcp;
+ *mrep = mp;
+ /* Skip the pad, crossing into later mbufs only when necessary. */
+ if (rem > 0) {
+ if (len < rem)
+ error = nfs_adv(mrep, dpos, rem, len);
+ else
+ *dpos += rem;
+ }
+ return (error);
+}
+
+/*
+ * copies a uio scatter/gather list to an mbuf chain...
+ *
+ * Appends siz bytes taken from the iovecs of uiop to the chain ending at
+ * *mq, allocating further mbufs as they fill, then zero-pads up to
+ * nfsm_rndup(siz). On return *mq is the last mbuf and *bpos the position
+ * just past the data written.
+ * Returns 0, or EINVAL if the uio runs out of iovecs.
+ */
+nfsm_uiotombuf(uiop, mq, siz, bpos)
+ register struct uio *uiop;
+ struct mbuf **mq;
+ int siz;
+ caddr_t *bpos;
+{
+ register char *uiocp;
+ register struct mbuf *mp, *mp2;
+ register int xfer, left, mlen;
+ int uiosiz, clflg, rem;
+ char *cp;
+
+ /* clflg: give newly allocated mbufs a cluster when the data is large. */
+ if (siz > MLEN) /* or should it >= MCLBYTES ?? */
+ clflg = 1;
+ else
+ clflg = 0;
+ rem = nfsm_rndup(siz)-siz;
+ /* mp is the mbuf being filled; mp2 the current tail of the chain. */
+ mp = mp2 = *mq;
+ while (siz > 0) {
+ if (uiop->uio_iovcnt <= 0 || uiop->uio_iov == NULL)
+ return (EINVAL);
+ left = uiop->uio_iov->iov_len;
+ uiocp = uiop->uio_iov->iov_base;
+ if (left > siz)
+ left = siz;
+ uiosiz = left;
+ while (left > 0) {
+ mlen = M_TRAILINGSPACE(mp);
+ if (mlen == 0) {
+ MGET(mp, M_WAIT, MT_DATA);
+ if (clflg)
+ MCLGET(mp, M_WAIT);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ mp2 = mp;
+ mlen = M_TRAILINGSPACE(mp);
+ }
+ xfer = (left > mlen) ? mlen : left;
+#ifdef notdef
+ /* Not Yet.. */
+ if (uiop->uio_iov->iov_op != NULL)
+ (*(uiop->uio_iov->iov_op))
+ (uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+#endif
+ /* NOTE(review): the return value of copyin() is not checked. */
+ if (uiop->uio_segflg == UIO_SYSSPACE)
+ bcopy(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ else
+ copyin(uiocp, mtod(mp, caddr_t)+mp->m_len, xfer);
+ mp->m_len += xfer;
+ left -= xfer;
+ uiocp += xfer;
+ uiop->uio_offset += xfer;
+ uiop->uio_resid -= xfer;
+ }
+ /* Either the iovec is used up, or it is shortened by what was copied. */
+ if (uiop->uio_iov->iov_len <= siz) {
+ uiop->uio_iovcnt--;
+ uiop->uio_iov++;
+ } else {
+ uiop->uio_iov->iov_base += uiosiz;
+ uiop->uio_iov->iov_len -= uiosiz;
+ }
+ siz -= uiosiz;
+ }
+ /* Zero the pad bytes, in a fresh mbuf if the current one is too full. */
+ if (rem > 0) {
+ if (rem > M_TRAILINGSPACE(mp)) {
+ MGET(mp, M_WAIT, MT_DATA);
+ mp->m_len = 0;
+ mp2->m_next = mp;
+ }
+ cp = mtod(mp, caddr_t)+mp->m_len;
+ for (left = 0; left < rem; left++)
+ *cp++ = '\0';
+ mp->m_len += rem;
+ *bpos = cp;
+ } else
+ *bpos = mtod(mp, caddr_t)+mp->m_len;
+ *mq = mp;
+ return (0);
+}
+
+/*
+ * Help break down an mbuf chain by setting the first siz bytes contiguous
+ * pointed to by returned val.
+ * This is used by the macros nfsm_dissect and nfsm_dissecton for tough
+ * cases. (The macros use the vars. dpos and dpos2)
+ *
+ * *mdp / *dposp are the current mbuf and position, left is the number of
+ * bytes remaining in that mbuf, and *cp2 receives the address of the siz
+ * contiguous bytes. When the bytes straddle mbufs, a new mbuf is linked in
+ * after the current one and the pieces are copied into it.
+ * Returns 0, or EBADRPC if the chain holds fewer than siz bytes.
+ * Panics if a straddling request is larger than MHLEN.
+ */
+nfsm_disct(mdp, dposp, siz, left, cp2)
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ int siz;
+ int left;
+ caddr_t *cp2;
+{
+ register struct mbuf *mp, *mp2;
+ register int siz2, xfer;
+ register caddr_t p;
+
+ mp = *mdp;
+ /* Step past mbufs that have nothing left in them. */
+ while (left == 0) {
+ *mdp = mp = mp->m_next;
+ if (mp == NULL)
+ return (EBADRPC);
+ left = mp->m_len;
+ *dposp = mtod(mp, caddr_t);
+ }
+ if (left >= siz) {
+ /* Already contiguous: just hand out the current position. */
+ *cp2 = *dposp;
+ *dposp += siz;
+ } else if (mp->m_next == NULL) {
+ return (EBADRPC);
+ } else if (siz > MHLEN) {
+ panic("nfs S too big");
+ } else {
+ /* Insert a new mbuf after mp and gather the siz bytes into it. */
+ MGET(mp2, M_WAIT, MT_DATA);
+ mp2->m_next = mp->m_next;
+ mp->m_next = mp2;
+ mp->m_len -= left;
+ mp = mp2;
+ *cp2 = p = mtod(mp, caddr_t);
+ bcopy(*dposp, p, left); /* Copy what was left */
+ siz2 = siz-left;
+ p += left;
+ mp2 = mp->m_next;
+ /* Loop around copying up the siz2 bytes */
+ while (siz2 > 0) {
+ if (mp2 == NULL)
+ return (EBADRPC);
+ xfer = (siz2 > mp2->m_len) ? mp2->m_len : siz2;
+ if (xfer > 0) {
+ bcopy(mtod(mp2, caddr_t), p, xfer);
+ NFSMADV(mp2, xfer);
+ mp2->m_len -= xfer;
+ p += xfer;
+ siz2 -= xfer;
+ }
+ if (siz2 > 0)
+ mp2 = mp2->m_next;
+ }
+ mp->m_len = siz;
+ /* Parsing resumes in the mbuf that supplied the last bytes. */
+ *mdp = mp2;
+ *dposp = mtod(mp2, caddr_t);
+ }
+ return (0);
+}
+
+/*
+ * Move the parse position forward by offs bytes.
+ * left is the number of bytes remaining in the current mbuf *mdp. On
+ * success *mdp and *dposp describe the new position and 0 is returned;
+ * EBADRPC is returned if the chain ends first.
+ */
+nfs_adv(mdp, dposp, offs, left)
+	struct mbuf **mdp;
+	caddr_t *dposp;
+	int offs;
+	int left;
+{
+	register struct mbuf *cur;
+	register int avail;
+
+	cur = *mdp;
+	/* Use up whole mbufs until the target falls inside the current one. */
+	for (avail = left; avail < offs; avail = cur->m_len) {
+		offs -= avail;
+		cur = cur->m_next;
+		if (cur == NULL)
+			return (EBADRPC);
+	}
+	*mdp = cur;
+	*dposp = mtod(cur, caddr_t) + offs;
+	return (0);
+}
+
+/*
+ * Copy a string into mbufs for the hard cases...
+ *
+ * Writes the length word followed by the siz bytes at cp, starting at
+ * *bpos in mbuf *mb and continuing into newly allocated mbufs. The final
+ * mbuf's length is rounded up with nfsm_rndup(). On return *mb / *bpos
+ * describe the end of the data. Always returns 0.
+ * NOTE(review): the first part copies "left" bytes without comparing
+ * against siz, so this presumably relies on callers only using it when
+ * the string does not fit in the current mbuf - confirm.
+ */
+nfsm_strtmbuf(mb, bpos, cp, siz)
+ struct mbuf **mb;
+ char **bpos;
+ char *cp;
+ long siz;
+{
+ register struct mbuf *m1, *m2;
+ long left, xfer, len, tlen;
+ u_long *tl;
+ int putsize;
+
+ /* putsize: the length word still has to be written. */
+ putsize = 1;
+ m2 = *mb;
+ left = M_TRAILINGSPACE(m2);
+ if (left > 0) {
+ /* Use whatever room is left in the current mbuf first. */
+ tl = ((u_long *)(*bpos));
+ *tl++ = txdr_unsigned(siz);
+ putsize = 0;
+ left -= NFSX_UNSIGNED;
+ m2->m_len += NFSX_UNSIGNED;
+ if (left > 0) {
+ bcopy(cp, (caddr_t) tl, left);
+ siz -= left;
+ cp += left;
+ m2->m_len += left;
+ left = 0;
+ }
+ }
+ /* Loop around adding mbufs */
+ while (siz > 0) {
+ MGET(m1, M_WAIT, MT_DATA);
+ if (siz > MLEN)
+ MCLGET(m1, M_WAIT);
+ /* m_len temporarily holds the capacity; the real length is set below. */
+ m1->m_len = NFSMSIZ(m1);
+ m2->m_next = m1;
+ m2 = m1;
+ tl = mtod(m1, u_long *);
+ tlen = 0;
+ if (putsize) {
+ *tl++ = txdr_unsigned(siz);
+ m1->m_len -= NFSX_UNSIGNED;
+ tlen = NFSX_UNSIGNED;
+ putsize = 0;
+ }
+ if (siz < m1->m_len) {
+ /* Last piece: clear the final word first so the pad bytes are zero. */
+ len = nfsm_rndup(siz);
+ xfer = siz;
+ if (xfer < len)
+ *(tl+(xfer>>2)) = 0;
+ } else {
+ xfer = len = m1->m_len;
+ }
+ bcopy(cp, (caddr_t) tl, xfer);
+ m1->m_len = len+tlen;
+ siz -= xfer;
+ cp += xfer;
+ }
+ *mb = m1;
+ *bpos = mtod(m1, caddr_t)+m1->m_len;
+ return (0);
+}
+
+/*
+ * Called once to initialize data structures...
+ *
+ * Pre-converts the RPC and NFS protocol constants to XDR form with
+ * txdr_unsigned() and stores them in globals, clears the async daemon
+ * wait slots, initializes the nfs_bufq queue, then calls the nfsnode
+ * table, server and server request cache initializers.  The nqnfs
+ * server state is set up only while nqnfsstarttime is still zero.
+ * Finally the request list head is made an empty circular list and
+ * nfs_timer() is called.
+ */
+nfs_init()
+{
+ register int i;
+
+ nfsrtt.pos = 0;
+ rpc_vers = txdr_unsigned(RPC_VER2);
+ rpc_call = txdr_unsigned(RPC_CALL);
+ rpc_reply = txdr_unsigned(RPC_REPLY);
+ rpc_msgdenied = txdr_unsigned(RPC_MSGDENIED);
+ rpc_msgaccepted = txdr_unsigned(RPC_MSGACCEPTED);
+ rpc_mismatch = txdr_unsigned(RPC_MISMATCH);
+ rpc_autherr = txdr_unsigned(RPC_AUTHERR);
+ rpc_rejectedcred = txdr_unsigned(AUTH_REJECTCRED);
+ rpc_auth_unix = txdr_unsigned(RPCAUTH_UNIX);
+ rpc_auth_kerb = txdr_unsigned(RPCAUTH_NQNFS); /* note: set from RPCAUTH_NQNFS */
+ nfs_vers = txdr_unsigned(NFS_VER2);
+ nfs_prog = txdr_unsigned(NFS_PROG);
+ nfs_true = txdr_unsigned(TRUE);
+ nfs_false = txdr_unsigned(FALSE);
+ /* Loop thru nfs procids */
+ for (i = 0; i < NFS_NPROCS; i++)
+ nfs_procids[i] = txdr_unsigned(i);
+ /* Ensure async daemons disabled */
+ for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+ nfs_iodwant[i] = (struct proc *)0;
+ TAILQ_INIT(&nfs_bufq);
+ nfs_xdrneg1 = txdr_unsigned(-1);
+ nfs_nhinit(); /* Init the nfsnode table */
+ nfsrv_init(0); /* Init server data structures */
+ nfsrv_initcache(); /* Init the server request cache */
+
+ /*
+ * Initialize the nqnfs server stuff.
+ * Guarded by nqnfsstarttime == 0, so it runs only until a start
+ * time has been recorded.
+ */
+ if (nqnfsstarttime == 0) {
+ nqnfsstarttime = boottime.tv_sec + nqsrv_maxlease
+ + nqsrv_clockskew + nqsrv_writeslack;
+ NQLOADNOVRAM(nqnfsstarttime);
+ nqnfs_prog = txdr_unsigned(NQNFS_PROG);
+ nqnfs_vers = txdr_unsigned(NQNFS_VER1);
+ nqthead.th_head[0] = &nqthead; /* empty list: head points at itself */
+ nqthead.th_head[1] = &nqthead;
+ nqfhead = hashinit(NQLCHSZ, M_NQLEASE, &nqfheadhash);
+ }
+
+ /*
+ * Initialize reply list and start timer
+ */
+ nfsreqh.r_prev = nfsreqh.r_next = &nfsreqh;
+ nfs_timer();
+}
+
+/*
+ * Attribute cache routines.
+ * nfs_loadattrcache() - loads or updates the cache contents from attributes
+ * that are on the mbuf list
+ * nfs_getattrcache() - returns valid attributes if found in cache, returns
+ * error otherwise
+ */
+
+/*
+ * Load the attribute cache (that lives in the nfsnode entry) with
+ * the values on the mbuf list and
+ * Iff vaper not NULL
+ * copy the attributes to *vaper
+ *
+ * vpp - in/out vnode; replaced when a device node turns out to alias
+ * an existing vnode (see checkalias() below)
+ * mdp, dposp - in/out mbuf and data position of the reply being parsed;
+ * advanced past the attributes on success
+ * vaper - optional destination for a copy of the attributes
+ *
+ * Returns 0 on success, the error from nfsm_disct() if the attributes
+ * cannot be dissected, or EOPNOTSUPP for a fifo when FIFO is not
+ * configured.
+ */
+nfs_loadattrcache(vpp, mdp, dposp, vaper)
+ struct vnode **vpp;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct vattr *vaper;
+{
+ register struct vnode *vp = *vpp;
+ register struct vattr *vap;
+ register struct nfsv2_fattr *fp;
+ extern int (**spec_nfsv2nodeop_p)();
+ register struct nfsnode *np, *nq, **nhpp;
+ register long t1;
+ caddr_t dpos, cp2;
+ int error = 0, isnq;
+ struct mbuf *md;
+ enum vtype vtyp;
+ u_short vmode;
+ long rdev;
+ struct timespec mtime;
+ struct vnode *nvp;
+
+ md = *mdp;
+ dpos = *dposp;
+ t1 = (mtod(md, caddr_t) + md->m_len) - dpos; /* bytes left in the current mbuf */
+ isnq = (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQNFS);
+ if (error = nfsm_disct(&md, &dpos, NFSX_FATTR(isnq), t1, &cp2))
+ return (error);
+ fp = (struct nfsv2_fattr *)cp2;
+ vtyp = nfstov_type(fp->fa_type);
+ vmode = fxdr_unsigned(u_short, fp->fa_mode);
+ if (vtyp == VNON || vtyp == VREG)
+ vtyp = IFTOVT(vmode); /* derive the type from the mode bits instead */
+ if (isnq) {
+ rdev = fxdr_unsigned(long, fp->fa_nqrdev);
+ fxdr_nqtime(&fp->fa_nqmtime, &mtime);
+ } else {
+ rdev = fxdr_unsigned(long, fp->fa_nfsrdev);
+ fxdr_nfstime(&fp->fa_nfsmtime, &mtime);
+ }
+ /*
+ * If v_type == VNON it is a new node, so fill in the v_type,
+ * n_mtime fields. Check to see if it represents a special
+ * device, and if so, check for a possible alias. Once the
+ * correct vnode has been obtained, fill in the rest of the
+ * information.
+ */
+ np = VTONFS(vp);
+ if (vp->v_type == VNON) {
+ if (vtyp == VCHR && rdev == 0xffffffff) /* VCHR with rdev of all ones is treated as a fifo */
+ vp->v_type = vtyp = VFIFO;
+ else
+ vp->v_type = vtyp;
+ if (vp->v_type == VFIFO) {
+#ifdef FIFO
+ extern int (**fifo_nfsv2nodeop_p)();
+ vp->v_op = fifo_nfsv2nodeop_p;
+#else
+ return (EOPNOTSUPP);
+#endif /* FIFO */
+ }
+ if (vp->v_type == VCHR || vp->v_type == VBLK) {
+ vp->v_op = spec_nfsv2nodeop_p;
+ if (nvp = checkalias(vp, (dev_t)rdev, vp->v_mount)) {
+ /*
+ * Discard unneeded vnode, but save its nfsnode.
+ * The nfsnode is first unlinked from its hash chain.
+ */
+ if (nq = np->n_forw)
+ nq->n_back = np->n_back;
+ *np->n_back = nq;
+ nvp->v_data = vp->v_data;
+ vp->v_data = NULL;
+ vp->v_op = spec_vnodeop_p;
+ vrele(vp);
+ vgone(vp);
+ /*
+ * Reinitialize aliased node.
+ * The nfsnode is pointed at the alias vnode and put back
+ * at the head of its hash chain.
+ */
+ np->n_vnode = nvp;
+ nhpp = (struct nfsnode **)nfs_hash(&np->n_fh);
+ if (nq = *nhpp)
+ nq->n_back = &np->n_forw;
+ np->n_forw = nq;
+ np->n_back = nhpp;
+ *nhpp = np;
+ *vpp = vp = nvp;
+ }
+ }
+ np->n_mtime = mtime.ts_sec;
+ }
+ vap = &np->n_vattr;
+ vap->va_type = vtyp;
+ vap->va_mode = (vmode & 07777);
+ vap->va_nlink = fxdr_unsigned(u_short, fp->fa_nlink);
+ vap->va_uid = fxdr_unsigned(uid_t, fp->fa_uid);
+ vap->va_gid = fxdr_unsigned(gid_t, fp->fa_gid);
+ vap->va_rdev = (dev_t)rdev;
+ vap->va_mtime = mtime;
+ vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0];
+ if (isnq) {
+ fxdr_hyper(&fp->fa_nqsize, &vap->va_size);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nqblocksize);
+ fxdr_hyper(&fp->fa_nqbytes, &vap->va_bytes);
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nqfileid);
+ fxdr_nqtime(&fp->fa_nqatime, &vap->va_atime);
+ vap->va_flags = fxdr_unsigned(u_long, fp->fa_nqflags);
+ fxdr_nqtime(&fp->fa_nqctime, &vap->va_ctime);
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nqgen);
+ fxdr_hyper(&fp->fa_nqfilerev, &vap->va_filerev);
+ } else {
+ vap->va_size = fxdr_unsigned(u_long, fp->fa_nfssize);
+ vap->va_blocksize = fxdr_unsigned(long, fp->fa_nfsblocksize);
+ vap->va_bytes = fxdr_unsigned(long, fp->fa_nfsblocks) * NFS_FABLKSIZE;
+ vap->va_fileid = fxdr_unsigned(long, fp->fa_nfsfileid);
+ fxdr_nfstime(&fp->fa_nfsatime, &vap->va_atime);
+ vap->va_flags = 0;
+ vap->va_ctime.ts_sec = fxdr_unsigned(long, fp->fa_nfsctime.nfs_sec);
+ vap->va_ctime.ts_nsec = 0;
+ vap->va_gen = fxdr_unsigned(u_long, fp->fa_nfsctime.nfs_usec); /* the ctime usec field is stored as va_gen here */
+ vap->va_filerev = 0;
+ }
+ if (vap->va_size != np->n_size) {
+ if (vap->va_type == VREG) {
+ if (np->n_flag & NMODIFIED) {
+ if (vap->va_size < np->n_size)
+ vap->va_size = np->n_size; /* locally modified file: keep the larger local size */
+ else
+ np->n_size = vap->va_size;
+ } else
+ np->n_size = vap->va_size;
+ vnode_pager_setsize(vp, (u_long)np->n_size);
+ } else
+ np->n_size = vap->va_size;
+ }
+ np->n_attrstamp = time.tv_sec; /* time the cache was loaded; checked by nfs_getattrcache() */
+ *dposp = dpos;
+ *mdp = md;
+ if (vaper != NULL) {
+ bcopy((caddr_t)vap, (caddr_t)vaper, sizeof(*vap));
+#ifdef notdef
+ if ((np->n_flag & NMODIFIED) && np->n_size > vap->va_size)
+ if (np->n_size > vap->va_size)
+ vaper->va_size = np->n_size;
+#endif
+ if (np->n_flag & NCHG) { /* report locally recorded access/update times in the copy */
+ if (np->n_flag & NACC) {
+ vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+ vaper->va_atime.ts_nsec =
+ np->n_atim.tv_usec * 1000;
+ }
+ if (np->n_flag & NUPD) {
+ vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+ vaper->va_mtime.ts_nsec =
+ np->n_mtim.tv_usec * 1000;
+ }
+ }
+ }
+ return (0);
+}
+
+/*
+ * Serve attributes from the nfsnode attribute cache while it is fresh.
+ *
+ * On NFSMNT_NQLOOKLEASE mounts the cache is usable only while
+ * NQNFS_CKCACHABLE() holds for a read lease and the cache has been
+ * loaded (n_attrstamp != 0); otherwise it is usable while its age is
+ * below NFS_ATTRTIMEO().  A stale cache counts as a miss and yields
+ * ENOENT.  On a hit the cached and local file sizes are reconciled, the
+ * attributes are copied to *vaper with any locally recorded access and
+ * update times substituted, and 0 is returned.
+ */
+nfs_getattrcache(vp, vaper)
+	register struct vnode *vp;
+	struct vattr *vaper;
+{
+	register struct nfsnode *np = VTONFS(vp);
+	register struct vattr *cached = &np->n_vattr;
+	int stale;
+
+	if (VFSTONFS(vp->v_mount)->nm_flag & NFSMNT_NQLOOKLEASE)
+		stale = (!NQNFS_CKCACHABLE(vp, NQL_READ) ||
+		    np->n_attrstamp == 0);
+	else
+		stale = ((time.tv_sec - np->n_attrstamp) >=
+		    NFS_ATTRTIMEO(np));
+	if (stale) {
+		nfsstats.attrcache_misses++;
+		return (ENOENT);
+	}
+	nfsstats.attrcache_hits++;
+
+	/*
+	 * Reconcile sizes.  Only a locally modified regular file whose
+	 * local size is larger keeps the local value; in every other case
+	 * the cached size wins.
+	 */
+	if (cached->va_size != np->n_size) {
+		if (cached->va_type == VREG && (np->n_flag & NMODIFIED) &&
+		    cached->va_size < np->n_size)
+			cached->va_size = np->n_size;
+		else
+			np->n_size = cached->va_size;
+		if (cached->va_type == VREG)
+			vnode_pager_setsize(vp, (u_long)np->n_size);
+	}
+	bcopy((caddr_t)cached, (caddr_t)vaper, sizeof(struct vattr));
+#ifdef notdef
+	if ((np->n_flag & NMODIFIED) == 0) {
+		np->n_size = vaper->va_size;
+		vnode_pager_setsize(vp, (u_long)np->n_size);
+	} else if (np->n_size > vaper->va_size)
+	if (np->n_size > vaper->va_size)
+		vaper->va_size = np->n_size;
+#endif
+	/* Without pending local time changes the copy is already final. */
+	if ((np->n_flag & NCHG) == 0)
+		return (0);
+	if (np->n_flag & NACC) {
+		vaper->va_atime.ts_sec = np->n_atim.tv_sec;
+		vaper->va_atime.ts_nsec = np->n_atim.tv_usec * 1000;
+	}
+	if (np->n_flag & NUPD) {
+		vaper->va_mtime.ts_sec = np->n_mtim.tv_sec;
+		vaper->va_mtime.ts_nsec = np->n_mtim.tv_usec * 1000;
+	}
+	return (0);
+}
+
+/*
+ * Set up nameidata for a lookup() call and do it
+ *
+ * ndp - nameidata to fill in; its componentname is ndp->ni_cnd
+ * fhp - file handle of the directory to start from
+ * len - length of the name held in the request mbufs
+ * slp, nam - passed through to nfsrv_fhtovp()
+ * mdp, dposp - in/out mbuf and data position; advanced past the name
+ * and its XDR padding
+ * p - process recorded in cn_proc for the lookup
+ *
+ * Returns 0 on success.  Errors: EBADRPC if the mbuf chain ends inside
+ * the name, EINVAL if the name contains a NUL or '/' or the lookup ends
+ * on a symbolic link, ENOTDIR if the file handle is not a directory, or
+ * whatever nfs_adv(), nfsrv_fhtovp() or lookup() return.  The pathname
+ * buffer is kept (and HASBUF set) only on success with SAVENAME or
+ * SAVESTART requested; on every other path it is freed before return.
+ */
+nfs_namei(ndp, fhp, len, slp, nam, mdp, dposp, p)
+ register struct nameidata *ndp;
+ fhandle_t *fhp;
+ int len;
+ struct nfssvc_sock *slp;
+ struct mbuf *nam;
+ struct mbuf **mdp;
+ caddr_t *dposp;
+ struct proc *p;
+{
+ register int i, rem;
+ register struct mbuf *md;
+ register char *fromcp, *tocp;
+ struct vnode *dp;
+ int error, rdonly;
+ struct componentname *cnp = &ndp->ni_cnd;
+
+ MALLOC(cnp->cn_pnbuf, char *, len + 1, M_NAMEI, M_WAITOK);
+ /*
+ * Copy the name from the mbuf list to cnp->cn_pnbuf
+ * and set the various ndp fields appropriately.
+ */
+ fromcp = *dposp;
+ tocp = cnp->cn_pnbuf;
+ md = *mdp;
+ rem = mtod(md, caddr_t) + md->m_len - fromcp; /* bytes left in the current mbuf */
+ cnp->cn_hash = 0;
+ for (i = 0; i < len; i++) {
+ while (rem == 0) { /* current mbuf exhausted: move to the next one */
+ md = md->m_next;
+ if (md == NULL) {
+ error = EBADRPC;
+ goto out;
+ }
+ fromcp = mtod(md, caddr_t);
+ rem = md->m_len;
+ }
+ if (*fromcp == '\0' || *fromcp == '/') { /* only a single path component is accepted */
+ error = EINVAL;
+ goto out;
+ }
+ cnp->cn_hash += (unsigned char)*fromcp;
+ *tocp++ = *fromcp++;
+ rem--;
+ }
+ *tocp = '\0';
+ *mdp = md;
+ *dposp = fromcp;
+ len = nfsm_rndup(len)-len; /* len now holds the number of XDR pad bytes to skip */
+ if (len > 0) {
+ if (rem >= len)
+ *dposp += len;
+ else if (error = nfs_adv(mdp, dposp, len, rem))
+ goto out;
+ }
+ ndp->ni_pathlen = tocp - cnp->cn_pnbuf;
+ cnp->cn_nameptr = cnp->cn_pnbuf;
+ /*
+ * Extract and set starting directory.
+ */
+ if (error = nfsrv_fhtovp(fhp, FALSE, &dp, ndp->ni_cnd.cn_cred, slp,
+ nam, &rdonly))
+ goto out;
+ if (dp->v_type != VDIR) {
+ vrele(dp);
+ error = ENOTDIR;
+ goto out;
+ }
+ ndp->ni_startdir = dp;
+ if (rdonly)
+ cnp->cn_flags |= (NOCROSSMOUNT | RDONLY);
+ else
+ cnp->cn_flags |= NOCROSSMOUNT;
+ /*
+ * And call lookup() to do the real work
+ */
+ cnp->cn_proc = p;
+ if (error = lookup(ndp))
+ goto out;
+ /*
+ * Check for encountering a symbolic link
+ */
+ if (cnp->cn_flags & ISSYMLINK) {
+ if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1)
+ vput(ndp->ni_dvp);
+ else
+ vrele(ndp->ni_dvp);
+ vput(ndp->ni_vp);
+ ndp->ni_vp = NULL;
+ error = EINVAL;
+ goto out;
+ }
+ /*
+ * Check for saved name request
+ */
+ if (cnp->cn_flags & (SAVENAME | SAVESTART)) {
+ cnp->cn_flags |= HASBUF;
+ return (0); /* the buffer is kept, not freed, on this path */
+ }
+out:
+ FREE(cnp->cn_pnbuf, M_NAMEI); /* also reached on success (error == 0) when no saved name was requested */
+ return (error);
+}
+
+/*
+ * A fiddled version of m_adj() that ensures null fill to a long
+ * boundary and only trims off the back end
+ *
+ * mp - head of the mbuf chain to trim
+ * len - number of bytes to remove from the tail of the chain
+ * nul - number of bytes at the new end of the data to overwrite with
+ * zeros (nothing is zeroed unless nul > 0)
+ *
+ * No mbufs are freed: mbufs past the new end are left on the chain with
+ * m_len set to 0.
+ *
+ * NOTE(review): the zero fill starts nul bytes before the new end of
+ * the mbuf, so this presumably assumes that mbuf still holds at least
+ * nul bytes -- confirm against callers.
+ */
+void
+nfsm_adj(mp, len, nul)
+ struct mbuf *mp;
+ register int len;
+ int nul;
+{
+ register struct mbuf *m;
+ register int count, i;
+ register char *cp;
+
+ /*
+ * Trim from tail. Scan the mbuf chain,
+ * calculating its length and finding the last mbuf.
+ * If the adjustment only affects this mbuf, then just
+ * adjust and return. Otherwise, rescan and truncate
+ * after the remaining size.
+ */
+ count = 0;
+ m = mp;
+ for (;;) {
+ count += m->m_len;
+ if (m->m_next == (struct mbuf *)0)
+ break;
+ m = m->m_next;
+ }
+ if (m->m_len > len) { /* the trim fits entirely within the last mbuf */
+ m->m_len -= len;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ return;
+ }
+ count -= len;
+ if (count < 0)
+ count = 0;
+ /*
+ * Correct length for chain is "count".
+ * Find the mbuf with last data, adjust its length,
+ * and toss data from remaining mbufs on chain.
+ */
+ for (m = mp; m; m = m->m_next) {
+ if (m->m_len >= count) {
+ m->m_len = count;
+ if (nul > 0) {
+ cp = mtod(m, caddr_t)+m->m_len-nul;
+ for (i = 0; i < nul; i++)
+ *cp++ = '\0';
+ }
+ break;
+ }
+ count -= m->m_len;
+ }
+ while (m = m->m_next) /* empty, but do not free, every mbuf after the new end */
+ m->m_len = 0;
+}
+
+/*
+ * nfsrv_fhtovp() - convert a fh to a vnode ptr (optionally locked)
+ * - look up fsid in mount list (if not found ret error)
+ * - get vp and export rights by calling VFS_FHTOVP()
+ * - if cred->cr_uid == 0 or MNT_EXPORTANON set it to credanon
+ * - if not lockflag unlock it with VOP_UNLOCK()
+ *
+ * fhp - file handle to translate
+ * lockflag - non-zero to return the vnode still locked
+ * vpp - receives the vnode; set to NULL first, so it is NULL
+ *   whenever the mount lookup fails
+ * cred - caller's credential; remapped in place as described above
+ * slp - server socket whose uid hash holds Kerberos mappings
+ * nam - client address, passed through to VFS_FHTOVP()
+ * rdonlyp - receives 1 if the export is MNT_EXRDONLY, else 0
+ *
+ * Returns 0 on success, ESTALE if the file system is not mounted, the
+ * VFS_FHTOVP() error, or NQNFS_AUTHERR when a MNT_EXKERB export has no
+ * uid mapping installed for the caller (the vnode is released then).
+ *
+ * Whenever the uid and group list are remapped, cr_ngroups is updated
+ * to the number of groups copied.  Otherwise the credential would keep
+ * the caller's original group count and expose stale cr_groups[]
+ * entries past the end of the mapped list.
+ */
+nfsrv_fhtovp(fhp, lockflag, vpp, cred, slp, nam, rdonlyp)
+	fhandle_t *fhp;
+	int lockflag;
+	struct vnode **vpp;
+	struct ucred *cred;
+	struct nfssvc_sock *slp;
+	struct mbuf *nam;
+	int *rdonlyp;
+{
+	register struct mount *mp;
+	register struct nfsuid *uidp;
+	register int i;
+	struct ucred *credanon;
+	int error, exflags;
+
+	*vpp = (struct vnode *)0;
+	if ((mp = getvfs(&fhp->fh_fsid)) == NULL)
+		return (ESTALE);
+	if (error = VFS_FHTOVP(mp, &fhp->fh_fid, nam, vpp, &exflags, &credanon))
+		return (error);
+	/*
+	 * Check/setup credentials.
+	 */
+	if (exflags & MNT_EXKERB) {
+		/* Look for a uid mapping installed for this socket. */
+		uidp = slp->ns_uidh[NUIDHASH(cred->cr_uid)];
+		while (uidp) {
+			if (uidp->nu_uid == cred->cr_uid)
+				break;
+			uidp = uidp->nu_hnext;
+		}
+		if (uidp) {
+			cred->cr_uid = uidp->nu_cr.cr_uid;
+			for (i = 0; i < uidp->nu_cr.cr_ngroups && i < NGROUPS;
+			    i++)
+				cred->cr_groups[i] = uidp->nu_cr.cr_groups[i];
+			cred->cr_ngroups = i;
+		} else {
+			vput(*vpp);
+			return (NQNFS_AUTHERR);
+		}
+	} else if (cred->cr_uid == 0 || (exflags & MNT_EXPORTANON)) {
+		cred->cr_uid = credanon->cr_uid;
+		for (i = 0; i < credanon->cr_ngroups && i < NGROUPS; i++)
+			cred->cr_groups[i] = credanon->cr_groups[i];
+		cred->cr_ngroups = i;
+	}
+	if (exflags & MNT_EXRDONLY)
+		*rdonlyp = 1;
+	else
+		*rdonlyp = 0;
+	if (!lockflag)
+		VOP_UNLOCK(*vpp);
+	return (0);
+}
+
+/*
+ * Decide whether the address in mbuf "nam" names the same host as the
+ * stored address "haddr", interpreting both according to "family".
+ * Returns 1 on a definite match and 0 otherwise, including for any
+ * address family that is not handled here.
+ * AF_INET is special-cased: the stored side is the bare address kept in
+ * haddr->had_inetaddr, so no address mbuf has to be saved just to hold
+ * a 4 byte "struct in_addr".
+ */
+netaddr_match(family, haddr, nam)
+	int family;
+	union nethostaddr *haddr;
+	struct mbuf *nam;
+{
+	register struct sockaddr_in *sin;
+
+	if (family == AF_INET) {
+		sin = mtod(nam, struct sockaddr_in *);
+		return (sin->sin_family == AF_INET &&
+		    sin->sin_addr.s_addr == haddr->had_inetaddr);
+	}
+#ifdef ISO
+	if (family == AF_ISO) {
+		register struct sockaddr_iso *theirs, *ours;
+
+		theirs = mtod(nam, struct sockaddr_iso *);
+		ours = mtod(haddr->had_nam, struct sockaddr_iso *);
+		return (theirs->siso_family == AF_ISO &&
+		    theirs->siso_nlen > 0 &&
+		    theirs->siso_nlen == ours->siso_nlen &&
+		    SAME_ISOADDR(theirs, ours));
+	}
+#endif /* ISO */
+	return (0);
+}
diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c
new file mode 100644
index 0000000..5d86b42
--- /dev/null
+++ b/sys/nfsserver/nfs_syscalls.c
@@ -0,0 +1,874 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs_syscalls.c 8.3 (Berkeley) 1/4/94
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/file.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/malloc.h>
+#include <sys/buf.h>
+#include <sys/mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <sys/domain.h>
+#include <sys/protosw.h>
+#include <sys/namei.h>
+#include <sys/syslog.h>
+
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#ifdef ISO
+#include <netiso/iso.h>
+#endif
+#include <nfs/rpcv2.h>
+#include <nfs/nfsv2.h>
+#include <nfs/nfs.h>
+#include <nfs/nfsrvcache.h>
+#include <nfs/nfsmount.h>
+#include <nfs/nfsnode.h>
+#include <nfs/nqnfs.h>
+#include <nfs/nfsrtt.h>
+
+/* Global defs. */
+extern u_long nfs_prog, nfs_vers;
+extern int (*nfsrv_procs[NFS_NPROCS])();
+extern struct proc *nfs_iodwant[NFS_MAXASYNCDAEMON];
+extern int nfs_numasync;
+extern time_t nqnfsstarttime;
+extern struct nfsrv_req nsrvq_head;
+extern struct nfsd nfsd_head;
+extern int nqsrv_writeslack;
+extern int nfsrtton;
+struct nfssvc_sock *nfs_udpsock, *nfs_cltpsock;
+int nuidhash_max = NFS_MAXUIDHASH;
+static int nfs_numnfsd = 0;
+int nfsd_waiting = 0;
+static int notstarted = 1;
+static int modify_flag = 0;
+static struct nfsdrt nfsdrt;
+void nfsrv_cleancache(), nfsrv_rcv(), nfsrv_wakenfsd(), nfs_sndunlock();
+static void nfsd_rt();
+void nfsrv_slpderef(), nfsrv_init();
+
+#define TRUE 1
+#define FALSE 0
+
+static int nfs_asyncdaemon[NFS_MAXASYNCDAEMON];
+/*
+ * NFS server system calls
+ * getfh() lives here too, but maybe should move to kern/vfs_syscalls.c
+ */
+
+/*
+ * getfh system call: return the file handle for a path name.
+ *
+ * uap->fname is a user-space path and uap->fhp a user-space buffer that
+ * receives the fhandle_t.  The caller must be the super user.
+ * Returns 0, or the error from suser(), namei(), VFS_VPTOFH() or
+ * copyout().
+ */
+struct getfh_args {
+	char *fname;
+	fhandle_t *fhp;
+};
+getfh(p, uap, retval)
+	struct proc *p;
+	register struct getfh_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	register struct vnode *vp;
+	fhandle_t fh;
+	int error;
+
+	/* Only the super user may ask for file handles. */
+	error = suser(p->p_ucred, &p->p_acflag);
+	if (error)
+		return (error);
+
+	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, p);
+	error = namei(&nd);
+	if (error)
+		return (error);
+	vp = nd.ni_vp;
+
+	/* Start from an all-zero handle, then add the fsid and the fid. */
+	bzero((caddr_t)&fh, sizeof(fh));
+	fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid;
+	error = VFS_VPTOFH(vp, &fh.fh_fid);
+	vput(vp);
+	if (error)
+		return (error);
+
+	return (copyout((caddr_t)&fh, (caddr_t)uap->fhp, sizeof (fh)));
+}
+
+static struct nfssvc_sock nfssvc_sockhead;
+
+/*
+ * Nfs server pseudo system call for the nfsd's
+ * Based on the flag value it either:
+ * - adds a socket to the selection list
+ * - remains in the kernel as an nfsd
+ * - remains in the kernel as an nfsiod
+ *
+ * uap->flag selects the operation and uap->argp points at its user-space
+ * argument structure:
+ *   NFSSVC_BIOD    - become an async I/O daemon (nfssvc_iod())
+ *   NFSSVC_MNTD    - act as client daemon for the NFS mount rooted at
+ *                    ncd_dirp (nqnfs_clientd())
+ *   NFSSVC_ADDSOCK - hand a socket to the server (nfssvc_addsock())
+ *   otherwise      - become an nfsd (nfssvc_nfsd()), first installing a
+ *                    uid mapping when NFSSVC_AUTHIN is set, or marking
+ *                    the nfsd NFSD_AUTHFAIL when NFSSVC_AUTHINFAIL is set
+ *
+ * The caller must be the super user.  EINTR and ERESTART results are
+ * turned into 0; other errors are returned unchanged.
+ */
+struct nfssvc_args {
+	int flag;
+	caddr_t argp;
+};
+nfssvc(p, uap, retval)
+	struct proc *p;
+	register struct nfssvc_args *uap;
+	int *retval;
+{
+	struct nameidata nd;
+	struct file *fp;
+	struct mbuf *nam;
+	struct nfsd_args nfsdarg;
+	struct nfsd_srvargs nfsd_srvargs, *nsd = &nfsd_srvargs;
+	struct nfsd_cargs ncd;
+	struct nfsd *nfsd;
+	struct nfssvc_sock *slp;
+	struct nfsuid *nuidp, **nuh;
+	struct nfsmount *nmp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	if (error = suser(p->p_ucred, &p->p_acflag))
+		return (error);
+	/* Wait while SLP_INIT is set on the socket list head. */
+	while (nfssvc_sockhead.ns_flag & SLP_INIT) {
+		nfssvc_sockhead.ns_flag |= SLP_WANTINIT;
+		(void) tsleep((caddr_t)&nfssvc_sockhead, PSOCK, "nfsd init", 0);
+	}
+	if (uap->flag & NFSSVC_BIOD)
+		error = nfssvc_iod(p);
+	else if (uap->flag & NFSSVC_MNTD) {
+		if (error = copyin(uap->argp, (caddr_t)&ncd, sizeof (ncd)))
+			return (error);
+		NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE,
+			ncd.ncd_dirp, p);
+		if (error = namei(&nd))
+			return (error);
+		/* The path must name the root of a mounted file system. */
+		if ((nd.ni_vp->v_flag & VROOT) == 0)
+			error = EINVAL;
+		nmp = VFSTONFS(nd.ni_vp->v_mount);
+		vput(nd.ni_vp);
+		if (error)
+			return (error);
+		if ((nmp->nm_flag & NFSMNT_MNTD) &&
+			(uap->flag & NFSSVC_GOTAUTH) == 0)
+			return (0);
+		nmp->nm_flag |= NFSMNT_MNTD;
+		error = nqnfs_clientd(nmp, p->p_ucred, &ncd, uap->flag,
+			uap->argp, p);
+	} else if (uap->flag & NFSSVC_ADDSOCK) {
+		if (error = copyin(uap->argp, (caddr_t)&nfsdarg,
+		    sizeof(nfsdarg)))
+			return (error);
+		if (error = getsock(p->p_fd, nfsdarg.sock, &fp))
+			return (error);
+		/*
+		 * Get the client address for connected sockets.
+		 */
+		if (nfsdarg.name == NULL || nfsdarg.namelen == 0)
+			nam = (struct mbuf *)0;
+		else if (error = sockargs(&nam, nfsdarg.name, nfsdarg.namelen,
+			MT_SONAME))
+			return (error);
+		error = nfssvc_addsock(fp, nam);
+	} else {
+		if (error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)))
+			return (error);
+		if ((uap->flag & NFSSVC_AUTHIN) && (nfsd = nsd->nsd_nfsd) &&
+			(nfsd->nd_slp->ns_flag & SLP_VALID)) {
+			slp = nfsd->nd_slp;
+
+			/*
+			 * First check to see if another nfsd has already
+			 * added this credential.
+			 */
+			nuidp = slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+			while (nuidp) {
+				if (nuidp->nu_uid == nsd->nsd_uid)
+					break;
+				nuidp = nuidp->nu_hnext;
+			}
+			if (!nuidp) {
+				/*
+				 * Nope, so we will.
+				 */
+				if (slp->ns_numuids < nuidhash_max) {
+					slp->ns_numuids++;
+					nuidp = (struct nfsuid *)
+					   malloc(sizeof (struct nfsuid), M_NFSUID,
+						M_WAITOK);
+				} else
+					nuidp = (struct nfsuid *)0;
+				/*
+				 * SLP_VALID is tested again because the
+				 * M_WAITOK malloc above may have slept.
+				 */
+				if ((slp->ns_flag & SLP_VALID) == 0) {
+					if (nuidp)
+						free((caddr_t)nuidp, M_NFSUID);
+				} else {
+					if (nuidp == (struct nfsuid *)0) {
+						/*
+						 * Table full: recycle the entry
+						 * at the tail of the LRU list.
+						 * Unlink it from its hash chain
+						 * first; when it is the chain
+						 * head (nu_hprev == NULL) the
+						 * bucket pointer itself must be
+						 * moved on, or the old bucket
+						 * would keep pointing at the
+						 * recycled entry.
+						 */
+						nuidp = slp->ns_lruprev;
+						remque(nuidp);
+						if (nuidp->nu_hprev)
+							nuidp->nu_hprev->nu_hnext =
+							    nuidp->nu_hnext;
+						else
+							slp->ns_uidh[NUIDHASH(nuidp->nu_uid)] =
+							    nuidp->nu_hnext;
+						if (nuidp->nu_hnext)
+							nuidp->nu_hnext->nu_hprev =
+							    nuidp->nu_hprev;
+					}
+					nuidp->nu_cr = nsd->nsd_cr;
+					if (nuidp->nu_cr.cr_ngroups > NGROUPS)
+						nuidp->nu_cr.cr_ngroups = NGROUPS;
+					nuidp->nu_cr.cr_ref = 1;
+					nuidp->nu_uid = nsd->nsd_uid;
+					/* Newest entry goes to the LRU front. */
+					insque(nuidp, (struct nfsuid *)slp);
+					/* ... and to the head of its hash chain. */
+					nuh = &slp->ns_uidh[NUIDHASH(nsd->nsd_uid)];
+					if (nuidp->nu_hnext = *nuh)
+						nuidp->nu_hnext->nu_hprev = nuidp;
+					nuidp->nu_hprev = (struct nfsuid *)0;
+					*nuh = nuidp;
+				}
+			}
+		}
+		if ((uap->flag & NFSSVC_AUTHINFAIL) && (nfsd = nsd->nsd_nfsd))
+			nfsd->nd_flag |= NFSD_AUTHFAIL;
+		error = nfssvc_nfsd(nsd, uap->argp, p);
+	}
+	if (error == EINTR || error == ERESTART)
+		error = 0;
+	return (error);
+}
+
+/*
+ * Adds a socket to the list for servicing by nfsds.
+ *
+ * fp - file referring to the socket; its f_count is incremented and
+ * the pointer is kept in the nfssvc_sock
+ * mynam - client address mbuf (may be NULL); stored in the nfssvc_sock
+ * on success and freed here on every failure path
+ *
+ * UDP (and, with ISO, CLTP) sockets use the preallocated nfs_udpsock /
+ * nfs_cltpsock entries and are refused with EPERM if that entry is
+ * already SLP_VALID; any other socket gets a newly allocated entry
+ * linked at the tail of the nfssvc_sockhead list.
+ * Returns 0, EPERM, or the error from soreserve().
+ */
+nfssvc_addsock(fp, mynam)
+ struct file *fp;
+ struct mbuf *mynam;
+{
+ register struct mbuf *m;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ struct nfssvc_sock *tslp;
+ int error, s;
+
+ so = (struct socket *)fp->f_data;
+ tslp = (struct nfssvc_sock *)0;
+ /*
+ * Add it to the list, as required.
+ */
+ if (so->so_proto->pr_protocol == IPPROTO_UDP) {
+ tslp = nfs_udpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#ifdef ISO
+ } else if (so->so_proto->pr_protocol == ISOPROTO_CLTP) {
+ tslp = nfs_cltpsock;
+ if (tslp->ns_flag & SLP_VALID) {
+ m_freem(mynam);
+ return (EPERM);
+ }
+#endif /* ISO */
+ }
+ if (so->so_type == SOCK_STREAM)
+ siz = NFS_MAXPACKET + sizeof (u_long); /* extra word for the stream record mark */
+ else
+ siz = NFS_MAXPACKET;
+ if (error = soreserve(so, siz, siz)) {
+ m_freem(mynam);
+ return (error);
+ }
+
+ /*
+ * Set protocol specific options { for now TCP only } and
+ * reserve some space. For datagram sockets, this can get called
+ * repeatedly for the same socket, but that isn't harmful.
+ */
+ if (so->so_type == SOCK_STREAM) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, SOL_SOCKET, SO_KEEPALIVE, m);
+ }
+ if (so->so_proto->pr_domain->dom_family == AF_INET &&
+ so->so_proto->pr_protocol == IPPROTO_TCP) {
+ MGET(m, M_WAIT, MT_SOOPTS);
+ *mtod(m, int *) = 1;
+ m->m_len = sizeof(int);
+ sosetopt(so, IPPROTO_TCP, TCP_NODELAY, m);
+ }
+ so->so_rcv.sb_flags &= ~SB_NOINTR;
+ so->so_rcv.sb_timeo = 0;
+ so->so_snd.sb_flags &= ~SB_NOINTR;
+ so->so_snd.sb_timeo = 0;
+ if (tslp)
+ slp = tslp;
+ else {
+ slp = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)slp, sizeof (struct nfssvc_sock));
+ slp->ns_prev = nfssvc_sockhead.ns_prev; /* link at the tail of the socket list */
+ slp->ns_prev->ns_next = slp;
+ slp->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = slp;
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp; /* empty uid LRU list */
+ }
+ slp->ns_so = so;
+ slp->ns_nam = mynam;
+ fp->f_count++;
+ slp->ns_fp = fp;
+ s = splnet();
+ so->so_upcallarg = (caddr_t)slp;
+ so->so_upcall = nfsrv_rcv;
+ slp->ns_flag = (SLP_VALID | SLP_NEEDQ);
+ nfsrv_wakenfsd(slp);
+ splx(s);
+ return (0);
+}
+
+/*
+ * Called by nfssvc() for nfsds. Just loops around servicing rpc requests
+ * until it is killed by a signal.
+ *
+ * nsd - kernel copy of the caller's nfsd_srvargs; nsd->nsd_nfsd is the
+ * per-daemon state, allocated here on the first call
+ * argp - user address of that structure, used to copy it back out when
+ * authentication data is needed
+ * p - the calling process
+ *
+ * Each pass obtains a request from a socket (sleeping until one is
+ * assigned or NFSD_CHECKSLP is flagged), optionally consults the request
+ * cache, dispatches through nfsrv_procs[] and sends the reply.
+ * Returns ENEEDAUTH, leaving the per-daemon state in place, when no uid
+ * mapping is installed for a request flagged NFSD_NEEDAUTH.  Otherwise
+ * it leaves only through "done" with the error from tsleep() or
+ * nfs_send(), after freeing the per-daemon state; the last nfsd to leave
+ * that way calls nfsrv_init(TRUE).
+ */
+nfssvc_nfsd(nsd, argp, p)
+ struct nfsd_srvargs *nsd;
+ caddr_t argp;
+ struct proc *p;
+{
+ register struct mbuf *m, *nam2;
+ register int siz;
+ register struct nfssvc_sock *slp;
+ register struct socket *so;
+ register int *solockp;
+ struct nfsd *nd = nsd->nsd_nfsd;
+ struct mbuf *mreq, *nam;
+ struct timeval starttime;
+ struct nfsuid *uidp;
+ int error, cacherep, s;
+ int sotype;
+
+ s = splnet();
+ if (nd == (struct nfsd *)0) { /* first call for this daemon: create its state */
+ nsd->nsd_nfsd = nd = (struct nfsd *)
+ malloc(sizeof (struct nfsd), M_NFSD, M_WAITOK);
+ bzero((caddr_t)nd, sizeof (struct nfsd));
+ nd->nd_procp = p;
+ nd->nd_cr.cr_ref = 1;
+ insque(nd, &nfsd_head);
+ nd->nd_nqlflag = NQL_NOVAL;
+ nfs_numnfsd++;
+ }
+ /*
+ * Loop getting rpc requests until SIGKILL.
+ */
+ for (;;) {
+ if ((nd->nd_flag & NFSD_REQINPROG) == 0) {
+ while (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP) == 0) {
+ nd->nd_flag |= NFSD_WAITING;
+ nfsd_waiting++;
+ error = tsleep((caddr_t)nd, PSOCK | PCATCH, "nfsd", 0);
+ nfsd_waiting--;
+ if (error)
+ goto done;
+ }
+ if (nd->nd_slp == (struct nfssvc_sock *)0 &&
+ (nfsd_head.nd_flag & NFSD_CHECKSLP)) {
+ slp = nfssvc_sockhead.ns_next;
+ while (slp != &nfssvc_sockhead) { /* find a valid socket with work pending */
+ if ((slp->ns_flag & (SLP_VALID | SLP_DOREC))
+ == (SLP_VALID | SLP_DOREC)) {
+ slp->ns_flag &= ~SLP_DOREC;
+ slp->ns_sref++;
+ nd->nd_slp = slp;
+ break;
+ }
+ slp = slp->ns_next;
+ }
+ if (slp == &nfssvc_sockhead)
+ nfsd_head.nd_flag &= ~NFSD_CHECKSLP; /* nothing found: clear the hint */
+ }
+ if ((slp = nd->nd_slp) == (struct nfssvc_sock *)0)
+ continue;
+ if (slp->ns_flag & SLP_VALID) {
+ if (slp->ns_flag & SLP_DISCONN)
+ nfsrv_zapsock(slp);
+ else if (slp->ns_flag & SLP_NEEDQ) {
+ slp->ns_flag &= ~SLP_NEEDQ;
+ (void) nfs_sndlock(&slp->ns_solock,
+ (struct nfsreq *)0);
+ nfsrv_rcv(slp->ns_so, (caddr_t)slp,
+ M_WAIT);
+ nfs_sndunlock(&slp->ns_solock);
+ }
+ error = nfsrv_dorec(slp, nd);
+ nd->nd_flag |= NFSD_REQINPROG;
+ }
+ } else {
+ error = 0;
+ slp = nd->nd_slp;
+ }
+ if (error || (slp->ns_flag & SLP_VALID) == 0) { /* no request, or socket gone: drop it and retry */
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nfsrv_slpderef(slp);
+ continue;
+ }
+ splx(s);
+ so = slp->ns_so;
+ sotype = so->so_type;
+ starttime = time;
+ if (so->so_proto->pr_flags & PR_CONNREQUIRED)
+ solockp = &slp->ns_solock;
+ else
+ solockp = (int *)0;
+ /*
+ * nam == nam2 for connectionless protocols such as UDP
+ * nam2 == NULL for connection based protocols to disable
+ * recent request caching.
+ */
+ if (nam2 = nd->nd_nam) {
+ nam = nam2;
+ cacherep = RC_CHECKIT;
+ } else {
+ nam = slp->ns_nam;
+ cacherep = RC_DOIT;
+ }
+
+ /*
+ * Check to see if authorization is needed.
+ */
+ if (nd->nd_flag & NFSD_NEEDAUTH) {
+ static int logauth = 0;
+
+ nd->nd_flag &= ~NFSD_NEEDAUTH;
+ /*
+ * Check for a mapping already installed.
+ */
+ uidp = slp->ns_uidh[NUIDHASH(nd->nd_cr.cr_uid)];
+ while (uidp) {
+ if (uidp->nu_uid == nd->nd_cr.cr_uid)
+ break;
+ uidp = uidp->nu_hnext;
+ }
+ if (!uidp) {
+ nsd->nsd_uid = nd->nd_cr.cr_uid;
+ if (nam2 && logauth++ == 0) /* warn only the first time */
+ log(LOG_WARNING, "Kerberized NFS using UDP\n");
+ nsd->nsd_haddr =
+ mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ nsd->nsd_authlen = nd->nd_authlen;
+ if (copyout(nd->nd_authstr, nsd->nsd_authstr,
+ nd->nd_authlen) == 0 &&
+ copyout((caddr_t)nsd, argp, sizeof (*nsd)) == 0)
+ return (ENEEDAUTH); /* return with nd left in place so the caller can supply the mapping */
+ cacherep = RC_DROPIT;
+ }
+ }
+ if (cacherep == RC_CHECKIT)
+ cacherep = nfsrv_getcache(nam2, nd, &mreq);
+
+ /*
+ * Check for just starting up for NQNFS and send
+ * fake "try again later" replies to the NQNFS clients.
+ */
+ if (notstarted && nqnfsstarttime <= time.tv_sec) {
+ if (modify_flag) {
+ nqnfsstarttime = time.tv_sec + nqsrv_writeslack;
+ modify_flag = 0;
+ } else
+ notstarted = 0;
+ }
+ if (notstarted) {
+ if (nd->nd_nqlflag == NQL_NOVAL)
+ cacherep = RC_DROPIT;
+ else if (nd->nd_procnum != NFSPROC_WRITE) {
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_TRYLATER;
+ cacherep = RC_DOIT;
+ } else
+ modify_flag = 1;
+ } else if (nd->nd_flag & NFSD_AUTHFAIL) {
+ nd->nd_flag &= ~NFSD_AUTHFAIL;
+ nd->nd_procnum = NFSPROC_NOOP;
+ nd->nd_repstat = NQNFS_AUTHERR;
+ cacherep = RC_DOIT;
+ }
+
+ switch (cacherep) {
+ case RC_DOIT:
+ error = (*(nfsrv_procs[nd->nd_procnum]))(nd,
+ nd->nd_mrep, nd->nd_md, nd->nd_dpos, &nd->nd_cr,
+ nam, &mreq);
+ if (nd->nd_cr.cr_ref != 1) {
+ printf("nfssvc cref=%d\n", nd->nd_cr.cr_ref);
+ panic("nfssvc cref");
+ }
+ if (error) {
+ if (nd->nd_procnum != NQNFSPROC_VACATED)
+ nfsstats.srv_errs++;
+ if (nam2) {
+ nfsrv_updatecache(nam2, nd, FALSE, mreq);
+ m_freem(nam2);
+ }
+ break;
+ }
+ nfsstats.srvrpccnt[nd->nd_procnum]++;
+ if (nam2)
+ nfsrv_updatecache(nam2, nd, TRUE, mreq);
+ nd->nd_mrep = (struct mbuf *)0; /* FALLTHROUGH into RC_REPLY to send mreq */
+ case RC_REPLY:
+ m = mreq;
+ siz = 0;
+ while (m) { /* total length of the reply chain */
+ siz += m->m_len;
+ m = m->m_next;
+ }
+ if (siz <= 0 || siz > NFS_MAXPACKET) {
+ printf("mbuf siz=%d\n",siz);
+ panic("Bad nfs svc reply");
+ }
+ m = mreq;
+ m->m_pkthdr.len = siz;
+ m->m_pkthdr.rcvif = (struct ifnet *)0;
+ /*
+ * For stream protocols, prepend a Sun RPC
+ * Record Mark.
+ */
+ if (sotype == SOCK_STREAM) {
+ M_PREPEND(m, NFSX_UNSIGNED, M_WAIT);
+ *mtod(m, u_long *) = htonl(0x80000000 | siz);
+ }
+ if (solockp)
+ (void) nfs_sndlock(solockp, (struct nfsreq *)0);
+ if (slp->ns_flag & SLP_VALID)
+ error = nfs_send(so, nam2, m, (struct nfsreq *)0);
+ else {
+ error = EPIPE;
+ m_freem(m);
+ }
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ if (nam2)
+ MFREE(nam2, m);
+ if (nd->nd_mrep)
+ m_freem(nd->nd_mrep);
+ if (error == EPIPE)
+ nfsrv_zapsock(slp);
+ if (solockp)
+ nfs_sndunlock(solockp);
+ if (error == EINTR || error == ERESTART) {
+ nfsrv_slpderef(slp);
+ s = splnet(); /* "done" expects to be entered at splnet */
+ goto done;
+ }
+ break;
+ case RC_DROPIT:
+ if (nfsrtton)
+ nfsd_rt(&starttime, sotype, nd, nam, cacherep);
+ m_freem(nd->nd_mrep);
+ m_freem(nam2);
+ break;
+ };
+ s = splnet();
+ if (nfsrv_dorec(slp, nd)) { /* no further request queued on this socket: release it */
+ nd->nd_flag &= ~NFSD_REQINPROG;
+ nd->nd_slp = (struct nfssvc_sock *)0;
+ nfsrv_slpderef(slp);
+ }
+ }
+done:
+ remque(nd);
+ splx(s);
+ free((caddr_t)nd, M_NFSD);
+ nsd->nsd_nfsd = (struct nfsd *)0;
+ if (--nfs_numnfsd == 0)
+ nfsrv_init(TRUE); /* Reinitialize everything */
+ return (error);
+}
+
+/*
+ * Asynchronous I/O daemons for client nfs.
+ * They do read-ahead and write-behind operations on the block I/O cache.
+ * Never returns unless it fails or gets killed.
+ *
+ * p - the calling process, recorded in nfs_iodwant[] while idle
+ *
+ * Returns EBUSY if all NFS_MAXASYNCDAEMON slots are taken, otherwise
+ * the error that interrupted tsleep().  Before returning, the daemon
+ * drains whatever is queued on nfs_bufq and releases its slot.
+ */
+nfssvc_iod(p)
+	struct proc *p;
+{
+	register struct buf *bp;
+	register int i, myiod;
+	int error = 0;
+
+	/*
+	 * Assign my position or return error if too many already running
+	 */
+	myiod = -1;
+	for (i = 0; i < NFS_MAXASYNCDAEMON; i++)
+		if (nfs_asyncdaemon[i] == 0) {
+			nfs_asyncdaemon[i]++;
+			myiod = i;
+			break;
+		}
+	if (myiod == -1)
+		return (EBUSY);
+	nfs_numasync++;
+	/*
+	 * Just loop around doin our stuff until SIGKILL
+	 */
+	for (;;) {
+		/* Advertise this daemon as idle and sleep until woken. */
+		while (nfs_bufq.tqh_first == NULL && error == 0) {
+			nfs_iodwant[myiod] = p;
+			error = tsleep((caddr_t)&nfs_iodwant[myiod],
+				PWAIT | PCATCH, "nfsidl", 0);
+		}
+		while ((bp = nfs_bufq.tqh_first) != NULL) {
+			/* Take one off the front of the list */
+			TAILQ_REMOVE(&nfs_bufq, bp, b_freelist);
+			if (bp->b_flags & B_READ)
+				(void) nfs_doio(bp, bp->b_rcred, (struct proc *)0);
+			else
+				(void) nfs_doio(bp, bp->b_wcred, (struct proc *)0);
+		}
+		if (error) {
+			/*
+			 * An interrupted sleep leaves our "idle" marker
+			 * set.  Clear it so that nothing scanning
+			 * nfs_iodwant[] treats this slot as an idle
+			 * daemon after we have gone.
+			 */
+			nfs_iodwant[myiod] = (struct proc *)0;
+			nfs_asyncdaemon[myiod] = 0;
+			nfs_numasync--;
+			return (error);
+		}
+	}
+}
+
+/*
+ * Shut down a socket associated with an nfssvc_sock structure.
+ * Should be called with the send lock set, if required.
+ * The trick here is to increment the sref at the start, so that the nfsds
+ * will stop using it and clear ns_flag at the end so that it will not be
+ * reassigned during cleanup.
+ *
+ * What the code below does, in order:
+ *  - clears every SLP_ALLFLAGS bit in ns_flag (so SLP_VALID is gone);
+ *  - if a file pointer is still attached: detaches it, removes the
+ *    socket upcall, shuts the socket down and closes the file;
+ *  - frees the ns_nam mbuf and the ns_raw and ns_rec mbuf chains;
+ *  - frees every nfsuid on the LRU list, resets the LRU list to empty
+ *    (both links point back at slp) and empties the uid hash buckets.
+ * When ns_fp is already zero only the flag clearing happens.
+ *
+ * NOTE(review): ns_sref is not touched in this function and ns_flag is
+ * cleared first rather than last, which does not match the paragraph
+ * above -- presumably the caller holds the reference; confirm.
+ * NOTE(review): ns_nam, ns_raw and ns_rec keep their old (now freed)
+ * values after this call.
+ */
+nfsrv_zapsock(slp)
+ register struct nfssvc_sock *slp;
+{
+ register struct nfsuid *nuidp, *onuidp;
+ register int i;
+ struct socket *so;
+ struct file *fp;
+ struct mbuf *m;
+
+ slp->ns_flag &= ~SLP_ALLFLAGS;
+ if (fp = slp->ns_fp) { /* assignment intended: only if a file is attached */
+ slp->ns_fp = (struct file *)0;
+ so = slp->ns_so;
+ so->so_upcall = NULL;
+ soshutdown(so, 2);
+ closef(fp, (struct proc *)0);
+ if (slp->ns_nam)
+ MFREE(slp->ns_nam, m);
+ m_freem(slp->ns_raw);
+ m_freem(slp->ns_rec);
+ /* The list is circular; slp itself, cast to nfsuid, is the head. */
+ nuidp = slp->ns_lrunext;
+ while (nuidp != (struct nfsuid *)slp) {
+ onuidp = nuidp;
+ nuidp = nuidp->nu_lrunext; /* step before freeing the node */
+ free((caddr_t)onuidp, M_NFSUID);
+ }
+ slp->ns_lrunext = slp->ns_lruprev = (struct nfsuid *)slp;
+ for (i = 0; i < NUIDHASHSIZ; i++)
+ slp->ns_uidh[i] = (struct nfsuid *)0;
+ }
+}
+
+/*
+ * Get an authorization string for the uid by having the mount_nfs sitting
+ * on this mount point porpoise out of the kernel and do it.
+ *
+ * Steps, as implemented below:
+ *  1. Wait until NFSMNT_WAITAUTH is set in nm_flag, setting
+ *     NFSMNT_WANTAUTH and sleeping on &nmp->nm_authtype (2 second
+ *     timeout) between checks; give up if nfs_sigintr() reports an error.
+ *  2. Clear WAITAUTH/WANTAUTH, allocate an RPCAUTH_MAXSIZ buffer that is
+ *     stored in both nmp->nm_authstr and *auth_str, record cred->cr_uid
+ *     in nm_authuid and wake whoever sleeps on &nmp->nm_authstr.
+ *  3. Sleep on &nmp->nm_authlen until NFSMNT_HASAUTH is set or
+ *     nfs_sigintr() reports an error.
+ *  4. NFSMNT_AUTHERR turns the result into EAUTH.
+ *  5. On error the buffer is freed; on success *auth_type and *auth_len
+ *     are filled in from the mount structure and the caller owns
+ *     *auth_str.
+ *  6. HASAUTH is cleared, WAITAUTH is set again and any process that set
+ *     WANTAUTH is woken.
+ *
+ * Returns 0 on success, otherwise the nfs_sigintr() error or EAUTH.
+ *
+ * NOTE(review): rep is dereferenced unconditionally (rep->r_procp).
+ * NOTE(review): on error both *auth_str and nmp->nm_authstr are left
+ * holding the freed pointer.
+ */
+nfs_getauth(nmp, rep, cred, auth_type, auth_str, auth_len)
+ register struct nfsmount *nmp;
+ struct nfsreq *rep;
+ struct ucred *cred;
+ int *auth_type;
+ char **auth_str;
+ int *auth_len;
+{
+ int error = 0;
+
+ while ((nmp->nm_flag & NFSMNT_WAITAUTH) == 0) {
+ nmp->nm_flag |= NFSMNT_WANTAUTH;
+ (void) tsleep((caddr_t)&nmp->nm_authtype, PSOCK,
+ "nfsauth1", 2 * hz);
+ if (error = nfs_sigintr(nmp, rep, rep->r_procp)) {
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ return (error);
+ }
+ }
+ nmp->nm_flag &= ~(NFSMNT_WAITAUTH | NFSMNT_WANTAUTH);
+ nmp->nm_authstr = *auth_str = (char *)malloc(RPCAUTH_MAXSIZ, M_TEMP, M_WAITOK);
+ nmp->nm_authuid = cred->cr_uid;
+ wakeup((caddr_t)&nmp->nm_authstr);
+
+ /*
+ * And wait for mount_nfs to do its stuff.
+ */
+ while ((nmp->nm_flag & NFSMNT_HASAUTH) == 0 && error == 0) {
+ (void) tsleep((caddr_t)&nmp->nm_authlen, PSOCK,
+ "nfsauth2", 2 * hz);
+ error = nfs_sigintr(nmp, rep, rep->r_procp);
+ }
+ if (nmp->nm_flag & NFSMNT_AUTHERR) {
+ nmp->nm_flag &= ~NFSMNT_AUTHERR;
+ error = EAUTH;
+ }
+ if (error)
+ free((caddr_t)*auth_str, M_TEMP);
+ else {
+ *auth_type = nmp->nm_authtype;
+ *auth_len = nmp->nm_authlen;
+ }
+ nmp->nm_flag &= ~NFSMNT_HASAUTH;
+ nmp->nm_flag |= NFSMNT_WAITAUTH;
+ if (nmp->nm_flag & NFSMNT_WANTAUTH) {
+ nmp->nm_flag &= ~NFSMNT_WANTAUTH;
+ wakeup((caddr_t)&nmp->nm_authtype);
+ }
+ return (error);
+}
+
+/*
+ * Drop one reference on a server socket structure. Once the last
+ * reference is gone and the structure is no longer marked SLP_VALID,
+ * unlink it from the socket list and release its storage.
+ */
+void
+nfsrv_slpderef(slp)
+ register struct nfssvc_sock *slp;
+{
+ slp->ns_sref--;
+ /* Still referenced, or still a live socket: nothing more to do. */
+ if (slp->ns_sref != 0 || (slp->ns_flag & SLP_VALID) != 0)
+ return;
+ /* Splice the neighbours together, then free the node. */
+ slp->ns_prev->ns_next = slp->ns_next;
+ slp->ns_next->ns_prev = slp->ns_prev;
+ free((caddr_t)slp, M_NFSSVC);
+}
+
+/*
+ * Initialize the data structures for the server.
+ * Handshake with any new nfsds starting up to avoid any chance of
+ * corruption.
+ *
+ * terminating is nonzero when the server is being torn down (the nfsd
+ * main loop passes TRUE when the last nfsd exits). In that case every
+ * socket on the nfssvc_sockhead list is zapped if still valid, unlinked
+ * and freed, and the server request cache is cleaned out first.
+ *
+ * In all cases two fresh, zeroed nfssvc_sock structures (nfs_udpsock and
+ * nfs_cltpsock) are then allocated and linked with nfssvc_sockhead into
+ * a circular doubly linked list, their uid LRU lists are set to empty,
+ * and nfsd_head is reset to an empty list.
+ *
+ * SLP_INIT on nfssvc_sockhead is held for the duration; finding it
+ * already set is a panic. Anyone who set SLP_WANTINIT meanwhile is
+ * woken on &nfssvc_sockhead at the end.
+ */
+void
+nfsrv_init(terminating)
+ int terminating;
+{
+ register struct nfssvc_sock *slp;
+ struct nfssvc_sock *oslp;
+
+ if (nfssvc_sockhead.ns_flag & SLP_INIT)
+ panic("nfsd init");
+ nfssvc_sockhead.ns_flag |= SLP_INIT;
+ if (terminating) {
+ slp = nfssvc_sockhead.ns_next;
+ while (slp != &nfssvc_sockhead) {
+ if (slp->ns_flag & SLP_VALID)
+ nfsrv_zapsock(slp);
+ slp->ns_next->ns_prev = slp->ns_prev;
+ slp->ns_prev->ns_next = slp->ns_next;
+ oslp = slp;
+ slp = slp->ns_next; /* advance before the node is freed */
+ free((caddr_t)oslp, M_NFSSVC);
+ }
+ nfsrv_cleancache(); /* And clear out server cache */
+ }
+ nfs_udpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_udpsock, sizeof (struct nfssvc_sock));
+ nfs_cltpsock = (struct nfssvc_sock *)
+ malloc(sizeof (struct nfssvc_sock), M_NFSSVC, M_WAITOK);
+ bzero((caddr_t)nfs_cltpsock, sizeof (struct nfssvc_sock));
+ /* Resulting ring: head -> udp -> cltp -> head */
+ nfssvc_sockhead.ns_next = nfs_udpsock;
+ nfs_udpsock->ns_next = nfs_cltpsock;
+ nfs_cltpsock->ns_next = &nfssvc_sockhead;
+ nfssvc_sockhead.ns_prev = nfs_cltpsock;
+ nfs_cltpsock->ns_prev = nfs_udpsock;
+ nfs_udpsock->ns_prev = &nfssvc_sockhead;
+ nfs_udpsock->ns_lrunext = nfs_udpsock->ns_lruprev =
+ (struct nfsuid *)nfs_udpsock;
+ nfs_cltpsock->ns_lrunext = nfs_cltpsock->ns_lruprev =
+ (struct nfsuid *)nfs_cltpsock;
+ nfsd_head.nd_next = nfsd_head.nd_prev = &nfsd_head;
+ nfsd_head.nd_flag = 0;
+ nfssvc_sockhead.ns_flag &= ~SLP_INIT;
+ if (nfssvc_sockhead.ns_flag & SLP_WANTINIT) {
+ nfssvc_sockhead.ns_flag &= ~SLP_WANTINIT;
+ wakeup((caddr_t)&nfssvc_sockhead);
+ }
+}
+
+/*
+ * Record one request in the server monitor log: fill in the slot at
+ * nfsdrt.pos, then advance the position, wrapping at NFSRTTLOGSIZ.
+ */
+static void
+nfsd_rt(startp, sotype, nd, nam, cacherep)
+ struct timeval *startp;
+ int sotype;
+ register struct nfsd *nd;
+ struct mbuf *nam;
+ int cacherep;
+{
+ register struct drt *entry = &nfsdrt.drt[nfsdrt.pos];
+
+ /* How the request cache disposed of this request. */
+ switch (cacherep) {
+ case RC_DOIT:
+ entry->flag = 0;
+ break;
+ case RC_REPLY:
+ entry->flag = DRT_CACHEREPLY;
+ break;
+ default:
+ entry->flag = DRT_CACHEDROP;
+ break;
+ }
+ if (sotype == SOCK_STREAM)
+ entry->flag |= DRT_TCP;
+ if (nd->nd_nqlflag != NQL_NOVAL)
+ entry->flag |= DRT_NQNFS;
+ entry->proc = nd->nd_procnum;
+ /* Only AF_INET clients get their address logged. */
+ if (mtod(nam, struct sockaddr *)->sa_family != AF_INET)
+ entry->ipadr = INADDR_ANY;
+ else
+ entry->ipadr = mtod(nam, struct sockaddr_in *)->sin_addr.s_addr;
+ /* Time elapsed since *startp, in microseconds. */
+ entry->resptime = ((time.tv_sec - startp->tv_sec) * 1000000) +
+ (time.tv_usec - startp->tv_usec);
+ entry->tstamp = time;
+ nfsdrt.pos = (nfsdrt.pos + 1) % NFSRTTLOGSIZ;
+}
diff --git a/sys/nfsserver/nfsm_subs.h b/sys/nfsserver/nfsm_subs.h
new file mode 100644
index 0000000..879db36
--- /dev/null
+++ b/sys/nfsserver/nfsm_subs.h
@@ -0,0 +1,269 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsm_subs.h 8.1 (Berkeley) 6/16/93
+ */
+
+/*
+ * These macros do strange and peculiar things to mbuf chains for
+ * the assistance of the nfs code. To attempt to use them for any
+ * other purpose will be dangerous. (they make weird assumptions)
+ */
+
+/*
+ * First define what the actual subs. return
+ *
+ * Followed by small mbuf helpers:
+ *   M_HASCL(m)    nonzero when the mbuf has M_EXT set
+ *   NFSMINOFF(m)  point m_data back at the start of the mbuf's storage
+ *                 (external buffer, packet-header data or plain data)
+ *   NFSMADV(m, s) advance m_data by s bytes
+ *   NFSMSIZ(m)    storage size for that kind of mbuf: MCLBYTES, MHLEN
+ *                 or MLEN
+ * NFSMINOFF expands to a bare if/else chain and NFSMADV to an unbracketed
+ * assignment, so use them only as complete statements.
+ */
+extern struct mbuf *nfsm_reqh();
+
+#define M_HASCL(m) ((m)->m_flags & M_EXT)
+#define NFSMINOFF(m) \
+ if (M_HASCL(m)) \
+ (m)->m_data = (m)->m_ext.ext_buf; \
+ else if ((m)->m_flags & M_PKTHDR) \
+ (m)->m_data = (m)->m_pktdat; \
+ else \
+ (m)->m_data = (m)->m_dat
+#define NFSMADV(m, s) (m)->m_data += (s)
+#define NFSMSIZ(m) ((M_HASCL(m))?MCLBYTES: \
+ (((m)->m_flags & M_PKTHDR)?MHLEN:MLEN))
+
+/*
+ * Now for the macros that do the simple stuff and call the functions
+ * for the hard stuff.
+ * These macros use several vars. declared in nfsm_reqhead and these
+ * vars. must not be used elsewhere unless you are careful not to corrupt
+ * them. The vars. starting with pN and tN (N=1,2,3,..) are temporaries
+ * that may be used so long as the value is not expected to be retained
+ * after a macro.
+ * I know, this is kind of dorkey, but it makes the actual op functions
+ * fairly clean and deals with the mess caused by the xdr discriminating
+ * unions.
+ *
+ * Names the macros below expect to find in the enclosing function:
+ *   mb, mb2, bpos        mbuf being built and the write position in it
+ *   md, dpos             mbuf being dissected and the read position in it
+ *   mreq, mrep           mbuf chains; freed by the macros on failure
+ *   tl, cp, cp2, t1, t2  scratch pointers and lengths
+ *   error                set by most macros before they bail out
+ *   nfsmout              label most macros jump to on failure; it is
+ *                        supplied by nfsm_reqdone or nfsm_srvdone
+ * The server-side macros (nfsm_reply, nfsm_srvfillattr) additionally use
+ * nfsd, mrq, cache, frev, fp and vap; nfsm_clget uses bp, be, mp and mp2.
+ *
+ * Several macros (nfsm_mtouio, nfsm_uiotom, nfsm_strtom, nfsm_adv,
+ * nfsm_request, nfsm_fhtom, nfsm_srvfhtom, nfsm_srvmtofh, nfsm_clget)
+ * expand to a bare if or to more than one statement, so they are not
+ * safe as the body of an unbraced if/else.
+ * nfsm_reply stores error in nfsd->nd_repstat and, when error is set,
+ * returns 0 from the enclosing function.
+ */
+
+#define nfsm_build(a,c,s) \
+ { if ((s) > M_TRAILINGSPACE(mb)) { \
+ MGET(mb2, M_WAIT, MT_DATA); \
+ if ((s) > MLEN) \
+ panic("build > MLEN"); \
+ mb->m_next = mb2; \
+ mb = mb2; \
+ mb->m_len = 0; \
+ bpos = mtod(mb, caddr_t); \
+ } \
+ (a) = (c)(bpos); \
+ mb->m_len += (s); \
+ bpos += (s); }
+
+#define nfsm_dissect(a,c,s) \
+ { t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ (a) = (c)(dpos); \
+ dpos += (s); \
+ } else if (error = nfsm_disct(&md, &dpos, (s), t1, &cp2)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } else { \
+ (a) = (c)cp2; \
+ } }
+
+#define nfsm_fhtom(v) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)&(VTONFS(v)->n_fh), cp, NFSX_FH)
+
+#define nfsm_srvfhtom(f) \
+ nfsm_build(cp,caddr_t,NFSX_FH); \
+ bcopy((caddr_t)(f), cp, NFSX_FH)
+
+#define nfsm_mtofh(d,v) \
+ { struct nfsnode *np; nfsv2fh_t *fhp; \
+ nfsm_dissect(fhp,nfsv2fh_t *,NFSX_FH); \
+ if (error = nfs_nget((d)->v_mount, fhp, &np)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = NFSTOV(np); \
+ nfsm_loadattr(v, (struct vattr *)0); \
+ }
+
+#define nfsm_loadattr(v,a) \
+ { struct vnode *tvp = (v); \
+ if (error = nfs_loadattrcache(&tvp, &md, &dpos, (a))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ } \
+ (v) = tvp; }
+
+#define nfsm_strsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m)) { \
+ m_freem(mrep); \
+ error = EBADRPC; \
+ goto nfsmout; \
+ } }
+
+#define nfsm_srvstrsiz(s,m) \
+ { nfsm_dissect(tl,u_long *,NFSX_UNSIGNED); \
+ if (((s) = fxdr_unsigned(long,*tl)) > (m) || (s) <= 0) { \
+ error = EBADRPC; \
+ nfsm_reply(0); \
+ } }
+
+#define nfsm_mtouio(p,s) \
+ if ((s) > 0 && \
+ (error = nfsm_mbuftouio(&md,(p),(s),&dpos))) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_uiotom(p,s) \
+ if (error = nfsm_uiotombuf((p),&mb,(s),&bpos)) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_reqhead(v,a,s) \
+ mb = mreq = nfsm_reqh((v),(a),(s),&bpos)
+
+#define nfsm_reqdone m_freem(mrep); \
+ nfsmout:
+
+#define nfsm_rndup(a) (((a)+3)&(~0x3))
+
+#define nfsm_request(v, t, p, c) \
+ if (error = nfs_request((v), mreq, (t), (p), \
+ (c), &mrep, &md, &dpos)) \
+ goto nfsmout
+
+#define nfsm_strtom(a,s,m) \
+ if ((s) > (m)) { \
+ m_freem(mreq); \
+ error = ENAMETOOLONG; \
+ goto nfsmout; \
+ } \
+ t2 = nfsm_rndup(s)+NFSX_UNSIGNED; \
+ if (t2 <= M_TRAILINGSPACE(mb)) { \
+ nfsm_build(tl,u_long *,t2); \
+ *tl++ = txdr_unsigned(s); \
+ *(tl+((t2>>2)-2)) = 0; \
+ bcopy((caddr_t)(a), (caddr_t)tl, (s)); \
+ } else if (error = nfsm_strtmbuf(&mb, &bpos, (a), (s))) { \
+ m_freem(mreq); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvdone \
+ nfsmout: \
+ return(error)
+
+#define nfsm_reply(s) \
+ { \
+ nfsd->nd_repstat = error; \
+ if (error) \
+ (void) nfs_rephead(0, nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ else \
+ (void) nfs_rephead((s), nfsd, error, cache, &frev, \
+ mrq, &mb, &bpos); \
+ m_freem(mrep); \
+ mreq = *mrq; \
+ if (error) \
+ return(0); \
+ }
+
+#define nfsm_adv(s) \
+ t1 = mtod(md, caddr_t)+md->m_len-dpos; \
+ if (t1 >= (s)) { \
+ dpos += (s); \
+ } else if (error = nfs_adv(&md, &dpos, (s), t1)) { \
+ m_freem(mrep); \
+ goto nfsmout; \
+ }
+
+#define nfsm_srvmtofh(f) \
+ nfsm_dissect(tl, u_long *, NFSX_FH); \
+ bcopy((caddr_t)tl, (caddr_t)f, NFSX_FH)
+
+#define nfsm_clget \
+ if (bp >= be) { \
+ if (mp == mb) \
+ mp->m_len += bp-bpos; \
+ MGET(mp, M_WAIT, MT_DATA); \
+ MCLGET(mp, M_WAIT); \
+ mp->m_len = NFSMSIZ(mp); \
+ mp2->m_next = mp; \
+ mp2 = mp; \
+ bp = mtod(mp, caddr_t); \
+ be = bp+mp->m_len; \
+ } \
+ tl = (u_long *)bp
+
+#define nfsm_srvfillattr \
+ fp->fa_type = vtonfs_type(vap->va_type); \
+ fp->fa_mode = vtonfs_mode(vap->va_type, vap->va_mode); \
+ fp->fa_nlink = txdr_unsigned(vap->va_nlink); \
+ fp->fa_uid = txdr_unsigned(vap->va_uid); \
+ fp->fa_gid = txdr_unsigned(vap->va_gid); \
+ if (nfsd->nd_nqlflag == NQL_NOVAL) { \
+ fp->fa_nfsblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nfsrdev = 0xffffffff; \
+ else \
+ fp->fa_nfsrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nfsfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nfsfileid = txdr_unsigned(vap->va_fileid); \
+ fp->fa_nfssize = txdr_unsigned(vap->va_size); \
+ fp->fa_nfsblocks = txdr_unsigned(vap->va_bytes / NFS_FABLKSIZE); \
+ txdr_nfstime(&vap->va_atime, &fp->fa_nfsatime); \
+ txdr_nfstime(&vap->va_mtime, &fp->fa_nfsmtime); \
+ fp->fa_nfsctime.nfs_sec = txdr_unsigned(vap->va_ctime.ts_sec); \
+ fp->fa_nfsctime.nfs_usec = txdr_unsigned(vap->va_gen); \
+ } else { \
+ fp->fa_nqblocksize = txdr_unsigned(vap->va_blocksize); \
+ if (vap->va_type == VFIFO) \
+ fp->fa_nqrdev = 0xffffffff; \
+ else \
+ fp->fa_nqrdev = txdr_unsigned(vap->va_rdev); \
+ fp->fa_nqfsid = txdr_unsigned(vap->va_fsid); \
+ fp->fa_nqfileid = txdr_unsigned(vap->va_fileid); \
+ txdr_hyper(&vap->va_size, &fp->fa_nqsize); \
+ txdr_hyper(&vap->va_bytes, &fp->fa_nqbytes); \
+ txdr_nqtime(&vap->va_atime, &fp->fa_nqatime); \
+ txdr_nqtime(&vap->va_mtime, &fp->fa_nqmtime); \
+ txdr_nqtime(&vap->va_ctime, &fp->fa_nqctime); \
+ fp->fa_nqflags = txdr_unsigned(vap->va_flags); \
+ fp->fa_nqgen = txdr_unsigned(vap->va_gen); \
+ txdr_hyper(&vap->va_filerev, &fp->fa_nqfilerev); \
+ }
+
diff --git a/sys/nfsserver/nfsrvcache.h b/sys/nfsserver/nfsrvcache.h
new file mode 100644
index 0000000..26da2c2
--- /dev/null
+++ b/sys/nfsserver/nfsrvcache.h
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfsrvcache.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Definitions for the server recent request cache
+ *
+ * Each entry records one request (xid, procedure number, client host
+ * address) together with either the reply mbuf list or just the reply
+ * status; rc_reply and rc_status below name the two members of that
+ * union, rc_inetaddr and rc_nam the two members of the address union.
+ * NOTE(review): presumably RC_REPMBUF/RC_REPSTATUS in rc_flag say which
+ * reply member is valid and RC_INETADDR/RC_NAM which address member --
+ * confirm against the cache implementation.
+ */
+
+#define NFSRVCACHESIZ 256
+
+struct nfsrvcache {
+ struct nfsrvcache *rc_forw; /* Hash chain links */
+ struct nfsrvcache **rc_back; /* Hash chain links */
+ struct nfsrvcache *rc_next; /* Lru list */
+ struct nfsrvcache **rc_prev; /* Lru list */
+ u_long rc_xid; /* rpc id number */
+ union {
+ struct mbuf *ru_repmb; /* Reply mbuf list OR */
+ int ru_repstat; /* Reply status */
+ } rc_un;
+ union nethostaddr rc_haddr; /* Host address */
+ short rc_proc; /* rpc proc number */
+ u_char rc_state; /* Current state of request */
+ u_char rc_flag; /* Flag bits */
+};
+
+#define rc_reply rc_un.ru_repmb
+#define rc_status rc_un.ru_repstat
+#define rc_inetaddr rc_haddr.had_inetaddr
+#define rc_nam rc_haddr.had_nam
+
+/* Cache entry states (values of rc_state) */
+#define RC_UNUSED 0
+#define RC_INPROG 1
+#define RC_DONE 2
+
+/* Return values (the nfsd request loop switches on these) */
+#define RC_DROPIT 0 /* Discard the request, send nothing */
+#define RC_REPLY 1 /* Send the reply that is already in hand */
+#define RC_DOIT 2 /* Logged by nfsd_rt as neither cache reply nor drop */
+#define RC_CHECKIT 3
+
+/* Flag bits (values for rc_flag) */
+#define RC_LOCKED 0x01
+#define RC_WANTED 0x02
+#define RC_REPSTATUS 0x04
+#define RC_REPMBUF 0x08
+#define RC_NQNFS 0x10
+#define RC_INETADDR 0x20
+#define RC_NAM 0x40
diff --git a/sys/nfsserver/nfsrvstats.h b/sys/nfsserver/nfsrvstats.h
new file mode 100644
index 0000000..261fd42
--- /dev/null
+++ b/sys/nfsserver/nfsrvstats.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Rick Macklem at The University of Guelph.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)nfs.h 8.1 (Berkeley) 6/10/93
+ */
+
+/*
+ * Tunable constants for nfs
+ *
+ * The *TIMEO values are expressed in NFS_HZ ticks (NFS_HZ ticks make one
+ * second); the attribute cache limits are in seconds.
+ */
+
+#define NFS_MAXIOVEC 34
+#define NFS_HZ 25 /* Ticks per second for NFS timeouts */
+#define NFS_TIMEO (1*NFS_HZ) /* Default timeout = 1 second */
+#define NFS_MINTIMEO (1*NFS_HZ) /* Min timeout to use */
+#define NFS_MAXTIMEO (60*NFS_HZ) /* Max timeout to backoff to */
+#define NFS_MINIDEMTIMEO (5*NFS_HZ) /* Min timeout for non-idempotent ops*/
+#define NFS_MAXREXMIT 100 /* Stop counting after this many */
+#define NFS_MAXWINDOW 1024 /* Max number of outstanding requests */
+#define NFS_RETRANS 10 /* Num of retrans for soft mounts */
+#define NFS_MAXGRPS 16 /* Max. size of groups list */
+#define NFS_MINATTRTIMO 5 /* Attribute cache timeout in sec */
+#define NFS_MAXATTRTIMO 60 /* Upper clamp used by NFS_ATTRTIMEO, in sec */
+#define NFS_WSIZE 8192 /* Def. write data size <= 8192 */
+#define NFS_RSIZE 8192 /* Def. read data size <= 8192 */
+#define NFS_DEFRAHEAD 1 /* Def. read ahead # blocks */
+#define NFS_MAXRAHEAD 4 /* Max. read ahead # blocks */
+#define NFS_MAXREADDIR NFS_MAXDATA /* Max. size of directory read */
+#define NFS_MAXUIDHASH 64 /* Max. # of hashed uid entries/mp */
+#define NFS_MAXASYNCDAEMON 20 /* Max. number async_daemons runable */
+#define NFS_DIRBLKSIZ 1024 /* Size of an NFS directory block */
+#define NMOD(a) ((a) % nfs_asyncdaemons) /* NOTE(review): nfs_asyncdaemons is not declared in this header -- confirm it exists */
+
+/*
+ * Set the attribute timeout based on how recently the file has been modified.
+ *
+ * The result is one tenth of the time since (np)->n_mtime, clamped to the
+ * range NFS_MINATTRTIMO..NFS_MAXATTRTIMO; a node with NMODIFIED set
+ * always gets NFS_MINATTRTIMO. np is evaluated several times.
+ */
+#define NFS_ATTRTIMEO(np) \
+ ((((np)->n_flag & NMODIFIED) || \
+ (time.tv_sec - (np)->n_mtime) / 10 < NFS_MINATTRTIMO) ? NFS_MINATTRTIMO : \
+ ((time.tv_sec - (np)->n_mtime) / 10 > NFS_MAXATTRTIMO ? NFS_MAXATTRTIMO : \
+ (time.tv_sec - (np)->n_mtime) / 10))
+
+/*
+ * Structures for the nfssvc(2) syscall. Not that anyone but nfsd and mount_nfs
+ * should ever try and use it.
+ *
+ * nfsd_srvargs.nsd_nfsd is reset to a null pointer by the kernel when the
+ * nfsd leaves its service loop.
+ * NOTE(review): fields marked "(ret)" are presumably filled in by the
+ * kernel for the daemon -- confirm against the nfssvc() implementation.
+ */
+struct nfsd_args {
+ int sock; /* Socket to serve */
+ caddr_t name; /* Client address for connection based sockets */
+ int namelen; /* Length of name */
+};
+
+struct nfsd_srvargs {
+ struct nfsd *nsd_nfsd; /* Pointer to in kernel nfsd struct */
+ uid_t nsd_uid; /* Effective uid mapped to cred */
+ u_long nsd_haddr; /* Ip address of client */
+ struct ucred nsd_cr; /* Cred. uid maps to */
+ int nsd_authlen; /* Length of auth string (ret) */
+ char *nsd_authstr; /* Auth string (ret) */
+};
+
+struct nfsd_cargs {
+ char *ncd_dirp; /* Mount dir path */
+ uid_t ncd_authuid; /* Effective uid */
+ int ncd_authtype; /* Type of authenticator */
+ int ncd_authlen; /* Length of authenticator string */
+ char *ncd_authstr; /* Authenticator string */
+};
+
+/*
+ * Stats structure
+ *
+ * Plain int counters. rpccnt[] and srvrpccnt[] hold one counter per NFS
+ * procedure (NFS_NPROCS entries each); the nfsd request loop increments
+ * srvrpccnt[] at the request's procedure number.
+ */
+struct nfsstats {
+ int attrcache_hits;
+ int attrcache_misses;
+ int lookupcache_hits;
+ int lookupcache_misses;
+ int direofcache_hits;
+ int direofcache_misses;
+ int biocache_reads;
+ int read_bios;
+ int read_physios;
+ int biocache_writes;
+ int write_bios;
+ int write_physios;
+ int biocache_readlinks;
+ int readlink_bios;
+ int biocache_readdirs;
+ int readdir_bios;
+ int rpccnt[NFS_NPROCS];
+ int rpcretries;
+ int srvrpccnt[NFS_NPROCS];
+ int srvrpc_errs;
+ int srv_errs;
+ int rpcrequests;
+ int rpctimeouts;
+ int rpcunexpected;
+ int rpcinvalid;
+ int srvcache_inproghits;
+ int srvcache_idemdonehits;
+ int srvcache_nonidemdonehits;
+ int srvcache_misses;
+ int srvnqnfs_leases;
+ int srvnqnfs_maxleases;
+ int srvnqnfs_getleases;
+};
+
+/*
+ * Flags for nfssvc() system call.
+ *
+ * Each flag is a distinct bit; 0x001 and 0x020 are not assigned here.
+ */
+#define NFSSVC_BIOD 0x002
+#define NFSSVC_NFSD 0x004
+#define NFSSVC_ADDSOCK 0x008
+#define NFSSVC_AUTHIN 0x010
+#define NFSSVC_GOTAUTH 0x040
+#define NFSSVC_AUTHINFAIL 0x080
+#define NFSSVC_MNTD 0x100
+
+/*
+ * The set of signals that interrupt an I/O in progress for NFSMNT_INT mounts.
+ * What should be in this set is open to debate, but I believe that since
+ * I/O system calls on ufs are never interrupted by signals the set should
+ * be minimal. My reasoning is that many current programs that use signals
+ * such as SIGALRM will not expect file I/O system calls to be interrupted
+ * by them and break.
+ *
+ * Everything from here to the end of this header is KERNEL only.
+ */
+#ifdef KERNEL
+#define NFSINT_SIGMASK (sigmask(SIGINT)|sigmask(SIGTERM)|sigmask(SIGKILL)| \
+ sigmask(SIGHUP)|sigmask(SIGQUIT))
+
+/*
+ * Socket errors ignored for connectionless sockets??
+ * For now, ignore them all
+ *
+ * True when e is none of EINTR, ERESTART or EWOULDBLOCK and s does not
+ * have PR_CONNREQUIRED set.
+ * NOTE(review): s is presumably the protocol's pr_flags word -- confirm
+ * at the call sites.
+ */
+#define NFSIGNORE_SOERROR(s, e) \
+ ((e) != EINTR && (e) != ERESTART && (e) != EWOULDBLOCK && \
+ ((s) & PR_CONNREQUIRED) == 0)
+
+/*
+ * Nfs outstanding request list element
+ *
+ * r_next/r_prev are the list links. r_procp is the process handed to
+ * nfs_sigintr() when a request has to check for pending signals.
+ * NOTE(review): r_mreq/r_mrep/r_md/r_dpos presumably mirror the
+ * mreq/mrep/md/dpos variables used by the nfsm_* macros -- confirm.
+ */
+struct nfsreq {
+ struct nfsreq *r_next;
+ struct nfsreq *r_prev;
+ struct mbuf *r_mreq;
+ struct mbuf *r_mrep;
+ struct mbuf *r_md;
+ caddr_t r_dpos;
+ struct nfsmount *r_nmp;
+ struct vnode *r_vp;
+ u_long r_xid;
+ int r_flags; /* flags on request, see below */
+ int r_retry; /* max retransmission count */
+ int r_rexmit; /* current retrans count */
+ int r_timer; /* tick counter on reply */
+ int r_procnum; /* NFS procedure number */
+ int r_rtt; /* RTT for rpc */
+ struct proc *r_procp; /* Proc that did I/O system call */
+};
+
+/* Flag values for r_flags */
+#define R_TIMING 0x01 /* timing request (in mntp) */
+#define R_SENT 0x02 /* request has been sent */
+#define R_SOFTTERM 0x04 /* soft mnt, too many retries */
+#define R_INTR 0x08 /* intr mnt, signal pending */
+#define R_SOCKERR 0x10 /* Fatal error on socket */
+#define R_TPRINTFMSG 0x20 /* Did a tprintf msg. */
+#define R_MUSTRESEND 0x40 /* Must resend request */
+#define R_GETONEREP 0x80 /* Probe for one reply only */
+
+struct nfsstats nfsstats; /* NOTE(review): object defined (not just declared) in a header; every includer gets a tentative definition */
+
+/*
+ * A list of nfssvc_sock structures is maintained with all the sockets
+ * that require service by the nfsd.
+ * The nfsuid structs hang off of the nfssvc_sock structs in both lru
+ * and uid hash lists.
+ *
+ * NUIDHASH() masks the uid with NUIDHASHSIZ - 1, so NUIDHASHSIZ has to
+ * stay a power of two.
+ */
+#define NUIDHASHSIZ 32
+#define NUIDHASH(uid) ((uid) & (NUIDHASHSIZ - 1))
+
+/*
+ * Network address hash list element
+ *
+ * Holds either an internet address or a pointer to an mbuf.
+ */
+union nethostaddr {
+ u_long had_inetaddr;
+ struct mbuf *had_nam;
+};
+
+struct nfsuid {
+ struct nfsuid *nu_lrunext; /* MUST be first */
+ struct nfsuid *nu_lruprev; /* (an nfssvc_sock is cast to nfsuid as LRU list head) */
+ struct nfsuid *nu_hnext;
+ struct nfsuid *nu_hprev;
+ int nu_flag; /* Flags */
+ uid_t nu_uid; /* Uid mapped by this entry */
+ union nethostaddr nu_haddr; /* Host addr. for dgram sockets */
+ struct ucred nu_cr; /* Cred uid mapped to */
+};
+
+#define nu_inetaddr nu_haddr.had_inetaddr
+#define nu_nam nu_haddr.had_nam
+/* Bits for nu_flag */
+#define NU_INETADDR 0x1
+
+struct nfssvc_sock {
+ struct nfsuid *ns_lrunext; /* MUST be first */
+ struct nfsuid *ns_lruprev; /* Empty LRU list: both point at this struct */
+ struct nfssvc_sock *ns_next; /* Circular list headed by nfssvc_sockhead */
+ struct nfssvc_sock *ns_prev;
+ int ns_flag; /* SLP_ bits, see below */
+ u_long ns_sref; /* Reference count, dropped by nfsrv_slpderef() */
+ struct file *ns_fp; /* Closed and zeroed by nfsrv_zapsock() */
+ struct socket *ns_so; /* Shut down by nfsrv_zapsock() */
+ int ns_solock;
+ struct mbuf *ns_nam; /* Single mbuf, freed by nfsrv_zapsock() */
+ int ns_cc;
+ struct mbuf *ns_raw; /* mbuf chain, freed by nfsrv_zapsock() */
+ struct mbuf *ns_rawend;
+ int ns_reclen;
+ struct mbuf *ns_rec; /* mbuf chain, freed by nfsrv_zapsock() */
+ struct mbuf *ns_recend;
+ int ns_numuids;
+ struct nfsuid *ns_uidh[NUIDHASHSIZ]; /* Uid hash buckets */
+};
+
+/* Bits for "ns_flag" */
+#define SLP_VALID 0x01 /* Replies are only sent while this is set */
+#define SLP_DOREC 0x02
+#define SLP_NEEDQ 0x04
+#define SLP_DISCONN 0x08
+#define SLP_GETSTREAM 0x10
+#define SLP_INIT 0x20 /* On nfssvc_sockhead: nfsrv_init() in progress */
+#define SLP_WANTINIT 0x40 /* On nfssvc_sockhead: wakeup wanted after init */
+
+#define SLP_ALLFLAGS 0xff /* Mask cleared by nfsrv_zapsock() */
+
+/*
+ * One of these structures is allocated for each nfsd.
+ *
+ * The structures are linked through nd_next/nd_prev on a list headed by
+ * nfsd_head; an nfsd removes itself from the list and frees its
+ * structure when it leaves the service loop.
+ */
+struct nfsd {
+ struct nfsd *nd_next; /* Must be first */
+ struct nfsd *nd_prev;
+ int nd_flag; /* NFSD_ flags */
+ struct nfssvc_sock *nd_slp; /* Current socket */
+ struct mbuf *nd_nam; /* Client addr for datagram req. */
+ struct mbuf *nd_mrep; /* Req. mbuf list */
+ struct mbuf *nd_md;
+ caddr_t nd_dpos; /* Position in list */
+ int nd_procnum; /* RPC procedure number */
+ u_long nd_retxid; /* RPC xid */
+ int nd_repstat; /* Reply status value */
+ struct ucred nd_cr; /* Credentials for req. */
+ int nd_nqlflag; /* Leasing flag */
+ int nd_duration; /* Lease duration */
+ int nd_authlen; /* Authenticator len */
+ u_char nd_authstr[RPCAUTH_MAXSIZ]; /* Authenticator data */
+ struct proc *nd_procp; /* Proc ptr */
+};
+
+/* Bits for nd_flag */
+#define NFSD_WAITING 0x01
+#define NFSD_CHECKSLP 0x02
+#define NFSD_REQINPROG 0x04
+#define NFSD_NEEDAUTH 0x08
+#define NFSD_AUTHFAIL 0x10
+#endif /* KERNEL */
diff --git a/sys/sys/_sigset.h b/sys/sys/_sigset.h
new file mode 100644
index 0000000..8ccded4
--- /dev/null
+++ b/sys/sys/_sigset.h
@@ -0,0 +1,194 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)signal.h 8.2 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_SIGNAL_H_
+#define _SYS_SIGNAL_H_
+
+#define NSIG 32 /* counting 0; could be 33 (mask is 1-32) */
+
+#ifndef _ANSI_SOURCE
+#include <machine/signal.h> /* sigcontext; codes for SIGILL, SIGFPE */
+#endif
+
+#define SIGHUP 1 /* hangup */
+#define SIGINT 2 /* interrupt */
+#define SIGQUIT 3 /* quit */
+#define SIGILL 4 /* illegal instruction (not reset when caught) */
+#ifndef _POSIX_SOURCE /* signals inside these guards are hidden when _POSIX_SOURCE is defined */
+#define SIGTRAP 5 /* trace trap (not reset when caught) */
+#endif
+#define SIGABRT 6 /* abort() */
+#ifndef _POSIX_SOURCE
+#define SIGIOT SIGABRT /* compatibility: second name for SIGABRT */
+#define SIGEMT 7 /* EMT instruction */
+#endif
+#define SIGFPE 8 /* floating point exception */
+#define SIGKILL 9 /* kill (cannot be caught or ignored) */
+#ifndef _POSIX_SOURCE
+#define SIGBUS 10 /* bus error */
+#endif
+#define SIGSEGV 11 /* segmentation violation */
+#ifndef _POSIX_SOURCE
+#define SIGSYS 12 /* bad argument to system call */
+#endif
+#define SIGPIPE 13 /* write on a pipe with no one to read it */
+#define SIGALRM 14 /* alarm clock */
+#define SIGTERM 15 /* software termination signal from kill */
+#ifndef _POSIX_SOURCE
+#define SIGURG 16 /* urgent condition on IO channel */
+#endif
+#define SIGSTOP 17 /* sendable stop signal not from tty */
+#define SIGTSTP 18 /* stop signal from tty */
+#define SIGCONT 19 /* continue a stopped process */
+#define SIGCHLD 20 /* to parent on child stop or exit */
+#define SIGTTIN 21 /* to readers pgrp upon background tty read */
+#define SIGTTOU 22 /* like TTIN for output if (tp->t_local&LTOSTOP) */
+#ifndef _POSIX_SOURCE
+#define SIGIO 23 /* input/output possible signal */
+#define SIGXCPU 24 /* exceeded CPU time limit */
+#define SIGXFSZ 25 /* exceeded file size limit */
+#define SIGVTALRM 26 /* virtual time alarm */
+#define SIGPROF 27 /* profiling time alarm */
+#define SIGWINCH 28 /* window size changes */
+#define SIGINFO 29 /* information request */
+#endif
+#define SIGUSR1 30 /* user defined signal 1 */
+#define SIGUSR2 31 /* user defined signal 2; highest number defined here */
+
+#if defined(_ANSI_SOURCE) || defined(__cplusplus)
+/*
+ * Special handler values (default action, ignore, error). The language spec
+ * sez we must list exactly one parameter, even though we
+ * actually supply three. Ugh! Each expansion is parenthesized so it stays one operand, e.g. after sizeof.
+ */
+#define SIG_DFL ((void (*)(int))0)
+#define SIG_IGN ((void (*)(int))1)
+#define SIG_ERR ((void (*)(int))-1)
+#else
+#define SIG_DFL ((void (*)())0)
+#define SIG_IGN ((void (*)())1)
+#define SIG_ERR ((void (*)())-1)
+#endif
+
+#ifndef _ANSI_SOURCE
+typedef unsigned int sigset_t; /* set of signals held as a bit mask */
+
+/*
+ * Signal vector "template" used in sigaction call.
+ */
+struct sigaction {
+ void (*sa_handler)(); /* signal handler */
+ sigset_t sa_mask; /* signal mask to apply */
+ int sa_flags; /* SA_ options, see below */
+};
+#ifndef _POSIX_SOURCE
+#define SA_ONSTACK 0x0001 /* take signal on signal stack */
+#define SA_RESTART 0x0002 /* restart system call on signal return */
+#define SA_DISABLE 0x0004 /* disable taking signals on alternate stack */
+#ifdef COMPAT_SUNOS
+#define SA_USERTRAMP 0x0100 /* do not bounce off kernel's sigtramp */
+#endif
+#endif
+#define SA_NOCLDSTOP 0x0008 /* do not generate SIGCHLD on child stop; the only SA_ flag visible under _POSIX_SOURCE */
+
+/*
+ * Operation selectors (the "how" argument) for sigprocmask:
+ */
+#define SIG_BLOCK 1 /* block specified signal set */
+#define SIG_UNBLOCK 2 /* unblock specified signal set */
+#define SIG_SETMASK 3 /* set specified signal set */
+
+#ifndef _POSIX_SOURCE
+#ifndef KERNEL
+#include <sys/cdefs.h>
+#endif
+typedef void (*sig_t) __P((int)); /* type of signal function */
+
+/*
+ * Structure used in sigaltstack call.
+ */
+struct sigaltstack {
+ char *ss_base; /* signal stack base */
+ int ss_size; /* signal stack length */
+ int ss_flags; /* SA_DISABLE and/or SA_ONSTACK (the SA_ values defined for sigaction) */
+};
+#define MINSIGSTKSZ 8192 /* minimum allowable stack */
+#define SIGSTKSZ (MINSIGSTKSZ + 32768) /* recommended stack size (8192 + 32768 = 40960) */
+
+/*
+ * 4.3 compatibility:
+ * Signal vector "template" used in sigvec call.
+ */
+struct sigvec {
+ void (*sv_handler)(); /* signal handler */
+ int sv_mask; /* signal mask to apply */
+ int sv_flags; /* SV_ options, see below */
+};
+
+#define SV_ONSTACK SA_ONSTACK
+#define SV_INTERRUPT SA_RESTART /* same bit as SA_RESTART, opposite sense */
+#define sv_onstack sv_flags /* isn't compatibility wonderful! (second name for the sv_flags member) */
+
+/*
+ * Structure used in sigstack call (compare struct sigaltstack, which also carries a size).
+ */
+struct sigstack {
+ char *ss_sp; /* signal stack pointer */
+ int ss_onstack; /* current status */
+};
+
+/*
+ * Macro for converting signal number to a mask suitable for
+ * sigblock(). Signal m maps to bit m-1, so m must be at least 1; the shift is done in int.
+ */
+#define sigmask(m) (1 << ((m)-1))
+
+#define BADSIG SIG_ERR /* second name for SIG_ERR */
+
+#endif /* !_POSIX_SOURCE */
+#endif /* !_ANSI_SOURCE */
+
+/*
+ * For historical reasons; programs expect signal's return value to be
+ * defined by <sys/signal.h>. NOTE(review): __P and __BEGIN_DECLS are not defined in this file; <sys/cdefs.h> is included above only when none of KERNEL, _POSIX_SOURCE, _ANSI_SOURCE is defined -- confirm other builds get it elsewhere.
+ */
+__BEGIN_DECLS
+void (*signal __P((int, void (*) __P((int))))) __P((int));
+__END_DECLS
+#endif /* !_SYS_SIGNAL_H_ */
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
new file mode 100644
index 0000000..e6c329f
--- /dev/null
+++ b/sys/sys/bio.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)buf.h 8.7 (Berkeley) 1/21/94
+ */
+
+#ifndef _SYS_BUF_H_
+#define _SYS_BUF_H_
+#include <sys/queue.h>
+
+#define NOLIST ((struct buf *)0x87654321) /* Recognizable non-NULL marker value; not used within this header */
+
+/*
+ * The buffer header describes an I/O operation in the kernel.
+ */
+struct buf {
+ LIST_ENTRY(buf) b_hash; /* Hash chain. */
+ LIST_ENTRY(buf) b_vnbufs; /* Buffer's associated vnode. */
+ TAILQ_ENTRY(buf) b_freelist; /* Free list position if not active. */
+ struct buf *b_actf, **b_actb; /* Device driver queue when active. */
+ struct proc *b_proc; /* Associated proc; NULL if kernel. */
+ volatile long b_flags; /* B_* flags. */
+ int b_error; /* Errno value. */
+ long b_bufsize; /* Allocated buffer size. */
+ long b_bcount; /* Valid bytes in buffer (also reachable as b_active). */
+ long b_resid; /* Remaining I/O (also reachable as b_errcnt). */
+ dev_t b_dev; /* Device associated with buffer. */
+ struct { /* Single-member wrapper; b_data (defined below) names b_un.b_addr. */
+ caddr_t b_addr; /* Memory, superblocks, indirect etc. */
+ } b_un;
+ void *b_saveaddr; /* Original b_addr for physio; holds the cluster_save for clustered I/O. */
+ daddr_t b_lblkno; /* Logical block number. */
+ daddr_t b_blkno; /* Underlying physical block number. */
+ /* Function to call upon completion (see B_CALL). */
+ void (*b_iodone) __P((struct buf *));
+ struct vnode *b_vp; /* Device vnode. */
+ int b_pfcent; /* Center page when swapping cluster. */
+ int b_dirtyoff; /* Offset in buffer of dirty region. */
+ int b_dirtyend; /* Offset of end of dirty region. */
+ struct ucred *b_rcred; /* Read credentials reference. */
+ struct ucred *b_wcred; /* Write credentials reference. */
+ int b_validoff; /* Offset in buffer of valid region. */
+ int b_validend; /* Offset of end of valid region. */
+};
+
+/* Device driver compatibility definitions: alternate names for struct buf members and for biodone/biowait. */
+#define b_active b_bcount /* Driver queue head: drive active (reuses b_bcount). */
+#define b_data b_un.b_addr /* b_un.b_addr is not changeable. */
+#define b_errcnt b_resid /* Retry count while I/O in progress (reuses b_resid). */
+#define iodone biodone /* Old name for biodone. */
+#define iowait biowait /* Old name for biowait. */
+
+/*
+ * These flags are kept in b_flags. They are listed alphabetically; values ascend except for B_WRITE.
+ */
+#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
+#define B_APPENDWRITE 0x00000002 /* Append-write in progress. */
+#define B_ASYNC 0x00000004 /* Start I/O, do not wait. */
+#define B_BAD 0x00000008 /* Bad block revectoring in progress. */
+#define B_BUSY 0x00000010 /* I/O in progress. */
+#define B_CACHE 0x00000020 /* Bread found us in the cache. */
+#define B_CALL 0x00000040 /* Call b_iodone from biodone. */
+#define B_DELWRI 0x00000080 /* Delay I/O until buffer reused. */
+#define B_DIRTY 0x00000100 /* Dirty page to be pushed out async. */
+#define B_DONE 0x00000200 /* I/O completed. */
+#define B_EINTR 0x00000400 /* I/O was interrupted. */
+#define B_ERROR 0x00000800 /* I/O error occurred. */
+#define B_GATHERED 0x00001000 /* LFS: already in a segment. */
+#define B_INVAL 0x00002000 /* Does not contain valid info. */
+#define B_LOCKED 0x00004000 /* Locked in core (not reusable). */
+#define B_NOCACHE 0x00008000 /* Do not cache block after use. */
+#define B_PAGET 0x00010000 /* Page in/out of page table space. */
+#define B_PGIN 0x00020000 /* Pagein op, so swap() can count it. */
+#define B_PHYS 0x00040000 /* I/O to user memory. */
+#define B_RAW 0x00080000 /* Set by physio for raw transfers. */
+#define B_READ 0x00100000 /* Read buffer. */
+#define B_TAPE 0x00200000 /* Magnetic tape I/O. */
+#define B_UAREA 0x00400000 /* Buffer describes Uarea I/O. */
+#define B_WANTED 0x00800000 /* Process wants this buffer. */
+#define B_WRITE 0x00000000 /* Write buffer (pseudo flag: value 0, no bit set). */
+#define B_WRITEINPROG 0x01000000 /* Write in progress. */
+#define B_XXX 0x02000000 /* Debugging flag. */
+
+/*
+ * This structure describes a clustered I/O. It is stored in the b_saveaddr
+ * field of the buffer on which I/O is done. At I/O completion, the cluster
+ * callback uses the structure to parcel the I/O out to individual buffers, and
+ * then frees this structure.
+ */
+struct cluster_save {
+ long bs_bcount; /* Saved b_bcount. */
+ long bs_bufsize; /* Saved b_bufsize. */
+ void *bs_saveaddr; /* Saved b_addr. */
+ int bs_nchildren; /* Number of associated buffers. */
+ struct buf **bs_children; /* List of associated buffers. */
+};
+
+/*
+ * Zero out the buffer's data area.
+ */
+#define clrbuf(bp) { \
+ blkclr((bp)->b_data, (u_int)(bp)->b_bcount); \
+ (bp)->b_resid = 0; \
+}
+
+/* Flags to low-level allocation routines. Not b_flags bits: the values overlap B_AGE and B_APPENDWRITE. */
+#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */
+#define B_SYNC 0x02 /* Do all allocations synchronously. */
+
+#ifdef KERNEL
+int nbuf; /* The number of buffer headers. NOTE(review): this and the following are written without extern, so each includer gets a tentative definition -- confirm the build relies on common symbols. */
+struct buf *buf; /* The buffer headers. */
+char *buffers; /* The buffer contents. */
+int bufpages; /* Number of memory pages in the buffer pool. */
+struct buf *swbuf; /* Swap I/O buffer headers. */
+int nswbuf; /* Number of swap I/O buffer headers. */
+struct buf bswlist; /* Head of swap I/O buffer headers free list. */
+struct buf *bclnlist; /* Head of cleaned page list. */
+
+__BEGIN_DECLS /* Buffer routines; declared here only, the definitions are outside this header. */
+int allocbuf __P((struct buf *, int));
+int bawrite __P((struct buf *));
+int bdwrite __P((struct buf *));
+void biodone __P((struct buf *)); /* Also reachable under the old name iodone. */
+int biowait __P((struct buf *)); /* Also reachable under the old name iowait. */
+int bread __P((struct vnode *, daddr_t, int,
+ struct ucred *, struct buf **));
+int breadn __P((struct vnode *, daddr_t, int, daddr_t *, int *, int,
+ struct ucred *, struct buf **));
+int brelse __P((struct buf *));
+void bufinit __P((void));
+int bwrite __P((struct buf *));
+void cluster_callback __P((struct buf *)); /* Clustered I/O completion; see struct cluster_save. */
+int cluster_read __P((struct vnode *, u_quad_t, daddr_t, long,
+ struct ucred *, struct buf **));
+void cluster_write __P((struct buf *, u_quad_t));
+struct buf *getblk __P((struct vnode *, daddr_t, int, int, int));
+struct buf *geteblk __P((int));
+struct buf *getnewbuf __P((int slpflag, int slptimeo));
+struct buf *incore __P((struct vnode *, daddr_t));
+u_int minphys __P((struct buf *bp));
+__END_DECLS
+#endif
+#endif /* !_SYS_BUF_H_ */
diff --git a/sys/sys/diskmbr.h b/sys/sys/diskmbr.h
new file mode 100644
index 0000000..a25ee29
--- /dev/null
+++ b/sys/sys/diskmbr.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)disklabel.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define _PATH_DISKTAB "/etc/disktab"
+#define DISKTAB "/etc/disktab" /* deprecated; same path as _PATH_DISKTAB */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386 /* i386: label at offset 0 of sector 1 */
+#define LABELSECTOR 1 /* sector containing label */
+#define LABELOFFSET 0 /* offset of label in sector */
+#endif
+
+#ifndef LABELSECTOR
+#define LABELSECTOR 0 /* default: sector containing label */
+#endif
+
+#ifndef LABELOFFSET
+#define LABELOFFSET 64 /* default: offset of label in sector */
+#endif
+
+#define DISKMAGIC ((u_long) 0x82564557) /* The disk magic number (see d_magic and d_magic2) */
+#ifndef MAXPARTITIONS
+#define MAXPARTITIONS 8 /* default size of d_partitions[] unless defined before inclusion */
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+ u_long d_magic; /* the magic number (DISKMAGIC) */
+ short d_type; /* drive type (DTYPE_* below) */
+ short d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ /*
+ * d_packname contains the pack identifier and is returned when
+ * the disklabel is read off the disk or in-core copy.
+ * d_boot0 and d_boot1 are the (optional) names of the
+ * primary (block 0) and secondary (block 1-15) bootstraps
+ * as found in /usr/mdec. These are returned when using
+ * getdiskbyname(3) to retrieve the values from /etc/disktab.
+ */
+#if defined(KERNEL) || defined(STANDALONE)
+ char d_packname[16]; /* pack identifier */
+#else
+ union { /* the two boot-name pointers share storage with the pack name */
+ char un_d_packname[16]; /* pack identifier */
+ struct {
+ char *un_d_boot0; /* primary bootstrap name */
+ char *un_d_boot1; /* secondary bootstrap name */
+ } un_b;
+ } d_un;
+#define d_packname d_un.un_d_packname
+#define d_boot0 d_un.un_b.un_d_boot0
+#define d_boot1 d_un.un_b.un_d_boot1
+#endif /* ! KERNEL or STANDALONE */
+ /* disk geometry: */
+ u_long d_secsize; /* # of bytes per sector */
+ u_long d_nsectors; /* # of data sectors per track */
+ u_long d_ntracks; /* # of tracks per cylinder */
+ u_long d_ncylinders; /* # of data cylinders per unit */
+ u_long d_secpercyl; /* # of data sectors per cylinder */
+ u_long d_secperunit; /* # of data sectors per unit */
+ /*
+ * Spares (bad sector replacements) below
+ * are not counted in d_nsectors or d_secpercyl.
+ * Spare sectors are assumed to be physical sectors
+ * which occupy space at the end of each track and/or cylinder.
+ */
+ u_short d_sparespertrack; /* # of spare sectors per track */
+ u_short d_sparespercyl; /* # of spare sectors per cylinder */
+ /*
+ * Alternate cylinders include maintenance, replacement,
+ * configuration description areas, etc.
+ */
+ u_long d_acylinders; /* # of alt. cylinders per unit */
+
+ /* hardware characteristics: */
+ /*
+ * d_interleave, d_trackskew and d_cylskew describe perturbations
+ * in the media format used to compensate for a slow controller.
+ * Interleave is physical sector interleave, set up by the formatter
+ * or controller when formatting. When interleaving is in use,
+ * logically adjacent sectors are not physically contiguous,
+ * but instead are separated by some number of sectors.
+ * It is specified as the ratio of physical sectors traversed
+ * per logical sector. Thus an interleave of 1:1 implies contiguous
+ * layout, while 2:1 implies that logical sector 0 is separated
+ * by one sector from logical sector 1.
+ * d_trackskew is the offset of sector 0 on track N
+ * relative to sector 0 on track N-1 on the same cylinder.
+ * Finally, d_cylskew is the offset of sector 0 on cylinder N
+ * relative to sector 0 on cylinder N-1.
+ */
+ u_short d_rpm; /* rotational speed */
+ u_short d_interleave; /* hardware sector interleave */
+ u_short d_trackskew; /* sector 0 skew, per track */
+ u_short d_cylskew; /* sector 0 skew, per cylinder */
+ u_long d_headswitch; /* head switch time, usec */
+ u_long d_trkseek; /* track-to-track seek, usec */
+ u_long d_flags; /* generic flags (presumably the D_* bits below -- confirm) */
+#define NDDATA 5
+ u_long d_drivedata[NDDATA]; /* drive-type specific information (named via d_smdflags etc. below) */
+#define NSPARE 5
+ u_long d_spare[NSPARE]; /* reserved for future use */
+ u_long d_magic2; /* the magic number (again) */
+ u_short d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ u_short d_npartitions; /* number of partitions in following */
+ u_long d_bbsize; /* size of boot area at sn0, bytes */
+ u_long d_sbsize; /* max size of fs superblock, bytes */
+ struct partition { /* the partition table */
+ u_long p_size; /* number of sectors in partition */
+ u_long p_offset; /* starting sector */
+ u_long p_fsize; /* filesystem basic fragment size */
+ u_char p_fstype; /* filesystem type (FS_* below) */
+ u_char p_frag; /* filesystem fragments per block */
+ union { /* one u_short, read as p_cpg or p_sgs depending on the filesystem */
+ u_short cpg; /* UFS: FS cylinders per group */
+ u_short sgs; /* LFS: FS segment shift */
+ } __partition_u1;
+#define p_cpg __partition_u1.cpg
+#define p_sgs __partition_u1.sgs
+ } d_partitions[MAXPARTITIONS]; /* actually may be more */
+};
+#else /* LOCORE */
+ /*
+ * offsets for asm boot files. They must track struct disklabel by hand: 40 is where d_secsize falls with a 4-byte u_long and 2-byte short.
+ */
+ .set d_secsize,40
+ .set d_nsectors,44
+ .set d_ntracks,48
+ .set d_ncylinders,52
+ .set d_secpercyl,56
+ .set d_secperunit,60
+ .set d_end_,276 /* size of disk label (4-byte u_long, MAXPARTITIONS == 8) */
+#endif /* LOCORE */
+
+/* d_type values (0 and 9 have no DTYPE_ name; dktypenames[] lists them as "unknown" and "type 9"): */
+#define DTYPE_SMD 1 /* SMD, XSMD; VAX hp/up */
+#define DTYPE_MSCP 2 /* MSCP */
+#define DTYPE_DEC 3 /* other DEC (rk, rl) */
+#define DTYPE_SCSI 4 /* SCSI */
+#define DTYPE_ESDI 5 /* ESDI interface */
+#define DTYPE_ST506 6 /* ST506 etc. */
+#define DTYPE_HPIB 7 /* CS/80 on HP-IB */
+#define DTYPE_HPFL 8 /* HP Fiber-link */
+#define DTYPE_FLOPPY 10 /* floppy */
+
+#ifdef DKTYPENAMES /* name tables are compiled only when the includer defines DKTYPENAMES */
+static char *dktypenames[] = { /* entries are in DTYPE_ value order */
+ "unknown",
+ "SMD",
+ "MSCP",
+ "old DEC",
+ "SCSI",
+ "ESDI",
+ "ST506",
+ "HP-IB",
+ "HP-FL",
+ "type 9",
+ "floppy",
+ 0 /* terminator */
+};
+#define DKMAXTYPES (sizeof(dktypenames) / sizeof(dktypenames[0]) - 1) /* number of names, terminator excluded (11) */
+#endif
+
+/*
+ * Filesystem type and version (the p_fstype values).
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define FS_UNUSED 0 /* unused */
+#define FS_SWAP 1 /* swap */
+#define FS_V6 2 /* Sixth Edition */
+#define FS_V7 3 /* Seventh Edition */
+#define FS_SYSV 4 /* System V */
+#define FS_V71K 5 /* V7 with 1K blocks (4.1, 2.9) */
+#define FS_V8 6 /* Eighth Edition, 4K blocks */
+#define FS_BSDFFS 7 /* 4.2BSD fast file system */
+#define FS_MSDOS 8 /* MSDOS file system */
+#define FS_BSDLFS 9 /* 4.4BSD log-structured file system */
+#define FS_OTHER 10 /* in use, but unknown/unsupported */
+#define FS_HPFS 11 /* OS/2 high-performance file system */
+#define FS_ISO9660 12 /* ISO 9660, normally CD-ROM */
+#define FS_BOOT 13 /* partition contains bootstrap */
+
+#ifdef DKTYPENAMES /* name tables are compiled only when the includer defines DKTYPENAMES */
+static char *fstypenames[] = { /* entries are in FS_ value order */
+ "unused",
+ "swap",
+ "Version 6",
+ "Version 7",
+ "System V",
+ "4.1BSD",
+ "Eighth Edition",
+ "4.2BSD",
+ "MSDOS",
+ "4.4LFS",
+ "unknown",
+ "HPFS",
+ "ISO9660",
+ "boot",
+ 0 /* terminator */
+};
+#define FSMAXTYPES (sizeof(fstypenames) / sizeof(fstypenames[0]) - 1) /* number of names, terminator excluded (14) */
+#endif
+
+/*
+ * flags shared by various drives (presumably the bits of d_flags -- confirm):
+ */
+#define D_REMOVABLE 0x01 /* removable media */
+#define D_ECC 0x02 /* supports ECC */
+#define D_BADSECT 0x04 /* supports bad sector forw. */
+#define D_RAMDISK 0x08 /* disk emulator */
+#define D_CHAIN 0x10 /* can do back-back transfers */
+
+/*
+ * Drive data for SMD. Each drive type below gives its own names to the same d_drivedata[] slots, so the names overlap.
+ */
+#define d_smdflags d_drivedata[0]
+#define D_SSE 0x1 /* supports skip sectoring */
+#define d_mindist d_drivedata[1]
+#define d_maxdist d_drivedata[2]
+#define d_sdist d_drivedata[3]
+
+/*
+ * Drive data for ST506.
+ */
+#define d_precompcyl d_drivedata[0]
+#define d_gap3 d_drivedata[1] /* used only when formatting */
+
+/*
+ * Drive data for SCSI.
+ */
+#define d_blind d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values.
+ * Register identification and format
+ * are device- and driver-dependent.
+ */
+struct format_op { /* argument type of DIOCRFORMAT and DIOCWFORMAT */
+ char *df_buf; /* presumably the data buffer -- confirm */
+ int df_count; /* value-result */
+ daddr_t df_startblk; /* presumably the first block of the operation -- confirm */
+ int df_reg[8]; /* result */
+};
+
+/*
+ * Structure used internally to retrieve
+ * information about a partition on a disk.
+ */
+struct partinfo { /* argument type of DIOCGPART */
+ struct disklabel *disklab; /* the disk's label */
+ struct partition *part; /* one partition entry (presumably within disklab->d_partitions) */
+};
+
+/*
+ * Disk-specific ioctls. The _IOR/_IOW/_IOWR encoders are not defined in this header; the includer must provide them.
+ */
+ /* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO _IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO _IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO _IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART _IOW('d', 104, struct partinfo) /* get partition (encoded with _IOW although it is a get; struct partinfo holds only pointers) */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT _IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT _IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP _IOW('d', 107, int) /* set step rate */
+#define DIOCSRETRIES _IOW('d', 108, int) /* set # of retries */
+#define DIOCWLABEL _IOW('d', 109, int) /* write en/disable label */
+
+#define DIOCSBAD _IOW('d', 110, struct dkbad) /* set kernel dkbad; struct dkbad is not declared in this header */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE) /* userland C only */
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *)); /* getdiskbyname(3): retrieves label values from /etc/disktab, including d_boot0/d_boot1 */
+__END_DECLS
+
+#endif
diff --git a/sys/sys/diskpc98.h b/sys/sys/diskpc98.h
new file mode 100644
index 0000000..a25ee29
--- /dev/null
+++ b/sys/sys/diskpc98.h
@@ -0,0 +1,332 @@
+/*
+ * Copyright (c) 1987, 1988, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)disklabel.h 8.1 (Berkeley) 6/2/93
+ */
+
+/*
+ * Disk description table, see disktab(5)
+ */
+#define _PATH_DISKTAB "/etc/disktab"
+#define DISKTAB "/etc/disktab" /* deprecated */
+
+/*
+ * Each disk has a label which includes information about the hardware
+ * disk geometry, filesystem partitions, and drive specific information.
+ * The label is in block 0 or 1, possibly offset from the beginning
+ * to leave room for a bootstrap, etc.
+ */
+
+/* XXX these should be defined per controller (or drive) elsewhere, not here! */
+#ifdef i386
+#define LABELSECTOR 1 /* sector containing label */
+#define LABELOFFSET 0 /* offset of label in sector */
+#endif
+
+#ifndef LABELSECTOR
+#define LABELSECTOR 0 /* sector containing label */
+#endif
+
+#ifndef LABELOFFSET
+#define LABELOFFSET 64 /* offset of label in sector */
+#endif
+
+#define DISKMAGIC ((u_long) 0x82564557) /* The disk magic number */
+#ifndef MAXPARTITIONS
+#define MAXPARTITIONS 8
+#endif
+
+
+#ifndef LOCORE
+struct disklabel {
+ u_long d_magic; /* the magic number (presumably DISKMAGIC) */
+ short d_type; /* drive type, see DTYPE_* values below */
+ short d_subtype; /* controller/d_type specific */
+ char d_typename[16]; /* type name, e.g. "eagle" */
+ /*
+ * d_packname contains the pack identifier and is returned when
+ * the disklabel is read off the disk or in-core copy.
+ * d_boot0 and d_boot1 are the (optional) names of the
+ * primary (block 0) and secondary (block 1-15) bootstraps
+ * as found in /usr/mdec. These are returned when using
+ * getdiskbyname(3) to retrieve the values from /etc/disktab.
+ */
+#if defined(KERNEL) || defined(STANDALONE)
+ char d_packname[16]; /* pack identifier */
+#else
+ union {
+ char un_d_packname[16]; /* pack identifier */
+ struct {
+ char *un_d_boot0; /* primary bootstrap name */
+ char *un_d_boot1; /* secondary bootstrap name */
+ } un_b;
+ } d_un;
+#define d_packname d_un.un_d_packname
+#define d_boot0 d_un.un_b.un_d_boot0
+#define d_boot1 d_un.un_b.un_d_boot1
+#endif /* ! KERNEL or STANDALONE */
+ /* disk geometry: */
+ u_long d_secsize; /* # of bytes per sector */
+ u_long d_nsectors; /* # of data sectors per track */
+ u_long d_ntracks; /* # of tracks per cylinder */
+ u_long d_ncylinders; /* # of data cylinders per unit */
+ u_long d_secpercyl; /* # of data sectors per cylinder */
+ u_long d_secperunit; /* # of data sectors per unit */
+ /*
+ * Spares (bad sector replacements) below
+ * are not counted in d_nsectors or d_secpercyl.
+ * Spare sectors are assumed to be physical sectors
+ * which occupy space at the end of each track and/or cylinder.
+ */
+ u_short d_sparespertrack; /* # of spare sectors per track */
+ u_short d_sparespercyl; /* # of spare sectors per cylinder */
+ /*
+ * Alternate cylinders include maintenance, replacement,
+ * configuration description areas, etc.
+ */
+ u_long d_acylinders; /* # of alt. cylinders per unit */
+
+ /* hardware characteristics: */
+ /*
+ * d_interleave, d_trackskew and d_cylskew describe perturbations
+ * in the media format used to compensate for a slow controller.
+ * Interleave is physical sector interleave, set up by the formatter
+ * or controller when formatting. When interleaving is in use,
+ * logically adjacent sectors are not physically contiguous,
+ * but instead are separated by some number of sectors.
+ * It is specified as the ratio of physical sectors traversed
+ * per logical sector. Thus an interleave of 1:1 implies contiguous
+ * layout, while 2:1 implies that logical sector 0 is separated
+ * by one sector from logical sector 1.
+ * d_trackskew is the offset of sector 0 on track N
+ * relative to sector 0 on track N-1 on the same cylinder.
+ * Finally, d_cylskew is the offset of sector 0 on cylinder N
+ * relative to sector 0 on cylinder N-1.
+ */
+ u_short d_rpm; /* rotational speed */
+ u_short d_interleave; /* hardware sector interleave */
+ u_short d_trackskew; /* sector 0 skew, per track */
+ u_short d_cylskew; /* sector 0 skew, per cylinder */
+ u_long d_headswitch; /* head switch time, usec */
+ u_long d_trkseek; /* track-to-track seek, usec */
+ u_long d_flags; /* generic flags (presumably the D_* flags below) */
+#define NDDATA 5 /* # of d_drivedata entries */
+ u_long d_drivedata[NDDATA]; /* drive-type specific information; aliased by d_smdflags, d_precompcyl, d_blind etc. below */
+#define NSPARE 5 /* # of d_spare entries */
+ u_long d_spare[NSPARE]; /* reserved for future use */
+ u_long d_magic2; /* the magic number (again) */
+ u_short d_checksum; /* xor of data incl. partitions */
+
+ /* filesystem and partition information: */
+ u_short d_npartitions; /* number of partitions in following */
+ u_long d_bbsize; /* size of boot area at sn0, bytes */
+ u_long d_sbsize; /* max size of fs superblock, bytes */
+ struct partition { /* the partition table */
+ u_long p_size; /* number of sectors in partition */
+ u_long p_offset; /* starting sector */
+ u_long p_fsize; /* filesystem basic fragment size */
+ u_char p_fstype; /* filesystem type, see FS_* values below */
+ u_char p_frag; /* filesystem fragments per block */
+ union {
+ u_short cpg; /* UFS: FS cylinders per group */
+ u_short sgs; /* LFS: FS segment shift */
+ } __partition_u1;
+#define p_cpg __partition_u1.cpg
+#define p_sgs __partition_u1.sgs
+ } d_partitions[MAXPARTITIONS]; /* actually may be more */
+};
+#else /* LOCORE */
+ /*
+ * offsets for asm boot files.
+ */
+ .set d_secsize,40
+ .set d_nsectors,44
+ .set d_ntracks,48
+ .set d_ncylinders,52
+ .set d_secpercyl,56
+ .set d_secperunit,60
+ .set d_end_,276 /* size of disk label */
+#endif /* LOCORE */
+
+/* d_type values: */
+#define DTYPE_SMD 1 /* SMD, XSMD; VAX hp/up */
+#define DTYPE_MSCP 2 /* MSCP */
+#define DTYPE_DEC 3 /* other DEC (rk, rl) */
+#define DTYPE_SCSI 4 /* SCSI */
+#define DTYPE_ESDI 5 /* ESDI interface */
+#define DTYPE_ST506 6 /* ST506 etc. */
+#define DTYPE_HPIB 7 /* CS/80 on HP-IB */
+#define DTYPE_HPFL 8 /* HP Fiber-link */
+#define DTYPE_FLOPPY 10 /* floppy */
+
+#ifdef DKTYPENAMES
+static char *dktypenames[] = { /* entry N names the DTYPE_* constant with value N */
+ "unknown", /* 0: no DTYPE_* constant */
+ "SMD", /* DTYPE_SMD */
+ "MSCP", /* DTYPE_MSCP */
+ "old DEC", /* DTYPE_DEC */
+ "SCSI", /* DTYPE_SCSI */
+ "ESDI", /* DTYPE_ESDI */
+ "ST506", /* DTYPE_ST506 */
+ "HP-IB", /* DTYPE_HPIB */
+ "HP-FL", /* DTYPE_HPFL */
+ "type 9", /* 9: no DTYPE_* constant defined */
+ "floppy", /* DTYPE_FLOPPY */
+ 0 /* terminator; not counted by DKMAXTYPES */
+};
+#define DKMAXTYPES (sizeof(dktypenames) / sizeof(dktypenames[0]) - 1)
+#endif
+
+/*
+ * Filesystem type and version.
+ * Used to interpret other filesystem-specific
+ * per-partition information.
+ */
+#define FS_UNUSED 0 /* unused */
+#define FS_SWAP 1 /* swap */
+#define FS_V6 2 /* Sixth Edition */
+#define FS_V7 3 /* Seventh Edition */
+#define FS_SYSV 4 /* System V */
+#define FS_V71K 5 /* V7 with 1K blocks (4.1, 2.9) */
+#define FS_V8 6 /* Eighth Edition, 4K blocks */
+#define FS_BSDFFS 7 /* 4.2BSD fast file system */
+#define FS_MSDOS 8 /* MSDOS file system */
+#define FS_BSDLFS 9 /* 4.4BSD log-structured file system */
+#define FS_OTHER 10 /* in use, but unknown/unsupported */
+#define FS_HPFS 11 /* OS/2 high-performance file system */
+#define FS_ISO9660 12 /* ISO 9660, normally CD-ROM */
+#define FS_BOOT 13 /* partition contains bootstrap */
+
+#ifdef DKTYPENAMES
+static char *fstypenames[] = { /* entry N names the FS_* constant with value N */
+ "unused", /* FS_UNUSED */
+ "swap", /* FS_SWAP */
+ "Version 6", /* FS_V6 */
+ "Version 7", /* FS_V7 */
+ "System V", /* FS_SYSV */
+ "4.1BSD", /* FS_V71K */
+ "Eighth Edition", /* FS_V8 */
+ "4.2BSD", /* FS_BSDFFS */
+ "MSDOS", /* FS_MSDOS */
+ "4.4LFS", /* FS_BSDLFS */
+ "unknown", /* FS_OTHER */
+ "HPFS", /* FS_HPFS */
+ "ISO9660", /* FS_ISO9660 */
+ "boot", /* FS_BOOT */
+ 0 /* terminator; not counted by FSMAXTYPES */
+};
+#define FSMAXTYPES (sizeof(fstypenames) / sizeof(fstypenames[0]) - 1)
+#endif
+
+/*
+ * flags shared by various drives:
+ */
+#define D_REMOVABLE 0x01 /* removable media */
+#define D_ECC 0x02 /* supports ECC */
+#define D_BADSECT 0x04 /* supports bad sector forw. */
+#define D_RAMDISK 0x08 /* disk emulator */
+#define D_CHAIN 0x10 /* can do back-back transfers */
+
+/*
+ * Drive data for SMD.
+ */
+#define d_smdflags d_drivedata[0]
+#define D_SSE 0x1 /* supports skip sectoring */
+#define d_mindist d_drivedata[1]
+#define d_maxdist d_drivedata[2]
+#define d_sdist d_drivedata[3]
+
+/*
+ * Drive data for ST506.
+ */
+#define d_precompcyl d_drivedata[0]
+#define d_gap3 d_drivedata[1] /* used only when formatting */
+
+/*
+ * Drive data for SCSI.
+ */
+#define d_blind d_drivedata[0]
+
+#ifndef LOCORE
+/*
+ * Structure used to perform a format
+ * or other raw operation, returning data
+ * and/or register values.
+ * Register identification and format
+ * are device- and driver-dependent.
+ */
+struct format_op {
+ char *df_buf; /* data buffer for the operation -- NOTE(review): direction is driver-dependent, confirm */
+ int df_count; /* value-result */
+ daddr_t df_startblk; /* presumably the first block the operation applies to -- TODO confirm */
+ int df_reg[8]; /* result */
+};
+
+/*
+ * Structure used internally to retrieve
+ * information about a partition on a disk.
+ */
+struct partinfo {
+ struct disklabel *disklab; /* presumably the label of the disk holding the partition -- confirm */
+ struct partition *part; /* presumably that partition's entry within the label -- confirm */
+};
+
+/*
+ * Disk-specific ioctls.
+ */
+ /* get and set disklabel; DIOCGPART used internally */
+#define DIOCGDINFO _IOR('d', 101, struct disklabel)/* get */
+#define DIOCSDINFO _IOW('d', 102, struct disklabel)/* set */
+#define DIOCWDINFO _IOW('d', 103, struct disklabel)/* set, update disk */
+#define DIOCGPART _IOW('d', 104, struct partinfo) /* get partition */
+
+/* do format operation, read or write */
+#define DIOCRFORMAT _IOWR('d', 105, struct format_op)
+#define DIOCWFORMAT _IOWR('d', 106, struct format_op)
+
+#define DIOCSSTEP _IOW('d', 107, int) /* set step rate */
+#define DIOCSRETRIES _IOW('d', 108, int) /* set # of retries */
+#define DIOCWLABEL _IOW('d', 109, int) /* write en/disable label */
+
+#define DIOCSBAD _IOW('d', 110, struct dkbad) /* set kernel dkbad */
+
+#endif /* LOCORE */
+
+#if !defined(KERNEL) && !defined(LOCORE)
+
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+struct disklabel *getdiskbyname __P((const char *));
+__END_DECLS
+
+#endif
diff --git a/sys/sys/linedisc.h b/sys/sys/linedisc.h
new file mode 100644
index 0000000..58cb6fa
--- /dev/null
+++ b/sys/sys/linedisc.h
@@ -0,0 +1,123 @@
+/*-
+ * Copyright (c) 1990, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed
+ * to the University of California by American Telephone and Telegraph
+ * Co. or Unix System Laboratories, Inc. and are reproduced herein with
+ * the permission of UNIX System Laboratories, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)conf.h 8.3 (Berkeley) 1/21/94
+ */
+
+/*
+ * Definitions of device driver entry switches
+ */
+
+struct buf;
+struct proc;
+struct tty;
+struct uio;
+struct vnode;
+
+struct bdevsw { /* element type of the kernel's bdevsw[] table (declared below) */
+ int (*d_open) __P((dev_t dev, int oflags, int devtype,
+ struct proc *p));
+ int (*d_close) __P((dev_t dev, int fflag, int devtype,
+ struct proc *p));
+ int (*d_strategy) __P((struct buf *bp));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data,
+ int fflag, struct proc *p));
+ int (*d_dump) (); /* parameters vary by architecture */
+ int (*d_psize) __P((dev_t dev)); /* presumably reports the partition size -- TODO confirm units */
+ int d_flags; /* NOTE(review): flag values are not defined in this header */
+};
+
+#ifdef KERNEL
+extern struct bdevsw bdevsw[];
+#endif
+
+struct cdevsw { /* element type of the kernel's cdevsw[] table (declared below) */
+ int (*d_open) __P((dev_t dev, int oflags, int devtype,
+ struct proc *p));
+ int (*d_close) __P((dev_t dev, int fflag, int devtype,
+ struct proc *)); /* last parameter is unnamed here; bdevsw.d_close names it p */
+ int (*d_read) __P((dev_t dev, struct uio *uio, int ioflag));
+ int (*d_write) __P((dev_t dev, struct uio *uio, int ioflag));
+ int (*d_ioctl) __P((dev_t dev, int cmd, caddr_t data,
+ int fflag, struct proc *p));
+ int (*d_stop) __P((struct tty *tp, int rw));
+ int (*d_reset) __P((int uban)); /* XXX */
+ struct tty *d_ttys; /* presumably the driver's tty structures, if any -- confirm */
+ int (*d_select) __P((dev_t dev, int which, struct proc *p));
+ int (*d_mmap) __P(()); /* no parameter list is declared for this entry */
+ int (*d_strategy) __P((struct buf *bp));
+};
+
+#ifdef KERNEL
+extern struct cdevsw cdevsw[];
+
+/* symbolic sleep message strings */
+extern char devopn[], devio[], devwait[], devin[], devout[];
+extern char devioc[], devcls[];
+#endif
+
+struct linesw { /* element type of the kernel's linesw[] table (declared below) */
+ int (*l_open) __P((dev_t dev, struct tty *tp));
+ int (*l_close) __P((struct tty *tp, int flag));
+ int (*l_read) __P((struct tty *tp, struct uio *uio,
+ int flag));
+ int (*l_write) __P((struct tty *tp, struct uio *uio,
+ int flag));
+ int (*l_ioctl) __P((struct tty *tp, int cmd, caddr_t data,
+ int flag, struct proc *p));
+ int (*l_rint) __P((int c, struct tty *tp)); /* presumably input of character c on tp -- confirm */
+ int (*l_start) __P((struct tty *tp));
+ int (*l_modem) __P((struct tty *tp, int flag));
+};
+
+#ifdef KERNEL
+extern struct linesw linesw[];
+#endif
+
+struct swdevt { /* element type of the kernel's swdevt[] table (declared below) */
+ dev_t sw_dev; /* presumably the swap device -- confirm */
+ int sw_flags; /* presumably the SW_* flags below; also reachable as sw_freed */
+ int sw_nblks; /* presumably the size of the swap area in blocks -- confirm */
+ struct vnode *sw_vp; /* presumably the vnode for sw_dev -- confirm */
+};
+#define SW_FREED 0x01
+#define SW_SEQUENTIAL 0x02
+#define sw_freed sw_flags /* XXX compat */
+
+#ifdef KERNEL
+extern struct swdevt swdevt[];
+#endif
diff --git a/sys/sys/selinfo.h b/sys/sys/selinfo.h
new file mode 100644
index 0000000..a279c59
--- /dev/null
+++ b/sys/sys/selinfo.h
@@ -0,0 +1,56 @@
+/*-
+ * Copyright (c) 1992, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)select.h 8.2 (Berkeley) 1/4/94
+ */
+
+#ifndef _SYS_SELECT_H_
+#define _SYS_SELECT_H_
+
+/*
+ * Used to maintain information about processes that wish to be
+ * notified when I/O becomes possible.
+ */
+struct selinfo {
+ pid_t si_pid; /* process to be notified */
+ short si_flags; /* SI_* flags, see below */
+};
+#define SI_COLL 0x0001 /* collision occurred */
+
+#ifdef KERNEL
+struct proc;
+
+void selrecord __P((struct proc *selector, struct selinfo *));
+void selwakeup __P((struct selinfo *));
+#endif
+
+#endif /* !_SYS_SELECT_H_ */
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
new file mode 100644
index 0000000..5322771
--- /dev/null
+++ b/sys/sys/timetc.h
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 1982, 1986, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)time.h 8.1 (Berkeley) 6/2/93
+ */
+
+#ifndef _SYS_TIME_H_
+#define _SYS_TIME_H_
+
+/*
+ * Structure returned by gettimeofday(2) system call,
+ * and used in other calls.
+ */
+struct timeval {
+	long	tv_sec;		/* seconds */
+	long	tv_usec;	/* and microseconds */
+};
+
+/* Structure defined by POSIX.4 to be like a timeval. */
+struct timespec {
+	long	ts_sec;		/* seconds */
+	long	ts_nsec;	/* and nanoseconds */
+};
+
+/* Convert between timeval (microseconds) and timespec (nanoseconds); nsec -> usec truncates. */
+/* do { } while (0) makes each macro one statement (safe before else); arguments are evaluated twice. */
+#define	TIMEVAL_TO_TIMESPEC(tv, ts) do {				\
+	(ts)->ts_sec = (tv)->tv_sec;					\
+	(ts)->ts_nsec = (tv)->tv_usec * 1000;				\
+} while (0)
+#define	TIMESPEC_TO_TIMEVAL(tv, ts) do {				\
+	(tv)->tv_sec = (ts)->ts_sec;					\
+	(tv)->tv_usec = (ts)->ts_nsec / 1000;				\
+} while (0)
+
+struct timezone {
+ int tz_minuteswest; /* minutes west of Greenwich */
+ int tz_dsttime; /* type of dst correction, see DST_* values below */
+};
+#define DST_NONE 0 /* not on dst */
+#define DST_USA 1 /* USA style dst */
+#define DST_AUST 2 /* Australian style dst */
+#define DST_WET 3 /* Western European dst */
+#define DST_MET 4 /* Middle European dst */
+#define DST_EET 5 /* Eastern European dst */
+#define DST_CAN 6 /* Canada */
+
+/* Operations on timevals; arguments are evaluated more than once, and cmp is an operator token such as <. */
+#define	timerclear(tvp)		((tvp)->tv_sec = (tvp)->tv_usec = 0)
+#define	timerisset(tvp)		((tvp)->tv_sec || (tvp)->tv_usec)
+#define	timercmp(tvp, uvp, cmp)						\
+	(((tvp)->tv_sec == (uvp)->tv_sec) ?				\
+	    ((tvp)->tv_usec cmp (uvp)->tv_usec) :			\
+	    ((tvp)->tv_sec cmp (uvp)->tv_sec))
+
+/*
+ * Names of the interval timers, and structure
+ * defining a timer setting.
+ */
+#define ITIMER_REAL 0
+#define ITIMER_VIRTUAL 1
+#define ITIMER_PROF 2
+
+struct itimerval {
+ struct timeval it_interval; /* timer interval (presumably the reload period -- confirm in setitimer(2)) */
+ struct timeval it_value; /* current value */
+};
+
+/*
+ * Getkerninfo clock information structure
+ */
+struct clockinfo {
+ int hz; /* clock frequency (presumably ticks per second -- confirm) */
+ int tick; /* micro-seconds per hz tick */
+ int stathz; /* statistics clock frequency */
+ int profhz; /* profiling clock frequency */
+};
+
+#ifndef KERNEL
+#include <time.h>
+
+#ifndef _POSIX_SOURCE
+#include <sys/cdefs.h>
+
+__BEGIN_DECLS
+int adjtime __P((const struct timeval *, struct timeval *));
+int getitimer __P((int, struct itimerval *));
+int gettimeofday __P((struct timeval *, struct timezone *));
+int setitimer __P((int, const struct itimerval *, struct itimerval *));
+int settimeofday __P((const struct timeval *, const struct timezone *));
+int utimes __P((const char *, const struct timeval *));
+__END_DECLS
+#endif /* !POSIX */
+
+#endif /* !KERNEL */
+
+#endif /* !_SYS_TIME_H_ */
diff --git a/sys/tools/vnode_if.awk b/sys/tools/vnode_if.awk
new file mode 100644
index 0000000..e190fa0
--- /dev/null
+++ b/sys/tools/vnode_if.awk
@@ -0,0 +1,433 @@
+#!/bin/sh -
+#
+# Copyright (c) 1992, 1993
+# The Regents of the University of California. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# 1. Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# 2. Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution.
+# 3. All advertising materials mentioning features or use of this software
+# must display the following acknowledgement:
+# This product includes software developed by the University of
+# California, Berkeley and its contributors.
+# 4. Neither the name of the University nor the names of its contributors
+# may be used to endorse or promote products derived from this software
+# without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+#
+
+# Script to produce VFS front-end sugar.
+#
+# usage: vnode_if.sh srcfile
+# (where srcfile is currently /sys/kern/vnode_if.src)
+#
+# These awk scripts are not particularly well written, specifically they
+# don't use arrays well and figure out the same information repeatedly.
+# Please rewrite them if you actually understand how to use awk. Note,
+# they use nawk extensions and gawk's toupper.
+
+if [ $# -ne 1 ] ; then
+ echo 'usage: vnode_if.sh srcfile'
+ exit 1
+fi
+
+# Name of the source file.
+SRC=$1
+
+# Names of the created files.
+CFILE=vnode_if.c
+HEADER=vnode_if.h
+
+# Awk program (must support nawk extensions and gawk's "toupper")
+# Use "awk" at Berkeley, "gawk" elsewhere.
+AWK=awk
+
+# Print out header information for vnode_if.h.
+cat << END_OF_LEADING_COMMENT > $HEADER
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+extern struct vnodeop_desc vop_default_desc;
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.h.
+$AWK '
+	NF == 0 || $0 ~ "^#" {
+		next;
+	}
+	{
+		# The first field of the header line is the operation name.
+		name = $1;
+		uname = toupper(name);
+
+		# Collect the argument lines up to the closing "};" into a[].
+		for (c1 = 0;; ++c1) {
+			if (getline <= 0)
+				exit
+			if ($0 ~ "^};")
+				break;
+			a[c1] = $0;
+		}
+
+		# Print out the vop_F_args structure.  Each member is the
+		# argument type followed by the argument name prefixed with a_.
+		printf("struct %s_args {\n\tstruct vnodeop_desc *a_desc;\n", name);
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			# substr() counts from 1; with a start of 0 some awks return one char less.
+			beg = match(t[c3], "[^*]");
+			printf("%sa_%s\n", substr(t[c4], 1, beg - 1), substr(t[c4], beg));
+		}
+		printf("};\n");
+
+		# Print out extern declaration.
+		printf("extern struct vnodeop_desc %s_desc;\n", name);
+
+		# Print out the inline wrapper function: its name and argument list.
+		printf("static inline int %s(", uname);
+		sep = ", ";
+		for (c2 = 0; c2 < c1; ++c2) {
+			if (c2 == c1 - 1)
+				sep = ")\n";
+			c3 = split(a[c2], t);
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("%s%s", substr(t[c3], beg, end - beg), sep);
+		}
+		# K&R-style parameter declarations: the type, then stars and bare name.
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			if (t[2] ~ "WILLRELE")
+				c4 = 3;
+			else
+				c4 = 2;
+			for (; c4 < c3; ++c4)
+				printf("%s ", t[c4]);
+			beg = match(t[c3], "[^*]");
+			printf("%s%s\n", substr(t[c4], 1, beg - 1), substr(t[c4], beg));
+		}
+		printf("{\n\tstruct %s_args a;\n\n", name);
+		printf("\ta.a_desc = VDESC(%s);\n", name);
+		# Copy every argument into the matching a_ member.
+		for (c2 = 0; c2 < c1; ++c2) {
+			c3 = split(a[c2], t);
+			printf("\t");
+			beg = match(t[c3], "[^*]");
+			end = match(t[c3], ";");
+			printf("a.a_%s = %s\n", substr(t[c3], beg, end - beg), substr(t[c3], beg));
+		}
+		c1 = split(a[0], t);	# the call is dispatched through the first argument
+		beg = match(t[c1], "[^*]");
+		end = match(t[c1], ";");
+		printf("\treturn (VCALL(%s, VOFFSET(%s), &a));\n}\n",
+		    substr(t[c1], beg, end - beg), name);
+	}' < $SRC >> $HEADER
+
+# Print out header information for vnode_if.c.
+cat << END_OF_LEADING_COMMENT > $CFILE
+/*
+ * This file is produced automatically.
+ * Do not modify anything in here by hand.
+ *
+ * Created from @(#)vnode_if.sh 8.1 (Berkeley) 6/10/93
+ */
+
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+
+struct vnodeop_desc vop_default_desc = {
+ 0,
+ "default",
+ 0,
+ NULL,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+
+END_OF_LEADING_COMMENT
+
+# Awk script to take vnode_if.src and turn it into vnode_if.c.
+$AWK 'function kill_surrounding_ws (s) {
+ sub (/^[ \t]*/, "", s); # strip leading blanks and tabs
+ sub (/[ \t]*$/, "", s); # strip trailing blanks and tabs
+ return s;
+ }
+
+	function read_args() {
+		numargs = 0;
+		while ((getline ln) > 0) {	# > 0: also stop on a read error (-1)
+			if (ln ~ /}/)
+				break;
+
+			# Delete comments, if any.
+			gsub (/\/\*.*\*\//, "", ln);
+
+			# Delete leading/trailing space.
+			ln = kill_surrounding_ws(ln);
+
+			# Pick off direction.
+			if (1 == sub(/^INOUT[ \t]+/, "", ln))
+				dir = "INOUT";
+			else if (1 == sub(/^IN[ \t]+/, "", ln))
+				dir = "IN";
+			else if (1 == sub(/^OUT[ \t]+/, "", ln))
+				dir = "OUT";
+			else
+				bail("No IN/OUT direction for \"" ln "\".");
+
+			# check for "WILLRELE"
+			if (1 == sub(/^WILLRELE[ \t]+/, "", ln)) {
+				rele = "WILLRELE";
+			} else {
+				rele = "WONTRELE";
+			};
+
+			# kill trailing ;
+			if (1 != sub (/;$/, "", ln)) {
+				bail("Missing end-of-line ; in \"" ln "\".");
+			};
+
+			# pick off variable name
+			if (!(i = match(ln, /[A-Za-z0-9_]+$/))) {
+				bail("Missing var name \"a_foo\" in \"" ln "\".");
+			};
+			arg = substr (ln, i);
+			# Want to <<substr(ln, i) = "";>>, but nawk cannot; hack around this.
+			ln = substr(ln, 1, i-1);
+
+			# what is left must be type (but clean it up some)
+			type = ln;
+			gsub (/[ \t]+/, " ", type);	# condense whitespace
+			type = kill_surrounding_ws(type);
+
+			# (boy this was easier in Perl)
+
+			numargs++;
+			dirs[numargs] = dir;
+			reles[numargs] = rele;
+			types[numargs] = type;
+			args[numargs] = arg;
+		};
+	}
+
+	# Report a malformed argument line on stderr and stop with status 1.
+	function bail (msg) { print "vnode_if.sh: " msg | "cat 1>&2"; exit 1; }
+
+ function generate_operation_vp_offsets() {
+ printf ("int %s_vp_offsets[] = {\n", name);
+ # as a side effect, figure out the releflags
+ releflags = "";
+ vpnum = 0; # counts only the "struct vnode *" arguments seen so far
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode *") {
+ printf ("\tVOPARG_OFFSETOF(struct %s_args,a_%s),\n",
+ name, args[i]);
+ if (reles[i] == "WILLRELE") {
+ releflags = releflags "|VDESC_VP" vpnum "_WILLRELE";
+ };
+ vpnum++;
+ };
+ };
+ sub (/^\|/, "", releflags); # drop the leading "|" left by the concatenation above
+ print "\tVDESC_NO_OFFSET"; # last entry of every emitted offsets array
+ print "};";
+ }
+
+ function find_arg_with_type (type) {
+ for (i=1; i<=numargs; i++) { # NOTE: i is not a declared local, so the global i is overwritten
+ if (types[i] == type) { # first argument of this exact type wins
+ return "VOPARG_OFFSETOF(struct " name "_args,a_" args[i] ")";
+ };
+ };
+ return "VDESC_NO_OFFSET"; # no argument has this type
+ }
+
+ function generate_operation_desc() {
+ printf ("struct vnodeop_desc %s_desc = {\n", name);
+ # offset
+ printf ("\t0,\n");
+ # printable name
+ printf ("\t\"%s\",\n", name);
+ # flags
+ vppwillrele = "";
+ for (i=1; i<=numargs; i++) {
+ if (types[i] == "struct vnode **" &&
+ (reles[i] == "WILLRELE")) {
+ vppwillrele = "|VDESC_VPP_WILLRELE";
+ };
+ };
+ if (releflags == "") { # releflags is the global set by generate_operation_vp_offsets()
+ printf ("\t0%s,\n", vppwillrele);
+ } else {
+ printf ("\t%s%s,\n", releflags, vppwillrele);
+ };
+ # vp offsets
+ printf ("\t%s_vp_offsets,\n", name);
+ # vpp (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct vnode **"));
+ # cred (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct ucred *"));
+ # proc (if any)
+ printf ("\t%s,\n", find_arg_with_type("struct proc *"));
+ # componentname
+ printf ("\t%s,\n", find_arg_with_type("struct componentname *"));
+ # transport layer information
+ printf ("\tNULL,\n};\n");
+ }
+
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # get the function name
+ name = $1;
+
+ # get the function arguments
+ read_args(); # consumes input lines through the closing brace
+
+ # Print out the vop_F_vp_offsets structure. This all depends
+ # on naming conventions and nothing else.
+ generate_operation_vp_offsets(); # also sets releflags, used by the next call
+
+ # Print out the vnodeop_desc structure.
+ generate_operation_desc();
+
+ printf "\n";
+
+ }' < $SRC >> $CFILE
+# THINGS THAT DON'T WORK RIGHT YET.
+#
+# Two existing BSD vnodeops (bwrite and strategy) don't take any vnodes as
+# arguments. This means that these operations can't function successfully
+# through a bypass routine.
+#
+# Bwrite and strategy will be replaced when the VM page/buffer cache
+# integration happens.
+#
+# To get around this problem for now we handle these ops as special cases.
+
+cat << END_OF_SPECIAL_CASES >> $HEADER
+#include <sys/buf.h>
+struct vop_strategy_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_strategy_desc;
+static inline int VOP_STRATEGY(bp)
+ struct buf *bp;
+{
+ struct vop_strategy_args a;
+
+ a.a_desc = VDESC(vop_strategy);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_strategy), &a));
+}
+
+struct vop_bwrite_args {
+ struct vnodeop_desc *a_desc;
+ struct buf *a_bp;
+};
+extern struct vnodeop_desc vop_bwrite_desc;
+static inline int VOP_BWRITE(bp)
+ struct buf *bp;
+{
+ struct vop_bwrite_args a;
+
+ a.a_desc = VDESC(vop_bwrite);
+ a.a_bp = bp;
+ return (VCALL((bp)->b_vp, VOFFSET(vop_bwrite), &a));
+}
+END_OF_SPECIAL_CASES
+
+cat << END_OF_SPECIAL_CASES >> $CFILE
+int vop_strategy_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_strategy_desc = {
+ 0,
+ "vop_strategy",
+ 0,
+ vop_strategy_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+int vop_bwrite_vp_offsets[] = {
+ VDESC_NO_OFFSET
+};
+struct vnodeop_desc vop_bwrite_desc = {
+ 0,
+ "vop_bwrite",
+ 0,
+ vop_bwrite_vp_offsets,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ VDESC_NO_OFFSET,
+ NULL,
+};
+END_OF_SPECIAL_CASES
+
+# Add the vfs_op_descs array to the C file.
+$AWK '
+ BEGIN {
+ printf("\nstruct vnodeop_desc *vfs_op_descs[] = {\n");
+ printf("\t&vop_default_desc, /* MUST BE FIRST */\n");
+ printf("\t&vop_strategy_desc, /* XXX: SPECIAL CASE */\n");
+ printf("\t&vop_bwrite_desc, /* XXX: SPECIAL CASE */\n");
+ }
+ END {
+ printf("\tNULL\n};\n");
+ }
+ NF == 0 || $0 ~ "^#" {
+ next;
+ }
+ {
+ # Get the function name.
+ printf("\t&%s_desc,\n", $1);
+
+ # Skip the function arguments.
+ for (;;) {
+ if (getline <= 0)
+ exit # input ended inside an argument list; the END rule still closes the array
+ if ($0 ~ "^};")
+ break;
+ }
+ }' < $SRC >> $CFILE
+
OpenPOWER on IntegriCloud