diff options
Diffstat (limited to 'sys/fs')
82 files changed, 35047 insertions, 0 deletions
diff --git a/sys/fs/cd9660/TODO b/sys/fs/cd9660/TODO new file mode 100644 index 0000000..cf3fadc --- /dev/null +++ b/sys/fs/cd9660/TODO @@ -0,0 +1,43 @@ +# $Id$ + + 2) should understand Rock Ridge + + Yes, we have the following functions. + + o Symbolic Link + o Real Name(long name) + o File Attribute + o Time stamp + o uid, gid + o Devices + o Relocated directories + + Except the following: + + o POSIX device number mapping + + There is some preliminary stuff in there that (ab-)uses the mknod + system call, but this needs a writable filesystem + + 5) should have name translation enabled by mount flag + + Yes. We can disable the Rock Ridge Extension with the following option: + + "mount -t isofs -o -norrip /dev/cd0d /cdrom" + + 6) should run as a user process, and not take up kernel space (cdroms + are slow) + + Not yet. + + 7) ECMA support. + + Not yet. We need not only a technical spec but also ECMA format + cd-rom itself! + + 8) Character set change by SVD ( multi SVD support ) + + Not yet. We should also hack the other part of system as 8 bit + clean. As far as I know, if you export the cdrom by NFS, the client + can access the 8 bit clean (i.e. Solaris Japanese with EUC code ) + diff --git a/sys/fs/cd9660/TODO.hibler b/sys/fs/cd9660/TODO.hibler new file mode 100644 index 0000000..660b268 --- /dev/null +++ b/sys/fs/cd9660/TODO.hibler @@ -0,0 +1,14 @@ +1. Investigate making ISOFS another UFS shared filesystem (ala FFS/MFS/LFS). + Since it was modelled after the inode code, we might be able to merge + them back. It looks like a separate (but very similar) lookup routine + will be needed due to the associated file stuff. + +2. It would be nice to be able to use the vfs_cluster code. + Unfortunately, if the logical block size is smaller than the page size, + it won't work. Also, if throughput is relatively constant for any + block size (as it is for the HP drive--150kbs) then clustering may not + buy much (or may even hurt when vfs_cluster comes up with a large sync + cluster). + +3. 
Seems like there should be a "notrans" or some such mount option to show + filenames as they really are without lower-casing. Does this make sense? diff --git a/sys/fs/cd9660/cd9660_bmap.c b/sys/fs/cd9660/cd9660_bmap.c new file mode 100644 index 0000000..e787f83 --- /dev/null +++ b/sys/fs/cd9660/cd9660_bmap.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cd9660_bmap.c 8.3 (Berkeley) 1/23/94 + * $Id: cd9660_bmap.c,v 1.6 1997/02/22 09:38:47 peter Exp $ + */ + +#include <sys/param.h> +#include <sys/vnode.h> +#include <sys/mount.h> + +#include <isofs/cd9660/iso.h> +#include <isofs/cd9660/cd9660_node.h> + +/* + * Bmap converts a the logical block number of a file to its physical block + * number on the disk. The conversion is done by using the logical block + * number to index into the data block (extent) for the file. + */ +int +cd9660_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + struct iso_node *ip = VTOI(ap->a_vp); + daddr_t lblkno = ap->a_bn; + int bshift; + + /* + * Check for underlying vnode requests and ensure that logical + * to physical mapping is requested. + */ + if (ap->a_vpp != NULL) + *ap->a_vpp = ip->i_devvp; + if (ap->a_bnp == NULL) + return (0); + + /* + * Compute the requested block number + */ + bshift = ip->i_mnt->im_bshift; + *ap->a_bnp = (ip->iso_start + lblkno) << (bshift - DEV_BSHIFT); + + /* + * Determine maximum number of readahead blocks following the + * requested block. 
+ */ + if (ap->a_runp) { + int nblk; + + nblk = (ip->i_size >> bshift) - (lblkno + 1); + if (nblk <= 0) + *ap->a_runp = 0; + else if (nblk >= (MAXBSIZE >> bshift)) + *ap->a_runp = (MAXBSIZE >> bshift) - 1; + else + *ap->a_runp = nblk; + } + + if (ap->a_runb) { + *ap->a_runb = 0; + } + + return 0; +} diff --git a/sys/fs/cd9660/cd9660_lookup.c b/sys/fs/cd9660/cd9660_lookup.c new file mode 100644 index 0000000..3d0ff74 --- /dev/null +++ b/sys/fs/cd9660/cd9660_lookup.c @@ -0,0 +1,422 @@ +/*- + * Copyright (c) 1989, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)ufs_lookup.c 7.33 (Berkeley) 5/19/91 + * + * @(#)cd9660_lookup.c 8.2 (Berkeley) 1/23/94 + * $Id: cd9660_lookup.c,v 1.20 1997/11/07 08:52:50 phk Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/mount.h> + +#include <isofs/cd9660/iso.h> +#include <isofs/cd9660/cd9660_node.h> +#include <isofs/cd9660/iso_rrip.h> + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on + * whether the name is to be looked up, created, renamed, or deleted. + * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. 
/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on
 * whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and iput
 * instead of two iputs.
 *
 * Overall outline of ufs_lookup (from which this routine is derived):
 *
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * In this implementation the notfound path returns EROFS for CREATE and
 * RENAME and ENOENT otherwise; on success *a_vpp is set and 0 is returned.
 *
 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode unlocked.
 */
int
cd9660_lookup(ap)
	struct vop_cachedlookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	register struct vnode *vdp;	/* vnode for directory being searched */
	register struct iso_node *dp;	/* inode for directory being searched */
	register struct iso_mnt *imp;	/* file system that directory is in */
	struct buf *bp;			/* a buffer of directory entries */
	struct iso_directory_record *ep = 0;/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	int saveoffset = 0;		/* offset of last directory entry in dir */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by cd9660_vget_internal */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* 1 => lockparent flag is set */
	int wantparent;			/* 1 => wantparent or lockparent flag */
					/* NOTE(review): assigned but never read below */
	int error;
	ino_t ino = 0;			/* inode number of a matching entry, 0 if none yet */
	int reclen;			/* length of the current directory record */
	u_short namelen;		/* length of the current entry's name */
	int isoflags;			/* flags byte of the current entry */
	char altname[NAME_MAX];		/* name produced by cd9660_rrip_getname */
	int res;			/* result of isofncmp */
	int assoc, len;			/* associated-file request; length of name */
	char *name;			/* component being looked up */
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	struct proc *p = cnp->cn_proc;

	bp = NULL;
	*vpp = NULL;
	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	imp = dp->i_mnt;
	lockparent = flags & LOCKPARENT;
	wantparent = flags & (LOCKPARENT|WANTPARENT);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 */

	len = cnp->cn_namelen;
	name = cnp->cn_nameptr;
	/*
	 * A leading `=' means, we are looking for an associated file
	 * (never on Rock Ridge mounts; the marker is stripped from the name).
	 */
	if ((assoc = (imp->iso_ftype != ISO_FTYPE_RRIP && *name == ASSOCCHAR)))
	{
		len--;
		name++;
	}

	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	bmask = imp->im_bmask;
	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff > dp->i_size) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		dp->i_offset = dp->i_diroff;
		/*
		 * Only pre-read the block when resuming in the middle of it;
		 * a block-aligned offset is read by the loop below.
		 */
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
				return (error);
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
	endsearch = dp->i_size;

searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If offset is on a block boundary,
		 * read the next directory block.
		 * Release previous if it exists.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			if ((error =
			    cd9660_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)) != 0)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * Get pointer to next entry.
		 */
		ep = (struct iso_directory_record *)
			((char *)bp->b_data + entryoffsetinblock);

		reclen = isonum_711(ep->length);
		if (reclen == 0) {
			/* skip to next block, if any */
			dp->i_offset =
			    (dp->i_offset & ~bmask) + imp->logical_block_size;
			continue;
		}

		if (reclen < ISO_DIRECTORY_RECORD_SIZE)
			/* illegal entry, stop */
			break;

		if (entryoffsetinblock + reclen > imp->logical_block_size)
			/* entries are not allowed to cross boundaries */
			break;

		namelen = isonum_711(ep->name_len);
		/* On High Sierra the flags byte is read from date[6]. */
		isoflags = isonum_711(imp->iso_ftype == ISO_FTYPE_HIGH_SIERRA?
				      &ep->date[6]: ep->flags);

		if (reclen < ISO_DIRECTORY_RECORD_SIZE + namelen)
			/* illegal entry, stop */
			break;

		/*
		 * Check for a name match.
		 */
		switch (imp->iso_ftype) {
		default:
			/*
			 * Consider the entry only when its flag bit 4 agrees
			 * with whether an associated file was requested.
			 */
			if (!(isoflags & 4) == !assoc) {
				if ((len == 1
				     && *name == '.')
				    || (flags & ISDOTDOT)) {
					/*
					 * "." and ".." are matched against one-byte
					 * names: byte 0 for ".", byte 1 for "..".
					 */
					if (namelen == 1
					    && ep->name[0] == ((flags & ISDOTDOT) ? 1 : 0)) {
						/*
						 * Save directory entry's inode number and
						 * release directory buffer.
						 */
						dp->i_ino = isodirino(ep, imp);
						goto found;
					}
					/*
					 * Keep scanning only past the "." entry;
					 * anything else ends the search.
					 */
					if (namelen != 1
					    || ep->name[0] != 0)
						goto notfound;
				} else if (!(res = isofncmp(name,len,
							    ep->name,namelen))) {
					/*
					 * Name matches: remember it and keep scanning.
					 * Entries with flag bit 2 set (treated as
					 * directories elsewhere in this code) take
					 * their number from isodirino(); others use
					 * the byte address of the directory record.
					 */
					if (isoflags & 2)
						ino = isodirino(ep, imp);
					else
						ino = dbtob(bp->b_blkno)
							+ entryoffsetinblock;
					saveoffset = dp->i_offset;
				} else if (ino)
					/* first non-match after a match: use the saved one */
					goto foundino;
#ifdef	NOSORTBUG	/* On some CDs directory entries are not sorted correctly */
				else if (res < 0)
					goto notfound;
				else if (res > 0 && numdirpasses == 2)
					numdirpasses++;
#endif
			}
			break;
		case ISO_FTYPE_RRIP:
			if (isonum_711(ep->flags)&2)
				ino = isodirino(ep, imp);
			else
				ino = dbtob(bp->b_blkno) + entryoffsetinblock;
			dp->i_ino = ino;
			/* May rewrite namelen and dp->i_ino (both passed by address). */
			cd9660_rrip_getname(ep,altname,&namelen,&dp->i_ino,imp);
			if (namelen == cnp->cn_namelen
			    && !bcmp(name,altname,namelen))
				goto found;
			ino = 0;
			break;
		}
		dp->i_offset += reclen;
		entryoffsetinblock += reclen;
	}
	if (ino) {
foundino:
		dp->i_ino = ino;
		/*
		 * The scan has moved past the matching entry; go back to it,
		 * re-reading its block if it is a different one.
		 */
		if (saveoffset != dp->i_offset) {
			if (lblkno(imp, dp->i_offset) !=
			    lblkno(imp, saveoffset)) {
				if (bp != NULL)
					brelse(bp);
				if ((error = cd9660_blkatoff(vdp,
				    (off_t)saveoffset, NULL, &bp)) != 0)
					return (error);
			}
			entryoffsetinblock = saveoffset & bmask;
			ep = (struct iso_directory_record *)
				((char *)bp->b_data + entryoffsetinblock);
			dp->i_offset = saveoffset;
		}
		goto found;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp);

	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 * (*vpp is still NULL here.)
	 */
	if (cnp->cn_flags & MAKEENTRY)
		cache_enter(vdp, *vpp, cnp);
	if (nameiop == CREATE || nameiop == RENAME)
		return (EROFS);
	return (ENOENT);

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		dp->i_diroff = dp->i_offset;

	/*
	 * Step through the translation in the name.  We do not `iput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to insure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the `iget' for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	/*
	 * If ino is different from dp->i_ino,
	 * it's a relocated directory.
	 */
	if (flags & ISDOTDOT) {
		VOP_UNLOCK(pdp, 0, p);	/* race to get the inode */
		error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp,
					     dp->i_ino != ino, ep);
		brelse(bp);
		if (error) {
			/* re-lock the directory before reporting the error */
			vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p);
			return (error);
		}
		if (lockparent && (flags & ISLASTCN) &&
		    (error = vn_lock(pdp, LK_EXCLUSIVE, p))) {
			vput(tdp);
			return (error);
		}
		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		brelse(bp);
		VREF(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = cd9660_vget_internal(vdp->v_mount, dp->i_ino, &tdp,
					     dp->i_ino != ino, ep);
		brelse(bp);
		if (error)
			return (error);
		/* keep the parent locked only for LOCKPARENT on the last component */
		if (!lockparent || !(flags & ISLASTCN))
			VOP_UNLOCK(pdp, 0, p);
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
	 */
	if (cnp->cn_flags & MAKEENTRY)
		cache_enter(vdp, *vpp, cnp);
	return (0);
}

/*
 * Return buffer with the contents of block "offset" from the beginning of
 * directory "vp".  If "res" is non-zero, fill it in with a pointer to the
 * byte at "offset" inside that buffer (b_data + blkoff(imp, offset)).
 *
 * On success returns 0 and stores the buffer in *bpp.  On a bread() error
 * the buffer is released, *bpp is set to NULL and the error is returned.
 */
int
cd9660_blkatoff(vp, offset, res, bpp)
	struct vnode *vp;
	off_t offset;
	char **res;
	struct buf **bpp;
{
	struct iso_node *ip;
	register struct iso_mnt *imp;
	struct buf *bp;
	daddr_t lbn;
	int bsize, error;

	ip = VTOI(vp);
	imp = ip->i_mnt;
	lbn = lblkno(imp, offset);
	bsize = blksize(imp, ip, lbn);

	if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) {
		/* NOTE(review): relies on bread() handing back a buffer even on error */
		brelse(bp);
		*bpp = NULL;
		return (error);
	}
	if (res)
		*res = (char *)bp->b_data + blkoff(imp, offset);
	*bpp = bp;
	return (0);
}
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cd9660_mount.h 8.1 (Berkeley) 5/24/95 + */ + +/* + * Arguments to mount ISO 9660 filesystems. 
/*
 * Arguments to mount ISO 9660 filesystems.
 */
struct iso_args {
	char	*fspec;			/* block special device to mount */
	struct	export_args export;	/* network export info */
	int	flags;			/* mounting flags, see below */
	int	ssector;		/* starting sector, 0 for 1st session */
};

/* Bit values for iso_args.flags. */
#define	ISOFSMNT_NORRIP	0x00000001	/* disable Rock Ridge Ext.*/
#define	ISOFSMNT_GENS	0x00000002	/* enable generation numbers */
#define	ISOFSMNT_EXTATT	0x00000004	/* enable extended attributes */
/*
 * Structures associated with iso_node caching.
 */
static struct iso_node **isohashtbl;	/* hash chain heads, allocated by hashinit() */
static u_long isohash;			/* filled in by hashinit(); ANDed in by INOHASH */
/* Chain index for a (device, inode number) pair. */
#define	INOHASH(device, inum)	(((device) + ((inum)>>12)) & isohash)
#ifndef NULL_SIMPLELOCKS
static struct simplelock cd9660_ihash_slock;	/* taken around every isohashtbl access */
#endif

static void cd9660_ihashrem __P((struct iso_node *));
static unsigned cd9660_chars2ui __P((unsigned char *begin, int len));

/*
 * Initialize hash links for inodes and dnodes.
 *
 * Allocates the iso_node hash table (sized from desiredvnodes) and
 * initializes the lock that protects it.  vfsp is not used.
 * Always returns 0.
 */
int
cd9660_init(vfsp)
	struct vfsconf *vfsp;
{

	isohashtbl = hashinit(desiredvnodes, M_ISOFSMNT, &isohash);
	simple_lock_init(&cd9660_ihash_slock);
	return (0);
}
/*
 * Use the device/inum pair to find the incore inode, and return a pointer
 * to it. If it is in core, but locked, wait for it.
 *
 * Returns the vnode obtained through vget(LK_EXCLUSIVE), or NULL when no
 * node with that device and inode number is on the hash chain.
 */
struct vnode *
cd9660_ihashget(dev, inum)
	dev_t dev;
	ino_t inum;
{
	struct proc *p = curproc;		/* XXX */
	struct iso_node *ip;
	struct vnode *vp;

loop:
	simple_lock(&cd9660_ihash_slock);
	for (ip = isohashtbl[INOHASH(dev, inum)]; ip; ip = ip->i_next) {
		if (inum == ip->i_number && dev == ip->i_dev) {
			vp = ITOV(ip);
			/*
			 * Take the vnode interlock before dropping the hash
			 * lock; vget() is told about it via LK_INTERLOCK.
			 */
			simple_lock(&vp->v_interlock);
			simple_unlock(&cd9660_ihash_slock);
			/* vget() failed: restart the search from the top */
			if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p))
				goto loop;
			return (vp);
		}
	}
	simple_unlock(&cd9660_ihash_slock);
	return (NULL);
}

/*
 * Insert the inode into the hash table, and return it locked.
 *
 * The node is linked at the head of its chain; i_prev holds the address
 * of the pointer that points at the node (chain head or predecessor's
 * i_next).  After the hash lock is dropped, i_lock is taken exclusively.
 */
void
cd9660_ihashins(ip)
	struct iso_node *ip;
{
	struct proc *p = curproc;		/* XXX */
	struct iso_node **ipp, *iq;

	simple_lock(&cd9660_ihash_slock);
	ipp = &isohashtbl[INOHASH(ip->i_dev, ip->i_number)];
	if ((iq = *ipp) != NULL)
		iq->i_prev = &ip->i_next;	/* old head now hangs off the new node */
	ip->i_next = iq;
	ip->i_prev = ipp;
	*ipp = ip;
	simple_unlock(&cd9660_ihash_slock);

	lockmgr(&ip->i_lock, LK_EXCLUSIVE, (struct simplelock *)0, p);
}

/*
 * Remove the inode from the hash table.
 *
 * Unlinks the node through its i_prev back-pointer; with DIAGNOSTIC the
 * node's own link fields are cleared afterwards.
 */
static void
cd9660_ihashrem(ip)
	register struct iso_node *ip;
{
	register struct iso_node *iq;

	simple_lock(&cd9660_ihash_slock);
	if ((iq = ip->i_next) != NULL)
		iq->i_prev = ip->i_prev;
	*ip->i_prev = iq;
#ifdef DIAGNOSTIC
	ip->i_next = NULL;
	ip->i_prev = NULL;
#endif
	simple_unlock(&cd9660_ihash_slock);
}
+ */ +int +cd9660_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + register struct iso_node *ip = VTOI(vp); + int error = 0; + + if (prtactive && vp->v_usecount != 0) + vprint("cd9660_inactive: pushing active", vp); + + ip->i_flag = 0; + VOP_UNLOCK(vp, 0, p); + /* + * If we are done with the inode, reclaim it + * so that it can be reused immediately. + */ + if (ip->inode.iso_mode == 0) + vrecycle(vp, (struct simplelock *)0, p); + return error; +} + +/* + * Reclaim an inode so that it can be used for other purposes. + */ +int +cd9660_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct iso_node *ip = VTOI(vp); + + if (prtactive && vp->v_usecount != 0) + vprint("cd9660_reclaim: pushing active", vp); + /* + * Remove the inode from its hash chain. + */ + cd9660_ihashrem(ip); + /* + * Purge old data structures associated with the inode. + */ + cache_purge(vp); + if (ip->i_devvp) { + vrele(ip->i_devvp); + ip->i_devvp = 0; + } + FREE(vp->v_data, M_ISOFSNODE); + vp->v_data = NULL; + return (0); +} + +/* + * File attributes + */ +void +cd9660_defattr(isodir, inop, bp, ftype) + struct iso_directory_record *isodir; + struct iso_node *inop; + struct buf *bp; + enum ISO_FTYPE ftype; +{ + struct buf *bp2 = NULL; + struct iso_mnt *imp; + struct iso_extended_attributes *ap = NULL; + int off; + + /* high sierra does not have timezone data, flag is one byte ahead */ + if (isonum_711(ftype == ISO_FTYPE_HIGH_SIERRA? + &isodir->date[6]: isodir->flags)&2) { + inop->inode.iso_mode = S_IFDIR; + /* + * If we return 2, fts() will assume there are no subdirectories + * (just links for the path and .), so instead we return 1. 
+ */ + inop->inode.iso_links = 1; + } else { + inop->inode.iso_mode = S_IFREG; + inop->inode.iso_links = 1; + } + if (!bp + && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT) + && (off = isonum_711(isodir->ext_attr_length))) { + cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL, + &bp2); + bp = bp2; + } + if (bp) { + ap = (struct iso_extended_attributes *)bp->b_data; + + if (isonum_711(ap->version) == 1) { + if (!(ap->perm[0]&0x40)) + inop->inode.iso_mode |= VEXEC >> 6; + if (!(ap->perm[0]&0x10)) + inop->inode.iso_mode |= VREAD >> 6; + if (!(ap->perm[0]&4)) + inop->inode.iso_mode |= VEXEC >> 3; + if (!(ap->perm[0]&1)) + inop->inode.iso_mode |= VREAD >> 3; + if (!(ap->perm[1]&0x40)) + inop->inode.iso_mode |= VEXEC; + if (!(ap->perm[1]&0x10)) + inop->inode.iso_mode |= VREAD; + inop->inode.iso_uid = isonum_723(ap->owner); /* what about 0? */ + inop->inode.iso_gid = isonum_723(ap->group); /* what about 0? */ + } else + ap = NULL; + } + if (!ap) { + inop->inode.iso_mode |= VREAD|VEXEC|(VREAD|VEXEC)>>3|(VREAD|VEXEC)>>6; + inop->inode.iso_uid = (uid_t)0; + inop->inode.iso_gid = (gid_t)0; + } + if (bp2) + brelse(bp2); +}

/*
 * Time stamps
 */

/*
 * Set the access, change and modification times of `inop'.
 *
 * If the caller supplies no buffer, the mount has ISOFSMNT_EXTATT set and
 * the directory record announces an extended attribute record, that record
 * is fetched with cd9660_blkatoff() and released again before returning.
 * A version 1 extended attribute record on a volume that is not High Sierra
 * supplies the three times in 17-byte form; a stamp that does not convert
 * falls back to a neighbouring one.  In every other case all three times
 * are taken from the 7-byte date of the directory record.
 */
void
cd9660_deftstamp(isodir,inop,bp,ftype)
	struct iso_directory_record *isodir;
	struct iso_node *inop;
	struct buf *bp;
	enum ISO_FTYPE ftype;
{
	struct buf *bp2 = NULL;
	struct iso_mnt *imp;
	struct iso_extended_attributes *ap = NULL;
	int off;

	if (!bp
	    && ((imp = inop->i_mnt)->im_flags & ISOFSMNT_EXTATT)
	    && (off = isonum_711(isodir->ext_attr_length))) {
		cd9660_blkatoff(ITOV(inop), (off_t)-(off << imp->im_bshift), NULL,
				&bp2);
		bp = bp2;
	}
	if (bp) {
		ap = (struct iso_extended_attributes *)bp->b_data;

		if (ftype != ISO_FTYPE_HIGH_SIERRA
		    && isonum_711(ap->version) == 1) {
			/* atime: ftime, else ctime */
			if (!cd9660_tstamp_conv17(ap->ftime,&inop->inode.iso_atime))
				cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_atime);
			/* ctime: ctime, else whatever atime ended up as */
			if (!cd9660_tstamp_conv17(ap->ctime,&inop->inode.iso_ctime))
				inop->inode.iso_ctime = inop->inode.iso_atime;
			/* mtime: mtime, else ctime */
			if (!cd9660_tstamp_conv17(ap->mtime,&inop->inode.iso_mtime))
				inop->inode.iso_mtime = inop->inode.iso_ctime;
		} else
			ap = NULL;
	}
	if (!ap) {
		cd9660_tstamp_conv7(isodir->date,&inop->inode.iso_ctime,ftype);
		inop->inode.iso_atime = inop->inode.iso_ctime;
		inop->inode.iso_mtime = inop->inode.iso_ctime;
	}
	if (bp2)
		brelse(bp2);
}

/*
 * Convert a 7-byte binary date (years since 1900, month, day, hour,
 * minute, second, GMT offset in 15 minute units) to a timespec.
 *
 * Returns 1 on success.  A year before 1970 yields a zeroed timespec and
 * a return value of 0.  For ISO_FTYPE_HIGH_SIERRA the seventh byte is not
 * read and the offset is taken as 0.
 */
int
cd9660_tstamp_conv7(pi,pu,ftype)
	u_char *pi;
	struct timespec *pu;
	enum ISO_FTYPE ftype;
{
	time_t crtime;
	int days;
	int y, m, d, hour, minute, second, tz;

	y = pi[0] + 1900;
	m = pi[1];
	d = pi[2];
	hour = pi[3];
	minute = pi[4];
	second = pi[5];
	if (ftype != ISO_FTYPE_HIGH_SIERRA) {
		/*
		 * The offset is a signed byte.  Read through a u_char it
		 * would always be >= 0, so every offset west of GMT would
		 * fail the range test below and be silently ignored.
		 */
		tz = pi[6];
		if (tz & 0x80)
			tz -= 0x100;
	} else
		/* original high sierra misses timezone data */
		tz = 0;

	if (y < 1970) {
		pu->tv_sec = 0;
		pu->tv_nsec = 0;
		return 0;
	} else {
#ifdef	ORIGINAL
		/* computes day number relative to Sept. 19th,1989 */
		/* don't even *THINK* about changing formula. It works! */
		days = 367*(y-1980)-7*(y+(m+9)/12)/4-3*((y+(m-9)/7)/100+1)/4+275*m/9+d-100;
#else
		/*
		 * Changed :-) to make it relative to Jan. 1st, 1970
		 * and to disambiguate negative division
		 */
		days = 367*(y-1960)-7*(y+(m+9)/12)/4-3*((y+(m+9)/12-1)/100+1)/4+275*m/9+d-239;
#endif
		/* widen before multiplying: an int overflows in 2038 */
		crtime = (((((time_t)days * 24) + hour) * 60 + minute) * 60) + second;

		/* timezone offset is unreliable on some disks */
		if (-48 <= tz && tz <= 52)
			crtime -= tz * 15 * 60;
	}
	pu->tv_sec = crtime;
	pu->tv_nsec = 0;
	return 1;
}

/*
 * Convert `len' decimal characters starting at `begin' to a number.
 * Any byte that is not a digit (a blank used as padding, a NUL in an
 * unrecorded field) counts as the digit 0.
 */
static u_int
cd9660_chars2ui(begin,len)
	u_char *begin;
	int len;
{
	u_int rc;

	for (rc = 0; --len >= 0; begin++) {
		rc *= 10;
		if (*begin >= '0' && *begin <= '9')
			rc += *begin - '0';
	}
	return rc;
}

/*
 * Convert a 17-byte date (decimal characters YYYYMMDDHHMMSS, two
 * characters that are not used here, then a GMT offset byte) to a
 * timespec by repacking it into the 7-byte form.
 *
 * Returns what cd9660_tstamp_conv7() returns.  A year that does not fit
 * the one-byte "years since 1900" field yields a zeroed timespec and 0.
 */
int
cd9660_tstamp_conv17(pi,pu)
	u_char *pi;
	struct timespec *pu;
{
	u_char buf[7];
	u_int year;

	/* year:"0001"-"9999" -> -1900  */
	year = cd9660_chars2ui(pi,4);
	if (year < 1900 || year > 1900 + 255) {
		/*
		 * Storing year - 1900 in a u_char would wrap; an all-zero
		 * (unrecorded) stamp would come out as the year 2048.
		 */
		pu->tv_sec = 0;
		pu->tv_nsec = 0;
		return 0;
	}
	buf[0] = year - 1900;

	/* month: " 1"-"12"      -> 1 - 12 */
	buf[1] = cd9660_chars2ui(pi + 4,2);

	/* day:   " 1"-"31"      -> 1 - 31 */
	buf[2] = cd9660_chars2ui(pi + 6,2);

	/* hour:  " 0"-"23"      -> 0 - 23 */
	buf[3] = cd9660_chars2ui(pi + 8,2);

	/* minute:" 0"-"59"      -> 0 - 59 */
	buf[4] = cd9660_chars2ui(pi + 10,2);

	/* second:" 0"-"59"      -> 0 - 59 */
	buf[5] = cd9660_chars2ui(pi + 12,2);

	/* difference of GMT; the sign is recovered in cd9660_tstamp_conv7 */
	buf[6] = pi[16];

	return cd9660_tstamp_conv7(buf, pu, ISO_FTYPE_DEFAULT);
}

/*
 * Compute the inode number used for a directory record: the sum of its
 * extent and extended attribute length, shifted left by the mount's
 * im_bshift.
 */
ino_t
isodirino(isodir, imp)
	struct iso_directory_record *isodir;
	struct iso_mnt *imp;
{
	ino_t ino;

	ino = (isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length))
	      << imp->im_bshift;
	return (ino);
}
diff --git a/sys/fs/cd9660/cd9660_node.h b/sys/fs/cd9660/cd9660_node.h new file mode 100644 index 0000000..33b208f --- /dev/null +++ b/sys/fs/cd9660/cd9660_node.h @@ -0,0 +1,126 @@ +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cd9660_node.h 8.6 (Berkeley) 5/14/95 + * $Id: cd9660_node.h,v 1.16 1997/11/18 14:40:34 phk Exp $ + */ + +/* + * Theoretically, directories can be more than 2Gb in length, + * however, in practice this seems unlikely. 
So, we define + * the type doff_t as a long to keep down the cost of doing + * lookup on a 32-bit machine. If you are porting to a 64-bit + * architecture, you should make doff_t the same as off_t. + */ +#define doff_t long + +typedef struct { /* attributes of one node; written by cd9660_deftstamp and by the Rock Ridge handlers cd9660_rrip_attr, cd9660_rrip_tstamp and cd9660_rrip_device */ + struct timespec iso_atime; /* time of last access */ + struct timespec iso_mtime; /* time of last modification */ + struct timespec iso_ctime; /* time file changed */ + u_short iso_mode; /* files access mode and type */ + uid_t iso_uid; /* owner user id */ + gid_t iso_gid; /* owner group id */ + short iso_links; /* links of file */ + dev_t iso_rdev; /* Major/Minor number for special */ +} ISO_RRIP_INODE; + + +struct iso_node { /* private data of a vnode on this filesystem; VTOI() and ITOV() below convert between the two */ + struct lock i_lock; /* node lock > Keep this first< */ + struct iso_node *i_next, **i_prev; /* hash chain */ + struct vnode *i_vnode; /* vnode associated with this inode */ + struct vnode *i_devvp; /* vnode for block I/O */ + u_long i_flag; /* see below */ + dev_t i_dev; /* device where inode resides */ + ino_t i_number; /* the identity of the inode */ + /* we use the actual starting block of the file */ + struct iso_mnt *i_mnt; /* filesystem associated with this inode */ + struct lockf *i_lockf; /* head of byte-level lock list */ + doff_t i_endoff; /* end of useful stuff in directory */ + doff_t i_diroff; /* offset in dir, where we found last entry */ + doff_t i_offset; /* offset of free space in directory */ + ino_t i_ino; /* inode number of found directory */ + + long iso_extent; /* extent of file */ + long i_size; /* NOTE(review): uncommented in the original; presumably the length of the file in bytes -- confirm against the code that sets it */ + long iso_start; /* actual start of data of file (may be different */ + /* from iso_extent, if file has extended attributes) */ + ISO_RRIP_INODE inode; /* mode, ownership, link count, device and time stamps; see ISO_RRIP_INODE above */ +}; + +#define i_forw i_chain[0] +#define i_back i_chain[1] /* NOTE(review): struct iso_node above declares i_next/i_prev and has no i_chain member, so i_forw and i_back cannot name a field of it -- they look stale */ + +/* flags */ +#define IN_ACCESS 0x0020 /* inode access time to be updated */ + +#define VTOI(vp) ((struct iso_node *)(vp)->v_data) /* vnode -> the iso_node kept in its v_data */ +#define ITOV(ip) ((ip)->i_vnode) /* iso_node -> the vnode recorded in i_vnode */ + +#ifdef KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_ISOFSMNT); +MALLOC_DECLARE(M_ISOFSNODE); +#endif + 
+struct buf; +struct vop_bmap_args; +struct vop_cachedlookup_args; +struct vop_inactive_args; +struct vop_reclaim_args; + +/* + * Prototypes for ISOFS vnode operations + */ +int cd9660_lookup __P((struct vop_cachedlookup_args *)); +int cd9660_inactive __P((struct vop_inactive_args *)); +int cd9660_reclaim __P((struct vop_reclaim_args *)); +int cd9660_bmap __P((struct vop_bmap_args *)); +int cd9660_blkatoff __P((struct vnode *vp, off_t offset, char **res, struct buf **bpp)); + +void cd9660_defattr __P((struct iso_directory_record *, + struct iso_node *, struct buf *, enum ISO_FTYPE)); +void cd9660_deftstamp __P((struct iso_directory_record *, + struct iso_node *, struct buf *, enum ISO_FTYPE)); +struct vnode *cd9660_ihashget __P((dev_t, ino_t)); +void cd9660_ihashins __P((struct iso_node *)); +int cd9660_tstamp_conv7 __P((u_char *, struct timespec *, enum ISO_FTYPE)); +int cd9660_tstamp_conv17 __P((u_char *, struct timespec *)); + +#endif /* KERNEL */ diff --git a/sys/fs/cd9660/cd9660_rrip.c b/sys/fs/cd9660/cd9660_rrip.c new file mode 100644 index 0000000..b34553f --- /dev/null +++ b/sys/fs/cd9660/cd9660_rrip.c @@ -0,0 +1,723 @@ +/*- + * Copyright (c) 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)cd9660_rrip.c 8.6 (Berkeley) 12/5/94 + * $Id: cd9660_rrip.c,v 1.12 1997/02/22 09:38:49 peter Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> + +#include <isofs/cd9660/iso.h> +#include <isofs/cd9660/cd9660_node.h> +#include <isofs/cd9660/cd9660_rrip.h> +#include <isofs/cd9660/iso_rrip.h> + +typedef int rrt_func_t __P((void *, ISO_RRIP_ANALYZE *ana)); + +typedef struct { + char type[2]; + rrt_func_t *func; + void (*func2) __P((struct iso_directory_record *isodir, ISO_RRIP_ANALYZE *ana)); + int result; +} RRIP_TABLE; + +static int cd9660_rrip_altname __P((ISO_RRIP_ALTNAME *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_attr __P((ISO_RRIP_ATTR *p, ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_cont __P((ISO_RRIP_CONT *p, ISO_RRIP_ANALYZE *ana)); +static void cd9660_rrip_defattr __P((struct iso_directory_record *isodir, + ISO_RRIP_ANALYZE *ana)); +static void cd9660_rrip_defname __P((struct iso_directory_record *isodir, + ISO_RRIP_ANALYZE *ana)); +static void cd9660_rrip_deftstamp __P((struct iso_directory_record *isodir, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_device __P((ISO_RRIP_DEVICE *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_extref __P((ISO_RRIP_EXTREF *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_idflag __P((ISO_RRIP_IDFLAG *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_loop __P((struct iso_directory_record *isodir, + ISO_RRIP_ANALYZE *ana, + RRIP_TABLE *table)); +static int cd9660_rrip_pclink __P((ISO_RRIP_CLINK *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_reldir __P((ISO_RRIP_RELDIR *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_slink __P((ISO_RRIP_SLINK *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_stop __P((ISO_SUSP_HEADER *p, + ISO_RRIP_ANALYZE *ana)); +static int cd9660_rrip_tstamp __P((ISO_RRIP_TSTAMP *p, + ISO_RRIP_ANALYZE *ana)); + +/* + * POSIX file attribute + 
*/ +static int +cd9660_rrip_attr(p,ana) + ISO_RRIP_ATTR *p; + ISO_RRIP_ANALYZE *ana; +{ + ana->inop->inode.iso_mode = isonum_733(p->mode); + ana->inop->inode.iso_uid = isonum_733(p->uid); + ana->inop->inode.iso_gid = isonum_733(p->gid); + ana->inop->inode.iso_links = isonum_733(p->links); + ana->fields &= ~ISO_SUSP_ATTR; + return ISO_SUSP_ATTR; +} + +static void +cd9660_rrip_defattr(isodir,ana) + struct iso_directory_record *isodir; + ISO_RRIP_ANALYZE *ana; +{ + /* But this is a required field! */ + printf("RRIP without PX field?\n"); + cd9660_defattr(isodir,ana->inop,NULL,ISO_FTYPE_RRIP); +} + +/* + * Symbolic Links + */ +static int +cd9660_rrip_slink(p,ana) + ISO_RRIP_SLINK *p; + ISO_RRIP_ANALYZE *ana; +{ + register ISO_RRIP_SLINK_COMPONENT *pcomp; + register ISO_RRIP_SLINK_COMPONENT *pcompe; + int len, wlen, cont; + char *outbuf, *inbuf; + + pcomp = (ISO_RRIP_SLINK_COMPONENT *)p->component; + pcompe = (ISO_RRIP_SLINK_COMPONENT *)((char *)p + isonum_711(p->h.length)); + len = *ana->outlen; + outbuf = ana->outbuf; + cont = ana->cont; + + /* + * Gathering a Symbolic name from each component with path + */ + for (; + pcomp < pcompe; + pcomp = (ISO_RRIP_SLINK_COMPONENT *)((char *)pcomp + ISO_RRIP_SLSIZ + + isonum_711(pcomp->clen))) { + + if (!cont) { + if (len < ana->maxlen) { + len++; + *outbuf++ = '/'; + } + } + cont = 0; + + inbuf = ".."; + wlen = 0; + + switch (*pcomp->cflag) { + + case ISO_SUSP_CFLAG_CURRENT: + /* Inserting Current */ + wlen = 1; + break; + + case ISO_SUSP_CFLAG_PARENT: + /* Inserting Parent */ + wlen = 2; + break; + + case ISO_SUSP_CFLAG_ROOT: + /* Inserting slash for ROOT */ + /* start over from beginning(?) */ + outbuf -= len; + len = 0; + break; + + case ISO_SUSP_CFLAG_VOLROOT: + /* Inserting a mount point i.e. "/cdrom" */ + /* same as above */ + outbuf -= len; + len = 0; + inbuf = ana->imp->im_mountp->mnt_stat.f_mntonname; + wlen = strlen(inbuf); + break; + + case ISO_SUSP_CFLAG_HOST: + /* Inserting hostname i.e. 
"kurt.tools.de" */ + inbuf = hostname; + wlen = strlen(hostname); + break; + + case ISO_SUSP_CFLAG_CONTINUE: + cont = 1; + /* fall thru */ + case 0: + /* Inserting component */ + wlen = isonum_711(pcomp->clen); + inbuf = pcomp->name; + break; + default: + printf("RRIP with incorrect flags?"); + wlen = ana->maxlen + 1; + break; + } + + if (len + wlen > ana->maxlen) { + /* indicate error to caller */ + ana->cont = 1; + ana->fields = 0; + ana->outbuf -= *ana->outlen; + *ana->outlen = 0; + return 0; + } + + bcopy(inbuf,outbuf,wlen); + outbuf += wlen; + len += wlen; + + } + ana->outbuf = outbuf; + *ana->outlen = len; + ana->cont = cont; + + if (!isonum_711(p->flags)) { + ana->fields &= ~ISO_SUSP_SLINK; + return ISO_SUSP_SLINK; + } + return 0; +} + +/* + * Alternate name + */ +static int +cd9660_rrip_altname(p,ana) + ISO_RRIP_ALTNAME *p; + ISO_RRIP_ANALYZE *ana; +{ + char *inbuf; + int wlen; + int cont; + + inbuf = ".."; + wlen = 0; + cont = 0; + + switch (*p->flags) { + case ISO_SUSP_CFLAG_CURRENT: + /* Inserting Current */ + wlen = 1; + break; + + case ISO_SUSP_CFLAG_PARENT: + /* Inserting Parent */ + wlen = 2; + break; + + case ISO_SUSP_CFLAG_HOST: + /* Inserting hostname i.e. 
"kurt.tools.de" */ + inbuf = hostname; + wlen = strlen(hostname); + break; + + case ISO_SUSP_CFLAG_CONTINUE: + cont = 1; + /* fall thru */ + case 0: + /* Inserting component */ + wlen = isonum_711(p->h.length) - 5; + inbuf = (char *)p + 5; + break; + + default: + printf("RRIP with incorrect NM flags?\n"); + wlen = ana->maxlen + 1; + break; + } + + if ((*ana->outlen += wlen) > ana->maxlen) { + /* treat as no name field */ + ana->fields &= ~ISO_SUSP_ALTNAME; + ana->outbuf -= *ana->outlen - wlen; + *ana->outlen = 0; + return 0; + } + + bcopy(inbuf,ana->outbuf,wlen); + ana->outbuf += wlen; + + if (!cont) { + ana->fields &= ~ISO_SUSP_ALTNAME; + return ISO_SUSP_ALTNAME; + } + return 0; +} + +static void +cd9660_rrip_defname(isodir,ana) + struct iso_directory_record *isodir; + ISO_RRIP_ANALYZE *ana; +{ + strcpy(ana->outbuf,".."); + switch (*isodir->name) { + default: + isofntrans(isodir->name,isonum_711(isodir->name_len), + ana->outbuf,ana->outlen, + 1,isonum_711(isodir->flags)&4); + break; + case 0: + *ana->outlen = 1; + break; + case 1: + *ana->outlen = 2; + break; + } +} + +/* + * Parent or Child Link + */ +static int +cd9660_rrip_pclink(p,ana) + ISO_RRIP_CLINK *p; + ISO_RRIP_ANALYZE *ana; +{ + *ana->inump = isonum_733(p->dir_loc) << ana->imp->im_bshift; + ana->fields &= ~(ISO_SUSP_CLINK|ISO_SUSP_PLINK); + return *p->h.type == 'C' ? 
ISO_SUSP_CLINK : ISO_SUSP_PLINK; +} + +/* + * Relocated directory + */ +static int +cd9660_rrip_reldir(p,ana) + ISO_RRIP_RELDIR *p; + ISO_RRIP_ANALYZE *ana; +{ + /* special hack to make caller aware of RE field */ + *ana->outlen = 0; + ana->fields = 0; + return ISO_SUSP_RELDIR|ISO_SUSP_ALTNAME|ISO_SUSP_CLINK|ISO_SUSP_PLINK; +} + +static int +cd9660_rrip_tstamp(p,ana) + ISO_RRIP_TSTAMP *p; + ISO_RRIP_ANALYZE *ana; +{ + u_char *ptime; + + ptime = p->time; + + /* Check a format of time stamp (7bytes/17bytes) */ + if (!(*p->flags&ISO_SUSP_TSTAMP_FORM17)) { + if (*p->flags&ISO_SUSP_TSTAMP_CREAT) + ptime += 7; + + if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { + cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_mtime, + ISO_FTYPE_RRIP); + ptime += 7; + } else + bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec)); + + if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { + cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_atime, + ISO_FTYPE_RRIP); + ptime += 7; + } else + ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; + + if (*p->flags&ISO_SUSP_TSTAMP_ATTR) + cd9660_tstamp_conv7(ptime,&ana->inop->inode.iso_ctime, + ISO_FTYPE_RRIP); + else + ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; + + } else { + if (*p->flags&ISO_SUSP_TSTAMP_CREAT) + ptime += 17; + + if (*p->flags&ISO_SUSP_TSTAMP_MODIFY) { + cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_mtime); + ptime += 17; + } else + bzero(&ana->inop->inode.iso_mtime,sizeof(struct timespec)); + + if (*p->flags&ISO_SUSP_TSTAMP_ACCESS) { + cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_atime); + ptime += 17; + } else + ana->inop->inode.iso_atime = ana->inop->inode.iso_mtime; + + if (*p->flags&ISO_SUSP_TSTAMP_ATTR) + cd9660_tstamp_conv17(ptime,&ana->inop->inode.iso_ctime); + else + ana->inop->inode.iso_ctime = ana->inop->inode.iso_mtime; + + } + ana->fields &= ~ISO_SUSP_TSTAMP; + return ISO_SUSP_TSTAMP; +} + +static void +cd9660_rrip_deftstamp(isodir,ana) + struct iso_directory_record *isodir; + ISO_RRIP_ANALYZE *ana; +{ + 
cd9660_deftstamp(isodir,ana->inop,NULL,ISO_FTYPE_RRIP); +} + +/* + * POSIX device modes + */ +static int +cd9660_rrip_device(p,ana) + ISO_RRIP_DEVICE *p; + ISO_RRIP_ANALYZE *ana; +{ + u_int high, low; + + high = isonum_733(p->dev_t_high); + low = isonum_733(p->dev_t_low); + + if (high == 0) + ana->inop->inode.iso_rdev = makedev(major(low), minor(low)); + else + ana->inop->inode.iso_rdev = makedev(high, minor(low)); + ana->fields &= ~ISO_SUSP_DEVICE; + return ISO_SUSP_DEVICE; +} + +/* + * Flag indicating + */ +static int +cd9660_rrip_idflag(p,ana) + ISO_RRIP_IDFLAG *p; + ISO_RRIP_ANALYZE *ana; +{ + ana->fields &= isonum_711(p->flags)|~0xff; /* don't touch high bits */ + /* special handling of RE field */ + if (ana->fields&ISO_SUSP_RELDIR) + return cd9660_rrip_reldir(/* XXX */ (ISO_RRIP_RELDIR *)p,ana); + + return ISO_SUSP_IDFLAG; +} + +/* + * Continuation pointer + */ +static int +cd9660_rrip_cont(p,ana) + ISO_RRIP_CONT *p; + ISO_RRIP_ANALYZE *ana; +{ + ana->iso_ce_blk = isonum_733(p->location); + ana->iso_ce_off = isonum_733(p->offset); + ana->iso_ce_len = isonum_733(p->length); + return ISO_SUSP_CONT; +} + +/* + * System Use end + */ +static int +cd9660_rrip_stop(p,ana) + ISO_SUSP_HEADER *p; + ISO_RRIP_ANALYZE *ana; +{ + return ISO_SUSP_STOP; +} + +/* + * Extension reference + */ +static int +cd9660_rrip_extref(p,ana) + ISO_RRIP_EXTREF *p; + ISO_RRIP_ANALYZE *ana; +{ + if (isonum_711(p->len_id) != 10 + || bcmp((char *)p + 8,"RRIP_1991A",10) + || isonum_711(p->version) != 1) + return 0; + ana->fields &= ~ISO_SUSP_EXTREF; + return ISO_SUSP_EXTREF; +} + +static int +cd9660_rrip_loop(isodir,ana,table) + struct iso_directory_record *isodir; + ISO_RRIP_ANALYZE *ana; + RRIP_TABLE *table; +{ + register RRIP_TABLE *ptable; + register ISO_SUSP_HEADER *phead; + register ISO_SUSP_HEADER *pend; + struct buf *bp = NULL; + char *pwhead; + int result; + + /* + * Note: If name length is odd, + * it will be padding 1 byte after the name + */ + pwhead = isodir->name + 
isonum_711(isodir->name_len); + if (!(isonum_711(isodir->name_len)&1)) + pwhead++; + + /* If it's not the '.' entry of the root dir obey SP field */ + if (*isodir->name != 0 + || isonum_733(isodir->extent) != ana->imp->root_extent) + pwhead += ana->imp->rr_skip; + else + pwhead += ana->imp->rr_skip0; + + phead = (ISO_SUSP_HEADER *)pwhead; + pend = (ISO_SUSP_HEADER *)((char *)isodir + isonum_711(isodir->length)); + + result = 0; + while (1) { + ana->iso_ce_len = 0; + /* + * Note: "pend" should be more than one SUSP header + */ + while (pend >= phead + 1) { + if (isonum_711(phead->version) == 1) { + for (ptable = table; ptable->func; ptable++) { + if (*phead->type == *ptable->type + && phead->type[1] == ptable->type[1]) { + result |= ptable->func(phead,ana); + break; + } + } + if (!ana->fields) + break; + } + if (result&ISO_SUSP_STOP) { + result &= ~ISO_SUSP_STOP; + break; + } + /* plausibility check */ + if (isonum_711(phead->length) < sizeof(*phead)) + break; + /* + * move to next SUSP + * Hopefully this works with newer versions, too + */ + phead = (ISO_SUSP_HEADER *)((char *)phead + isonum_711(phead->length)); + } + + if (ana->fields && ana->iso_ce_len) { + if (ana->iso_ce_blk >= ana->imp->volume_space_size + || ana->iso_ce_off + ana->iso_ce_len > ana->imp->logical_block_size + || bread(ana->imp->im_devvp, + ana->iso_ce_blk << + (ana->imp->im_bshift - DEV_BSHIFT), + ana->imp->logical_block_size, NOCRED, &bp)) + /* what to do now? */ + break; + phead = (ISO_SUSP_HEADER *)(bp->b_data + ana->iso_ce_off); + pend = (ISO_SUSP_HEADER *) ((char *)phead + ana->iso_ce_len); + } else + break; + } + if (bp) + brelse(bp); + /* + * If we don't find the Basic SUSP stuffs, just set default value + * (attribute/time stamp) + */ + for (ptable = table; ptable->func2; ptable++) + if (!(ptable->result&result)) + ptable->func2(isodir,ana); + + return result; +} + +/* + * Get Attributes. + */ +/* + * XXX the casts are bogus but will do for now. 
+ */ +#define BC (rrt_func_t *) +static RRIP_TABLE rrip_table_analyze[] = { + { "PX", BC cd9660_rrip_attr, cd9660_rrip_defattr, ISO_SUSP_ATTR }, + { "TF", BC cd9660_rrip_tstamp, cd9660_rrip_deftstamp, ISO_SUSP_TSTAMP }, + { "PN", BC cd9660_rrip_device, 0, ISO_SUSP_DEVICE }, + { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, + { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, + { "", 0, 0, 0 } +}; + +int +cd9660_rrip_analyze(isodir,inop,imp) + struct iso_directory_record *isodir; + struct iso_node *inop; + struct iso_mnt *imp; +{ + ISO_RRIP_ANALYZE analyze; + + analyze.inop = inop; + analyze.imp = imp; + analyze.fields = ISO_SUSP_ATTR|ISO_SUSP_TSTAMP|ISO_SUSP_DEVICE; + + return cd9660_rrip_loop(isodir,&analyze,rrip_table_analyze); +} + +/* + * Get Alternate Name. + */ +static RRIP_TABLE rrip_table_getname[] = { + { "NM", BC cd9660_rrip_altname, cd9660_rrip_defname, ISO_SUSP_ALTNAME }, + { "CL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, + { "PL", BC cd9660_rrip_pclink, 0, ISO_SUSP_CLINK|ISO_SUSP_PLINK }, + { "RE", BC cd9660_rrip_reldir, 0, ISO_SUSP_RELDIR }, + { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, + { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, + { "", 0, 0, 0 } +}; + +int +cd9660_rrip_getname(isodir,outbuf,outlen,inump,imp) + struct iso_directory_record *isodir; + char *outbuf; + u_short *outlen; + ino_t *inump; + struct iso_mnt *imp; +{ + ISO_RRIP_ANALYZE analyze; + RRIP_TABLE *tab; + + analyze.outbuf = outbuf; + analyze.outlen = outlen; + analyze.maxlen = NAME_MAX; + analyze.inump = inump; + analyze.imp = imp; + analyze.fields = ISO_SUSP_ALTNAME|ISO_SUSP_RELDIR|ISO_SUSP_CLINK|ISO_SUSP_PLINK; + *outlen = 0; + + tab = rrip_table_getname; + if (*isodir->name == 0 + || *isodir->name == 1) { + cd9660_rrip_defname(isodir,&analyze); + + analyze.fields &= ~ISO_SUSP_ALTNAME; + tab++; + } + + return 
cd9660_rrip_loop(isodir,&analyze,tab); +} + +/* + * Get Symbolic Link. + */ +static RRIP_TABLE rrip_table_getsymname[] = { + { "SL", BC cd9660_rrip_slink, 0, ISO_SUSP_SLINK }, + { "RR", BC cd9660_rrip_idflag, 0, ISO_SUSP_IDFLAG }, + { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, + { "", 0, 0, 0 } +}; + +int +cd9660_rrip_getsymname(isodir,outbuf,outlen,imp) + struct iso_directory_record *isodir; + char *outbuf; + u_short *outlen; + struct iso_mnt *imp; +{ + ISO_RRIP_ANALYZE analyze; + + analyze.outbuf = outbuf; + analyze.outlen = outlen; + *outlen = 0; + analyze.maxlen = MAXPATHLEN; + analyze.cont = 1; /* don't start with a slash */ + analyze.imp = imp; + analyze.fields = ISO_SUSP_SLINK; + + return (cd9660_rrip_loop(isodir,&analyze,rrip_table_getsymname)&ISO_SUSP_SLINK); +} + +static RRIP_TABLE rrip_table_extref[] = { + { "ER", BC cd9660_rrip_extref, 0, ISO_SUSP_EXTREF }, + { "CE", BC cd9660_rrip_cont, 0, ISO_SUSP_CONT }, + { "ST", BC cd9660_rrip_stop, 0, ISO_SUSP_STOP }, + { "", 0, 0, 0 } +}; + +/* + * Check for Rock Ridge Extension and return offset of its fields. + * Note: We insist on the ER field. + */ +int +cd9660_rrip_offset(isodir,imp) + struct iso_directory_record *isodir; + struct iso_mnt *imp; +{ + ISO_RRIP_OFFSET *p; + ISO_RRIP_ANALYZE analyze; + + imp->rr_skip0 = 0; + p = (ISO_RRIP_OFFSET *)(isodir->name + 1); + if (bcmp(p,"SP\7\1\276\357",6)) { + /* Maybe, it's a CDROM XA disc? 
*/ + imp->rr_skip0 = 15; + p = (ISO_RRIP_OFFSET *)((char *)p + 15); + if (bcmp(p,"SP\7\1\276\357",6)) + return -1; + } + + analyze.imp = imp; + analyze.fields = ISO_SUSP_EXTREF; + if (!(cd9660_rrip_loop(isodir,&analyze,rrip_table_extref)&ISO_SUSP_EXTREF)) + return -1; + + return isonum_711(p->skip); +} diff --git a/sys/fs/cd9660/cd9660_rrip.h b/sys/fs/cd9660/cd9660_rrip.h new file mode 100644 index 0000000..cacee39 --- /dev/null +++ b/sys/fs/cd9660/cd9660_rrip.h @@ -0,0 +1,141 @@ +/*- + * Copyright (c) 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cd9660_rrip.h 8.2 (Berkeley) 12/5/94 + * $Id: cd9660_rrip.h,v 1.3.2000.1 1996/09/30 12:46:48 dfr Exp $ + */ + +typedef struct { /* header that starts every entry below (member h); cd9660_rrip_loop matches type against its table and steps to the next entry by length */ + char type [ISODCL ( 0, 1)]; + u_char length [ISODCL ( 2, 2)]; /* 711 */ + u_char version [ISODCL ( 3, 3)]; +} ISO_SUSP_HEADER; + +typedef struct { /* "PX" entry, decoded by cd9660_rrip_attr */ + ISO_SUSP_HEADER h; + char mode [ISODCL ( 4, 11)]; /* 733 */ + char links [ISODCL ( 12, 19)]; /* 733 */ + char uid [ISODCL ( 20, 27)]; /* 733 */ + char gid [ISODCL ( 28, 35)]; /* 733 */ +} ISO_RRIP_ATTR; + +typedef struct { /* "PN" entry, decoded by cd9660_rrip_device */ + ISO_SUSP_HEADER h; + char dev_t_high [ISODCL ( 4, 11)]; /* 733 */ + char dev_t_low [ISODCL ( 12, 19)]; /* 733 */ +} ISO_RRIP_DEVICE; + +#define ISO_SUSP_CFLAG_CONTINUE 0x01 /* the ISO_SUSP_CFLAG_* values are compared with the flag byte of an "SL" component (cd9660_rrip_slink) and of an "NM" entry (cd9660_rrip_altname) */ +#define ISO_SUSP_CFLAG_CURRENT 0x02 +#define ISO_SUSP_CFLAG_PARENT 0x04 +#define ISO_SUSP_CFLAG_ROOT 0x08 +#define ISO_SUSP_CFLAG_VOLROOT 0x10 +#define ISO_SUSP_CFLAG_HOST 0x20 + +typedef struct { /* one path component inside an "SL" entry; cd9660_rrip_slink copies clen bytes starting at name */ + u_char cflag [ISODCL ( 1, 1)]; + u_char clen [ISODCL ( 2, 2)]; + u_char name [1]; /* XXX */ +} ISO_RRIP_SLINK_COMPONENT; +#define ISO_RRIP_SLSIZ 2 /* cd9660_rrip_slink steps to the next component by ISO_RRIP_SLSIZ + clen */ + +typedef struct { /* "SL" entry, decoded by cd9660_rrip_slink; the components start at component */ + ISO_SUSP_HEADER h; + u_char flags [ISODCL ( 4, 4)]; + u_char component [ISODCL ( 5, 5)]; +} ISO_RRIP_SLINK; + +typedef struct { /* "NM" entry, decoded by cd9660_rrip_altname, which takes the name from offset 5 of the entry */ + ISO_SUSP_HEADER h; + 
char flags [ISODCL ( 4, 4)]; +} ISO_RRIP_ALTNAME; + +typedef struct { /* "CL" entry, decoded by cd9660_rrip_pclink */ + ISO_SUSP_HEADER h; + char dir_loc [ISODCL ( 4, 11)]; /* 733 */ +} ISO_RRIP_CLINK; + +typedef struct { /* "PL" entry; NOTE(review): cd9660_rrip_pclink reads both "CL" and "PL" entries through ISO_RRIP_CLINK, which has the same members */ + ISO_SUSP_HEADER h; + char dir_loc [ISODCL ( 4, 11)]; /* 733 */ +} ISO_RRIP_PLINK; + +typedef struct { /* "RE" entry; cd9660_rrip_reldir reads nothing from it */ + ISO_SUSP_HEADER h; +} ISO_RRIP_RELDIR; + +#define ISO_SUSP_TSTAMP_FORM17 0x80 /* bits of the flags byte of a "TF" entry; cd9660_rrip_tstamp reads 17-byte stamps when FORM17 is set and 7-byte stamps otherwise */ +#define ISO_SUSP_TSTAMP_FORM7 0x00 +#define ISO_SUSP_TSTAMP_CREAT 0x01 +#define ISO_SUSP_TSTAMP_MODIFY 0x02 +#define ISO_SUSP_TSTAMP_ACCESS 0x04 +#define ISO_SUSP_TSTAMP_ATTR 0x08 +#define ISO_SUSP_TSTAMP_BACKUP 0x10 /* BACKUP, EXPIRE and EFFECT are not examined by cd9660_rrip_tstamp */ +#define ISO_SUSP_TSTAMP_EXPIRE 0x20 +#define ISO_SUSP_TSTAMP_EFFECT 0x40 + +typedef struct { /* "TF" entry, decoded by cd9660_rrip_tstamp; the stamps selected by flags start at time */ + ISO_SUSP_HEADER h; + u_char flags [ISODCL ( 4, 4)]; + u_char time [ISODCL ( 5, 5)]; +} ISO_RRIP_TSTAMP; + +typedef struct { /* "RR" entry; cd9660_rrip_idflag masks the wanted fields with flags */ + ISO_SUSP_HEADER h; + u_char flags [ISODCL ( 4, 4)]; +} ISO_RRIP_IDFLAG; + +typedef struct { /* "ER" entry; cd9660_rrip_extref compares the 10 bytes at offset 8 of the entry with "RRIP_1991A" */ + ISO_SUSP_HEADER h; + char len_id [ISODCL ( 4, 4)]; + char len_des [ISODCL ( 5, 5)]; + char len_src [ISODCL ( 6, 6)]; + char version [ISODCL ( 7, 7)]; +} ISO_RRIP_EXTREF; + +typedef struct { /* "SP" entry; cd9660_rrip_offset checks its first 6 bytes and returns skip */ + ISO_SUSP_HEADER h; + char check [ISODCL ( 4, 5)]; + char skip [ISODCL ( 6, 6)]; +} ISO_RRIP_OFFSET; + +typedef struct { /* "CE" entry, decoded by cd9660_rrip_cont */ + ISO_SUSP_HEADER h; + char location [ISODCL ( 4, 11)]; /* 733 */ + char offset [ISODCL ( 12, 19)]; /* 733 */ + char length [ISODCL ( 20, 27)]; /* 733 */ +} ISO_RRIP_CONT; diff --git a/sys/fs/cd9660/cd9660_util.c b/sys/fs/cd9660/cd9660_util.c new file mode 100644 index 0000000..090f10d --- /dev/null +++ b/sys/fs/cd9660/cd9660_util.c @@ -0,0 +1,141 @@ +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
/*
 * translate and compare a filename
 * Note: Version number plus ';' may be omitted.
 *
 * fn/fnlen is the name being looked up, isofn/isolen the name stored in
 * the directory record.  Upper-case letters in the ISO name also match
 * their lower-case form in fn.  Returns 0 on a match and a non-zero value
 * otherwise; when both names carry a ";version" suffix the result is the
 * difference of the two version numbers.
 *
 * Neither buffer is read beyond its stated length.
 */
int
isofncmp(fn, fnlen, isofn, isolen)
	u_char *fn;
	int fnlen;
	u_char *isofn;
	int isolen;
{
	int i, j;
	unsigned char c;

	while (--fnlen >= 0) {
		/* ISO name exhausted first: fn has extra characters. */
		if (--isolen < 0)
			return *fn;
		if ((c = *isofn++) == ';') {
			/* Start of the version suffix in the ISO name. */
			switch (*fn++) {
			default:
				return *--fn;
			case 0:
				return 0;
			case ';':
				break;
			}
			/* Parse the requested version; digits only. */
			for (i = 0; --fnlen >= 0; i = i * 10 + *fn++ - '0') {
				if (*fn < '0' || *fn > '9') {
					return -1;
				}
			}
			/* Parse the on-disc version. */
			for (j = 0; --isolen >= 0; j = j * 10 + *isofn++ - '0')
				;
			return i - j;
		}
		if (c != *fn) {
			if (c >= 'A' && c <= 'Z') {
				if (c + ('a' - 'A') != *fn) {
					if (*fn >= 'a' && *fn <= 'z')
						return *fn - ('a' - 'A') - c;
					else
						return *fn - c;
				}
			} else
				return *fn - c;
		}
		fn++;
	}
	/*
	 * fn is used up.  Whatever is left of the ISO name may only be
	 * ";version" or ".;version".
	 */
	if (isolen > 0) {
		switch (*isofn) {
		default:
			return -1;
		case '.':
			/*
			 * Only look at the byte after the dot if the
			 * record really contains one.
			 */
			if (isolen < 2 || isofn[1] != ';')
				return -1;
			/* FALLTHROUGH */
		case ';':
			return 0;
		}
	}
	return 0;
}

/*
 * translate a filename
 *
 * Copies infn (infnlen bytes) to outfn and stores the resulting length in
 * *outfnlen.  Unless `original' is set, upper-case letters are folded to
 * lower case and the copy stops at ";" or at a "." that is directly
 * followed by ";".  When `assoc' is set the output is prefixed with
 * ASSOCCHAR.  The output is not NUL-terminated, and outfn must have room
 * for infnlen bytes plus the optional prefix.
 */
void
isofntrans(infn, infnlen, outfn, outfnlen, original, assoc)
	u_char *infn;
	int infnlen;
	u_char *outfn;
	u_short *outfnlen;
	int original;
	int assoc;
{
	int fnidx = 0;

	if (assoc) {
		*outfn++ = ASSOCCHAR;
		fnidx++;
		/* fnidx counts output bytes, so shift the limit as well. */
		infnlen++;
	}
	for (; fnidx < infnlen; fnidx++) {
		char c = *infn++;

		if (!original && c >= 'A' && c <= 'Z')
			*outfn++ = c + ('a' - 'A');
		else if (!original && c == '.' &&
		    fnidx + 1 < infnlen && *infn == ';')
			/* peek only while input bytes remain */
			break;
		else if (!original && c == ';')
			break;
		else
			*outfn++ = c;
	}
	*outfnlen = fnidx;
}
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cd9660_vfsops.c 8.18 (Berkeley) 5/22/95 + * $Id: cd9660_vfsops.c,v 1.50 1999/01/30 12:26:22 phk Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <miscfs/specfs/specdev.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/cdio.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/malloc.h> +#include <sys/stat.h> + +#include <isofs/cd9660/iso.h> +#include <isofs/cd9660/iso_rrip.h> +#include <isofs/cd9660/cd9660_node.h> +#include <isofs/cd9660/cd9660_mount.h> + +MALLOC_DEFINE(M_ISOFSMNT, "ISOFS mount", "ISOFS mount structure"); +MALLOC_DEFINE(M_ISOFSNODE, "ISOFS node", "ISOFS vnode private part"); + +static int cd9660_mount __P((struct mount *, + char *, caddr_t, struct nameidata *, struct proc *)); +static int cd9660_start __P((struct mount *, int, struct proc *)); +static int cd9660_unmount __P((struct mount *, int, struct proc *)); +static int cd9660_root __P((struct mount *, struct vnode **)); +static int cd9660_quotactl __P((struct mount *, int, uid_t, caddr_t, + struct proc *)); +static int cd9660_statfs __P((struct mount *, struct statfs *, struct proc *)); +static int cd9660_sync __P((struct mount *, int, struct ucred *, + struct proc *)); +static int cd9660_vget __P((struct mount *, ino_t, struct vnode **)); +static int cd9660_fhtovp __P((struct mount *, struct fid *, struct sockaddr *, 
+ struct vnode **, int *, struct ucred **)); +static int cd9660_vptofh __P((struct vnode *, struct fid *)); + +static struct vfsops cd9660_vfsops = { + cd9660_mount, + cd9660_start, + cd9660_unmount, + cd9660_root, + cd9660_quotactl, + cd9660_statfs, + cd9660_sync, + cd9660_vget, + cd9660_fhtovp, + cd9660_vptofh, + cd9660_init +}; +VFS_SET(cd9660_vfsops, cd9660, VFCF_READONLY); + + +/* + * Called by vfs_mountroot when iso is going to be mounted as root. + */ + +static int iso_get_ssector __P((dev_t dev, struct proc *p)); +static int iso_mountfs __P((struct vnode *devvp, struct mount *mp, + struct proc *p, struct iso_args *argp)); + +/* + * Try to find the start of the last data track on this CD-ROM. This + * is used to mount the last session of a multi-session CD. Bail out + * and return 0 if we fail, this is always a safe bet. + */ +static int +iso_get_ssector(dev, p) + dev_t dev; + struct proc *p; +{ + struct ioc_toc_header h; + struct ioc_read_toc_single_entry t; + int i; + struct cdevsw *bd; + d_ioctl_t *ioctlp; + + bd = bdevsw[major(dev)]; + ioctlp = bd->d_ioctl; + if (ioctlp == NULL) + return 0; + + if (ioctlp(dev, CDIOREADTOCHEADER, (caddr_t)&h, FREAD, p) != 0) + return 0; + + for (i = h.ending_track; i >= 0; i--) { + t.address_format = CD_LBA_FORMAT; + t.track = i; + if (ioctlp(dev, CDIOREADTOCENTRY, (caddr_t)&t, FREAD, p) != 0) + return 0; + if ((t.entry.control & 4) != 0) + /* found a data track */ + break; + } + + if (i < 0) + return 0; + + return ntohl(t.entry.addr.lba); +} + +static int iso_mountroot __P((struct mount *mp, struct proc *p)); + +static int +iso_mountroot(mp, p) + struct mount *mp; + struct proc *p; +{ + struct iso_args args; + int error; + + if ((error = bdevvp(rootdev, &rootvp))) { + printf("iso_mountroot: can't find rootvp"); + return (error); + } + args.flags = ISOFSMNT_ROOT; + args.ssector = iso_get_ssector(rootdev, p); + if (bootverbose) + printf("iso_mountroot(): using session at block %d\n", + args.ssector); + if ((error = 
iso_mountfs(rootvp, mp, p, &args)) != 0) + return (error); + + (void)cd9660_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +/* + * VFS Operations. + * + * mount system call + */ +static int +cd9660_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct iso_args args; + size_t size; + int error; + mode_t accessmode; + struct iso_mnt *imp = 0; + + if ((mp->mnt_flag & MNT_ROOTFS) != 0) { + if (bdevsw[major(rootdev)]->d_flags & D_NOCLUSTERR) + mp->mnt_flag |= MNT_NOCLUSTERR; + return (iso_mountroot(mp, p)); + } + if ((error = copyin(data, (caddr_t)&args, sizeof (struct iso_args)))) + return (error); + + if ((mp->mnt_flag & MNT_RDONLY) == 0) + return (EROFS); + + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. + * Disallow clearing MNT_NOCLUSTERR flag, if block device requests. + */ + if (mp->mnt_flag & MNT_UPDATE) { + imp = VFSTOISOFS(mp); + if (bdevsw[major(imp->im_devvp->v_rdev)]->d_flags & + D_NOCLUSTERR) + mp->mnt_flag |= MNT_NOCLUSTERR; + if (args.fspec == 0) + return (vfs_export(mp, &imp->im_export, &args.export)); + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. 
+ */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + if ((error = namei(ndp))) + return (error); + devvp = ndp->ni_vp; + + if (devvp->v_type != VBLK) { + vrele(devvp); + return ENOTBLK; + } + if (major(devvp->v_rdev) >= nblkdev || + bdevsw[major(devvp->v_rdev)] == NULL) { + vrele(devvp); + return ENXIO; + } + + /* + * Verify that user has necessary permissions on the device, + * or has superuser abilities + */ + accessmode = VREAD; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); + if (error) + error = suser(p->p_ucred, &p->p_acflag); + if (error) { + vput(devvp); + return (error); + } + VOP_UNLOCK(devvp, 0, p); + + if ((mp->mnt_flag & MNT_UPDATE) == 0) { + if (bdevsw[major(devvp->v_rdev)]->d_flags & D_NOCLUSTERR) + mp->mnt_flag |= MNT_NOCLUSTERR; + error = iso_mountfs(devvp, mp, p, &args); + } else { + if (devvp != imp->im_devvp) + error = EINVAL; /* needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return error; + } + imp = VFSTOISOFS(mp); + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) cd9660_statfs(mp, &mp->mnt_stat, p); + return 0; +} + +/* + * Common code for mount and mountroot + */ +static int +iso_mountfs(devvp, mp, p, argp) + register struct vnode *devvp; + struct mount *mp; + struct proc *p; + struct iso_args *argp; +{ + register struct iso_mnt *isomp = (struct iso_mnt *)0; + struct buf *bp = NULL; + dev_t dev = devvp->v_rdev; + int error = EINVAL; + int needclose = 0; + int high_sierra = 0; + int iso_bsize; + int iso_blknum; + struct iso_volume_descriptor *vdp = 0; + struct iso_primary_descriptor *pri; + struct iso_sierra_primary_descriptor *pri_sierra; + struct iso_directory_record *rootp; + int logical_block_size; + + if 
(!(mp->mnt_flag & MNT_RDONLY)) + return EROFS; + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if ((error = vfs_mountedon(devvp))) + return error; + if (vcount(devvp) > 1 && devvp != rootvp) + return EBUSY; + if ((error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0))) + return (error); + + if ((error = VOP_OPEN(devvp, FREAD, FSCRED, p))) + return error; + needclose = 1; + + /* This is the "logical sector size". The standard says this + * should be 2048 or the physical sector size on the device, + * whichever is greater. For now, we'll just use a constant. + */ + iso_bsize = ISO_DEFAULT_BLOCK_SIZE; + + for (iso_blknum = 16 + argp->ssector; + iso_blknum < 100 + argp->ssector; + iso_blknum++) { + if ((error = bread(devvp, iso_blknum * btodb(iso_bsize), + iso_bsize, NOCRED, &bp)) != 0) + goto out; + + vdp = (struct iso_volume_descriptor *)bp->b_data; + if (bcmp (vdp->id, ISO_STANDARD_ID, sizeof vdp->id) != 0) { + if (bcmp (vdp->id_sierra, ISO_SIERRA_ID, + sizeof vdp->id) != 0) { + error = EINVAL; + goto out; + } else + high_sierra = 1; + } + + if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_END) { + error = EINVAL; + goto out; + } + + if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) == ISO_VD_PRIMARY) + break; + brelse(bp); + } + + if (isonum_711 (high_sierra? vdp->type_sierra: vdp->type) != ISO_VD_PRIMARY) { + error = EINVAL; + goto out; + } + + pri = (struct iso_primary_descriptor *)vdp; + pri_sierra = (struct iso_sierra_primary_descriptor *)vdp; + + logical_block_size = + isonum_723 (high_sierra? 
+ pri_sierra->logical_block_size: + pri->logical_block_size); + + if (logical_block_size < DEV_BSIZE || logical_block_size > MAXBSIZE + || (logical_block_size & (logical_block_size - 1)) != 0) { + error = EINVAL; + goto out; + } + + rootp = (struct iso_directory_record *) + (high_sierra? + pri_sierra->root_directory_record: + pri->root_directory_record); + + isomp = malloc(sizeof *isomp, M_ISOFSMNT, M_WAITOK); + bzero((caddr_t)isomp, sizeof *isomp); + isomp->logical_block_size = logical_block_size; + isomp->volume_space_size = + isonum_733 (high_sierra? + pri_sierra->volume_space_size: + pri->volume_space_size); + /* + * Since an ISO9660 multi-session CD can also access previous + * sessions, we have to include them into the space consider- + * ations. This doesn't yield a very accurate number since + * parts of the old sessions might be inaccessible now, but we + * can't do much better. This is also important for the NFS + * filehandle validation. + */ + isomp->volume_space_size += argp->ssector; + bcopy (rootp, isomp->root, sizeof isomp->root); + isomp->root_extent = isonum_733 (rootp->extent); + isomp->root_size = isonum_733 (rootp->size); + + isomp->im_bmask = logical_block_size - 1; + isomp->im_bshift = 0; + while ((1 << isomp->im_bshift) < isomp->logical_block_size) + isomp->im_bshift++; + + bp->b_flags |= B_AGE; + brelse(bp); + bp = NULL; + + mp->mnt_data = (qaddr_t)isomp; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; + mp->mnt_maxsymlinklen = 0; + mp->mnt_flag |= MNT_LOCAL; + isomp->im_mountp = mp; + isomp->im_dev = dev; + isomp->im_devvp = devvp; + + devvp->v_specmountpoint = mp; + + /* Check the Rock Ridge Extention support */ + if (!(argp->flags & ISOFSMNT_NORRIP)) { + if ((error = bread(isomp->im_devvp, + (isomp->root_extent + isonum_711(rootp->ext_attr_length)) << + (isomp->im_bshift - DEV_BSHIFT), + isomp->logical_block_size, NOCRED, &bp)) != 0) + goto out; + + rootp = (struct iso_directory_record 
*)bp->b_data; + + if ((isomp->rr_skip = cd9660_rrip_offset(rootp,isomp)) < 0) { + argp->flags |= ISOFSMNT_NORRIP; + } else { + argp->flags &= ~ISOFSMNT_GENS; + } + + /* + * The contents are valid, + * but they will get reread as part of another vnode, so... + */ + bp->b_flags |= B_AGE; + brelse(bp); + bp = NULL; + } + isomp->im_flags = argp->flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS|ISOFSMNT_EXTATT); + + if(high_sierra) + /* this effectively ignores all the mount flags */ + isomp->iso_ftype = ISO_FTYPE_HIGH_SIERRA; + else + switch (isomp->im_flags&(ISOFSMNT_NORRIP|ISOFSMNT_GENS)) { + default: + isomp->iso_ftype = ISO_FTYPE_DEFAULT; + break; + case ISOFSMNT_GENS|ISOFSMNT_NORRIP: + isomp->iso_ftype = ISO_FTYPE_9660; + break; + case 0: + isomp->iso_ftype = ISO_FTYPE_RRIP; + break; + } + + return 0; +out: + devvp->v_specmountpoint = NULL; + if (bp) + brelse(bp); + if (needclose) + (void)VOP_CLOSE(devvp, FREAD, NOCRED, p); + if (isomp) { + free((caddr_t)isomp, M_ISOFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return error; +} + +/* + * Make a filesystem operational. + * Nothing to do at the moment. 
+ */ +/* ARGSUSED */ +static int +cd9660_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + return 0; +} + +/* + * unmount system call + */ +static int +cd9660_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct iso_mnt *isomp; + int error, flags = 0; + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; +#if 0 + mntflushbuf(mp, 0); + if (mntinvalbuf(mp)) + return EBUSY; +#endif + if ((error = vflush(mp, NULLVP, flags))) + return (error); + + isomp = VFSTOISOFS(mp); + + + isomp->im_devvp->v_specmountpoint = NULL; + error = VOP_CLOSE(isomp->im_devvp, FREAD, NOCRED, p); + vrele(isomp->im_devvp); + free((caddr_t)isomp, M_ISOFSMNT); + mp->mnt_data = (qaddr_t)0; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +/* + * Return root of a filesystem + */ +static int +cd9660_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct iso_mnt *imp = VFSTOISOFS(mp); + struct iso_directory_record *dp = + (struct iso_directory_record *)imp->root; + ino_t ino = isodirino(dp, imp); + + /* + * With RRIP we must use the `.' entry of the root directory. + * Simply tell vget, that it's a relocated directory. + */ + return (cd9660_vget_internal(mp, ino, vpp, + imp->iso_ftype == ISO_FTYPE_RRIP, dp)); +} + +/* + * Do operations associated with quotas, not supported + */ +/* ARGSUSED */ +static int +cd9660_quotactl(mp, cmd, uid, arg, p) + struct mount *mp; + int cmd; + uid_t uid; + caddr_t arg; + struct proc *p; +{ + + return (EOPNOTSUPP); +} + +/* + * Get file system statistics. 
+ */ +int +cd9660_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + register struct iso_mnt *isomp; + + isomp = VFSTOISOFS(mp); + + sbp->f_bsize = isomp->logical_block_size; + sbp->f_iosize = sbp->f_bsize; /* XXX */ + sbp->f_blocks = isomp->volume_space_size; + sbp->f_bfree = 0; /* total free blocks */ + sbp->f_bavail = 0; /* blocks free for non superuser */ + sbp->f_files = 0; /* total files */ + sbp->f_ffree = 0; /* free file nodes */ + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return 0; +} + +/* ARGSUSED */ +static int +cd9660_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is in range + * - call iget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the generation number matches + */ + +struct ifid { + ushort ifid_len; + ushort ifid_pad; + int ifid_ino; + long ifid_start; +}; + +/* ARGSUSED */ +int +cd9660_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct sockaddr *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + struct ifid *ifhp = (struct ifid *)fhp; + register struct iso_node *ip; + register struct netcred *np; + register struct iso_mnt *imp = VFSTOISOFS(mp); + struct vnode *nvp; + int error; + +#ifdef ISOFS_DBG + printf("fhtovp: ino %d, start %ld\n", + ifhp->ifid_ino, ifhp->ifid_start); +#endif + + /* + * Get the export permission structure for this <mp, client> tuple. 
+ */ + np = vfs_export_lookup(mp, &imp->im_export, nam); + if (np == NULL) + return (EACCES); + + if ((error = VFS_VGET(mp, ifhp->ifid_ino, &nvp)) != 0) { + *vpp = NULLVP; + return (error); + } + ip = VTOI(nvp); + if (ip->inode.iso_mode == 0) { + vput(nvp); + *vpp = NULLVP; + return (ESTALE); + } + *vpp = nvp; + *exflagsp = np->netc_exflags; + *credanonp = &np->netc_anon; + return (0); +} + +int +cd9660_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + + /* + * XXXX + * It would be nice if we didn't always set the `relocated' flag + * and force the extra read, but I don't want to think about fixing + * that right now. + */ + return (cd9660_vget_internal(mp, ino, vpp, +#if 0 + VFSTOISOFS(mp)->iso_ftype == ISO_FTYPE_RRIP, +#else + 0, +#endif + (struct iso_directory_record *)0)); +} + +int +cd9660_vget_internal(mp, ino, vpp, relocated, isodir) + struct mount *mp; + ino_t ino; + struct vnode **vpp; + int relocated; + struct iso_directory_record *isodir; +{ + struct iso_mnt *imp; + struct iso_node *ip; + struct buf *bp; + struct vnode *vp, *nvp; + dev_t dev; + int error; + + imp = VFSTOISOFS(mp); + dev = imp->im_dev; + if ((*vpp = cd9660_ihashget(dev, ino)) != NULLVP) + return (0); + + /* Allocate a new vnode/iso_node. */ + if ((error = getnewvnode(VT_ISOFS, mp, cd9660_vnodeop_p, &vp)) != 0) { + *vpp = NULLVP; + return (error); + } + MALLOC(ip, struct iso_node *, sizeof(struct iso_node), M_ISOFSNODE, + M_WAITOK); + bzero((caddr_t)ip, sizeof(struct iso_node)); + lockinit(&ip->i_lock, PINOD, "isonode", 0, 0); + vp->v_data = ip; + ip->i_vnode = vp; + ip->i_dev = dev; + ip->i_number = ino; + + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. 
+ */ + cd9660_ihashins(ip); + + if (isodir == 0) { + int lbn, off; + + lbn = lblkno(imp, ino); + if (lbn >= imp->volume_space_size) { + vput(vp); + printf("fhtovp: lbn exceed volume space %d\n", lbn); + return (ESTALE); + } + + off = blkoff(imp, ino); + if (off + ISO_DIRECTORY_RECORD_SIZE > imp->logical_block_size) { + vput(vp); + printf("fhtovp: crosses block boundary %d\n", + off + ISO_DIRECTORY_RECORD_SIZE); + return (ESTALE); + } + + error = bread(imp->im_devvp, + lbn << (imp->im_bshift - DEV_BSHIFT), + imp->logical_block_size, NOCRED, &bp); + if (error) { + vput(vp); + brelse(bp); + printf("fhtovp: bread error %d\n",error); + return (error); + } + isodir = (struct iso_directory_record *)(bp->b_data + off); + + if (off + isonum_711(isodir->length) > + imp->logical_block_size) { + vput(vp); + if (bp != 0) + brelse(bp); + printf("fhtovp: directory crosses block boundary %d[off=%d/len=%d]\n", + off +isonum_711(isodir->length), off, + isonum_711(isodir->length)); + return (ESTALE); + } + +#if 0 + if (isonum_733(isodir->extent) + + isonum_711(isodir->ext_attr_length) != ifhp->ifid_start) { + if (bp != 0) + brelse(bp); + printf("fhtovp: file start miss %d vs %d\n", + isonum_733(isodir->extent) + isonum_711(isodir->ext_attr_length), + ifhp->ifid_start); + return (ESTALE); + } +#endif + } else + bp = 0; + + ip->i_mnt = imp; + ip->i_devvp = imp->im_devvp; + VREF(ip->i_devvp); + + if (relocated) { + /* + * On relocated directories we must + * read the `.' entry out of a dir. 
+ */ + ip->iso_start = ino >> imp->im_bshift; + if (bp != 0) + brelse(bp); + if ((error = cd9660_blkatoff(vp, (off_t)0, NULL, &bp)) != 0) { + vput(vp); + return (error); + } + isodir = (struct iso_directory_record *)bp->b_data; + } + + ip->iso_extent = isonum_733(isodir->extent); + ip->i_size = isonum_733(isodir->size); + ip->iso_start = isonum_711(isodir->ext_attr_length) + ip->iso_extent; + + /* + * Setup time stamp, attribute + */ + vp->v_type = VNON; + switch (imp->iso_ftype) { + default: /* ISO_FTYPE_9660 */ + { + struct buf *bp2; + int off; + if ((imp->im_flags & ISOFSMNT_EXTATT) + && (off = isonum_711(isodir->ext_attr_length))) + cd9660_blkatoff(vp, (off_t)-(off << imp->im_bshift), NULL, + &bp2); + else + bp2 = NULL; + cd9660_defattr(isodir, ip, bp2, ISO_FTYPE_9660); + cd9660_deftstamp(isodir, ip, bp2, ISO_FTYPE_9660); + if (bp2) + brelse(bp2); + break; + } + case ISO_FTYPE_RRIP: + cd9660_rrip_analyze(isodir, ip, imp); + break; + } + + if (bp != 0) + brelse(bp); + + /* + * Initialize the associated vnode + */ + switch (vp->v_type = IFTOVT(ip->inode.iso_mode)) { + case VFIFO: + vp->v_op = cd9660_fifoop_p; + break; + case VCHR: + case VBLK: + /* + * if device, look at device number table for translation + */ + vp->v_op = cd9660_specop_p; + if ((nvp = checkalias(vp, ip->inode.iso_rdev, mp)) != NULL) { + /* + * Discard unneeded vnode, but save its iso_node. + * Note that the lock is carried over in the iso_node + * to the replacement vnode. + */ + nvp->v_data = vp->v_data; + vp->v_data = NULL; + vp->v_op = spec_vnodeop_p; + vrele(vp); + vgone(vp); + /* + * Reinitialize aliased inode. + */ + vp = nvp; + ip->i_vnode = vp; + } + break; + default: + break; + } + + if (ip->iso_extent == imp->root_extent) + vp->v_flag |= VROOT; + + /* + * XXX need generation number? 
+ */ + + *vpp = vp; + return (0); +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +int +cd9660_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + register struct iso_node *ip = VTOI(vp); + register struct ifid *ifhp; + + ifhp = (struct ifid *)fhp; + ifhp->ifid_len = sizeof(struct ifid); + + ifhp->ifid_ino = ip->i_number; + ifhp->ifid_start = ip->iso_start; + +#ifdef ISOFS_DBG + printf("vptofh: ino %d, start %ld\n", + ifhp->ifid_ino,ifhp->ifid_start); +#endif + return 0; +} diff --git a/sys/fs/cd9660/cd9660_vnops.c b/sys/fs/cd9660/cd9660_vnops.c new file mode 100644 index 0000000..5ec970a --- /dev/null +++ b/sys/fs/cd9660/cd9660_vnops.c @@ -0,0 +1,920 @@ +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)cd9660_vnops.c 8.19 (Berkeley) 5/27/95 + * $Id: cd9660_vnops.c,v 1.53 1998/07/04 20:45:30 julian Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/kernel.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> +#include <sys/malloc.h> +#include <sys/dirent.h> +#include <sys/unistd.h> + +#include <vm/vm.h> +#include <vm/vm_zone.h> +#include <vm/vnode_pager.h> + +#include <isofs/cd9660/iso.h> +#include <isofs/cd9660/cd9660_node.h> +#include <isofs/cd9660/iso_rrip.h> + +static int cd9660_setattr __P((struct vop_setattr_args *)); +static int cd9660_access __P((struct vop_access_args *)); +static int cd9660_getattr __P((struct vop_getattr_args *)); +static int cd9660_pathconf __P((struct vop_pathconf_args *)); +static int cd9660_read __P((struct vop_read_args *)); +struct isoreaddir; +static int iso_uiodir __P((struct isoreaddir *idp, struct dirent *dp, + off_t off)); +static int iso_shipdir __P((struct isoreaddir *idp)); +static int cd9660_readdir __P((struct vop_readdir_args *)); +static int cd9660_readlink __P((struct 
vop_readlink_args *ap)); +static int cd9660_abortop __P((struct vop_abortop_args *)); +static int cd9660_strategy __P((struct vop_strategy_args *)); +static int cd9660_print __P((struct vop_print_args *)); +static int cd9660_getpages __P((struct vop_getpages_args *)); +static int cd9660_putpages __P((struct vop_putpages_args *)); + +/* + * Setattr call. Only allowed for block and character special devices. + */ +int +cd9660_setattr(ap) + struct vop_setattr_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + + if (vap->va_flags != (u_long)VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) + return (EROFS); + if (vap->va_size != (u_quad_t)VNOVAL) { + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + return (EROFS); + case VCHR: + case VBLK: + case VSOCK: + case VFIFO: + case VNON: + case VBAD: + return (0); + } + } + return (0); +} + +/* + * Check mode permission on inode pointer. Mode is READ, WRITE or EXEC. + * The mode is shifted to select the owner/group/other fields. The + * super user is granted all permissions. + */ +/* ARGSUSED */ +static int +cd9660_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct iso_node *ip = VTOI(vp); + struct ucred *cred = ap->a_cred; + mode_t mask, mode = ap->a_mode; + gid_t *gp; + int i; + + /* + * Disallow write attempts unless the file is a socket, + * fifo, or a block or character device resident on the + * file system. 
+ */ + if (mode & VWRITE) { + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + return (EROFS); + /* NOT REACHED */ + default: + break; + } + } + + /* User id 0 always gets access. */ + if (cred->cr_uid == 0) + return (0); + + mask = 0; /* S_I* bits the request needs; filled in for exactly one class below */ + + /* Otherwise, check the owner; only the owner bits are consulted. */ + if (cred->cr_uid == ip->inode.iso_uid) { + if (mode & VEXEC) + mask |= S_IXUSR; + if (mode & VREAD) + mask |= S_IRUSR; + if (mode & VWRITE) + mask |= S_IWUSR; + return ((ip->inode.iso_mode & mask) == mask ? 0 : EACCES); + } + + /* Otherwise, check the groups; the first matching group decides. */ + for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++) + if (ip->inode.iso_gid == *gp) { + if (mode & VEXEC) + mask |= S_IXGRP; + if (mode & VREAD) + mask |= S_IRGRP; + if (mode & VWRITE) + mask |= S_IWGRP; + return ((ip->inode.iso_mode & mask) == mask ? + 0 : EACCES); + } + + /* Otherwise, check everyone else. */ + if (mode & VEXEC) + mask |= S_IXOTH; + if (mode & VREAD) + mask |= S_IROTH; + if (mode & VWRITE) + mask |= S_IWOTH; + return ((ip->inode.iso_mode & mask) == mask ? 
0 : EACCES); +} +/* Getattr call: fill *a_vap from the in-core iso_node; always returns 0. */ +static int +cd9660_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; + +{ + struct vnode *vp = ap->a_vp; + register struct vattr *vap = ap->a_vap; + register struct iso_node *ip = VTOI(vp); + + vap->va_fsid = ip->i_dev; + vap->va_fileid = ip->i_number; + + vap->va_mode = ip->inode.iso_mode; + vap->va_nlink = ip->inode.iso_links; + vap->va_uid = ip->inode.iso_uid; + vap->va_gid = ip->inode.iso_gid; + vap->va_atime = ip->inode.iso_atime; + vap->va_mtime = ip->inode.iso_mtime; + vap->va_ctime = ip->inode.iso_ctime; + vap->va_rdev = ip->inode.iso_rdev; + + vap->va_size = (u_quad_t) ip->i_size; + if (ip->i_size == 0 && (vap->va_mode & S_IFMT) == S_IFLNK) { /* symlink with no recorded size: read the target to learn its length */ + struct vop_readlink_args rdlnk; + struct iovec aiov; + struct uio auio; + char *cp; + + MALLOC(cp, char *, MAXPATHLEN, M_TEMP, M_WAITOK); + aiov.iov_base = cp; + aiov.iov_len = MAXPATHLEN; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = ap->a_p; + auio.uio_resid = MAXPATHLEN; + rdlnk.a_uio = &auio; + rdlnk.a_vp = ap->a_vp; + rdlnk.a_cred = ap->a_cred; + if (cd9660_readlink(&rdlnk) == 0) + vap->va_size = MAXPATHLEN - auio.uio_resid; /* bytes consumed == link length; on failure va_size stays 0 */ + FREE(cp, M_TEMP); + } + vap->va_flags = 0; + vap->va_gen = 1; + vap->va_blocksize = ip->i_mnt->logical_block_size; + vap->va_bytes = (u_quad_t) ip->i_size; + vap->va_type = vp->v_type; + vap->va_filerev = 0; + return (0); +} + +/* + * Vnode op for reading. 
+ */ +static int +cd9660_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + register struct uio *uio = ap->a_uio; + register struct iso_node *ip = VTOI(vp); + register struct iso_mnt *imp; + struct buf *bp; + daddr_t lbn, rablock; + off_t diff; + int rasize, error = 0; + long size, n, on; + + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0) + return (EINVAL); + ip->i_flag |= IN_ACCESS; + imp = ip->i_mnt; + do { + lbn = lblkno(imp, uio->uio_offset); + on = blkoff(imp, uio->uio_offset); + n = min((u_int)(imp->logical_block_size - on), + uio->uio_resid); + diff = (off_t)ip->i_size - uio->uio_offset; + if (diff <= 0) + return (0); + if (diff < n) + n = diff; + size = blksize(imp, ip, lbn); + rablock = lbn + 1; + if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { + if (lblktosize(imp, rablock) < ip->i_size) + error = cluster_read(vp, (off_t)ip->i_size, + lbn, size, NOCRED, uio->uio_resid, + (ap->a_ioflag >> 16), &bp); + else + error = bread(vp, lbn, size, NOCRED, &bp); + } else { + if (vp->v_lastr + 1 == lbn && + lblktosize(imp, rablock) < ip->i_size) { + rasize = blksize(imp, ip, rablock); + error = breadn(vp, lbn, size, &rablock, + &rasize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + } + vp->v_lastr = lbn; + n = min(n, size - bp->b_resid); + if (error) { + brelse(bp); + return (error); + } + + error = uiomove(bp->b_data + on, (int)n, uio); + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); +} + +/* + * Structure for reading directories + */ +struct isoreaddir { + struct dirent saveent; + struct dirent assocent; + struct dirent current; + off_t saveoff; + off_t assocoff; + off_t curroff; + struct uio *uio; + off_t uio_off; + int eofflag; + u_long *cookies; + int ncookies; +}; + +int +iso_uiodir(idp,dp,off) + struct isoreaddir *idp; + struct dirent *dp; + off_t off; +{ + 
int error; + + dp->d_name[dp->d_namlen] = 0; + dp->d_reclen = GENERIC_DIRSIZ(dp); + + if (idp->uio->uio_resid < dp->d_reclen) { + idp->eofflag = 0; + return (-1); + } + + if (idp->cookies) { + if (idp->ncookies <= 0) { + idp->eofflag = 0; + return (-1); + } + + *idp->cookies++ = off; + --idp->ncookies; + } + + if ((error = uiomove((caddr_t) dp,dp->d_reclen,idp->uio)) != 0) + return (error); + idp->uio_off = off; + return (0); +} + +int +iso_shipdir(idp) + struct isoreaddir *idp; +{ + struct dirent *dp; + int cl, sl, assoc; + int error; + char *cname, *sname; + + cl = idp->current.d_namlen; + cname = idp->current.d_name; +assoc = (cl > 1) && (*cname == ASSOCCHAR); + if (assoc) { + cl--; + cname++; + } + + dp = &idp->saveent; + sname = dp->d_name; + if (!(sl = dp->d_namlen)) { + dp = &idp->assocent; + sname = dp->d_name + 1; + sl = dp->d_namlen - 1; + } + if (sl > 0) { + if (sl != cl + || bcmp(sname,cname,sl)) { + if (idp->assocent.d_namlen) { + if ((error = iso_uiodir(idp,&idp->assocent,idp->assocoff)) != 0) + return (error); + idp->assocent.d_namlen = 0; + } + if (idp->saveent.d_namlen) { + if ((error = iso_uiodir(idp,&idp->saveent,idp->saveoff)) != 0) + return (error); + idp->saveent.d_namlen = 0; + } + } + } + idp->current.d_reclen = GENERIC_DIRSIZ(&idp->current); + if (assoc) { + idp->assocoff = idp->curroff; + bcopy(&idp->current,&idp->assocent,idp->current.d_reclen); + } else { + idp->saveoff = idp->curroff; + bcopy(&idp->current,&idp->saveent,idp->current.d_reclen); + } + return (0); +} + +/* + * Vnode op for readdir + */ +static int +cd9660_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *a_ncookies; + u_long *a_cookies; + } */ *ap; +{ + register struct uio *uio = ap->a_uio; + struct isoreaddir *idp; + struct vnode *vdp = ap->a_vp; + struct iso_node *dp; + struct iso_mnt *imp; + struct buf *bp = NULL; + struct iso_directory_record *ep; + int entryoffsetinblock; + doff_t 
endsearch; + u_long bmask; + int error = 0; + int reclen; + u_short namelen; + int ncookies = 0; + u_long *cookies = NULL; + + dp = VTOI(vdp); + imp = dp->i_mnt; + bmask = imp->im_bmask; + + MALLOC(idp, struct isoreaddir *, sizeof(*idp), M_TEMP, M_WAITOK); + idp->saveent.d_namlen = idp->assocent.d_namlen = 0; + /* + * XXX + * Is it worth trying to figure out the type? + */ + idp->saveent.d_type = idp->assocent.d_type = idp->current.d_type = + DT_UNKNOWN; + idp->uio = uio; + if (ap->a_ncookies == NULL) { + idp->cookies = NULL; + } else { + /* + * Guess the number of cookies needed. + */ + ncookies = uio->uio_resid / 16; + MALLOC(cookies, u_long *, ncookies * sizeof(u_int), M_TEMP, + M_WAITOK); + idp->cookies = cookies; + idp->ncookies = ncookies; + } + idp->eofflag = 1; + idp->curroff = uio->uio_offset; + + if ((entryoffsetinblock = idp->curroff & bmask) && + (error = cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp))) { + FREE(idp, M_TEMP); + return (error); + } + endsearch = dp->i_size; + + while (idp->curroff < endsearch) { + /* + * If offset is on a block boundary, + * read the next directory block. + * Release previous if it exists. + */ + if ((idp->curroff & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if ((error = + cd9660_blkatoff(vdp, (off_t)idp->curroff, NULL, &bp)) != 0) + break; + entryoffsetinblock = 0; + } + /* + * Get pointer to next entry. 
+ */ + ep = (struct iso_directory_record *) + ((char *)bp->b_data + entryoffsetinblock); + + reclen = isonum_711(ep->length); + if (reclen == 0) { + /* skip to next block, if any */ + idp->curroff = + (idp->curroff & ~bmask) + imp->logical_block_size; + continue; + } + + if (reclen < ISO_DIRECTORY_RECORD_SIZE) { + error = EINVAL; + /* illegal entry, stop */ + break; + } + + if (entryoffsetinblock + reclen > imp->logical_block_size) { + error = EINVAL; + /* illegal directory, so stop looking */ + break; + } + + idp->current.d_namlen = isonum_711(ep->name_len); + + if (reclen < ISO_DIRECTORY_RECORD_SIZE + idp->current.d_namlen) { + error = EINVAL; + /* illegal entry, stop */ + break; + } + + if (isonum_711(ep->flags)&2) /* flag bit 0x02 -- presumably the ISO 9660 directory bit; TODO confirm */ + idp->current.d_fileno = isodirino(ep, imp); + else + idp->current.d_fileno = dbtob(bp->b_blkno) + + entryoffsetinblock; + + idp->curroff += reclen; /* curroff now addresses the following record; it is the offset handed to iso_uiodir below */ + + switch (imp->iso_ftype) { + case ISO_FTYPE_RRIP: + cd9660_rrip_getname(ep,idp->current.d_name, &namelen, + &idp->current.d_fileno,imp); + idp->current.d_namlen = (u_char)namelen; + if (idp->current.d_namlen) + error = iso_uiodir(idp,&idp->current,idp->curroff); + break; + default: /* ISO_FTYPE_DEFAULT || ISO_FTYPE_9660 || ISO_FTYPE_HIGH_SIERRA*/ + strcpy(idp->current.d_name,".."); + switch (ep->name[0]) { + case 0: /* name byte 0: emit the first character of ".." only, i.e. "." */ + idp->current.d_namlen = 1; + error = iso_uiodir(idp,&idp->current,idp->curroff); + break; + case 1: /* name byte 1: emit ".." */ + idp->current.d_namlen = 2; + error = iso_uiodir(idp,&idp->current,idp->curroff); + break; + default: + isofntrans(ep->name,idp->current.d_namlen, + idp->current.d_name, &namelen, + imp->iso_ftype == ISO_FTYPE_9660, + isonum_711(ep->flags)&4); /* flag bit 0x04 -- presumably the associated-file bit; TODO confirm */ + idp->current.d_namlen = (u_char)namelen; + if (imp->iso_ftype == ISO_FTYPE_DEFAULT) + error = iso_shipdir(idp); + else + error = iso_uiodir(idp,&idp->current,idp->curroff); + break; + } + } + if (error) + break; + + entryoffsetinblock += reclen; + } + + if (!error && imp->iso_ftype == ISO_FTYPE_DEFAULT) { + idp->current.d_namlen = 0; + error = 
iso_shipdir(idp); + } + if (error < 0) + error = 0; + + if (ap->a_ncookies != NULL) { + if (error) + free(cookies, M_TEMP); + else { + /* + * Work out the number of cookies actually used. + */ + *ap->a_ncookies = ncookies - idp->ncookies; + *ap->a_cookies = cookies; + } + } + + if (bp) + brelse (bp); + + uio->uio_offset = idp->uio_off; + *ap->a_eofflag = idp->eofflag; + + FREE(idp, M_TEMP); + + return (error); +} + +/* + * Return target name of a symbolic link + * Shouldn't we get the parent vnode and read the data from there? + * This could eventually result in deadlocks in cd9660_lookup. + * But otherwise the block read here is in the block buffer two times. + */ +typedef struct iso_directory_record ISODIR; +typedef struct iso_node ISONODE; +typedef struct iso_mnt ISOMNT; +static int +cd9660_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + ISONODE *ip; + ISODIR *dirp; + ISOMNT *imp; + struct buf *bp; + struct uio *uio; + u_short symlen; + int error; + char *symname; + + ip = VTOI(ap->a_vp); + imp = ip->i_mnt; + uio = ap->a_uio; + + if (imp->iso_ftype != ISO_FTYPE_RRIP) + return (EINVAL); + + /* + * Get parents directory record block that this inode included. + */ + error = bread(imp->im_devvp, + (ip->i_number >> imp->im_bshift) << + (imp->im_bshift - DEV_BSHIFT), + imp->logical_block_size, NOCRED, &bp); + if (error) { + brelse(bp); + return (EINVAL); + } + + /* + * Setup the directory pointer for this inode + */ + dirp = (ISODIR *)(bp->b_data + (ip->i_number & imp->im_bmask)); + + /* + * Just make sure, we have a right one.... + * 1: Check not cross boundary on block + */ + if ((ip->i_number & imp->im_bmask) + isonum_711(dirp->length) + > (unsigned)imp->logical_block_size) { + brelse(bp); + return (EINVAL); + } + + /* + * Now get a buffer + * Abuse a namei buffer for now. 
+ */ + if (uio->uio_segflg == UIO_SYSSPACE) + symname = uio->uio_iov->iov_base; /* kernel destination: build the name in place */ + else + symname = zalloc(namei_zone); /* otherwise stage it in a namei_zone buffer */ + + /* + * Gather the symbolic link name from the SL record; a zero + * return is treated as failure. + */ + if (cd9660_rrip_getsymname(dirp, symname, &symlen, imp) == 0) { + if (uio->uio_segflg != UIO_SYSSPACE) + zfree(namei_zone, symname); + brelse(bp); + return (EINVAL); + } + /* + * The directory block is not referenced past this point; release it. + */ + brelse(bp); + + /* + * Hand the symbolic name back to the caller. + */ + if (uio->uio_segflg != UIO_SYSSPACE) { + error = uiomove(symname, symlen, uio); + zfree(namei_zone, symname); + return (error); + } + uio->uio_resid -= symlen; /* in-place case: only account for the bytes already written. NOTE(review): nothing here checks symlen against iov_len -- confirm every UIO_SYSSPACE caller supplies a large enough single iovec */ + uio->uio_iov->iov_base += symlen; + uio->uio_iov->iov_len -= symlen; + return (0); +} + +/* + * Ufs abort op, called after namei() when a CREATE/DELETE isn't actually + * done. If a buffer has been saved in anticipation of a CREATE, delete it. + */ +static int +cd9660_abortop(ap) + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap; +{ + if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) + zfree(namei_zone, ap->a_cnp->cn_pnbuf); + return (0); +} + +/* + * Calculate the logical to physical mapping if not done already, + * then call the device strategy routine. 
+ */ +static int +cd9660_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + register struct buf *bp = ap->a_bp; + register struct vnode *vp = bp->b_vp; + register struct iso_node *ip; + int error; + + ip = VTOI(vp); + if (vp->v_type == VBLK || vp->v_type == VCHR) + panic("cd9660_strategy: spec"); + if (bp->b_blkno == bp->b_lblkno) { /* block number not mapped yet */ + if ((error = + VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL))) { + bp->b_error = error; + bp->b_flags |= B_ERROR; + biodone(bp); + return (error); + } + if ((long)bp->b_blkno == -1) + clrbuf(bp); + } + if ((long)bp->b_blkno == -1) { /* no device block behind this buffer: complete it without I/O */ + biodone(bp); + return (0); + } + vp = ip->i_devvp; /* pass the request on to the device vnode */ + bp->b_dev = vp->v_rdev; + VOP_STRATEGY(vp, bp); + return (0); +} + +/* + * Print a fixed tag line identifying an isofs vnode; no inode + * contents are printed. + */ +static int +cd9660_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + printf("tag VT_ISOFS, isofs vnode\n"); + return (0); +} + +/* + * Return POSIX pathconf information applicable to cd9660 filesystems. + * Unknown names yield EINVAL. + */ +static int +cd9660_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + register_t *a_retval; + } */ *ap; +{ + + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = 1; + return (0); + case _PC_NAME_MAX: + if (VTOI(ap->a_vp)->i_mnt->iso_ftype == ISO_FTYPE_RRIP) + *ap->a_retval = NAME_MAX; + else + *ap->a_retval = 37; /* presumably 30 name chars + '.' + ';' + 5 version digits; TODO confirm */ + return (0); + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; + return (0); + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); + case _PC_NO_TRUNC: + *ap->a_retval = 1; + return (0); + default: + return (EINVAL); + } + /* NOTREACHED */ +} + +/* + * get page routine + * + * XXX By default, wimp out... note that a_offset is ignored (and always + * XXX has been). 
+ */ +static int +cd9660_getpages(ap) + struct vop_getpages_args *ap; +{ + /* Delegate to the generic vnode pager. */ + return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_reqpage); +} + +/* + * put page routine + * + * XXX By default, wimp out... note that a_offset is ignored (and always + * XXX has been). + */ +static int +cd9660_putpages(ap) + struct vop_putpages_args *ap; +{ + /* Delegate to the generic vnode pager. */ + return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); +} + +/* + * Global vfs data structures for cd9660 + */ +vop_t **cd9660_vnodeop_p; +static struct vnodeopv_entry_desc cd9660_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_abortop_desc, (vop_t *) cd9660_abortop }, + { &vop_access_desc, (vop_t *) cd9660_access }, + { &vop_bmap_desc, (vop_t *) cd9660_bmap }, + { &vop_cachedlookup_desc, (vop_t *) cd9660_lookup }, + { &vop_getattr_desc, (vop_t *) cd9660_getattr }, + { &vop_inactive_desc, (vop_t *) cd9660_inactive }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, + { &vop_lock_desc, (vop_t *) vop_stdlock }, + { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, + { &vop_pathconf_desc, (vop_t *) cd9660_pathconf }, + { &vop_print_desc, (vop_t *) cd9660_print }, + { &vop_read_desc, (vop_t *) cd9660_read }, + { &vop_readdir_desc, (vop_t *) cd9660_readdir }, + { &vop_readlink_desc, (vop_t *) cd9660_readlink }, + { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, + { &vop_setattr_desc, (vop_t *) cd9660_setattr }, + { &vop_strategy_desc, (vop_t *) cd9660_strategy }, + { &vop_unlock_desc, (vop_t *) vop_stdunlock }, + { &vop_getpages_desc, (vop_t *) cd9660_getpages }, + { &vop_putpages_desc, (vop_t *) cd9660_putpages }, + { NULL, NULL } +}; +static struct vnodeopv_desc cd9660_vnodeop_opv_desc = + { &cd9660_vnodeop_p, cd9660_vnodeop_entries }; +VNODEOP_SET(cd9660_vnodeop_opv_desc); + +/* + * Special device vnode ops + */ +vop_t **cd9660_specop_p; +static struct vnodeopv_entry_desc cd9660_specop_entries[] = { + { &vop_default_desc, (vop_t *) spec_vnoperate }, 
+ { &vop_access_desc, (vop_t *) cd9660_access }, + { &vop_getattr_desc, (vop_t *) cd9660_getattr }, + { &vop_inactive_desc, (vop_t *) cd9660_inactive }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, + { &vop_lock_desc, (vop_t *) vop_stdlock }, + { &vop_print_desc, (vop_t *) cd9660_print }, + { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, + { &vop_setattr_desc, (vop_t *) cd9660_setattr }, + { &vop_unlock_desc, (vop_t *) vop_stdunlock }, + { NULL, NULL } +}; +static struct vnodeopv_desc cd9660_specop_opv_desc = + { &cd9660_specop_p, cd9660_specop_entries }; +VNODEOP_SET(cd9660_specop_opv_desc); + +vop_t **cd9660_fifoop_p; +static struct vnodeopv_entry_desc cd9660_fifoop_entries[] = { + { &vop_default_desc, (vop_t *) fifo_vnoperate }, + { &vop_access_desc, (vop_t *) cd9660_access }, + { &vop_getattr_desc, (vop_t *) cd9660_getattr }, + { &vop_inactive_desc, (vop_t *) cd9660_inactive }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, + { &vop_lock_desc, (vop_t *) vop_stdlock }, + { &vop_print_desc, (vop_t *) cd9660_print }, + { &vop_reclaim_desc, (vop_t *) cd9660_reclaim }, + { &vop_setattr_desc, (vop_t *) cd9660_setattr }, + { &vop_unlock_desc, (vop_t *) vop_stdunlock }, + { NULL, NULL } +}; +static struct vnodeopv_desc cd9660_fifoop_opv_desc = + { &cd9660_fifoop_p, cd9660_fifoop_entries }; + +VNODEOP_SET(cd9660_fifoop_opv_desc); diff --git a/sys/fs/cd9660/iso.h b/sys/fs/cd9660/iso.h new file mode 100644 index 0000000..7b50fb6 --- /dev/null +++ b/sys/fs/cd9660/iso.h @@ -0,0 +1,312 @@ +/*- + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)iso.h 8.6 (Berkeley) 5/10/95 + * $Id: iso.h,v 1.15 1997/05/04 16:17:49 joerg Exp $ + */ + +#define ISODCL(from, to) (to - from + 1) + +struct iso_volume_descriptor { + char type[ISODCL(1,1)]; /* 711 */ + char id[ISODCL(2,6)]; + char version[ISODCL(7,7)]; + char unused[ISODCL(8,8)]; + char type_sierra[ISODCL(9,9)]; /* 711 */ + char id_sierra[ISODCL(10,14)]; + char version_sierra[ISODCL(15,15)]; + char data[ISODCL(16,2048)]; +}; + +/* volume descriptor types */ +#define ISO_VD_PRIMARY 1 +#define ISO_VD_END 255 + +#define ISO_STANDARD_ID "CD001" +#define ISO_ECMA_ID "CDW01" + +#define ISO_SIERRA_ID "CDROM" + +struct iso_primary_descriptor { + char type [ISODCL ( 1, 1)]; /* 711 */ + char id [ISODCL ( 2, 6)]; + char version [ISODCL ( 7, 7)]; /* 711 */ + char unused1 [ISODCL ( 8, 8)]; + char system_id [ISODCL ( 9, 40)]; /* achars */ + char volume_id [ISODCL ( 41, 72)]; /* dchars */ + char unused2 [ISODCL ( 73, 80)]; + char volume_space_size [ISODCL ( 81, 88)]; /* 733 */ + char unused3 [ISODCL ( 89, 120)]; + char volume_set_size [ISODCL (121, 124)]; /* 723 */ + char volume_sequence_number [ISODCL (125, 128)]; /* 723 */ + char logical_block_size [ISODCL (129, 132)]; /* 723 */ + char path_table_size [ISODCL (133, 140)]; /* 733 */ + char type_l_path_table [ISODCL (141, 144)]; /* 731 */ + char opt_type_l_path_table [ISODCL (145, 148)]; /* 731 */ + char type_m_path_table [ISODCL (149, 152)]; /* 732 */ + char opt_type_m_path_table [ISODCL (153, 156)]; /* 732 */ + char root_directory_record [ISODCL (157, 190)]; /* 9.1 */ + char volume_set_id [ISODCL (191, 318)]; /* dchars */ + char publisher_id [ISODCL (319, 446)]; /* achars */ + char preparer_id [ISODCL (447, 574)]; /* achars */ + char application_id [ISODCL (575, 702)]; /* achars */ + char copyright_file_id [ISODCL (703, 739)]; /* 7.5 dchars */ + char abstract_file_id [ISODCL (740, 776)]; /* 7.5 dchars */ + char bibliographic_file_id [ISODCL (777, 813)]; /* 7.5 dchars */ + char creation_date [ISODCL (814, 830)]; /* 
8.4.26.1 */ + char modification_date [ISODCL (831, 847)]; /* 8.4.26.1 */ + char expiration_date [ISODCL (848, 864)]; /* 8.4.26.1 */ + char effective_date [ISODCL (865, 881)]; /* 8.4.26.1 */ + char file_structure_version [ISODCL (882, 882)]; /* 711 */ + char unused4 [ISODCL (883, 883)]; + char application_data [ISODCL (884, 1395)]; + char unused5 [ISODCL (1396, 2048)]; +}; +#define ISO_DEFAULT_BLOCK_SIZE 2048 + +struct iso_sierra_primary_descriptor { + char unknown1 [ISODCL ( 1, 8)]; /* 733 */ + char type [ISODCL ( 9, 9)]; /* 711 */ + char id [ISODCL ( 10, 14)]; + char version [ISODCL ( 15, 15)]; /* 711 */ + char unused1 [ISODCL ( 16, 16)]; + char system_id [ISODCL ( 17, 48)]; /* achars */ + char volume_id [ISODCL ( 49, 80)]; /* dchars */ + char unused2 [ISODCL ( 81, 88)]; + char volume_space_size [ISODCL ( 89, 96)]; /* 733 */ + char unused3 [ISODCL ( 97, 128)]; + char volume_set_size [ISODCL (129, 132)]; /* 723 */ + char volume_sequence_number [ISODCL (133, 136)]; /* 723 */ + char logical_block_size [ISODCL (137, 140)]; /* 723 */ + char path_table_size [ISODCL (141, 148)]; /* 733 */ + char type_l_path_table [ISODCL (149, 152)]; /* 731 */ + char opt_type_l_path_table [ISODCL (153, 156)]; /* 731 */ + char unknown2 [ISODCL (157, 160)]; /* 731 */ + char unknown3 [ISODCL (161, 164)]; /* 731 */ + char type_m_path_table [ISODCL (165, 168)]; /* 732 */ + char opt_type_m_path_table [ISODCL (169, 172)]; /* 732 */ + char unknown4 [ISODCL (173, 176)]; /* 732 */ + char unknown5 [ISODCL (177, 180)]; /* 732 */ + char root_directory_record [ISODCL (181, 214)]; /* 9.1 */ + char volume_set_id [ISODCL (215, 342)]; /* dchars */ + char publisher_id [ISODCL (343, 470)]; /* achars */ + char preparer_id [ISODCL (471, 598)]; /* achars */ + char application_id [ISODCL (599, 726)]; /* achars */ + char copyright_id [ISODCL (727, 790)]; /* achars */ + char creation_date [ISODCL (791, 806)]; /* ? */ + char modification_date [ISODCL (807, 822)]; /* ? 
*/ + char expiration_date [ISODCL (823, 838)]; /* ? */ + char effective_date [ISODCL (839, 854)]; /* ? */ + char file_structure_version [ISODCL (855, 855)]; /* 711 */ + char unused4 [ISODCL (856, 2048)]; +}; + +struct iso_directory_record { + char length [ISODCL (1, 1)]; /* 711 */ + char ext_attr_length [ISODCL (2, 2)]; /* 711 */ + u_char extent [ISODCL (3, 10)]; /* 733 */ + u_char size [ISODCL (11, 18)]; /* 733 */ + char date [ISODCL (19, 25)]; /* 7 by 711 */ + char flags [ISODCL (26, 26)]; + char file_unit_size [ISODCL (27, 27)]; /* 711 */ + char interleave [ISODCL (28, 28)]; /* 711 */ + char volume_sequence_number [ISODCL (29, 32)]; /* 723 */ + char name_len [ISODCL (33, 33)]; /* 711 */ + char name [1]; /* XXX */ +}; +/* can't take sizeof(iso_directory_record), because of possible alignment + of the last entry (34 instead of 33) */ +#define ISO_DIRECTORY_RECORD_SIZE 33 + +struct iso_extended_attributes { + u_char owner [ISODCL (1, 4)]; /* 723 */ + u_char group [ISODCL (5, 8)]; /* 723 */ + u_char perm [ISODCL (9, 10)]; /* 9.5.3 */ + char ctime [ISODCL (11, 27)]; /* 8.4.26.1 */ + char mtime [ISODCL (28, 44)]; /* 8.4.26.1 */ + char xtime [ISODCL (45, 61)]; /* 8.4.26.1 */ + char ftime [ISODCL (62, 78)]; /* 8.4.26.1 */ + char recfmt [ISODCL (79, 79)]; /* 711 */ + char recattr [ISODCL (80, 80)]; /* 711 */ + u_char reclen [ISODCL (81, 84)]; /* 723 */ + char system_id [ISODCL (85, 116)]; /* achars */ + char system_use [ISODCL (117, 180)]; + char version [ISODCL (181, 181)]; /* 711 */ + char len_esc [ISODCL (182, 182)]; /* 711 */ + char reserved [ISODCL (183, 246)]; + u_char len_au [ISODCL (247, 250)]; /* 723 */ +}; + +#ifdef KERNEL + +/* CD-ROM Format type */ +enum ISO_FTYPE { ISO_FTYPE_DEFAULT, ISO_FTYPE_9660, ISO_FTYPE_RRIP, + ISO_FTYPE_ECMA, ISO_FTYPE_HIGH_SIERRA }; + +#ifndef ISOFSMNT_ROOT +#define ISOFSMNT_ROOT 0 +#endif + +struct iso_mnt { + int im_flags; + + struct mount *im_mountp; + dev_t im_dev; + struct vnode *im_devvp; + + int logical_block_size; + int 
im_bshift; + int im_bmask; + + int volume_space_size; + struct netexport im_export; + + char root[ISODCL (157, 190)]; + int root_extent; + int root_size; + enum ISO_FTYPE iso_ftype; + + int rr_skip; + int rr_skip0; +}; + +#define VFSTOISOFS(mp) ((struct iso_mnt *)((mp)->mnt_data)) + +#define blkoff(imp, loc) ((loc) & (imp)->im_bmask) +#define lblktosize(imp, blk) ((blk) << (imp)->im_bshift) +#define lblkno(imp, loc) ((loc) >> (imp)->im_bshift) +#define blksize(imp, ip, lbn) ((imp)->logical_block_size) + +int cd9660_vget_internal __P((struct mount *, ino_t, struct vnode **, int, + struct iso_directory_record *)); +int cd9660_init __P((struct vfsconf *)); +#define cd9660_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) + +extern vop_t **cd9660_vnodeop_p; +extern vop_t **cd9660_specop_p; +extern vop_t **cd9660_fifoop_p; + +int isofncmp __P((u_char *, int, u_char *, int)); +void isofntrans __P((u_char *, int, u_char *, u_short *, int, int)); +ino_t isodirino __P((struct iso_directory_record *, struct iso_mnt *)); + +#endif /* KERNEL */ + +/* + * The isonum_xxx functions are inlined anyway, and could come handy even + * outside the kernel. Thus we don't hide them here. 
+ */ + +static __inline int isonum_711 __P((u_char *)); +static __inline int +isonum_711(p) + u_char *p; +{ + return *p; /* single unsigned byte */ +} + +static __inline int isonum_712 __P((char *)); +static __inline int +isonum_712(p) + char *p; +{ + return *p; /* single byte read through plain char */ +} + +#ifndef UNALIGNED_ACCESS + +static __inline int isonum_723 __P((u_char *)); +static __inline int +isonum_723(p) + u_char *p; +{ + return *p|(p[1] << 8); /* 16 bits, low byte first */ +} + +static __inline int isonum_733 __P((u_char *)); +static __inline int +isonum_733(p) + u_char *p; +{ + return *p|(p[1] << 8)|(p[2] << 16)|(p[3] << 24); /* 32 bits, low byte first */ +} + +#else /* UNALIGNED_ACCESS */ + +#if BYTE_ORDER == LITTLE_ENDIAN + +static __inline int +isonum_723(p) + u_char *p; +{ + return *(u_int16_t *)p; /* native load of the leading 2 bytes */ +} + +static __inline int +isonum_733(p) + u_char *p; +{ + return *(u_int32_t *)p; /* native load of the leading 4 bytes */ +} + +#endif + +#if BYTE_ORDER == BIG_ENDIAN + +static __inline int +isonum_723(p) + u_char *p; +{ + return *(u_int16_t *)(p + 2); /* native load of the copy stored 2 bytes in */ +} + +static __inline int +isonum_733(p) + u_char *p; +{ + return *(u_int32_t *)(p + 4); /* native load of the copy stored 4 bytes in */ +} + +#endif + +#endif /* UNALIGNED_ACCESS */ + +/* + * Associated files have a leading '='. + */ +#define ASSOCCHAR '=' diff --git a/sys/fs/cd9660/iso_rrip.h b/sys/fs/cd9660/iso_rrip.h new file mode 100644 index 0000000..2b256d5 --- /dev/null +++ b/sys/fs/cd9660/iso_rrip.h @@ -0,0 +1,86 @@ +/*- + * Copyright (c) 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley + * by Pace Willisson (pace@blitz.com). The Rock Ridge Extension + * Support code is derived from software contributed to Berkeley + * by Atsushi Murai (amurai@spec.co.jp). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)iso_rrip.h 8.2 (Berkeley) 1/23/94 + * $Id: iso_rrip.h,v 1.4 1997/02/22 09:38:52 peter Exp $ + */ + + +/* + * Analyze function flag (similar to RR field bits) + */ +#define ISO_SUSP_ATTR 0x0001 +#define ISO_SUSP_DEVICE 0x0002 +#define ISO_SUSP_SLINK 0x0004 +#define ISO_SUSP_ALTNAME 0x0008 +#define ISO_SUSP_CLINK 0x0010 +#define ISO_SUSP_PLINK 0x0020 +#define ISO_SUSP_RELDIR 0x0040 +#define ISO_SUSP_TSTAMP 0x0080 +#define ISO_SUSP_IDFLAG 0x0100 +#define ISO_SUSP_EXTREF 0x0200 +#define ISO_SUSP_CONT 0x0400 +#define ISO_SUSP_OFFSET 0x0800 +#define ISO_SUSP_STOP 0x1000 +#define ISO_SUSP_UNKNOWN 0x8000 + +typedef struct { + struct iso_node *inop; + int fields; /* interesting fields in this analysis */ + daddr_t iso_ce_blk; /* block of continuation area */ + off_t iso_ce_off; /* offset of continuation area */ + int iso_ce_len; /* length of continuation area */ + struct iso_mnt *imp; /* mount structure */ + ino_t *inump; /* inode number pointer */ + char *outbuf; /* name/symbolic link output area */ + u_short *outlen; /* length of above */ + u_short maxlen; /* maximum length of above */ + int cont; /* continuation of above */ +} ISO_RRIP_ANALYZE; + +struct iso_directory_record; + +int cd9660_rrip_analyze __P((struct iso_directory_record *isodir, + struct iso_node *inop, struct iso_mnt *imp)); +int cd9660_rrip_getname __P((struct iso_directory_record *isodir, + char *outbuf, u_short *outlen, + ino_t *inump, struct iso_mnt *imp)); +int cd9660_rrip_getsymname __P((struct iso_directory_record *isodir, + char *outbuf, u_short *outlen, + struct iso_mnt *imp)); +int cd9660_rrip_offset __P((struct iso_directory_record *isodir, + struct iso_mnt *imp)); diff --git a/sys/fs/coda/README b/sys/fs/coda/README new file mode 100644 index 0000000..f9bf3c3 --- /dev/null +++ b/sys/fs/coda/README @@ -0,0 +1,60 @@ + Announcing the Availability of the + Coda Distributed + Filesystem + for + BSD Unix Systems + + Coda is a distributed file system like NFS and AFS. 
It is +freely available, like NFS. But it functions much like AFS in being a +"stateful" file system. Coda and AFS cache files on your local +machine to improve performance. But Coda goes a step further than AFS +by letting you access the cached files when there is no available +network, for example on disconnected laptops or during network outages. In Coda, both +the client and server are outside the kernel, which makes them easier +to experiment with. + +To get more information on Coda, I would like to refer people to + http://www.coda.cs.cmu.edu +There is a wealth of documents, papers, and theses there. There is +also a good introduction to the Coda File System in + http://www.coda.cs.cmu.edu/ljpaper/lj.html + +Coda was originally developed as an academic prototype/testbed. It is +being polished and rewritten where necessary. Coda is a work in +progress and does have bugs. It is, though, very usable. Our +interest is in making Coda available to as many people as possible and +in having Coda evolve and flourish. + +The bulk of the Coda file system code supports the Coda client +program, the Coda server program and the utilities needed by both. +All these programs are Unix programs and can run equally well on any +Unix platform. Our main development thrust is improving these +programs. There is a small part of Coda that deals with the kernel to +file system interface. This code is OS specific (but should not be +platform specific). + +Coda is currently available for several OSes and platforms: + FreeBSD-2.2.5: i386 + FreeBSD-2.2.6: i386 + FreeBSD -current: i386 + Linux 2.0: i386 & sparc + Linux 2.1: i386 & sparc + NetBSD 1.3: i386 + NetBSD -current: i386 +The relevant sources, binaries, and docs can be found in + ftp://ftp.coda.cs.cmu.edu/pub/coda/ + +We intend to come out with new Coda releases often, though not daily. We +don't want to slight any OS/platform not mentioned above. We are just +limited in our resources as to what we can support internally.
We +will be happy to integrate OpenBSD support as well as other OS +support. Also, adding platform support should be relatively easy and +we can discuss this. The only difficulty is that Coda has a light weight +process package. It does some manipulations in assembler which would +have to be redone for a different platform. + +There are several mailing lists @coda.cs.cmu.edu that discuss coda: +coda-announce and linux-coda. We are going to revise linux-coda to be +OS neutral, since it is mainly Coda we want to discuss. We appreciate +comments, feedback, bug reports, bug fixes, enhancements, etc. + diff --git a/sys/fs/coda/TODO b/sys/fs/coda/TODO new file mode 100644 index 0000000..eac5143 --- /dev/null +++ b/sys/fs/coda/TODO @@ -0,0 +1,17 @@ +OOPS: + FreeBSD does not fsync!!! + +Near term: + Fix bug in executing/mapping new files. + cfs_mount bug: interaction with cfs_inactive no cfs_unsave. + vref/vn_lock == vget except no VXWANT which may be on. + Review locks: vn_lock/VOP_UNLOCK/lockmgr ... + +Medium term: + Add missing VFS methods. + Do performance profile. + Tune hash algorithm used in cfs_namecache. + Tune hash algorithm used in cfs_subr. + +Eventually: + Use standard queue macros. 
diff --git a/sys/fs/coda/cnode.h b/sys/fs/coda/cnode.h new file mode 100644 index 0000000..bf6f632 --- /dev/null +++ b/sys/fs/coda/cnode.h @@ -0,0 +1,319 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/cnode.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: cnode.h,v 1.4 1998/09/13 13:57:59 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon University. + * Contributors include David Steere, James Kistler, and M. Satyanarayanan.
+ */ + +/* + * HISTORY + * $Log: cnode.h,v $ + * Revision 1.4 1998/09/13 13:57:59 rvb + * Finish conversion of cfs -> coda + * + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.10 1998/08/28 18:12:25 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. + * + * Revision 1.9 1998/08/18 17:05:24 rvb + * Don't use __RCSID now + * + * Revision 1.8 1998/08/18 16:31:49 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.7 98/02/24 22:22:53 rvb + * Fixes up mainly to flush iopen and friends + * + * Revision 1.6 98/01/31 20:53:19 rvb + * First version that works on FreeBSD 2.2.5 + * + * Revision 1.5 98/01/23 11:53:51 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.4.2.5 98/01/23 11:21:14 rvb + * Sync with 2.2.5 + * + * Revision 1.4.2.4 98/01/22 13:03:38 rvb + * Had Breaken ls . + * + * Revision 1.4.2.3 97/12/19 14:26:09 rvb + * session id + * + * Revision 1.4.2.2 97/12/16 12:40:24 rvb + * Sync with 1.3 + * + * Revision 1.4.2.1 97/12/06 17:41:28 rvb + * Sync with peters coda.h + * + * Revision 1.4 97/12/05 10:39:30 rvb + * Read CHANGES + * + * Revision 1.3.18.2 97/11/12 12:09:45 rvb + * reorg pass1 + * + * Revision 1.3.18.1 97/10/29 16:06:31 rvb + * Kill DYING + * + * Revision 1.3 1996/12/12 22:11:03 bnoble + * Fixed the "downcall invokes venus operation" deadlock in all known cases. + * There may be more. 
+ * + * Revision 1.2 1996/01/02 16:57:26 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:53 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:08:23 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:08:23 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.2 1994/12/06 13:39:18 dcs + * Add a flag value to indicate a cnode was orphaned, e.g. the venus + * that created it has exited. This will allow one to restart venus + * even though some process may be cd'd into /coda. + * + * Revision 2.1 94/07/21 16:25:33 satya + * Conversion to C++ 3.0; start of Coda Release 2.0 + * + * Revision 1.2.7.1 94/06/16 11:26:02 raiff + * Branch for release beta-16Jun1994_39118 + * + * Revision 1.2 92/10/27 17:58:41 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 2.3 92/09/30 14:16:53 mja + * Picked up fixed #ifdef _KERNEL. Also... + * + * Substituted rvb's history blurb so that we agree with Mach 2.5 sources. + * [91/02/09 jjk] + * + * Added contributors blurb. + * [90/12/13 jjk] + * + * Revision 2.2 90/07/05 11:27:24 mrt + * Created for the Coda File System. + * [90/05/23 dcs] + * + * Revision 1.4 90/05/31 17:02:16 dcs + * Prepare for merge with facilities kernel. + * + * + * + */ + +#ifndef _CNODE_H_ +#define _CNODE_H_ + +#include <sys/vnode.h> +#include <sys/lock.h> +#include <machine/clock.h> + +MALLOC_DECLARE(M_CODA); + +/* + * tmp below since we need struct queue + */ +#include <coda/coda_kernel.h> + +/* + * Cnode lookup stuff. + * NOTE: CODA_CACHESIZE must be a power of 2 for cfshash to work! 
+ */ +#define CODA_CACHESIZE 512 + +/* + * Allocate `size' bytes of malloc type M_CODA into `ptr' (result cast to + * `cast'); panics if malloc() hands back 0. Macro arguments are + * parenthesized so expression arguments expand as a single operand. + */ +#define CODA_ALLOC(ptr, cast, size) \ +do { \ + (ptr) = (cast)malloc((unsigned long)(size), M_CODA, M_WAITOK); \ + if ((ptr) == 0) { \ + panic("kernel malloc returns 0 at %s:%d\n", __FILE__, __LINE__); \ + } \ +} while (0) + +/* `size' is accepted but not used. */ +#define CODA_FREE(ptr, size) free((ptr), M_CODA) + +/* + * global cache state control + */ +extern int coda_nc_use; + +/* + * Used to select debugging statements throughout the cfs code. + */ +extern int codadebug; +extern int coda_nc_debug; +extern int coda_printf_delay; +extern int coda_vnop_print_entry; +extern int coda_psdev_print_entry; +extern int coda_vfsop_print_entry; + +/* CODADBGMSK(N) is bit N; CODADEBUG runs STMT only when that bit is set in codadebug. */ +#define CODADBGMSK(N) (1 << (N)) +#define CODADEBUG(N, STMT) { if (codadebug & CODADBGMSK(N)) { STMT } } +/* printf wrapper: `args' is a parenthesized argument list; DELAY()s first when coda_printf_delay is nonzero. */ +#define myprintf(args) \ +do { \ + if (coda_printf_delay) \ + DELAY(coda_printf_delay);\ + printf args ; \ +} while (0) + +struct cnode { + struct vnode *c_vnode; + u_short c_flags; /* flags (see below) */ + ViceFid c_fid; /* file handle */ + struct lock c_lock; /* new lock protocol */ + struct vnode *c_ovp; /* open vnode pointer */ + u_short c_ocount; /* count of openers */ + u_short c_owrite; /* count of open for write */ + struct vattr c_vattr; /* attributes */ + char *c_symlink; /* pointer to symbolic link */ + u_short c_symlen; /* length of symbolic link */ + dev_t c_device; /* associated vnode device */ + ino_t c_inode; /* associated vnode inode */ + struct cnode *c_next; /* links if on NetBSD machine */ +}; +#define VTOC(vp) ((struct cnode *)(vp)->v_data) +#define CTOV(cp) ((struct vnode *)((cp)->c_vnode)) + +/* flags */ +#define C_VATTR 0x01 /* Validity of vattr in the cnode */ +#define C_SYMLINK 0x02 /* Validity of symlink pointer in the cnode */ +#define C_WANTED 0x08 /* Set if lock wanted */ +#define C_LOCKED 0x10 /* Set if lock held */ +#define C_UNMOUNTING 0x20 /* Set if unmounting */ +#define C_PURGING 0x40 /* Set if purging a fid */ + +#define VALID_VATTR(cp) (((cp)->c_flags) & C_VATTR) +#define VALID_SYMLINK(cp)
((cp->c_flags) & C_SYMLINK) +#define IS_UNMOUNTING(cp) ((cp)->c_flags & C_UNMOUNTING) + +struct vcomm { + u_long vc_seq; + struct selinfo vc_selproc; + struct queue vc_requests; + struct queue vc_replys; +}; + +#define VC_OPEN(vcp) ((vcp)->vc_requests.forw != NULL) +#define MARK_VC_CLOSED(vcp) (vcp)->vc_requests.forw = NULL; +#define MARK_VC_OPEN(vcp) /* MT */ + +struct coda_clstat { + int ncalls; /* client requests */ + int nbadcalls; /* upcall failures */ + int reqs[CODA_NCALLS]; /* count of each request */ +}; +extern struct coda_clstat coda_clstat; + +/* + * CODA structure to hold mount/file system information + */ +struct coda_mntinfo { + struct vnode *mi_rootvp; + struct mount *mi_vfsp; + struct vcomm mi_vcomm; +}; +extern struct coda_mntinfo coda_mnttbl[]; /* indexed by minor device number */ + +/* + * vfs pointer to mount info + */ +#define vftomi(vfsp) ((struct coda_mntinfo *)(vfsp->mnt_data)) +#define CODA_MOUNTED(vfsp) (vftomi((vfsp)) != (struct coda_mntinfo *)0) + +/* + * vnode pointer to mount info + */ +#define vtomi(vp) ((struct coda_mntinfo *)(vp->v_mount->mnt_data)) + +/* + * Used for identifying usage of "Control" object + */ +extern struct vnode *coda_ctlvp; +#define IS_CTL_VP(vp) ((vp) == coda_ctlvp) +#define IS_CTL_NAME(vp, name, l)((l == CODA_CONTROLLEN) \ + && ((vp) == vtomi((vp))->mi_rootvp) \ + && strncmp(name, CODA_CONTROL, l) == 0) + +/* + * An enum to tell us whether something that will remove a reference + * to a cnode was a downcall or not + */ +enum dc_status { + IS_DOWNCALL = 6, + NOT_DOWNCALL = 7 +}; + +/* cfs_psdev.h */ +extern int coda_call(struct coda_mntinfo *mntinfo, int inSize, int *outSize, caddr_t buffer); +extern int coda_kernel_version; + +/* cfs_subr.h */ +extern int handleDownCall(int opcode, union outputArgs *out); +extern void coda_unmounting(struct mount *whoIam); +extern int coda_vmflush(struct cnode *cp); + +/* cfs_vnodeops.h */ +extern struct cnode *make_coda_node(ViceFid *fid, struct mount *vfsp, short type); 
+extern int coda_vnodeopstats_init(void); + +/* coda_vfsops.h */ +extern struct mount *devtomp(dev_t dev); + +/* sigh */ +#define CODA_RDWR ((u_long) 31) + +#endif /* _CNODE_H_ */ + diff --git a/sys/fs/coda/coda.h b/sys/fs/coda/coda.h new file mode 100644 index 0000000..7b67ea9 --- /dev/null +++ b/sys/fs/coda/coda.h @@ -0,0 +1,761 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda.h,v 1.5 1998/10/28 19:33:49 rvb Exp $ + * + */ + + +/* + * + * Based on cfs.h from Mach, but revamped for increased simplicity. 
+ * Linux modifications by Peter Braam, Aug 1996 + */ + +#ifndef _CODA_HEADER_ +#define _CODA_HEADER_ + + + +/* Catch new _KERNEL defn for NetBSD */ +#ifdef __NetBSD__ +#include <sys/types.h> +#endif + +#ifndef CODA_MAXSYMLINKS +#define CODA_MAXSYMLINKS 10 +#endif + +#if defined(DJGPP) || defined(__CYGWIN32__) +#ifdef KERNEL +typedef unsigned long u_long; +typedef unsigned int u_int; +typedef unsigned short u_short; +typedef u_long ino_t; +typedef u_long dev_t; +typedef void * caddr_t; +#ifdef DOS +typedef unsigned __int64 u_quad_t; +#else +typedef unsigned long long u_quad_t; +#endif + +#define inline + +struct timespec { + long ts_sec; + long ts_nsec; +}; +#else /* DJGPP but not KERNEL */ +#include <sys/types.h> +#include <sys/time.h> +typedef unsigned long long u_quad_t; +#endif /* !KERNEL */ +#endif /* !DJGPP */ + + +#if defined(__linux__) +#define cdev_t u_quad_t +#if !defined(_UQUAD_T_) && (!defined(__GLIBC__) || __GLIBC__ < 2) +#define _UQUAD_T_ 1 +typedef unsigned long long u_quad_t; +#endif +#else +#define cdev_t dev_t +#endif + +#ifdef __CYGWIN32__ +typedef unsigned char u_int8_t; +struct timespec { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif + + +/* + * Cfs constants + */ +#define CODA_MAXNAMLEN 255 +#define CODA_MAXPATHLEN 1024 +#define CODA_MAXSYMLINK 10 + +/* these are Coda's version of O_RDONLY etc combinations + * to deal with VFS open modes + */ +#define C_O_READ 0x001 +#define C_O_WRITE 0x002 +#define C_O_TRUNC 0x010 +#define C_O_EXCL 0x100 +#define C_O_CREAT 0x200 + +/* these are to find mode bits in Venus */ +#define C_M_READ 00400 +#define C_M_WRITE 00200 + +/* for access Venus will use */ +#define C_A_C_OK 8 /* Test for writing upon create. */ +#define C_A_R_OK 4 /* Test for read permission. */ +#define C_A_W_OK 2 /* Test for write permission. */ +#define C_A_X_OK 1 /* Test for execute permission. */ +#define C_A_F_OK 0 /* Test for existence. 
*/ + + + +#ifndef _VENUS_DIRENT_T_ +#define _VENUS_DIRENT_T_ 1 +/* + * Directory entry record. d_namlen is unsigned char: plain char may be + * signed, in which case it cannot hold lengths up to CODA_MAXNAMLEN (255) + * and DIRSIZ() would be computed from a negative length. + */ +struct venus_dirent { + unsigned long d_fileno; /* file number of entry */ + unsigned short d_reclen; /* length of this record */ + char d_type; /* file type, see below */ + unsigned char d_namlen; /* length of string in d_name */ + char d_name[CODA_MAXNAMLEN + 1];/* name must be no longer than this */ +}; +#undef DIRSIZ +/* Record size for dp: the fixed header plus d_namlen+1 name bytes rounded up to a multiple of 4. */ +#define DIRSIZ(dp) ((sizeof (struct venus_dirent) - (CODA_MAXNAMLEN+1)) + \ + (((dp)->d_namlen+1 + 3) &~ 3)) + +/* + * File types + */ +#define CDT_UNKNOWN 0 +#define CDT_FIFO 1 +#define CDT_CHR 2 +#define CDT_DIR 4 +#define CDT_BLK 6 +#define CDT_REG 8 +#define CDT_LNK 10 +#define CDT_SOCK 12 +#define CDT_WHT 14 + +/* + * Convert between stat structure types and directory types. + */ +#define IFTOCDT(mode) (((mode) & 0170000) >> 12) +#define CDTTOIF(dirtype) ((dirtype) << 12) + +#endif + +#ifndef _FID_T_ +#define _FID_T_ 1 +typedef u_long VolumeId; +typedef u_long VnodeId; +typedef u_long Unique_t; +typedef u_long FileVersion; +#endif + +#ifndef _VICEFID_T_ +#define _VICEFID_T_ 1 +typedef struct ViceFid { + VolumeId Volume; + VnodeId Vnode; + Unique_t Unique; +} ViceFid; +#endif /* VICEFID */ + + +#ifdef __linux__ +static __inline__ ino_t coda_f2i(struct ViceFid *fid) +{ + if ( ! fid ) + return 0; + if (fid->Vnode == 0xfffffffe || fid->Vnode == 0xffffffff) + return ((fid->Volume << 20) | (fid->Unique & 0xfffff)); + else + return (fid->Unique + (fid->Vnode<<10) + (fid->Volume<<20)); +} + +#else +#define coda_f2i(fid)\ + ((fid) ?
((fid)->Unique + ((fid)->Vnode<<10) + ((fid)->Volume<<20)) : 0) +#endif + + +#ifndef __BIT_TYPES_DEFINED__ +#define u_int32_t unsigned int +#endif + + +#ifndef _VUID_T_ +#define _VUID_T_ +typedef u_int32_t vuid_t; +typedef u_int32_t vgid_t; +#endif /*_VUID_T_ */ + +#ifndef _CODACRED_T_ +#define _CODACRED_T_ +struct coda_cred { + vuid_t cr_uid, cr_euid, cr_suid, cr_fsuid; /* Real, efftve, set, fs uid*/ + vgid_t cr_groupid, cr_egid, cr_sgid, cr_fsgid; /* same for groups */ +}; +#endif + +#ifndef _VENUS_VATTR_T_ +#define _VENUS_VATTR_T_ +/* + * Vnode types. VNON means no type. + */ +enum coda_vtype { C_VNON, C_VREG, C_VDIR, C_VBLK, C_VCHR, C_VLNK, C_VSOCK, C_VFIFO, C_VBAD }; + +struct coda_vattr { + int va_type; /* vnode type (for create) */ + u_short va_mode; /* files access mode and type */ + short va_nlink; /* number of references to file */ + vuid_t va_uid; /* owner user id */ + vgid_t va_gid; /* owner group id */ + long va_fileid; /* file id */ + u_quad_t va_size; /* file size in bytes */ + long va_blocksize; /* blocksize preferred for i/o */ + struct timespec va_atime; /* time of last access */ + struct timespec va_mtime; /* time of last modification */ + struct timespec va_ctime; /* time file changed */ + u_long va_gen; /* generation number of file */ + u_long va_flags; /* flags defined for file */ + cdev_t va_rdev; /* device special file represents */ + u_quad_t va_bytes; /* bytes of disk space held by file */ + u_quad_t va_filerev; /* file modification number */ +}; + +#endif + +/* + * Kernel <--> Venus communications. 
+ */ + +#define CODA_ROOT 2 +#define CODA_SYNC 3 +#define CODA_OPEN 4 +#define CODA_CLOSE 5 +#define CODA_IOCTL 6 +#define CODA_GETATTR 7 +#define CODA_SETATTR 8 +#define CODA_ACCESS 9 +#define CODA_LOOKUP 10 +#define CODA_CREATE 11 +#define CODA_REMOVE 12 +#define CODA_LINK 13 +#define CODA_RENAME 14 +#define CODA_MKDIR 15 +#define CODA_RMDIR 16 +#define CODA_READDIR 17 +#define CODA_SYMLINK 18 +#define CODA_READLINK 19 +#define CODA_FSYNC 20 +#define CODA_INACTIVE 21 +#define CODA_VGET 22 +#define CODA_SIGNAL 23 +#define CODA_REPLACE 24 +#define CODA_FLUSH 25 +#define CODA_PURGEUSER 26 +#define CODA_ZAPFILE 27 +#define CODA_ZAPDIR 28 +#define CODA_PURGEFID 30 +#define CODA_OPEN_BY_PATH 31 +#define CODA_RESOLVE 32 +#define CODA_REINTEGRATE 33 +#define CODA_NCALLS 34 + +/* True when opcode lies in CODA_REPLACE..CODA_PURGEFID inclusive; evaluates opcode twice. */ +#define DOWNCALL(opcode) ((opcode) >= CODA_REPLACE && (opcode) <= CODA_PURGEFID) + +#define VC_MAXDATASIZE 8192 +/* Parenthesized so the sum stays a single operand wherever it is expanded. */ +#define VC_MAXMSGSIZE (sizeof(union inputArgs)+sizeof(union outputArgs) +\ + VC_MAXDATASIZE) + +/* NOTE(review): the third _IOWR() argument is `sizeof (int)' rather than a type name; left as-is because changing it may alter the ioctl number -- confirm before touching. */ +#define CIOC_KERNEL_VERSION _IOWR('c', 10, sizeof (int)) +#if 0 + /* don't care about kernel version number */ +#define CODA_KERNEL_VERSION 0 + /* The old venus 4.6 compatible interface */ +#define CODA_KERNEL_VERSION 1 +#endif + /* venus_lookup gets an extra parameter to aid windows.*/ +#define CODA_KERNEL_VERSION 2 + +/* + * Venus <-> Coda RPC arguments + */ +struct coda_in_hdr { + unsigned long opcode; + unsigned long unique; /* Keep multiple outstanding msgs distinct */ + u_short pid; /* Common to all */ + u_short pgid; /* Common to all */ + u_short sid; /* Common to all */ + struct coda_cred cred; /* Common to all */ +}; + +/* Really important that opcode and unique are 1st two fields!
*/ +struct coda_out_hdr { + unsigned long opcode; + unsigned long unique; + unsigned long result; +}; + +/* coda_root: NO_IN */ +struct coda_root_out { + struct coda_out_hdr oh; + ViceFid VFid; +}; + +struct coda_root_in { + struct coda_in_hdr in; +}; + +/* coda_sync: */ +/* Nothing needed for coda_sync */ + +/* coda_open: */ +struct coda_open_in { + struct coda_in_hdr ih; + ViceFid VFid; + int flags; +}; + +struct coda_open_out { + struct coda_out_hdr oh; + cdev_t dev; + ino_t inode; +}; + + +/* coda_close: */ +struct coda_close_in { + struct coda_in_hdr ih; + ViceFid VFid; + int flags; +}; + +struct coda_close_out { + struct coda_out_hdr out; +}; + +/* coda_ioctl: */ +struct coda_ioctl_in { + struct coda_in_hdr ih; + ViceFid VFid; + int cmd; + int len; + int rwflag; + char *data; /* Place holder for data. */ +}; + +struct coda_ioctl_out { + struct coda_out_hdr oh; + int len; + caddr_t data; /* Place holder for data. */ +}; + + +/* coda_getattr: */ +struct coda_getattr_in { + struct coda_in_hdr ih; + ViceFid VFid; +}; + +struct coda_getattr_out { + struct coda_out_hdr oh; + struct coda_vattr attr; +}; + + +/* coda_setattr: NO_OUT */ +struct coda_setattr_in { + struct coda_in_hdr ih; + ViceFid VFid; + struct coda_vattr attr; +}; + +struct coda_setattr_out { + struct coda_out_hdr out; +}; + +/* coda_access: NO_OUT */ +struct coda_access_in { + struct coda_in_hdr ih; + ViceFid VFid; + int flags; +}; + +struct coda_access_out { + struct coda_out_hdr out; +}; + + +/* lookup flags */ +#define CLU_CASE_SENSITIVE 0x01 +#define CLU_CASE_INSENSITIVE 0x02 + +/* coda_lookup: */ +struct coda_lookup_in { + struct coda_in_hdr ih; + ViceFid VFid; + int name; /* Place holder for data. */ + int flags; +}; + +struct coda_lookup_out { + struct coda_out_hdr oh; + ViceFid VFid; + int vtype; +}; + + +/* coda_create: */ +struct coda_create_in { + struct coda_in_hdr ih; + ViceFid VFid; + struct coda_vattr attr; + int excl; + int mode; + int name; /* Place holder for data. 
*/ +}; + +struct coda_create_out { + struct coda_out_hdr oh; + ViceFid VFid; + struct coda_vattr attr; +}; + + +/* coda_remove: NO_OUT */ +struct coda_remove_in { + struct coda_in_hdr ih; + ViceFid VFid; + int name; /* Place holder for data. */ +}; + +struct coda_remove_out { + struct coda_out_hdr out; +}; + +/* coda_link: NO_OUT */ +struct coda_link_in { + struct coda_in_hdr ih; + ViceFid sourceFid; /* cnode to link *to* */ + ViceFid destFid; /* Directory in which to place link */ + int tname; /* Place holder for data. */ +}; + +struct coda_link_out { + struct coda_out_hdr out; +}; + + +/* coda_rename: NO_OUT */ +struct coda_rename_in { + struct coda_in_hdr ih; + ViceFid sourceFid; + int srcname; + ViceFid destFid; + int destname; +}; + +struct coda_rename_out { + struct coda_out_hdr out; +}; + +/* coda_mkdir: */ +struct coda_mkdir_in { + struct coda_in_hdr ih; + ViceFid VFid; + struct coda_vattr attr; + int name; /* Place holder for data. */ +}; + +struct coda_mkdir_out { + struct coda_out_hdr oh; + ViceFid VFid; + struct coda_vattr attr; +}; + + +/* coda_rmdir: NO_OUT */ +struct coda_rmdir_in { + struct coda_in_hdr ih; + ViceFid VFid; + int name; /* Place holder for data. */ +}; + +struct coda_rmdir_out { + struct coda_out_hdr out; +}; + +/* coda_readdir: */ +struct coda_readdir_in { + struct coda_in_hdr ih; + ViceFid VFid; + int count; + int offset; +}; + +struct coda_readdir_out { + struct coda_out_hdr oh; + int size; + caddr_t data; /* Place holder for data. */ +}; + +/* coda_symlink: NO_OUT */ +struct coda_symlink_in { + struct coda_in_hdr ih; + ViceFid VFid; /* Directory to put symlink in */ + int srcname; + struct coda_vattr attr; + int tname; +}; + +struct coda_symlink_out { + struct coda_out_hdr out; +}; + +/* coda_readlink: */ +struct coda_readlink_in { + struct coda_in_hdr ih; + ViceFid VFid; +}; + +struct coda_readlink_out { + struct coda_out_hdr oh; + int count; + caddr_t data; /* Place holder for data. 
*/ +}; + + +/* coda_fsync: NO_OUT */ +struct coda_fsync_in { + struct coda_in_hdr ih; + ViceFid VFid; +}; + +struct coda_fsync_out { + struct coda_out_hdr out; +}; + +/* coda_inactive: NO_OUT */ +struct coda_inactive_in { + struct coda_in_hdr ih; + ViceFid VFid; +}; + +/* coda_vget: */ +struct coda_vget_in { + struct coda_in_hdr ih; + ViceFid VFid; +}; + +struct coda_vget_out { + struct coda_out_hdr oh; + ViceFid VFid; + int vtype; +}; + + +/* CODA_SIGNAL is out-of-band, doesn't need data. */ +/* CODA_INVALIDATE is a venus->kernel call */ +/* CODA_FLUSH is a venus->kernel call */ + +/* coda_purgeuser: */ +/* CODA_PURGEUSER is a venus->kernel call */ +struct coda_purgeuser_out { + struct coda_out_hdr oh; + struct coda_cred cred; +}; + +/* coda_zapfile: */ +/* CODA_ZAPFILE is a venus->kernel call */ +struct coda_zapfile_out { + struct coda_out_hdr oh; + ViceFid CodaFid; +}; + +/* coda_zapdir: */ +/* CODA_ZAPDIR is a venus->kernel call */ +struct coda_zapdir_out { + struct coda_out_hdr oh; + ViceFid CodaFid; +}; + +/* coda_zapnode: */ +/* CODA_ZAPVNODE is a venus->kernel call */ +struct coda_zapvnode_out { + struct coda_out_hdr oh; + struct coda_cred cred; + ViceFid VFid; +}; + +/* coda_purgefid: */ +/* CODA_PURGEFID is a venus->kernel call */ +struct coda_purgefid_out { + struct coda_out_hdr oh; + ViceFid CodaFid; +}; + +/* coda_rdwr: */ +struct coda_rdwr_in { + struct coda_in_hdr ih; + ViceFid VFid; + int rwflag; + int count; + int offset; + int ioflag; + caddr_t data; /* Place holder for data. */ +}; + +struct coda_rdwr_out { + struct coda_out_hdr oh; + int rwflag; + int count; + caddr_t data; /* Place holder for data. 
*/ +}; + + +/* coda_replace: */ +/* CODA_REPLACE is a venus->kernel call */ +struct coda_replace_out { /* coda_replace is a venus->kernel call */ + struct coda_out_hdr oh; + ViceFid NewFid; + ViceFid OldFid; +}; + +/* coda_open_by_path: */ +struct coda_open_by_path_in { + struct coda_in_hdr ih; + ViceFid VFid; + int flags; +}; + +struct coda_open_by_path_out { + struct coda_out_hdr oh; + int path; +}; + +/* + * Occasionally, we don't cache the fid returned by CODA_LOOKUP. + * For instance, if the fid is inconsistent. + * This case is handled by setting the top bit of the type result parameter. + */ +#define CODA_NOCACHE 0x80000000 + +union inputArgs { + struct coda_in_hdr ih; /* NB: every struct below begins with an ih */ + struct coda_open_in coda_open; + struct coda_close_in coda_close; + struct coda_ioctl_in coda_ioctl; + struct coda_getattr_in coda_getattr; + struct coda_setattr_in coda_setattr; + struct coda_access_in coda_access; + struct coda_lookup_in coda_lookup; + struct coda_create_in coda_create; + struct coda_remove_in coda_remove; + struct coda_link_in coda_link; + struct coda_rename_in coda_rename; + struct coda_mkdir_in coda_mkdir; + struct coda_rmdir_in coda_rmdir; + struct coda_readdir_in coda_readdir; + struct coda_symlink_in coda_symlink; + struct coda_readlink_in coda_readlink; + struct coda_fsync_in coda_fsync; + struct coda_inactive_in coda_inactive; + struct coda_vget_in coda_vget; + struct coda_rdwr_in coda_rdwr; + struct coda_open_by_path_in coda_open_by_path; +}; + +union outputArgs { + struct coda_out_hdr oh; /* NB: every struct below begins with an oh */ + struct coda_root_out coda_root; + struct coda_open_out coda_open; + struct coda_ioctl_out coda_ioctl; + struct coda_getattr_out coda_getattr; + struct coda_lookup_out coda_lookup; + struct coda_create_out coda_create; + struct coda_mkdir_out coda_mkdir; + struct coda_readdir_out coda_readdir; + struct coda_readlink_out coda_readlink; + struct coda_vget_out coda_vget; + struct 
coda_purgeuser_out coda_purgeuser; + struct coda_zapfile_out coda_zapfile; + struct coda_zapdir_out coda_zapdir; + struct coda_zapvnode_out coda_zapvnode; + struct coda_purgefid_out coda_purgefid; + struct coda_rdwr_out coda_rdwr; + struct coda_replace_out coda_replace; + struct coda_open_by_path_out coda_open_by_path; +}; + +union coda_downcalls { + /* CODA_INVALIDATE is a venus->kernel call */ + /* CODA_FLUSH is a venus->kernel call */ + struct coda_purgeuser_out purgeuser; + struct coda_zapfile_out zapfile; + struct coda_zapdir_out zapdir; + struct coda_zapvnode_out zapvnode; + struct coda_purgefid_out purgefid; + struct coda_replace_out replace; +}; + + +/* + * Used for identifying usage of "Control" and pioctls + */ + +#define PIOCPARM_MASK 0x0000ffff +struct ViceIoctl { + caddr_t in, out; /* Data to be transferred in, or out */ + short in_size; /* Size of input buffer <= 2K */ + short out_size; /* Maximum size of output buffer, <= 2K */ +}; + +#if defined(__CYGWIN32__) || defined(DJGPP) +struct PioctlData { + unsigned long cmd; + const char *path; + int follow; + struct ViceIoctl vi; +}; +#else +struct PioctlData { + const char *path; + int follow; + struct ViceIoctl vi; +}; +#endif + +#define CODA_CONTROL ".CONTROL" +#define CODA_CONTROLLEN 8 +#define CTL_VOL -1 +#define CTL_VNO -1 +#define CTL_UNI -1 +#define CTL_INO -1 +#define CTL_FILE "/coda/.CONTROL" + + +#define IS_CTL_FID(fidp) ((fidp)->Volume == CTL_VOL &&\ + (fidp)->Vnode == CTL_VNO &&\ + (fidp)->Unique == CTL_UNI) +#endif + diff --git a/sys/fs/coda/coda_fbsd.c b/sys/fs/coda/coda_fbsd.c new file mode 100644 index 0000000..703708c --- /dev/null +++ b/sys/fs/coda/coda_fbsd.c @@ -0,0 +1,216 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice 
and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_fbsd.cr,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_fbsd.c,v 1.12 1999/01/27 20:09:17 dillon Exp $ + * + */ + +#include "vcoda.h" +#include "opt_devfs.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/fcntl.h> +#include <sys/ucred.h> +#include <sys/vnode.h> +#include <sys/conf.h> + +#include <vm/vm.h> +#include <vm/vnode_pager.h> + +#include <coda/coda.h> +#include <coda/cnode.h> +#include <coda/coda_vnops.h> +#include <coda/coda_psdev.h> + +#ifdef DEVFS +#include <sys/devfsext.h> + +static void *cfs_devfs_token[NVCODA]; +static void *coda_devfs_token[NVCODA]; +#endif + +/* + From: "Jordan K. Hubbard" <jkh@time.cdrom.com> + Subject: Re: New 3.0 SNAPshot CDROM about ready for production.. + To: "Robert.V.Baron" <rvb@GLUCK.CODA.CS.CMU.EDU> + Date: Fri, 20 Feb 1998 15:57:01 -0800 + + > Also I need a character device major number. (and might want to reserve + > a block of 10 syscalls.) 
+ + Just one char device number? No block devices? Very well, cdev 93 is yours! +*/ + +#define VC_DEV_NO 93 + +static struct cdevsw codadevsw = +{ + vc_nb_open, vc_nb_close, vc_nb_read, vc_nb_write, /*93*/ + vc_nb_ioctl, nostop, nullreset, nodevtotty, + vc_nb_poll, nommap, NULL, "Coda", NULL, -1 +}; + +int vcdebug = 1; +#define VCDEBUG if (vcdebug) printf + +static int +codadev_modevent(module_t mod, int type, void *data) +{ + dev_t dev; +#ifdef DEVFS + int i; +#endif + static struct cdevsw *oldcdevsw; + + switch (type) { + case MOD_LOAD: + dev = makedev(VC_DEV_NO, 0); + cdevsw_add(&dev,&codadevsw, &oldcdevsw); +#ifdef DEVFS + /* tmp */ +#undef NVCODA +#define NVCODA 1 + for (i = 0; i < NVCODA; i++) { + cfs_devfs_token[i] = + devfs_add_devswf(&codadevsw, i, + DV_CHR, UID_ROOT, GID_WHEEL, 0666, + "cfs%d", i); + coda_devfs_token[i] = + devfs_add_devswf(&codadevsw, i, + DV_CHR, UID_ROOT, GID_WHEEL, 0666, + "coda%d", i); + } +#endif + break; + case MOD_UNLOAD: +#ifdef DEVFS + for (i = 0; i < NVCODA; i++) { + devfs_remove_dev(cfs_devfs_token[i]); + devfs_remove_dev(coda_devfs_token[i]); + } +#endif + cdevsw_add(&dev, oldcdevsw, NULL); + break; + default: + break; + } + return 0; +} +static moduledata_t codadev_mod = { + "codadev", + codadev_modevent, + NULL +}; +DECLARE_MODULE(codadev, codadev_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE+VC_DEV_NO); + +int +coda_fbsd_getpages(v) + void *v; +{ + struct vop_getpages_args *ap = v; + int ret = 0; + +#if 1 + /* ??? a_offset */ + ret = vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_reqpage); + return ret; +#else + { + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct vnode *cfvp = cp->c_ovp; + int opened_internally = 0; + struct ucred *cred = (struct ucred *) 0; + struct proc *p = curproc; + int error = 0; + + if (IS_CTL_VP(vp)) { + return(EINVAL); + } + + /* Redirect the request to UFS. 
*/ + + if (cfvp == NULL) { + opened_internally = 1; + + error = VOP_OPEN(vp, FREAD, cred, p); +printf("coda_getp: Internally Opening %p\n", vp); + + if (error) { + printf("coda_getpage: VOP_OPEN on container failed %d\n", error); + return (error); + } + if (vp->v_type == VREG) { + error = vfs_object_create(vp, p, cred); + if (error != 0) { + printf("coda_getpage: vfs_object_create() returns %d\n", error); + vput(vp); + return(error); + } + } + + cfvp = cp->c_ovp; + } else { +printf("coda_getp: has container %p\n", cfvp); + } + +printf("coda_fbsd_getpages: using container "); +/* + error = vnode_pager_generic_getpages(cfvp, ap->a_m, ap->a_count, + ap->a_reqpage); +*/ + error = VOP_GETPAGES(cfvp, ap->a_m, ap->a_count, + ap->a_reqpage, ap->a_offset); +printf("error = %d\n", error); + + /* Do an internal close if necessary. */ + if (opened_internally) { + (void)VOP_CLOSE(vp, FREAD, cred, p); + } + + return(error); + } +#endif +} + +int +coda_fbsd_putpages(v) + void *v; +{ + struct vop_putpages_args *ap = v; + + /*??? a_offset */ + return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); +} diff --git a/sys/fs/coda/coda_io.h b/sys/fs/coda/coda_io.h new file mode 100644 index 0000000..dd12fa1 --- /dev/null +++ b/sys/fs/coda/coda_io.h @@ -0,0 +1,128 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. 
+ * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_io.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_io.h,v 1.3 1998/09/11 18:50:17 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon University. + * Contributers include David Steere, James Kistler, and M. Satyanarayanan. + */ + +/* + * HISTORY + * $Log: coda_io.h,v $ + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) 
+ * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.5 1998/08/18 17:05:23 rvb + * Don't use __RCSID now + * + * Revision 1.4 1998/08/18 16:31:47 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.3 98/01/23 11:53:49 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.2.38.1 97/12/16 12:40:22 rvb + * Sync with 1.3 + * + * Revision 1.2 96/01/02 16:57:15 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:42 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:08:20 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:08:20 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.1 1994/07/21 16:25:25 satya + * Conversion to C++ 3.0; start of Coda Release 2.0 + * + * Revision 1.3 94/06/14 16:53:47 dcs + * Added support for ODY-like mounting in the kernel (SETS) + * + * Revision 1.3 94/06/14 16:48:03 dcs + * Added support for ODY-like mounting in the kernel (SETS) + * + * Revision 1.2 92/10/27 17:58:28 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 1.1 92/04/03 17:35:34 satya + * Initial revision + * + * Revision 1.5 91/02/09 12:53:26 jjk + * Substituted rvb's history blurb so that we agree with Mach 2.5 sources. + * + * Revision 2.2.1.1 91/01/06 22:08:22 rvb + * Created for the Coda File System. + * [90/05/23 dcs] + * + * Revision 1.3 90/07/19 10:23:05 dcs + * Added ; to cfs_resize definition for port to 386. + * + * Revision 1.2 90/05/31 17:02:09 dcs + * Prepare for merge with facilities kernel. 
#ifndef _CODAIO_H_
#define _CODAIO_H_

/*
 * Define ioctl commands for vcioctl, /dev/cfs
 *
 * All four commands live in ioctl group 'c', numbered 1 through 4.  Only
 * CODARESIZE carries an argument (a struct coda_resize, declared with
 * _IOW); the other three are argument-less _IO commands.
 * NOTE(review): the device is named after the old "cfs" prefix here even
 * though the symbols were renamed to "coda" -- confirm the node name.
 */

#define CODARESIZE    _IOW('c', 1, struct coda_resize )  /* Resize CODA NameCache */
#define CODASTATS     _IO('c', 2)                      /* Collect stats */
#define CODAPRINT     _IO('c', 3)                      /* Print Cache */
#define CODATEST      _IO('c', 4)                      /* Print Cache (NOTE(review): same text as CODAPRINT; confirm what the test command does) */

/*
 * Argument of CODARESIZE: the requested hash-table size and heap (cache
 * entry) size.  Presumably handed to coda_nc_resize() -- verify in the
 * ioctl handler.
 */
struct coda_resize { int hashsize, heapsize; };

#endif
/*
 * Macros to manipulate the queue.
 *
 * A queue is a circular doubly-linked list threaded through struct queue
 * links.  The head is itself a link: an empty queue is a head whose forw
 * and back both point at the head.  Every macro takes its arguments as
 * lvalues (not pointers), except EOQ whose first argument is a pointer.
 *
 * All comparisons against the head go through a (struct queue *) cast, the
 * same way INIT_QUEUE and EOQ always did, so a head object that merely
 * begins with forw/back links compares without a pointer-type mismatch.
 */
#ifndef INIT_QUEUE
struct queue {
    struct queue *forw, *back;
};

/* Make head an empty queue: both links point back at head. */
#define INIT_QUEUE(head)                     \
do {                                         \
    (head).forw = (struct queue *)&(head);   \
    (head).back = (struct queue *)&(head);   \
} while (0)

/* First element of the queue (the head itself when the queue is empty). */
#define GETNEXT(head) ((head).forw)

/* True when the queue holds no elements. */
#define EMPTY(head) ((head).forw == (struct queue *)&(head))

/* True when the pointer el has walked back around to the head. */
#define EOQ(el, head) ((struct queue *)(el) == (struct queue *)&(head))

/* Append el at the tail of the queue, i.e. just before head. */
#define INSQUE(el, head)                             \
do {                                                 \
	(el).forw = ((head).back)->forw;             \
	(el).back = (head).back;                     \
	((head).back)->forw = (struct queue *)&(el); \
	(head).back = (struct queue *)&(el);         \
} while (0)

/*
 * Unlink el from whatever queue it is on.  el's own forw/back are left
 * untouched, so they still point at its former neighbours afterwards.
 */
#define REMQUE(el)                         \
do {                                       \
	((el).forw)->back = (el).back;     \
	(el).back->forw = (el).forw;       \
} while (0)

#endif
CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_namecache.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_namecache.c,v 1.7 1998/09/28 20:52:58 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon University. + * Contributers include David Steere, James Kistler, and M. Satyanarayanan. + */ + +/* + * HISTORY + * $Log: coda_namecache.c,v $ + * Revision 1.7 1998/09/28 20:52:58 rvb + * Cleanup and fix THE bug + * + * Revision 1.6 1998/09/25 17:38:31 rvb + * Put "stray" printouts under DIAGNOSTIC. Make everything build + * with DEBUG on. Add support for lkm. (The macro's don't work + * for me; for a good chuckle look at the end of coda_fbsd.c.) + * + * Revision 1.5 1998/09/13 13:57:59 rvb + * Finish conversion of cfs -> coda + * + * Revision 1.4 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.11 1998/08/28 18:12:16 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. 
+ * + * Revision 1.10 1998/08/18 17:05:14 rvb + * Don't use __RCSID now + * + * Revision 1.9 1998/08/18 16:31:39 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.8 98/01/31 20:53:10 rvb + * First version that works on FreeBSD 2.2.5 + * + * Revision 1.7 98/01/23 11:53:39 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.6.2.4 98/01/23 11:21:02 rvb + * Sync with 2.2.5 + * + * Revision 1.6.2.3 97/12/16 12:40:03 rvb + * Sync with 1.3 + * + * Revision 1.6.2.2 97/12/09 16:07:10 rvb + * Sync with vfs/include/coda.h + * + * Revision 1.6.2.1 97/12/06 17:41:18 rvb + * Sync with peters coda.h + * + * Revision 1.6 97/12/05 10:39:13 rvb + * Read CHANGES + * + * Revision 1.5.4.7 97/11/25 08:08:43 rvb + * cfs_venus ... done; until cred/vattr change + * + * Revision 1.5.4.6 97/11/24 15:44:43 rvb + * Final cfs_venus.c w/o macros, but one locking bug + * + * Revision 1.5.4.5 97/11/20 11:46:38 rvb + * Capture current cfs_venus + * + * Revision 1.5.4.4 97/11/18 10:27:13 rvb + * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c + * cfs_nb_foo and cfs_foo are joined + * + * Revision 1.5.4.3 97/11/13 22:02:57 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.5.4.2 97/11/12 12:09:35 rvb + * reorg pass1 + * + * Revision 1.5.4.1 97/10/28 23:10:12 rvb + * >64Meg; venus can be killed! + * + * Revision 1.5 97/08/05 11:08:01 lily + * Removed cfsnc_replace, replaced it with a coda_find, unhash, and + * rehash. This fixes a cnode leak and a bug in which the fid is + * not actually replaced. (cfs_namecache.c, cfsnc.h, cfs_subr.c) + * + * Revision 1.4 96/12/12 22:10:57 bnoble + * Fixed the "downcall invokes venus operation" deadlock in all known cases. + * There may be more + * + * Revision 1.3 1996/11/08 18:06:09 bnoble + * Minor changes in vnode operation signature, VOP_UPDATE signature, and + * some newly defined bits in the include files. 
+ * + * Revision 1.2 1996/01/02 16:56:50 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:15 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:07:57 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:07:56 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.3 1994/10/14 09:57:54 dcs + * Made changes 'cause sun4s have braindead compilers + * + * Revision 2.2 94/08/28 19:37:35 luqi + * Add a new CODA_REPLACE call to allow venus to replace a ViceFid in the + * mini-cache. + * + * In "cfs.h": + * Add CODA_REPLACE decl. + * + * In "cfs_namecache.c": + * Add routine cfsnc_replace. + * + * In "cfs_subr.c": + * Add case-statement to process CODA_REPLACE. + * + * In "cfsnc.h": + * Add decl for CODA_NC_REPLACE. + * + * + * Revision 2.1 94/07/21 16:25:15 satya + * Conversion to C++ 3.0; start of Coda Release 2.0 + * + * Revision 1.2 92/10/27 17:58:21 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 2.3 92/09/30 14:16:20 mja + * call coda_flush instead of calling inode_uncache_try directly + * (from dcs). Also... + * + * Substituted rvb's history blurb so that we agree with Mach 2.5 sources. + * [91/02/09 jjk] + * + * Added contributors blurb. + * [90/12/13 jjk] + * + * Revision 2.2 90/07/05 11:26:30 mrt + * Created for the Coda File System. + * [90/05/23 dcs] + * + * Revision 1.3 90/05/31 17:01:24 dcs + * Prepare for merge with facilities kernel. + * + * + */ + +/* + * This module contains the routines to implement the CODA name cache. The + * purpose of this cache is to reduce the cost of translating pathnames + * into Vice FIDs. Each entry in the cache contains the name of the file, + * the vnode (FID) of the parent directory, and the cred structure of the + * user accessing the file. 
+ * + * The first time a file is accessed, it is looked up by the local Venus + * which first insures that the user has access to the file. In addition + * we are guaranteed that Venus will invalidate any name cache entries in + * case the user no longer should be able to access the file. For these + * reasons we do not need to keep access list information as well as a + * cred structure for each entry. + * + * The table can be accessed through the routines cnc_init(), cnc_enter(), + * cnc_lookup(), cnc_rmfidcred(), cnc_rmfid(), cnc_rmcred(), and cnc_purge(). + * There are several other routines which aid in the implementation of the + * hash table. + */ + +/* + * NOTES: rvb@cs + * 1. The name cache holds a reference to every vnode in it. Hence files can not be + * closed or made inactive until they are released. + * 2. coda_nc_name(cp) was added to get a name for a cnode pointer for debugging. + * 3. coda_nc_find() has debug code to detect when entries are stored with different + * credentials. We don't understand yet, if/how entries are NOT EQ but still + * EQUAL + * 4. I wonder if this name cache could be replace by the vnode name cache. + * The latter has no zapping functions, so probably not. + */ + +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/malloc.h> +#include <sys/ucred.h> +#include <sys/select.h> + +#ifndef insque +#include <sys/systm.h> +#endif /* insque */ + +#include <vm/vm.h> +#include <vm/vm_object.h> + +#include <coda/coda.h> +#include <coda/cnode.h> +#include <coda/coda_namecache.h> + +#ifdef DEBUG +#include <coda/coda_vnops.h> +#endif + +/* + * Declaration of the name cache data structure. 
+ */ + +int coda_nc_use = 1; /* Indicate use of CODA Name Cache */ +int coda_nc_size = CODA_NC_CACHESIZE; /* size of the cache */ +int coda_nc_hashsize = CODA_NC_HASHSIZE; /* size of the primary hash */ + +struct coda_cache *coda_nc_heap; /* pointer to the cache entries */ +struct coda_hash *coda_nc_hash; /* hash table of coda_cache pointers */ +struct coda_lru coda_nc_lru; /* head of lru chain */ + +struct coda_nc_statistics coda_nc_stat; /* Keep various stats */ + +/* + * for testing purposes + */ +int coda_nc_debug = 0; + +/* + * Entry points for the CODA Name Cache + */ +static struct coda_cache *coda_nc_find(struct cnode *dcp, const char *name, int namelen, + struct ucred *cred, int hash); +static void coda_nc_remove(struct coda_cache *cncp, enum dc_status dcstat); + +/* + * Initialize the cache, the LRU structure and the Hash structure(s) + */ + +#define TOTAL_CACHE_SIZE (sizeof(struct coda_cache) * coda_nc_size) +#define TOTAL_HASH_SIZE (sizeof(struct coda_hash) * coda_nc_hashsize) + +int coda_nc_initialized = 0; /* Initially the cache has not been initialized */ + +void +coda_nc_init(void) +{ + int i; + + /* zero the statistics structure */ + + bzero(&coda_nc_stat, (sizeof(struct coda_nc_statistics))); + +#ifdef CODA_VERBOSE + printf("CODA NAME CACHE: CACHE %d, HASH TBL %d\n", CODA_NC_CACHESIZE, CODA_NC_HASHSIZE); +#endif + CODA_ALLOC(coda_nc_heap, struct coda_cache *, TOTAL_CACHE_SIZE); + CODA_ALLOC(coda_nc_hash, struct coda_hash *, TOTAL_HASH_SIZE); + + coda_nc_lru.lru_next = + coda_nc_lru.lru_prev = (struct coda_cache *)LRU_PART(&coda_nc_lru); + + + for (i=0; i < coda_nc_size; i++) { /* initialize the heap */ + CODA_NC_LRUINS(&coda_nc_heap[i], &coda_nc_lru); + CODA_NC_HSHNUL(&coda_nc_heap[i]); + coda_nc_heap[i].cp = coda_nc_heap[i].dcp = (struct cnode *)0; + } + + for (i=0; i < coda_nc_hashsize; i++) { /* initialize the hashtable */ + CODA_NC_HSHNUL((struct coda_cache *)&coda_nc_hash[i]); + } + + coda_nc_initialized++; +} + +/* + * Auxillary routines -- 
shouldn't be entry points + */ + +static struct coda_cache * +coda_nc_find(dcp, name, namelen, cred, hash) + struct cnode *dcp; + const char *name; + int namelen; + struct ucred *cred; + int hash; +{ + /* + * hash to find the appropriate bucket, look through the chain + * for the right entry (especially right cred, unless cred == 0) + */ + struct coda_cache *cncp; + int count = 1; + + CODA_NC_DEBUG(CODA_NC_FIND, + myprintf(("coda_nc_find(dcp %p, name %s, len %d, cred %p, hash %d\n", + dcp, name, namelen, cred, hash));) + + for (cncp = coda_nc_hash[hash].hash_next; + cncp != (struct coda_cache *)&coda_nc_hash[hash]; + cncp = cncp->hash_next, count++) + { + + if ((CODA_NAMEMATCH(cncp, name, namelen, dcp)) && + ((cred == 0) || (cncp->cred == cred))) + { + /* compare cr_uid instead */ + coda_nc_stat.Search_len += count; + return(cncp); + } +#ifdef DEBUG + else if (CODA_NAMEMATCH(cncp, name, namelen, dcp)) { + printf("coda_nc_find: name %s, new cred = %p, cred = %p\n", + name, cred, cncp->cred); + printf("nref %d, nuid %d, ngid %d // oref %d, ocred %d, ogid %d\n", + cred->cr_ref, cred->cr_uid, cred->cr_gid, + cncp->cred->cr_ref, cncp->cred->cr_uid, cncp->cred->cr_gid); + print_cred(cred); + print_cred(cncp->cred); + } +#endif + } + + return((struct coda_cache *)0); +} + +/* + * Enter a new (dir cnode, name) pair into the cache, updating the + * LRU and Hash as needed. 
+ */ +void +coda_nc_enter(dcp, name, namelen, cred, cp) + struct cnode *dcp; + const char *name; + int namelen; + struct ucred *cred; + struct cnode *cp; +{ + struct coda_cache *cncp; + int hash; + + if (coda_nc_use == 0) /* Cache is off */ + return; + + CODA_NC_DEBUG(CODA_NC_ENTER, + myprintf(("Enter: dcp %p cp %p name %s cred %p \n", + dcp, cp, name, cred)); ) + + if (namelen > CODA_NC_NAMELEN) { + CODA_NC_DEBUG(CODA_NC_ENTER, + myprintf(("long name enter %s\n",name));) + coda_nc_stat.long_name_enters++; /* record stats */ + return; + } + + hash = CODA_NC_HASH(name, namelen, dcp); + cncp = coda_nc_find(dcp, name, namelen, cred, hash); + if (cncp != (struct coda_cache *) 0) { + coda_nc_stat.dbl_enters++; /* duplicate entry */ + return; + } + + coda_nc_stat.enters++; /* record the enters statistic */ + + /* Grab the next element in the lru chain */ + cncp = CODA_NC_LRUGET(coda_nc_lru); + + CODA_NC_LRUREM(cncp); /* remove it from the lists */ + + if (CODA_NC_VALID(cncp)) { + /* Seems really ugly, but we have to decrement the appropriate + hash bucket length here, so we have to find the hash bucket + */ + coda_nc_hash[CODA_NC_HASH(cncp->name, cncp->namelen, cncp->dcp)].length--; + + coda_nc_stat.lru_rm++; /* zapped a valid entry */ + CODA_NC_HSHREM(cncp); + vrele(CTOV(cncp->dcp)); + vrele(CTOV(cncp->cp)); + crfree(cncp->cred); + } + + /* + * Put a hold on the current vnodes and fill in the cache entry. + */ + vref(CTOV(cp)); + vref(CTOV(dcp)); + crhold(cred); + cncp->dcp = dcp; + cncp->cp = cp; + cncp->namelen = namelen; + cncp->cred = cred; + + bcopy(name, cncp->name, (unsigned)namelen); + + /* Insert into the lru and hash chains. 
*/ + + CODA_NC_LRUINS(cncp, &coda_nc_lru); + CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]); + coda_nc_hash[hash].length++; /* Used for tuning */ + + CODA_NC_DEBUG(CODA_NC_PRINTCODA_NC, print_coda_nc(); ) +} + +/* + * Find the (dir cnode, name) pair in the cache, if it's cred + * matches the input, return it, otherwise return 0 + */ +struct cnode * +coda_nc_lookup(dcp, name, namelen, cred) + struct cnode *dcp; + const char *name; + int namelen; + struct ucred *cred; +{ + int hash; + struct coda_cache *cncp; + + if (coda_nc_use == 0) /* Cache is off */ + return((struct cnode *) 0); + + if (namelen > CODA_NC_NAMELEN) { + CODA_NC_DEBUG(CODA_NC_LOOKUP, + myprintf(("long name lookup %s\n",name));) + coda_nc_stat.long_name_lookups++; /* record stats */ + return((struct cnode *) 0); + } + + /* Use the hash function to locate the starting point, + then the search routine to go down the list looking for + the correct cred. + */ + + hash = CODA_NC_HASH(name, namelen, dcp); + cncp = coda_nc_find(dcp, name, namelen, cred, hash); + if (cncp == (struct coda_cache *) 0) { + coda_nc_stat.misses++; /* record miss */ + return((struct cnode *) 0); + } + + coda_nc_stat.hits++; + + /* put this entry at the end of the LRU */ + CODA_NC_LRUREM(cncp); + CODA_NC_LRUINS(cncp, &coda_nc_lru); + + /* move it to the front of the hash chain */ + /* don't need to change the hash bucket length */ + CODA_NC_HSHREM(cncp); + CODA_NC_HSHINS(cncp, &coda_nc_hash[hash]); + + CODA_NC_DEBUG(CODA_NC_LOOKUP, + printf("lookup: dcp %p, name %s, cred %p = cp %p\n", + dcp, name, cred, cncp->cp); ) + + return(cncp->cp); +} + +static void +coda_nc_remove(cncp, dcstat) + struct coda_cache *cncp; + enum dc_status dcstat; +{ + /* + * remove an entry -- vrele(cncp->dcp, cp), crfree(cred), + * remove it from it's hash chain, and + * place it at the head of the lru list. 
+ */ + CODA_NC_DEBUG(CODA_NC_REMOVE, + myprintf(("coda_nc_remove %s from parent %lx.%lx.%lx\n", + cncp->name, (cncp->dcp)->c_fid.Volume, + (cncp->dcp)->c_fid.Vnode, (cncp->dcp)->c_fid.Unique));) + + CODA_NC_HSHREM(cncp); + + CODA_NC_HSHNUL(cncp); /* have it be a null chain */ + if ((dcstat == IS_DOWNCALL) && (CTOV(cncp->dcp)->v_usecount == 1)) { + cncp->dcp->c_flags |= C_PURGING; + } + vrele(CTOV(cncp->dcp)); + + if ((dcstat == IS_DOWNCALL) && (CTOV(cncp->cp)->v_usecount == 1)) { + cncp->cp->c_flags |= C_PURGING; + } + vrele(CTOV(cncp->cp)); + + crfree(cncp->cred); + bzero(DATA_PART(cncp),DATA_SIZE); + + /* Put the null entry just after the least-recently-used entry */ + /* LRU_TOP adjusts the pointer to point to the top of the structure. */ + CODA_NC_LRUREM(cncp); + CODA_NC_LRUINS(cncp, LRU_TOP(coda_nc_lru.lru_prev)); +} + +/* + * Remove all entries with a parent which has the input fid. + */ +void +coda_nc_zapParentfid(fid, dcstat) + ViceFid *fid; + enum dc_status dcstat; +{ + /* To get to a specific fid, we might either have another hashing + function or do a sequential search through the cache for the + appropriate entries. The later may be acceptable since I don't + think callbacks or whatever Case 1 covers are frequent occurences. + */ + struct coda_cache *cncp, *ncncp; + int i; + + if (coda_nc_use == 0) /* Cache is off */ + return; + + CODA_NC_DEBUG(CODA_NC_ZAPPFID, + myprintf(("ZapParent: fid 0x%lx, 0x%lx, 0x%lx \n", + fid->Volume, fid->Vnode, fid->Unique)); ) + + coda_nc_stat.zapPfids++; + + for (i = 0; i < coda_nc_hashsize; i++) { + + /* + * Need to save the hash_next pointer in case we remove the + * entry. remove causes hash_next to point to itself. 
+ */ + + for (cncp = coda_nc_hash[i].hash_next; + cncp != (struct coda_cache *)&coda_nc_hash[i]; + cncp = ncncp) { + ncncp = cncp->hash_next; + if ((cncp->dcp->c_fid.Volume == fid->Volume) && + (cncp->dcp->c_fid.Vnode == fid->Vnode) && + (cncp->dcp->c_fid.Unique == fid->Unique)) { + coda_nc_hash[i].length--; /* Used for tuning */ + coda_nc_remove(cncp, dcstat); + } + } + } +} + + +/* + * Remove all entries which have the same fid as the input + */ +void +coda_nc_zapfid(fid, dcstat) + ViceFid *fid; + enum dc_status dcstat; +{ + /* See comment for zapParentfid. This routine will be used + if attributes are being cached. + */ + struct coda_cache *cncp, *ncncp; + int i; + + if (coda_nc_use == 0) /* Cache is off */ + return; + + CODA_NC_DEBUG(CODA_NC_ZAPFID, + myprintf(("Zapfid: fid 0x%lx, 0x%lx, 0x%lx \n", + fid->Volume, fid->Vnode, fid->Unique)); ) + + coda_nc_stat.zapFids++; + + for (i = 0; i < coda_nc_hashsize; i++) { + for (cncp = coda_nc_hash[i].hash_next; + cncp != (struct coda_cache *)&coda_nc_hash[i]; + cncp = ncncp) { + ncncp = cncp->hash_next; + if ((cncp->cp->c_fid.Volume == fid->Volume) && + (cncp->cp->c_fid.Vnode == fid->Vnode) && + (cncp->cp->c_fid.Unique == fid->Unique)) { + coda_nc_hash[i].length--; /* Used for tuning */ + coda_nc_remove(cncp, dcstat); + } + } + } +} + +/* + * Remove all entries which match the fid and the cred + */ +void +coda_nc_zapvnode(fid, cred, dcstat) + ViceFid *fid; + struct ucred *cred; + enum dc_status dcstat; +{ + /* See comment for zapfid. I don't think that one would ever + want to zap a file with a specific cred from the kernel. + We'll leave this one unimplemented. 
+ */ + if (coda_nc_use == 0) /* Cache is off */ + return; + + CODA_NC_DEBUG(CODA_NC_ZAPVNODE, + myprintf(("Zapvnode: fid 0x%lx, 0x%lx, 0x%lx cred %p\n", + fid->Volume, fid->Vnode, fid->Unique, cred)); ) + +} + +/* + * Remove all entries which have the (dir vnode, name) pair + */ +void +coda_nc_zapfile(dcp, name, namelen) + struct cnode *dcp; + const char *name; + int namelen; +{ + /* use the hash function to locate the file, then zap all + entries of it regardless of the cred. + */ + struct coda_cache *cncp; + int hash; + + if (coda_nc_use == 0) /* Cache is off */ + return; + + CODA_NC_DEBUG(CODA_NC_ZAPFILE, + myprintf(("Zapfile: dcp %p name %s \n", + dcp, name)); ) + + if (namelen > CODA_NC_NAMELEN) { + coda_nc_stat.long_remove++; /* record stats */ + return; + } + + coda_nc_stat.zapFile++; + + hash = CODA_NC_HASH(name, namelen, dcp); + cncp = coda_nc_find(dcp, name, namelen, 0, hash); + + while (cncp) { + coda_nc_hash[hash].length--; /* Used for tuning */ + + coda_nc_remove(cncp, NOT_DOWNCALL); + cncp = coda_nc_find(dcp, name, namelen, 0, hash); + } +} + +/* + * Remove all the entries for a particular user. Used when tokens expire. + * A user is determined by his/her effective user id (id_uid). + */ +void +coda_nc_purge_user(uid, dcstat) + vuid_t uid; + enum dc_status dcstat; +{ + /* + * I think the best approach is to go through the entire cache + * via HASH or whatever and zap all entries which match the + * input cred. Or just flush the whole cache. It might be + * best to go through on basis of LRU since cache will almost + * always be full and LRU is more straightforward. 
/*
 * Flush the entire name cache. In response to a flush of the Venus cache.
 *
 * dcstat: when it is IS_DOWNCALL, a cnode whose vnode use count is 1 just
 * before its reference is dropped is flagged C_PURGING first.
 *
 * Returns without doing anything when coda_nc_use is 0.  Otherwise every
 * valid entry on the LRU chain is unhashed, its two vnode references and
 * its credential reference are released, and its data part is zeroed.
 * The entries stay on the LRU chain in their current order.  All hash
 * bucket length counters are reset to 0 at the end.
 */
void
coda_nc_flush(dcstat)
	enum dc_status dcstat;
{
	/* One option is to deallocate the current name cache and
	   call init to start again. Or just deallocate, then rebuild.
	   Or again, we could just go through the array and zero the
	   appropriate fields.
	 */

	/*
	 * Go through the whole lru chain and kill everything as we go.
	 * I don't use remove since that would rebuild the lru chain
	 * as it went and that seemed unnecessary.
	 */
	struct coda_cache *cncp;
	int i;

	if (coda_nc_use == 0)			/* Cache is off */
		return;

	coda_nc_stat.Flushes++;

	/*
	 * The successor is fetched from the entry after it has been zeroed
	 * below, so this relies on DATA_PART/DATA_SIZE not covering the LRU
	 * links -- NOTE(review): confirm against coda_namecache.h.
	 */
	for (cncp = CODA_NC_LRUGET(coda_nc_lru);
	     cncp != (struct coda_cache *)&coda_nc_lru;
	     cncp = CODA_NC_LRUGET(*cncp)) {
		if (CODA_NC_VALID(cncp)) {

			CODA_NC_HSHREM(cncp);	/* only zero valid nodes */
			CODA_NC_HSHNUL(cncp);
			/* Last reference to the parent about to go: mark it. */
			if ((dcstat == IS_DOWNCALL)
			    && (CTOV(cncp->dcp)->v_usecount == 1))
			{
				cncp->dcp->c_flags |= C_PURGING;
			}
			vrele(CTOV(cncp->dcp));

			/*
			 * Vnodes flagged VTEXT get a coda_vmflush() first; a
			 * non-zero result is only reported, not acted upon.
			 */
			if (CTOV(cncp->cp)->v_flag & VTEXT) {
			    if (coda_vmflush(cncp->cp))
				CODADEBUG(CODA_FLUSH,
					  myprintf(("coda_nc_flush: (%lx.%lx.%lx) busy\n", cncp->cp->c_fid.Volume, cncp->cp->c_fid.Vnode, cncp->cp->c_fid.Unique)); )
			}

			/* Same last-reference marking for the child cnode. */
			if ((dcstat == IS_DOWNCALL)
			    && (CTOV(cncp->cp)->v_usecount == 1))
			{
				cncp->cp->c_flags |= C_PURGING;
			}
			vrele(CTOV(cncp->cp));

			crfree(cncp->cred);
			bzero(DATA_PART(cncp),DATA_SIZE);
		}
	}

	/* Every chain is empty now, so every bucket length is zero. */
	for (i = 0; i < coda_nc_hashsize; i++)
	  coda_nc_hash[i].length = 0;
}
+ */ + coda_nc_stat.Sum_bucket_len = sum; + coda_nc_stat.Num_zero_len = zeros; + coda_nc_stat.Max_bucket_len = max; + + if ((n = coda_nc_hashsize - zeros) > 0) + ave = sum / n; + else + ave = 0; + + sum = 0; + for (i = 0; i < coda_nc_hashsize; i++) { + if (coda_nc_hash[i].length) { + temp = coda_nc_hash[i].length - ave; + sum += temp * temp; + } + } + coda_nc_stat.Sum2_bucket_len = sum; +} + +/* + * The purpose of this routine is to allow the hash and cache sizes to be + * changed dynamically. This should only be used in controlled environments, + * it makes no effort to lock other users from accessing the cache while it + * is in an improper state (except by turning the cache off). + */ +int +coda_nc_resize(hashsize, heapsize, dcstat) + int hashsize, heapsize; + enum dc_status dcstat; +{ + if ((hashsize % 2) || (heapsize % 2)) { /* Illegal hash or cache sizes */ + return(EINVAL); + } + + coda_nc_use = 0; /* Turn the cache off */ + + coda_nc_flush(dcstat); /* free any cnodes in the cache */ + + /* WARNING: free must happen *before* size is reset */ + CODA_FREE(coda_nc_heap,TOTAL_CACHE_SIZE); + CODA_FREE(coda_nc_hash,TOTAL_HASH_SIZE); + + coda_nc_hashsize = hashsize; + coda_nc_size = heapsize; + + coda_nc_init(); /* Set up a cache with the new size */ + + coda_nc_use = 1; /* Turn the cache back on */ + return(0); +} + +#ifdef DEBUG +char coda_nc_name_buf[CODA_MAXNAMLEN+1]; + +void +coda_nc_name(struct cnode *cp) +{ + struct coda_cache *cncp, *ncncp; + int i; + + if (coda_nc_use == 0) /* Cache is off */ + return; + + for (i = 0; i < coda_nc_hashsize; i++) { + for (cncp = coda_nc_hash[i].hash_next; + cncp != (struct coda_cache *)&coda_nc_hash[i]; + cncp = ncncp) { + ncncp = cncp->hash_next; + if (cncp->cp == cp) { + bcopy(cncp->name, coda_nc_name_buf, cncp->namelen); + coda_nc_name_buf[cncp->namelen] = 0; + printf(" is %s (%p,%p)@%p", + coda_nc_name_buf, cncp->cp, cncp->dcp, cncp); + } + + } + } +} +#endif diff --git a/sys/fs/coda/coda_namecache.h 
b/sys/fs/coda/coda_namecache.h new file mode 100644 index 0000000..f7b3194 --- /dev/null +++ b/sys/fs/coda/coda_namecache.h @@ -0,0 +1,285 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_namecache.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_namecache.h,v 1.3 1998/09/11 18:50:17 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon University. + * Contributers include David Steere, James Kistler, and M. 
Satyanarayanan. + */ + +/* + * HISTORY + * $Log: coda_namecache.h,v $ + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.8 1998/08/28 18:12:25 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. + * + * Revision 1.7 1998/08/18 17:05:24 rvb + * Don't use __RCSID now + * + * Revision 1.6 1998/08/18 16:31:49 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.5 98/01/23 11:53:51 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.4.2.1 97/12/16 12:40:23 rvb + * Sync with 1.3 + * + * Revision 1.4 97/12/05 10:39:29 rvb + * Read CHANGES + * + * Revision 1.3.4.3 97/11/24 15:44:51 rvb + * Final cfs_venus.c w/o macros, but one locking bug + * + * Revision 1.3.4.2 97/11/12 12:09:44 rvb + * reorg pass1 + * + * Revision 1.3.4.1 97/11/06 21:06:05 rvb + * don't include headers in headers + * + * Revision 1.3 97/08/05 11:08:19 lily + * Removed cfsnc_replace, replaced it with a coda_find, unhash, and + * rehash. This fixes a cnode leak and a bug in which the fid is + * not actually replaced. 
(cfs_namecache.c, cfsnc.h, cfs_subr.c) + * + * Revision 1.2 96/01/02 16:57:19 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:45 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:08:22 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:08:21 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.2 1994/08/28 19:37:39 luqi + * Add a new CODA_REPLACE call to allow venus to replace a ViceFid in the + * mini-cache. + * + * In "cfs.h": + * Add CODA_REPLACE decl. + * + * In "cfs_namecache.c": + * Add routine cfsnc_replace. + * + * In "cfs_subr.c": + * Add case-statement to process CODA_REPLACE. + * + * In "cfsnc.h": + * Add decl for CODA_NC_REPLACE. + * + * Revision 2.1 94/07/21 16:25:27 satya + * Conversion to C++ 3.0; start of Coda Release 2.0 + * + * Revision 1.2 92/10/27 17:58:34 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 2.2 90/07/05 11:27:04 mrt + * Created for the Coda File System. + * [90/05/23 dcs] + * + * Revision 1.4 90/05/31 17:02:12 dcs + * Prepare for merge with facilities kernel. + * + * + */ +#ifndef _CODA_NC_HEADER_ +#define _CODA_NC_HEADER_ + +/* + * Coda constants + */ +#define CODA_NC_NAMELEN 15 /* longest name stored in cache */ +#define CODA_NC_CACHESIZE 256 /* Default cache size */ +#define CODA_NC_HASHSIZE 64 /* Must be multiple of 2 */ + +/* + * Hash function for the primary hash. 
/*
 * Hash function for the primary hash.
 *
 * First try -- (first + last letters + length + (int)cp) mod size
 * 2nd try -- same, except dir fid.vnode instead of cp
 *
 * The result is reduced with "& (coda_nc_hashsize-1)".  namelen must be
 * at least 1 because the last character of the name is read.  The
 * pointer is converted through unsigned long instead of int so that it
 * is not truncated before the shift on machines with wide pointers.
 * Every macro argument is parenthesized so that an expression can be
 * passed safely.
 */

#ifdef	oldhash
#define CODA_NC_HASH(name, namelen, cp) \
	(((name)[0] + (name)[(namelen)-1] + (namelen) + (unsigned long)(cp)) & (coda_nc_hashsize-1))
#else
#define CODA_NC_HASH(name, namelen, cp) \
	(((name)[0] + ((name)[(namelen)-1]<<4) + (namelen) + (((unsigned long)(cp))>>8)) & (coda_nc_hashsize-1))
#endif

/*
 * True when cache entry cp holds the name (name, namelen) under parent
 * directory dcp.
 */
#define CODA_NAMEMATCH(cp, name, namelen, dcp) \
	(((namelen) == (cp)->namelen) && ((dcp) == (cp)->dcp) && \
		 (bcmp((cp)->name,(name),(namelen)) == 0))

/*
 * Functions to modify the hash and lru chains.
 * insque and remque assume that the pointers are the first thing
 * in the list node, thus the trickery for lru.
 */

#define CODA_NC_HSHINS(elem, pred)	insque((elem),(pred))
#define CODA_NC_HSHREM(elem)		remque(elem)
/* Point both hash links of elem at elem itself (an empty, self-linked node). */
#define CODA_NC_HSHNUL(elem)		((elem)->hash_next = \
					(elem)->hash_prev = (elem))

#define CODA_NC_LRUINS(elem, pred)	insque(LRU_PART(elem), LRU_PART(pred))
/*
 * NOTE(review): the trailing semicolon is part of the original macro and
 * is kept because callers outside this header may rely on it; do not use
 * this macro as the unbraced body of an if/else.
 */
#define CODA_NC_LRUREM(elem)		remque(LRU_PART(elem));
#define CODA_NC_LRUGET(lruhead)		LRU_TOP((lruhead).lru_prev)

/* An entry is in use when it has a parent cnode. */
#define CODA_NC_VALID(cncp)	((cncp)->dcp != (struct cnode *)0)

/*
 * Views into a cache entry, expressed as byte offsets counted in
 * pointers: LRU_PART skips the two hash links, LRU_TOP undoes that,
 * DATA_PART skips all four links, and DATA_SIZE is what remains of the
 * structure after them.
 */
#define LRU_PART(cncp)		((struct coda_cache *) \
				((char *)(cncp) + (2*sizeof(struct coda_cache *))))
#define LRU_TOP(cncp)		((struct coda_cache *) \
				((char *)(cncp) - (2*sizeof(struct coda_cache *))))
#define DATA_PART(cncp)		((struct coda_cache *) \
				((char *)(cncp) + (4*sizeof(struct coda_cache *))))
#define DATA_SIZE	(sizeof(struct coda_cache)-(4*sizeof(struct coda_cache *)))
/*
 * One element of the Coda name cache.
 *
 * The order and size of the leading members matter: LRU_PART, LRU_TOP,
 * DATA_PART and DATA_SIZE locate the LRU links and the data fields by
 * counting pointers from the start of the structure, so the two hash
 * links must stay first and the two LRU links must follow immediately.
 */
struct coda_cache {
	struct coda_cache	*hash_next,*hash_prev;	/* Hash list */
	struct coda_cache	*lru_next, *lru_prev;	/* LRU list */
	struct cnode	*cp;				/* cnode of the file */
	struct cnode	*dcp;				/* parent's cnode; 0 marks an unused entry (see CODA_NC_VALID) */
	struct ucred	*cred;				/* user credentials */
	char		name[CODA_NC_NAMELEN];		/* segment name; length is in namelen (presumably not NUL-terminated -- confirm) */
	int		namelen;			/* length of name */
};

/*
 * Head of the LRU chain.  The two place holders stand in for the hash
 * links of a coda_cache so that lru_next/lru_prev sit at the same offset
 * in both structures.
 */
struct	coda_lru {		/* Start of LRU chain */
	char *dummy1, *dummy2;			/* place holders */
	struct coda_cache *lru_next, *lru_prev;   /* position of pointers is important */
};


/*
 * Head of one hash chain.  The head is cast to a coda_cache when chains
 * are walked, so the chain pointers must come first.
 */
struct coda_hash {		/* Start of Hash chain */
	struct coda_cache *hash_next, *hash_prev; /* NOTE: chain pointers must be first */
	int length;                              /* used for tuning purposes */
};


/*
 * Symbols to aid in debugging the namecache code. Assumes the existence
 * of the variable coda_nc_debug, which is defined in cfs_namecache.c
 *
 * STMT runs only when bit N of coda_nc_debug is set; N is one of the
 * CODA_NC_* operation codes below.  The expansion is a brace-enclosed
 * block, which is why uses of this macro are not followed by a
 * semicolon.
 */
#define CODA_NC_DEBUG(N, STMT)     { if (coda_nc_debug & (1 <<N)) { STMT } }

/* Prototypes of functions exported within cfs */
extern void coda_nc_init(void);
extern void coda_nc_enter(struct cnode *, const char *, int, struct ucred *, struct cnode *);
extern struct cnode *coda_nc_lookup(struct cnode *, const char *, int, struct ucred *);

extern void coda_nc_zapParentfid(ViceFid *, enum dc_status);
extern void coda_nc_zapfid(ViceFid *, enum dc_status);
extern void coda_nc_zapvnode(ViceFid *, struct ucred *, enum dc_status);
extern void coda_nc_zapfile(struct cnode *, const char *, int);
extern void coda_nc_purge_user(vuid_t, enum dc_status);
extern void coda_nc_flush(enum dc_status);

extern void print_coda_nc(void);
extern void coda_nc_gather_stats(void);
extern int  coda_nc_resize(int, int, enum dc_status);
extern void coda_nc_name(struct cnode *cp);

/*
 * Structure to contain statistics on the cache usage
 *
 * The *_bucket_len, Num_zero_len members are filled in by
 * coda_nc_gather_stats(); the others are event counters bumped by the
 * cache routines (e.g. Flushes by coda_nc_flush, zapUsers by
 * coda_nc_purge_user).
 */

struct coda_nc_statistics {
	unsigned	hits;
	unsigned	misses;
	unsigned	enters;
	unsigned	dbl_enters;
	unsigned	long_name_enters;
	unsigned	long_name_lookups;
	unsigned	long_remove;
	unsigned	lru_rm;
	unsigned	zapPfids;
	unsigned	zapFids;
	unsigned	zapFile;
	unsigned	zapUsers;
	unsigned	Flushes;		/* calls to coda_nc_flush with the cache on */
	unsigned        Sum_bucket_len;		/* sum of all hash chain lengths */
	unsigned        Sum2_bucket_len;	/* sum of squared deviations of non-empty chains from their mean */
	unsigned        Max_bucket_len;		/* longest hash chain */
	unsigned        Num_zero_len;		/* number of empty hash chains */
	unsigned        Search_len;
};

/*
 * Operation codes; used as the bit number N in CODA_NC_DEBUG(N, ...).
 */
#define CODA_NC_FIND		((u_long) 1)
#define CODA_NC_REMOVE		((u_long) 2)
#define CODA_NC_INIT		((u_long) 3)
#define CODA_NC_ENTER		((u_long) 4)
#define CODA_NC_LOOKUP		((u_long) 5)
#define CODA_NC_ZAPPFID		((u_long) 6)
#define CODA_NC_ZAPFID		((u_long) 7)
#define CODA_NC_ZAPVNODE		((u_long) 8)
#define CODA_NC_ZAPFILE		((u_long) 9)
#define CODA_NC_PURGEUSER		((u_long) 10)
#define CODA_NC_FLUSH		((u_long) 11)
#define CODA_NC_PRINTCODA_NC	((u_long) 12)
#define CODA_NC_PRINTSTATS	((u_long) 13)
/*
 * operation stats: what the minicache can intercept that
 * *isn't* seen by venus.  These stats are kept to augment
 * the stats maintained by the Volume-Session mechanism.
 */

/* vfsops:
 *          mount: not currently bounced to Venus
 *          umount: nope
 *          root: only first call, rest is cached.
 *          statfs: none (bogus)
 *          sync: none (bogus)
 *          vget: all
 */

/* Indices of the vfs operations; CODA_VFSOPS_SIZE is the number of them. */
#define CODA_MOUNT_STATS  0
#define CODA_UMOUNT_STATS 1
#define CODA_ROOT_STATS   2
#define CODA_STATFS_STATS 3
#define CODA_SYNC_STATS   4
#define CODA_VGET_STATS   5
#define CODA_VFSOPS_SIZE  6

/* vnodeops:
 *            open: all to venus
 *            close: all to venus
 *            rdwr: bogus.  Maybe redirected to UFS.
 *                          May call open/close for internal opens/closes
 *                          (Does exec not call open?)
 *            ioctl: causes a lookupname
 *                   passes through
 *            select: can't get there from here.
 *            getattr: can be satisfied by cache
 *            setattr: all go through
 *            access: can be satisfied by cache
 *            readlink: can be satisfied by cache
 *            fsync: passes through
 *            inactive: passes through
 *            lookup: can be satisfied by cache
 *            create: passes through
 *            remove: passes through
 *            link: passes through
 *            rename: passes through
 *            mkdir: passes through
 *            rmdir: passes through
 *            symlink: passes through
 *            readdir: may be redirected to UFS
 *                     may cause an "internal" open/close
 */

/* Indices of the vnode operations; CODA_VNODEOPS_SIZE is the number of them. */
#define CODA_OPEN_STATS     0
#define CODA_CLOSE_STATS    1
#define CODA_RDWR_STATS     2
#define CODA_IOCTL_STATS    3
#define CODA_SELECT_STATS   4
#define CODA_GETATTR_STATS  5
#define CODA_SETATTR_STATS  6
#define CODA_ACCESS_STATS   7
#define CODA_READLINK_STATS 8
#define CODA_FSYNC_STATS    9
#define CODA_INACTIVE_STATS 10
#define CODA_LOOKUP_STATS   11
#define CODA_CREATE_STATS   12
#define CODA_REMOVE_STATS   13
#define CODA_LINK_STATS     14
#define CODA_RENAME_STATS   15
#define CODA_MKDIR_STATS    16
#define CODA_RMDIR_STATS    17
#define CODA_SYMLINK_STATS  18
#define CODA_READDIR_STATS  19
#define CODA_VNODEOPS_SIZE  20

/*
 * I propose the following structures:
 *
 * One record of counters per operation.
 */

struct coda_op_stats {
    int opcode;       /* vfs opcode */
    long entries;     /* number of times call attempted */
    long sat_intrn;   /* number of times call satisfied by cache */
    long unsat_intrn; /* number of times call failed in cache, but
                         was not bounced to venus proper. */
    long gen_intrn;   /* number of times call generated internally */
                      /* (do we need that?) */
};
+ */ diff --git a/sys/fs/coda/coda_pioctl.h b/sys/fs/coda/coda_pioctl.h new file mode 100644 index 0000000..2aa55bb --- /dev/null +++ b/sys/fs/coda/coda_pioctl.h @@ -0,0 +1,133 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_pioctl.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_pioctl.h,v 1.3 1998/09/11 18:50:17 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1989 Carnegie-Mellon University + * Copyright (c) 1988 Carnegie-Mellon University + * Copyright (c) 1987 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. 
+ */ + +/* + * HISTORY + * $Log: coda_pioctl.h,v $ + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.7 1998/08/28 18:12:26 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. + * + * Revision 1.6 1998/08/18 17:05:26 rvb + * Don't use __RCSID now + * + * Revision 1.5 1998/08/18 16:31:51 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.4 98/01/23 11:53:54 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.3.2.1 97/12/06 17:41:29 rvb + * Sync with peters coda.h + * + * Revision 1.3 97/12/05 10:39:31 rvb + * Read CHANGES + * + * Revision 1.2.34.2 97/11/13 22:03:06 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.2.34.1 97/11/12 12:38:11 rvb + * mach_vioctl.h -> pioctl.h + * + * Revision 1.2 96/01/02 16:57:27 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:54 bnoble + * Added CODA-specific files + * + * Revision 2.4 90/08/30 11:51:12 bohman + * Ioctl changes for STDC. + * [90/08/28 bohman] + * + * Revision 2.3 89/03/09 22:10:26 rpd + * More cleanup. + * + * Revision 2.2 89/02/25 17:58:32 gm0w + * Changes for cleanup. + * + * 7-Feb-87 Avadis Tevanian (avie) at Carnegie-Mellon University + * No need for VICE conditional. + * + * 22-Oct-86 Jay Kistler (jjk) at Carnegie-Mellon University + * Created from Andrew's vice.h and viceioctl.h. + * + */ +/* + * ITC Remote file system - vice ioctl interface module + */ + +/* + * TODO: Find /usr/local/include/viceioctl.h. 
#ifndef	_SYS_PIOCTL_H_
#define _SYS_PIOCTL_H_

/* The 2K limits above are a consequence of the size of the kernel buffer
   used to buffer requests from the user to venus--2*MAXPATHLEN.
   The buffer pointers may be null, or the counts may be 0 if there
   are no input or output parameters

   NOTE(review): no limits are defined in this header; the sentence
   presumably refers to the ViceIoctl structure declared elsewhere.
 */

/*
 * Build the ioctl command number for vice ioctl `id': a write-direction
 * command in group 'V' that carries a struct ViceIoctl.
 */
#define _VICEIOCTL(id)  ((unsigned int ) _IOW('V', (id), struct ViceIoctl))

/* Use this macro to define up to 256 vice ioctl's.  These ioctl's
   all potentially have in/out parameters--this depends upon the
   values in the ViceIoctl structure.  This structure is itself passed
   into the kernel by the normal ioctl parameter passing mechanism.
 */

/*
 * True when com lies between _VICEIOCTL(0) and _VICEIOCTL(255)
 * inclusive.  The argument is parenthesized so that an expression such
 * as "a | b" is compared as a whole; it is still evaluated twice, so it
 * must not have side effects.
 */
#define _VALIDVICEIOCTL(com) ((com) >= _VICEIOCTL(0) && (com) <= _VICEIOCTL(255))

#endif
+ * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_psdev.c,v 1.9 1998/11/11 20:32:20 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon + * University. Contributers include David Steere, James Kistler, and + * M. Satyanarayanan. */ + +/* + * These routines define the psuedo device for communication between + * Coda's Venus and Minicache in Mach 2.6. They used to be in cfs_subr.c, + * but I moved them to make it easier to port the Minicache without + * porting coda. -- DCS 10/12/94 + */ + +/* + * HISTORY + * $Log: coda_psdev.c,v $ + * Revision 1.9 1998/11/11 20:32:20 rvb + * coda_lookup now passes up an extra flag. But old veni will + * be ok; new veni will check /dev/cfs0 to make sure that a new + * kernel is running. + * Also, a bug in vc_nb_close iff CODA_SIGNAL's were seen has been + * fixed. + * + * Revision 1.8 1998/10/28 20:31:13 rvb + * Change the way unmounting happens to guarantee that the + * client programs are allowed to finish up (coda_call is + * forced to complete) and release their locks. Thus there + * is a reasonable chance that the vflush implicit in the + * unmount will not get hung on held locks. + * + * Revision 1.7 1998/09/29 20:19:45 rvb + * Fixes for lkm: + * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL + * 2. don't pass -DCODA to lkm build + * + * Revision 1.6 1998/09/28 20:52:58 rvb + * Cleanup and fix THE bug + * + * Revision 1.5 1998/09/25 17:38:31 rvb + * Put "stray" printouts under DIAGNOSTIC. Make everything build + * with DEBUG on. 
Add support for lkm. (The macro's don't work + * for me; for a good chuckle look at the end of coda_fbsd.c.) + * + * Revision 1.4 1998/09/13 13:57:59 rvb + * Finish conversion of cfs -> coda + * + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.9 1998/08/28 18:12:17 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. + * + * Revision 1.8 1998/08/18 17:05:15 rvb + * Don't use __RCSID now + * + * Revision 1.7 1998/08/18 16:31:41 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.8 1998/06/09 23:30:42 rvb + * Try to allow ^C -- take 1 + * + * Revision 1.5.2.8 98/01/23 11:21:04 rvb + * Sync with 2.2.5 + * + * Revision 1.5.2.7 98/01/22 22:22:21 rvb + * sync 1.2 and 1.3 + * + * Revision 1.5.2.6 98/01/22 13:11:24 rvb + * Move make_coda_node ctlfid later so vfsp is known; work on ^c and ^z + * + * Revision 1.5.2.5 97/12/16 22:01:27 rvb + * Oops add cfs_subr.h cfs_venus.h; sync with peter + * + * Revision 1.5.2.4 97/12/16 12:40:05 rvb + * Sync with 1.3 + * + * Revision 1.5.2.3 97/12/10 14:08:24 rvb + * Fix O_ flags; check result in coda_call + * + * Revision 1.5.2.2 97/12/10 11:40:24 rvb + * No more ody + * + * Revision 1.5.2.1 97/12/06 17:41:20 rvb + * Sync with peters coda.h + * + * Revision 1.5 97/12/05 10:39:16 rvb + * Read CHANGES + * + * Revision 1.4.18.9 97/12/05 08:58:07 rvb + * peter found this one + * + * Revision 1.4.18.8 97/11/26 15:28:57 rvb + * Cant make downcall pbuf == union cfs_downcalls yet + * + * Revision 1.4.18.7 97/11/25 09:40:49 rvb + * Final cfs_venus.c w/o macros, but one locking bug + * + * Revision 
1.4.18.6 97/11/20 11:46:41 rvb + * Capture current cfs_venus + * + * Revision 1.4.18.5 97/11/18 10:27:15 rvb + * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c + * cfs_nb_foo and cfs_foo are joined + * + * Revision 1.4.18.4 97/11/13 22:02:59 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.4.18.3 97/11/12 12:09:38 rvb + * reorg pass1 + * + * Revision 1.4.18.2 97/10/29 16:06:09 rvb + * Kill DYING + * + * Revision 1.4.18.1 1997/10/28 23:10:15 rvb + * >64Meg; venus can be killed! + * + * Revision 1.4 1996/12/12 22:10:58 bnoble + * Fixed the "downcall invokes venus operation" deadlock in all known cases. + * There may be more + * + * Revision 1.3 1996/11/13 04:14:20 bnoble + * Merging BNOBLE_WORK_6_20_96 into main line + * + * Revision 1.2.8.1 1996/08/22 14:25:04 bnoble + * Added a return code from vc_nb_close + * + * Revision 1.2 1996/01/02 16:56:58 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:24 bnoble + * Added CODA-specific files + * + * Revision 1.1 1995/03/14 20:52:15 bnoble + * Initial revision + * + */ + +/* These routines are the device entry points for Venus. 
*/ + +extern int coda_nc_initialized; /* Set if cache has been initialized */ + +#include <vcoda.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/file.h> +#include <sys/ioccom.h> +#include <sys/poll.h> +#include <sys/conf.h> + +#include <coda/coda.h> +#include <coda/cnode.h> +#include <coda/coda_namecache.h> +#include <coda/coda_io.h> +#include <coda/coda_psdev.h> + +#define CTL_C + +int coda_psdev_print_entry = 0; +static +int outstanding_upcalls = 0; +int coda_call_sleep = PZERO - 1; +#ifdef CTL_C +int coda_pcatch = PCATCH; +#else +#endif + +#define ENTRY if(coda_psdev_print_entry) myprintf(("Entered %s\n",__FUNCTION__)) + +void vcodaattach(int n); + +struct vmsg { + struct queue vm_chain; + caddr_t vm_data; + u_short vm_flags; + u_short vm_inSize; /* Size is at most 5000 bytes */ + u_short vm_outSize; + u_short vm_opcode; /* copied from data to save ptr lookup */ + int vm_unique; + caddr_t vm_sleep; /* Not used by Mach. 
*/ +}; + +#define VM_READ 1 +#define VM_WRITE 2 +#define VM_INTR 4 + +/* vcodaattach: do nothing */ +void +vcodaattach(n) + int n; +{ +} + +int +vc_nb_open(dev, flag, mode, p) + dev_t dev; + int flag; + int mode; + struct proc *p; /* NetBSD only */ +{ + register struct vcomm *vcp; + + ENTRY; + + if (minor(dev) >= NVCODA || minor(dev) < 0) + return(ENXIO); + + if (!coda_nc_initialized) + coda_nc_init(); + + vcp = &coda_mnttbl[minor(dev)].mi_vcomm; + if (VC_OPEN(vcp)) + return(EBUSY); + + bzero(&(vcp->vc_selproc), sizeof (struct selinfo)); + INIT_QUEUE(vcp->vc_requests); + INIT_QUEUE(vcp->vc_replys); + MARK_VC_OPEN(vcp); + + coda_mnttbl[minor(dev)].mi_vfsp = NULL; + coda_mnttbl[minor(dev)].mi_rootvp = NULL; + + return(0); +} + +int +vc_nb_close (dev, flag, mode, p) + dev_t dev; + int flag; + int mode; + struct proc *p; +{ + register struct vcomm *vcp; + register struct vmsg *vmp, *nvmp = NULL; + struct coda_mntinfo *mi; + int err; + + ENTRY; + + if (minor(dev) >= NVCODA || minor(dev) < 0) + return(ENXIO); + + mi = &coda_mnttbl[minor(dev)]; + vcp = &(mi->mi_vcomm); + + if (!VC_OPEN(vcp)) + panic("vcclose: not open"); + + /* prevent future operations on this vfs from succeeding by auto- + * unmounting any vfs mounted via this device. This frees user or + * sysadm from having to remember where all mount points are located. + * Put this before WAKEUPs to avoid queuing new messages between + * the WAKEUP and the unmount (which can happen if we're unlucky) + */ + if (!mi->mi_rootvp) { + /* just a simple open/close w no mount */ + MARK_VC_CLOSED(vcp); + return 0; + } + + /* Let unmount know this is for real */ + VTOC(mi->mi_rootvp)->c_flags |= C_UNMOUNTING; + coda_unmounting(mi->mi_vfsp); + + outstanding_upcalls = 0; + /* Wakeup clients so they can return. 
*/ + for (vmp = (struct vmsg *)GETNEXT(vcp->vc_requests); + !EOQ(vmp, vcp->vc_requests); + vmp = nvmp) + { + nvmp = (struct vmsg *)GETNEXT(vmp->vm_chain); + /* Free signal request messages and don't wakeup cause + no one is waiting. */ + if (vmp->vm_opcode == CODA_SIGNAL) { + CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA); + CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg)); + continue; + } + outstanding_upcalls++; + wakeup(&vmp->vm_sleep); + } + + for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys); + !EOQ(vmp, vcp->vc_replys); + vmp = (struct vmsg *)GETNEXT(vmp->vm_chain)) + { + outstanding_upcalls++; + wakeup(&vmp->vm_sleep); + } + + MARK_VC_CLOSED(vcp); + + if (outstanding_upcalls) { +#ifdef CODA_VERBOSE + printf("presleep: outstanding_upcalls = %d\n", outstanding_upcalls); + (void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0); + printf("postsleep: outstanding_upcalls = %d\n", outstanding_upcalls); +#else + (void) tsleep(&outstanding_upcalls, coda_call_sleep, "coda_umount", 0); +#endif + } + + err = dounmount(mi->mi_vfsp, flag, p); + if (err) + myprintf(("Error %d unmounting vfs in vcclose(%d)\n", + err, minor(dev))); + return 0; +} + +int +vc_nb_read(dev, uiop, flag) + dev_t dev; + struct uio *uiop; + int flag; +{ + register struct vcomm * vcp; + register struct vmsg *vmp; + int error = 0; + + ENTRY; + + if (minor(dev) >= NVCODA || minor(dev) < 0) + return(ENXIO); + + vcp = &coda_mnttbl[minor(dev)].mi_vcomm; + /* Get message at head of request queue. 
*/ + if (EMPTY(vcp->vc_requests)) + return(0); /* Nothing to read */ + + vmp = (struct vmsg *)GETNEXT(vcp->vc_requests); + + /* Move the input args into userspace */ + uiop->uio_rw = UIO_READ; + error = uiomove(vmp->vm_data, vmp->vm_inSize, uiop); + if (error) { + myprintf(("vcread: error (%d) on uiomove\n", error)); + error = EINVAL; + } + +#ifdef OLD_DIAGNOSTIC + if (vmp->vm_chain.forw == 0 || vmp->vm_chain.back == 0) + panic("vc_nb_read: bad chain"); +#endif + + REMQUE(vmp->vm_chain); + + /* If request was a signal, free up the message and don't + enqueue it in the reply queue. */ + if (vmp->vm_opcode == CODA_SIGNAL) { + if (codadebug) + myprintf(("vcread: signal msg (%d, %d)\n", + vmp->vm_opcode, vmp->vm_unique)); + CODA_FREE((caddr_t)vmp->vm_data, (u_int)VC_IN_NO_DATA); + CODA_FREE((caddr_t)vmp, (u_int)sizeof(struct vmsg)); + return(error); + } + + vmp->vm_flags |= VM_READ; + INSQUE(vmp->vm_chain, vcp->vc_replys); + + return(error); +} + +int +vc_nb_write(dev, uiop, flag) + dev_t dev; + struct uio *uiop; + int flag; +{ + register struct vcomm * vcp; + register struct vmsg *vmp; + struct coda_out_hdr *out; + u_long seq; + u_long opcode; + int buf[2]; + int error = 0; + + ENTRY; + + if (minor(dev) >= NVCODA || minor(dev) < 0) + return(ENXIO); + + vcp = &coda_mnttbl[minor(dev)].mi_vcomm; + + /* Peek at the opcode, unique without transfering the data. */ + uiop->uio_rw = UIO_WRITE; + error = uiomove((caddr_t)buf, sizeof(int) * 2, uiop); + if (error) { + myprintf(("vcwrite: error (%d) on uiomove\n", error)); + return(EINVAL); + } + + opcode = buf[0]; + seq = buf[1]; + + if (codadebug) + myprintf(("vcwrite got a call for %ld.%ld\n", opcode, seq)); + + if (DOWNCALL(opcode)) { + union outputArgs pbuf; + + /* get the rest of the data. 
*/ + uiop->uio_rw = UIO_WRITE; + error = uiomove((caddr_t)&pbuf.coda_purgeuser.oh.result, sizeof(pbuf) - (sizeof(int)*2), uiop); + if (error) { + myprintf(("vcwrite: error (%d) on uiomove (Op %ld seq %ld)\n", + error, opcode, seq)); + return(EINVAL); + } + + return handleDownCall(opcode, &pbuf); + } + + /* Look for the message on the (waiting for) reply queue. */ + for (vmp = (struct vmsg *)GETNEXT(vcp->vc_replys); + !EOQ(vmp, vcp->vc_replys); + vmp = (struct vmsg *)GETNEXT(vmp->vm_chain)) + { + if (vmp->vm_unique == seq) break; + } + + if (EOQ(vmp, vcp->vc_replys)) { + if (codadebug) + myprintf(("vcwrite: msg (%ld, %ld) not found\n", opcode, seq)); + + return(ESRCH); + } + + /* Remove the message from the reply queue */ + REMQUE(vmp->vm_chain); + + /* move data into response buffer. */ + out = (struct coda_out_hdr *)vmp->vm_data; + /* Don't need to copy opcode and uniquifier. */ + + /* get the rest of the data. */ + if (vmp->vm_outSize < uiop->uio_resid) { + myprintf(("vcwrite: more data than asked for (%d < %d)\n", + vmp->vm_outSize, uiop->uio_resid)); + wakeup(&vmp->vm_sleep); /* Notify caller of the error. */ + return(EINVAL); + } + + buf[0] = uiop->uio_resid; /* Save this value. */ + uiop->uio_rw = UIO_WRITE; + error = uiomove((caddr_t) &out->result, vmp->vm_outSize - (sizeof(int) * 2), uiop); + if (error) { + myprintf(("vcwrite: error (%d) on uiomove (op %ld seq %ld)\n", + error, opcode, seq)); + return(EINVAL); + } + + /* I don't think these are used, but just in case. */ + /* XXX - aren't these two already correct? -bnoble */ + out->opcode = opcode; + out->unique = seq; + vmp->vm_outSize = buf[0]; /* Amount of data transferred? 
*/ + vmp->vm_flags |= VM_WRITE; + wakeup(&vmp->vm_sleep); + + return(0); +} + +int +vc_nb_ioctl(dev, cmd, addr, flag, p) + dev_t dev; + u_long cmd; + caddr_t addr; + int flag; + struct proc *p; +{ + ENTRY; + + switch(cmd) { + case CODARESIZE: { + struct coda_resize *data = (struct coda_resize *)addr; + return(coda_nc_resize(data->hashsize, data->heapsize, IS_DOWNCALL)); + break; + } + case CODASTATS: + if (coda_nc_use) { + coda_nc_gather_stats(); + return(0); + } else { + return(ENODEV); + } + break; + case CODAPRINT: + if (coda_nc_use) { + print_coda_nc(); + return(0); + } else { + return(ENODEV); + } + break; + case CIOC_KERNEL_VERSION: + switch (*(u_int *)addr) { + case 0: + *(u_int *)addr = coda_kernel_version; + return 0; + break; + case 1: + case 2: + if (coda_kernel_version != *(u_int *)addr) + return ENOENT; + else + return 0; + default: + return ENOENT; + } + break; + default : + return(EINVAL); + break; + } +} + +int +vc_nb_poll(dev, events, p) + dev_t dev; + int events; + struct proc *p; +{ + register struct vcomm *vcp; + int event_msk = 0; + + ENTRY; + + if (minor(dev) >= NVCODA || minor(dev) < 0) + return(ENXIO); + + vcp = &coda_mnttbl[minor(dev)].mi_vcomm; + + event_msk = events & (POLLIN|POLLRDNORM); + if (!event_msk) + return(0); + + if (!EMPTY(vcp->vc_requests)) + return(events & (POLLIN|POLLRDNORM)); + + selrecord(p, &(vcp->vc_selproc)); + + return(0); +} + +/* + * Statistics + */ +struct coda_clstat coda_clstat; + +/* + * Key question: whether to sleep interuptably or uninteruptably when + * waiting for Venus. The former seems better (cause you can ^C a + * job), but then GNU-EMACS completion breaks. Use tsleep with no + * timeout, and no longjmp happens. But, when sleeping + * "uninterruptibly", we don't get told if it returns abnormally + * (e.g. kill -9). 
+ */ + +int +coda_call(mntinfo, inSize, outSize, buffer) + struct coda_mntinfo *mntinfo; int inSize; int *outSize; caddr_t buffer; +{ + struct vcomm *vcp; + struct vmsg *vmp; + int error; +#ifdef CTL_C + struct proc *p = curproc; + unsigned int psig_omask = p->p_sigmask; + int i; +#endif + if (mntinfo == NULL) { + /* Unlikely, but could be a race condition with a dying warden */ + return ENODEV; + } + + vcp = &(mntinfo->mi_vcomm); + + coda_clstat.ncalls++; + coda_clstat.reqs[((struct coda_in_hdr *)buffer)->opcode]++; + + if (!VC_OPEN(vcp)) + return(ENODEV); + + CODA_ALLOC(vmp,struct vmsg *,sizeof(struct vmsg)); + /* Format the request message. */ + vmp->vm_data = buffer; + vmp->vm_flags = 0; + vmp->vm_inSize = inSize; + vmp->vm_outSize + = *outSize ? *outSize : inSize; /* |buffer| >= inSize */ + vmp->vm_opcode = ((struct coda_in_hdr *)buffer)->opcode; + vmp->vm_unique = ++vcp->vc_seq; + if (codadebug) + myprintf(("Doing a call for %d.%d\n", + vmp->vm_opcode, vmp->vm_unique)); + + /* Fill in the common input args. */ + ((struct coda_in_hdr *)buffer)->unique = vmp->vm_unique; + + /* Append msg to request queue and poke Venus. */ + INSQUE(vmp->vm_chain, vcp->vc_requests); + selwakeup(&(vcp->vc_selproc)); + + /* We can be interrupted while we wait for Venus to process + * our request. If the interrupt occurs before Venus has read + * the request, we dequeue and return. If it occurs after the + * read but before the reply, we dequeue, send a signal + * message, and return. If it occurs after the reply we ignore + * it. In no case do we want to restart the syscall. If it + * was interrupted by a venus shutdown (vcclose), return + * ENODEV. */ + + /* Ignore return, We have to check anyway */ +#ifdef CTL_C + /* This is work in progress. Setting coda_pcatch lets tsleep reawaken + on a ^c or ^z. The problem is that emacs sets certain interrupts + as SA_RESTART. This means that we should exit sleep handle the + "signal" and then go to sleep again. 
Mostly this is done by letting + the syscall complete and be restarted. We are not idempotent and + can not do this. A better solution is necessary. + */ + i = 0; + do { + error = tsleep(&vmp->vm_sleep, (coda_call_sleep|coda_pcatch), "coda_call", hz*2); + if (error == 0) + break; + else if (error == EWOULDBLOCK) { +#ifdef CODA_VERBOSE + printf("coda_call: tsleep TIMEOUT %d sec\n", 2+2*i); +#endif + } else if (p->p_siglist == sigmask(SIGIO)) { + p->p_sigmask |= p->p_siglist; +#ifdef CODA_VERBOSE + printf("coda_call: tsleep returns %d SIGIO, cnt %d\n", error, i); +#endif + } else if (p->p_siglist == sigmask(SIGALRM)) { + p->p_sigmask |= p->p_siglist; +#ifdef CODA_VERBOSE + printf("coda_call: tsleep returns %d SIGALRM, cnt %d\n", error, i); +#endif + } else { + printf("coda_call: tsleep returns %d, cnt %d\n", error, i); + printf("coda_call: siglist = %x, sigmask = %x, mask %x\n", + p->p_siglist, p->p_sigmask, + p->p_siglist & ~p->p_sigmask); + break; +#ifdef notyet + p->p_sigmask |= p->p_siglist; + printf("coda_call: new mask, siglist = %x, sigmask = %x, mask %x\n", + p->p_siglist, p->p_sigmask, + p->p_siglist & ~p->p_sigmask); +#endif + } + } while (error && i++ < 128 && VC_OPEN(vcp)); + p->p_sigmask = psig_omask; +#else + (void) tsleep(&vmp->vm_sleep, coda_call_sleep, "coda_call", 0); +#endif + if (VC_OPEN(vcp)) { /* Venus is still alive */ + /* Op went through, interrupt or not... */ + if (vmp->vm_flags & VM_WRITE) { + error = 0; + *outSize = vmp->vm_outSize; + } + + else if (!(vmp->vm_flags & VM_READ)) { + /* Interrupted before venus read it. 
*/ +#ifdef CODA_VERBOSE + if (1) +#else + if (codadebug) +#endif + myprintf(("interrupted before read: op = %d.%d, flags = %x\n", + vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags)); + REMQUE(vmp->vm_chain); + error = EINTR; + } + + else { + /* (!(vmp->vm_flags & VM_WRITE)) means interrupted after + upcall started */ + /* Interrupted after start of upcall, send venus a signal */ + struct coda_in_hdr *dog; + struct vmsg *svmp; + +#ifdef CODA_VERBOSE + if (1) +#else + if (codadebug) +#endif + myprintf(("Sending Venus a signal: op = %d.%d, flags = %x\n", + vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags)); + + REMQUE(vmp->vm_chain); + error = EINTR; + + CODA_ALLOC(svmp, struct vmsg *, sizeof (struct vmsg)); + + CODA_ALLOC((svmp->vm_data), char *, sizeof (struct coda_in_hdr)); + dog = (struct coda_in_hdr *)svmp->vm_data; + + svmp->vm_flags = 0; + dog->opcode = svmp->vm_opcode = CODA_SIGNAL; + dog->unique = svmp->vm_unique = vmp->vm_unique; + svmp->vm_inSize = sizeof (struct coda_in_hdr); +/*??? rvb */ svmp->vm_outSize = sizeof (struct coda_in_hdr); + + if (codadebug) + myprintf(("coda_call: enqueing signal msg (%d, %d)\n", + svmp->vm_opcode, svmp->vm_unique)); + + /* insert at head of queue! 
*/ + INSQUE(svmp->vm_chain, vcp->vc_requests); + selwakeup(&(vcp->vc_selproc)); + } + } + + else { /* If venus died (!VC_OPEN(vcp)) */ + if (codadebug) + myprintf(("vcclose woke op %d.%d flags %d\n", + vmp->vm_opcode, vmp->vm_unique, vmp->vm_flags)); + + error = ENODEV; + } + + CODA_FREE(vmp, sizeof(struct vmsg)); + + if (outstanding_upcalls > 0 && (--outstanding_upcalls == 0)) + wakeup(&outstanding_upcalls); + + if (!error) + error = ((struct coda_out_hdr *)buffer)->result; + return(error); +} diff --git a/sys/fs/coda/coda_psdev.h b/sys/fs/coda/coda_psdev.h new file mode 100644 index 0000000..11922ad --- /dev/null +++ b/sys/fs/coda/coda_psdev.h @@ -0,0 +1,39 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. 
+ * + * @(#) src/sys/coda/coda_psdev.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_psdev.c,v 1.4 1998/09/13 13:57:59 rvb Exp $ + * + */ + +int vc_nb_open(dev_t dev, int flag, int mode, struct proc *p); +int vc_nb_close (dev_t dev, int flag, int mode, struct proc *p); +int vc_nb_read(dev_t dev, struct uio *uiop, int flag); +int vc_nb_write(dev_t dev, struct uio *uiop, int flag); +int vc_nb_ioctl(dev_t dev, u_long cmd, caddr_t addr, int flag, struct proc *p); +int vc_nb_poll(dev_t dev, int events, struct proc *p); diff --git a/sys/fs/coda/coda_subr.c b/sys/fs/coda/coda_subr.c new file mode 100644 index 0000000..40d2d0b --- /dev/null +++ b/sys/fs/coda/coda_subr.c @@ -0,0 +1,747 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. 
+ * + * @(#) src/sys/coda/coda_subr.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_subr.c,v 1.8 1998/10/28 19:33:50 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon + * University. Contributers include David Steere, James Kistler, and + * M. Satyanarayanan. */ + +/* + * HISTORY + * $Log: coda_subr.c,v $ + * Revision 1.8 1998/10/28 19:33:50 rvb + * Venus must be passed O_CREAT flag on VOP_OPEN iff this is + * a creat so that we can will allow a mode 444 file to be + * written into. Sync with the latest coda.h and deal with + * collateral damage. + * + * Revision 1.7 1998/09/29 20:19:45 rvb + * Fixes for lkm: + * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL + * 2. don't pass -DCODA to lkm build + * + * Revision 1.6 1998/09/25 17:38:31 rvb + * Put "stray" printouts under DIAGNOSTIC. Make everything build + * with DEBUG on. Add support for lkm. (The macro's don't work + * for me; for a good chuckle look at the end of coda_fbsd.c.) + * + * Revision 1.5 1998/09/13 13:57:59 rvb + * Finish conversion of cfs -> coda + * + * Revision 1.4 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.11 1998/08/28 18:12:18 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. 
+ * + * Revision 1.10 1998/08/18 17:05:16 rvb + * Don't use __RCSID now + * + * Revision 1.9 1998/08/18 16:31:41 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.8 98/01/31 20:53:12 rvb + * First version that works on FreeBSD 2.2.5 + * + * Revision 1.7 98/01/23 11:53:42 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.6.2.3 98/01/23 11:21:05 rvb + * Sync with 2.2.5 + * + * Revision 1.6.2.2 97/12/16 12:40:06 rvb + * Sync with 1.3 + * + * Revision 1.6.2.1 97/12/06 17:41:21 rvb + * Sync with peters coda.h + * + * Revision 1.6 97/12/05 10:39:17 rvb + * Read CHANGES + * + * Revision 1.5.4.8 97/11/26 15:28:58 rvb + * Cant make downcall pbuf == union cfs_downcalls yet + * + * Revision 1.5.4.7 97/11/20 11:46:42 rvb + * Capture current cfs_venus + * + * Revision 1.5.4.6 97/11/18 10:27:16 rvb + * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c + * cfs_nb_foo and cfs_foo are joined + * + * Revision 1.5.4.5 97/11/13 22:03:00 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.5.4.4 97/11/12 12:09:39 rvb + * reorg pass1 + * + * Revision 1.5.4.3 97/11/06 21:02:38 rvb + * first pass at ^c ^z + * + * Revision 1.5.4.2 97/10/29 16:06:27 rvb + * Kill DYING + * + * Revision 1.5.4.1 97/10/28 23:10:16 rvb + * >64Meg; venus can be killed! + * + * Revision 1.5 97/08/05 11:08:17 lily + * Removed cfsnc_replace, replaced it with a coda_find, unhash, and + * rehash. This fixes a cnode leak and a bug in which the fid is + * not actually replaced. (cfs_namecache.c, cfsnc.h, cfs_subr.c) + * + * Revision 1.4 96/12/12 22:10:59 bnoble + * Fixed the "downcall invokes venus operation" deadlock in all known cases. 
+ * There may be more + * + * Revision 1.3 1996/12/05 16:20:15 bnoble + * Minor debugging aids + * + * Revision 1.2 1996/01/02 16:57:01 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:27 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:07:59 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:07:58 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.8 1995/03/03 17:00:04 dcs + * Fixed kernel bug involving sleep and upcalls. Basically if you killed + * a job waiting on venus, the venus upcall queues got trashed. Depending + * on luck, you could kill the kernel or not. + * (mods to cfs_subr.c and cfs_mach.d) + * + * Revision 2.7 95/03/02 22:45:21 dcs + * Sun4 compatibility + * + * Revision 2.6 95/02/17 16:25:17 dcs + * These versions represent several changes: + * 1. Allow venus to restart even if outstanding references exist. + * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d + * 3. Allow ody_expand to return many members, not just one. + * + * Revision 2.5 94/11/09 15:56:26 dcs + * Had the thread sleeping on the wrong thing! + * + * Revision 2.4 94/10/14 09:57:57 dcs + * Made changes 'cause sun4s have braindead compilers + * + * Revision 2.3 94/10/12 16:46:26 dcs + * Cleaned kernel/venus interface by removing XDR junk, plus + * so cleanup to allow this code to be more easily ported. + * + * Revision 1.2 92/10/27 17:58:22 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 2.4 92/09/30 14:16:26 mja + * Incorporated Dave Steere's fix for the GNU-Emacs bug. + * Also, included his coda_flush routine in place of the former coda_nc_flush. + * [91/02/07 jjk] + * + * Added contributors blurb. + * [90/12/13 jjk] + * + * Hack to allow users to keep coda venus calls uninterruptible. 
THis + * basically prevents the Gnu-emacs bug from appearing, in which a call + * was being interrupted, and return EINTR, but gnu didn't check for the + * error and figured the file was buggered. + * [90/12/09 dcs] + * + * Revision 2.3 90/08/10 10:23:20 mrt + * Removed include of vm/vm_page.h as it no longer exists. + * [90/08/10 mrt] + * + * Revision 2.2 90/07/05 11:26:35 mrt + * Initialize name cache on first call to vcopen. + * [90/05/23 dcs] + * + * Created for the Coda File System. + * [90/05/23 dcs] + * + * Revision 1.5 90/05/31 17:01:35 dcs + * Prepare for merge with facilities kernel. + * + * Revision 1.2 90/03/19 15:56:25 dcs + * Initialize name cache on first call to vcopen. + * + * Revision 1.1 90/03/15 10:43:26 jjk + * Initial revision + * + */ + +/* NOTES: rvb + * 1. Added coda_unmounting to mark all cnodes as being UNMOUNTING. This has to + * be done before dounmount is called. Because some of the routines that + * dounmount calls before coda_unmounted might try to force flushes to venus. + * The vnode pager does this. + * 2. coda_unmounting marks all cnodes scanning coda_cache. + * 3. cfs_checkunmounting (under DEBUG) checks all cnodes by chasing the vnodes + * under the /coda mount point. + * 4. coda_cacheprint (under DEBUG) prints names with vnode/cnode address + */ + +#include <vcoda.h> + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/select.h> +#include <sys/mount.h> + +#include <coda/coda.h> +#include <coda/cnode.h> +#include <coda/coda_subr.h> +#include <coda/coda_namecache.h> + +int coda_active = 0; +int coda_reuse = 0; +int coda_new = 0; + +struct cnode *coda_freelist = NULL; +struct cnode *coda_cache[CODA_CACHESIZE]; + +#define coda_hash(fid) (((fid)->Volume + (fid)->Vnode) & (CODA_CACHESIZE-1)) +#define CNODE_NEXT(cp) ((cp)->c_next) +#define ODD(vnode) ((vnode) & 0x1) + +/* + * Allocate a cnode. 
+ */ +struct cnode * +coda_alloc(void) +{ + struct cnode *cp; + + if (coda_freelist) { + cp = coda_freelist; + coda_freelist = CNODE_NEXT(cp); + coda_reuse++; + } + else { + CODA_ALLOC(cp, struct cnode *, sizeof(struct cnode)); + /* NetBSD vnodes don't have any Pager info in them ('cause there are + no external pagers, duh!) */ +#define VNODE_VM_INFO_INIT(vp) /* MT */ + VNODE_VM_INFO_INIT(CTOV(cp)); + coda_new++; + } + bzero(cp, sizeof (struct cnode)); + + return(cp); +} + +/* + * Deallocate a cnode. + */ +void +coda_free(cp) + register struct cnode *cp; +{ + + CNODE_NEXT(cp) = coda_freelist; + coda_freelist = cp; +} + +/* + * Put a cnode in the hash table + */ +void +coda_save(cp) + struct cnode *cp; +{ + CNODE_NEXT(cp) = coda_cache[coda_hash(&cp->c_fid)]; + coda_cache[coda_hash(&cp->c_fid)] = cp; +} + +/* + * Remove a cnode from the hash table + */ +void +coda_unsave(cp) + struct cnode *cp; +{ + struct cnode *ptr; + struct cnode *ptrprev = NULL; + + ptr = coda_cache[coda_hash(&cp->c_fid)]; + while (ptr != NULL) { + if (ptr == cp) { + if (ptrprev == NULL) { + coda_cache[coda_hash(&cp->c_fid)] + = CNODE_NEXT(ptr); + } else { + CNODE_NEXT(ptrprev) = CNODE_NEXT(ptr); + } + CNODE_NEXT(cp) = (struct cnode *)NULL; + + return; + } + ptrprev = ptr; + ptr = CNODE_NEXT(ptr); + } +} + +/* + * Lookup a cnode by fid. If the cnode is dying, it is bogus so skip it. + * NOTE: this allows multiple cnodes with same fid -- dcs 1/25/95 + */ +struct cnode * +coda_find(fid) + ViceFid *fid; +{ + struct cnode *cp; + + cp = coda_cache[coda_hash(fid)]; + while (cp) { + if ((cp->c_fid.Vnode == fid->Vnode) && + (cp->c_fid.Volume == fid->Volume) && + (cp->c_fid.Unique == fid->Unique) && + (!IS_UNMOUNTING(cp))) + { + coda_active++; + return(cp); + } + cp = CNODE_NEXT(cp); + } + return(NULL); +} + +/* + * coda_kill is called as a side effect to vcopen. 
To prevent any + * cnodes left around from an earlier run of a venus or warden from + * causing problems with the new instance, mark any outstanding cnodes + * as dying. Future operations on these cnodes should fail (excepting + * coda_inactive of course!). Since multiple venii/wardens can be + * running, only kill the cnodes for a particular entry in the + * coda_mnttbl. -- DCS 12/1/94 */ + +int +coda_kill(whoIam, dcstat) + struct mount *whoIam; + enum dc_status dcstat; +{ + int hash, count = 0; + struct cnode *cp; + + /* + * Algorithm is as follows: + * Second, flush whatever vnodes we can from the name cache. + * + * Finally, step through whatever is left and mark them dying. + * This prevents any operation at all. + */ + + /* This is slightly overkill, but should work. Eventually it'd be + * nice to only flush those entries from the namecache that + * reference a vnode in this vfs. */ + coda_nc_flush(dcstat); + + for (hash = 0; hash < CODA_CACHESIZE; hash++) { + for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) { + if (CTOV(cp)->v_mount == whoIam) { +#ifdef DEBUG + printf("coda_kill: vp %p, cp %p\n", CTOV(cp), cp); +#endif + count++; + CODADEBUG(CODA_FLUSH, + myprintf(("Live cnode fid %lx.%lx.%lx flags %d count %d\n", + (cp->c_fid).Volume, + (cp->c_fid).Vnode, + (cp->c_fid).Unique, + cp->c_flags, + CTOV(cp)->v_usecount)); ); + } + } + } + return count; +} + +/* + * There are two reasons why a cnode may be in use, it may be in the + * name cache or it may be executing. 
+ */ +void +coda_flush(dcstat) + enum dc_status dcstat; +{ + int hash; + struct cnode *cp; + + coda_clstat.ncalls++; + coda_clstat.reqs[CODA_FLUSH]++; + + coda_nc_flush(dcstat); /* flush files from the name cache */ + + for (hash = 0; hash < CODA_CACHESIZE; hash++) { + for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) { + if (!ODD(cp->c_fid.Vnode)) /* only files can be executed */ + coda_vmflush(cp); + } + } +} + +/* + * As a debugging measure, print out any cnodes that lived through a + * name cache flush. + */ +void +coda_testflush(void) +{ + int hash; + struct cnode *cp; + + for (hash = 0; hash < CODA_CACHESIZE; hash++) { + for (cp = coda_cache[hash]; + cp != NULL; + cp = CNODE_NEXT(cp)) { + myprintf(("Live cnode fid %lx.%lx.%lx count %d\n", + (cp->c_fid).Volume,(cp->c_fid).Vnode, + (cp->c_fid).Unique, CTOV(cp)->v_usecount)); + } + } +} + +/* + * First, step through all cnodes and mark them unmounting. + * NetBSD kernels may try to fsync them now that venus + * is dead, which would be a bad thing. 
+ * + */ +void +coda_unmounting(whoIam) + struct mount *whoIam; +{ + int hash; + struct cnode *cp; + + for (hash = 0; hash < CODA_CACHESIZE; hash++) { + for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) { + if (CTOV(cp)->v_mount == whoIam) { + if (cp->c_flags & (C_LOCKED|C_WANTED)) { + printf("coda_unmounting: Unlocking %p\n", cp); + cp->c_flags &= ~(C_LOCKED|C_WANTED); + wakeup((caddr_t) cp); + } + cp->c_flags |= C_UNMOUNTING; + } + } + } +} + +#ifdef DEBUG +void +coda_checkunmounting(mp) + struct mount *mp; +{ + register struct vnode *vp, *nvp; + struct cnode *cp; + int count = 0, bad = 0; +loop: + for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) { + if (vp->v_mount != mp) + goto loop; + nvp = vp->v_mntvnodes.le_next; + cp = VTOC(vp); + count++; + if (!(cp->c_flags & C_UNMOUNTING)) { + bad++; + printf("vp %p, cp %p missed\n", vp, cp); + cp->c_flags |= C_UNMOUNTING; + } + } +} + +void +coda_cacheprint(whoIam) + struct mount *whoIam; +{ + int hash; + struct cnode *cp; + int count = 0; + + printf("coda_cacheprint: coda_ctlvp %p, cp %p", coda_ctlvp, VTOC(coda_ctlvp)); + coda_nc_name(VTOC(coda_ctlvp)); + printf("\n"); + + for (hash = 0; hash < CODA_CACHESIZE; hash++) { + for (cp = coda_cache[hash]; cp != NULL; cp = CNODE_NEXT(cp)) { + if (CTOV(cp)->v_mount == whoIam) { + printf("coda_cacheprint: vp %p, cp %p", CTOV(cp), cp); + coda_nc_name(cp); + printf("\n"); + count++; + } + } + } + printf("coda_cacheprint: count %d\n", count); +} +#endif + +/* + * There are 6 cases where invalidations occur. The semantics of each + * is listed here. + * + * CODA_FLUSH -- flush all entries from the name cache and the cnode cache. + * CODA_PURGEUSER -- flush all entries from the name cache for a specific user + * This call is a result of token expiration. + * + * The next two are the result of callbacks on a file or directory. + * CODA_ZAPDIR -- flush the attributes for the dir from its cnode. + * Zap all children of this directory from the namecache. 
+ * CODA_ZAPFILE -- flush the attributes for a file. + * + * The fifth is a result of Venus detecting an inconsistent file. + * CODA_PURGEFID -- flush the attribute for the file + * If it is a dir (odd vnode), purge its + * children from the namecache + * remove the file from the namecache. + * + * The sixth allows Venus to replace local fids with global ones + * during reintegration. + * + * CODA_REPLACE -- replace one ViceFid with another throughout the name cache + */ + +int handleDownCall(opcode, out) + int opcode; union outputArgs *out; +{ + int error; + + /* Handle invalidate requests. */ + switch (opcode) { + case CODA_FLUSH : { + + coda_flush(IS_DOWNCALL); + + CODADEBUG(CODA_FLUSH,coda_testflush();) /* print remaining cnodes */ + return(0); + } + + case CODA_PURGEUSER : { + coda_clstat.ncalls++; + coda_clstat.reqs[CODA_PURGEUSER]++; + + /* XXX - need to prevent fsync's */ + coda_nc_purge_user(out->coda_purgeuser.cred.cr_uid, IS_DOWNCALL); + return(0); + } + + case CODA_ZAPFILE : { + struct cnode *cp; + + error = 0; + coda_clstat.ncalls++; + coda_clstat.reqs[CODA_ZAPFILE]++; + + cp = coda_find(&out->coda_zapfile.CodaFid); + if (cp != NULL) { + vref(CTOV(cp)); + + cp->c_flags &= ~C_VATTR; + if (CTOV(cp)->v_flag & VTEXT) + error = coda_vmflush(cp); + CODADEBUG(CODA_ZAPFILE, myprintf(("zapfile: fid = (%lx.%lx.%lx), + refcnt = %d, error = %d\n", + cp->c_fid.Volume, + cp->c_fid.Vnode, + cp->c_fid.Unique, + CTOV(cp)->v_usecount - 1, error));); + if (CTOV(cp)->v_usecount == 1) { + cp->c_flags |= C_PURGING; + } + vrele(CTOV(cp)); + } + + return(error); + } + + case CODA_ZAPDIR : { + struct cnode *cp; + + coda_clstat.ncalls++; + coda_clstat.reqs[CODA_ZAPDIR]++; + + cp = coda_find(&out->coda_zapdir.CodaFid); + if (cp != NULL) { + vref(CTOV(cp)); + + cp->c_flags &= ~C_VATTR; + coda_nc_zapParentfid(&out->coda_zapdir.CodaFid, IS_DOWNCALL); + + CODADEBUG(CODA_ZAPDIR, myprintf(("zapdir: fid = (%lx.%lx.%lx), + refcnt = %d\n",cp->c_fid.Volume, + cp->c_fid.Vnode, + 
cp->c_fid.Unique, + CTOV(cp)->v_usecount - 1));); + if (CTOV(cp)->v_usecount == 1) { + cp->c_flags |= C_PURGING; + } + vrele(CTOV(cp)); + } + + return(0); + } + + case CODA_PURGEFID : { + struct cnode *cp; + + error = 0; + coda_clstat.ncalls++; + coda_clstat.reqs[CODA_PURGEFID]++; + + cp = coda_find(&out->coda_purgefid.CodaFid); + if (cp != NULL) { + vref(CTOV(cp)); + if (ODD(out->coda_purgefid.CodaFid.Vnode)) { /* Vnode is a directory */ + coda_nc_zapParentfid(&out->coda_purgefid.CodaFid, + IS_DOWNCALL); + } + cp->c_flags &= ~C_VATTR; + coda_nc_zapfid(&out->coda_purgefid.CodaFid, IS_DOWNCALL); + if (!(ODD(out->coda_purgefid.CodaFid.Vnode)) + && (CTOV(cp)->v_flag & VTEXT)) { + + error = coda_vmflush(cp); + } + CODADEBUG(CODA_PURGEFID, myprintf(("purgefid: fid = (%lx.%lx.%lx), refcnt = %d, error = %d\n", + cp->c_fid.Volume, cp->c_fid.Vnode, + cp->c_fid.Unique, + CTOV(cp)->v_usecount - 1, error));); + if (CTOV(cp)->v_usecount == 1) { + cp->c_flags |= C_PURGING; + } + vrele(CTOV(cp)); + } + return(error); + } + + case CODA_REPLACE : { + struct cnode *cp = NULL; + + coda_clstat.ncalls++; + coda_clstat.reqs[CODA_REPLACE]++; + + cp = coda_find(&out->coda_replace.OldFid); + if (cp != NULL) { + /* remove the cnode from the hash table, replace the fid, and reinsert */ + vref(CTOV(cp)); + coda_unsave(cp); + cp->c_fid = out->coda_replace.NewFid; + coda_save(cp); + + CODADEBUG(CODA_REPLACE, myprintf(("replace: oldfid = (%lx.%lx.%lx), newfid = (%lx.%lx.%lx), cp = %p\n", + out->coda_replace.OldFid.Volume, + out->coda_replace.OldFid.Vnode, + out->coda_replace.OldFid.Unique, + cp->c_fid.Volume, cp->c_fid.Vnode, + cp->c_fid.Unique, cp));) + vrele(CTOV(cp)); + } + return (0); + } + default: + myprintf(("handleDownCall: unknown opcode %d\n", opcode)); + return (EINVAL); + } +} + +/* coda_grab_vnode: lives in either cfs_mach.c or cfs_nbsd.c */ + +int +coda_vmflush(cp) + struct cnode *cp; +{ + return 0; +} + + +/* + * kernel-internal debugging switches + */ +void coda_debugon(void) +{ 
+ codadebug = -1; + coda_nc_debug = -1; + coda_vnop_print_entry = 1; + coda_psdev_print_entry = 1; + coda_vfsop_print_entry = 1; +} + +void coda_debugoff(void) +{ + codadebug = 0; + coda_nc_debug = 0; + coda_vnop_print_entry = 0; + coda_psdev_print_entry = 0; + coda_vfsop_print_entry = 0; +} + +/* + * Utilities used by both client and server + * Standard levels: + * 0) no debugging + * 1) hard failures + * 2) soft failures + * 3) current test software + * 4) main procedure entry points + * 5) main procedure exit points + * 6) utility procedure entry points + * 7) utility procedure exit points + * 8) obscure procedure entry points + * 9) obscure procedure exit points + * 10) random stuff + * 11) all <= 1 + * 12) all <= 2 + * 13) all <= 3 + * ... + */ diff --git a/sys/fs/coda/coda_subr.h b/sys/fs/coda/coda_subr.h new file mode 100644 index 0000000..fe27bfd --- /dev/null +++ b/sys/fs/coda/coda_subr.h @@ -0,0 +1,45 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. 
+ * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_subr.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_subr.h,v 1.5 1998/09/13 13:57:59 rvb Exp $ + * + */ + +struct cnode *coda_alloc(void); +void coda_free(struct cnode *cp); +struct cnode *coda_find(ViceFid *fid); +void coda_flush(enum dc_status dcstat); +void coda_testflush(void); +void coda_checkunmounting(struct mount *mp); +void coda_cacheprint(struct mount *whoIam); +void coda_debugon(void); +void coda_debugoff(void); +int coda_kill(struct mount *whoIam, enum dc_status dcstat); +void coda_save(struct cnode *cp); +void coda_unsave(struct cnode *cp); diff --git a/sys/fs/coda/coda_venus.c b/sys/fs/coda/coda_venus.c new file mode 100644 index 0000000..96228f1 --- /dev/null +++ b/sys/fs/coda/coda_venus.c @@ -0,0 +1,660 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. 
/*
 * Local-variable boilerplate shared by the venus_*() routines below.
 * Each DECL* macro expands to the declarations of:
 *
 *   inp, outp     request and reply pointers (pointed at one buffer by the
 *                 matching ALLOC* macro)
 *   <name>_size   size used for CODA_ALLOC/CODA_FREE; callers may enlarge it
 *                 between DECL* and ALLOC* to make room for trailing data
 *   Isize, Osize  request and reply lengths handed to coda_call()
 *   error         result code; declared last and without a ';' so the
 *                 semicolon at the point of use terminates it
 *
 * DECL_NO_IN: the request carries no arguments, only a struct coda_in_hdr.
 */
#define DECL_NO_IN(name) \
    struct coda_in_hdr *inp; \
    struct name ## _out *outp; \
    int name ## _size = sizeof (struct coda_in_hdr); \
    int Isize = sizeof (struct coda_in_hdr); \
    int Osize = sizeof (struct name ## _out); \
    int error

/* DECL: request is struct <name>_in, reply is struct <name>_out. */
#define DECL(name) \
    struct name ## _in *inp; \
    struct name ## _out *outp; \
    int name ## _size = sizeof (struct name ## _in); \
    int Isize = sizeof (struct name ## _in); \
    int Osize = sizeof (struct name ## _out); \
    int error

/* DECL_NO_OUT: the reply carries no results, only a struct coda_out_hdr. */
#define DECL_NO_OUT(name) \
    struct name ## _in *inp; \
    struct coda_out_hdr *outp; \
    int name ## _size = sizeof (struct name ## _in); \
    int Isize = sizeof (struct name ## _in); \
    int Osize = sizeof (struct coda_out_hdr); \
    int error

/*
 * ALLOC*: make <name>_size at least as large as the reply, allocate that
 * many bytes for inp with CODA_ALLOC, and alias outp to the same memory.
 * Presumably coda_call() writes the reply over the request in place --
 * confirm against coda_call().
 * The expansion starts with an `if' statement, so it must follow the last
 * declaration in the calling function.
 */
#define ALLOC_NO_IN(name) \
    if (Osize > name ## _size) \
	name ## _size = Osize; \
    CODA_ALLOC(inp, struct coda_in_hdr *, name ## _size);\
    outp = (struct name ## _out *) inp

/* ALLOC: as ALLOC_NO_IN, for routines declared with DECL(). */
#define ALLOC(name) \
    if (Osize > name ## _size) \
	name ## _size = Osize; \
    CODA_ALLOC(inp, struct name ## _in *, name ## _size);\
    outp = (struct name ## _out *) inp
/*
 * INIT_IN(in, op, ident, p): fill in the common request header *in.
 *
 *   in     pointer to the header (it has opcode, pid, pgid, sid and cred)
 *   op     opcode stored in in->opcode
 *   ident  caller's credentials, or NOCRED; with NOCRED the cred field is
 *          zeroed and its uid/gid are set to -1
 *   p      calling process, or a null pointer; pid, pgid and sid are set
 *          to -1 when it (or its session leader) is missing
 *
 * Wrapped in do { } while (0) and with every argument parenthesized so the
 * macro behaves as one statement (safe in an unbraced if/else) and accepts
 * arbitrary expressions for `p' and `ident'.
 */
#define INIT_IN(in, op, ident, p) \
	do { \
	    (in)->opcode = (op); \
	    (in)->pid = (p) ? (p)->p_pid : -1; \
	    (in)->pgid = (p) ? (p)->p_pgid : -1; \
	    (in)->sid = ((p) && (p)->p_session && (p)->p_session->s_leader) ? \
		((p)->p_session->s_leader->p_pid) : -1; \
	    if ((ident) != NOCRED) { \
		(in)->cred.cr_uid = (ident)->cr_uid; \
		(in)->cred.cr_groupid = (ident)->cr_gid; \
	    } else { \
		bzero(&((in)->cred), sizeof(struct coda_cred)); \
		(in)->cred.cr_uid = -1; \
		(in)->cred.cr_groupid = -1; \
	    } \
	} while (0)
(fromp)->va_uid; \ + (top)->va_gid = (fromp)->va_gid; \ + (top)->va_fileid = (fromp)->va_fileid; \ + (top)->va_size = (fromp)->va_size; \ + (top)->va_blocksize = (fromp)->va_blocksize; \ + (top)->va_atime = (fromp)->va_atime; \ + (top)->va_mtime = (fromp)->va_mtime; \ + (top)->va_ctime = (fromp)->va_ctime; \ + (top)->va_gen = (fromp)->va_gen; \ + (top)->va_flags = (fromp)->va_flags; \ + (top)->va_rdev = (fromp)->va_rdev; \ + (top)->va_bytes = (fromp)->va_bytes; \ + (top)->va_filerev = (fromp)->va_filerev; \ + } while (0) + + +int coda_kernel_version = CODA_KERNEL_VERSION; + +int +venus_root(void *mdp, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid) +{ + DECL_NO_IN(coda_root); /* sets Isize & Osize */ + ALLOC_NO_IN(coda_root); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(inp, CODA_ROOT, cred, p); + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) + *VFid = outp->VFid; + + CODA_FREE(inp, coda_root_size); + return error; +} + +int +venus_open(void *mdp, ViceFid *fid, int flag, + struct ucred *cred, struct proc *p, +/*out*/ dev_t *dev, ino_t *inode) +{ + int cflag; + DECL(coda_open); /* sets Isize & Osize */ + ALLOC(coda_open); /* sets inp & outp */ + + /* send the open to venus. 
*/ + INIT_IN(&inp->ih, CODA_OPEN, cred, p); + inp->VFid = *fid; + CNV_OFLAG(cflag, flag); + inp->flags = cflag; + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + *dev = outp->dev; + *inode = outp->inode; + } + + CODA_FREE(inp, coda_open_size); + return error; +} + +int +venus_close(void *mdp, ViceFid *fid, int flag, + struct ucred *cred, struct proc *p) +{ + int cflag; + DECL_NO_OUT(coda_close); /* sets Isize & Osize */ + ALLOC_NO_OUT(coda_close); /* sets inp & outp */ + + INIT_IN(&inp->ih, CODA_CLOSE, cred, p); + inp->VFid = *fid; + CNV_OFLAG(cflag, flag); + inp->flags = cflag; + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_close_size); + return error; +} + +/* + * these two calls will not exist!!! the container file is read/written + * directly. + */ +void +venus_read(void) +{ +} + +void +venus_write(void) +{ +} + +/* + * this is a bit sad too. the ioctl's are for the control file, not for + * normal files. + */ +int +venus_ioctl(void *mdp, ViceFid *fid, + int com, int flag, caddr_t data, + struct ucred *cred, struct proc *p) +{ + DECL(coda_ioctl); /* sets Isize & Osize */ + struct PioctlData *iap = (struct PioctlData *)data; + int tmp; + + coda_ioctl_size = VC_MAXMSGSIZE; + ALLOC(coda_ioctl); /* sets inp & outp */ + + INIT_IN(&inp->ih, CODA_IOCTL, cred, p); + inp->VFid = *fid; + + /* command was mutated by increasing its size field to reflect the + * path and follow args. we need to subtract that out before sending + * the command to venus. 
+ */ + inp->cmd = (com & ~(IOCPARM_MASK << 16)); + tmp = ((com >> 16) & IOCPARM_MASK) - sizeof (char *) - sizeof (int); + inp->cmd |= (tmp & IOCPARM_MASK) << 16; + + inp->rwflag = flag; + inp->len = iap->vi.in_size; + inp->data = (char *)(sizeof (struct coda_ioctl_in)); + + error = copyin(iap->vi.in, (char*)inp + (int)inp->data, + iap->vi.in_size); + if (error) { + CODA_FREE(inp, coda_ioctl_size); + return(error); + } + + Osize = VC_MAXMSGSIZE; + error = coda_call(mdp, Isize + iap->vi.in_size, &Osize, (char *)inp); + + /* copy out the out buffer. */ + if (!error) { + if (outp->len > iap->vi.out_size) { + error = EINVAL; + } else { + error = copyout((char *)outp + (int)outp->data, + iap->vi.out, iap->vi.out_size); + } + } + + CODA_FREE(inp, coda_ioctl_size); + return error; +} + +int +venus_getattr(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p, +/*out*/ struct vattr *vap) +{ + DECL(coda_getattr); /* sets Isize & Osize */ + ALLOC(coda_getattr); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_GETATTR, cred, p); + inp->VFid = *fid; + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + CNV_VV2V_ATTR(vap, &outp->attr); + } + + CODA_FREE(inp, coda_getattr_size); + return error; +} + +int +venus_setattr(void *mdp, ViceFid *fid, struct vattr *vap, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_setattr); /* sets Isize & Osize */ + ALLOC_NO_OUT(coda_setattr); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_SETATTR, cred, p); + inp->VFid = *fid; + CNV_V2VV_ATTR(&inp->attr, vap); + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_setattr_size); + return error; +} + +int +venus_access(void *mdp, ViceFid *fid, int mode, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_access); /* sets Isize & Osize */ + ALLOC_NO_OUT(coda_access); /* sets inp & outp */ + + /* send the open to venus. 
*/ + INIT_IN(&inp->ih, CODA_ACCESS, cred, p); + inp->VFid = *fid; + /* NOTE: + * NetBSD and Venus internals use the "data" in the low 3 bits. + * Hence, the conversion. + */ + inp->flags = mode>>6; + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_access_size); + return error; +} + +int +venus_readlink(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p, +/*out*/ char **str, int *len) +{ + DECL(coda_readlink); /* sets Isize & Osize */ + coda_readlink_size += CODA_MAXPATHLEN; + ALLOC(coda_readlink); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_READLINK, cred, p); + inp->VFid = *fid; + + Osize += CODA_MAXPATHLEN; + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + CODA_ALLOC(*str, char *, outp->count); + *len = outp->count; + bcopy((char *)outp + (int)outp->data, *str, *len); + } + + CODA_FREE(inp, coda_readlink_size); + return error; +} + +int +venus_fsync(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_fsync); /* sets Isize & Osize */ + ALLOC_NO_OUT(coda_fsync); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_FSYNC, cred, p); + inp->VFid = *fid; + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_fsync_size); + return error; +} + +int +venus_lookup(void *mdp, ViceFid *fid, + const char *nm, int len, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, int *vtype) +{ + DECL(coda_lookup); /* sets Isize & Osize */ + coda_lookup_size += len + 1; + ALLOC(coda_lookup); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_LOOKUP, cred, p); + inp->VFid = *fid; + + /* NOTE: + * Between version 1 and version 2 we have added an extra flag field + * to this structure. 
But because the string was at the end and because + * of the wierd way we represent strings by having the slot point to + * where the string characters are in the "heap", we can just slip the + * flag parameter in after the string slot pointer and veni that don't + * know better won't see this new flag field ... + * Otherwise we'd need two different venus_lookup functions. + */ + inp->name = Isize; + inp->flags = CLU_CASE_SENSITIVE; /* doesn't really matter for BSD */ + STRCPY(name, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + *VFid = outp->VFid; + *vtype = outp->vtype; + } + + CODA_FREE(inp, coda_lookup_size); + return error; +} + +int +venus_create(void *mdp, ViceFid *fid, + const char *nm, int len, int exclusive, int mode, struct vattr *va, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, struct vattr *attr) +{ + DECL(coda_create); /* sets Isize & Osize */ + coda_create_size += len + 1; + ALLOC(coda_create); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_CREATE, cred, p); + inp->VFid = *fid; + inp->excl = exclusive ? C_O_EXCL : 0; + inp->mode = mode; + CNV_V2VV_ATTR(&inp->attr, va); + + inp->name = Isize; + STRCPY(name, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + *VFid = outp->VFid; + CNV_VV2V_ATTR(attr, &outp->attr); + } + + CODA_FREE(inp, coda_create_size); + return error; +} + +int +venus_remove(void *mdp, ViceFid *fid, + const char *nm, int len, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_remove); /* sets Isize & Osize */ + coda_remove_size += len + 1; + ALLOC_NO_OUT(coda_remove); /* sets inp & outp */ + + /* send the open to venus. 
*/ + INIT_IN(&inp->ih, CODA_REMOVE, cred, p); + inp->VFid = *fid; + + inp->name = Isize; + STRCPY(name, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_remove_size); + return error; +} + +int +venus_link(void *mdp, ViceFid *fid, ViceFid *tfid, + const char *nm, int len, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_link); /* sets Isize & Osize */ + coda_link_size += len + 1; + ALLOC_NO_OUT(coda_link); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_LINK, cred, p); + inp->sourceFid = *fid; + inp->destFid = *tfid; + + inp->tname = Isize; + STRCPY(tname, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_link_size); + return error; +} + +int +venus_rename(void *mdp, ViceFid *fid, ViceFid *tfid, + const char *nm, int len, const char *tnm, int tlen, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_rename); /* sets Isize & Osize */ + coda_rename_size += len + 1 + tlen + 1; + ALLOC_NO_OUT(coda_rename); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_RENAME, cred, p); + inp->sourceFid = *fid; + inp->destFid = *tfid; + + inp->srcname = Isize; + STRCPY(srcname, nm, len); /* increments Isize */ + + inp->destname = Isize; + STRCPY(destname, tnm, tlen); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_rename_size); + return error; +} + +int +venus_mkdir(void *mdp, ViceFid *fid, + const char *nm, int len, struct vattr *va, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, struct vattr *ova) +{ + DECL(coda_mkdir); /* sets Isize & Osize */ + coda_mkdir_size += len + 1; + ALLOC(coda_mkdir); /* sets inp & outp */ + + /* send the open to venus. 
*/ + INIT_IN(&inp->ih, CODA_MKDIR, cred, p); + inp->VFid = *fid; + CNV_V2VV_ATTR(&inp->attr, va); + + inp->name = Isize; + STRCPY(name, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + *VFid = outp->VFid; + CNV_VV2V_ATTR(ova, &outp->attr); + } + + CODA_FREE(inp, coda_mkdir_size); + return error; +} + +int +venus_rmdir(void *mdp, ViceFid *fid, + const char *nm, int len, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_rmdir); /* sets Isize & Osize */ + coda_rmdir_size += len + 1; + ALLOC_NO_OUT(coda_rmdir); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_RMDIR, cred, p); + inp->VFid = *fid; + + inp->name = Isize; + STRCPY(name, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_rmdir_size); + return error; +} + +int +venus_symlink(void *mdp, ViceFid *fid, + const char *lnm, int llen, const char *nm, int len, struct vattr *va, + struct ucred *cred, struct proc *p) +{ + DECL_NO_OUT(coda_symlink); /* sets Isize & Osize */ + coda_symlink_size += llen + 1 + len + 1; + ALLOC_NO_OUT(coda_symlink); /* sets inp & outp */ + + /* send the open to venus. */ + INIT_IN(&inp->ih, CODA_SYMLINK, cred, p); + inp->VFid = *fid; + CNV_V2VV_ATTR(&inp->attr, va); + + inp->srcname = Isize; + STRCPY(srcname, lnm, llen); /* increments Isize */ + + inp->tname = Isize; + STRCPY(tname, nm, len); /* increments Isize */ + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + + CODA_FREE(inp, coda_symlink_size); + return error; +} + +int +venus_readdir(void *mdp, ViceFid *fid, + int count, int offset, + struct ucred *cred, struct proc *p, +/*out*/ char *buffer, int *len) +{ + DECL(coda_readdir); /* sets Isize & Osize */ + coda_readdir_size = VC_MAXMSGSIZE; + ALLOC(coda_readdir); /* sets inp & outp */ + + /* send the open to venus. 
*/ + INIT_IN(&inp->ih, CODA_READDIR, cred, p); + inp->VFid = *fid; + inp->count = count; + inp->offset = offset; + + Osize = VC_MAXMSGSIZE; + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + bcopy((char *)outp + (int)outp->data, buffer, outp->size); + *len = outp->size; + } + + CODA_FREE(inp, coda_readdir_size); + return error; +} + +int +venus_fhtovp(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, int *vtype) +{ + DECL(coda_vget); /* sets Isize & Osize */ + ALLOC(coda_vget); /* sets inp & outp */ + + /* Send the open to Venus. */ + INIT_IN(&inp->ih, CODA_VGET, cred, p); + inp->VFid = *fid; + + error = coda_call(mdp, Isize, &Osize, (char *)inp); + if (!error) { + *VFid = outp->VFid; + *vtype = outp->vtype; + } + + CODA_FREE(inp, coda_vget_size); + return error; +} diff --git a/sys/fs/coda/coda_venus.h b/sys/fs/coda/coda_venus.h new file mode 100644 index 0000000..6fdd15c --- /dev/null +++ b/sys/fs/coda/coda_venus.h @@ -0,0 +1,133 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. 
CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_venus.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_venus.h,v 1.2 1998/09/02 19:09:53 rvb Exp $ + * + */ + +int +venus_root(void *mdp, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid); + +int +venus_open(void *mdp, ViceFid *fid, int flag, + struct ucred *cred, struct proc *p, +/*out*/ dev_t *dev, ino_t *inode); + +int +venus_close(void *mdp, ViceFid *fid, int flag, + struct ucred *cred, struct proc *p); + +void +venus_read(void); + +void +venus_write(void); + +int +venus_ioctl(void *mdp, ViceFid *fid, + int com, int flag, caddr_t data, + struct ucred *cred, struct proc *p); + +int +venus_getattr(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p, +/*out*/ struct vattr *vap); + +int +venus_setattr(void *mdp, ViceFid *fid, struct vattr *vap, + struct ucred *cred, struct proc *p); + +int +venus_access(void *mdp, ViceFid *fid, int mode, + struct ucred *cred, struct proc *p); + +int +venus_readlink(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p, +/*out*/ char **str, int *len); + +int +venus_fsync(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p); + +int +venus_lookup(void *mdp, ViceFid *fid, + const char *nm, int len, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, int *vtype); + +int +venus_create(void *mdp, ViceFid *fid, + const char *nm, int len, int exclusive, int mode, struct vattr *va, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, struct vattr *attr); + +int +venus_remove(void *mdp, ViceFid *fid, + const char *nm, int len, + struct ucred *cred, struct proc 
*p); + +int +venus_link(void *mdp, ViceFid *fid, ViceFid *tfid, + const char *nm, int len, + struct ucred *cred, struct proc *p); + +int +venus_rename(void *mdp, ViceFid *fid, ViceFid *tfid, + const char *nm, int len, const char *tnm, int tlen, + struct ucred *cred, struct proc *p); + +int +venus_mkdir(void *mdp, ViceFid *fid, + const char *nm, int len, struct vattr *va, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, struct vattr *ova); + +int +venus_rmdir(void *mdp, ViceFid *fid, + const char *nm, int len, + struct ucred *cred, struct proc *p); + +int +venus_symlink(void *mdp, ViceFid *fid, + const char *lnm, int llen, const char *nm, int len, struct vattr *va, + struct ucred *cred, struct proc *p); + +int +venus_readdir(void *mdp, ViceFid *fid, + int count, int offset, + struct ucred *cred, struct proc *p, +/*out*/ char *buffer, int *len); + +int +venus_fhtovp(void *mdp, ViceFid *fid, + struct ucred *cred, struct proc *p, +/*out*/ ViceFid *VFid, int *vtype); diff --git a/sys/fs/coda/coda_vfsops.c b/sys/fs/coda/coda_vfsops.c new file mode 100644 index 0000000..8f6befe --- /dev/null +++ b/sys/fs/coda/coda_vfsops.c @@ -0,0 +1,770 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. 
CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/cfs/coda_vfsops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_vfsops.c,v 1.10 1998/12/04 22:54:43 archie Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon + * University. Contributers include David Steere, James Kistler, and + * M. Satyanarayanan. + */ + +/* + * HISTORY + * $Log: coda_vfsops.c,v $ + * Revision 1.10 1998/12/04 22:54:43 archie + * Examine all occurrences of sprintf(), strcat(), and str[n]cpy() + * for possible buffer overflow problems. Replaced most sprintf()'s + * with snprintf(); for others cases, added terminating NUL bytes where + * appropriate, replaced constants like "16" with sizeof(), etc. + * + * These changes include several bug fixes, but most changes are for + * maintainability's sake. Any instance where it wasn't "immediately + * obvious" that a buffer overflow could not occur was made safer. + * + * Reviewed by: Bruce Evans <bde@zeta.org.au> + * Reviewed by: Matthew Dillon <dillon@apollo.backplane.com> + * Reviewed by: Mike Spengler <mks@networkcs.com> + * + * Revision 1.9 1998/11/16 19:48:26 rvb + * A few bug fixes for Robert Watson + * + * Revision 1.8 1998/11/03 08:55:06 peter + * Support KLD. We register and unregister two modules. 
"coda" (the vfs) + * via VFS_SET(), and "codadev" for the cdevsw entry. From kldstat -v: + * 3 1 0xf02c5000 115d8 coda.ko + * Contains modules: + * Id Name + * 2 codadev + * 3 coda + * + * Revision 1.7 1998/09/29 20:19:45 rvb + * Fixes for lkm: + * 1. use VFS_LKM vs ACTUALLY_LKM_NOT_KERNEL + * 2. don't pass -DCODA to lkm build + * + * Revision 1.6 1998/09/25 17:38:32 rvb + * Put "stray" printouts under DIAGNOSTIC. Make everything build + * with DEBUG on. Add support for lkm. (The macro's don't work + * for me; for a good chuckle look at the end of coda_fbsd.c.) + * + * Revision 1.5 1998/09/13 13:57:59 rvb + * Finish conversion of cfs -> coda + * + * Revision 1.4 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.11 1998/08/28 18:12:22 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. 
+ * + * Revision 1.10 1998/08/18 17:05:19 rvb + * Don't use __RCSID now + * + * Revision 1.9 1998/08/18 16:31:44 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.8 98/02/24 22:22:48 rvb + * Fixes up mainly to flush iopen and friends + * + * Revision 1.7 98/01/23 11:53:45 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.6.2.6 98/01/23 11:21:07 rvb + * Sync with 2.2.5 + * + * Revision 1.6.2.5 98/01/22 13:05:33 rvb + * Move make_coda_node ctlfid later so vfsp is known + * + * Revision 1.6.2.4 97/12/19 14:26:05 rvb + * session id + * + * Revision 1.6.2.3 97/12/16 12:40:11 rvb + * Sync with 1.3 + * + * Revision 1.6.2.2 97/12/10 11:40:25 rvb + * No more ody + * + * Revision 1.6.2.1 97/12/06 17:41:24 rvb + * Sync with peters coda.h + * + * Revision 1.6 97/12/05 10:39:21 rvb + * Read CHANGES + * + * Revision 1.5.14.8 97/11/24 15:44:46 rvb + * Final cfs_venus.c w/o macros, but one locking bug + * + * Revision 1.5.14.7 97/11/21 13:22:03 rvb + * Catch a few coda_calls in coda_vfsops.c + * + * Revision 1.5.14.6 97/11/20 11:46:48 rvb + * Capture current cfs_venus + * + * Revision 1.5.14.5 97/11/18 10:27:17 rvb + * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c + * cfs_nb_foo and cfs_foo are joined + * + * Revision 1.5.14.4 97/11/13 22:03:01 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.5.14.3 97/11/12 12:09:40 rvb + * reorg pass1 + * + * Revision 1.5.14.2 97/10/29 16:06:28 rvb + * Kill DYING + * + * Revision 1.5.14.1 1997/10/28 23:10:17 rvb + * >64Meg; venus can be killed! + * + * Revision 1.5 1997/01/13 17:11:07 bnoble + * Coda statfs needs to return something other than -1 for blocks avail. and + * files available for wabi (and other windowsish) programs to install + * there correctly. + * + * Revision 1.4 1996/12/12 22:11:00 bnoble + * Fixed the "downcall invokes venus operation" deadlock in all known cases. 
+ * There may be more + * + * Revision 1.3 1996/11/08 18:06:12 bnoble + * Minor changes in vnode operation signature, VOP_UPDATE signature, and + * some newly defined bits in the include files. + * + * Revision 1.2 1996/01/02 16:57:04 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:32 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:08:02 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:08:01 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.4 1995/02/17 16:25:22 dcs + * These versions represent several changes: + * 1. Allow venus to restart even if outstanding references exist. + * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d + * 3. Allow ody_expand to return many members, not just one. + * + * Revision 2.3 94/10/14 09:58:21 dcs + * Made changes 'cause sun4s have braindead compilers + * + * Revision 2.2 94/10/12 16:46:33 dcs + * Cleaned kernel/venus interface by removing XDR junk, plus + * so cleanup to allow this code to be more easily ported. + * + * Revision 1.3 93/05/28 16:24:29 bnoble + * *** empty log message *** + * + * Revision 1.2 92/10/27 17:58:24 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 2.3 92/09/30 14:16:32 mja + * Added call to coda_flush to coda_unmount. + * [90/12/15 dcs] + * + * Added contributors blurb. + * [90/12/13 jjk] + * + * Revision 2.2 90/07/05 11:26:40 mrt + * Created for the Coda File System. + * [90/05/23 dcs] + * + * Revision 1.3 90/05/31 17:01:42 dcs + * Prepare for merge with facilities kernel. 
/*
 * Counters for the VFS-level operations, kept in coda_vfsopstats[] and
 * indexed by operation number `op':
 *   MARK_ENTRY     bump the `entries' counter
 *   MARK_INT_SAT   bump the `sat_intrn' counter
 *   MARK_INT_FAIL  bump the `unsat_intrn' counter
 *   MARK_INT_GEN   bump the `gen_intrn' counter
 */
#define MARK_ENTRY(op) (coda_vfsopstats[op].entries++)
#define MARK_INT_SAT(op) (coda_vfsopstats[op].sat_intrn++)
#define MARK_INT_FAIL(op) (coda_vfsopstats[op].unsat_intrn++)
#define MARK_INT_GEN(op) (coda_vfsopstats[op].gen_intrn++)
/* Historical misspelling, kept so any existing user keeps compiling. */
#define MRAK_INT_GEN(op) MARK_INT_GEN(op)
*/ + struct nameidata *ndp; /* Clobber this to lookup the device name */ + struct proc *p; /* The ever-famous proc pointer */ +{ + struct vnode *dvp; + struct cnode *cp; + dev_t dev; + struct coda_mntinfo *mi; + struct vnode *rootvp; + ViceFid rootfid; + ViceFid ctlfid; + int error; + + ENTRY; + + coda_vfsopstats_init(); + coda_vnodeopstats_init(); + + MARK_ENTRY(CODA_MOUNT_STATS); + if (CODA_MOUNTED(vfsp)) { + MARK_INT_FAIL(CODA_MOUNT_STATS); + return(EBUSY); + } + + /* Validate mount device. Similar to getmdev(). */ + + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, data, p); + error = namei(ndp); + dvp = ndp->ni_vp; + + if (error) { + MARK_INT_FAIL(CODA_MOUNT_STATS); + return (error); + } + if (dvp->v_type != VCHR) { + MARK_INT_FAIL(CODA_MOUNT_STATS); + vrele(dvp); + return(ENXIO); + } + dev = dvp->v_specinfo->si_rdev; + vrele(dvp); + if (major(dev) >= nchrdev || major(dev) < 0) { + MARK_INT_FAIL(CODA_MOUNT_STATS); + return(ENXIO); + } + + /* + * See if the device table matches our expectations. + */ + if (cdevsw[major(dev)]->d_open != vc_nb_open) + { + MARK_INT_FAIL(CODA_MOUNT_STATS); + return(ENXIO); + } + + if (minor(dev) >= NVCODA || minor(dev) < 0) { + MARK_INT_FAIL(CODA_MOUNT_STATS); + return(ENXIO); + } + + /* + * Initialize the mount record and link it to the vfs struct + */ + mi = &coda_mnttbl[minor(dev)]; + + if (!VC_OPEN(&mi->mi_vcomm)) { + MARK_INT_FAIL(CODA_MOUNT_STATS); + return(ENODEV); + } + + /* No initialization (here) of mi_vcomm! */ + vfsp->mnt_data = (qaddr_t)mi; + vfs_getnewfsid (vfsp); + + mi->mi_vfsp = vfsp; + + /* + * Make a root vnode to placate the Vnode interface, but don't + * actually make the CODA_ROOT call to venus until the first call + * to coda_root in case a server is down while venus is starting. 
+ */ + rootfid.Volume = 0; + rootfid.Vnode = 0; + rootfid.Unique = 0; + cp = make_coda_node(&rootfid, vfsp, VDIR); + rootvp = CTOV(cp); + rootvp->v_flag |= VROOT; + + ctlfid.Volume = CTL_VOL; + ctlfid.Vnode = CTL_VNO; + ctlfid.Unique = CTL_UNI; +/* cp = make_coda_node(&ctlfid, vfsp, VCHR); + The above code seems to cause a loop in the cnode links. + I don't totally understand when it happens, it is caught + when closing down the system. + */ + cp = make_coda_node(&ctlfid, 0, VCHR); + + coda_ctlvp = CTOV(cp); + + /* Add vfs and rootvp to chain of vfs hanging off mntinfo */ + mi->mi_vfsp = vfsp; + mi->mi_rootvp = rootvp; + + /* set filesystem block size */ + vfsp->mnt_stat.f_bsize = 8192; /* XXX -JJK */ + + /* Set f_iosize. XXX -- inamura@isl.ntt.co.jp. + For vnode_pager_haspage() references. The value should be obtained + from underlying UFS. */ + /* Checked UFS. iosize is set as 8192 */ + vfsp->mnt_stat.f_iosize = 8192; + + /* error is currently guaranteed to be zero, but in case some + code changes... 
*/ + CODADEBUG(1, + myprintf(("coda_mount returned %d\n",error));); + if (error) + MARK_INT_FAIL(CODA_MOUNT_STATS); + else + MARK_INT_SAT(CODA_MOUNT_STATS); + + return(error); +} + +int +coda_start(vfsp, flags, p) + struct mount *vfsp; + int flags; + struct proc *p; +{ + ENTRY; + return (0); +} + +int +coda_unmount(vfsp, mntflags, p) + struct mount *vfsp; + int mntflags; + struct proc *p; +{ + struct coda_mntinfo *mi = vftomi(vfsp); + int active, error = 0; + + ENTRY; + MARK_ENTRY(CODA_UMOUNT_STATS); + if (!CODA_MOUNTED(vfsp)) { + MARK_INT_FAIL(CODA_UMOUNT_STATS); + return(EINVAL); + } + + if (mi->mi_vfsp == vfsp) { /* We found the victim */ + if (!IS_UNMOUNTING(VTOC(mi->mi_rootvp))) + return (EBUSY); /* Venus is still running */ + +#ifdef DEBUG + printf("coda_unmount: ROOT: vp %p, cp %p\n", mi->mi_rootvp, VTOC(mi->mi_rootvp)); +#endif + vrele(mi->mi_rootvp); + + active = coda_kill(vfsp, NOT_DOWNCALL); + mi->mi_rootvp->v_flag &= ~VROOT; + error = vflush(mi->mi_vfsp, NULLVP, FORCECLOSE); + printf("coda_unmount: active = %d, vflush active %d\n", active, error); + error = 0; + /* I'm going to take this out to allow lookups to go through. I'm + * not sure it's important anyway. -- DCS 2/2/94 + */ + /* vfsp->VFS_DATA = NULL; */ + + /* No more vfsp's to hold onto */ + mi->mi_vfsp = NULL; + mi->mi_rootvp = NULL; + + if (error) + MARK_INT_FAIL(CODA_UMOUNT_STATS); + else + MARK_INT_SAT(CODA_UMOUNT_STATS); + + return(error); + } + return (EINVAL); +} + +/* + * find root of cfs + */ +int +coda_root(vfsp, vpp) + struct mount *vfsp; + struct vnode **vpp; +{ + struct coda_mntinfo *mi = vftomi(vfsp); + struct vnode **result; + int error; + struct proc *p = curproc; /* XXX - bnoble */ + ViceFid VFid; + + ENTRY; + MARK_ENTRY(CODA_ROOT_STATS); + result = NULL; + + if (vfsp == mi->mi_vfsp) { + if ((VTOC(mi->mi_rootvp)->c_fid.Volume != 0) || + (VTOC(mi->mi_rootvp)->c_fid.Vnode != 0) || + (VTOC(mi->mi_rootvp)->c_fid.Unique != 0)) + { /* Found valid root. 
*/ + *vpp = mi->mi_rootvp; + /* On Mach, this is vref. On NetBSD, VOP_LOCK */ +#if 1 + vref(*vpp); + vn_lock(*vpp, LK_EXCLUSIVE, p); +#else + vget(*vpp, LK_EXCLUSIVE, p); +#endif + MARK_INT_SAT(CODA_ROOT_STATS); + return(0); + } + } + + error = venus_root(vftomi(vfsp), p->p_cred->pc_ucred, p, &VFid); + + if (!error) { + /* + * Save the new rootfid in the cnode, and rehash the cnode into the + * cnode hash with the new fid key. + */ + coda_unsave(VTOC(mi->mi_rootvp)); + VTOC(mi->mi_rootvp)->c_fid = VFid; + coda_save(VTOC(mi->mi_rootvp)); + + *vpp = mi->mi_rootvp; +#if 1 + vref(*vpp); + vn_lock(*vpp, LK_EXCLUSIVE, p); +#else + vget(*vpp, LK_EXCLUSIVE, p); +#endif + + MARK_INT_SAT(CODA_ROOT_STATS); + goto exit; + } else if (error == ENODEV || error == EINTR) { + /* Gross hack here! */ + /* + * If Venus fails to respond to the CODA_ROOT call, coda_call returns + * ENODEV. Return the uninitialized root vnode to allow vfs + * operations such as unmount to continue. Without this hack, + * there is no way to do an unmount if Venus dies before a + * successful CODA_ROOT call is done. All vnode operations + * will fail. + */ + *vpp = mi->mi_rootvp; +#if 1 + vref(*vpp); + vn_lock(*vpp, LK_EXCLUSIVE, p); +#else + vget(*vpp, LK_EXCLUSIVE, p); +#endif + + MARK_INT_FAIL(CODA_ROOT_STATS); + error = 0; + goto exit; + } else { + CODADEBUG( CODA_ROOT, myprintf(("error %d in CODA_ROOT\n", error)); ); + MARK_INT_FAIL(CODA_ROOT_STATS); + + goto exit; + } + + exit: + return(error); +} + +int +coda_quotactl(vfsp, cmd, uid, arg, p) + struct mount *vfsp; + int cmd; + uid_t uid; + caddr_t arg; + struct proc *p; +{ + ENTRY; + return (EOPNOTSUPP); +} + +/* + * Get file system statistics. 
+ */ +int +coda_nb_statfs(vfsp, sbp, p) + register struct mount *vfsp; + struct statfs *sbp; + struct proc *p; +{ + ENTRY; +/* MARK_ENTRY(CODA_STATFS_STATS); */ + if (!CODA_MOUNTED(vfsp)) { +/* MARK_INT_FAIL(CODA_STATFS_STATS);*/ + return(EINVAL); + } + + bzero(sbp, sizeof(struct statfs)); + /* XXX - what to do about f_flags, others? --bnoble */ + /* Below This is what AFS does + #define NB_SFS_SIZ 0x895440 + */ + /* Note: Normal fs's have a bsize of 0x400 == 1024 */ + sbp->f_type = vfsp->mnt_vfc->vfc_typenum; + sbp->f_bsize = 8192; /* XXX */ + sbp->f_iosize = 8192; /* XXX */ +#define NB_SFS_SIZ 0x8AB75D + sbp->f_blocks = NB_SFS_SIZ; + sbp->f_bfree = NB_SFS_SIZ; + sbp->f_bavail = NB_SFS_SIZ; + sbp->f_files = NB_SFS_SIZ; + sbp->f_ffree = NB_SFS_SIZ; + bcopy((caddr_t)&(vfsp->mnt_stat.f_fsid), (caddr_t)&(sbp->f_fsid), sizeof (fsid_t)); + snprintf(sbp->f_mntonname, sizeof(sbp->f_mntonname), "/coda"); + snprintf(sbp->f_mntfromname, sizeof(sbp->f_mntfromname), "CODA"); +/* MARK_INT_SAT(CODA_STATFS_STATS); */ + return(0); +} + +/* + * Flush any pending I/O. + */ +int +coda_sync(vfsp, waitfor, cred, p) + struct mount *vfsp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + ENTRY; + MARK_ENTRY(CODA_SYNC_STATS); + MARK_INT_SAT(CODA_SYNC_STATS); + return(0); +} + +int +coda_vget(vfsp, ino, vpp) + struct mount *vfsp; + ino_t ino; + struct vnode **vpp; +{ + ENTRY; + return (EOPNOTSUPP); +} + +/* + * fhtovp is now what vget used to be in 4.3-derived systems. For + * some silly reason, vget is now keyed by a 32 bit ino_t, rather than + * a type-specific fid. 
+ */
+int
+coda_fhtovp(vfsp, fhp, nam, vpp, exflagsp, creadanonp)
+    register struct mount *vfsp;    /* mount whose Venus is asked for the fid */
+    struct fid *fhp;                /* really a struct cfid; see the cast below */
+    struct mbuf *nam;               /* not used in this function */
+    struct vnode **vpp;             /* out: resulting vnode, or NULL on error */
+    int *exflagsp;                  /* not used in this function */
+    struct ucred **creadanonp;      /* not used in this function */
+{
+    struct cfid *cfid = (struct cfid *)fhp;
+    struct cnode *cp = 0;
+    int error;
+    struct proc *p = curproc; /* XXX -mach */
+    ViceFid VFid;
+    int vtype;
+
+    ENTRY;
+
+    MARK_ENTRY(CODA_VGET_STATS);
+    /* Check for vget of control object. */
+    if (IS_CTL_FID(&cfid->cfid_fid)) {
+        *vpp = coda_ctlvp;
+        vref(coda_ctlvp);
+        MARK_INT_SAT(CODA_VGET_STATS);
+        return(0);
+    }
+
+    /* Ask Venus to translate the fid; it yields the real fid and vnode type. */
+    error = venus_fhtovp(vftomi(vfsp), &cfid->cfid_fid, p->p_cred->pc_ucred, p, &VFid, &vtype);
+
+    /* NOTE(review): neither branch below updates the sat/fail counters. */
+    if (error) {
+        CODADEBUG(CODA_VGET, myprintf(("vget error %d\n",error));)
+            *vpp = (struct vnode *)0;
+    } else {
+        CODADEBUG(CODA_VGET,
+                 myprintf(("vget: vol %lx vno %lx uni %lx type %d result %d\n",
+                           VFid.Volume, VFid.Vnode, VFid.Unique, vtype, error)); )
+
+        cp = make_coda_node(&VFid, vfsp, vtype);
+        *vpp = CTOV(cp);
+    }
+    return(error);
+}
+/*
+ * vnode -> file handle translation is not supported: always EOPNOTSUPP.
+ */
+int
+coda_vptofh(vnp, fidp)
+    struct vnode *vnp;
+    struct fid   *fidp;
+{
+    ENTRY;
+    return (EOPNOTSUPP);
+}
+/*
+ * Filesystem-type init hook: nothing to set up here; always returns 0.
+ */
+int
+coda_init(struct vfsconf *vfsp)
+{
+    ENTRY;
+    return 0;
+}
+
+/*
+ * To allow for greater ease of use, some vnodes may be orphaned when
+ * Venus dies.  Certain operations should still be allowed to go
+ * through, but without propagating orphan-ness.  So this function will
+ * get a new vnode for the file from the current run of Venus.
+ *
+ * *vpp is read for its fid and mount and, via coda_fhtovp, overwritten
+ * with the replacement vnode.  Returns ENODEV when the mntinfo no longer
+ * has a mount attached, otherwise whatever coda_fhtovp returns.  */
+
+int
+getNewVnode(vpp)
+     struct vnode **vpp;
+{
+    struct cfid cfid;
+    struct coda_mntinfo *mi = vftomi((*vpp)->v_mount);
+
+    ENTRY;
+
+    /* Build a file handle carrying the old vnode's fid. */
+    cfid.cfid_len = (short)sizeof(ViceFid);
+    cfid.cfid_fid = VTOC(*vpp)->c_fid;  /* Structure assignment. */
+    /* XXX ? */
+
+    /* We're guessing that if set, the 1st element on the list is a
+     * valid vnode to use. If not, return ENODEV as venus is dead.
+     */
+    if (mi->mi_vfsp == NULL)
+        return ENODEV;
+
+    return coda_fhtovp(mi->mi_vfsp, (struct fid*)&cfid, NULL, vpp,
+                      NULL, NULL);
+}
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/ufsmount.h>
+/* get the mount structure corresponding to a given device.  Assume
+ * device corresponds to a UFS. Return NULL if no device is found.
+ *
+ * Walks the global mountlist and returns the first mount whose
+ * VFSTOUFS(mp)->um_dev equals dev.
+ *
+ * NOTE(review): VFSTOUFS() is applied to every mount on the list with no
+ * check that the mount really is a UFS, so for other filesystem types the
+ * um_dev that is compared is whatever their private mount data happens to
+ * contain -- confirm this is acceptable to the callers.
+ */
+struct mount *devtomp(dev)
+    dev_t dev;
+{
+    struct mount *mp, *nmp;
+
+    for (mp = mountlist.cqh_first; mp != (void*)&mountlist; mp = nmp) {
+        nmp = mp->mnt_list.cqe_next;
+        if (((VFSTOUFS(mp))->um_dev == (dev_t) dev)) {
+            /* mount corresponds to UFS and the device matches one we want */
+            return(mp);
+        }
+    }
+    /* mount structure wasn't found */
+    return(NULL);
+}
+/*
+ * The Coda vfs operation vector.  The initializers are positional.  The
+ * two cast slots (the signatures are those of fhtovp and vptofh) are
+ * filled with eopnotsupp rather than with coda_fhtovp / coda_vptofh.
+ */
+struct vfsops coda_vfsops = {
+    coda_mount,
+    coda_start,
+    coda_unmount,
+    coda_root,
+    coda_quotactl,
+    coda_nb_statfs,
+    coda_sync,
+    coda_vget,
+    (int (*) (struct mount *, struct fid *, struct sockaddr *, struct vnode **,
+              int *, struct ucred **))
+        eopnotsupp,
+    (int (*) (struct vnode *, struct fid *)) eopnotsupp,
+    coda_init,
+};
+
+VFS_SET(coda_vfsops, coda, VFCF_NETWORK);
diff --git a/sys/fs/coda/coda_vfsops.h b/sys/fs/coda/coda_vfsops.h
new file mode 100644
index 0000000..ef23c3f
--- /dev/null
+++ b/sys/fs/coda/coda_vfsops.h
@@ -0,0 +1,63 @@
+/*
+ *
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ *
+ *           Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.  CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ *      @(#) src/sys/cfs/coda_vfsops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_vfsops.h,v 1.2 1998/09/02 19:09:53 rvb Exp $
+ *
+ */
+
+/*
+ * cfid structure:
+ * This overlays the fid structure (see vfs.h)
+ * Only used below and will probably go away.
+ *
+ * It is the file-handle form of a Coda fid: a length, padding, and the
+ * ViceFid itself.
+ */
+
+struct cfid {
+    u_short     cfid_len;       /* length field of the overlaid fid */
+    u_short     padding;
+    ViceFid     cfid_fid;       /* the Coda file identifier */
+};
+
+struct mount;   /* forward declaration; only pointers are used below */
+/*
+ * Prototypes for the Coda vfs operations.
+ */
+int coda_vfsopstats_init(void);
+int coda_mount(struct mount *, char *, caddr_t, struct nameidata *,
+                       struct proc *);
+int coda_start(struct mount *, int, struct proc *);
+int coda_unmount(struct mount *, int, struct proc *);
+int coda_root(struct mount *, struct vnode **);
+int coda_quotactl(struct mount *, int, uid_t, caddr_t, struct proc *);
+int coda_nb_statfs(struct mount *, struct statfs *, struct proc *);
+int coda_sync(struct mount *, int, struct ucred *, struct proc *);
+int coda_vget(struct mount *, ino_t, struct vnode **);
+int coda_fhtovp(struct mount *, struct fid *, struct mbuf *, struct vnode **,
+                       int *, struct ucred **);
+int coda_vptofh(struct vnode *, struct fid *);
+int coda_init(struct vfsconf *vfsp);
+/* Helper, not a vfs operation: takes a vnode pointer in *vpp and may replace it. */
+int getNewVnode(struct vnode **vpp);
diff --git a/sys/fs/coda/coda_vnops.c b/sys/fs/coda/coda_vnops.c
new file mode 100644
index 0000000..efa0dda
--- /dev/null
+++ b/sys/fs/coda/coda_vnops.c
@@ -0,0 +1,2233 @@
+/*
+ *
+ *             Coda: an Experimental Distributed File System
+ *                              Release 3.1
+ *
+ * 
Copyright (c) 1987-1998 Carnegie Mellon University
+ *                          All Rights Reserved
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation, and
+ * that credit is given to Carnegie Mellon University in all documents
+ * and publicity pertaining to direct or indirect use of this code or its
+ * derivatives.
+ *
+ * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS,
+ * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES.  CARNEGIE MELLON ALLOWS
+ * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION.  CARNEGIE MELLON
+ * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER
+ * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF
+ * ANY DERIVATIVE WORK.
+ *
+ * Carnegie Mellon encourages users of this software to return any
+ * improvements or extensions that they make, and to grant Carnegie
+ * Mellon the rights to redistribute these changes without encumbrance.
+ *
+ *      @(#) src/sys/coda/coda_vnops.c,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $
+ *  $Id: coda_vnops.c,v 1.14 1999/01/27 20:09:17 dillon Exp $
+ *
+ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1990 Carnegie-Mellon University
+ * Copyright (c) 1989 Carnegie-Mellon University
+ * All rights reserved.  The CMU software License Agreement specifies
+ * the terms and conditions for use and redistribution.
+ */
+
+/*
+ * This code was written for the Coda file system at Carnegie Mellon
+ * University.  Contributors include David Steere, James Kistler, and
+ * M. Satyanarayanan.
+ */ + +/* + * HISTORY + * $Log: coda_vnops.c,v $ + * Revision 1.14 1999/01/27 20:09:17 dillon + * Fix warnings preparing for -Wall -Wcast-qual + * + * Also disable one usb module in LINT due to fatal compilation errors, + * temporary. + * + * Revision 1.13 1999/01/20 14:49:05 eivind + * Add 'options DEBUG_LOCKS', which stores extra information in struct + * lock, and add some macros and function parameters to make sure that + * the information get to the point where it can be put in the lock + * structure. + * + * While I'm here, add DEBUG_VFS_LOCKS to LINT. + * + * Revision 1.12 1999/01/07 16:14:12 bde + * Don't pass unused unused timestamp args to UFS_UPDATE() or waste + * time initializing them. This almost finishes centralizing (in-core) + * timestamp updates in ufs_itimes(). + * + * Revision 1.11 1999/01/05 18:49:51 eivind + * Remove the 'waslocked' parameter to vfs_object_create(). + * + * Revision 1.10 1998/12/04 18:44:21 rvb + * Don't print diagnostic anymore + * + * Revision 1.9 1998/11/16 19:48:26 rvb + * A few bug fixes for Robert Watson + * + * Revision 1.8 1998/10/28 20:31:13 rvb + * Change the way unmounting happens to guarantee that the + * client programs are allowed to finish up (coda_call is + * forced to complete) and release their locks. Thus there + * is a reasonable chance that the vflush implicit in the + * unmount will not get hung on held locks. + * + * Revision 1.7 1998/10/25 17:44:41 phk + * Nitpicking and dusting performed on a train. Removes trivial warnings + * about unused variables, labels and other lint. + * + * Revision 1.6 1998/09/28 20:52:58 rvb + * Cleanup and fix THE bug + * + * Revision 1.5 1998/09/25 17:38:32 rvb + * Put "stray" printouts under DIAGNOSTIC. Make everything build + * with DEBUG on. Add support for lkm. (The macro's don't work + * for me; for a good chuckle look at the end of coda_fbsd.c.) 
+ * + * Revision 1.4 1998/09/13 13:57:59 rvb + * Finish conversion of cfs -> coda + * + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.12 1998/08/28 18:28:00 rvb + * NetBSD -current is stricter! + * + * Revision 1.11 1998/08/28 18:12:23 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. + * + * Revision 1.10 1998/08/18 17:05:21 rvb + * Don't use __RCSID now + * + * Revision 1.9 1998/08/18 16:31:46 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.8 98/02/24 22:22:50 rvb + * Fixes up mainly to flush iopen and friends + * + * Revision 1.7 98/01/31 20:53:15 rvb + * First version that works on FreeBSD 2.2.5 + * + * Revision 1.6 98/01/23 11:53:47 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.5.2.8 98/01/23 11:21:11 rvb + * Sync with 2.2.5 + * + * Revision 1.5.2.7 97/12/19 14:26:08 rvb + * session id + * + * Revision 1.5.2.6 97/12/16 22:01:34 rvb + * Oops add cfs_subr.h cfs_venus.h; sync with peter + * + * Revision 1.5.2.5 97/12/16 12:40:14 rvb + * Sync with 1.3 + * + * Revision 1.5.2.4 97/12/10 14:08:31 rvb + * Fix O_ flags; check result in coda_call + * + * Revision 1.5.2.3 97/12/10 11:40:27 rvb + * No more ody + * + * Revision 1.5.2.2 97/12/09 16:07:15 rvb + * Sync with vfs/include/coda.h + * + * Revision 1.5.2.1 97/12/06 17:41:25 rvb + * Sync with peters coda.h + * + * Revision 1.5 97/12/05 10:39:23 rvb + * Read CHANGES + * + * Revision 1.4.14.10 97/11/25 08:08:48 rvb + * cfs_venus ... 
done; until cred/vattr change + * + * Revision 1.4.14.9 97/11/24 15:44:48 rvb + * Final cfs_venus.c w/o macros, but one locking bug + * + * Revision 1.4.14.8 97/11/21 11:28:04 rvb + * cfs_venus.c is done: first pass + * + * Revision 1.4.14.7 97/11/20 11:46:51 rvb + * Capture current cfs_venus + * + * Revision 1.4.14.6 97/11/18 10:27:19 rvb + * cfs_nbsd.c is DEAD!!!; integrated into cfs_vf/vnops.c + * cfs_nb_foo and cfs_foo are joined + * + * Revision 1.4.14.5 97/11/13 22:03:03 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.4.14.4 97/11/12 12:09:42 rvb + * reorg pass1 + * + * Revision 1.4.14.3 97/11/06 21:03:28 rvb + * don't include headers in headers + * + * Revision 1.4.14.2 97/10/29 16:06:30 rvb + * Kill DYING + * + * Revision 1.4.14.1 1997/10/28 23:10:18 rvb + * >64Meg; venus can be killed! + * + * Revision 1.4 1997/02/20 13:54:50 lily + * check for NULL return from coda_nc_lookup before CTOV + * + * Revision 1.3 1996/12/12 22:11:02 bnoble + * Fixed the "downcall invokes venus operation" deadlock in all known cases. + * There may be more + * + * Revision 1.2 1996/01/02 16:57:07 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:34 bnoble + * Added CODA-specific files + * + * Revision 3.1.1.1 1995/03/04 19:08:06 bnoble + * Branch for NetBSD port revisions + * + * Revision 3.1 1995/03/04 19:08:04 bnoble + * Bump to major revision 3 to prepare for NetBSD port + * + * Revision 2.6 1995/02/17 16:25:26 dcs + * These versions represent several changes: + * 1. Allow venus to restart even if outstanding references exist. + * 2. Have only one ctlvp per client, as opposed to one per mounted cfs device.d + * 3. Allow ody_expand to return many members, not just one. + * + * Revision 2.5 94/11/09 20:29:27 dcs + * Small bug in remove dealing with hard links and link counts was fixed. 
+ * + * Revision 2.4 94/10/14 09:58:42 dcs + * Made changes 'cause sun4s have braindead compilers + * + * Revision 2.3 94/10/12 16:46:37 dcs + * Cleaned kernel/venus interface by removing XDR junk, plus + * so cleanup to allow this code to be more easily ported. + * + * Revision 2.2 94/09/20 14:12:41 dcs + * Fixed bug in rename when moving a directory. + * + * Revision 2.1 94/07/21 16:25:22 satya + * Conversion to C++ 3.0; start of Coda Release 2.0 + * + * Revision 1.4 93/12/17 01:38:01 luqi + * Changes made for kernel to pass process info to Venus: + * + * (1) in file cfs.h + * add process id and process group id in most of the cfs argument types. + * + * (2) in file cfs_vnodeops.c + * add process info passing in most of the cfs vnode operations. + * + * (3) in file cfs_xdr.c + * expand xdr routines according changes in (1). + * add variable pass_process_info to allow venus for kernel version checking. + * + * Revision 1.3 93/05/28 16:24:33 bnoble + * *** empty log message *** + * + * Revision 1.2 92/10/27 17:58:25 lily + * merge kernel/latest and alpha/src/cfs + * + * Revision 2.4 92/09/30 14:16:37 mja + * Redid buffer allocation so that it does kmem_{alloc,free} for all + * architectures. Zone allocation, previously used on the 386, caused + * panics if it was invoked repeatedly. Stack allocation, previously + * used on all other architectures, tickled some Mach bug that appeared + * with large stack frames. + * [91/02/09 jjk] + * + * Added contributors blurb. + * [90/12/13 jjk] + * + * Revision 2.3 90/07/26 15:50:09 mrt + * Fixed fix to rename to remove .. from moved directories. + * [90/06/28 dcs] + * + * Revision 1.7 90/06/28 16:24:25 dcs + * Fixed bug with moving directories, we weren't flushing .. for the moved directory. + * + * Revision 1.6 90/05/31 17:01:47 dcs + * Prepare for merge with facilities kernel. 
+ *
+ *
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/malloc.h>
+#include <sys/mount.h>
+#include <sys/errno.h>
+#include <sys/acct.h>
+#include <sys/file.h>
+#include <sys/fcntl.h>
+#include <sys/uio.h>
+#include <sys/namei.h>
+#include <sys/ioccom.h>
+#include <sys/select.h>
+
+#include <vm/vm.h>
+#include <vm/vm_object.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_zone.h>
+
+#include <coda/coda.h>
+#include <coda/cnode.h>
+#include <coda/coda_vnops.h>
+#include <coda/coda_venus.h>
+#include <coda/coda_opstats.h>
+#include <coda/coda_subr.h>
+#include <coda/coda_namecache.h>
+#include <coda/coda_pioctl.h>
+
+/*
+ * These flags select various performance enhancements.
+ * All three are enabled (1) by default.
+ */
+int coda_attr_cache  = 1;       /* Set to cache attributes in the kernel */
+int coda_symlink_cache = 1;     /* Set to cache symbolic link information */
+int coda_access_cache = 1;      /* Set to handle some access checks directly */
+
+/* structure to keep track of vnode operation calls (one slot per opcode) */
+
+struct coda_op_stats coda_vnodeopstats[CODA_VNODEOPS_SIZE];
+
+/* Per-operation counters; `op' indexes coda_vnodeopstats. */
+#define MARK_ENTRY(op) (coda_vnodeopstats[op].entries++)
+#define MARK_INT_SAT(op) (coda_vnodeopstats[op].sat_intrn++)
+#define MARK_INT_FAIL(op) (coda_vnodeopstats[op].unsat_intrn++)
+#define MARK_INT_GEN(op) (coda_vnodeopstats[op].gen_intrn++)
+
+/* What we are delaying for in printf */
+int coda_printf_delay = 0;  /* in microseconds */
+int coda_vnop_print_entry = 0;  /* non-zero: ENTRY prints each vnode op entered */
+static int coda_lockdebug = 0;
+
+/* Definition of the vfs operation vector */
+
+/*
+ * Some NetBSD details:
+ *
+ *   coda_start is called at the end of the mount syscall.
+ *   coda_init is called at boot time.
+ */
+
+#define ENTRY if(coda_vnop_print_entry) myprintf(("Entered %s\n",__FUNCTION__))
+
+/*
+ * Definition of the vnode operation vector
+ *
+ * Each entry pairs an operation descriptor with its Coda handler.
+ * Operations mapped to coda_vop_error fail with EIO; those mapped to
+ * coda_vop_nop succeed without doing anything.  The all-NULL entry
+ * terminates the table.
+ */
+
+struct vnodeopv_entry_desc coda_vnodeop_entries[] = {
+    { &vop_default_desc, coda_vop_error },
+    { &vop_lookup_desc, coda_lookup },          /* lookup */
+    { &vop_create_desc, coda_create },          /* create */
+    { &vop_mknod_desc, coda_vop_error },        /* mknod */
+    { &vop_open_desc, coda_open },              /* open */
+    { &vop_close_desc, coda_close },            /* close */
+    { &vop_access_desc, coda_access },          /* access */
+    { &vop_getattr_desc, coda_getattr },        /* getattr */
+    { &vop_setattr_desc, coda_setattr },        /* setattr */
+    { &vop_read_desc, coda_read },              /* read */
+    { &vop_write_desc, coda_write },            /* write */
+    { &vop_ioctl_desc, coda_ioctl },            /* ioctl */
+    { &vop_mmap_desc, coda_vop_error },         /* mmap */
+    { &vop_fsync_desc, coda_fsync },            /* fsync */
+    { &vop_remove_desc, coda_remove },          /* remove */
+    { &vop_link_desc, coda_link },              /* link */
+    { &vop_rename_desc, coda_rename },          /* rename */
+    { &vop_mkdir_desc, coda_mkdir },            /* mkdir */
+    { &vop_rmdir_desc, coda_rmdir },            /* rmdir */
+    { &vop_symlink_desc, coda_symlink },        /* symlink */
+    { &vop_readdir_desc, coda_readdir },        /* readdir */
+    { &vop_readlink_desc, coda_readlink },      /* readlink */
+    { &vop_abortop_desc, coda_abortop },        /* abortop */
+    { &vop_inactive_desc, coda_inactive },      /* inactive */
+    { &vop_reclaim_desc, coda_reclaim },        /* reclaim */
+    { &vop_lock_desc, coda_lock },              /* lock */
+    { &vop_unlock_desc, coda_unlock },          /* unlock */
+    { &vop_bmap_desc, coda_bmap },              /* bmap */
+    { &vop_strategy_desc, coda_strategy },      /* strategy */
+    { &vop_print_desc, coda_vop_error },        /* print */
+    { &vop_islocked_desc, coda_islocked },      /* islocked */
+    { &vop_pathconf_desc, coda_vop_error },     /* pathconf */
+    { &vop_advlock_desc, coda_vop_nop },        /* advlock */
+    { &vop_bwrite_desc, coda_vop_error },       /* bwrite */
+    { &vop_lease_desc, coda_vop_nop },          /* lease */
+    { &vop_poll_desc, (vop_t *) vop_stdpoll },  /* poll: generic implementation */
+    { &vop_getpages_desc, coda_fbsd_getpages }, /* pager intf.*/
+    { &vop_putpages_desc, coda_fbsd_putpages }, /* pager intf.*/
+
+#if     0
+
+    we need to define these someday
+#define UFS_BLKATOFF(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_blkatoff(aa, bb, cc, dd)
+#define UFS_VALLOC(aa, bb, cc, dd) VFSTOUFS((aa)->v_mount)->um_valloc(aa, bb, cc, dd)
+#define UFS_VFREE(aa, bb, cc) VFSTOUFS((aa)->v_mount)->um_vfree(aa, bb, cc)
+#define UFS_TRUNCATE(aa, bb, cc, dd, ee) VFSTOUFS((aa)->v_mount)->um_truncate(aa, bb, cc, dd, ee)
+#define UFS_UPDATE(aa, bb) VFSTOUFS((aa)->v_mount)->um_update(aa, bb)
+
+    missing
+    { &vop_reallocblks_desc,    (vop_t *) ufs_missingop },
+    { &vop_cachedlookup_desc,   (vop_t *) ufs_lookup },
+    { &vop_whiteout_desc,       (vop_t *) ufs_whiteout },
+#endif
+    { (struct vnodeop_desc*)NULL, (int(*)(void *))NULL }
+};
+
+static struct vnodeopv_desc coda_vnodeop_opv_desc =
+                { &coda_vnodeop_p, coda_vnodeop_entries };
+
+VNODEOP_SET(coda_vnodeop_opv_desc);
+
+/*
+ * Handler for operations Coda does not define.  The panic that used to
+ * be here is commented out: the operation name is logged via myprintf
+ * and EIO is returned.  `anon' is the operation's argument block, whose
+ * first member is read as the vnodeop_desc pointer.
+ */
+int
+coda_vop_error(void *anon) {
+    struct vnodeop_desc **desc = (struct vnodeop_desc **)anon;
+
+    myprintf(("coda_vop_error: Vnode operation %s called, but not defined.\n",
+              (*desc)->vdesc_name));
+    /*
+    panic("coda_vop_error");
+    */
+    return EIO;
+}
+
+/*
+ * A generic do-nothing.  For lease_check, advlock
+ * Logs the operation name only when codadebug is set; always returns 0.
+ */
+int
+coda_vop_nop(void *anon) {
+    struct vnodeop_desc **desc = (struct vnodeop_desc **)anon;
+
+    if (codadebug) {
+        myprintf(("Vnode operation %s called, but unsupported\n",
+                  (*desc)->vdesc_name));
+    }
+   return (0);
+}
+/*
+ * Reset every slot of coda_vnodeopstats: the opcode is set to the slot
+ * index and all counters are cleared.  Always returns 0.
+ */
+int
+coda_vnodeopstats_init(void)
+{
+        register int i;
+
+        for(i=0;i<CODA_VNODEOPS_SIZE;i++) {
+                coda_vnodeopstats[i].opcode = i;
+                coda_vnodeopstats[i].entries = 0;
+                coda_vnodeopstats[i].sat_intrn = 0;
+                coda_vnodeopstats[i].unsat_intrn = 0;
+                coda_vnodeopstats[i].gen_intrn = 0;
+        }
+        return 0;
+}
+
+/*
+ * coda_open calls Venus to return the device, inode pair of the cache
+ * file holding the data.
Using iget, coda_open finds the vnode of the
+ * cache file, and then opens it.
+ *
+ * On success the cnode holds the container vnode in c_ovp, c_ocount is
+ * incremented (and c_owrite when opened for writing), and an extra
+ * reference is kept on the Coda vnode until coda_close.
+ */
+int
+coda_open(v)
+    void *v;
+{
+    /*
+     * NetBSD can pass the O_EXCL flag in mode, even though the check
+     * has already happened.  Venus defensively assumes that if open
+     * is passed the EXCL, it must be a bug.  We strip the flag here.
+     */
+/* true args */
+    struct vop_open_args *ap = v;
+    register struct vnode **vpp = &(ap->a_vp);
+    struct cnode *cp = VTOC(*vpp);
+    int flag = ap->a_mode & (~O_EXCL);
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+    struct vnode *vp;           /* container (cache) file vnode */
+    dev_t dev;
+    ino_t inode;
+
+    MARK_ENTRY(CODA_OPEN_STATS);
+
+    /* Check for open of control file. */
+    if (IS_CTL_VP(*vpp)) {
+        /* XXX */
+        /* if (WRITEABLE(flag)) */
+        if (flag & (FWRITE | O_TRUNC | O_CREAT | O_EXCL)) {
+            MARK_INT_FAIL(CODA_OPEN_STATS);
+            return(EACCES);
+        }
+        MARK_INT_SAT(CODA_OPEN_STATS);
+        return(0);
+    }
+
+    error = venus_open(vtomi((*vpp)), &cp->c_fid, flag, cred, p, &dev, &inode);
+    if (error)
+        return (error);
+    if (!error) {       /* always true here: the error case returned above */
+        CODADEBUG( CODA_OPEN,myprintf(("open: dev %d inode %d result %d\n",
+                                  dev, inode, error)); )
+    }
+
+    /* Translate the <device, inode> pair for the cache file into
+       an inode pointer. */
+    error = coda_grab_vnode(dev, inode, &vp);
+    if (error)
+        return (error);
+
+    /* We get the vnode back locked.  Needs unlocked */
+    VOP_UNLOCK(vp, 0, p);
+    /* Keep a reference until the close comes in. */
+    vref(*vpp);
+
+    /* Save the vnode pointer for the cache file. */
+    if (cp->c_ovp == NULL) {
+        cp->c_ovp = vp;
+    } else {
+        if (cp->c_ovp != vp)
+            panic("coda_open: cp->c_ovp != ITOV(ip)");
+    }
+    cp->c_ocount++;
+
+    /* Flush the attribute cached if writing the file. */
+    if (flag & FWRITE) {
+        cp->c_owrite++;
+        cp->c_flags &= ~C_VATTR;
+    }
+
+    /* Save the <device, inode> pair for the cache file to speed
+       up subsequent page_read's. */
+    cp->c_device = dev;
+    cp->c_inode = inode;
+
+    /* Open the cache file.
+     * NOTE(review): if this fails, the vref() above and the c_ovp,
+     * c_ocount and c_owrite updates are not undone before returning --
+     * confirm whether the caller compensates. */
+    error = VOP_OPEN(vp, flag, cred, p);
+    if (error) {
+        printf("coda_open: VOP_OPEN on container failed %d\n", error);
+        return (error);
+    }
+/* grab (above) does this when it calls newvnode unless it's in the cache*/
+    if (vp->v_type == VREG) {
+        error = vfs_object_create(vp, p, cred);
+        if (error != 0) {
+            printf("coda_open: vfs_object_create() returns %d\n", error);
+            vput(vp);   /* NOTE(review): vp was unlocked by VOP_UNLOCK above;
+                         * verify that vput (rather than vrele) is right. */
+        }
+    }
+
+    return(error);
+}
+
+/*
+ * Close the cache file used for I/O and notify Venus.
+ *
+ * Closing the control vnode is a no-op returning 0.  While the cnode is
+ * marked as unmounting, only the container file (if any) is closed and
+ * released and ENODEV is returned; the open counts are not touched and
+ * Venus is not called.  Otherwise the container is closed and released,
+ * the counts taken in coda_open are dropped, Venus is notified and the
+ * reference coda_open kept on the Coda vnode is released.  The return
+ * value is then the venus_close() result.
+ */
+int
+coda_close(v)
+    void *v;
+{
+/* true args */
+    struct vop_close_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    struct cnode *cp = VTOC(vp);
+    int flag = ap->a_fflag;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+
+    MARK_ENTRY(CODA_CLOSE_STATS);
+
+    /* Check for close of control file. */
+    if (IS_CTL_VP(vp)) {
+        MARK_INT_SAT(CODA_CLOSE_STATS);
+        return(0);
+    }
+
+    if (IS_UNMOUNTING(cp)) {
+        if (cp->c_ovp) {
+#ifdef  CODA_VERBOSE
+            printf("coda_close: destroying container ref %d, ufs vp %p of vp %p/cp %p\n",
+                    vp->v_usecount, cp->c_ovp, vp, cp);
+#endif
+#ifdef  hmm
+            vgone(cp->c_ovp);
+#else
+            VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
+            vrele(cp->c_ovp);
+#endif
+        } else {
+#ifdef  CODA_VERBOSE
+            printf("coda_close: NO container vp %p/cp %p\n", vp, cp);
+#endif
+        }
+        return ENODEV;
+    } else {
+        VOP_CLOSE(cp->c_ovp, flag, cred, p); /* Do errors matter here? */
+        vrele(cp->c_ovp);
+    }
+
+    /* Last close: forget the container vnode. */
+    if (--cp->c_ocount == 0)
+        cp->c_ovp = NULL;
+
+    if (flag & FWRITE)                    /* file was opened for write */
+        --cp->c_owrite;
+
+    error = venus_close(vtomi(vp), &cp->c_fid, flag, cred, p);
+    vrele(CTOV(cp));    /* drops the reference taken by vref() in coda_open */
+
+    CODADEBUG(CODA_CLOSE, myprintf(("close: result %d\n",error)); )
+    return(error);
+}
+/*
+ * Read vnode op: unpacks the argument block and defers to coda_rdwr.
+ */
+int
+coda_read(v)
+    void *v;
+{
+    struct vop_read_args *ap = v;
+
+    ENTRY;
+    return(coda_rdwr(ap->a_vp, ap->a_uio, UIO_READ,
+                    ap->a_ioflag, ap->a_cred, ap->a_uio->uio_procp));
+}
+/*
+ * Write vnode op: unpacks the argument block and defers to coda_rdwr.
+ */
+int
+coda_write(v)
+    void *v;
+{
+    struct vop_write_args *ap = v;
+
+    ENTRY;
+    return(coda_rdwr(ap->a_vp, ap->a_uio, UIO_WRITE,
+                    ap->a_ioflag, ap->a_cred, ap->a_uio->uio_procp));
+}
+/*
+ * Common read/write path: the I/O is performed on the container (cache)
+ * file on behalf of the Coda vnode vp.  If the file is not currently
+ * open, the container is either re-grabbed from the saved <device, inode>
+ * pair or the Coda vnode is opened (and afterwards closed) internally.
+ * The control vnode is rejected with EINVAL.  After a write the cached
+ * attributes are invalidated.
+ */
+int
+coda_rdwr(vp, uiop, rw, ioflag, cred, p)
+    struct vnode *vp;
+    struct uio *uiop;
+    enum uio_rw rw;
+    int ioflag;
+    struct ucred *cred;
+    struct proc *p;
+{
+/* upcall decl */
+  /* NOTE: container file operation!!! */
+/* locals */
+    struct cnode *cp = VTOC(vp);
+    struct vnode *cfvp = cp->c_ovp;     /* container file vnode, if open */
+    int igot_internally = 0;    /* set below but never read afterwards */
+    int opened_internally = 0;  /* non-zero: we must VOP_CLOSE before returning */
+    int error = 0;
+
+    MARK_ENTRY(CODA_RDWR_STATS);
+
+    CODADEBUG(CODA_RDWR, myprintf(("coda_rdwr(%d, %p, %d, %qd, %d)\n", rw,
+                              uiop->uio_iov->iov_base, uiop->uio_resid,
+                              uiop->uio_offset, uiop->uio_segflg)); )
+
+    /* Check for rdwr of control object. */
+    if (IS_CTL_VP(vp)) {
+        MARK_INT_FAIL(CODA_RDWR_STATS);
+        return(EINVAL);
+    }
+
+    /*
+     * If file is not already open this must be a page
+     * {read,write} request.  Iget the cache file's inode
+     * pointer if we still have its <device, inode> pair.
+     * Otherwise, we must do an internal open to derive the
+     * pair.
+     */
+    if (cfvp == NULL) {
+        /*
+         * If we're dumping core, do the internal open. Otherwise
+         * venus won't have the correct size of the core when
+         * it's completely written.
+         */
+        if (cp->c_inode != 0 && !(p && (p->p_acflag & ACORE))) {
+            igot_internally = 1;
+            error = coda_grab_vnode(cp->c_device, cp->c_inode, &cfvp);
+            if (error) {
+                MARK_INT_FAIL(CODA_RDWR_STATS);
+                return(error);
+            }
+            /*
+             * We get the vnode back locked in both Mach and
+             * NetBSD.  Needs unlocked
+             *
+             * NOTE(review): nothing later in this function releases the
+             * vnode obtained here -- verify where that reference goes.
+             */
+            VOP_UNLOCK(cfvp, 0, p);
+        }
+        else {
+            opened_internally = 1;
+            MARK_INT_GEN(CODA_OPEN_STATS);
+            error = VOP_OPEN(vp, (rw == UIO_READ ? FREAD : FWRITE),
+                             cred, p);
+printf("coda_rdwr: Internally Opening %p\n", vp);       /* printed even on failure */
+            if (error) {
+                printf("coda_rdwr: VOP_OPEN on container failed %d\n", error);
+                return (error);
+            }
+            if (vp->v_type == VREG) {
+                error = vfs_object_create(vp, p, cred);
+                if (error != 0) {
+                    printf("coda_rdwr: vfs_object_create() returns %d\n", error);
+                    vput(vp);   /* NOTE(review): this is the Coda vnode, not
+                                 * the container; confirm vput is intended. */
+                }
+            }
+            if (error) {
+                MARK_INT_FAIL(CODA_RDWR_STATS);
+                return(error);
+            }
+            cfvp = cp->c_ovp;   /* set by coda_open through VOP_OPEN above */
+        }
+    }
+
+    /* Have UFS handle the call. */
+    CODADEBUG(CODA_RDWR, myprintf(("indirect rdwr: fid = (%lx.%lx.%lx), refcnt = %d\n",
+                              cp->c_fid.Volume, cp->c_fid.Vnode,
+                              cp->c_fid.Unique, CTOV(cp)->v_usecount)); )
+
+
+    if (rw == UIO_READ) {
+        error = VOP_READ(cfvp, uiop, ioflag, cred);
+    } else {
+        error = VOP_WRITE(cfvp, uiop, ioflag, cred);
+        /* ufs_write updates the vnode_pager_setsize for the vnode/object */
+
+        { struct vattr attr;
+
+            /* Mirror the container's new size onto the Coda vnode's pager. */
+            if (VOP_GETATTR(cfvp, &attr, cred, p) == 0) {
+                vnode_pager_setsize(vp, attr.va_size);
+            }
+        }
+    }
+
+    if (error)
+        MARK_INT_FAIL(CODA_RDWR_STATS);
+    else
+        MARK_INT_SAT(CODA_RDWR_STATS);
+
+    /* Do an internal close if necessary. */
+    if (opened_internally) {
+        MARK_INT_GEN(CODA_CLOSE_STATS);
+        (void)VOP_CLOSE(vp, (rw == UIO_READ ? FREAD : FWRITE), cred, p);
+    }
+
+    /* Invalidate cached attributes if writing. */
+    if (rw == UIO_WRITE)
+        cp->c_flags &= ~C_VATTR;
+    return(error);
+}
+
+int
+coda_ioctl(v)
+    void *v;
+{
+/* true args */
+    struct vop_ioctl_args *ap = v;
+    struct vnode *vp = ap->a_vp;
+    int com = ap->a_command;
+    caddr_t data = ap->a_data;
+    int flag = ap->a_fflag;
+    struct ucred *cred = ap->a_cred;
+    struct proc *p = ap->a_p;
+/* locals */
+    int error;
+    struct vnode *tvp;
+    struct nameidata ndp;
+    struct PioctlData *iap = (struct PioctlData *)data;
+
+    MARK_ENTRY(CODA_IOCTL_STATS);
+
+    CODADEBUG(CODA_IOCTL, myprintf(("in coda_ioctl on %s\n", iap->path));)
+
+    /* Don't check for operation on a dying object, for ctlvp it
+       shouldn't matter */
+
+    /* Must be control object to succeed. */
+    if (!IS_CTL_VP(vp)) {
+        MARK_INT_FAIL(CODA_IOCTL_STATS);
+        CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: vp != ctlvp"));)
+            return (EOPNOTSUPP);
+    }
+    /* Look up the pathname. */
+
+    /* Should we use the name cache here? It would get it from
+       lookupname sooner or later anyway, right? */
+
+    NDINIT(&ndp, LOOKUP, (iap->follow ? 
FOLLOW : NOFOLLOW), UIO_USERSPACE, iap->path, p); + error = namei(&ndp); + tvp = ndp.ni_vp; + + if (error) { + MARK_INT_FAIL(CODA_IOCTL_STATS); + CODADEBUG(CODA_IOCTL, myprintf(("coda_ioctl error: lookup returns %d\n", + error));) + return(error); + } + + /* + * Make sure this is a coda style cnode, but it may be a + * different vfsp + */ + /* XXX: this totally violates the comment about vtagtype in vnode.h */ + if (tvp->v_tag != VT_CODA) { + vrele(tvp); + MARK_INT_FAIL(CODA_IOCTL_STATS); + CODADEBUG(CODA_IOCTL, + myprintf(("coda_ioctl error: %s not a coda object\n", + iap->path));) + return(EINVAL); + } + + if (iap->vi.in_size > VC_MAXDATASIZE) { + vrele(tvp); + return(EINVAL); + } + error = venus_ioctl(vtomi(tvp), &((VTOC(tvp))->c_fid), com, flag, data, cred, p); + + if (error) + MARK_INT_FAIL(CODA_IOCTL_STATS); + else + CODADEBUG(CODA_IOCTL, myprintf(("Ioctl returns %d \n", error)); ) + + vrele(tvp); + return(error); +} + +/* + * To reduce the cost of a user-level venus;we cache attributes in + * the kernel. Each cnode has storage allocated for an attribute. If + * c_vattr is valid, return a reference to it. Otherwise, get the + * attributes from venus and store them in the cnode. There is some + * question if this method is a security leak. But I think that in + * order to make this call, the user must have done a lookup and + * opened the file, and therefore should already have access. + */ +int +coda_getattr(v) + void *v; +{ +/* true args */ + struct vop_getattr_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct vattr *vap = ap->a_vap; + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; +/* locals */ + int error; + + MARK_ENTRY(CODA_GETATTR_STATS); + + if (IS_UNMOUNTING(cp)) + return ENODEV; + + /* Check for getattr of control object. 
*/ + if (IS_CTL_VP(vp)) { + MARK_INT_FAIL(CODA_GETATTR_STATS); + return(ENOENT); + } + + /* Check to see if the attributes have already been cached */ + if (VALID_VATTR(cp)) { + CODADEBUG(CODA_GETATTR, { myprintf(("attr cache hit: (%lx.%lx.%lx)\n", + cp->c_fid.Volume, + cp->c_fid.Vnode, + cp->c_fid.Unique));}); + CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR)) + print_vattr(&cp->c_vattr); ); + + *vap = cp->c_vattr; + MARK_INT_SAT(CODA_GETATTR_STATS); + return(0); + } + + error = venus_getattr(vtomi(vp), &cp->c_fid, cred, p, vap); + + if (!error) { + CODADEBUG(CODA_GETATTR, myprintf(("getattr miss (%lx.%lx.%lx): result %d\n", + cp->c_fid.Volume, + cp->c_fid.Vnode, + cp->c_fid.Unique, + error)); ) + + CODADEBUG(CODA_GETATTR, if (!(codadebug & ~CODA_GETATTR)) + print_vattr(vap); ); + + { int size = vap->va_size; + struct vnode *convp = cp->c_ovp; + if (convp != (struct vnode *)0) { + vnode_pager_setsize(convp, size); + } + } + /* If not open for write, store attributes in cnode */ + if ((cp->c_owrite == 0) && (coda_attr_cache)) { + cp->c_vattr = *vap; + cp->c_flags |= C_VATTR; + } + + } + return(error); +} + +int +coda_setattr(v) + void *v; +{ +/* true args */ + struct vop_setattr_args *ap = v; + register struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + register struct vattr *vap = ap->a_vap; + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; +/* locals */ + int error; + + MARK_ENTRY(CODA_SETATTR_STATS); + + /* Check for setattr of control object. 
*/ + if (IS_CTL_VP(vp)) { + MARK_INT_FAIL(CODA_SETATTR_STATS); + return(ENOENT); + } + + if (codadebug & CODADBGMSK(CODA_SETATTR)) { + print_vattr(vap); + } + error = venus_setattr(vtomi(vp), &cp->c_fid, vap, cred, p); + + if (!error) + cp->c_flags &= ~C_VATTR; + + { int size = vap->va_size; + struct vnode *convp = cp->c_ovp; + if (size != VNOVAL && convp != (struct vnode *)0) { + vnode_pager_setsize(convp, size); + } + } + CODADEBUG(CODA_SETATTR, myprintf(("setattr %d\n", error)); ) + return(error); +} + +int +coda_access(v) + void *v; +{ +/* true args */ + struct vop_access_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + int mode = ap->a_mode; + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; +/* locals */ + int error; + + MARK_ENTRY(CODA_ACCESS_STATS); + + /* Check for access of control object. Only read access is + allowed on it. */ + if (IS_CTL_VP(vp)) { + /* bogus hack - all will be marked as successes */ + MARK_INT_SAT(CODA_ACCESS_STATS); + return(((mode & VREAD) && !(mode & (VWRITE | VEXEC))) + ? 0 : EACCES); + } + + /* + * if the file is a directory, and we are checking exec (eg lookup) + * access, and the file is in the namecache, then the user must have + * lookup access to it. + */ + if (coda_access_cache) { + if ((vp->v_type == VDIR) && (mode & VEXEC)) { + if (coda_nc_lookup(cp, ".", 1, cred)) { + MARK_INT_SAT(CODA_ACCESS_STATS); + return(0); /* it was in the cache */ + } + } + } + + error = venus_access(vtomi(vp), &cp->c_fid, mode, cred, p); + + return(error); +} + +/* + * CODA abort op, called after namei() when a CREATE/DELETE isn't actually + * done. If a buffer has been saved in anticipation of a coda_create or + * a coda_remove, delete it. 
+ */ +/* ARGSUSED */ +int +coda_abortop(v) + void *v; +{ +/* true args */ + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap = v; +/* upcall decl */ +/* locals */ + + if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) + zfree(namei_zone, ap->a_cnp->cn_pnbuf); + return (0); +} + +int +coda_readlink(v) + void *v; +{ +/* true args */ + struct vop_readlink_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct uio *uiop = ap->a_uio; + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_uio->uio_procp; +/* locals */ + int error; + char *str; + int len; + + MARK_ENTRY(CODA_READLINK_STATS); + + /* Check for readlink of control object. */ + if (IS_CTL_VP(vp)) { + MARK_INT_FAIL(CODA_READLINK_STATS); + return(ENOENT); + } + + if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) { /* symlink was cached */ + uiop->uio_rw = UIO_READ; + error = uiomove(cp->c_symlink, (int)cp->c_symlen, uiop); + if (error) + MARK_INT_FAIL(CODA_READLINK_STATS); + else + MARK_INT_SAT(CODA_READLINK_STATS); + return(error); + } + + error = venus_readlink(vtomi(vp), &cp->c_fid, cred, p, &str, &len); + + if (!error) { + uiop->uio_rw = UIO_READ; + error = uiomove(str, len, uiop); + + if (coda_symlink_cache) { + cp->c_symlink = str; + cp->c_symlen = len; + cp->c_flags |= C_SYMLINK; + } else + CODA_FREE(str, len); + } + + CODADEBUG(CODA_READLINK, myprintf(("in readlink result %d\n",error));) + return(error); +} + +int +coda_fsync(v) + void *v; +{ +/* true args */ + struct vop_fsync_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; +/* locals */ + struct vnode *convp = cp->c_ovp; + int error; + + MARK_ENTRY(CODA_FSYNC_STATS); + + /* Check for fsync on an unmounting object */ + /* The NetBSD kernel, in it's infinite wisdom, can try to fsync + * after an unmount has been initiated. This is a Bad Thing, + * which we have to avoid. 
Not a legitimate failure for stats. + */ + if (IS_UNMOUNTING(cp)) { + return(ENODEV); + } + + /* Check for fsync of control object. */ + if (IS_CTL_VP(vp)) { + MARK_INT_SAT(CODA_FSYNC_STATS); + return(0); + } + + if (convp) + VOP_FSYNC(convp, cred, MNT_WAIT, p); + + /* + * We see fsyncs with usecount == 1 then usecount == 0. + * For now we ignore them. + */ + /* + if (!vp->v_usecount) { + printf("coda_fsync on vnode %p with %d usecount. c_flags = %x (%x)\n", + vp, vp->v_usecount, cp->c_flags, cp->c_flags&C_PURGING); + } + */ + + /* + * We can expect fsync on any vnode at all if venus is pruging it. + * Venus can't very well answer the fsync request, now can it? + * Hopefully, it won't have to, because hopefully, venus preserves + * the (possibly untrue) invariant that it never purges an open + * vnode. Hopefully. + */ + if (cp->c_flags & C_PURGING) { + return(0); + } + + /* needs research */ + return 0; + error = venus_fsync(vtomi(vp), &cp->c_fid, cred, p); + + CODADEBUG(CODA_FSYNC, myprintf(("in fsync result %d\n",error)); ); + return(error); +} + +int +coda_inactive(v) + void *v; +{ + /* XXX - at the moment, inactive doesn't look at cred, and doesn't + have a proc pointer. Oops. */ +/* true args */ + struct vop_inactive_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct ucred *cred __attribute__((unused)) = NULL; + struct proc *p __attribute__((unused)) = curproc; +/* upcall decl */ +/* locals */ + + /* We don't need to send inactive to venus - DCS */ + MARK_ENTRY(CODA_INACTIVE_STATS); + + if (IS_CTL_VP(vp)) { + MARK_INT_SAT(CODA_INACTIVE_STATS); + return 0; + } + + CODADEBUG(CODA_INACTIVE, myprintf(("in inactive, %lx.%lx.%lx. 
vfsp %p\n", + cp->c_fid.Volume, cp->c_fid.Vnode, + cp->c_fid.Unique, vp->v_mount));) + + /* If an array has been allocated to hold the symlink, deallocate it */ + if ((coda_symlink_cache) && (VALID_SYMLINK(cp))) { + if (cp->c_symlink == NULL) + panic("coda_inactive: null symlink pointer in cnode"); + + CODA_FREE(cp->c_symlink, cp->c_symlen); + cp->c_flags &= ~C_SYMLINK; + cp->c_symlen = 0; + } + + /* Remove it from the table so it can't be found. */ + coda_unsave(cp); + if ((struct coda_mntinfo *)(vp->v_mount->mnt_data) == NULL) { + myprintf(("Help! vfsp->vfs_data was NULL, but vnode %p wasn't dying\n", vp)); + panic("badness in coda_inactive\n"); + } + + if (IS_UNMOUNTING(cp)) { +#ifdef DEBUG + printf("coda_inactive: IS_UNMOUNTING use %d: vp %p, cp %p\n", vp->v_usecount, vp, cp); + if (cp->c_ovp != NULL) + printf("coda_inactive: cp->ovp != NULL use %d: vp %p, cp %p\n", + vp->v_usecount, vp, cp); +#endif + lockmgr(&cp->c_lock, LK_RELEASE, &vp->v_interlock, p); + } else { +#ifdef OLD_DIAGNOSTIC + if (CTOV(cp)->v_usecount) { + panic("coda_inactive: nonzero reference count"); + } + if (cp->c_ovp != NULL) { + panic("coda_inactive: cp->ovp != NULL"); + } +#endif + VOP_UNLOCK(vp, 0, p); + vgone(vp); + } + + MARK_INT_SAT(CODA_INACTIVE_STATS); + return(0); +} + +/* + * Remote file system operations having to do with directory manipulation. + */ + +/* + * It appears that in NetBSD, lookup is supposed to return the vnode locked + */ +int +coda_lookup(v) + void *v; +{ +/* true args */ + struct vop_lookup_args *ap = v; + struct vnode *dvp = ap->a_dvp; + struct cnode *dcp = VTOC(dvp); + struct vnode **vpp = ap->a_vpp; + /* + * It looks as though ap->a_cnp->ni_cnd->cn_nameptr holds the rest + * of the string to xlate, and that we must try to get at least + * ap->a_cnp->ni_cnd->cn_namelen of those characters to macth. I + * could be wrong. 
+ */ + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* locals */ + struct cnode *cp; + const char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + ViceFid VFid; + int vtype; + int error = 0; + + MARK_ENTRY(CODA_LOOKUP_STATS); + + CODADEBUG(CODA_LOOKUP, myprintf(("lookup: %s in %lx.%lx.%lx\n", + nm, dcp->c_fid.Volume, + dcp->c_fid.Vnode, dcp->c_fid.Unique));); + + /* Check for lookup of control object. */ + if (IS_CTL_NAME(dvp, nm, len)) { + *vpp = coda_ctlvp; + vref(*vpp); + MARK_INT_SAT(CODA_LOOKUP_STATS); + goto exit; + } + + if (len+1 > CODA_MAXNAMLEN) { + MARK_INT_FAIL(CODA_LOOKUP_STATS); + CODADEBUG(CODA_LOOKUP, myprintf(("name too long: lookup, %lx.%lx.%lx(%s)\n", + dcp->c_fid.Volume, dcp->c_fid.Vnode, + dcp->c_fid.Unique, nm));); + *vpp = (struct vnode *)0; + error = EINVAL; + goto exit; + } + /* First try to look the file up in the cfs name cache */ + /* lock the parent vnode? */ + cp = coda_nc_lookup(dcp, nm, len, cred); + if (cp) { + *vpp = CTOV(cp); + vref(*vpp); + CODADEBUG(CODA_LOOKUP, + myprintf(("lookup result %d vpp %p\n",error,*vpp));) + } else { + + /* The name wasn't cached, so we need to contact Venus */ + error = venus_lookup(vtomi(dvp), &dcp->c_fid, nm, len, cred, p, &VFid, &vtype); + + if (error) { + MARK_INT_FAIL(CODA_LOOKUP_STATS); + CODADEBUG(CODA_LOOKUP, myprintf(("lookup error on %lx.%lx.%lx(%s)%d\n", + dcp->c_fid.Volume, dcp->c_fid.Vnode, dcp->c_fid.Unique, nm, error));) + *vpp = (struct vnode *)0; + } else { + MARK_INT_SAT(CODA_LOOKUP_STATS); + CODADEBUG(CODA_LOOKUP, + myprintf(("lookup: vol %lx vno %lx uni %lx type %o result %d\n", + VFid.Volume, VFid.Vnode, VFid.Unique, vtype, + error)); ) + + cp = make_coda_node(&VFid, dvp->v_mount, vtype); + *vpp = CTOV(cp); + + /* enter the new vnode in the Name Cache only if the top bit isn't set */ + /* And don't enter a new vnode for an invalid one! 
*/ + if (!(vtype & CODA_NOCACHE)) + coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp)); + } + } + + exit: + /* + * If we are creating, and this was the last name to be looked up, + * and the error was ENOENT, then there really shouldn't be an + * error and we can make the leaf NULL and return success. Since + * this is supposed to work under Mach as well as NetBSD, we're + * leaving this fn wrapped. We also must tell lookup/namei that + * we need to save the last component of the name. (Create will + * have to free the name buffer later...lucky us...) + */ + if (((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) + && (cnp->cn_flags & ISLASTCN) + && (error == ENOENT)) + { + error = EJUSTRETURN; + cnp->cn_flags |= SAVENAME; + *ap->a_vpp = NULL; + } + + /* + * If we are removing, and we are at the last element, and we + * found it, then we need to keep the name around so that the + * removal will go ahead as planned. Unfortunately, this will + * probably also lock the to-be-removed vnode, which may or may + * not be a good idea. I'll have to look at the bits of + * coda_remove to make sure. We'll only save the name if we did in + * fact find the name, otherwise coda_remove won't have a chance + * to free the pathname. + */ + if ((cnp->cn_nameiop == DELETE) + && (cnp->cn_flags & ISLASTCN) + && !error) + { + cnp->cn_flags |= SAVENAME; + } + + /* + * If the lookup went well, we need to (potentially?) unlock the + * parent, and lock the child. We are only responsible for + * checking to see if the parent is supposed to be unlocked before + * we return. We must always lock the child (provided there is + * one, and (the parent isn't locked or it isn't the same as the + * parent.) Simple, huh? We can never leave the parent locked unless + * we are ISLASTCN + */ + if (!error || (error == EJUSTRETURN)) { + if (!(cnp->cn_flags & LOCKPARENT) || !(cnp->cn_flags & ISLASTCN)) { + if ((error = VOP_UNLOCK(dvp, 0, p))) { + return error; + } + /* + * The parent is unlocked. 
As long as there is a child, + * lock it without bothering to check anything else. + */ + if (*ap->a_vpp) { + if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) { + printf("coda_lookup: "); + panic("unlocked parent but couldn't lock child"); + } + } + } else { + /* The parent is locked, and may be the same as the child */ + if (*ap->a_vpp && (*ap->a_vpp != dvp)) { + /* Different, go ahead and lock it. */ + if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) { + printf("coda_lookup: "); + panic("unlocked parent but couldn't lock child"); + } + } + } + } else { + /* If the lookup failed, we need to ensure that the leaf is NULL */ + /* Don't change any locking? */ + *ap->a_vpp = NULL; + } + return(error); +} + +/*ARGSUSED*/ +int +coda_create(v) + void *v; +{ +/* true args */ + struct vop_create_args *ap = v; + struct vnode *dvp = ap->a_dvp; + struct cnode *dcp = VTOC(dvp); + struct vattr *va = ap->a_vap; + int exclusive = 1; + int mode = ap->a_vap->va_mode; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* locals */ + int error; + struct cnode *cp; + const char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + ViceFid VFid; + struct vattr attr; + + MARK_ENTRY(CODA_CREATE_STATS); + + /* All creates are exclusive XXX */ + /* I'm assuming the 'mode' argument is the file mode bits XXX */ + + /* Check for create of control object. */ + if (IS_CTL_NAME(dvp, nm, len)) { + *vpp = (struct vnode *)0; + MARK_INT_FAIL(CODA_CREATE_STATS); + return(EACCES); + } + + error = venus_create(vtomi(dvp), &dcp->c_fid, nm, len, exclusive, mode, va, cred, p, &VFid, &attr); + + if (!error) { + + /* If this is an exclusive create, panic if the file already exists. */ + /* Venus should have detected the file and reported EEXIST. 
*/ + + if ((exclusive == 1) && + (coda_find(&VFid) != NULL)) + panic("cnode existed for newly created file!"); + + cp = make_coda_node(&VFid, dvp->v_mount, attr.va_type); + *vpp = CTOV(cp); + + /* Update va to reflect the new attributes. */ + (*va) = attr; + + /* Update the attribute cache and mark it as valid */ + if (coda_attr_cache) { + VTOC(*vpp)->c_vattr = attr; + VTOC(*vpp)->c_flags |= C_VATTR; + } + + /* Invalidate the parent's attr cache, the modification time has changed */ + VTOC(dvp)->c_flags &= ~C_VATTR; + + /* enter the new vnode in the Name Cache */ + coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp)); + + CODADEBUG(CODA_CREATE, + myprintf(("create: (%lx.%lx.%lx), result %d\n", + VFid.Volume, VFid.Vnode, VFid.Unique, error)); ) + } else { + *vpp = (struct vnode *)0; + CODADEBUG(CODA_CREATE, myprintf(("create error %d\n", error));) + } + + if (!error) { + if (cnp->cn_flags & LOCKLEAF) { + if ((error = VOP_LOCK(*ap->a_vpp, LK_EXCLUSIVE, p))) { + printf("coda_create: "); + panic("unlocked parent but couldn't lock child"); + } + } +#ifdef OLD_DIAGNOSTIC + else { + printf("coda_create: LOCKLEAF not set!\n"); + } +#endif + } + /* Have to free the previously saved name */ + /* + * This condition is stolen from ufs_makeinode. I have no idea + * why it's here, but what the hey... 
+ */ + if ((cnp->cn_flags & SAVESTART) == 0) { + zfree(namei_zone, cnp->cn_pnbuf); + } + return(error); +} + +int +coda_remove(v) + void *v; +{ +/* true args */ + struct vop_remove_args *ap = v; + struct vnode *dvp = ap->a_dvp; + struct cnode *cp = VTOC(dvp); + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* locals */ + int error; + const char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + struct cnode *tp; + + MARK_ENTRY(CODA_REMOVE_STATS); + + CODADEBUG(CODA_REMOVE, myprintf(("remove: %s in %lx.%lx.%lx\n", + nm, cp->c_fid.Volume, cp->c_fid.Vnode, + cp->c_fid.Unique));); + + /* Remove the file's entry from the CODA Name Cache */ + /* We're being conservative here, it might be that this person + * doesn't really have sufficient access to delete the file + * but we feel zapping the entry won't really hurt anyone -- dcs + */ + /* I'm gonna go out on a limb here. If a file and a hardlink to it + * exist, and one is removed, the link count on the other will be + * off by 1. We could either invalidate the attrs if cached, or + * fix them. I'll try to fix them. DCS 11/8/94 + */ + tp = coda_nc_lookup(VTOC(dvp), nm, len, cred); + if (tp) { + if (VALID_VATTR(tp)) { /* If attrs are cached */ + if (tp->c_vattr.va_nlink > 1) { /* If it's a hard link */ + tp->c_vattr.va_nlink--; + } + } + + coda_nc_zapfile(VTOC(dvp), nm, len); + /* No need to flush it if it doesn't exist! */ + } + /* Invalidate the parent's attr cache, the modification time has changed */ + VTOC(dvp)->c_flags &= ~C_VATTR; + + /* Check for remove of control object. 
*/ + if (IS_CTL_NAME(dvp, nm, len)) { + MARK_INT_FAIL(CODA_REMOVE_STATS); + return(ENOENT); + } + + error = venus_remove(vtomi(dvp), &cp->c_fid, nm, len, cred, p); + + CODADEBUG(CODA_REMOVE, myprintf(("in remove result %d\n",error)); ) + + if ((cnp->cn_flags & SAVESTART) == 0) { + zfree(namei_zone, cnp->cn_pnbuf); + } + return(error); +} + +int +coda_link(v) + void *v; +{ +/* true args */ + struct vop_link_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct vnode *tdvp = ap->a_tdvp; + struct cnode *tdcp = VTOC(tdvp); + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* locals */ + int error; + const char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + + MARK_ENTRY(CODA_LINK_STATS); + + if (codadebug & CODADBGMSK(CODA_LINK)) { + + myprintf(("nb_link: vp fid: (%lx.%lx.%lx)\n", + cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique)); + myprintf(("nb_link: tdvp fid: (%lx.%lx.%lx)\n", + tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique)); + + } + if (codadebug & CODADBGMSK(CODA_LINK)) { + myprintf(("link: vp fid: (%lx.%lx.%lx)\n", + cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique)); + myprintf(("link: tdvp fid: (%lx.%lx.%lx)\n", + tdcp->c_fid.Volume, tdcp->c_fid.Vnode, tdcp->c_fid.Unique)); + + } + + /* Check for link to/from control object. 
*/ + if (IS_CTL_NAME(tdvp, nm, len) || IS_CTL_VP(vp)) { + MARK_INT_FAIL(CODA_LINK_STATS); + return(EACCES); + } + + error = venus_link(vtomi(vp), &cp->c_fid, &tdcp->c_fid, nm, len, cred, p); + + /* Invalidate the parent's attr cache, the modification time has changed */ + VTOC(tdvp)->c_flags &= ~C_VATTR; + VTOC(vp)->c_flags &= ~C_VATTR; + + CODADEBUG(CODA_LINK, myprintf(("in link result %d\n",error)); ) + + /* Drop the name buffer if we don't need to SAVESTART */ + if ((cnp->cn_flags & SAVESTART) == 0) { + zfree(namei_zone, cnp->cn_pnbuf); + } + return(error); +} + +int +coda_rename(v) + void *v; +{ +/* true args */ + struct vop_rename_args *ap = v; + struct vnode *odvp = ap->a_fdvp; + struct cnode *odcp = VTOC(odvp); + struct componentname *fcnp = ap->a_fcnp; + struct vnode *ndvp = ap->a_tdvp; + struct cnode *ndcp = VTOC(ndvp); + struct componentname *tcnp = ap->a_tcnp; + struct ucred *cred = fcnp->cn_cred; + struct proc *p = fcnp->cn_proc; +/* true args */ + int error; + const char *fnm = fcnp->cn_nameptr; + int flen = fcnp->cn_namelen; + const char *tnm = tcnp->cn_nameptr; + int tlen = tcnp->cn_namelen; + + MARK_ENTRY(CODA_RENAME_STATS); + + /* Hmmm. The vnodes are already looked up. Perhaps they are locked? + This could be Bad. XXX */ +#ifdef OLD_DIAGNOSTIC + if ((fcnp->cn_cred != tcnp->cn_cred) + || (fcnp->cn_proc != tcnp->cn_proc)) + { + panic("coda_rename: component names don't agree"); + } +#endif + + /* Check for rename involving control object. */ + if (IS_CTL_NAME(odvp, fnm, flen) || IS_CTL_NAME(ndvp, tnm, tlen)) { + MARK_INT_FAIL(CODA_RENAME_STATS); + return(EACCES); + } + + /* Problem with moving directories -- need to flush entry for .. 
*/ + if (odvp != ndvp) { + struct cnode *ovcp = coda_nc_lookup(VTOC(odvp), fnm, flen, cred); + if (ovcp) { + struct vnode *ovp = CTOV(ovcp); + if ((ovp) && + (ovp->v_type == VDIR)) /* If it's a directory */ + coda_nc_zapfile(VTOC(ovp),"..", 2); + } + } + + /* Remove the entries for both source and target files */ + coda_nc_zapfile(VTOC(odvp), fnm, flen); + coda_nc_zapfile(VTOC(ndvp), tnm, tlen); + + /* Invalidate the parent's attr cache, the modification time has changed */ + VTOC(odvp)->c_flags &= ~C_VATTR; + VTOC(ndvp)->c_flags &= ~C_VATTR; + + if (flen+1 > CODA_MAXNAMLEN) { + MARK_INT_FAIL(CODA_RENAME_STATS); + error = EINVAL; + goto exit; + } + + if (tlen+1 > CODA_MAXNAMLEN) { + MARK_INT_FAIL(CODA_RENAME_STATS); + error = EINVAL; + goto exit; + } + + error = venus_rename(vtomi(odvp), &odcp->c_fid, &ndcp->c_fid, fnm, flen, tnm, tlen, cred, p); + + exit: + CODADEBUG(CODA_RENAME, myprintf(("in rename result %d\n",error));) + /* XXX - do we need to call cache pureg on the moved vnode? */ + cache_purge(ap->a_fvp); + + /* It seems to be incumbent on us to drop locks on all four vnodes */ + /* From-vnodes are not locked, only ref'd. To-vnodes are locked. */ + + vrele(ap->a_fvp); + vrele(odvp); + + if (ap->a_tvp) { + if (ap->a_tvp == ndvp) { + vrele(ap->a_tvp); + } else { + vput(ap->a_tvp); + } + } + + vput(ndvp); + return(error); +} + +int +coda_mkdir(v) + void *v; +{ +/* true args */ + struct vop_mkdir_args *ap = v; + struct vnode *dvp = ap->a_dvp; + struct cnode *dcp = VTOC(dvp); + struct componentname *cnp = ap->a_cnp; + register struct vattr *va = ap->a_vap; + struct vnode **vpp = ap->a_vpp; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* locals */ + int error; + const char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + struct cnode *cp; + ViceFid VFid; + struct vattr ova; + + MARK_ENTRY(CODA_MKDIR_STATS); + + /* Check for mkdir of target object. 
*/ + if (IS_CTL_NAME(dvp, nm, len)) { + *vpp = (struct vnode *)0; + MARK_INT_FAIL(CODA_MKDIR_STATS); + return(EACCES); + } + + if (len+1 > CODA_MAXNAMLEN) { + *vpp = (struct vnode *)0; + MARK_INT_FAIL(CODA_MKDIR_STATS); + return(EACCES); + } + + error = venus_mkdir(vtomi(dvp), &dcp->c_fid, nm, len, va, cred, p, &VFid, &ova); + + if (!error) { + if (coda_find(&VFid) != NULL) + panic("cnode existed for newly created directory!"); + + + cp = make_coda_node(&VFid, dvp->v_mount, va->va_type); + *vpp = CTOV(cp); + + /* enter the new vnode in the Name Cache */ + coda_nc_enter(VTOC(dvp), nm, len, cred, VTOC(*vpp)); + + /* as a side effect, enter "." and ".." for the directory */ + coda_nc_enter(VTOC(*vpp), ".", 1, cred, VTOC(*vpp)); + coda_nc_enter(VTOC(*vpp), "..", 2, cred, VTOC(dvp)); + + if (coda_attr_cache) { + VTOC(*vpp)->c_vattr = ova; /* update the attr cache */ + VTOC(*vpp)->c_flags |= C_VATTR; /* Valid attributes in cnode */ + } + + /* Invalidate the parent's attr cache, the modification time has changed */ + VTOC(dvp)->c_flags &= ~C_VATTR; + + CODADEBUG( CODA_MKDIR, myprintf(("mkdir: (%lx.%lx.%lx) result %d\n", + VFid.Volume, VFid.Vnode, VFid.Unique, error)); ) + } else { + *vpp = (struct vnode *)0; + CODADEBUG(CODA_MKDIR, myprintf(("mkdir error %d\n",error));) + } + + /* Have to free the previously saved name */ + /* + * ufs_mkdir doesn't check for SAVESTART before freeing the + * pathname buffer, but ufs_create does. For the moment, I'll + * follow their lead, but this seems like it is probably + * incorrect. 
+ */ + zfree(namei_zone, cnp->cn_pnbuf); + return(error); +} + +int +coda_rmdir(v) + void *v; +{ +/* true args */ + struct vop_rmdir_args *ap = v; + struct vnode *dvp = ap->a_dvp; + struct cnode *dcp = VTOC(dvp); + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* true args */ + int error; + const char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + struct cnode *cp; + + MARK_ENTRY(CODA_RMDIR_STATS); + + /* Check for rmdir of control object. */ + if (IS_CTL_NAME(dvp, nm, len)) { + MARK_INT_FAIL(CODA_RMDIR_STATS); + return(ENOENT); + } + + /* We're being conservative here, it might be that this person + * doesn't really have sufficient access to delete the file + * but we feel zapping the entry won't really hurt anyone -- dcs + */ + /* + * As a side effect of the rmdir, remove any entries for children of + * the directory, especially "." and "..". + */ + cp = coda_nc_lookup(dcp, nm, len, cred); + if (cp) coda_nc_zapParentfid(&(cp->c_fid), NOT_DOWNCALL); + + /* Remove the file's entry from the CODA Name Cache */ + coda_nc_zapfile(dcp, nm, len); + + /* Invalidate the parent's attr cache, the modification time has changed */ + dcp->c_flags &= ~C_VATTR; + + error = venus_rmdir(vtomi(dvp), &dcp->c_fid, nm, len, cred, p); + + CODADEBUG(CODA_RMDIR, myprintf(("in rmdir result %d\n", error)); ) + + if ((cnp->cn_flags & SAVESTART) == 0) { + zfree(namei_zone, cnp->cn_pnbuf); + } + return(error); +} + +int +coda_symlink(v) + void *v; +{ +/* true args */ + struct vop_symlink_args *ap = v; + struct vnode *tdvp = ap->a_dvp; + struct cnode *tdcp = VTOC(tdvp); + struct componentname *cnp = ap->a_cnp; + struct vattr *tva = ap->a_vap; + char *path = ap->a_target; + struct ucred *cred = cnp->cn_cred; + struct proc *p = cnp->cn_proc; +/* locals */ + int error; + /* + * XXX I'm assuming the following things about coda_symlink's + * arguments: + * t(foo) is the new name/parent/etc being created. 
+ * lname is the contents of the new symlink. + */ + char *nm = cnp->cn_nameptr; + int len = cnp->cn_namelen; + int plen = strlen(path); + + /* XXX What about the vpp argument? Do we need it? */ + /* + * Here's the strategy for the moment: perform the symlink, then + * do a lookup to grab the resulting vnode. I know this requires + * two communications with Venus for a new sybolic link, but + * that's the way the ball bounces. I don't yet want to change + * the way the Mach symlink works. When Mach support is + * deprecated, we should change symlink so that the common case + * returns the resultant vnode in a vpp argument. + */ + + MARK_ENTRY(CODA_SYMLINK_STATS); + + /* Check for symlink of control object. */ + if (IS_CTL_NAME(tdvp, nm, len)) { + MARK_INT_FAIL(CODA_SYMLINK_STATS); + return(EACCES); + } + + if (plen+1 > CODA_MAXPATHLEN) { + MARK_INT_FAIL(CODA_SYMLINK_STATS); + return(EINVAL); + } + + if (len+1 > CODA_MAXNAMLEN) { + MARK_INT_FAIL(CODA_SYMLINK_STATS); + error = EINVAL; + goto exit; + } + + error = venus_symlink(vtomi(tdvp), &tdcp->c_fid, path, plen, nm, len, tva, cred, p); + + /* Invalidate the parent's attr cache, the modification time has changed */ + tdcp->c_flags &= ~C_VATTR; + + /* + * Free the name buffer + */ + if ((cnp->cn_flags & SAVESTART) == 0) { + zfree(namei_zone, cnp->cn_pnbuf); + } + + exit: + CODADEBUG(CODA_SYMLINK, myprintf(("in symlink result %d\n",error)); ) + return(error); +} + +/* + * Read directory entries. 
+ */ +int +coda_readdir(v) + void *v; +{ +/* true args */ + struct vop_readdir_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + register struct uio *uiop = ap->a_uio; + struct ucred *cred = ap->a_cred; + int *eofflag = ap->a_eofflag; + u_long **cookies = ap->a_cookies; + int *ncookies = ap->a_ncookies; + struct proc *p = ap->a_uio->uio_procp; +/* upcall decl */ +/* locals */ + int error = 0; + + MARK_ENTRY(CODA_READDIR_STATS); + + CODADEBUG(CODA_READDIR, myprintf(("coda_readdir(%p, %d, %qd, %d)\n", uiop->uio_iov->iov_base, uiop->uio_resid, uiop->uio_offset, uiop->uio_segflg)); ) + + /* Check for readdir of control object. */ + if (IS_CTL_VP(vp)) { + MARK_INT_FAIL(CODA_READDIR_STATS); + return(ENOENT); + } + + { + /* If directory is not already open do an "internal open" on it. */ + int opened_internally = 0; + if (cp->c_ovp == NULL) { + opened_internally = 1; + MARK_INT_GEN(CODA_OPEN_STATS); + error = VOP_OPEN(vp, FREAD, cred, p); +printf("coda_readdir: Internally Opening %p\n", vp); + if (error) { + printf("coda_readdir: VOP_OPEN on container failed %d\n", error); + return (error); + } + if (vp->v_type == VREG) { + error = vfs_object_create(vp, p, cred); + if (error != 0) { + printf("coda_readdir: vfs_object_create() returns %d\n", error); + vput(vp); + } + } + if (error) return(error); + } + + /* Have UFS handle the call. */ + CODADEBUG(CODA_READDIR, myprintf(("indirect readdir: fid = (%lx.%lx.%lx), refcnt = %d\n",cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique, vp->v_usecount)); ) + error = VOP_READDIR(cp->c_ovp, uiop, cred, eofflag, ncookies, + cookies); + + if (error) + MARK_INT_FAIL(CODA_READDIR_STATS); + else + MARK_INT_SAT(CODA_READDIR_STATS); + + /* Do an "internal close" if necessary. 
*/ + if (opened_internally) { + MARK_INT_GEN(CODA_CLOSE_STATS); + (void)VOP_CLOSE(vp, FREAD, cred, p); + } + } + + return(error); +} + +/* + * Convert from file system blocks to device blocks + */ +int +coda_bmap(v) + void *v; +{ + /* XXX on the global proc */ +/* true args */ + struct vop_bmap_args *ap = v; + struct vnode *vp __attribute__((unused)) = ap->a_vp; /* file's vnode */ + daddr_t bn __attribute__((unused)) = ap->a_bn; /* fs block number */ + struct vnode **vpp = ap->a_vpp; /* RETURN vp of device */ + daddr_t *bnp __attribute__((unused)) = ap->a_bnp; /* RETURN device block number */ + struct proc *p __attribute__((unused)) = curproc; +/* upcall decl */ +/* locals */ + + int ret = 0; + struct cnode *cp; + + cp = VTOC(vp); + if (cp->c_ovp) { + return EINVAL; + ret = VOP_BMAP(cp->c_ovp, bn, vpp, bnp, ap->a_runp, ap->a_runb); +#if 0 + printf("VOP_BMAP(cp->c_ovp %p, bn %p, vpp %p, bnp %p, ap->a_runp %p, ap->a_runb %p) = %d\n", + cp->c_ovp, bn, vpp, bnp, ap->a_runp, ap->a_runb, ret); +#endif + return ret; + } else { +#if 0 + printf("coda_bmap: no container\n"); +#endif + return(EOPNOTSUPP); + } +} + +/* + * I don't think the following two things are used anywhere, so I've + * commented them out + * + * struct buf *async_bufhead; + * int async_daemon_count; + */ +int +coda_strategy(v) + void *v; +{ +/* true args */ + struct vop_strategy_args *ap = v; + register struct buf *bp __attribute__((unused)) = ap->a_bp; + struct proc *p __attribute__((unused)) = curproc; +/* upcall decl */ +/* locals */ + + printf("coda_strategy: called ???\n"); + return(EOPNOTSUPP); +} + +int +coda_reclaim(v) + void *v; +{ +/* true args */ + struct vop_reclaim_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); +/* upcall decl */ +/* locals */ + +/* + * Forced unmount/flush will let vnodes with non zero use be destroyed! 
+ */ + ENTRY; + + if (IS_UNMOUNTING(cp)) { +#ifdef DEBUG + if (VTOC(vp)->c_ovp) { + if (IS_UNMOUNTING(cp)) + printf("coda_reclaim: c_ovp not void: vp %p, cp %p\n", vp, cp); + } +#endif + } else { +#ifdef OLD_DIAGNOSTIC + if (vp->v_usecount != 0) + print("coda_reclaim: pushing active %p\n", vp); + if (VTOC(vp)->c_ovp) { + panic("coda_reclaim: c_ovp not void"); + } +#endif + } + cache_purge(vp); + coda_free(VTOC(vp)); + VTOC(vp) = NULL; + return (0); +} + +int +coda_lock(v) + void *v; +{ +/* true args */ + struct vop_lock_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct proc *p = ap->a_p; +/* upcall decl */ +/* locals */ + + ENTRY; + + if (coda_lockdebug) { + myprintf(("Attempting lock on %lx.%lx.%lx\n", + cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique)); + } + +#ifndef DEBUG_LOCKS + return (lockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, p)); +#else + return (debuglockmgr(&cp->c_lock, ap->a_flags, &vp->v_interlock, p, + "coda_lock", vp->filename, vp->line)); +#endif +} + +int +coda_unlock(v) + void *v; +{ +/* true args */ + struct vop_unlock_args *ap = v; + struct vnode *vp = ap->a_vp; + struct cnode *cp = VTOC(vp); + struct proc *p = ap->a_p; +/* upcall decl */ +/* locals */ + + ENTRY; + if (coda_lockdebug) { + myprintf(("Attempting unlock on %lx.%lx.%lx\n", + cp->c_fid.Volume, cp->c_fid.Vnode, cp->c_fid.Unique)); + } + + return (lockmgr(&cp->c_lock, ap->a_flags | LK_RELEASE, &vp->v_interlock, p)); +} + +int +coda_islocked(v) + void *v; +{ +/* true args */ + struct vop_islocked_args *ap = v; + struct cnode *cp = VTOC(ap->a_vp); + ENTRY; + + return (lockstatus(&cp->c_lock)); +} + +/* How one looks up a vnode given a device/inode pair: */ +int +coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp) +{ + /* This is like VFS_VGET() or igetinode()! 
*/ + int error; + struct mount *mp; + + if (!(mp = devtomp(dev))) { + myprintf(("coda_grab_vnode: devtomp(%d) returns NULL\n", dev)); + return(ENXIO); + } + + /* XXX - ensure that nonzero-return means failure */ + error = VFS_VGET(mp,ino,vpp); + if (error) { + myprintf(("coda_grab_vnode: iget/vget(%d, %d) returns %p, err %d\n", + dev, ino, *vpp, error)); + return(ENOENT); + } + return(0); +} + +void +print_vattr( attr ) + struct vattr *attr; +{ + char *typestr; + + switch (attr->va_type) { + case VNON: + typestr = "VNON"; + break; + case VREG: + typestr = "VREG"; + break; + case VDIR: + typestr = "VDIR"; + break; + case VBLK: + typestr = "VBLK"; + break; + case VCHR: + typestr = "VCHR"; + break; + case VLNK: + typestr = "VLNK"; + break; + case VSOCK: + typestr = "VSCK"; + break; + case VFIFO: + typestr = "VFFO"; + break; + case VBAD: + typestr = "VBAD"; + break; + default: + typestr = "????"; + break; + } + + + myprintf(("attr: type %s mode %d uid %d gid %d fsid %d rdev %d\n", + typestr, (int)attr->va_mode, (int)attr->va_uid, + (int)attr->va_gid, (int)attr->va_fsid, (int)attr->va_rdev)); + + myprintf((" fileid %d nlink %d size %d blocksize %d bytes %d\n", + (int)attr->va_fileid, (int)attr->va_nlink, + (int)attr->va_size, + (int)attr->va_blocksize,(int)attr->va_bytes)); + myprintf((" gen %ld flags %ld vaflags %d\n", + attr->va_gen, attr->va_flags, attr->va_vaflags)); + myprintf((" atime sec %d nsec %d\n", + (int)attr->va_atime.tv_sec, (int)attr->va_atime.tv_nsec)); + myprintf((" mtime sec %d nsec %d\n", + (int)attr->va_mtime.tv_sec, (int)attr->va_mtime.tv_nsec)); + myprintf((" ctime sec %d nsec %d\n", + (int)attr->va_ctime.tv_sec, (int)attr->va_ctime.tv_nsec)); +} + +/* How to print a ucred */ +void +print_cred(cred) + struct ucred *cred; +{ + + int i; + + myprintf(("ref %d\tuid %d\n",cred->cr_ref,cred->cr_uid)); + + for (i=0; i < cred->cr_ngroups; i++) + myprintf(("\tgroup %d: (%d)\n",i,cred->cr_groups[i])); + myprintf(("\n")); + +} + +/* + * Return a vnode for the 
given fid. + * If no cnode exists for this fid create one and put it + * in a table hashed by fid.Volume and fid.Vnode. If the cnode for + * this fid is already in the table return it (ref count is + * incremented by coda_find. The cnode will be flushed from the + * table when coda_inactive calls coda_unsave. + */ +struct cnode * +make_coda_node(fid, vfsp, type) + ViceFid *fid; struct mount *vfsp; short type; +{ + struct cnode *cp; + int err; + + if ((cp = coda_find(fid)) == NULL) { + struct vnode *vp; + + cp = coda_alloc(); + lockinit(&cp->c_lock, PINOD, "cnode", 0, 0); + cp->c_fid = *fid; + + err = getnewvnode(VT_CODA, vfsp, coda_vnodeop_p, &vp); + if (err) { + panic("coda: getnewvnode returned error %d\n", err); + } + vp->v_data = cp; + vp->v_type = type; + cp->c_vnode = vp; + coda_save(cp); + + } else { + vref(CTOV(cp)); + } + + return cp; +} diff --git a/sys/fs/coda/coda_vnops.h b/sys/fs/coda/coda_vnops.h new file mode 100644 index 0000000..6c787d5 --- /dev/null +++ b/sys/fs/coda/coda_vnops.h @@ -0,0 +1,142 @@ +/* + * + * Coda: an Experimental Distributed File System + * Release 3.1 + * + * Copyright (c) 1987-1998 Carnegie Mellon University + * All Rights Reserved + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation, and + * that credit is given to Carnegie Mellon University in all documents + * and publicity pertaining to direct or indirect use of this code or its + * derivatives. + * + * CODA IS AN EXPERIMENTAL SOFTWARE SYSTEM AND IS KNOWN TO HAVE BUGS, + * SOME OF WHICH MAY HAVE SERIOUS CONSEQUENCES. CARNEGIE MELLON ALLOWS + * FREE USE OF THIS SOFTWARE IN ITS "AS IS" CONDITION. 
CARNEGIE MELLON + * DISCLAIMS ANY LIABILITY OF ANY KIND FOR ANY DAMAGES WHATSOEVER + * RESULTING DIRECTLY OR INDIRECTLY FROM THE USE OF THIS SOFTWARE OR OF + * ANY DERIVATIVE WORK. + * + * Carnegie Mellon encourages users of this software to return any + * improvements or extensions that they make, and to grant Carnegie + * Mellon the rights to redistribute these changes without encumbrance. + * + * @(#) src/sys/coda/coda_vnops.h,v 1.1.1.1 1998/08/29 21:14:52 rvb Exp $ + * $Id: coda_vnops.h,v 1.3 1998/09/11 18:50:17 rvb Exp $ + * + */ + +/* + * Mach Operating System + * Copyright (c) 1990 Carnegie-Mellon University + * Copyright (c) 1989 Carnegie-Mellon University + * All rights reserved. The CMU software License Agreement specifies + * the terms and conditions for use and redistribution. + */ + +/* + * This code was written for the Coda file system at Carnegie Mellon + * University. Contributers include David Steere, James Kistler, and + * M. Satyanarayanan. + */ + +/* + * HISTORY + * $Log: coda_vnops.h,v $ + * Revision 1.3 1998/09/11 18:50:17 rvb + * All the references to cfs, in symbols, structs, and strings + * have been changed to coda. (Same for CFS.) + * + * Revision 1.2 1998/09/02 19:09:53 rvb + * Pass2 complete + * + * Revision 1.1.1.1 1998/08/29 21:14:52 rvb + * Very Preliminary Coda + * + * Revision 1.7 1998/08/28 18:12:24 rvb + * Now it also works on FreeBSD -current. This code will be + * committed to the FreeBSD -current and NetBSD -current + * trees. It will then be tailored to the particular platform + * by flushing conditional code. 
+ * + * Revision 1.6 1998/08/18 17:05:22 rvb + * Don't use __RCSID now + * + * Revision 1.5 1998/08/18 16:31:47 rvb + * Sync the code for NetBSD -current; test on 1.3 later + * + * Revision 1.4 98/01/23 11:53:49 rvb + * Bring RVB_CODA1_1 to HEAD + * + * Revision 1.3.2.3 98/01/23 11:21:13 rvb + * Sync with 2.2.5 + * + * Revision 1.3.2.2 97/12/16 12:40:20 rvb + * Sync with 1.3 + * + * Revision 1.3.2.1 97/12/10 14:08:34 rvb + * Fix O_ flags; check result in coda_call + * + * Revision 1.3 97/12/05 10:39:25 rvb + * Read CHANGES + * + * Revision 1.2.34.2 97/11/20 11:46:54 rvb + * Capture current cfs_venus + * + * Revision 1.2.34.1 97/11/13 22:03:04 rvb + * pass2 cfs_NetBSD.h mt + * + * Revision 1.2 96/01/02 16:57:14 bnoble + * Added support for Coda MiniCache and raw inode calls (final commit) + * + * Revision 1.1.2.1 1995/12/20 01:57:40 bnoble + * Added CODA-specific files + * + */ + +/* NetBSD interfaces to the vnodeops */ +int coda_open __P((void *)); +int coda_close __P((void *)); +int coda_read __P((void *)); +int coda_write __P((void *)); +int coda_ioctl __P((void *)); +/* 1.3 int cfs_select __P((void *));*/ +int coda_getattr __P((void *)); +int coda_setattr __P((void *)); +int coda_access __P((void *)); +int coda_abortop __P((void *)); +int coda_readlink __P((void *)); +int coda_fsync __P((void *)); +int coda_inactive __P((void *)); +int coda_lookup __P((void *)); +int coda_create __P((void *)); +int coda_remove __P((void *)); +int coda_link __P((void *)); +int coda_rename __P((void *)); +int coda_mkdir __P((void *)); +int coda_rmdir __P((void *)); +int coda_symlink __P((void *)); +int coda_readdir __P((void *)); +int coda_bmap __P((void *)); +int coda_strategy __P((void *)); +int coda_reclaim __P((void *)); +int coda_lock __P((void *)); +int coda_unlock __P((void *)); +int coda_islocked __P((void *)); +int coda_vop_error __P((void *)); +int coda_vop_nop __P((void *)); +int coda_fbsd_getpages __P((void *)); +int coda_fbsd_putpages __P((void *)); + +int 
(**coda_vnodeop_p)(void *); + +int coda_rdwr(struct vnode *vp, struct uio *uiop, enum uio_rw rw, + int ioflag, struct ucred *cred, struct proc *p); +int coda_grab_vnode(dev_t dev, ino_t ino, struct vnode **vpp); +void print_vattr(struct vattr *attr); +void print_cred(struct ucred *cred); diff --git a/sys/fs/deadfs/dead_vnops.c b/sys/fs/deadfs/dead_vnops.c new file mode 100644 index 0000000..4e3853c --- /dev/null +++ b/sys/fs/deadfs/dead_vnops.c @@ -0,0 +1,296 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)dead_vnops.c 8.1 (Berkeley) 6/10/93 + * $Id: dead_vnops.c,v 1.24 1998/08/23 11:43:29 bde Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/vnode.h> +#include <sys/buf.h> +#include <sys/poll.h> + +static int chkvnlock __P((struct vnode *)); +/* + * Prototypes for dead operations on vnodes. + */ +static int dead_badop __P((void)); +static int dead_bmap __P((struct vop_bmap_args *)); +static int dead_ioctl __P((struct vop_ioctl_args *)); +static int dead_lock __P((struct vop_lock_args *)); +static int dead_lookup __P((struct vop_lookup_args *)); +static int dead_open __P((struct vop_open_args *)); +static int dead_poll __P((struct vop_poll_args *)); +static int dead_print __P((struct vop_print_args *)); +static int dead_read __P((struct vop_read_args *)); +static int dead_write __P((struct vop_write_args *)); + +vop_t **dead_vnodeop_p; +static struct vnodeopv_entry_desc dead_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_access_desc, (vop_t *) vop_ebadf }, + { &vop_advlock_desc, (vop_t *) vop_ebadf }, + { &vop_bmap_desc, (vop_t *) dead_bmap }, + { &vop_create_desc, (vop_t *) dead_badop }, + { &vop_getattr_desc, (vop_t *) vop_ebadf }, + { &vop_inactive_desc, (vop_t *) vop_null }, + { &vop_ioctl_desc, (vop_t *) dead_ioctl }, + { &vop_link_desc, (vop_t *) dead_badop }, + { &vop_lock_desc, (vop_t *) dead_lock }, + { 
&vop_lookup_desc, (vop_t *) dead_lookup }, + { &vop_mkdir_desc, (vop_t *) dead_badop }, + { &vop_mknod_desc, (vop_t *) dead_badop }, + { &vop_mmap_desc, (vop_t *) dead_badop }, + { &vop_open_desc, (vop_t *) dead_open }, + { &vop_pathconf_desc, (vop_t *) vop_ebadf }, /* per pathconf(2) */ + { &vop_poll_desc, (vop_t *) dead_poll }, + { &vop_print_desc, (vop_t *) dead_print }, + { &vop_read_desc, (vop_t *) dead_read }, + { &vop_readdir_desc, (vop_t *) vop_ebadf }, + { &vop_readlink_desc, (vop_t *) vop_ebadf }, + { &vop_reclaim_desc, (vop_t *) vop_null }, + { &vop_remove_desc, (vop_t *) dead_badop }, + { &vop_rename_desc, (vop_t *) dead_badop }, + { &vop_rmdir_desc, (vop_t *) dead_badop }, + { &vop_setattr_desc, (vop_t *) vop_ebadf }, + { &vop_symlink_desc, (vop_t *) dead_badop }, + { &vop_write_desc, (vop_t *) dead_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc dead_vnodeop_opv_desc = + { &dead_vnodeop_p, dead_vnodeop_entries }; + +VNODEOP_SET(dead_vnodeop_opv_desc); + +/* + * Trivial lookup routine that always fails. + */ +/* ARGSUSED */ +static int +dead_lookup(ap) + struct vop_lookup_args /* { + struct vnode * a_dvp; + struct vnode ** a_vpp; + struct componentname * a_cnp; + } */ *ap; +{ + + *ap->a_vpp = NULL; + return (ENOTDIR); +} + +/* + * Open always fails as if device did not exist. 
+ */ +/* ARGSUSED */ +static int +dead_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + return (ENXIO); +} + +/* + * Vnode op for read + */ +/* ARGSUSED */ +static int +dead_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + + if (chkvnlock(ap->a_vp)) + panic("dead_read: lock"); + /* + * Return EOF for tty devices, EIO for others + */ + if ((ap->a_vp->v_flag & VISTTY) == 0) + return (EIO); + return (0); +} + +/* + * Vnode op for write + */ +/* ARGSUSED */ +static int +dead_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + + if (chkvnlock(ap->a_vp)) + panic("dead_write: lock"); + return (EIO); +} + +/* + * Device ioctl operation. + */ +/* ARGSUSED */ +static int +dead_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + if (!chkvnlock(ap->a_vp)) + return (ENOTTY); + return (VCALL(ap->a_vp, VOFFSET(vop_ioctl), ap)); +} + + +/* + * Wait until the vnode has finished changing state. + */ +static int +dead_lock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + /* + * Since we are not using the lock manager, we must clear + * the interlock here. + */ + if (ap->a_flags & LK_INTERLOCK) { + simple_unlock(&vp->v_interlock); + ap->a_flags &= ~LK_INTERLOCK; + } + if (!chkvnlock(vp)) + return (0); + return (VCALL(vp, VOFFSET(vop_lock), ap)); +} + +/* + * Wait until the vnode has finished changing state. 
+ */ +static int +dead_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + + if (!chkvnlock(ap->a_vp)) + return (EIO); + return (VOP_BMAP(ap->a_vp, ap->a_bn, ap->a_vpp, ap->a_bnp, ap->a_runp, ap->a_runb)); +} + +/* + * Print out the contents of a dead vnode. + */ +/* ARGSUSED */ +static int +dead_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + printf("tag VT_NON, dead vnode\n"); + return (0); +} + +/* + * Empty vnode bad operation + */ +static int +dead_badop() +{ + + panic("dead_badop called"); + /* NOTREACHED */ +} + +/* + * We have to wait during times when the vnode is + * in a state of change. + */ +int +chkvnlock(vp) + register struct vnode *vp; +{ + int locked = 0; + + while (vp->v_flag & VXLOCK) { + vp->v_flag |= VXWANT; + (void) tsleep((caddr_t)vp, PINOD, "ckvnlk", 0); + locked = 1; + } + return (locked); +} + +/* + * Trivial poll routine that always returns POLLHUP. + * This is necessary so that a process which is polling a file + * gets notified when that file is revoke()d. + */ +static int +dead_poll(ap) + struct vop_poll_args *ap; +{ + return (POLLHUP); +} diff --git a/sys/fs/fdescfs/fdesc.h b/sys/fs/fdescfs/fdesc.h new file mode 100644 index 0000000..bbba54d --- /dev/null +++ b/sys/fs/fdescfs/fdesc.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)fdesc.h 8.5 (Berkeley) 1/21/94 + * + * $Id: fdesc.h,v 1.5 1997/02/22 09:40:14 peter Exp $ + */ + +#ifdef KERNEL +struct fdescmount { + struct vnode *f_root; /* Root node */ +}; + +#define FD_ROOT 2 +#define FD_DEVFD 3 +#define FD_STDIN 4 +#define FD_STDOUT 5 +#define FD_STDERR 6 +#define FD_CTTY 7 +#define FD_DESC 8 +#define FD_MAX 12 + +typedef enum { + Froot, + Fdevfd, + Fdesc, + Flink, + Fctty +} fdntype; + +struct fdescnode { + LIST_ENTRY(fdescnode) fd_hash; /* Hash list */ + struct vnode *fd_vnode; /* Back ptr to vnode */ + fdntype fd_type; /* Type of this node */ + unsigned fd_fd; /* Fd to be dup'ed */ + char *fd_link; /* Link to fd/n */ + int fd_ix; /* filesystem index */ +}; + +#define VFSTOFDESC(mp) ((struct fdescmount *)((mp)->mnt_data)) +#define VTOFDESC(vp) ((struct fdescnode *)(vp)->v_data) + +extern dev_t devctty; +extern int fdesc_init __P((struct vfsconf *)); +extern int fdesc_root __P((struct mount *, struct vnode **)); +extern int fdesc_allocvp __P((fdntype, int, struct mount *, struct vnode **)); +#endif /* KERNEL */ diff --git a/sys/fs/fdescfs/fdesc_vfsops.c b/sys/fs/fdescfs/fdesc_vfsops.c new file mode 100644 index 0000000..758f3b5 --- /dev/null +++ b/sys/fs/fdescfs/fdesc_vfsops.c @@ -0,0 +1,264 @@ +/* + * Copyright (c) 1992, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)fdesc_vfsops.c 8.4 (Berkeley) 1/21/94 + * + * $Id: fdesc_vfsops.c,v 1.17 1999/01/12 11:49:30 eivind Exp $ + */ + +/* + * /dev/fd Filesystem + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/resourcevar.h> +#include <sys/filedesc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <miscfs/fdesc/fdesc.h> + +static MALLOC_DEFINE(M_FDESCMNT, "FDESC mount", "FDESC mount structure"); + +static int fdesc_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int fdesc_start __P((struct mount *mp, int flags, struct proc *p)); +static int fdesc_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); +static int fdesc_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); +static int fdesc_sync __P((struct mount *mp, int waitfor, + struct ucred *cred, struct proc *p)); + +/* + * Mount the per-process file descriptors (/dev/fd) + */ +static int +fdesc_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + int error = 0; + u_int size; + struct fdescmount *fmp; + struct vnode *rvp; + + /* + * Update is a no-op + */ + if (mp->mnt_flag & MNT_UPDATE) + return (EOPNOTSUPP); + + error = fdesc_allocvp(Froot, FD_ROOT, mp, &rvp); + if (error) + return (error); + + MALLOC(fmp, struct fdescmount *, sizeof(struct fdescmount), + M_FDESCMNT, M_WAITOK); /* XXX */ + rvp->v_type = VDIR; + rvp->v_flag |= VROOT; + fmp->f_root = rvp; + /* XXX -- don't mark as local to work around fts() problems */ + /*mp->mnt_flag |= MNT_LOCAL;*/ + mp->mnt_data = (qaddr_t) fmp; + vfs_getnewfsid(mp); + + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + bzero(mp->mnt_stat.f_mntfromname, MNAMELEN); + bcopy("fdesc", mp->mnt_stat.f_mntfromname, sizeof("fdesc")); + 
(void)fdesc_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +static int +fdesc_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + return (0); +} + +static int +fdesc_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + int error; + int flags = 0; + struct vnode *rootvp = VFSTOFDESC(mp)->f_root; + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + /* + * Clear out buffer cache. I don't think we + * ever get anything cached at this level at the + * moment, but who knows... + */ + if (rootvp->v_usecount > 1) + return (EBUSY); + if ((error = vflush(mp, rootvp, flags)) != 0) + return (error); + + /* + * Release reference on underlying root vnode + */ + vrele(rootvp); + /* + * And blow it away for future re-use + */ + vgone(rootvp); + /* + * Finally, throw away the fdescmount structure + */ + free(mp->mnt_data, M_FDESCMNT); /* XXX */ + mp->mnt_data = 0; + + return (0); +} + +int +fdesc_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + + /* + * Return locked reference to root. + */ + vp = VFSTOFDESC(mp)->f_root; + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + *vpp = vp; + return (0); +} + +static int +fdesc_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + struct filedesc *fdp; + int lim; + int i; + int last; + int freefd; + + /* + * Compute number of free file descriptors. + * [ Strange results will ensue if the open file + * limit is ever reduced below the current number + * of open files... ] + */ + lim = p->p_rlimit[RLIMIT_NOFILE].rlim_cur; + fdp = p->p_fd; + last = min(fdp->fd_nfiles, lim); + freefd = 0; + for (i = fdp->fd_freefile; i < last; i++) + if (fdp->fd_ofiles[i] == NULL) + freefd++; + + /* + * Adjust for the fact that the fdesc array may not + * have been fully allocated yet. 
+ */ + if (fdp->fd_nfiles < lim) + freefd += (lim - fdp->fd_nfiles); + + sbp->f_flags = 0; + sbp->f_bsize = DEV_BSIZE; + sbp->f_iosize = DEV_BSIZE; + sbp->f_blocks = 2; /* 1K to keep df happy */ + sbp->f_bfree = 0; + sbp->f_bavail = 0; + sbp->f_files = lim + 1; /* Allow for "." */ + sbp->f_ffree = freefd; /* See comments above */ + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return (0); +} + +static int +fdesc_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + + return (0); +} + +#define fdesc_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define fdesc_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define fdesc_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define fdesc_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define fdesc_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) + +static struct vfsops fdesc_vfsops = { + fdesc_mount, + fdesc_start, + fdesc_unmount, + fdesc_root, + fdesc_quotactl, + fdesc_statfs, + fdesc_sync, + fdesc_vget, + fdesc_fhtovp, + fdesc_vptofh, + fdesc_init, +}; + +VFS_SET(fdesc_vfsops, fdesc, VFCF_SYNTHETIC); diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c new file mode 100644 index 0000000..6bdea5f --- /dev/null +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -0,0 +1,872 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)fdesc_vnops.c 8.9 (Berkeley) 1/21/94 + * + * $Id: fdesc_vnops.c,v 1.40 1998/12/14 05:00:57 dillon Exp $ + */ + +/* + * /dev/fd Filesystem + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/kernel.h> /* boottime */ +#include <sys/filedesc.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/dirent.h> +#include <sys/socketvar.h> +#include <sys/conf.h> +#include <miscfs/fdesc/fdesc.h> + +extern struct cdevsw ctty_cdevsw; + +#define cttyvp(p) ((p)->p_flag & P_CONTROLT ? (p)->p_session->s_ttyvp : NULL) + +#define FDL_WANT 0x01 +#define FDL_LOCKED 0x02 +static int fdcache_lock; + +static vop_t **fdesc_vnodeop_p; + +dev_t devctty; + +#if (FD_STDIN != FD_STDOUT-1) || (FD_STDOUT != FD_STDERR-1) +FD_STDIN, FD_STDOUT, FD_STDERR must be a sequence n, n+1, n+2 +#endif + +#define NFDCACHE 4 +#define FD_NHASH(ix) \ + (&fdhashtbl[(ix) & fdhash]) +static LIST_HEAD(fdhashhead, fdescnode) *fdhashtbl; +static u_long fdhash; + +static int fdesc_attr __P((int fd, struct vattr *vap, struct ucred *cred, + struct proc *p)); +static int fdesc_badop __P((void)); +static int fdesc_getattr __P((struct vop_getattr_args *ap)); +static int fdesc_inactive __P((struct vop_inactive_args *ap)); +static int fdesc_ioctl __P((struct vop_ioctl_args *ap)); +static int fdesc_lookup __P((struct vop_lookup_args *ap)); +static int fdesc_open __P((struct vop_open_args *ap)); +static int fdesc_print __P((struct vop_print_args *ap)); +static int fdesc_read __P((struct vop_read_args *ap)); +static int fdesc_readdir __P((struct vop_readdir_args *ap)); +static int fdesc_readlink __P((struct vop_readlink_args *ap)); +static int fdesc_reclaim __P((struct vop_reclaim_args *ap)); +static int fdesc_poll __P((struct vop_poll_args *ap)); +static int fdesc_setattr __P((struct vop_setattr_args *ap)); +static int fdesc_write __P((struct vop_write_args *ap)); + +/* + 
* Initialise cache headers
 *
 * Sets the global devctty device number and allocates the fdescnode
 * hash table (fdhashtbl, with its mask in fdhash).  Always returns 0.
 */
int
fdesc_init(vfsp)
	struct vfsconf *vfsp;
{

	devctty = makedev(nchrdev, 0);
	fdhashtbl = hashinit(NFDCACHE, M_CACHE, &fdhash);
	return (0);
}

/*
 * Find or create the fdesc vnode identified by (ix, mp).
 *
 * ftype	node type recorded in a newly created fdescnode
 * ix		index used both as hash key and as fd_ix
 * mp		mount the vnode must belong to
 * vpp		receives the vnode
 *
 * Returns 0 on success, or the error from getnewvnode().  On a cache
 * hit the existing vnode is returned after a successful vget().  A
 * newly created vnode is returned exactly as getnewvnode() produced it
 * apart from v_data; this function does not set v_type.
 */
int
fdesc_allocvp(ftype, ix, mp, vpp)
	fdntype ftype;
	int ix;
	struct mount *mp;
	struct vnode **vpp;
{
	struct proc *p = curproc;	/* XXX */
	struct fdhashhead *fc;
	struct fdescnode *fd;
	int error = 0;

	fc = FD_NHASH(ix);
loop:
	/* Search the hash chain; restart from the top if vget() fails. */
	for (fd = fc->lh_first; fd != 0; fd = fd->fd_hash.le_next) {
		if (fd->fd_ix == ix && fd->fd_vnode->v_mount == mp) {
			if (vget(fd->fd_vnode, 0, p))
				goto loop;
			*vpp = fd->fd_vnode;
			return (error);
		}
	}

	/*
	 * otherwise lock the array while we call getnewvnode
	 * since that can block.
	 */
	/*
	 * fdcache_lock is a hand-rolled sleep lock: a waiter sets FDL_WANT,
	 * sleeps on its address, and re-runs the lookup once woken because
	 * the holder may have inserted the node being looked for.
	 */
	if (fdcache_lock & FDL_LOCKED) {
		fdcache_lock |= FDL_WANT;
		(void) tsleep((caddr_t) &fdcache_lock, PINOD, "fdalvp", 0);
		goto loop;
	}
	fdcache_lock |= FDL_LOCKED;

	/*
	 * Do the MALLOC before the getnewvnode since doing so afterward
	 * might cause a bogus v_data pointer to get dereferenced
	 * elsewhere if MALLOC should block.
	 */
	MALLOC(fd, struct fdescnode *, sizeof(struct fdescnode), M_TEMP, M_WAITOK);

	error = getnewvnode(VT_FDESC, mp, fdesc_vnodeop_p, vpp);
	if (error) {
		FREE(fd, M_TEMP);
		goto out;
	}
	/* Cross-link vnode and node, then publish the node in the hash. */
	(*vpp)->v_data = fd;
	fd->fd_vnode = *vpp;
	fd->fd_type = ftype;
	fd->fd_fd = -1;
	fd->fd_link = 0;
	fd->fd_ix = ix;
	LIST_INSERT_HEAD(fc, fd, fd_hash);

out:;
	/* Drop the lock and wake anyone who asked for it meanwhile. */
	fdcache_lock &= ~FDL_LOCKED;

	if (fdcache_lock & FDL_WANT) {
		fdcache_lock &= ~FDL_WANT;
		wakeup((caddr_t) &fdcache_lock);
	}

	return (error);
}

/*
 * vp is the current namei directory
 * ndp is the name to locate in that directory...
*/
/*
 * Lookup in an fdesc directory.
 *
 * In the root directory the names "fd", "tty", "stdin", "stdout" and
 * "stderr" are recognised; in the fd directory ".." and decimal
 * descriptor numbers are recognised.  DELETE and RENAME lookups fail
 * with EROFS.
 *
 * On success *a_vpp holds the result vnode and 0 is returned.  The
 * result is locked LK_SHARED, except for ".." in the fd directory,
 * which comes from fdesc_root() and is locked LK_EXCLUSIVE.  On failure
 * a_dvp is (re)locked LK_SHARED, *a_vpp is NULL and an errno is returned.
 */
static int
fdesc_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap;
{
	struct vnode **vpp = ap->a_vpp;
	struct vnode *dvp = ap->a_dvp;
	struct componentname *cnp = ap->a_cnp;
	char *pname = cnp->cn_nameptr;
	struct proc *p = cnp->cn_proc;
	int nfiles = p->p_fd->fd_nfiles;
	unsigned fd = -1;
	int error;
	struct vnode *fvp;
	char *ln;

	/*
	 * NOTE(review): this jump reaches "bad" before dvp has been
	 * unlocked below, so "bad" calls vn_lock() on a vnode this function
	 * never unlocked -- confirm that is harmless for this filesystem.
	 */
	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) {
		error = EROFS;
		goto bad;
	}

	VOP_UNLOCK(dvp, 0, p);
	/* "." resolves to the directory itself, re-referenced and relocked. */
	if (cnp->cn_namelen == 1 && *pname == '.') {
		*vpp = dvp;
		VREF(dvp);
		vn_lock(dvp, LK_SHARED | LK_RETRY, p);
		return (0);
	}

	switch (VTOFDESC(dvp)->fd_type) {
	default:
	case Flink:
	case Fdesc:
	case Fctty:
		/* Only Froot and Fdevfd nodes are directories. */
		error = ENOTDIR;
		goto bad;

	case Froot:
		if (cnp->cn_namelen == 2 && bcmp(pname, "fd", 2) == 0) {
			error = fdesc_allocvp(Fdevfd, FD_DEVFD, dvp->v_mount, &fvp);
			if (error)
				goto bad;
			*vpp = fvp;
			fvp->v_type = VDIR;
			vn_lock(fvp, LK_SHARED | LK_RETRY, p);
			return (0);
		}

		if (cnp->cn_namelen == 3 && bcmp(pname, "tty", 3) == 0) {
			/* "tty" exists only when the process has a controlling tty. */
			struct vnode *ttyvp = cttyvp(p);
			if (ttyvp == NULL) {
				error = ENXIO;
				goto bad;
			}
			error = fdesc_allocvp(Fctty, FD_CTTY, dvp->v_mount, &fvp);
			if (error)
				goto bad;
			*vpp = fvp;
			fvp->v_type = VFIFO;
			vn_lock(fvp, LK_SHARED | LK_RETRY, p);
			return (0);
		}

		/* stdin/stdout/stderr become symlinks with target fd/0, fd/1, fd/2. */
		ln = 0;
		switch (cnp->cn_namelen) {
		case 5:
			if (bcmp(pname, "stdin", 5) == 0) {
				ln = "fd/0";
				fd = FD_STDIN;
			}
			break;
		case 6:
			if (bcmp(pname, "stdout", 6) == 0) {
				ln = "fd/1";
				fd = FD_STDOUT;
			} else
			if (bcmp(pname, "stderr", 6) == 0) {
				ln = "fd/2";
				fd = FD_STDERR;
			}
			break;
		}

		if (ln) {
			error = fdesc_allocvp(Flink, fd, dvp->v_mount, &fvp);
			if (error)
				goto bad;
			VTOFDESC(fvp)->fd_link = ln;
			*vpp = fvp;
			fvp->v_type = VLNK;
			vn_lock(fvp, LK_SHARED | LK_RETRY, p);
			return (0);
		} else {
			error = ENOENT;
			goto bad;
		}

		/*
		 * Not reached: both branches above leave the function, so
		 * the Froot case never falls into Fdevfd.
		 */

	case Fdevfd:
		if (cnp->cn_namelen == 2 && bcmp(pname, "..", 2) == 0) {
			if ((error = fdesc_root(dvp->v_mount, vpp)) != 0)
				goto bad;
			return (0);
		}

		/*
		 * Parse a decimal descriptor number.  The loop stops as soon
		 * as the value reaches nfiles, which also bounds the
		 * arithmetic; fd is unsigned, so the comparisons with the int
		 * nfiles are done as unsigned.
		 */
		fd = 0;
		while (*pname >= '0' && *pname <= '9') {
			fd = 10 * fd + *pname++ - '0';
			if (fd >= nfiles)
				break;
		}

		/* Anything left over means the name was not purely numeric. */
		if (*pname != '\0') {
			error = ENOENT;
			goto bad;
		}

		if (fd >= nfiles || p->p_fd->fd_ofiles[fd] == NULL) {
			error = EBADF;
			goto bad;
		}

		/* v_type is not set here; fdesc_getattr() assigns it later. */
		error = fdesc_allocvp(Fdesc, FD_DESC+fd, dvp->v_mount, &fvp);
		if (error)
			goto bad;
		VTOFDESC(fvp)->fd_fd = fd;
		vn_lock(fvp, LK_SHARED | LK_RETRY, p);
		*vpp = fvp;
		return (0);
	}

bad:;
	vn_lock(dvp, LK_SHARED | LK_RETRY, p);
	*vpp = NULL;
	return (error);
}

/*
 * Open an fdesc vnode.
 *
 * Fdesc nodes record the descriptor number in p_dupfd and return ENODEV
 * (see the comment in the body).  Fctty nodes are forwarded to the
 * controlling-tty driver's open routine.  Every other node type returns 0.
 */
static int
fdesc_open(ap)
	struct vop_open_args /* {
		struct vnode *a_vp;
		int  a_mode;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	int error = 0;

	switch (VTOFDESC(vp)->fd_type) {
	case Fdesc:
		/*
		 * XXX Kludge: set p->p_dupfd to contain the value of
		 * the file descriptor being sought for duplication. The error
		 * return ensures that the vnode for this device will be
		 * released by vn_open. Open will detect this special error and
		 * take the actions in dupfdopen.  Other callers of vn_open or
		 * VOP_OPEN will simply report the error.
+ */ + ap->a_p->p_dupfd = VTOFDESC(vp)->fd_fd; /* XXX */ + error = ENODEV; + break; + + case Fctty: + error = (*ctty_cdevsw.d_open)(devctty, ap->a_mode, 0, ap->a_p); + break; + } + + return (error); +} + +static int +fdesc_attr(fd, vap, cred, p) + int fd; + struct vattr *vap; + struct ucred *cred; + struct proc *p; +{ + struct filedesc *fdp = p->p_fd; + struct file *fp; + struct stat stb; + int error; + + if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) + return (EBADF); + + switch (fp->f_type) { + case DTYPE_FIFO: + case DTYPE_VNODE: + error = VOP_GETATTR((struct vnode *) fp->f_data, vap, cred, p); + if (error == 0 && vap->va_type == VDIR) { + /* + * directories can cause loops in the namespace, + * so turn off the 'x' bits to avoid trouble. + */ + vap->va_mode &= ~((VEXEC)|(VEXEC>>3)|(VEXEC>>6)); + } + break; + + case DTYPE_SOCKET: + error = soo_stat((struct socket *)fp->f_data, &stb); + if (error == 0) { + vattr_null(vap); + vap->va_type = VSOCK; + vap->va_mode = stb.st_mode; + vap->va_nlink = stb.st_nlink; + vap->va_uid = stb.st_uid; + vap->va_gid = stb.st_gid; + vap->va_fsid = stb.st_dev; + vap->va_fileid = stb.st_ino; + vap->va_size = stb.st_size; + vap->va_blocksize = stb.st_blksize; + vap->va_atime = stb.st_atimespec; + vap->va_mtime = stb.st_mtimespec; + vap->va_ctime = stb.st_ctimespec; + vap->va_gen = stb.st_gen; + vap->va_flags = stb.st_flags; + vap->va_rdev = stb.st_rdev; + vap->va_bytes = stb.st_blocks * stb.st_blksize; + } + break; + + default: + panic("fdesc attr"); + break; + } + + return (error); +} + +static int +fdesc_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + unsigned fd; + int error = 0; + + switch (VTOFDESC(vp)->fd_type) { + case Froot: + case Fdevfd: + case Flink: + case Fctty: + bzero((caddr_t) vap, sizeof(*vap)); + vattr_null(vap); + vap->va_fileid = 
VTOFDESC(vp)->fd_ix; + + switch (VTOFDESC(vp)->fd_type) { + case Flink: + vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; + vap->va_type = VLNK; + vap->va_nlink = 1; + vap->va_size = strlen(VTOFDESC(vp)->fd_link); + break; + + case Fctty: + vap->va_mode = S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP|S_IROTH|S_IWOTH; + vap->va_type = VFIFO; + vap->va_nlink = 1; + vap->va_size = 0; + break; + + default: + vap->va_mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH; + vap->va_type = VDIR; + vap->va_nlink = 2; + vap->va_size = DEV_BSIZE; + break; + } + vap->va_uid = 0; + vap->va_gid = 0; + vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + vap->va_blocksize = DEV_BSIZE; + vap->va_atime.tv_sec = boottime.tv_sec; + vap->va_atime.tv_nsec = 0; + vap->va_mtime = vap->va_atime; + vap->va_ctime = vap->va_mtime; + vap->va_gen = 0; + vap->va_flags = 0; + vap->va_rdev = 0; + vap->va_bytes = 0; + break; + + case Fdesc: + fd = VTOFDESC(vp)->fd_fd; + error = fdesc_attr(fd, vap, ap->a_cred, ap->a_p); + break; + + default: + panic("fdesc_getattr"); + break; + } + + if (error == 0) + vp->v_type = vap->va_type; + + return (error); +} + +static int +fdesc_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct filedesc *fdp = ap->a_p->p_fd; + struct vattr *vap = ap->a_vap; + struct file *fp; + unsigned fd; + int error; + + /* + * Can't mess with the root vnode + */ + switch (VTOFDESC(ap->a_vp)->fd_type) { + case Fdesc: + break; + + case Fctty: + if (vap->va_flags != VNOVAL) + return (EOPNOTSUPP); + return (0); + + default: + return (EACCES); + } + + fd = VTOFDESC(ap->a_vp)->fd_fd; + if (fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { + return (EBADF); + } + + /* + * Can setattr the underlying vnode, but not sockets! 
+ */ + switch (fp->f_type) { + case DTYPE_FIFO: + case DTYPE_PIPE: + case DTYPE_VNODE: + error = VOP_SETATTR((struct vnode *) fp->f_data, ap->a_vap, ap->a_cred, ap->a_p); + break; + + case DTYPE_SOCKET: + if (vap->va_flags != VNOVAL) + error = EOPNOTSUPP; + else + error = 0; + break; + + default: + error = EBADF; + break; + } + + return (error); +} + +#define UIO_MX 16 + +static struct dirtmp { + u_long d_fileno; + u_short d_reclen; + u_short d_namlen; + char d_name[8]; +} rootent[] = { + { FD_DEVFD, UIO_MX, 2, "fd" }, + { FD_STDIN, UIO_MX, 5, "stdin" }, + { FD_STDOUT, UIO_MX, 6, "stdout" }, + { FD_STDERR, UIO_MX, 6, "stderr" }, + { FD_CTTY, UIO_MX, 3, "tty" }, +}; + +static int +fdesc_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + u_long *a_cookies; + int a_ncookies; + } */ *ap; +{ + struct uio *uio = ap->a_uio; + struct filedesc *fdp; + int error, i, off; + + /* + * We don't allow exporting fdesc mounts, and currently local + * requests do not need cookies. 
+ */ + if (ap->a_ncookies) + panic("fdesc_readdir: not hungry"); + + if (VTOFDESC(ap->a_vp)->fd_type != Froot && + VTOFDESC(ap->a_vp)->fd_type != Fdevfd) + panic("fdesc_readdir: not dir"); + + off = (int)uio->uio_offset; + if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || + uio->uio_resid < UIO_MX) + return (EINVAL); + i = (u_int)off / UIO_MX; + fdp = uio->uio_procp->p_fd; + + if (VTOFDESC(ap->a_vp)->fd_type == Froot) { + struct dirent d; + struct dirent *dp = &d; + struct dirtmp *dt; + + error = 0; + + while (i < sizeof(rootent) / sizeof(rootent[0]) && + uio->uio_resid >= UIO_MX) { + dt = &rootent[i]; + switch (dt->d_fileno) { + case FD_CTTY: + if (cttyvp(uio->uio_procp) == NULL) + continue; + break; + + case FD_STDIN: + case FD_STDOUT: + case FD_STDERR: + if ((dt->d_fileno-FD_STDIN) >= fdp->fd_nfiles) + continue; + if (fdp->fd_ofiles[dt->d_fileno-FD_STDIN] == NULL) + continue; + break; + } + bzero((caddr_t) dp, UIO_MX); + dp->d_fileno = dt->d_fileno; + dp->d_namlen = dt->d_namlen; + dp->d_type = DT_UNKNOWN; + dp->d_reclen = dt->d_reclen; + bcopy(dt->d_name, dp->d_name, dp->d_namlen+1); + error = uiomove((caddr_t) dp, UIO_MX, uio); + if (error) + break; + i++; + } + uio->uio_offset = i * UIO_MX; + return (error); + } + + error = 0; + while (i < fdp->fd_nfiles && uio->uio_resid >= UIO_MX) { + if (fdp->fd_ofiles[i] != NULL) { + struct dirent d; + struct dirent *dp = &d; + + bzero((caddr_t) dp, UIO_MX); + + dp->d_namlen = sprintf(dp->d_name, "%d", i); + dp->d_reclen = UIO_MX; + dp->d_type = DT_UNKNOWN; + dp->d_fileno = i + FD_STDIN; + /* + * And ship to userland + */ + error = uiomove((caddr_t) dp, UIO_MX, uio); + if (error) + break; + } + i++; + } + + uio->uio_offset = i * UIO_MX; + return (error); +} + +static int +fdesc_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + int error; + + if (vp->v_type != VLNK) + return (EPERM); + + if 
(VTOFDESC(vp)->fd_type == Flink) { + char *ln = VTOFDESC(vp)->fd_link; + error = uiomove(ln, strlen(ln), ap->a_uio); + } else { + error = EOPNOTSUPP; + } + + return (error); +} + +static int +fdesc_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + int error = EOPNOTSUPP; + + switch (VTOFDESC(ap->a_vp)->fd_type) { + case Fctty: + error = (*ctty_cdevsw.d_read)(devctty, ap->a_uio, ap->a_ioflag); + break; + + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +static int +fdesc_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + int error = EOPNOTSUPP; + + switch (VTOFDESC(ap->a_vp)->fd_type) { + case Fctty: + error = (*ctty_cdevsw.d_write)(devctty, ap->a_uio, ap->a_ioflag); + break; + + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +static int +fdesc_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + int error = EOPNOTSUPP; + + switch (VTOFDESC(ap->a_vp)->fd_type) { + case Fctty: + error = (*ctty_cdevsw.d_ioctl)(devctty, ap->a_command, + ap->a_data, ap->a_fflag, ap->a_p); + break; + + default: + error = EOPNOTSUPP; + break; + } + + return (error); +} + +static int +fdesc_poll(ap) + struct vop_poll_args /* { + struct vnode *a_vp; + int a_events; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + int revents; + + switch (VTOFDESC(ap->a_vp)->fd_type) { + case Fctty: + revents = (*ctty_cdevsw.d_poll)(devctty, ap->a_events, ap->a_p); + break; + + default: + revents = seltrue(0, ap->a_events, ap->a_p); + break; + } + + return (revents); +} + +static int +fdesc_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + /* + * Clear out the v_type field to 
avoid
	 * nasty things happening in vgone().
	 */
	VOP_UNLOCK(vp, 0, ap->a_p);
	vp->v_type = VNON;
	return (0);
}

/*
 * Reclaim an fdesc vnode: unhook its fdescnode from the hash chain,
 * free it (M_TEMP, matching the MALLOC in fdesc_allocvp) and clear
 * v_data.  Always returns 0.
 */
static int
fdesc_reclaim(ap)
	struct vop_reclaim_args /* {
		struct vnode *a_vp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct fdescnode *fd = VTOFDESC(vp);

	LIST_REMOVE(fd, fd_hash);
	FREE(vp->v_data, M_TEMP);
	vp->v_data = 0;

	return (0);
}

/*
 * Print out the contents of a /dev/fd vnode.
 * Emits a fixed one-line tag; nothing specific to the vnode is printed.
 */
/* ARGSUSED */
static int
fdesc_print(ap)
	struct vop_print_args /* {
		struct vnode *a_vp;
	} */ *ap;
{

	printf("tag VT_NON, fdesc vnode\n");
	return (0);
}

/*
 * /dev/fd "should never get here" operation
 * Installed for vnode operations that must never be invoked on an
 * fdesc vnode; panics unconditionally.
 */
static int
fdesc_badop()
{

	panic("fdesc: bad op");
	/* NOTREACHED */
}

/*
 * Vnode operations vector for fdesc.  Operations not listed here go to
 * vop_defaultop through the vop_default_desc entry.
 */
static struct vnodeopv_entry_desc fdesc_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_access_desc,		(vop_t *) vop_null },
	{ &vop_bmap_desc,		(vop_t *) fdesc_badop },
	{ &vop_getattr_desc,		(vop_t *) fdesc_getattr },
	{ &vop_inactive_desc,		(vop_t *) fdesc_inactive },
	{ &vop_ioctl_desc,		(vop_t *) fdesc_ioctl },
	{ &vop_lookup_desc,		(vop_t *) fdesc_lookup },
	{ &vop_open_desc,		(vop_t *) fdesc_open },
	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
	{ &vop_poll_desc,		(vop_t *) fdesc_poll },
	{ &vop_print_desc,		(vop_t *) fdesc_print },
	{ &vop_read_desc,		(vop_t *) fdesc_read },
	{ &vop_readdir_desc,		(vop_t *) fdesc_readdir },
	{ &vop_readlink_desc,		(vop_t *) fdesc_readlink },
	{ &vop_reclaim_desc,		(vop_t *) fdesc_reclaim },
	{ &vop_setattr_desc,		(vop_t *) fdesc_setattr },
	{ &vop_write_desc,		(vop_t *) fdesc_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc fdesc_vnodeop_opv_desc =
	{ &fdesc_vnodeop_p, fdesc_vnodeop_entries };

VNODEOP_SET(fdesc_vnodeop_opv_desc);
diff --git a/sys/fs/fifofs/fifo.h b/sys/fs/fifofs/fifo.h new file mode 100644 index 0000000..ec186d0d --- /dev/null +++ b/sys/fs/fifofs/fifo.h @@ -0,0 +1,44 @@
/*
 * Copyright (c) 1991, 1993
 *	The Regents
of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fifo.h 8.6 (Berkeley) 5/21/95 + * $Id: fifo.h,v 1.14 1997/09/14 02:57:51 peter Exp $ + */ + +extern vop_t **fifo_vnodeop_p; + +/* + * Prototypes for fifo operations on vnodes. 
+ */ +int fifo_vnoperate __P((struct vop_generic_args *)); +int fifo_printinfo __P((struct vnode *)); + diff --git a/sys/fs/fifofs/fifo_vnops.c b/sys/fs/fifofs/fifo_vnops.c new file mode 100644 index 0000000..f7e47e1 --- /dev/null +++ b/sys/fs/fifofs/fifo_vnops.c @@ -0,0 +1,537 @@ +/* + * Copyright (c) 1990, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fifo_vnops.c 8.10 (Berkeley) 5/27/95 + * $Id: fifo_vnops.c,v 1.42 1998/02/04 22:32:45 eivind Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/unistd.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/filio.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/poll.h> +#include <sys/un.h> +#include <miscfs/fifofs/fifo.h> + +/* + * This structure is associated with the FIFO vnode and stores + * the state associated with the FIFO. 
*/
struct fifoinfo {
	struct socket	*fi_readsock;	/* socket the read side receives from */
	struct socket	*fi_writesock;	/* socket the write side sends on */
	long		fi_readers;	/* count of opens with FREAD */
	long		fi_writers;	/* count of opens with FWRITE */
};

static int	fifo_badop __P((void));
static int	fifo_print __P((struct vop_print_args *));
static int	fifo_lookup __P((struct vop_lookup_args *));
static int	fifo_open __P((struct vop_open_args *));
static int	fifo_close __P((struct vop_close_args *));
static int	fifo_read __P((struct vop_read_args *));
static int	fifo_write __P((struct vop_write_args *));
static int	fifo_ioctl __P((struct vop_ioctl_args *));
static int	fifo_poll __P((struct vop_poll_args *));
static int	fifo_inactive __P((struct  vop_inactive_args *));
static int	fifo_bmap __P((struct vop_bmap_args *));
static int	fifo_pathconf __P((struct vop_pathconf_args *));
static int	fifo_advlock __P((struct vop_advlock_args *));


/*
 * Vnode operations vector for fifos.  Operations that make no sense on
 * a fifo map to fifo_badop (which panics); access, getattr and setattr
 * map to vop_ebadf; lease and reclaim map to vop_null.
 */
vop_t **fifo_vnodeop_p;
static struct vnodeopv_entry_desc fifo_vnodeop_entries[] = {
	{ &vop_default_desc,		(vop_t *) vop_defaultop },
	{ &vop_abortop_desc,		(vop_t *) fifo_badop },
	{ &vop_access_desc,		(vop_t *) vop_ebadf },
	{ &vop_advlock_desc,		(vop_t *) fifo_advlock },
	{ &vop_bmap_desc,		(vop_t *) fifo_bmap },
	{ &vop_close_desc,		(vop_t *) fifo_close },
	{ &vop_create_desc,		(vop_t *) fifo_badop },
	{ &vop_getattr_desc,		(vop_t *) vop_ebadf },
	{ &vop_inactive_desc,		(vop_t *) fifo_inactive },
	{ &vop_ioctl_desc,		(vop_t *) fifo_ioctl },
	{ &vop_lease_desc,		(vop_t *) vop_null },
	{ &vop_link_desc,		(vop_t *) fifo_badop },
	{ &vop_lookup_desc,		(vop_t *) fifo_lookup },
	{ &vop_mkdir_desc,		(vop_t *) fifo_badop },
	{ &vop_mknod_desc,		(vop_t *) fifo_badop },
	{ &vop_open_desc,		(vop_t *) fifo_open },
	{ &vop_pathconf_desc,		(vop_t *) fifo_pathconf },
	{ &vop_poll_desc,		(vop_t *) fifo_poll },
	{ &vop_print_desc,		(vop_t *) fifo_print },
	{ &vop_read_desc,		(vop_t *) fifo_read },
	{ &vop_readdir_desc,		(vop_t *) fifo_badop },
	{ &vop_readlink_desc,		(vop_t *) fifo_badop },
	{ &vop_reallocblks_desc,	(vop_t *) fifo_badop },
	{ &vop_reclaim_desc,		(vop_t *) vop_null },
	{ &vop_remove_desc,		(vop_t *) fifo_badop },
	{ &vop_rename_desc,		(vop_t *) fifo_badop },
	{ &vop_rmdir_desc,		(vop_t *) fifo_badop },
	{ &vop_setattr_desc,		(vop_t *) vop_ebadf },
	{ &vop_symlink_desc,		(vop_t *) fifo_badop },
	{ &vop_write_desc,		(vop_t *) fifo_write },
	{ NULL, NULL }
};
static struct vnodeopv_desc fifo_vnodeop_opv_desc =
	{ &fifo_vnodeop_p, fifo_vnodeop_entries };

VNODEOP_SET(fifo_vnodeop_opv_desc);

/*
 * Dispatch a generic vnode operation through the fifo operations
 * vector, using the operation's offset from its descriptor.
 */
int
fifo_vnoperate(ap)
	struct vop_generic_args /* {
		struct vnodeop_desc *a_desc;
		<other random data follows, presumably>
	} */ *ap;
{
	return (VOCALL(fifo_vnodeop_p, ap->a_desc->vdesc_offset, ap));
}

/*
 * Trivial lookup routine that always fails.
 * Sets *a_vpp to NULL and returns ENOTDIR.
 */
/* ARGSUSED */
static int
fifo_lookup(ap)
	struct vop_lookup_args /* {
		struct vnode * a_dvp;
		struct vnode ** a_vpp;
		struct componentname * a_cnp;
	} */ *ap;
{

	*ap->a_vpp = NULL;
	return (ENOTDIR);
}

/*
 * Open called to set up a new instance of a fifo or
 * to find an active instance of a fifo.
+ */ +/* ARGSUSED */ +static int +fifo_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct fifoinfo *fip; + struct proc *p = ap->a_p; + struct socket *rso, *wso; + int error; + + if ((fip = vp->v_fifoinfo) == NULL) { + MALLOC(fip, struct fifoinfo *, sizeof(*fip), M_VNODE, M_WAITOK); + vp->v_fifoinfo = fip; + error = socreate(AF_LOCAL, &rso, SOCK_STREAM, 0, ap->a_p); + if (error) { + free(fip, M_VNODE); + vp->v_fifoinfo = NULL; + return (error); + } + fip->fi_readsock = rso; + error = socreate(AF_LOCAL, &wso, SOCK_STREAM, 0, ap->a_p); + if (error) { + (void)soclose(rso); + free(fip, M_VNODE); + vp->v_fifoinfo = NULL; + return (error); + } + fip->fi_writesock = wso; + error = unp_connect2(wso, rso); + if (error) { + (void)soclose(wso); + (void)soclose(rso); + free(fip, M_VNODE); + vp->v_fifoinfo = NULL; + return (error); + } + fip->fi_readers = fip->fi_writers = 0; + wso->so_snd.sb_lowat = PIPE_BUF; + rso->so_state |= SS_CANTRCVMORE; + } + if (ap->a_mode & FREAD) { + fip->fi_readers++; + if (fip->fi_readers == 1) { + fip->fi_writesock->so_state &= ~SS_CANTSENDMORE; + if (fip->fi_writers > 0) + wakeup((caddr_t)&fip->fi_writers); + } + } + if (ap->a_mode & FWRITE) { + fip->fi_writers++; + if (fip->fi_writers == 1) { + fip->fi_readsock->so_state &= ~SS_CANTRCVMORE; + if (fip->fi_readers > 0) + wakeup((caddr_t)&fip->fi_readers); + } + } + if ((ap->a_mode & FREAD) && (ap->a_mode & O_NONBLOCK) == 0) { + while (fip->fi_writers == 0) { + VOP_UNLOCK(vp, 0, p); + error = tsleep((caddr_t)&fip->fi_readers, + PCATCH | PSOCK, "fifoor", 0); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) + goto bad; + } + } + if (ap->a_mode & FWRITE) { + if (ap->a_mode & O_NONBLOCK) { + if (fip->fi_readers == 0) { + error = ENXIO; + goto bad; + } + } else { + while (fip->fi_readers == 0) { + VOP_UNLOCK(vp, 0, p); + error = tsleep((caddr_t)&fip->fi_writers, + PCATCH | 
PSOCK, "fifoow", 0); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) + goto bad; + } + } + } + return (0); +bad: + VOP_CLOSE(vp, ap->a_mode, ap->a_cred, p); + return (error); +} + +/* + * Vnode op for read + */ +/* ARGSUSED */ +static int +fifo_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct uio *uio = ap->a_uio; + struct socket *rso = ap->a_vp->v_fifoinfo->fi_readsock; + struct proc *p = uio->uio_procp; + int error, startresid; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("fifo_read mode"); +#endif + if (uio->uio_resid == 0) + return (0); + if (ap->a_ioflag & IO_NDELAY) + rso->so_state |= SS_NBIO; + startresid = uio->uio_resid; + VOP_UNLOCK(ap->a_vp, 0, p); + error = soreceive(rso, (struct sockaddr **)0, uio, (struct mbuf **)0, + (struct mbuf **)0, (int *)0); + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); + if (ap->a_ioflag & IO_NDELAY) + rso->so_state &= ~SS_NBIO; + return (error); +} + +/* + * Vnode op for write + */ +/* ARGSUSED */ +static int +fifo_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct socket *wso = ap->a_vp->v_fifoinfo->fi_writesock; + struct proc *p = ap->a_uio->uio_procp; + int error; + +#ifdef DIAGNOSTIC + if (ap->a_uio->uio_rw != UIO_WRITE) + panic("fifo_write mode"); +#endif + if (ap->a_ioflag & IO_NDELAY) + wso->so_state |= SS_NBIO; + VOP_UNLOCK(ap->a_vp, 0, p); + error = sosend(wso, (struct sockaddr *)0, ap->a_uio, 0, + (struct mbuf *)0, 0, p); + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); + if (ap->a_ioflag & IO_NDELAY) + wso->so_state &= ~SS_NBIO; + return (error); +} + +/* + * Device ioctl operation. 
*/
/*
 * FIONBIO is accepted and ignored.  Any other command is passed to
 * soo_ioctl() for the read socket when a_fflag has FREAD and then for
 * the write socket when it has FWRITE; the first error stops
 * processing.  Returns 0 when neither flag is set.
 */
/* ARGSUSED */
static int
fifo_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	/* Stand-in file: only f_data is filled in before each soo_ioctl(). */
	struct file filetmp;
	int error;

	if (ap->a_command == FIONBIO)
		return (0);
	if (ap->a_fflag & FREAD) {
		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
		error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p);
		if (error)
			return (error);
	}
	if (ap->a_fflag & FWRITE) {
		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
		error = soo_ioctl(&filetmp, ap->a_command, ap->a_data, ap->a_p);
		if (error)
			return (error);
	}
	return (0);
}

/*
 * Poll a fifo.
 *
 * Read-type events are checked on the read socket and write-type
 * events on the write socket; the results are OR-ed together.  The full
 * a_events mask is handed to soo_poll() for each socket.
 */
/* ARGSUSED */
static int
fifo_poll(ap)
	struct vop_poll_args /* {
		struct vnode *a_vp;
		int  a_events;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	/* Stand-in file: only f_data is filled in before each soo_poll(). */
	struct file filetmp;
	int revents = 0;

	if (ap->a_events & (POLLIN | POLLPRI | POLLRDNORM | POLLRDBAND)) {
		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_readsock;
		if (filetmp.f_data)
			revents |= soo_poll(&filetmp, ap->a_events, ap->a_cred,
			    ap->a_p);
	}
	if (ap->a_events & (POLLOUT | POLLWRNORM | POLLWRBAND)) {
		filetmp.f_data = (caddr_t)ap->a_vp->v_fifoinfo->fi_writesock;
		if (filetmp.f_data)
			revents |= soo_poll(&filetmp, ap->a_events, ap->a_cred,
			    ap->a_p);
	}
	return (revents);
}

/*
 * Inactive hook: just unlocks the vnode.  Always returns 0.
 */
static int
fifo_inactive(ap)
	struct vop_inactive_args /* {
		struct vnode *a_vp;
		struct proc *a_p;
	} */ *ap;
{

	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
	return (0);
}

/*
 * This is a noop, simply returning what one has been given.
+ */ +static int +fifo_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + /* Every output pointer is optional; NULL ones are skipped. */ + if (ap->a_vpp != NULL) + *ap->a_vpp = ap->a_vp; + if (ap->a_bnp != NULL) + *ap->a_bnp = ap->a_bn; + if (ap->a_runp != NULL) + *ap->a_runp = 0; + if (ap->a_runb != NULL) + *ap->a_runb = 0; + return (0); +} + +/* + * Fifo close routine + * + * Drops the reader and/or writer count according to a_fflag. When the + * last reader goes away socantsendmore() is applied to the write-side + * socket; when the last writer goes away socantrcvmore() is applied to + * the read-side socket. If v_usecount is still above one nothing else + * is done. Otherwise both sockets are closed, the fifoinfo is freed and + * v_fifoinfo is cleared; an error from closing the read-side socket + * takes precedence over one from the write side. + */ +/* ARGSUSED */ +static int +fifo_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct fifoinfo *fip = vp->v_fifoinfo; + int error1, error2; + + if (ap->a_fflag & FREAD) { + fip->fi_readers--; + if (fip->fi_readers == 0) + socantsendmore(fip->fi_writesock); + } + if (ap->a_fflag & FWRITE) { + fip->fi_writers--; + if (fip->fi_writers == 0) + socantrcvmore(fip->fi_readsock); + } + if (vp->v_usecount > 1) + return (0); + error1 = soclose(fip->fi_readsock); + error2 = soclose(fip->fi_writesock); + FREE(fip, M_VNODE); + vp->v_fifoinfo = NULL; + if (error1) + return (error1); + return (error2); +} + + +/* + * Print out internal contents of a fifo vnode: the reader and writer + * counts, with a leading ", " and no trailing newline. Always returns 0. + */ +int +fifo_printinfo(vp) + struct vnode *vp; +{ + register struct fifoinfo *fip = vp->v_fifoinfo; + + printf(", fifo with %ld readers and %ld writers", + fip->fi_readers, fip->fi_writers); + return (0); +} + +/* + * Print out the contents of a fifo vnode: a tag, the fifo_printinfo() + * text and a newline. + */ +static int +fifo_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + printf("tag VT_NON"); + fifo_printinfo(ap->a_vp); + printf("\n"); + return (0); +} + +/* + * Return POSIX pathconf information applicable to fifo's. 
+ */ +int +fifo_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + /* Only the three names below are answered; any other yields EINVAL. */ + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + return (0); + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); + default: + return (EINVAL); + } + /* NOTREACHED */ +} + +/* + * Fifo advisory byte-level locks. + * + * No locking is performed: requests with F_FLOCK set in a_flags get + * EOPNOTSUPP, all others get EINVAL. + */ +/* ARGSUSED */ +static int +fifo_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + + return (ap->a_flags & F_FLOCK ? EOPNOTSUPP : EINVAL); +} + +/* + * Fifo bad operation: panics unconditionally. + */ +static int +fifo_badop() +{ + + panic("fifo_badop called"); + /* NOTREACHED */ +} diff --git a/sys/fs/msdosfs/bootsect.h b/sys/fs/msdosfs/bootsect.h new file mode 100644 index 0000000..11b93371a --- /dev/null +++ b/sys/fs/msdosfs/bootsect.h @@ -0,0 +1,113 @@ +/* $Id: bootsect.h,v 1.5 1997/02/22 09:40:43 peter Exp $ */ +/* $NetBSD: bootsect.h,v 1.9 1997/11/17 15:36:17 ws Exp $ */ + +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +/* + * Format of a boot sector. This is the first sector on a DOS floppy disk + * or the first sector of a partition on a hard disk. But, it is not the + * first sector of a partitioned hard disk. 
+ */ +struct bootsector33 { + u_int8_t bsJump[3]; /* jump inst E9xxxx or EBxx90 */ + int8_t bsOemName[8]; /* OEM name and version */ + int8_t bsBPB[19]; /* BIOS parameter block */ + int8_t bsDriveNumber; /* drive number (0x80) */ + int8_t bsBootCode[479]; /* pad so struct is 512b */ + u_int8_t bsBootSectSig0; /* signature byte; see BOOTSIG0 */ + u_int8_t bsBootSectSig1; /* signature byte; see BOOTSIG1 */ +#define BOOTSIG0 0x55 +#define BOOTSIG1 0xaa +}; +/* + * Extended boot record fields. The members add up to 26 bytes, the size + * of the bsExt[] arrays in the boot sector structs below; presumably this + * struct is overlaid on them -- TODO confirm against the users. + */ +struct extboot { + int8_t exDriveNumber; /* drive number (0x80) */ + int8_t exReserved1; /* reserved */ + int8_t exBootSignature; /* ext. boot signature (0x29) */ +#define EXBOOTSIG 0x29 + int8_t exVolumeID[4]; /* volume ID number */ + int8_t exVolumeLabel[11]; /* volume label */ + int8_t exFileSysType[8]; /* fs type (FAT12 or FAT16) */ +}; + +struct bootsector50 { + u_int8_t bsJump[3]; /* jump inst E9xxxx or EBxx90 */ + int8_t bsOemName[8]; /* OEM name and version */ + int8_t bsBPB[25]; /* BIOS parameter block */ + int8_t bsExt[26]; /* Bootsector Extension */ + int8_t bsBootCode[448]; /* pad so structure is 512b */ + u_int8_t bsBootSectSig0; /* signature byte; see BOOTSIG0 */ + u_int8_t bsBootSectSig1; /* signature byte; see BOOTSIG1 */ +#define BOOTSIG0 0x55 +#define BOOTSIG1 0xaa +}; + +struct bootsector710 { + u_int8_t bsJump[3]; /* jump inst E9xxxx or EBxx90 */ + int8_t bsOEMName[8]; /* OEM name and version (spelled bsOemName in the other two structs) */ + int8_t bsPBP[53]; /* BIOS parameter block (sic: bsPBP here, bsBPB in the other two structs) */ + int8_t bsExt[26]; /* Bootsector Extension */ + int8_t bsBootCode[418]; /* pad so structure is 512b */ + u_int8_t bsBootSectSig2; /* 2 & 3 are only defined for FAT32? */ + u_int8_t bsBootSectSig3; + u_int8_t bsBootSectSig0; /* signature byte; see BOOTSIG0 */ + u_int8_t bsBootSectSig1; /* signature byte; see BOOTSIG1 */ +#define BOOTSIG0 0x55 +#define BOOTSIG1 0xaa +#define BOOTSIG2 0 +#define BOOTSIG3 0 +}; +#ifdef atari +/* + * The boot sector on a gemdos fs is a little bit different from the msdos fs + * format. Currently there is no need to declare a separate structure, the + * bootsector33 struct will do. 
+ */ +#if 0 +struct bootsec_atari { + u_int8_t bsBranch[2]; /* branch inst if auto-boot */ + int8_t bsFiller[6]; /* anything or nothing */ + int8_t bsSerial[3]; /* serial no. for mediachange */ + int8_t bsBPB[19]; /* BIOS parameter block */ + int8_t bsBootCode[482]; /* pad so struct is 512b */ +}; +#endif +#endif /* atari */ + +union bootsector { + struct bootsector33 bs33; + struct bootsector50 bs50; + struct bootsector710 bs710; +}; + +#if 0 +/* + * Shorthand for fields in the bpb. + */ +#define bsBytesPerSec bsBPB.bpbBytesPerSec +#define bsSectPerClust bsBPB.bpbSectPerClust +#define bsResSectors bsBPB.bpbResSectors +#define bsFATS bsBPB.bpbFATS +#define bsRootDirEnts bsBPB.bpbRootDirEnts +#define bsSectors bsBPB.bpbSectors +#define bsMedia bsBPB.bpbMedia +#define bsFATsecs bsBPB.bpbFATsecs +#define bsSectPerTrack bsBPB.bpbSectPerTrack +#define bsHeads bsBPB.bpbHeads +#define bsHiddenSecs bsBPB.bpbHiddenSecs +#define bsHugeSectors bsBPB.bpbHugeSectors +#endif diff --git a/sys/fs/msdosfs/bpb.h b/sys/fs/msdosfs/bpb.h new file mode 100644 index 0000000..bc00a75 --- /dev/null +++ b/sys/fs/msdosfs/bpb.h @@ -0,0 +1,209 @@ +/* $Id: bpb.h,v 1.5 1997/02/22 09:40:44 peter Exp $ */ +/* $NetBSD: bpb.h,v 1.7 1997/11/17 15:36:24 ws Exp $ */ + +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. 
+ * + * October 1992 + */ + +/* + * BIOS Parameter Block (BPB) for DOS 3.3 + */ +struct bpb33 { + u_int16_t bpbBytesPerSec; /* bytes per sector */ + u_int8_t bpbSecPerClust; /* sectors per cluster */ + u_int16_t bpbResSectors; /* number of reserved sectors */ + u_int8_t bpbFATs; /* number of FATs */ + u_int16_t bpbRootDirEnts; /* number of root directory entries */ + u_int16_t bpbSectors; /* total number of sectors */ + u_int8_t bpbMedia; /* media descriptor */ + u_int16_t bpbFATsecs; /* number of sectors per FAT */ + u_int16_t bpbSecPerTrack; /* sectors per track */ + u_int16_t bpbHeads; /* number of heads */ + u_int16_t bpbHiddenSecs; /* number of hidden sectors */ +}; + +/* + * BPB for DOS 5.0 The difference is bpbHiddenSecs is a short for DOS 3.3, + * and bpbHugeSectors is not in the 3.3 bpb. + */ +struct bpb50 { + u_int16_t bpbBytesPerSec; /* bytes per sector */ + u_int8_t bpbSecPerClust; /* sectors per cluster */ + u_int16_t bpbResSectors; /* number of reserved sectors */ + u_int8_t bpbFATs; /* number of FATs */ + u_int16_t bpbRootDirEnts; /* number of root directory entries */ + u_int16_t bpbSectors; /* total number of sectors */ + u_int8_t bpbMedia; /* media descriptor */ + u_int16_t bpbFATsecs; /* number of sectors per FAT */ + u_int16_t bpbSecPerTrack; /* sectors per track */ + u_int16_t bpbHeads; /* number of heads */ + u_int32_t bpbHiddenSecs; /* # of hidden sectors */ + u_int32_t bpbHugeSectors; /* # of sectors if bpbSectors == 0 */ +}; + +/* + * BPB for DOS 7.10 (FAT32). This one has a few extensions to bpb50. 
+ */ +struct bpb710 { + u_int16_t bpbBytesPerSec; /* bytes per sector */ + u_int8_t bpbSecPerClust; /* sectors per cluster */ + u_int16_t bpbResSectors; /* number of reserved sectors */ + u_int8_t bpbFATs; /* number of FATs */ + u_int16_t bpbRootDirEnts; /* number of root directory entries */ + u_int16_t bpbSectors; /* total number of sectors */ + u_int8_t bpbMedia; /* media descriptor */ + u_int16_t bpbFATsecs; /* number of sectors per FAT */ + u_int16_t bpbSecPerTrack; /* sectors per track */ + u_int16_t bpbHeads; /* number of heads */ + u_int32_t bpbHiddenSecs; /* # of hidden sectors */ + u_int32_t bpbHugeSectors; /* # of sectors if bpbSectors == 0 */ + u_int32_t bpbBigFATsecs; /* like bpbFATsecs for FAT32 */ + u_int16_t bpbExtFlags; /* extended flags: */ +#define FATNUM 0xf /* mask for numbering active FAT */ +#define FATMIRROR 0x80 /* FAT is mirrored (like it always was) */ + u_int16_t bpbFSVers; /* filesystem version */ +#define FSVERS 0 /* currently only 0 is understood */ + u_int32_t bpbRootClust; /* start cluster for root directory */ + u_int16_t bpbFSInfo; /* filesystem info structure sector */ + u_int16_t bpbBackup; /* backup boot sector */ + /* There is a 12 byte filler here, but we ignore it */ +}; + +#ifdef atari +/* + * BPB for gemdos filesystems. Atari leaves the obsolete stuff undefined. + * Currently there is no need for a separate BPB structure. 
+ */ +#if 0 +struct bpb_a { + u_int16_t bpbBytesPerSec; /* bytes per sector */ + u_int8_t bpbSecPerClust; /* sectors per cluster */ + u_int16_t bpbResSectors; /* number of reserved sectors */ + u_int8_t bpbFATs; /* number of FATs */ + u_int16_t bpbRootDirEnts; /* number of root directory entries */ + u_int16_t bpbSectors; /* total number of sectors */ + u_int8_t bpbUseless1; /* meaningless on gemdos fs */ + u_int16_t bpbFATsecs; /* number of sectors per FAT */ + u_int16_t bpbUseless2; /* meaningless for harddisk fs */ + u_int16_t bpbUseless3; /* meaningless for harddisk fs */ + u_int16_t bpbHiddenSecs; /* the TOS-BIOS ignores this */ +}; +#endif +#endif /* atari */ + +/* + * The following structures represent how the bpb's look on disk. shorts + * and longs are just character arrays of the appropriate length. This is + * because the compiler forces shorts and longs to align on word or + * halfword boundaries. + * + * XXX The little-endian code here assumes that the processor can access + * 16-bit and 32-bit quantities on byte boundaries. If this is not true, + * use the macros for the big-endian case. 
+ */ +#include <machine/endian.h> +/* + * Accessors for the little-endian 16- and 32-bit on-disk fields. + * getushort()/getulong() yield u_int16_t/u_int32_t in both branches and + * every expansion is fully parenthesized. The byte-wise variants widen + * each byte to u_int32_t before shifting, so a byte with its top bit set + * is never shifted into the sign bit of an int and the result cannot + * sign-extend when stored in a wider unsigned type. + */ +#if (BYTE_ORDER == LITTLE_ENDIAN) /* && defined(UNALIGNED_ACCESS) */ +#define getushort(x) (*((u_int16_t *)(x))) +#define getulong(x) (*((u_int32_t *)(x))) +#define putushort(p, v) (*((u_int16_t *)(p)) = (v)) +#define putulong(p, v) (*((u_int32_t *)(p)) = (v)) +#else +#define getushort(x) ((u_int16_t)(((u_int8_t *)(x))[0] \ + | (((u_int8_t *)(x))[1] << 8))) +#define getulong(x) ((u_int32_t)((u_int8_t *)(x))[0] \ + | ((u_int32_t)((u_int8_t *)(x))[1] << 8) \ + | ((u_int32_t)((u_int8_t *)(x))[2] << 16) \ + | ((u_int32_t)((u_int8_t *)(x))[3] << 24)) +#define putushort(p, v) (((u_int8_t *)(p))[0] = (v), \ + ((u_int8_t *)(p))[1] = (v) >> 8) +#define putulong(p, v) (((u_int8_t *)(p))[0] = (v), \ + ((u_int8_t *)(p))[1] = (v) >> 8, \ + ((u_int8_t *)(p))[2] = (v) >> 16,\ + ((u_int8_t *)(p))[3] = (v) >> 24) +#endif + +/* + * BIOS Parameter Block (BPB) for DOS 3.3 + */ +struct byte_bpb33 { + int8_t bpbBytesPerSec[2]; /* bytes per sector */ + int8_t bpbSecPerClust; /* sectors per cluster */ + int8_t bpbResSectors[2]; /* number of reserved sectors */ + int8_t bpbFATs; /* number of FATs */ + int8_t bpbRootDirEnts[2]; /* number of root directory entries */ + int8_t bpbSectors[2]; /* total number of sectors */ + int8_t bpbMedia; /* media descriptor */ + int8_t bpbFATsecs[2]; /* number of sectors per FAT */ + int8_t bpbSecPerTrack[2]; /* sectors per track */ + int8_t bpbHeads[2]; /* number of heads */ + int8_t bpbHiddenSecs[2]; /* number of hidden sectors */ +}; + +/* + * BPB for DOS 5.0 The difference is bpbHiddenSecs is a short for DOS 3.3, + * and bpbHugeSectors is not in the 3.3 bpb. 
+ */ +struct byte_bpb50 { + int8_t bpbBytesPerSec[2]; /* bytes per sector */ + int8_t bpbSecPerClust; /* sectors per cluster */ + int8_t bpbResSectors[2]; /* number of reserved sectors */ + int8_t bpbFATs; /* number of FATs */ + int8_t bpbRootDirEnts[2]; /* number of root directory entries */ + int8_t bpbSectors[2]; /* total number of sectors */ + int8_t bpbMedia; /* media descriptor */ + int8_t bpbFATsecs[2]; /* number of sectors per FAT */ + int8_t bpbSecPerTrack[2]; /* sectors per track */ + int8_t bpbHeads[2]; /* number of heads */ + int8_t bpbHiddenSecs[4]; /* number of hidden sectors */ + int8_t bpbHugeSectors[4]; /* # of sectors if bpbSectors == 0 */ +}; + +/* + * BPB for DOS 7.10 (FAT32). This one has a few extensions to bpb50. + */ +struct byte_bpb710 { + u_int8_t bpbBytesPerSec[2]; /* bytes per sector */ + u_int8_t bpbSecPerClust; /* sectors per cluster */ + u_int8_t bpbResSectors[2]; /* number of reserved sectors */ + u_int8_t bpbFATs; /* number of FATs */ + u_int8_t bpbRootDirEnts[2]; /* number of root directory entries */ + u_int8_t bpbSectors[2]; /* total number of sectors */ + u_int8_t bpbMedia; /* media descriptor */ + u_int8_t bpbFATsecs[2]; /* number of sectors per FAT */ + u_int8_t bpbSecPerTrack[2]; /* sectors per track */ + u_int8_t bpbHeads[2]; /* number of heads */ + u_int8_t bpbHiddenSecs[4]; /* # of hidden sectors */ + u_int8_t bpbHugeSectors[4]; /* # of sectors if bpbSectors == 0 */ + u_int8_t bpbBigFATsecs[4]; /* like bpbFATsecs for FAT32 */ + u_int8_t bpbExtFlags[2]; /* extended flags: */ + u_int8_t bpbFSVers[2]; /* filesystem version */ + u_int8_t bpbRootClust[4]; /* start cluster for root directory */ + u_int8_t bpbFSInfo[2]; /* filesystem info structure sector */ + u_int8_t bpbBackup[2]; /* backup boot sector */ + /* There is a 12 byte filler here, but we ignore it */ +}; + +/* + * FAT32 FSInfo block. 
+ */ +struct fsinfo { + u_int8_t fsisig1[4]; + u_int8_t fsifill1[480]; + u_int8_t fsisig2[4]; + u_int8_t fsinfree[4]; + u_int8_t fsinxtfree[4]; + u_int8_t fsifill2[12]; + u_int8_t fsisig3[4]; + u_int8_t fsifill3[508]; + u_int8_t fsisig4[4]; +}; diff --git a/sys/fs/msdosfs/denode.h b/sys/fs/msdosfs/denode.h new file mode 100644 index 0000000..ba2ef8c --- /dev/null +++ b/sys/fs/msdosfs/denode.h @@ -0,0 +1,286 @@ +/* $Id: denode.h,v 1.17 1998/11/21 00:20:24 dt Exp $ */ +/* $NetBSD: denode.h,v 1.25 1997/11/17 15:36:28 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +/* + * This is the pc filesystem specific portion of the vnode structure. + * + * To describe a file uniquely the de_dirclust, de_diroffset, and + * de_StartCluster fields are used. + * + * de_dirclust contains the cluster number of the directory cluster + * containing the entry for a file or directory. + * de_diroffset is the index into the cluster for the entry describing + * a file or directory. + * de_StartCluster is the number of the first cluster of the file or directory. + * + * Now to describe the quirks of the pc filesystem. + * - Clusters 0 and 1 are reserved. + * - The first allocatable cluster is 2. + * - The root directory is of fixed size and all blocks that make it up + * are contiguous. + * - Cluster 0 refers to the root directory when it is found in the + * startcluster field of a directory entry that points to another directory. 
+ * - Cluster 0 implies a 0 length file when found in the start cluster field + * of a directory entry that points to a file. + * - You can't use the cluster number 0 to derive the address of the root + * directory. + * - Multiple directory entries can point to a directory. The entry in the + * parent directory points to a child directory. Any directories in the + * child directory contain a ".." entry that points back to the parent. + * The child directory itself contains a "." entry that points to itself. + * - The root directory does not contain a "." or ".." entry. + * - Directory entries for directories are never changed once they are created + * (except when removed). The size stays 0, and the last modification time + * is never changed. This is because so many directory entries can point to + * the physical clusters that make up a directory. It would lead to an + * update nightmare. + * - The length field in a directory entry pointing to a directory contains 0 + * (always). The only way to find the end of a directory is to follow the + * cluster chain until the "last cluster" marker is found. + * + * My extensions to make this house of cards work. These apply only to the in + * memory copy of the directory entry. + * - A reference count for each denode will be kept since dos doesn't keep such + * things. + */ + +/* + * Internal pseudo-offset for (nonexistent) directory entry for the root + * dir in the root dir + */ +#define MSDOSFSROOT_OFS 0x1fffffff + +/* + * The fat cache structure. fc_fsrcn is the filesystem relative cluster + * number that corresponds to the file relative cluster number in this + * structure (fc_frcn). + */ +struct fatcache { + u_long fc_frcn; /* file relative cluster number */ + u_long fc_fsrcn; /* filesystem relative cluster number */ +}; + +/* + * The fat entry cache as it stands helps make extending files a "quick" + * operation by avoiding having to scan the fat to discover the last + * cluster of the file. 
The cache also helps sequential reads by + * remembering the last cluster read from the file. This also prevents us + * from having to rescan the fat to find the next cluster to read. This + * cache is probably pretty worthless if a file is opened by multiple + * processes. + */ +#define FC_SIZE 2 /* number of entries in the cache */ +#define FC_LASTMAP 0 /* entry the last call to pcbmap() resolved + * to */ +#define FC_LASTFC 1 /* entry for the last cluster in the file */ + +#define FCE_EMPTY 0xffffffff /* doesn't represent an actual cluster # */ + +/* + * Set a slot in the fat cache. + * + * Stores frcn/fsrcn into (dep)->de_fc[slot]. Expands to a single + * statement, so it is safe as the body of an unbraced if/else; the caller + * supplies the terminating semicolon. dep and slot are evaluated twice. + */ +#define fc_setcache(dep, slot, frcn, fsrcn) \ + do { \ + (dep)->de_fc[(slot)].fc_frcn = (frcn); \ + (dep)->de_fc[(slot)].fc_fsrcn = (fsrcn); \ + } while (0) + +/* + * This is the in memory variant of a dos directory entry. It is usually + * contained within a vnode. + */ +struct denode { + struct lock de_lock; /* denode lock >Keep this first< */ + struct denode *de_next; /* Hash chain forward */ + struct denode **de_prev; /* Hash chain back */ + struct vnode *de_vnode; /* addr of vnode we are part of */ + struct vnode *de_devvp; /* vnode of blk dev we live on */ + u_long de_flag; /* flag bits */ + dev_t de_dev; /* device where direntry lives */ + u_long de_dirclust; /* cluster of the directory file containing this entry */ + u_long de_diroffset; /* offset of this entry in the directory cluster */ + u_long de_fndoffset; /* offset of found dir entry */ + int de_fndcnt; /* number of slots before de_fndoffset */ + long de_refcnt; /* reference count */ + struct msdosfsmount *de_pmp; /* addr of our mount struct */ + u_char de_Name[12]; /* name, from DOS directory entry */ + u_char de_Attributes; /* attributes, from directory entry */ + u_char de_LowerCase; /* NT VFAT lower case flags */ + u_char de_CHun; /* Hundredth of second of CTime*/ + u_short de_CTime; /* creation time */ + u_short de_CDate; /* creation date */ + u_short de_ADate; /* access date */ + u_short de_MTime; /* modification time */ + u_short de_MDate; 
/* modification date */ + u_long de_StartCluster; /* starting cluster of file */ + u_long de_FileSize; /* size of file in bytes */ + struct fatcache de_fc[FC_SIZE]; /* fat cache */ + u_quad_t de_modrev; /* Revision level for lease. */ +}; + +/* + * Values for the de_flag field of the denode. + */ +#define DE_UPDATE 0x0004 /* Modification time update request */ +#define DE_CREATE 0x0008 /* Creation time update */ +#define DE_ACCESS 0x0010 /* Access time update */ +#define DE_MODIFIED 0x0020 /* Denode has been modified */ +#define DE_RENAME 0x0040 /* Denode is in the process of being renamed */ + + +/* + * Transfer directory entries between internal and external form. + * dep is a struct denode * (internal form), + * dp is a struct direntry * (external form). + * + * DE_INTERNALIZE32 ORs the deHighClust word into bits 16-31 of + * de_StartCluster. The word is widened to u_long before the shift so + * that a value with its top bit set cannot overflow a signed int and + * sign-extend into the cluster number. + */ +#define DE_INTERNALIZE32(dep, dp) \ + ((dep)->de_StartCluster |= (u_long)getushort((dp)->deHighClust) << 16) +#define DE_INTERNALIZE(dep, dp) \ + (bcopy((dp)->deName, (dep)->de_Name, 11), \ + (dep)->de_Attributes = (dp)->deAttributes, \ + (dep)->de_LowerCase = (dp)->deLowerCase, \ + (dep)->de_CHun = (dp)->deCHundredth, \ + (dep)->de_CTime = getushort((dp)->deCTime), \ + (dep)->de_CDate = getushort((dp)->deCDate), \ + (dep)->de_ADate = getushort((dp)->deADate), \ + (dep)->de_MTime = getushort((dp)->deMTime), \ + (dep)->de_MDate = getushort((dp)->deMDate), \ + (dep)->de_StartCluster = getushort((dp)->deStartCluster), \ + (dep)->de_FileSize = getulong((dp)->deFileSize), \ + (FAT32((dep)->de_pmp) ? 
DE_INTERNALIZE32((dep), (dp)) : 0)) + +#define DE_EXTERNALIZE(dp, dep) \ + (bcopy((dep)->de_Name, (dp)->deName, 11), \ + (dp)->deAttributes = (dep)->de_Attributes, \ + (dp)->deLowerCase = (dep)->de_LowerCase, \ + (dp)->deCHundredth = (dep)->de_CHun, \ + putushort((dp)->deCTime, (dep)->de_CTime), \ + putushort((dp)->deCDate, (dep)->de_CDate), \ + putushort((dp)->deADate, (dep)->de_ADate), \ + putushort((dp)->deMTime, (dep)->de_MTime), \ + putushort((dp)->deMDate, (dep)->de_MDate), \ + putushort((dp)->deStartCluster, (dep)->de_StartCluster), \ + putulong((dp)->deFileSize, \ + ((dep)->de_Attributes & ATTR_DIRECTORY) ? 0 : (dep)->de_FileSize), \ + putushort((dp)->deHighClust, (dep)->de_StartCluster >> 16)) + +/* + * NOTE(review): struct denode declares de_next/de_prev and no de_chain + * member, so these two aliases look stale -- confirm before using them. + */ +#define de_forw de_chain[0] +#define de_back de_chain[1] + +#ifdef KERNEL + +#define VTODE(vp) ((struct denode *)(vp)->v_data) +#define DETOV(de) ((de)->de_vnode) + +/* + * Apply the DE_UPDATE, DE_ACCESS and DE_CREATE requests pending in + * de_flag to the denode's DOS timestamps (from mod, acc and cre + * respectively), setting DE_MODIFIED where something changed, and then + * clear the three request bits. When the mount has MSDOSFSMNT_NOWIN95 + * set only the DE_UPDATE part is applied. Expands to a single statement; + * the caller supplies the terminating semicolon. + */ +#define DETIMES(dep, acc, mod, cre) do { \ + if ((dep)->de_flag & DE_UPDATE) { \ + (dep)->de_flag |= DE_MODIFIED; \ + unix2dostime((mod), &(dep)->de_MDate, &(dep)->de_MTime, \ + NULL); \ + (dep)->de_Attributes |= ATTR_ARCHIVE; \ + } \ + if ((dep)->de_pmp->pm_flags & MSDOSFSMNT_NOWIN95) { \ + (dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS); \ + break; \ + } \ + if ((dep)->de_flag & DE_ACCESS) { \ + u_int16_t adate; \ + \ + unix2dostime((acc), &adate, NULL, NULL); \ + if (adate != (dep)->de_ADate) { \ + (dep)->de_flag |= DE_MODIFIED; \ + (dep)->de_ADate = adate; \ + } \ + } \ + if ((dep)->de_flag & DE_CREATE) { \ + unix2dostime((cre), &(dep)->de_CDate, &(dep)->de_CTime, \ + &(dep)->de_CHun); \ + (dep)->de_flag |= DE_MODIFIED; \ + } \ + (dep)->de_flag &= ~(DE_UPDATE | DE_CREATE | DE_ACCESS); \ +} while (0) + +/* + * This overlays the fid structure (see mount.h) + */ +struct defid { + u_short defid_len; /* length of structure */ + u_short defid_pad; /* force long alignment */ + + u_long defid_dirclust; /* cluster this dir entry came from */ + u_long defid_dirofs; /* offset of entry within the 
cluster */ +#if 0 + u_long defid_gen; /* generation number */ +#endif +}; + +extern vop_t **msdosfs_vnodeop_p; + +int msdosfs_lookup __P((struct vop_cachedlookup_args *)); +int msdosfs_inactive __P((struct vop_inactive_args *)); +int msdosfs_reclaim __P((struct vop_reclaim_args *)); + +/* + * Internal service routine prototypes. + */ +int deget __P((struct msdosfsmount *, u_long, u_long, struct denode **)); +int uniqdosname __P((struct denode *, struct componentname *, u_char *)); +int findwin95 __P((struct denode *)); + +int readep __P((struct msdosfsmount *pmp, u_long dirclu, u_long dirofs, struct buf **bpp, struct direntry **epp)); +int readde __P((struct denode *dep, struct buf **bpp, struct direntry **epp)); +int deextend __P((struct denode *dep, u_long length, struct ucred *cred)); +int fillinusemap __P((struct msdosfsmount *pmp)); +void reinsert __P((struct denode *dep)); +int dosdirempty __P((struct denode *dep)); +int createde __P((struct denode *dep, struct denode *ddep, struct denode **depp, struct componentname *cnp)); +int deupdat __P((struct denode *dep, int waitfor)); +int removede __P((struct denode *pdep, struct denode *dep)); +int detrunc __P((struct denode *dep, u_long length, int flags, struct ucred *cred, struct proc *p)); +int doscheckpath __P(( struct denode *source, struct denode *target)); +#endif /* KERNEL */ diff --git a/sys/fs/msdosfs/direntry.h b/sys/fs/msdosfs/direntry.h new file mode 100644 index 0000000..796fe78 --- /dev/null +++ b/sys/fs/msdosfs/direntry.h @@ -0,0 +1,143 @@ +/* $Id: direntry.h,v 1.12 1998/02/26 06:45:42 msmith Exp $ */ +/* $NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". 
+ * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +/* + * Structure of a dos directory entry. + */ +struct direntry { + u_int8_t deName[8]; /* filename, blank filled */ +#define SLOT_EMPTY 0x00 /* slot has never been used */ +#define SLOT_E5 0x05 /* the real value is 0xe5 */ +#define SLOT_DELETED 0xe5 /* file in this slot deleted */ + u_int8_t deExtension[3]; /* extension, blank filled */ + u_int8_t deAttributes; /* file attributes */ +#define ATTR_NORMAL 0x00 /* normal file */ +#define ATTR_READONLY 0x01 /* file is readonly */ +#define ATTR_HIDDEN 0x02 /* file is hidden */ +#define ATTR_SYSTEM 0x04 /* file is a system file */ +#define ATTR_VOLUME 0x08 /* entry is a volume label */ +#define ATTR_DIRECTORY 0x10 /* entry is a directory name */ +#define ATTR_ARCHIVE 0x20 /* file is new or modified */ + u_int8_t deLowerCase; /* NT VFAT lower case flags */ +#define LCASE_BASE 0x08 /* filename base in lower case */ +#define LCASE_EXT 0x10 /* filename extension in lower case */ + u_int8_t deCHundredth; /* hundredth of seconds in CTime */ + u_int8_t deCTime[2]; /* create time */ + u_int8_t deCDate[2]; /* create date */ + u_int8_t deADate[2]; /* access date */ + u_int8_t deHighClust[2]; /* high bytes of cluster number */ + u_int8_t deMTime[2]; /* last update time */ + u_int8_t deMDate[2]; /* last update date */ + u_int8_t deStartCluster[2]; /* starting cluster of file */ + u_int8_t deFileSize[4]; /* size of file in bytes */ +}; + +/* + * Structure of a Win95 long name directory entry + */ +struct winentry { + u_int8_t weCnt; +#define WIN_LAST 0x40 +#define WIN_CNT 0x3f + u_int8_t wePart1[10]; + u_int8_t weAttributes; +#define ATTR_WIN95 0x0f + u_int8_t weReserved1; + u_int8_t weChksum; + u_int8_t wePart2[12]; + u_int16_t weReserved2; + 
u_int8_t wePart3[4]; +}; +#define WIN_CHARS 13 /* Number of chars per winentry */ + +/* + * Maximum filename length in Win95 + * Note: Must be < sizeof(dirent.d_name) + */ +#define WIN_MAXLEN 255 + +/* + * This is the format of the contents of the deTime field in the direntry + * structure. + * We don't use bitfields because we don't know how compilers for + * arbitrary machines will lay them out. + */ +#define DT_2SECONDS_MASK 0x1F /* seconds divided by 2 */ +#define DT_2SECONDS_SHIFT 0 +#define DT_MINUTES_MASK 0x7E0 /* minutes */ +#define DT_MINUTES_SHIFT 5 +#define DT_HOURS_MASK 0xF800 /* hours */ +#define DT_HOURS_SHIFT 11 + +/* + * This is the format of the contents of the deDate field in the direntry + * structure. + */ +#define DD_DAY_MASK 0x1F /* day of month */ +#define DD_DAY_SHIFT 0 +#define DD_MONTH_MASK 0x1E0 /* month */ +#define DD_MONTH_SHIFT 5 +#define DD_YEAR_MASK 0xFE00 /* year - 1980 */ +#define DD_YEAR_SHIFT 9 + +#ifdef KERNEL +struct dirent; +void unix2dostime __P((struct timespec *tsp, u_int16_t *ddp, + u_int16_t *dtp, u_int8_t *dhp)); +void dos2unixtime __P((u_int dd, u_int dt, u_int dh, struct timespec *tsp)); +int dos2unixfn __P((u_char dn[11], u_char *un, int lower, int d2u_loaded, u_int8_t *d2u, int ul_loaded, u_int8_t *ul)); +int unix2dosfn __P((const u_char *un, u_char dn[12], int unlen, u_int gen, int u2d_loaded, u_int8_t *u2d, int lu_loaded, u_int8_t *lu)); +int unix2winfn __P((const u_char *un, int unlen, struct winentry *wep, int cnt, int chksum, int table_loaded, u_int16_t *u2w)); +int winChkName __P((const u_char *un, int unlen, struct winentry *wep, int chksum, int u2w_loaded, u_int16_t *u2w, int ul_loaded, u_int8_t *ul)); +int win2unixfn __P((struct winentry *wep, struct dirent *dp, int chksum, int table_loaded, u_int16_t *u2w)); +u_int8_t winChksum __P((u_int8_t *name)); +int winSlotCnt __P((const u_char *un, int unlen)); +int winLenFixup __P((const u_char *un, int unlen)); +#endif /* KERNEL */ diff --git a/sys/fs/msdosfs/fat.h 
b/sys/fs/msdosfs/fat.h new file mode 100644 index 0000000..74b05e2 --- /dev/null +++ b/sys/fs/msdosfs/fat.h @@ -0,0 +1,108 @@ +/* $Id: fat.h,v 1.6 1997/02/22 09:40:45 peter Exp $ */ +/* $NetBSD: fat.h,v 1.12 1997/11/17 15:36:36 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +/* + * Some useful cluster numbers. + */ +#define MSDOSFSROOT 0 /* cluster 0 means the root dir */ +#define CLUST_FREE 0 /* cluster 0 also means a free cluster */ +#define MSDOSFSFREE CLUST_FREE +#define CLUST_FIRST 2 /* first legal cluster number */ +#define CLUST_RSRVD 0xfffffff6 /* reserved cluster range */ +#define CLUST_BAD 0xfffffff7 /* a cluster with a defect */ +#define CLUST_EOFS 0xfffffff8 /* start of eof cluster range */ +#define CLUST_EOFE 0xffffffff /* end of eof cluster range */ + +#define FAT12_MASK 0x00000fff /* mask for 12 bit cluster numbers */ +#define FAT16_MASK 0x0000ffff /* mask for 16 bit cluster numbers */ +#define FAT32_MASK 0x0fffffff /* mask for FAT32 cluster numbers */ + +/* + * MSDOSFS: + * Return true if filesystem uses 12 bit fats. 
Microsoft Programmer's + * Reference says if the maximum cluster number in a filesystem is greater + * than 4078 ((CLUST_RSRVS - CLUST_FIRST) & FAT12_MASK) then we've got a + * 16 bit fat filesystem. While mounting, the result of this test is stored + * in pm_fatentrysize. + * GEMDOS-flavour (atari): + * If the filesystem is on floppy we've got a 12 bit fat filesystem, otherwise + * 16 bit. We check the d_type field in the disklabel struct while mounting + * and store the result in the pm_fatentrysize. Note that this kind of + * detection gets flakey when mounting a vnd-device. + */ +#define FAT12(pmp) (pmp->pm_fatmask == FAT12_MASK) +#define FAT16(pmp) (pmp->pm_fatmask == FAT16_MASK) +#define FAT32(pmp) (pmp->pm_fatmask == FAT32_MASK) + +#define MSDOSFSEOF(pmp, cn) ((((cn) | ~(pmp)->pm_fatmask) & CLUST_EOFS) == CLUST_EOFS) + +#ifdef KERNEL +/* + * These are the values for the function argument to the function + * fatentry(). + */ +#define FAT_GET 0x0001 /* get a fat entry */ +#define FAT_SET 0x0002 /* set a fat entry */ +#define FAT_GET_AND_SET (FAT_GET | FAT_SET) + +/* + * Flags to extendfile: + */ +#define DE_CLEAR 1 /* Zero out the blocks allocated */ + +int pcbmap __P((struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int* sp)); +int clusterfree __P((struct msdosfsmount *pmp, u_long cn, u_long *oldcnp)); +int clusteralloc __P((struct msdosfsmount *pmp, u_long start, u_long count, u_long fillwith, u_long *retcluster, u_long *got)); +int fatentry __P((int function, struct msdosfsmount *pmp, u_long cluster, u_long *oldcontents, u_long newcontents)); +int freeclusterchain __P((struct msdosfsmount *pmp, u_long startchain)); +int extendfile __P((struct denode *dep, u_long count, struct buf **bpp, u_long *ncp, int flags)); +void fc_purge __P((struct denode *dep, u_int frcn)); + +#endif /* KERNEL */ diff --git a/sys/fs/msdosfs/msdosfs_conv.c b/sys/fs/msdosfs/msdosfs_conv.c new file mode 100644 index 0000000..2c792eb --- /dev/null +++ 
b/sys/fs/msdosfs/msdosfs_conv.c @@ -0,0 +1,1041 @@ +/* $Id: msdosfs_conv.c,v 1.27 1998/05/17 21:18:08 dt Exp $ */ +/* $NetBSD: msdosfs_conv.c,v 1.25 1997/11/17 15:36:40 ws Exp $ */ + +/*- + * Copyright (C) 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +/* + * System include files. + */ +#include <sys/param.h> +#include <sys/time.h> +#include <sys/kernel.h> /* defines tz */ +#include <sys/systm.h> +#include <machine/clock.h> +#include <sys/dirent.h> + +/* + * MSDOSFS include files. + */ +#include <msdosfs/direntry.h> + +/* + * Total number of days that have passed for each month in a regular year. + */ +static u_short regyear[] = { + 31, 59, 90, 120, 151, 181, + 212, 243, 273, 304, 334, 365 +}; + +/* + * Total number of days that have passed for each month in a leap year. + */ +static u_short leapyear[] = { + 31, 60, 91, 121, 152, 182, + 213, 244, 274, 305, 335, 366 +}; + +/* + * Variables used to remember parts of the last time conversion. Maybe we + * can avoid a full conversion. + */ +static u_long lasttime; +static u_long lastday; +static u_short lastddate; +static u_short lastdtime; + +static __inline u_int8_t find_lcode __P((u_int16_t code, u_int16_t *u2w)); + +/* + * Convert the unix version of time to dos's idea of time to be used in + * file timestamps. The passed in unix time is assumed to be in GMT. + */ +void +unix2dostime(tsp, ddp, dtp, dhp) + struct timespec *tsp; + u_int16_t *ddp; + u_int16_t *dtp; + u_int8_t *dhp; +{ + u_long t; + u_long days; + u_long inc; + u_long year; + u_long month; + u_short *months; + + /* + * If the time from the last conversion is the same as now, then + * skip the computations and use the saved result. 
+ */ + t = tsp->tv_sec - (tz.tz_minuteswest * 60) + - (wall_cmos_clock ? adjkerntz : 0); + /* - daylight savings time correction */ + t &= ~1; + if (lasttime != t) { + lasttime = t; + lastdtime = (((t / 2) % 30) << DT_2SECONDS_SHIFT) + + (((t / 60) % 60) << DT_MINUTES_SHIFT) + + (((t / 3600) % 24) << DT_HOURS_SHIFT); + + /* + * If the number of days since 1970 is the same as the last + * time we did the computation then skip all this leap year + * and month stuff. + */ + days = t / (24 * 60 * 60); + if (days != lastday) { + lastday = days; + for (year = 1970;; year++) { + inc = year & 0x03 ? 365 : 366; + if (days < inc) + break; + days -= inc; + } + months = year & 0x03 ? regyear : leapyear; + for (month = 0; days >= months[month]; month++) + ; + if (month > 0) + days -= months[month - 1]; + lastddate = ((days + 1) << DD_DAY_SHIFT) + + ((month + 1) << DD_MONTH_SHIFT); + /* + * Remember dos's idea of time is relative to 1980. + * unix's is relative to 1970. If somehow we get a + * time before 1980 then don't give totally crazy + * results. + */ + if (year > 1980) + lastddate += (year - 1980) << DD_YEAR_SHIFT; + } + } + if (dtp) + *dtp = lastdtime; + if (dhp) + *dhp = (tsp->tv_sec & 1) * 100 + tsp->tv_nsec / 10000000; + + *ddp = lastddate; +} + +/* + * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that + * interval there were 8 regular years and 2 leap years. + */ +#define SECONDSTO1980 (((8 * 365) + (2 * 366)) * (24 * 60 * 60)) + +static u_short lastdosdate; +static u_long lastseconds; + +/* + * Convert from dos' idea of time to unix'. This will probably only be + * called from the stat(), and fstat() system calls and so probably need + * not be too efficient. + */ +void +dos2unixtime(dd, dt, dh, tsp) + u_int dd; + u_int dt; + u_int dh; + struct timespec *tsp; +{ + u_long seconds; + u_long month; + u_long year; + u_long days; + u_short *months; + + if (dd == 0) { + /* + * Uninitialized field, return the epoch. 
+ */ + tsp->tv_sec = 0; + tsp->tv_nsec = 0; + return; + } + seconds = (((dt & DT_2SECONDS_MASK) >> DT_2SECONDS_SHIFT) << 1) + + ((dt & DT_MINUTES_MASK) >> DT_MINUTES_SHIFT) * 60 + + ((dt & DT_HOURS_MASK) >> DT_HOURS_SHIFT) * 3600 + + dh / 100; + /* + * If the year, month, and day from the last conversion are the + * same then use the saved value. + */ + if (lastdosdate != dd) { + lastdosdate = dd; + days = 0; + year = (dd & DD_YEAR_MASK) >> DD_YEAR_SHIFT; + days = year * 365; + days += year / 4 + 1; /* add in leap days */ + if ((year & 0x03) == 0) + days--; /* if year is a leap year */ + months = year & 0x03 ? regyear : leapyear; + month = (dd & DD_MONTH_MASK) >> DD_MONTH_SHIFT; + if (month < 1 || month > 12) { + printf("dos2unixtime(): month value out of range (%ld)\n", + month); + month = 1; + } + if (month > 1) + days += months[month - 2]; + days += ((dd & DD_DAY_MASK) >> DD_DAY_SHIFT) - 1; + lastseconds = (days * 24 * 60 * 60) + SECONDSTO1980; + } + tsp->tv_sec = seconds + lastseconds + (tz.tz_minuteswest * 60) + + adjkerntz; + /* + daylight savings time correction */ + tsp->tv_nsec = (dh % 100) * 10000000; +} + +/* + * 0 - character disallowed in long file name. + * 1 - character should be replaced by '_' in DOS file name, + * and generation number inserted. + * 2 - character ('.' and ' ') should be skipped in DOS file name, + * and generation number inserted. 
 */
/*
 * unix2dos[] is indexed by a unix character code.  Besides the three
 * special values listed above, an entry holds the DOS character code to
 * store in the short (8.3) name.  It is used by the U2D() macro in
 * unix2dosfn() when no loadable table applies to the character.
 */
static u_char
unix2dos[256] = {
	0,    0,    0,    0,    0,    0,    0,    0,	/* 00-07 */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 08-0f */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 10-17 */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 18-1f */
	2,    0x21, 0,    0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
	0x28, 0x29, 0,    1,    1,    0x2d, 2,    0,	/* 28-2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
	0x38, 0x39, 0,    1,    0,    1,    0,    0,	/* 38-3f */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,	/* 40-47 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,	/* 48-4f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,	/* 50-57 */
	0x58, 0x59, 0x5a, 1,    0,    1,    0x5e, 0x5f,	/* 58-5f */
	0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,	/* 60-67 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,	/* 68-6f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,	/* 70-77 */
	0x58, 0x59, 0x5a, 0x7b, 0,    0x7d, 0x7e, 0,	/* 78-7f */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 80-87 */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 88-8f */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 90-97 */
	0,    0,    0,    0,    0,    0,    0,    0,	/* 98-9f */
	0,    0xad, 0xbd, 0x9c, 0xcf, 0xbe, 0xdd, 0xf5,	/* a0-a7 */
	0xf9, 0xb8, 0xa6, 0xae, 0xaa, 0xf0, 0xa9, 0xee,	/* a8-af */
	0xf8, 0xf1, 0xfd, 0xfc, 0xef, 0xe6, 0xf4, 0xfa,	/* b0-b7 */
	0xf7, 0xfb, 0xa7, 0xaf, 0xac, 0xab, 0xf3, 0xa8,	/* b8-bf */
	0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80,	/* c0-c7 */
	0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8,	/* c8-cf */
	0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0x9e,	/* d0-d7 */
	0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0xe1,	/* d8-df */
	0xb7, 0xb5, 0xb6, 0xc7, 0x8e, 0x8f, 0x92, 0x80,	/* e0-e7 */
	0xd4, 0x90, 0xd2, 0xd3, 0xde, 0xd6, 0xd7, 0xd8,	/* e8-ef */
	0xd1, 0xa5, 0xe3, 0xe0, 0xe2, 0xe5, 0x99, 0xf6,	/* f0-f7 */
	0x9d, 0xeb, 0xe9, 0xea, 0x9a, 0xed, 0xe8, 0x98,	/* f8-ff */
};

/*
 * dos2unix[] is indexed by a DOS character code and gives the unix
 * character code used by dos2unixfn().  Codes without a mapping, including
 * all control codes 00-1f, yield 0x3f ('?').
 */
static u_char
dos2unix[256] = {
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 00-07 */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 08-0f */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 10-17 */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f,	/* 18-1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	/* 28-2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	/* 38-3f */
	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,	/* 40-47 */
	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,	/* 48-4f */
	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,	/* 50-57 */
	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,	/* 58-5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 60-67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 68-6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 70-77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,	/* 78-7f */
	0xc7, 0xfc, 0xe9, 0xe2, 0xe4, 0xe0, 0xe5, 0xe7,	/* 80-87 */
	0xea, 0xeb, 0xe8, 0xef, 0xee, 0xec, 0xc4, 0xc5,	/* 88-8f */
	0xc9, 0xe6, 0xc6, 0xf4, 0xf6, 0xf2, 0xfb, 0xf9,	/* 90-97 */
	0xff, 0xd6, 0xdc, 0xf8, 0xa3, 0xd8, 0xd7, 0x3f,	/* 98-9f */
	0xe1, 0xed, 0xf3, 0xfa, 0xf1, 0xd1, 0xaa, 0xba,	/* a0-a7 */
	0xbf, 0xae, 0xac, 0xbd, 0xbc, 0xa1, 0xab, 0xbb,	/* a8-af */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xc1, 0xc2, 0xc0,	/* b0-b7 */
	0xa9, 0x3f, 0x3f, 0x3f, 0x3f, 0xa2, 0xa5, 0x3f,	/* b8-bf */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xe3, 0xc3,	/* c0-c7 */
	0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0x3f, 0xa4,	/* c8-cf */
	0xf0, 0xd0, 0xca, 0xcb, 0xc8, 0x3f, 0xcd, 0xce,	/* d0-d7 */
	0xcf, 0x3f, 0x3f, 0x3f, 0x3f, 0xa6, 0xcc, 0x3f,	/* d8-df */
	0xd3, 0xdf, 0xd4, 0xd2, 0xf5, 0xd5, 0xb5, 0xfe,	/* e0-e7 */
	0xde, 0xda, 0xdb, 0xd9, 0xfd, 0xdd, 0xaf, 0x3f,	/* e8-ef */
	0xad, 0xb1, 0x3f, 0xbe, 0xb6, 0xa7, 0xf7, 0xb8,	/* f0-f7 */
	0xb0, 0xa8, 0xb7, 0xb9, 0xb3, 0xb2, 0x3f, 0x3f,	/* f8-ff */
};

/*
 * u2l[] maps a character to its lower-case form (40-5a -> 60-7a,
 * c0-de -> e0-fe except d7 and df); every other code maps to itself.
 * dos2unixfn() applies it when lower-casing is requested and winChkName()
 * applies it to both sides for a case-insensitive comparison.
 */
static u_char
u2l[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* 00-07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* 08-0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,	/* 10-17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,	/* 18-1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	/* 28-2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	/* 38-3f */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 40-47 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 48-4f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 50-57 */
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,	/* 58-5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 60-67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 68-6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 70-77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,	/* 78-7f */
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,	/* 80-87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,	/* 88-8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,	/* 90-97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,	/* 98-9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,	/* a0-a7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,	/* a8-af */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,	/* b0-b7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,	/* b8-bf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* c0-c7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* c8-cf */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7,	/* d0-d7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf,	/* d8-df */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* e0-e7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* e8-ef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,	/* f0-f7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,	/* f8-ff */
};

/*
 * l2u[] is applied by unix2dosfn() to each character before the U2D()
 * lookup when no loadable table applies to the character.
 *
 * NOTE(review): every entry below is identical to u2l[], i.e. this table
 * maps upper case to lower case although its name suggests the opposite.
 * With the built-in unix2dos[] table both cases of a letter yield the same
 * DOS code, so this looks harmless there -- confirm whether it matters when
 * a loaded u2d table is combined with this built-in table.
 */
static u_char
l2u[256] = {
	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,	/* 00-07 */
	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,	/* 08-0f */
	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,	/* 10-17 */
	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,	/* 18-1f */
	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,	/* 20-27 */
	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,	/* 28-2f */
	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,	/* 30-37 */
	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,	/* 38-3f */
	0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 40-47 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 48-4f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 50-57 */
	0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,	/* 58-5f */
	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,	/* 60-67 */
	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,	/* 68-6f */
	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,	/* 70-77 */
	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,	/* 78-7f */
	0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,	/* 80-87 */
	0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,	/* 88-8f */
	0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97,	/* 90-97 */
	0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,	/* 98-9f */
	0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,	/* a0-a7 */
	0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,	/* a8-af */
	0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7,	/* b0-b7 */
	0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,	/* b8-bf */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* c0-c7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* c8-cf */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xd7,	/* d0-d7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xdf,	/* d8-df */
	0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7,	/* e0-e7 */
	0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,	/* e8-ef */
	0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7,	/* f0-f7 */
	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff,	/* f8-ff */
};

/*
 * DOS filenames are made of 2 parts, the name part and the extension part.
+ * The name part is 8 characters long and the extension part is 3 + * characters long. They may contain trailing blanks if the name or + * extension are not long enough to fill their respective fields. + */ + +/* + * Convert a DOS filename to a unix filename. And, return the number of + * characters in the resulting unix filename excluding the terminating + * null. + */ +int +dos2unixfn(dn, un, lower, d2u_loaded, d2u, ul_loaded, ul) + u_char dn[11]; + u_char *un; + int lower; + int d2u_loaded; + u_int8_t *d2u; + int ul_loaded; + u_int8_t *ul; +{ + int i; + int thislong = 1; + u_char c; + + /* + * If first char of the filename is SLOT_E5 (0x05), then the real + * first char of the filename should be 0xe5. But, they couldn't + * just have a 0xe5 mean 0xe5 because that is used to mean a freed + * directory slot. Another dos quirk. + */ + if (*dn == SLOT_E5) + c = d2u_loaded ? d2u[0xe5 & 0x7f] : dos2unix[0xe5]; + else + c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] : + dos2unix[*dn]; + *un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ? + ul[c & 0x7f] : u2l[c]) : c; + dn++; + + /* + * Copy the name portion into the unix filename string. + */ + for (i = 1; i < 8 && *dn != ' '; i++) { + c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] : + dos2unix[*dn]; + dn++; + *un++ = (lower & LCASE_BASE) ? (ul_loaded && (c & 0x80) ? + ul[c & 0x7f] : u2l[c]) : c; + thislong++; + } + dn += 8 - i; + + /* + * Now, if there is an extension then put in a period and copy in + * the extension. + */ + if (*dn != ' ') { + *un++ = '.'; + thislong++; + for (i = 0; i < 3 && *dn != ' '; i++) { + c = d2u_loaded && (*dn & 0x80) ? d2u[*dn & 0x7f] : + dos2unix[*dn]; + dn++; + *un++ = (lower & LCASE_EXT) ? (ul_loaded && (c & 0x80) ? + ul[c & 0x7f] : u2l[c]) : c; + thislong++; + } + } + *un++ = 0; + + return (thislong); +} + +/* + * Convert a unix filename to a DOS filename according to Win95 rules. 
 * If applicable and gen is not 0, it is inserted into the converted
 * filename as a generation number.
 * Returns
 *	0 if name couldn't be converted
 *	1 if the converted name is the same as the original
 *	  (no long filename entry necessary for Win95)
 *	2 if conversion was successful
 *	3 if conversion was successful and generation number was inserted
 *
 * Arguments:
 *	un, unlen	unix name and its length
 *	dn		output: 11 blank padded characters plus a NUL
 *	gen		generation number, 0 for none
 *	u2d_loaded, u2d	optional 128 entry table for unix codes >= 0x80
 *	lu_loaded, lu	optional 128 entry table applied before the
 *			unix -> DOS lookup for codes >= 0x80
 */
int
unix2dosfn(un, dn, unlen, gen, u2d_loaded, u2d, lu_loaded, lu)
	const u_char *un;
	u_char dn[12];
	int unlen;
	u_int gen;
	int u2d_loaded;
	u_int8_t *u2d;
	int lu_loaded;
	u_int8_t *lu;
{
	int i, j, l;
	int conv = 1;	/* result code so far; 3 is sticky once set */
	const u_char *cp, *dp, *dp1;
	u_char gentext[6], *wcp;
	u_int8_t c;
/* unix code -> DOS code (or one of the special values 0, 1, 2) */
#define U2D(c) (u2d_loaded && ((c) & 0x80) ? u2d[(c) & 0x7f] : unix2dos[c])

	/*
	 * Fill the dos filename string with blanks. These are DOS's pad
	 * characters.
	 */
	for (i = 0; i < 11; i++)
		dn[i] = ' ';
	dn[11] = 0;

	/*
	 * The filenames "." and ".." are handled specially, since they
	 * don't follow dos filename rules.
	 * They yield 1 for gen 0 or 1 and 0 for any larger gen.
	 */
	if (un[0] == '.' && unlen == 1) {
		dn[0] = '.';
		return gen <= 1;
	}
	if (un[0] == '.' && un[1] == '.' && unlen == 2) {
		dn[0] = '.';
		dn[1] = '.';
		return gen <= 1;
	}

	/*
	 * Filenames with only blanks and dots are not allowed!
	 */
	for (cp = un, i = unlen; --i >= 0; cp++)
		if (*cp != ' ' && *cp != '.')
			break;
	if (i < 0)
		return 0;


	/*
	 * Filenames with some characters are not allowed!
	 * (those whose table entry is 0)
	 */
	for (cp = un, i = unlen; --i >= 0; cp++)
		if (U2D(*cp) == 0)
			return 0;

	/*
	 * Now find the extension
	 * Note: dot as first char doesn't start extension
	 *	 and trailing dots and blanks are ignored
	 *
	 * dp1 remembers the position just after the first dot of the
	 * current run of dots/blanks; it becomes the extension start (dp)
	 * only once an ordinary character follows.  After the loop a
	 * non-zero dp1 therefore marks where trailing dots/blanks begin.
	 */
	dp = dp1 = 0;
	for (cp = un + 1, i = unlen - 1; --i >= 0;) {
		switch (*cp++) {
		case '.':
			if (!dp1)
				dp1 = cp;
			break;
		case ' ':
			break;
		default:
			if (dp1)
				dp = dp1;
			dp1 = 0;
			break;
		}
	}

	/*
	 * Now convert it
	 */
	if (dp) {
		/* l = extension length without trailing dots/blanks */
		if (dp1)
			l = dp1 - dp;
		else
			l = unlen - (dp - un);
		for (i = 0, j = 8; i < l && j < 11; i++, j++) {
			c = dp[i];
			c = lu_loaded && (c & 0x80) ?
			    lu[c & 0x7f] : l2u[c];
			c = U2D(c);
			if (dp[i] != (dn[j] = c)
			    && conv != 3)
				conv = 2;
			if (dn[j] == 1) {
				conv = 3;
				dn[j] = '_';
			}
			if (dn[j] == 2) {
				/* skipped char: re-blank and reuse the slot */
				conv = 3;
				dn[j--] = ' ';
			}
		}
		/* extension did not fit into 3 characters */
		if (i < l)
			conv = 3;
		/* step back onto the dot so the name part ends before it */
		dp--;
	} else {
		/*
		 * No extension: the name ends before the trailing
		 * dots/blanks.  The backward scan stops because the name is
		 * known to contain some other character (checked above).
		 */
		for (dp = cp; *--dp == ' ' || *dp == '.';);
		dp++;
	}

	/*
	 * Now convert the rest of the name
	 */
	for (i = j = 0; un < dp && j < 8; i++, j++, un++) {
		c = lu_loaded && (*un & 0x80) ?
		    lu[*un & 0x7f] : l2u[*un];
		c = U2D(c);
		if (*un != (dn[j] = c)
		    && conv != 3)
			conv = 2;
		if (dn[j] == 1) {
			conv = 3;
			dn[j] = '_';
		}
		if (dn[j] == 2) {
			conv = 3;
			dn[j--] = ' ';
		}
	}
	/* name part did not fit into 8 characters */
	if (un < dp)
		conv = 3;
	/*
	 * If we didn't have any chars in filename,
	 * generate a default
	 */
	if (!j)
		dn[0] = '_';

	/*
	 * The first character cannot be E5,
	 * because that means a deleted entry
	 */
	if (dn[0] == 0xe5)
		dn[0] = SLOT_E5;

	/*
	 * If there wasn't any char dropped,
	 * there is no place for generation numbers
	 */
	if (conv != 3) {
		if (gen > 1)
			return 0;
		return conv;
	}

	/*
	 * Now insert the generation number into the filename part
	 */
	if (gen == 0)
		return conv;
	/* Render gen in decimal, right aligned in gentext[]. */
	for (wcp = gentext + sizeof(gentext); wcp > gentext && gen; gen /= 10)
		*--wcp = gen % 10 + '0';
	/* more digits than gentext[] can hold */
	if (gen)
		return 0;
	/* i = index just past the last non-blank name character */
	for (i = 8; dn[--i] == ' ';);
	i++;
	/* Overwrite the tail of the name if '~' plus digits would not fit. */
	if (gentext + sizeof(gentext) - wcp + 1 > 8 - i)
		i = 8 - (gentext + sizeof(gentext) - wcp + 1);
	dn[i++] = '~';
	while (wcp < gentext + sizeof(gentext))
		dn[i++] = *wcp++;
	return 3;
#undef U2D
}

/*
 * Create a Win95 long name directory entry
 * Note: assumes that the filename is valid,
 *	 i.e.
doesn't consist solely of blanks and dots + */ +int +unix2winfn(un, unlen, wep, cnt, chksum, table_loaded, u2w) + const u_char *un; + int unlen; + struct winentry *wep; + int cnt; + int chksum; + int table_loaded; + u_int16_t *u2w; +{ + const u_int8_t *cp; + u_int8_t *wcp; + int i; + u_int16_t code; + + /* + * Drop trailing blanks and dots + */ + for (cp = un + unlen; *--cp == ' ' || *cp == '.'; unlen--); + + un += (cnt - 1) * WIN_CHARS; + unlen -= (cnt - 1) * WIN_CHARS; + + /* + * Initialize winentry to some useful default + */ + for (wcp = (u_int8_t *)wep, i = sizeof(*wep); --i >= 0; *wcp++ = 0xff); + wep->weCnt = cnt; + wep->weAttributes = ATTR_WIN95; + wep->weReserved1 = 0; + wep->weChksum = chksum; + wep->weReserved2 = 0; + + /* + * Now convert the filename parts + */ + for (wcp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) { + if (--unlen < 0) + goto done; + if (table_loaded && (*un & 0x80)) { + code = u2w[*un++ & 0x7f]; + *wcp++ = code; + *wcp++ = code >> 8; + } else { + *wcp++ = *un++; + *wcp++ = 0; + } + } + for (wcp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) { + if (--unlen < 0) + goto done; + if (table_loaded && (*un & 0x80)) { + code = u2w[*un++ & 0x7f]; + *wcp++ = code; + *wcp++ = code >> 8; + } else { + *wcp++ = *un++; + *wcp++ = 0; + } + } + for (wcp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) { + if (--unlen < 0) + goto done; + if (table_loaded && (*un & 0x80)) { + code = u2w[*un++ & 0x7f]; + *wcp++ = code; + *wcp++ = code >> 8; + } else { + *wcp++ = *un++; + *wcp++ = 0; + } + } + if (!unlen) + wep->weCnt |= WIN_LAST; + return unlen; + +done: + *wcp++ = 0; + *wcp++ = 0; + wep->weCnt |= WIN_LAST; + return 0; +} + +static __inline u_int8_t +find_lcode(code, u2w) + u_int16_t code; + u_int16_t *u2w; +{ + int i; + + for (i = 0; i < 128; i++) + if (u2w[i] == code) + return (i | 0x80); + return '?'; +} + +/* + * Compare our filename to the one in the Win95 entry + * Returns the checksum or -1 if no match + */ +int 
winChkName(un, unlen, wep, chksum, u2w_loaded, u2w, ul_loaded, ul)
	const u_char *un;
	int unlen;
	struct winentry *wep;
	int chksum;
	int u2w_loaded;
	u_int16_t *u2w;
	int ul_loaded;
	u_int8_t *ul;
{
	u_int8_t *cp;
	int i;
	u_int16_t code;
	u_int8_t c1, c2;

	/*
	 * First compare checksums
	 * (a slot flagged WIN_LAST supplies the checksum; any other slot
	 * must carry the one passed in)
	 */
	if (wep->weCnt&WIN_LAST)
		chksum = wep->weChksum;
	else if (chksum != wep->weChksum)
		chksum = -1;
	if (chksum == -1)
		return -1;

	/*
	 * Offset of this entry
	 */
	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
	un += i;
	if ((unlen -= i) <= 0)
		return -1;
	/* the slot flagged WIN_LAST cannot hold more than WIN_CHARS */
	if ((wep->weCnt&WIN_LAST) && unlen > WIN_CHARS)
		return -1;

	/*
	 * Compare the name parts
	 *
	 * Each stored character is two bytes, low byte first.  It is
	 * reduced to an 8 bit code (via find_lcode() when a table is
	 * loaded, otherwise '?' for anything above 0xff) and then both
	 * sides are passed through the lower-casing table before being
	 * compared.  When our name ends first, the entry must have its
	 * 16 bit zero terminator at that position.
	 */
	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
		if (--unlen < 0) {
			if (!*cp++ && !*cp)
				return chksum;
			return -1;
		}
		code = (cp[1] << 8) | cp[0];
		if (code & 0xff80) {
			if (u2w_loaded)
				code = find_lcode(code, u2w);
			else if (code & 0xff00)
				code = '?';
		}
		c1 = ul_loaded && (code & 0x80) ?
		     ul[code & 0x7f] : u2l[code];
		c2 = ul_loaded && (*un & 0x80) ?
		     ul[*un & 0x7f] : u2l[*un];
		if (c1 != c2)
			return -1;
		cp += 2;
		un++;
	}
	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
		if (--unlen < 0) {
			if (!*cp++ && !*cp)
				return chksum;
			return -1;
		}
		code = (cp[1] << 8) | cp[0];
		if (code & 0xff80) {
			if (u2w_loaded)
				code = find_lcode(code, u2w);
			else if (code & 0xff00)
				code = '?';
		}
		c1 = ul_loaded && (code & 0x80) ?
		     ul[code & 0x7f] : u2l[code];
		c2 = ul_loaded && (*un & 0x80) ?
		     ul[*un & 0x7f] : u2l[*un];
		if (c1 != c2)
			return -1;
		cp += 2;
		un++;
	}
	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
		if (--unlen < 0) {
			if (!*cp++ && !*cp)
				return chksum;
			return -1;
		}
		code = (cp[1] << 8) | cp[0];
		if (code & 0xff80) {
			if (u2w_loaded)
				code = find_lcode(code, u2w);
			else if (code & 0xff00)
				code = '?';
		}
		c1 = ul_loaded && (code & 0x80) ?
		     ul[code & 0x7f] : u2l[code];
		c2 = ul_loaded && (*un & 0x80) ?
		     ul[*un & 0x7f] : u2l[*un];
		if (c1 != c2)
			return -1;
		cp += 2;
		un++;
	}
	return chksum;
}

/*
 * Convert Win95 filename to dirbuf.
 * Returns the checksum or -1 if impossible
 *
 * The characters of slot wep are stored at their final offset in
 * dp->d_name.  The slot flagged WIN_LAST sets dp->d_namlen to the maximum
 * length for that many slots; whichever call then finds the 16 bit zero
 * terminator subtracts the unused remainder.  A '/' in the name is
 * rejected.
 */
int
win2unixfn(wep, dp, chksum, table_loaded, u2w)
	struct winentry *wep;
	struct dirent *dp;
	int chksum;
	int table_loaded;
	u_int16_t *u2w;
{
	u_int8_t *cp;
	u_int8_t *np, *ep = dp->d_name + WIN_MAXLEN;
	u_int16_t code;
	int i;

	/* slot number must be 1 .. number of slots for WIN_MAXLEN chars */
	if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
	    || !(wep->weCnt&WIN_CNT))
		return -1;

	/*
	 * First compare checksums
	 */
	if (wep->weCnt&WIN_LAST) {
		chksum = wep->weChksum;
		/*
		 * This works even though d_namlen is one byte!
		 */
		dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS;
	} else if (chksum != wep->weChksum)
		chksum = -1;
	if (chksum == -1)
		return -1;

	/*
	 * Offset of this entry
	 */
	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
	np = (u_int8_t *)dp->d_name + i;

	/*
	 * Convert the name parts
	 */
	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
		code = (cp[1] << 8) | cp[0];
		switch (code) {
		case 0:
			/* terminator: drop the unused rest of this slot */
			*np = '\0';
			dp->d_namlen -= sizeof(wep->wePart2)/2
			    + sizeof(wep->wePart3)/2 + i + 1;
			return chksum;
		case '/':
			*np = '\0';
			return -1;
		default:
			if (code & 0xff80) {
				if (table_loaded)
					code = find_lcode(code, u2w);
				else if (code & 0xff00)
					code = '?';
			}
			*np++ = code;
			break;
		}
		/*
		 * The size comparison should result in the compiler
		 * optimizing the whole if away
		 */
		if (WIN_MAXLEN % WIN_CHARS < sizeof(wep->wePart1) / 2
		    && np > ep) {
			np[-1] = 0;
			return -1;
		}
		cp += 2;
	}
	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
		code = (cp[1] << 8) | cp[0];
		switch (code) {
		case 0:
			*np = '\0';
			dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1;
			return chksum;
		case '/':
			*np = '\0';
			return -1;
		default:
			if (code & 0xff80) {
				if (table_loaded)
					code = find_lcode(code, u2w);
				else if (code & 0xff00)
					code = '?';
			}
			*np++ = code;
			break;
		}
		/*
		 * The size comparisons should be optimized away
		 */
		if (WIN_MAXLEN % WIN_CHARS >= sizeof(wep->wePart1) / 2
		    && WIN_MAXLEN % WIN_CHARS < (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
		    && np > ep) {
			np[-1] = 0;
			return -1;
		}
		cp += 2;
	}
	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
		code = (cp[1] << 8) | cp[0];
		switch (code) {
		case 0:
			*np = '\0';
			dp->d_namlen -= i + 1;
			return chksum;
		case '/':
			*np = '\0';
			return -1;
		default:
			if (code & 0xff80) {
				if (table_loaded)
					code = find_lcode(code, u2w);
				else if (code & 0xff00)
					code = '?';
			}
			*np++ = code;
			break;
		}
		/*
		 * See above
		 */
		if (WIN_MAXLEN % WIN_CHARS >= (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
		    && np > ep) {
			np[-1] = 0;
			return -1;
		}
		cp += 2;
	}
	return chksum;
}

/*
 * Compute the checksum of a DOS filename for Win95 use
 *
 * name points to the 11 byte DOS name.  For each byte the running 8 bit
 * sum is rotated right by one bit and the byte is then added.
 */
u_int8_t
winChksum(name)
	u_int8_t *name;
{
	int i;
	u_int8_t s;

	for (s = 0, i = 11; --i >= 0; s += *name++)
		s = (s << 7)|(s >> 1);
	return s;
}

/*
 * Determine the number of slots necessary for Win95 names
 *
 * Returns 0 when the name, without trailing blanks and dots, is longer
 * than WIN_MAXLEN.
 */
int
winSlotCnt(un, unlen)
	const u_char *un;
	int unlen;
{
	unlen = winLenFixup(un, unlen);
	if (unlen > WIN_MAXLEN)
		return 0;
	return howmany(unlen, WIN_CHARS);
}

/*
 * Determine the number of bytes necessary for Win95 names
 *
 * Returns unlen reduced by the number of trailing blanks and dots; the
 * scan never goes below index 0.
 */
int
winLenFixup(un, unlen)
	const u_char* un;
	int unlen;
{
	for (un += unlen; unlen > 0; unlen--)
		if (*--un != ' ' && *un != '.')
			break;
	return unlen;
}
diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c new file mode 100644 index 0000000..74be5c4 --- /dev/null +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -0,0 +1,712 @@
/* $Id: msdosfs_denode.c,v 1.43 1998/12/07 21:58:34 archie Exp $ */
/* $NetBSD: msdosfs_denode.c,v 1.28 1998/02/10 14:10:00 mrg Exp $ */

/*-
* Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. 
+ * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <sys/proc.h> +#include <sys/buf.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#include <msdosfs/bpb.h> +#include <msdosfs/msdosfsmount.h> +#include <msdosfs/direntry.h> +#include <msdosfs/denode.h> +#include <msdosfs/fat.h> + +static MALLOC_DEFINE(M_MSDOSFSNODE, "MSDOSFS node", "MSDOSFS vnode private part"); + +static struct denode **dehashtbl; +static u_long dehash; /* size of hash table - 1 */ +#define DEHASH(dev, dcl, doff) (dehashtbl[((dev) + (dcl) + (doff) / \ + sizeof(struct direntry)) & dehash]) +#ifndef NULL_SIMPLELOCKS +static struct simplelock dehash_slock; +#endif + +union _qcvt { + quad_t qcvt; + long val[2]; +}; +#define SETHIGH(q, h) { \ + union _qcvt tmp; \ + tmp.qcvt = (q); \ + tmp.val[_QUAD_HIGHWORD] = (h); \ + (q) = tmp.qcvt; \ +} +#define SETLOW(q, l) { \ + union _qcvt tmp; \ + tmp.qcvt = (q); \ + tmp.val[_QUAD_LOWWORD] = (l); \ + (q) = tmp.qcvt; \ +} + +static struct denode * + msdosfs_hashget __P((dev_t dev, u_long dirclust, + u_long diroff)); +static void msdosfs_hashins __P((struct denode *dep)); +static void msdosfs_hashrem __P((struct denode *dep)); +/* + * Set up the denode hash table with hashinit(), sized from half of + * desiredvnodes (dehash receives the value used as the index mask in + * DEHASH), and initialize the lock that protects it. + * Always returns 0; vfsp is not used. + */ +/*ARGSUSED*/ +int +msdosfs_init(vfsp) + struct vfsconf *vfsp; +{ + dehashtbl = hashinit(desiredvnodes/2, M_MSDOSFSMNT, &dehash); + simple_lock_init(&dehash_slock); + return (0); +} +/* + * Look up the denode for the directory entry (dev, dirclust, diroff). + * Entries whose de_refcnt is 0 never match. On a match the vnode is + * acquired with vget(LK_EXCLUSIVE | LK_INTERLOCK); if vget() fails the + * whole search is restarted from the top. + * Returns the denode, or NULL if no entry matches. + */ +static struct denode * +msdosfs_hashget(dev, dirclust, diroff) + dev_t dev; + u_long dirclust; + u_long diroff; +{ + struct proc *p = curproc; /* XXX */ + struct denode *dep; + struct vnode *vp; + +loop: + 
+ simple_lock(&dehash_slock); + for (dep = DEHASH(dev, dirclust, diroff); dep; dep = dep->de_next) { + if (dirclust == dep->de_dirclust + && diroff == dep->de_diroffset + && dev == dep->de_dev + && dep->de_refcnt != 0) { + vp = DETOV(dep); + simple_lock(&vp->v_interlock); + simple_unlock(&dehash_slock); + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, p)) + goto loop; + return (dep); + } + } + simple_unlock(&dehash_slock); + return (NULL); +} +/* + * Insert dep at the head of the hash chain selected by its de_dev, + * de_dirclust and de_diroffset. dehash_slock is held for the duration. + * de_prev is kept as a pointer to the pointer that refers to the entry, + * which lets msdosfs_hashrem() unlink without walking the chain. + */ +static void +msdosfs_hashins(dep) + struct denode *dep; +{ + struct denode **depp, *deq; + + simple_lock(&dehash_slock); + depp = &DEHASH(dep->de_dev, dep->de_dirclust, dep->de_diroffset); + deq = *depp; + if (deq) + deq->de_prev = &dep->de_next; + dep->de_next = deq; + dep->de_prev = depp; + *depp = dep; + simple_unlock(&dehash_slock); +} +/* + * Unlink dep from its hash chain while holding dehash_slock. + * With DIAGNOSTIC the now stale link fields are set to NULL as well. + */ +static void +msdosfs_hashrem(dep) + struct denode *dep; +{ + struct denode *deq; + + simple_lock(&dehash_slock); + deq = dep->de_next; + if (deq) + deq->de_prev = dep->de_prev; + *dep->de_prev = deq; +#ifdef DIAGNOSTIC + dep->de_next = NULL; + dep->de_prev = NULL; +#endif + simple_unlock(&dehash_slock); +} + +/* + * If deget() succeeds it returns with the gotten denode locked(). + * + * pmp - address of msdosfsmount structure of the filesystem containing + * the denode of interest. The pm_dev field and the address of + * the msdosfsmount structure are used. + * dirclust - which cluster bp contains, if dirclust is 0 (root directory) + * diroffset is relative to the beginning of the root directory, + * otherwise it is cluster relative. + * diroffset - offset past begin of cluster of denode we want + * depp - returns the address of the gotten denode. 
+ */ +int +deget(pmp, dirclust, diroffset, depp) + struct msdosfsmount *pmp; /* so we know the maj/min number */ + u_long dirclust; /* cluster this dir entry came from */ + u_long diroffset; /* index of entry within the cluster */ + struct denode **depp; /* returns the addr of the gotten denode */ +{ + int error; + dev_t dev = pmp->pm_dev; + struct mount *mntp = pmp->pm_mountp; + struct direntry *direntptr; + struct denode *ldep; + struct vnode *nvp; + struct buf *bp; + struct proc *p = curproc; /* XXX */ + struct timeval tv; + +#ifdef MSDOSFS_DEBUG + printf("deget(pmp %p, dirclust %lu, diroffset %lx, depp %p)\n", + pmp, dirclust, diroffset, depp); +#endif + + /* + * On FAT32 filesystems, root is a (more or less) normal + * directory + */ + if (FAT32(pmp) && dirclust == MSDOSFSROOT) + dirclust = pmp->pm_rootdirblk; + + /* + * See if the denode is in the denode cache. Use the location of + * the directory entry to compute the hash value. For subdir use + * address of "." entry. For root dir (if not FAT32) use cluster + * MSDOSFSROOT, offset MSDOSFSROOT_OFS + * + * NOTE: The check for de_refcnt > 0 below ensures the denode being + * examined does not represent an unlinked but still open file. + * These files are not to be accessible even when the directory + * entry that represented the file happens to be reused while the + * deleted file is still open. + * + * A cache hit is handed to the caller exactly as + * msdosfs_hashget() delivered it, and 0 is returned. + */ + ldep = msdosfs_hashget(dev, dirclust, diroffset); + if (ldep) { + *depp = ldep; + return (0); + } + + /* + * Do the MALLOC before the getnewvnode since doing so afterward + * might cause a bogus v_data pointer to get dereferenced + * elsewhere if MALLOC should block. + */ + MALLOC(ldep, struct denode *, sizeof(struct denode), M_MSDOSFSNODE, M_WAITOK); + + /* + * Directory entry was not in cache, have to create a vnode and + * copy it from the passed disk buffer. 
+ */ + /* getnewvnode() does a VREF() on the vnode */ + error = getnewvnode(VT_MSDOSFS, mntp, msdosfs_vnodeop_p, &nvp); + if (error) { + *depp = NULL; + FREE(ldep, M_MSDOSFSNODE); + return error; + } + bzero((caddr_t)ldep, sizeof *ldep); + lockinit(&ldep->de_lock, PINOD, "denode", 0, 0); + nvp->v_data = ldep; + ldep->de_vnode = nvp; + ldep->de_flag = 0; + ldep->de_devvp = 0; + ldep->de_dev = dev; + ldep->de_dirclust = dirclust; + ldep->de_diroffset = diroffset; + fc_purge(ldep, 0); /* init the fat cache for this denode */ + + /* + * Lock the denode so that it can't be accessed until we've read + * it in and have done what we need to it. Do this here instead + * of at the start of msdosfs_hashins() so that reinsert() can + * call msdosfs_hashins() with a locked denode. + */ + if (lockmgr(&ldep->de_lock, LK_EXCLUSIVE, (struct simplelock *)0, p)) + panic("deget: unexpected lock failure"); + + /* + * Insert the denode into the hash queue. + */ + msdosfs_hashins(ldep); + + ldep->de_pmp = pmp; + ldep->de_refcnt = 1; + /* + * Copy the directory entry into the denode area of the vnode. + */ + if ((dirclust == MSDOSFSROOT + || (FAT32(pmp) && dirclust == pmp->pm_rootdirblk)) + && diroffset == MSDOSFSROOT_OFS) { + /* + * Directory entry for the root directory. There isn't one, + * so we manufacture one. We should probably rummage + * through the root directory and find a label entry (if it + * exists), and then use the time and date from that entry + * as the time and date for the root denode. 
+ */ + nvp->v_flag |= VROOT; /* should be further down XXX */ + + ldep->de_Attributes = ATTR_DIRECTORY; + ldep->de_LowerCase = 0; + if (FAT32(pmp)) + ldep->de_StartCluster = pmp->pm_rootdirblk; + /* de_FileSize will be filled in further down */ + else { + ldep->de_StartCluster = MSDOSFSROOT; + ldep->de_FileSize = pmp->pm_rootdirsize * pmp->pm_BytesPerSec; + } + /* + * fill in time and date so that dos2unixtime() doesn't + * spit up when called from msdosfs_getattr() with root + * denode + */ + ldep->de_CHun = 0; + ldep->de_CTime = 0x0000; /* 00:00:00 */ + ldep->de_CDate = (0 << DD_YEAR_SHIFT) | (1 << DD_MONTH_SHIFT) + | (1 << DD_DAY_SHIFT); + /* Jan 1, 1980 */ + ldep->de_ADate = ldep->de_CDate; + ldep->de_MTime = ldep->de_CTime; + ldep->de_MDate = ldep->de_CDate; + /* leave the other fields as garbage */ + } else { + error = readep(pmp, dirclust, diroffset, &bp, &direntptr); + if (error) { + /* + * The denode does not contain anything useful, so + * it would be wrong to leave it on its hash chain. + * Arrange for vput() to just forget about it. + */ + ldep->de_Name[0] = SLOT_DELETED; + + vput(nvp); + *depp = NULL; + return (error); + } + DE_INTERNALIZE(ldep, direntptr); + brelse(bp); + } + + /* + * Fill in a few fields of the vnode and finish filling in the + * denode. Then return the address of the found denode. + */ + if (ldep->de_Attributes & ATTR_DIRECTORY) { + /* + * Since DOS directory entries that describe directories + * have 0 in the filesize field, we take this opportunity + * to find out the length of the directory and plug it into + * the denode structure. + * + * pcbmap() is asked for cluster 0xffff and is expected to + * answer E2BIG with the cluster count in size. + * NOTE(review): any other result, including 0, is only + * reported with printf; de_FileSize then stays as it was + * and deget() still returns 0. 
+ */ + u_long size; + + nvp->v_type = VDIR; + if (ldep->de_StartCluster != MSDOSFSROOT) { + error = pcbmap(ldep, 0xffff, 0, &size, 0); + if (error == E2BIG) { + ldep->de_FileSize = de_cn2off(pmp, size); + error = 0; + } else + printf("deget(): pcbmap returned %d\n", error); + } + } else + nvp->v_type = VREG; + getmicrouptime(&tv); + SETHIGH(ldep->de_modrev, tv.tv_sec); + SETLOW(ldep->de_modrev, tv.tv_usec * 4294); + ldep->de_devvp = pmp->pm_devvp; + VREF(ldep->de_devvp); + *depp = ldep; + return (0); +} +/* + * Write the directory entry of dep back to disk if that is needed. + * + * Returns 0 without doing anything on a read-only mount. Otherwise + * the timestamps are brought up to date with DETIMES(). The entry is + * then read with readde() and rewritten only if DE_MODIFIED was set + * (the flag is cleared in any case), dep is not a directory and + * de_refcnt is positive. A non-zero waitfor selects bwrite() and + * returns its result; otherwise the write is delayed with bdwrite(). + */ +int +deupdat(dep, waitfor) + struct denode *dep; + int waitfor; +{ + int error; + struct buf *bp; + struct direntry *dirp; + struct timespec ts; + + if (DETOV(dep)->v_mount->mnt_flag & MNT_RDONLY) + return (0); + getnanotime(&ts); + DETIMES(dep, &ts, &ts, &ts); + if ((dep->de_flag & DE_MODIFIED) == 0) + return (0); + dep->de_flag &= ~DE_MODIFIED; + if (dep->de_Attributes & ATTR_DIRECTORY) + return (0); + if (dep->de_refcnt <= 0) + return (0); + error = readde(dep, &bp, &dirp); + if (error) + return (error); + DE_EXTERNALIZE(dirp, dep); + if (waitfor) + return (bwrite(bp)); + else { + bdwrite(bp); + return (0); + } +} + +/* + * Truncate the file described by dep to the length specified by length. + * + * A length beyond the current size is passed on to deextend(). + * Truncating the root directory of a non-FAT32 volume fails with + * EINVAL. With IO_SYNC in flags the zeroed tail of the new last + * cluster is written with bwrite() instead of bdwrite(). + * Returns 0 or an errno value. + */ +int +detrunc(dep, length, flags, cred, p) + struct denode *dep; + u_long length; + int flags; + struct ucred *cred; + struct proc *p; +{ + int error; + int allerror; + u_long eofentry; + u_long chaintofree; + daddr_t bn; + int boff; + int isadir = dep->de_Attributes & ATTR_DIRECTORY; + struct buf *bp; + struct msdosfsmount *pmp = dep->de_pmp; + +#ifdef MSDOSFS_DEBUG + printf("detrunc(): file %s, length %lu, flags %x\n", dep->de_Name, length, flags); +#endif + + /* + * Disallow attempts to truncate the root directory since it is of + * fixed size. That's just the way dos filesystems are. 
We use + * the VROOT bit in the vnode because checking for the directory + * bit and a startcluster of 0 in the denode is not adequate to + * recognize the root directory at this point in a file or + * directory's life. + */ + if ((DETOV(dep)->v_flag & VROOT) && !FAT32(pmp)) { + printf("detrunc(): can't truncate root directory, clust %ld, offset %ld\n", + dep->de_dirclust, dep->de_diroffset); + return (EINVAL); + } + + + if (dep->de_FileSize < length) { + vnode_pager_setsize(DETOV(dep), length); + return deextend(dep, length, cred); + } + + /* + * If the desired length is 0 then remember the starting cluster of + * the file and set the StartCluster field in the directory entry + * to 0. If the desired length is not zero, then get the number of + * the last cluster in the shortened file. Then get the number of + * the first cluster in the part of the file that is to be freed. + * Then set the next cluster pointer in the last cluster of the + * file to CLUST_EOFE. + */ + if (length == 0) { + chaintofree = dep->de_StartCluster; + dep->de_StartCluster = 0; + eofentry = ~0; + } else { + error = pcbmap(dep, de_clcount(pmp, length) - 1, 0, + &eofentry, 0); + if (error) { +#ifdef MSDOSFS_DEBUG + printf("detrunc(): pcbmap fails %d\n", error); +#endif + return (error); + } + } + + fc_purge(dep, de_clcount(pmp, length)); + + /* + * If the new length is not a multiple of the cluster size then we + * must zero the tail end of the new last cluster in case it + * becomes part of the file again because of a seek. + */ + if ((boff = length & pmp->pm_crbomask) != 0) { + if (isadir) { + bn = cntobn(pmp, eofentry); + error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, + NOCRED, &bp); + } else { + bn = de_blk(pmp, length); + error = bread(DETOV(dep), bn, pmp->pm_bpcluster, + NOCRED, &bp); + } + if (error) { + brelse(bp); +#ifdef MSDOSFS_DEBUG + printf("detrunc(): bread fails %d\n", error); +#endif + return (error); + } + /* + * is this the right place for it? 
+ */ + bzero(bp->b_data + boff, pmp->pm_bpcluster - boff); + if (flags & IO_SYNC) + bwrite(bp); + else + bdwrite(bp); + } + + /* + * Write out the updated directory entry. Even if the update fails + * we free the trailing clusters. + */ + dep->de_FileSize = length; + if (!isadir) + dep->de_flag |= DE_UPDATE|DE_MODIFIED; + allerror = vtruncbuf(DETOV(dep), cred, p, length, pmp->pm_bpcluster); +#ifdef MSDOSFS_DEBUG + if (allerror) + printf("detrunc(): vtruncbuf error %d\n", allerror); +#endif + error = deupdat(dep, 1); + if (error && (allerror == 0)) + allerror = error; +#ifdef MSDOSFS_DEBUG + printf("detrunc(): allerror %d, eofentry %lu\n", + allerror, eofentry); +#endif + + /* + * If we need to break the cluster chain for the file then do it + * now. + */ + if (eofentry != ~0) { + error = fatentry(FAT_GET_AND_SET, pmp, eofentry, + &chaintofree, CLUST_EOFE); + if (error) { +#ifdef MSDOSFS_DEBUG + printf("detrunc(): fatentry errors %d\n", error); +#endif + return (error); + } + fc_setcache(dep, FC_LASTFC, de_cluster(pmp, length - 1), + eofentry); + } + + /* + * Now free the clusters removed from the file because of the + * truncation. + */ + if (chaintofree != 0 && !MSDOSFSEOF(pmp, chaintofree)) + freeclusterchain(pmp, chaintofree); + + return (allerror); +} + +/* + * Extend the file described by dep to length specified by length. + * + * Returns EINVAL for the root directory of a non-FAT32 volume, EISDIR + * for any other directory, and ENOSPC when more clusters are needed + * than pm_freeclustercount reports; panics if length does not exceed + * the current size. New clusters are requested from extendfile() with + * DE_CLEAR; if that fails the added clusters are truncated away again. + * On success the result of deupdat(dep, 1) is returned. + */ +int +deextend(dep, length, cred) + struct denode *dep; + u_long length; + struct ucred *cred; +{ + struct msdosfsmount *pmp = dep->de_pmp; + u_long count; + int error; + + /* + * The root of a DOS filesystem cannot be extended. + */ + if ((DETOV(dep)->v_flag & VROOT) && !FAT32(pmp)) + return (EINVAL); + + /* + * Directories cannot be extended. + */ + if (dep->de_Attributes & ATTR_DIRECTORY) + return (EISDIR); + + if (length <= dep->de_FileSize) + panic("deextend: file too large"); + + /* + * Compute the number of clusters to allocate. 
+ */ + count = de_clcount(pmp, length) - de_clcount(pmp, dep->de_FileSize); + if (count > 0) { + if (count > pmp->pm_freeclustercount) + return (ENOSPC); + error = extendfile(dep, count, NULL, NULL, DE_CLEAR); + if (error) { + /* truncate the added clusters away again */ + (void) detrunc(dep, dep->de_FileSize, 0, cred, NULL); + return (error); + } + } + dep->de_FileSize = length; + dep->de_flag |= DE_UPDATE|DE_MODIFIED; + return (deupdat(dep, 1)); +} + +/* + * Move a denode to its correct hash queue after the file it represents has + * been moved to a new directory. + */ +void +reinsert(dep) + struct denode *dep; +{ + /* + * Fix up the denode cache. If the denode is for a directory, + * there is nothing to do since the hash is based on the starting + * cluster of the directory file and that hasn't changed. If for a + * file the hash is based on the location of the directory entry, + * so we must remove it from the cache and re-enter it with the + * hash based on the new location of the directory entry. + */ + if (dep->de_Attributes & ATTR_DIRECTORY) + return; + msdosfs_hashrem(dep); + msdosfs_hashins(dep); +} +/* + * Reclaim the vnode ap->a_vp: remove its denode from the hash table, + * purge the name cache for the vnode, release the reference held on + * the device vnode (if any) and free the denode. v_data is cleared. + * Always returns 0. + */ +int +msdosfs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct denode *dep = VTODE(vp); + +#ifdef MSDOSFS_DEBUG + printf("msdosfs_reclaim(): dep %p, file %s, refcnt %ld\n", + dep, dep->de_Name, dep->de_refcnt); +#endif + + if (prtactive && vp->v_usecount != 0) + vprint("msdosfs_reclaim(): pushing active", vp); + /* + * Remove the denode from its hash chain. + */ + msdosfs_hashrem(dep); + /* + * Purge old data structures associated with the denode. 
+ */ + cache_purge(vp); + if (dep->de_devvp) { + vrele(dep->de_devvp); + dep->de_devvp = 0; + } +#if 0 /* XXX */ + dep->de_flag = 0; +#endif + FREE(dep, M_MSDOSFSNODE); + vp->v_data = NULL; + + return (0); +} +/* + * Inactive routine for the vnode ap->a_vp. + * + * Denodes already marked SLOT_DELETED are left alone. Otherwise, if + * de_refcnt is not positive and the mount is writable, the file is + * truncated to length 0 and its slot is marked SLOT_DELETED; the + * directory entry is then pushed with deupdat(dep, 0), whose result + * is ignored. The vnode is unlocked before returning and handed to + * vrecycle() when the slot is marked deleted. + * Returns the detrunc() error, if any, else 0. + */ +int +msdosfs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct denode *dep = VTODE(vp); + struct proc *p = ap->a_p; + int error = 0; + +#ifdef MSDOSFS_DEBUG + printf("msdosfs_inactive(): dep %p, de_Name[0] %x\n", dep, dep->de_Name[0]); +#endif + + if (prtactive && vp->v_usecount != 0) + vprint("msdosfs_inactive(): pushing active", vp); + + /* + * Ignore denodes related to stale file handles. + */ + if (dep->de_Name[0] == SLOT_DELETED) + goto out; + + /* + * If the file has been deleted and it is on a read/write + * filesystem, then truncate the file, and mark the directory slot + * as empty. (This may not be necessary for the dos filesystem.) + */ +#ifdef MSDOSFS_DEBUG + printf("msdosfs_inactive(): dep %p, refcnt %ld, mntflag %x, MNT_RDONLY %x\n", + dep, dep->de_refcnt, vp->v_mount->mnt_flag, MNT_RDONLY); +#endif + if (dep->de_refcnt <= 0 && (vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + error = detrunc(dep, (u_long) 0, 0, NOCRED, p); + dep->de_flag |= DE_UPDATE; + dep->de_Name[0] = SLOT_DELETED; + } + deupdat(dep, 0); + +out: + VOP_UNLOCK(vp, 0, p); + /* + * If we are done with the denode, reclaim it + * so that it can be reused immediately. 
+ */ +#ifdef MSDOSFS_DEBUG + printf("msdosfs_inactive(): v_usecount %d, de_Name[0] %x\n", vp->v_usecount, + dep->de_Name[0]); +#endif + if (dep->de_Name[0] == SLOT_DELETED) + vrecycle(vp, (struct simplelock *)0, p); + return (error); +} diff --git a/sys/fs/msdosfs/msdosfs_fat.c b/sys/fs/msdosfs/msdosfs_fat.c new file mode 100644 index 0000000..1ec29db --- /dev/null +++ b/sys/fs/msdosfs/msdosfs_fat.c @@ -0,0 +1,1100 @@ +/* $Id: msdosfs_fat.c,v 1.20 1998/04/06 11:39:04 phk Exp $ */ +/* $NetBSD: msdosfs_fat.c,v 1.28 1997/11/17 15:36:49 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +/* + * kernel include files. + */ +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/mount.h> /* to define statfs structure */ +#include <sys/vnode.h> /* to define vattr structure */ + +/* + * msdosfs include files. + */ +#include <msdosfs/bpb.h> +#include <msdosfs/msdosfsmount.h> +#include <msdosfs/direntry.h> +#include <msdosfs/denode.h> +#include <msdosfs/fat.h> + +/* + * Fat cache stats. + */ +static int fc_fileextends; /* # of file extends */ +static int fc_lfcempty; /* # of time last file cluster cache entry + * was empty */ +static int fc_bmapcalls; /* # of times pcbmap was called */ + +#define LMMAX 20 +static int fc_lmdistance[LMMAX];/* counters for how far off the last + * cluster mapped entry was. 
*/ +static int fc_largedistance; /* off by more than LMMAX */ + +static int chainalloc __P((struct msdosfsmount *pmp, u_long start, + u_long count, u_long fillwith, + u_long *retcluster, u_long *got)); +static int chainlength __P((struct msdosfsmount *pmp, u_long start, + u_long count)); +static void fatblock __P((struct msdosfsmount *pmp, u_long ofs, + u_long *bnp, u_long *sizep, u_long *bop)); +static int fatchain __P((struct msdosfsmount *pmp, u_long start, + u_long count, u_long fillwith)); +static void fc_lookup __P((struct denode *dep, u_long findcn, + u_long *frcnp, u_long *fsrcnp)); +static void updatefats __P((struct msdosfsmount *pmp, struct buf *bp, + u_long fatbn)); +static __inline void + usemap_alloc __P((struct msdosfsmount *pmp, u_long cn)); +static __inline void + usemap_free __P((struct msdosfsmount *pmp, u_long cn)); +/* + * Locate the FAT block that holds byte offset ofs of the FAT. + * + * pmp - mount structure of the filesystem + * ofs - byte offset into the FAT + * bnp - if non-NULL, receives the block number, taken within the + * FAT copy selected by pm_curfat + * sizep - if non-NULL, receives the size of that block in bytes; + * the min() caps it at the sectors remaining in the FAT + * bop - if non-NULL, receives the offset of ofs within the block + */ +static void +fatblock(pmp, ofs, bnp, sizep, bop) + struct msdosfsmount *pmp; + u_long ofs; + u_long *bnp; + u_long *sizep; + u_long *bop; +{ + u_long bn, size; + + bn = ofs / pmp->pm_fatblocksize * pmp->pm_fatblocksec; + size = min(pmp->pm_fatblocksec, pmp->pm_FATsecs - bn) + * pmp->pm_BytesPerSec; + bn += pmp->pm_fatblk + pmp->pm_curfat * pmp->pm_FATsecs; + + if (bnp) + *bnp = bn; + if (sizep) + *sizep = size; + if (bop) + *bop = ofs % pmp->pm_fatblocksize; +} + +/* + * Map the logical cluster number of a file into a physical disk sector + * that is filesystem relative. + * + * dep - address of denode representing the file of interest + * findcn - file relative cluster whose filesystem relative cluster number + * and/or block number are/is to be found + * bnp - address of where to place the file system relative block number. + * If this pointer is null then don't return this quantity. + * cnp - address of where to place the file system relative cluster number. + * If this pointer is null then don't return this quantity. + * + * NOTE: Either bnp or cnp must be non-null. + * This function has one side effect. 
If the requested file relative cluster + * is beyond the end of file, then the actual number of clusters in the file + * is returned in *cnp. This is useful for determining how long a directory is. + * If cnp is null, nothing is returned. + */ +int +pcbmap(dep, findcn, bnp, cnp, sp) + struct denode *dep; + u_long findcn; /* file relative cluster to get */ + daddr_t *bnp; /* returned filesys relative blk number */ + u_long *cnp; /* returned cluster number */ + int *sp; /* returned block size */ +{ + int error; + u_long i; + u_long cn; + u_long prevcn = 0; /* XXX: prevcn could be used uninitialized */ + u_long byteoffset; + u_long bn; + u_long bo; + struct buf *bp = NULL; + u_long bp_bn = -1; + struct msdosfsmount *pmp = dep->de_pmp; + u_long bsize; + /* + * Returns 0 when findcn could be mapped, E2BIG when it lies at or + * beyond the end of the file (the cluster count is then stored + * through cnp), or the error from bread() if a FAT block could + * not be read. + */ + fc_bmapcalls++; + + /* + * If they don't give us someplace to return a value then don't + * bother doing anything. + */ + if (bnp == NULL && cnp == NULL && sp == NULL) + return (0); + + cn = dep->de_StartCluster; + /* + * The "file" that makes up the root directory is contiguous, + * permanently allocated, of fixed size, and is not made up of + * clusters. If the cluster number is beyond the end of the root + * directory, then return the number of clusters in the file. + */ + if (cn == MSDOSFSROOT) { + if (dep->de_Attributes & ATTR_DIRECTORY) { + if (de_cn2off(pmp, findcn) >= dep->de_FileSize) { + if (cnp) + *cnp = de_bn2cn(pmp, pmp->pm_rootdirsize); + return (E2BIG); + } + if (bnp) + *bnp = pmp->pm_rootdirblk + de_cn2bn(pmp, findcn); + if (cnp) + *cnp = MSDOSFSROOT; + if (sp) + *sp = min(pmp->pm_bpcluster, + dep->de_FileSize - de_cn2off(pmp, findcn)); + return (0); + } else { /* just an empty file */ + if (cnp) + *cnp = 0; + return (E2BIG); + } + } + + /* + * All other files do I/O in cluster sized blocks + */ + if (sp) + *sp = pmp->pm_bpcluster; + + /* + * Rummage around in the fat cache, maybe we can avoid tromping + * thru every fat entry for the file. 
And, keep track of how far + * off the cache was from where we wanted to be. + */ + i = 0; + fc_lookup(dep, findcn, &i, &cn); + if ((bn = findcn - i) >= LMMAX) + fc_largedistance++; + else + fc_lmdistance[bn]++; + + /* + * Handle all other files or directories the normal way. + * Follow the cluster chain in the FAT from the starting point + * found above. A FAT block is only read when the entry needed + * lies in a different block than the one already held in bp. + * For FAT12 the entry of an odd cluster sits in the upper 12 + * bits of the 16 bits fetched, hence the shift by 4. + */ + for (; i < findcn; i++) { + /* + * Stop with all reserved clusters, not just with EOF. + */ + if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD) + goto hiteof; + byteoffset = FATOFS(pmp, cn); + fatblock(pmp, byteoffset, &bn, &bsize, &bo); + if (bn != bp_bn) { + if (bp) + brelse(bp); + error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp_bn = bn; + } + prevcn = cn; + if (FAT32(pmp)) + cn = getulong(&bp->b_data[bo]); + else + cn = getushort(&bp->b_data[bo]); + if (FAT12(pmp) && (prevcn & 1)) + cn >>= 4; + cn &= pmp->pm_fatmask; + + /* + * Force the special cluster numbers + * to be the same for all cluster sizes + * to let the rest of msdosfs handle + * all cases the same. + */ + if ((cn | ~pmp->pm_fatmask) >= CLUST_RSRVD) + cn |= ~pmp->pm_fatmask; + } + + if (!MSDOSFSEOF(pmp, cn)) { + if (bp) + brelse(bp); + if (bnp) + *bnp = cntobn(pmp, cn); + if (cnp) + *cnp = cn; + fc_setcache(dep, FC_LASTMAP, i, cn); + return (0); + } + +hiteof:; + if (cnp) + *cnp = i; + if (bp) + brelse(bp); + /* update last file cluster entry in the fat cache */ + fc_setcache(dep, FC_LASTFC, i - 1, prevcn); + return (E2BIG); +} + +/* + * Find the closest entry in the fat cache to the cluster we are looking + * for. 
+ */ +static void +fc_lookup(dep, findcn, frcnp, fsrcnp) + struct denode *dep; + u_long findcn; + u_long *frcnp; + u_long *fsrcnp; +{ + int i; + u_long cn; + struct fatcache *closest = 0; + /* + * Pick the non-empty cache entry with the largest fc_frcn that + * does not exceed findcn. *frcnp and *fsrcnp are only written + * when such an entry exists, so the caller must preset them. + */ + for (i = 0; i < FC_SIZE; i++) { + cn = dep->de_fc[i].fc_frcn; + if (cn != FCE_EMPTY && cn <= findcn) { + if (closest == 0 || cn > closest->fc_frcn) + closest = &dep->de_fc[i]; + } + } + if (closest) { + *frcnp = closest->fc_frcn; + *fsrcnp = closest->fc_fsrcn; + } +} + +/* + * Purge the fat cache in denode dep of all entries relating to file + * relative cluster frcn and beyond. + * + * An entry is discarded by setting its fc_frcn to FCE_EMPTY. + */ +void +fc_purge(dep, frcn) + struct denode *dep; + u_int frcn; +{ + int i; + struct fatcache *fcp; + + fcp = dep->de_fc; + for (i = 0; i < FC_SIZE; i++, fcp++) { + if (fcp->fc_frcn >= frcn) + fcp->fc_frcn = FCE_EMPTY; + } +} + +/* + * Update the fat. + * If mirroring the fat, update all copies, with the first copy as last. + * Else update only the current fat (ignoring the others). + * + * pmp - msdosfsmount structure for filesystem to update + * bp - addr of modified fat block + * fatbn - block number relative to begin of filesystem of the modified fat block. + * + * The buffer bp is consumed: it is written with bwrite() when + * MSDOSFSMNT_WAITONFAT is set and with bdwrite() otherwise. If the + * filesystem has an FSInfo block it is rewritten first with the + * current free cluster count and next free cluster hint. + */ +static void +updatefats(pmp, bp, fatbn) + struct msdosfsmount *pmp; + struct buf *bp; + u_long fatbn; +{ + int i; + struct buf *bpn; + +#ifdef MSDOSFS_DEBUG + printf("updatefats(pmp %p, bp %p, fatbn %lu)\n", pmp, bp, fatbn); +#endif + + /* + * If we have an FSInfo block, update it. + */ + if (pmp->pm_fsinfo) { + u_long cn = pmp->pm_nxtfree; + + if (pmp->pm_freeclustercount + && (pmp->pm_inusemap[cn / N_INUSEBITS] + & (1 << (cn % N_INUSEBITS)))) { + /* + * The cluster indicated in FSInfo isn't free + * any longer. Go get a new free one. 
+ */ + for (cn = 0; cn < pmp->pm_maxcluster; cn += N_INUSEBITS) + if (pmp->pm_inusemap[cn / N_INUSEBITS] != (u_int)-1) + break; + pmp->pm_nxtfree = cn + + ffs(pmp->pm_inusemap[cn / N_INUSEBITS] + ^ (u_int)-1) - 1; + } + if (bread(pmp->pm_devvp, pmp->pm_fsinfo, 1024, NOCRED, &bpn) != 0) { + /* + * Ignore the error, but turn off FSInfo update for the future. + */ + pmp->pm_fsinfo = 0; + brelse(bpn); + } else { + struct fsinfo *fp = (struct fsinfo *)bpn->b_data; + + putulong(fp->fsinfree, pmp->pm_freeclustercount); + putulong(fp->fsinxtfree, pmp->pm_nxtfree); + if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT) + bwrite(bpn); + else + bdwrite(bpn); + } + } + + if (pmp->pm_flags & MSDOSFS_FATMIRROR) { + /* + * Now copy the block(s) of the modified fat to the other copies of + * the fat and write them out. This is faster than reading in the + * other fats and then writing them back out. This could tie up + * the fat for quite a while, preventing others from accessing it. + * To prevent us from going after the fat quite so much we use + * delayed writes, unless they specified "synchronous" when the + * filesystem was mounted. If synch is asked for then use + * bwrite()'s and really slow things down. + */ + for (i = 1; i < pmp->pm_FATs; i++) { + fatbn += pmp->pm_FATsecs; + /* getblk() never fails */ + bpn = getblk(pmp->pm_devvp, fatbn, bp->b_bcount, 0, 0); + bcopy(bp->b_data, bpn->b_data, bp->b_bcount); + if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT) + bwrite(bpn); + else + bdwrite(bpn); + } + } + + /* + * Write out the first (or current) fat last. + */ + if (pmp->pm_flags & MSDOSFSMNT_WAITONFAT) + bwrite(bp); + else + bdwrite(bp); + /* + * Maybe update fsinfo sector here? + */ +} + +/* + * Updating entries in 12 bit fats is a pain in the butt. + * + * The following picture shows where nibbles go when moving from a 12 bit + * cluster number into the appropriate bytes in the FAT. 
+ * + * byte m byte m+1 byte m+2 + * +----+----+ +----+----+ +----+----+ + * | 0 1 | | 2 3 | | 4 5 | FAT bytes + * +----+----+ +----+----+ +----+----+ + * + * +----+----+----+ +----+----+----+ + * | 3 0 1 | | 4 5 2 | + * +----+----+----+ +----+----+----+ + * cluster n cluster n+1 + * + * Where n is even. m = n + (n >> 2) + * + */ +static __inline void +usemap_alloc(pmp, cn) + struct msdosfsmount *pmp; + u_long cn; +{ + + pmp->pm_inusemap[cn / N_INUSEBITS] |= 1 << (cn % N_INUSEBITS); + pmp->pm_freeclustercount--; +} + +static __inline void +usemap_free(pmp, cn) + struct msdosfsmount *pmp; + u_long cn; +{ + + pmp->pm_freeclustercount++; + pmp->pm_inusemap[cn / N_INUSEBITS] &= ~(1 << (cn % N_INUSEBITS)); +} + +int +clusterfree(pmp, cluster, oldcnp) + struct msdosfsmount *pmp; + u_long cluster; + u_long *oldcnp; +{ + int error; + u_long oldcn; + + usemap_free(pmp, cluster); + error = fatentry(FAT_GET_AND_SET, pmp, cluster, &oldcn, MSDOSFSFREE); + if (error) { + usemap_alloc(pmp, cluster); + return (error); + } + /* + * If the cluster was successfully marked free, then update + * the count of free clusters, and turn off the "allocated" + * bit in the "in use" cluster bit map. + */ + if (oldcnp) + *oldcnp = oldcn; + return (0); +} + +/* + * Get or Set or 'Get and Set' the cluster'th entry in the fat. + * + * function - whether to get or set a fat entry + * pmp - address of the msdosfsmount structure for the filesystem + * whose fat is to be manipulated. + * cn - which cluster is of interest + * oldcontents - address of a word that is to receive the contents of the + * cluster'th entry if this is a get function + * newcontents - the new value to be written into the cluster'th element of + * the fat if this is a set function. + * + * This function can also be used to free a cluster by setting the fat entry + * for a cluster to 0. + * + * All copies of the fat are updated if this is a set function. 
NOTE: If + * fatentry() marks a cluster as free it does not update the inusemap in + * the msdosfsmount structure. This is left to the caller. + */ +int +fatentry(function, pmp, cn, oldcontents, newcontents) + int function; + struct msdosfsmount *pmp; + u_long cn; + u_long *oldcontents; + u_long newcontents; +{ + int error; + u_long readcn; + u_long bn, bo, bsize, byteoffset; + struct buf *bp; + +#ifdef MSDOSFS_DEBUG + printf("fatentry(func %d, pmp %p, clust %lu, oldcon %p, newcon %lx)\n", + function, pmp, cn, oldcontents, newcontents); +#endif + +#ifdef DIAGNOSTIC + /* + * Be sure they asked us to do something. + */ + if ((function & (FAT_SET | FAT_GET)) == 0) { + printf("fatentry(): function code doesn't specify get or set\n"); + return (EINVAL); + } + + /* + * If they asked us to return a cluster number but didn't tell us + * where to put it, give them an error. + */ + if ((function & FAT_GET) && oldcontents == NULL) { + printf("fatentry(): get function with no place to put result\n"); + return (EINVAL); + } +#endif + + /* + * Be sure the requested cluster is in the filesystem. 
+ */ + if (cn < CLUST_FIRST || cn > pmp->pm_maxcluster) + return (EINVAL); + + byteoffset = FATOFS(pmp, cn); + fatblock(pmp, byteoffset, &bn, &bsize, &bo); + error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + + if (function & FAT_GET) { + if (FAT32(pmp)) + readcn = getulong(&bp->b_data[bo]); + else + readcn = getushort(&bp->b_data[bo]); + if (FAT12(pmp) & (cn & 1)) + readcn >>= 4; + readcn &= pmp->pm_fatmask; + /* map reserved fat entries to same values for all fats */ + if ((readcn | ~pmp->pm_fatmask) >= CLUST_RSRVD) + readcn |= ~pmp->pm_fatmask; + *oldcontents = readcn; + } + if (function & FAT_SET) { + switch (pmp->pm_fatmask) { + case FAT12_MASK: + readcn = getushort(&bp->b_data[bo]); + if (cn & 1) { + readcn &= 0x000f; + readcn |= newcontents << 4; + } else { + readcn &= 0xf000; + readcn |= newcontents & 0xfff; + } + putushort(&bp->b_data[bo], readcn); + break; + case FAT16_MASK: + putushort(&bp->b_data[bo], newcontents); + break; + case FAT32_MASK: + /* + * According to spec we have to retain the + * high order bits of the fat entry. + */ + readcn = getulong(&bp->b_data[bo]); + readcn &= ~FAT32_MASK; + readcn |= newcontents & FAT32_MASK; + putulong(&bp->b_data[bo], readcn); + break; + } + updatefats(pmp, bp, bn); + bp = NULL; + pmp->pm_fmod = 1; + } + if (bp) + brelse(bp); + return (0); +} + +/* + * Update a contiguous cluster chain + * + * pmp - mount point + * start - first cluster of chain + * count - number of clusters in chain + * fillwith - what to write into fat entry of last cluster + */ +static int +fatchain(pmp, start, count, fillwith) + struct msdosfsmount *pmp; + u_long start; + u_long count; + u_long fillwith; +{ + int error; + u_long bn, bo, bsize, byteoffset, readcn, newc; + struct buf *bp; + +#ifdef MSDOSFS_DEBUG + printf("fatchain(pmp %p, start %lu, count %lu, fillwith %lx)\n", + pmp, start, count, fillwith); +#endif + /* + * Be sure the clusters are in the filesystem. 
+ */ + if (start < CLUST_FIRST || start + count - 1 > pmp->pm_maxcluster) + return (EINVAL); + + while (count > 0) { + byteoffset = FATOFS(pmp, start); + fatblock(pmp, byteoffset, &bn, &bsize, &bo); + error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + while (count > 0) { + start++; + newc = --count > 0 ? start : fillwith; + switch (pmp->pm_fatmask) { + case FAT12_MASK: + readcn = getushort(&bp->b_data[bo]); + if (start & 1) { + readcn &= 0xf000; + readcn |= newc & 0xfff; + } else { + readcn &= 0x000f; + readcn |= newc << 4; + } + putushort(&bp->b_data[bo], readcn); + bo++; + if (!(start & 1)) + bo++; + break; + case FAT16_MASK: + putushort(&bp->b_data[bo], newc); + bo += 2; + break; + case FAT32_MASK: + readcn = getulong(&bp->b_data[bo]); + readcn &= ~pmp->pm_fatmask; + readcn |= newc & pmp->pm_fatmask; + putulong(&bp->b_data[bo], readcn); + bo += 4; + break; + } + if (bo >= bsize) + break; + } + updatefats(pmp, bp, bn); + } + pmp->pm_fmod = 1; + return (0); +} + +/* + * Check the length of a free cluster chain starting at start. + * + * pmp - mount point + * start - start of chain + * count - maximum interesting length + */ +static int +chainlength(pmp, start, count) + struct msdosfsmount *pmp; + u_long start; + u_long count; +{ + u_long idx, max_idx; + u_int map; + u_long len; + + max_idx = pmp->pm_maxcluster / N_INUSEBITS; + idx = start / N_INUSEBITS; + start %= N_INUSEBITS; + map = pmp->pm_inusemap[idx]; + map &= ~((1 << start) - 1); + if (map) { + len = ffs(map) - 1 - start; + return (len > count ? count : len); + } + len = N_INUSEBITS - start; + if (len >= count) + return (count); + while (++idx <= max_idx) { + if (len >= count) + break; + map = pmp->pm_inusemap[idx]; + if (map) { + len += ffs(map) - 1; + break; + } + len += N_INUSEBITS; + } + return (len > count ? count : len); +} + +/* + * Allocate contigous free clusters. + * + * pmp - mount point. + * start - start of cluster chain. 
+ * count - number of clusters to allocate. + * fillwith - put this value into the fat entry for the + * last allocated cluster. + * retcluster - put the first allocated cluster's number here. + * got - how many clusters were actually allocated. + */ +static int +chainalloc(pmp, start, count, fillwith, retcluster, got) + struct msdosfsmount *pmp; + u_long start; + u_long count; + u_long fillwith; + u_long *retcluster; + u_long *got; +{ + int error; + u_long cl, n; + + for (cl = start, n = count; n-- > 0;) + usemap_alloc(pmp, cl++); + + error = fatchain(pmp, start, count, fillwith); + if (error != 0) + return (error); +#ifdef MSDOSFS_DEBUG + printf("clusteralloc(): allocated cluster chain at %lu (%lu clusters)\n", + start, count); +#endif + if (retcluster) + *retcluster = start; + if (got) + *got = count; + return (0); +} + +/* + * Allocate contiguous free clusters. + * + * pmp - mount point. + * start - preferred start of cluster chain. + * count - number of clusters requested. + * fillwith - put this value into the fat entry for the + * last allocated cluster. + * retcluster - put the first allocated cluster's number here. + * got - how many clusters were actually allocated. + */ +int +clusteralloc(pmp, start, count, fillwith, retcluster, got) + struct msdosfsmount *pmp; + u_long start; + u_long count; + u_long fillwith; + u_long *retcluster; + u_long *got; +{ + u_long idx; + u_long len, newst, foundl, cn, l; + u_long foundcn = 0; /* XXX: foundcn could be used unititialized */ + u_int map; + +#ifdef MSDOSFS_DEBUG + printf("clusteralloc(): find %lu clusters\n",count); +#endif + if (start) { + if ((len = chainlength(pmp, start, count)) >= count) + return (chainalloc(pmp, start, count, fillwith, retcluster, got)); + } else + len = 0; + + /* + * Start at a (pseudo) random place to maximize cluster runs + * under multiple writers. 
+ */ + newst = random() % (pmp->pm_maxcluster + 1); + foundl = 0; + + for (cn = newst; cn <= pmp->pm_maxcluster;) { + idx = cn / N_INUSEBITS; + map = pmp->pm_inusemap[idx]; + map |= (1 << (cn % N_INUSEBITS)) - 1; + if (map != (u_int)-1) { + cn = idx * N_INUSEBITS + ffs(map^(u_int)-1) - 1; + if ((l = chainlength(pmp, cn, count)) >= count) + return (chainalloc(pmp, cn, count, fillwith, retcluster, got)); + if (l > foundl) { + foundcn = cn; + foundl = l; + } + cn += l + 1; + continue; + } + cn += N_INUSEBITS - cn % N_INUSEBITS; + } + for (cn = 0; cn < newst;) { + idx = cn / N_INUSEBITS; + map = pmp->pm_inusemap[idx]; + map |= (1 << (cn % N_INUSEBITS)) - 1; + if (map != (u_int)-1) { + cn = idx * N_INUSEBITS + ffs(map^(u_int)-1) - 1; + if ((l = chainlength(pmp, cn, count)) >= count) + return (chainalloc(pmp, cn, count, fillwith, retcluster, got)); + if (l > foundl) { + foundcn = cn; + foundl = l; + } + cn += l + 1; + continue; + } + cn += N_INUSEBITS - cn % N_INUSEBITS; + } + + if (!foundl) + return (ENOSPC); + + if (len) + return (chainalloc(pmp, start, len, fillwith, retcluster, got)); + else + return (chainalloc(pmp, foundcn, foundl, fillwith, retcluster, got)); +} + + +/* + * Free a chain of clusters. + * + * pmp - address of the msdosfs mount structure for the filesystem + * containing the cluster chain to be freed. + * startcluster - number of the 1st cluster in the chain of clusters to be + * freed. 
+ */ +int +freeclusterchain(pmp, cluster) + struct msdosfsmount *pmp; + u_long cluster; +{ + int error; + struct buf *bp = NULL; + u_long bn, bo, bsize, byteoffset; + u_long readcn, lbn = -1; + + while (cluster >= CLUST_FIRST && cluster <= pmp->pm_maxcluster) { + byteoffset = FATOFS(pmp, cluster); + fatblock(pmp, byteoffset, &bn, &bsize, &bo); + if (lbn != bn) { + if (bp) + updatefats(pmp, bp, lbn); + error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + lbn = bn; + } + usemap_free(pmp, cluster); + switch (pmp->pm_fatmask) { + case FAT12_MASK: + readcn = getushort(&bp->b_data[bo]); + if (cluster & 1) { + cluster = readcn >> 4; + readcn &= 0x000f; + readcn |= MSDOSFSFREE << 4; + } else { + cluster = readcn; + readcn &= 0xf000; + readcn |= MSDOSFSFREE & 0xfff; + } + putushort(&bp->b_data[bo], readcn); + break; + case FAT16_MASK: + cluster = getushort(&bp->b_data[bo]); + putushort(&bp->b_data[bo], MSDOSFSFREE); + break; + case FAT32_MASK: + cluster = getulong(&bp->b_data[bo]); + putulong(&bp->b_data[bo], + (MSDOSFSFREE & FAT32_MASK) | (cluster & ~FAT32_MASK)); + break; + } + cluster &= pmp->pm_fatmask; + if ((cluster | ~pmp->pm_fatmask) >= CLUST_RSRVD) + cluster |= pmp->pm_fatmask; + } + if (bp) + updatefats(pmp, bp, bn); + return (0); +} + +/* + * Read in fat blocks looking for free clusters. For every free cluster + * found turn off its corresponding bit in the pm_inusemap. + */ +int +fillinusemap(pmp) + struct msdosfsmount *pmp; +{ + struct buf *bp = NULL; + u_long cn, readcn; + int error; + u_long bn, bo, bsize, byteoffset; + + /* + * Mark all clusters in use, we mark the free ones in the fat scan + * loop further down. + */ + for (cn = 0; cn < (pmp->pm_maxcluster + N_INUSEBITS) / N_INUSEBITS; cn++) + pmp->pm_inusemap[cn] = (u_int)-1; + + /* + * Figure how many free clusters are in the filesystem by ripping + * through the fat counting the number of entries whose content is + * zero. 
These represent free clusters. + */ + pmp->pm_freeclustercount = 0; + for (cn = CLUST_FIRST; cn <= pmp->pm_maxcluster; cn++) { + byteoffset = FATOFS(pmp, cn); + bo = byteoffset % pmp->pm_fatblocksize; + if (!bo || !bp) { + /* Read new FAT block */ + if (bp) + brelse(bp); + fatblock(pmp, byteoffset, &bn, &bsize, NULL); + error = bread(pmp->pm_devvp, bn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + } + if (FAT32(pmp)) + readcn = getulong(&bp->b_data[bo]); + else + readcn = getushort(&bp->b_data[bo]); + if (FAT12(pmp) && (cn & 1)) + readcn >>= 4; + readcn &= pmp->pm_fatmask; + + if (readcn == 0) + usemap_free(pmp, cn); + } + brelse(bp); + return (0); +} + +/* + * Allocate a new cluster and chain it onto the end of the file. + * + * dep - the file to extend + * count - number of clusters to allocate + * bpp - where to return the address of the buf header for the first new + * file block + * ncp - where to put cluster number of the first newly allocated cluster + * If this pointer is 0, do not return the cluster number. + * flags - see fat.h + * + * NOTE: This function is not responsible for turning on the DE_UPDATE bit of + * the de_flag field of the denode and it does not change the de_FileSize + * field. This is left for the caller to do. + */ +int +extendfile(dep, count, bpp, ncp, flags) + struct denode *dep; + u_long count; + struct buf **bpp; + u_long *ncp; + int flags; +{ + int error; + u_long frcn; + u_long cn, got; + struct msdosfsmount *pmp = dep->de_pmp; + struct buf *bp; + + /* + * Don't try to extend the root directory + */ + if (dep->de_StartCluster == MSDOSFSROOT + && (dep->de_Attributes & ATTR_DIRECTORY)) { + printf("extendfile(): attempt to extend root directory\n"); + return (ENOSPC); + } + + /* + * If the "file's last cluster" cache entry is empty, and the file + * is not empty, then fill the cache entry by calling pcbmap(). 
+ */ + fc_fileextends++; + if (dep->de_fc[FC_LASTFC].fc_frcn == FCE_EMPTY && + dep->de_StartCluster != 0) { + fc_lfcempty++; + error = pcbmap(dep, 0xffff, 0, &cn, 0); + /* we expect it to return E2BIG */ + if (error != E2BIG) + return (error); + } + + while (count > 0) { + /* + * Allocate a new cluster chain and cat onto the end of the + * file. * If the file is empty we make de_StartCluster point + * to the new block. Note that de_StartCluster being 0 is + * sufficient to be sure the file is empty since we exclude + * attempts to extend the root directory above, and the root + * dir is the only file with a startcluster of 0 that has + * blocks allocated (sort of). + */ + if (dep->de_StartCluster == 0) + cn = 0; + else + cn = dep->de_fc[FC_LASTFC].fc_fsrcn + 1; + error = clusteralloc(pmp, cn, count, CLUST_EOFE, &cn, &got); + if (error) + return (error); + + count -= got; + + /* + * Give them the filesystem relative cluster number if they want + * it. + */ + if (ncp) { + *ncp = cn; + ncp = NULL; + } + + if (dep->de_StartCluster == 0) { + dep->de_StartCluster = cn; + frcn = 0; + } else { + error = fatentry(FAT_SET, pmp, + dep->de_fc[FC_LASTFC].fc_fsrcn, + 0, cn); + if (error) { + clusterfree(pmp, cn, NULL); + return (error); + } + frcn = dep->de_fc[FC_LASTFC].fc_frcn + 1; + } + + /* + * Update the "last cluster of the file" entry in the denode's fat + * cache. + */ + fc_setcache(dep, FC_LASTFC, frcn + got - 1, cn + got - 1); + + if (flags & DE_CLEAR) { + while (got-- > 0) { + /* + * Get the buf header for the new block of the file. 
+ */ + if (dep->de_Attributes & ATTR_DIRECTORY) + bp = getblk(pmp->pm_devvp, cntobn(pmp, cn++), + pmp->pm_bpcluster, 0, 0); + else { + bp = getblk(DETOV(dep), de_cn2bn(pmp, frcn++), + pmp->pm_bpcluster, 0, 0); + /* + * Do the bmap now, as in msdosfs_write + */ + if (pcbmap(dep, + de_bn2cn(pmp, bp->b_lblkno), + &bp->b_blkno, 0, 0)) + bp->b_blkno = -1; + if (bp->b_blkno == -1) + panic("extendfile: pcbmap"); + } + clrbuf(bp); + if (bpp) { + *bpp = bp; + bpp = NULL; + } else + bdwrite(bp); + } + } + } + + return (0); +} diff --git a/sys/fs/msdosfs/msdosfs_lookup.c b/sys/fs/msdosfs/msdosfs_lookup.c new file mode 100644 index 0000000..87de1f2 --- /dev/null +++ b/sys/fs/msdosfs/msdosfs_lookup.c @@ -0,0 +1,1085 @@ +/* $Id: msdosfs_lookup.c,v 1.27 1998/12/07 21:58:35 archie Exp $ */ +/* $NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +#include <sys/param.h> +#include <sys/namei.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/systm.h> + +#include <msdosfs/bpb.h> +#include <msdosfs/direntry.h> +#include <msdosfs/denode.h> +#include <msdosfs/msdosfsmount.h> +#include <msdosfs/fat.h> + +/* + * When we search a directory the blocks containing directory entries are + * read and examined. The directory entries contain information that would + * normally be in the inode of a unix filesystem. This means that some of + * a directory's contents may also be in memory resident denodes (sort of + * an inode). This can cause problems if we are searching while some other + * process is modifying a directory. 
To prevent one process from accessing + * incompletely modified directory information we depend upon being the + * sole owner of a directory block. bread/brelse provide this service. + * This being the case, when a process modifies a directory it must first + * acquire the disk block that contains the directory entry to be modified. + * Then update the disk block and the denode, and then write the disk block + * out to disk. This way disk blocks containing directory entries and in + * memory denode's will be in synch. + */ +int +msdosfs_lookup(ap) + struct vop_cachedlookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + struct vnode *vdp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + daddr_t bn; + int error; + int lockparent; + int wantparent; + int slotcount; + int slotoffset = 0; + int frcn; + u_long cluster; + int blkoff; + int diroff; + int blsize; + int isadir; /* ~0 if found direntry is a directory */ + u_long scn; /* starting cluster number */ + struct vnode *pdp; + struct denode *dp; + struct denode *tdp; + struct msdosfsmount *pmp; + struct buf *bp = 0; + struct direntry *dep = NULL; + u_char dosfilename[12]; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + struct proc *p = cnp->cn_proc; + int unlen; + + int wincnt = 1; + int chksum = -1; + int olddos = 1; + +#ifdef MSDOSFS_DEBUG + printf("msdosfs_lookup(): looking for %s\n", cnp->cn_nameptr); +#endif + dp = VTODE(vdp); + pmp = dp->de_pmp; + *vpp = NULL; + lockparent = flags & LOCKPARENT; + wantparent = flags & (LOCKPARENT | WANTPARENT); +#ifdef MSDOSFS_DEBUG + printf("msdosfs_lookup(): vdp %p, dp %p, Attr %02x\n", + vdp, dp, dp->de_Attributes); +#endif + + /* + * If they are going after the . or .. entry in the root directory, + * they won't find it. DOS filesystems don't have them in the root + * directory. So, we fake it. deget() is in on this scam too. 
+ */ + if ((vdp->v_flag & VROOT) && cnp->cn_nameptr[0] == '.' && + (cnp->cn_namelen == 1 || + (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.'))) { + isadir = ATTR_DIRECTORY; + scn = MSDOSFSROOT; +#ifdef MSDOSFS_DEBUG + printf("msdosfs_lookup(): looking for . or .. in root directory\n"); +#endif + cluster = MSDOSFSROOT; + blkoff = MSDOSFSROOT_OFS; + goto foundroot; + } + + switch (unix2dosfn((const u_char *)cnp->cn_nameptr, dosfilename, + cnp->cn_namelen, 0, + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d, + pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu)) { + case 0: + return (EINVAL); + case 1: + break; + case 2: + wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, + cnp->cn_namelen) + 1; + break; + case 3: + olddos = 0; + wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr, + cnp->cn_namelen) + 1; + break; + } + if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) { + wincnt = 1; + olddos = 1; + } + unlen = winLenFixup(cnp->cn_nameptr, cnp->cn_namelen); + + /* + * Suppress search for slots unless creating + * file and at end of pathname, in which case + * we watch for a place to put the new file in + * case it doesn't already exist. + */ + slotcount = wincnt; + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN)) + slotcount = 0; + +#ifdef MSDOSFS_DEBUG + printf("msdosfs_lookup(): dos version of filename %s, length %ld\n", + dosfilename, cnp->cn_namelen); +#endif + /* + * Search the directory pointed at by vdp for the name pointed at + * by cnp->cn_nameptr. + */ + tdp = NULL; + /* + * The outer loop ranges over the clusters that make up the + * directory. Note that the root directory is different from all + * other directories. It has a fixed number of blocks that are not + * part of the pool of allocatable clusters. So, we treat it a + * little differently. The root directory starts at "cluster" 0. 
+ */ + diroff = 0; + for (frcn = 0;; frcn++) { + error = pcbmap(dp, frcn, &bn, &cluster, &blsize); + if (error) { + if (error == E2BIG) + break; + return (error); + } + error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + for (blkoff = 0; blkoff < blsize; + blkoff += sizeof(struct direntry), + diroff += sizeof(struct direntry)) { + dep = (struct direntry *)(bp->b_data + blkoff); + /* + * If the slot is empty and we are still looking + * for an empty then remember this one. If the + * slot is not empty then check to see if it + * matches what we are looking for. If the slot + * has never been filled with anything, then the + * remainder of the directory has never been used, + * so there is no point in searching it. + */ + if (dep->deName[0] == SLOT_EMPTY || + dep->deName[0] == SLOT_DELETED) { + /* + * Drop memory of previous long matches + */ + chksum = -1; + + if (slotcount < wincnt) { + slotcount++; + slotoffset = diroff; + } + if (dep->deName[0] == SLOT_EMPTY) { + brelse(bp); + goto notfound; + } + } else { + /* + * If there wasn't enough space for our winentries, + * forget about the empty space + */ + if (slotcount < wincnt) + slotcount = 0; + + /* + * Check for Win95 long filename entry + */ + if (dep->deAttributes == ATTR_WIN95) { + if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) + continue; + + chksum = winChkName((const u_char *)cnp->cn_nameptr, + unlen, + (struct winentry *)dep, + chksum, + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, + pmp->pm_u2w, + pmp->pm_flags & MSDOSFSMNT_ULTABLE, + pmp->pm_ul); + continue; + } + + /* + * Ignore volume labels (anywhere, not just + * the root directory). 
+ */ + if (dep->deAttributes & ATTR_VOLUME) { + chksum = -1; + continue; + } + + /* + * Check for a checksum or name match + */ + if (chksum != winChksum(dep->deName) + && (!olddos || bcmp(dosfilename, dep->deName, 11))) { + chksum = -1; + continue; + } +#ifdef MSDOSFS_DEBUG + printf("msdosfs_lookup(): match blkoff %d, diroff %d\n", + blkoff, diroff); +#endif + /* + * Remember where this directory + * entry came from for whoever did + * this lookup. + */ + dp->de_fndoffset = diroff; + dp->de_fndcnt = wincnt - 1; + + goto found; + } + } /* for (blkoff = 0; .... */ + /* + * Release the buffer holding the directory cluster just + * searched. + */ + brelse(bp); + } /* for (frcn = 0; ; frcn++) */ + +notfound: + /* + * We hold no disk buffers at this point. + */ + + /* + * Fixup the slot description to point to the place where + * we might put the new DOS direntry (putting the Win95 + * long name entries before that) + */ + if (!slotcount) { + slotcount = 1; + slotoffset = diroff; + } + if (wincnt > slotcount) + slotoffset += sizeof(struct direntry) * (wincnt - slotcount); + + /* + * If we get here we didn't find the entry we were looking for. But + * that's ok if we are creating or renaming and are at the end of + * the pathname and the directory hasn't been removed. + */ +#ifdef MSDOSFS_DEBUG + printf("msdosfs_lookup(): op %d, refcnt %ld\n", + nameiop, dp->de_refcnt); + printf(" slotcount %d, slotoffset %d\n", + slotcount, slotoffset); +#endif + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN) && dp->de_refcnt != 0) { + /* + * Access for write is interpreted as allowing + * creation of files in the directory. + */ + error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc); + if (error) + return (error); + /* + * Return an indication of where the new directory + * entry should be put. 
+ */ + dp->de_fndoffset = slotoffset; + dp->de_fndcnt = wincnt - 1; + + /* + * We return with the directory locked, so that + * the parameters we set up above will still be + * valid if we actually decide to do a direnter(). + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. + */ + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp, 0, p); + return (EJUSTRETURN); + } + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(vdp, *vpp, cnp); + return (ENOENT); + +found: + /* + * NOTE: We still have the buffer with matched directory entry at + * this point. + */ + isadir = dep->deAttributes & ATTR_DIRECTORY; + scn = getushort(dep->deStartCluster); + if (FAT32(pmp)) { + scn |= getushort(dep->deHighClust) << 16; + if (scn == pmp->pm_rootdirblk) { + /* + * There should actually be 0 here. + * Just ignore the error. + */ + scn = MSDOSFSROOT; + } + } + + if (isadir) { + cluster = scn; + if (cluster == MSDOSFSROOT) + blkoff = MSDOSFSROOT_OFS; + else + blkoff = 0; + } else if (cluster == MSDOSFSROOT) + blkoff = diroff; + + /* + * Now release buf to allow deget to read the entry again. + * Reserving it here and giving it to deget could result + * in a deadlock. + */ + brelse(bp); + bp = 0; + +foundroot: + /* + * If we entered at foundroot, then we are looking for the . or .. + * entry of the filesystems root directory. isadir and scn were + * setup before jumping here. And, bp is already null. + */ + if (FAT32(pmp) && scn == MSDOSFSROOT) + scn = pmp->pm_rootdirblk; + + /* + * If deleting, and at end of pathname, return + * parameters which can be used to remove file. 
+ * If the wantparent flag isn't set, we return only + * the directory (in ndp->ni_dvp), otherwise we go + * on and lock the inode, being careful with ".". + */ + if (nameiop == DELETE && (flags & ISLASTCN)) { + /* + * Don't allow deleting the root. + */ + if (blkoff == MSDOSFSROOT_OFS) + return EROFS; /* really? XXX */ + + /* + * Write access to directory required to delete files. + */ + error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc); + if (error) + return (error); + + /* + * Return pointer to current entry in dp->i_offset. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). + */ + if (dp->de_StartCluster == scn && isadir) { /* "." */ + VREF(vdp); + *vpp = vdp; + return (0); + } + error = deget(pmp, cluster, blkoff, &tdp); + if (error) + return (error); + *vpp = DETOV(tdp); + if (!lockparent) + VOP_UNLOCK(vdp, 0, p); + return (0); + } + + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && wantparent && + (flags & ISLASTCN)) { + if (blkoff == MSDOSFSROOT_OFS) + return EROFS; /* really? XXX */ + + error = VOP_ACCESS(vdp, VWRITE, cnp->cn_cred, cnp->cn_proc); + if (error) + return (error); + + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->de_StartCluster == scn && isadir) + return (EISDIR); + + if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0) + return (error); + *vpp = DETOV(tdp); + cnp->cn_flags |= SAVENAME; + if (!lockparent) + VOP_UNLOCK(vdp, 0, p); + return (0); + } + + /* + * Step through the translation in the name. We do not `vput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". 
We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the VFS_VGET for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the file system has any hard links other than ".." + * that point backwards in the directory structure. + */ + pdp = vdp; + if (flags & ISDOTDOT) { + VOP_UNLOCK(pdp, 0, p); + error = deget(pmp, cluster, blkoff, &tdp); + if (error) { + vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); + } + if (lockparent && (flags & ISLASTCN) && + (error = vn_lock(pdp, LK_EXCLUSIVE, p))) { + vput(DETOV(tdp)); + return (error); + } + *vpp = DETOV(tdp); + } else if (dp->de_StartCluster == scn && isadir) { + VREF(vdp); /* we want ourself, ie "." */ + *vpp = vdp; + } else { + if ((error = deget(pmp, cluster, blkoff, &tdp)) != 0) + return (error); + if (!lockparent || !(flags & ISLASTCN)) + VOP_UNLOCK(pdp, 0, p); + *vpp = DETOV(tdp); + } + + /* + * Insert name into cache if appropriate. 
+ */ + if (cnp->cn_flags & MAKEENTRY) + cache_enter(vdp, *vpp, cnp); + return (0); +} + +/* + * Write the directory entry described by dep into directory ddep at the + * offset recorded in ddep->de_fndoffset, then fill up to ddep->de_fndcnt + * Win95 long filename slots in front of it. Both ddep->de_fndoffset and + * ddep->de_fndcnt are modified while the long name is written. + * + * dep - directory entry to copy into the directory + * ddep - directory to add to + * depp - return the address of the denode for the created directory entry + * if depp != 0 + * cnp - componentname needed for Win95 long filenames + * + * Returns 0 on success, otherwise the error reported by extendfile(), + * pcbmap(), bread(), bwrite() or deget(). + */ +int +createde(dep, ddep, depp, cnp) + struct denode *dep; + struct denode *ddep; + struct denode **depp; + struct componentname *cnp; +{ + int error; + u_long dirclust, diroffset; + struct direntry *ndep; + struct msdosfsmount *pmp = ddep->de_pmp; + struct buf *bp; + daddr_t bn; + int blsize; + +#ifdef MSDOSFS_DEBUG + printf("createde(dep %p, ddep %p, depp %p, cnp %p)\n", + dep, ddep, depp, cnp); +#endif + + /* + * If no space left in the directory then allocate another cluster + * and chain it onto the end of the file. There is one exception + * to this. That is, if the root directory has no more space it + * can NOT be expanded. extendfile() checks for and fails attempts + * to extend the root directory. We just return an error in that + * case. + */ + if (ddep->de_fndoffset >= ddep->de_FileSize) { + diroffset = ddep->de_fndoffset + sizeof(struct direntry) + - ddep->de_FileSize; + dirclust = de_clcount(pmp, diroffset); + error = extendfile(ddep, dirclust, 0, 0, DE_CLEAR); + if (error) { + (void)detrunc(ddep, ddep->de_FileSize, 0, NOCRED, NULL); + return error; + } + + /* + * Update the size of the directory + */ + ddep->de_FileSize += de_cn2off(pmp, dirclust); + } + + /* + * We just read in the cluster with space. Copy the new directory + * entry in. Then write it to disk. NOTE: DOS directories + * do not get smaller as clusters are emptied. 
+ */ + error = pcbmap(ddep, de_cluster(pmp, ddep->de_fndoffset), + &bn, &dirclust, &blsize); + if (error) + return error; + diroffset = ddep->de_fndoffset; + if (dirclust != MSDOSFSROOT) + diroffset &= pmp->pm_crbomask; + if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) != 0) { + brelse(bp); + return error; + } + ndep = bptoep(pmp, bp, ddep->de_fndoffset); + + DE_EXTERNALIZE(ndep, dep); + + /* + * Now write the Win95 long name. The slots are filled walking + * backwards from the short entry just written. When the current + * offset has no bits set under pm_crbomask, the buffer in hand is + * written out and the block holding the preceding slot is read in; + * otherwise we simply step back one entry in the same buffer. + * The loop stops early when unix2winfn() returns 0. + */ + if (ddep->de_fndcnt > 0) { + u_int8_t chksum = winChksum(ndep->deName); + const u_char *un = (const u_char *)cnp->cn_nameptr; + int unlen = cnp->cn_namelen; + int cnt = 1; + + while (--ddep->de_fndcnt >= 0) { + if (!(ddep->de_fndoffset & pmp->pm_crbomask)) { + if ((error = bwrite(bp)) != 0) + return error; + + ddep->de_fndoffset -= sizeof(struct direntry); + error = pcbmap(ddep, + de_cluster(pmp, + ddep->de_fndoffset), + &bn, 0, &blsize); + if (error) + return error; + + error = bread(pmp->pm_devvp, bn, blsize, + NOCRED, &bp); + if (error) { + brelse(bp); + return error; + } + ndep = bptoep(pmp, bp, ddep->de_fndoffset); + } else { + ndep--; + ddep->de_fndoffset -= sizeof(struct direntry); + } + if (!unix2winfn(un, unlen, (struct winentry *)ndep, + cnt++, chksum, + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, + pmp->pm_u2w)) + break; + } + } + + if ((error = bwrite(bp)) != 0) + return error; + + /* + * If they want us to return with the denode gotten. + * For a directory the denode is looked up by its own start cluster + * (with the FAT32 root mapped to MSDOSFSROOT); for anything else the + * dirclust/diroffset of the slot computed above are used. + */ + if (depp) { + if (dep->de_Attributes & ATTR_DIRECTORY) { + dirclust = dep->de_StartCluster; + if (FAT32(pmp) && dirclust == pmp->pm_rootdirblk) + dirclust = MSDOSFSROOT; + if (dirclust == MSDOSFSROOT) + diroffset = MSDOSFSROOT_OFS; + else + diroffset = 0; + } + return deget(pmp, dirclust, diroffset, depp); + } + + return 0; +} + +/* + * Be sure a directory is empty except for "." and "..". Return 1 if empty, + * return 0 if not empty or error. 
+ */ +int +dosdirempty(dep) + struct denode *dep; +{ + int blsize; + int error; + u_long cn; + daddr_t bn; + struct buf *bp; + struct msdosfsmount *pmp = dep->de_pmp; + struct direntry *dentp; + + /* + * Since the filesize field in directory entries for a directory is + * zero, we just have to feel our way through the directory until + * we hit end of file. + */ + for (cn = 0;; cn++) { + if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) { + if (error == E2BIG) + return (1); /* it's empty */ + return (0); + } + error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (0); + } + for (dentp = (struct direntry *)bp->b_data; + (char *)dentp < bp->b_data + blsize; + dentp++) { + if (dentp->deName[0] != SLOT_DELETED && + (dentp->deAttributes & ATTR_VOLUME) == 0) { + /* + * In dos directories an entry whose name + * starts with SLOT_EMPTY (0) starts the + * beginning of the unused part of the + * directory, so we can just return that it + * is empty. + */ + if (dentp->deName[0] == SLOT_EMPTY) { + brelse(bp); + return (1); + } + /* + * Any names other than "." and ".." in a + * directory mean it is not empty. + */ + if (bcmp(dentp->deName, ". ", 11) && + bcmp(dentp->deName, ".. ", 11)) { + brelse(bp); +#ifdef MSDOSFS_DEBUG + printf("dosdirempty(): entry found %02x, %02x\n", + dentp->deName[0], dentp->deName[1]); +#endif + return (0); /* not empty */ + } + } + } + brelse(bp); + } + /* NOTREACHED */ +} + +/* + * Check to see if the directory described by target is in some + * subdirectory of source. This prevents something like the following from + * succeeding and leaving a bunch or files and directories orphaned. mv + * /a/b/c /a/b/c/d/e/f Where c and f are directories. + * + * source - the inode for /a/b/c + * target - the inode for /a/b/c/d/e/f + * + * Returns 0 if target is NOT a subdirectory of source. + * Otherwise returns a non-zero error number. + * The target inode is always unlocked on return. 
+ */ +int +doscheckpath(source, target) + struct denode *source; + struct denode *target; +{ + daddr_t scn; + struct msdosfsmount *pmp; + struct direntry *ep; + struct denode *dep; + struct buf *bp = NULL; + int error = 0; + + dep = target; + if ((target->de_Attributes & ATTR_DIRECTORY) == 0 || + (source->de_Attributes & ATTR_DIRECTORY) == 0) { + error = ENOTDIR; + goto out; + } + if (dep->de_StartCluster == source->de_StartCluster) { + error = EEXIST; + goto out; + } + if (dep->de_StartCluster == MSDOSFSROOT) + goto out; + pmp = dep->de_pmp; +#ifdef DIAGNOSTIC + if (pmp != source->de_pmp) + panic("doscheckpath: source and target on different filesystems"); +#endif + if (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk) + goto out; + + for (;;) { + if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) { + error = ENOTDIR; + break; + } + scn = dep->de_StartCluster; + error = bread(pmp->pm_devvp, cntobn(pmp, scn), + pmp->pm_bpcluster, NOCRED, &bp); + if (error) + break; + + ep = (struct direntry *) bp->b_data + 1; + if ((ep->deAttributes & ATTR_DIRECTORY) == 0 || + bcmp(ep->deName, ".. ", 11) != 0) { + error = ENOTDIR; + break; + } + scn = getushort(ep->deStartCluster); + if (FAT32(pmp)) + scn |= getushort(ep->deHighClust) << 16; + + if (scn == source->de_StartCluster) { + error = EINVAL; + break; + } + if (scn == MSDOSFSROOT) + break; + if (FAT32(pmp) && scn == pmp->pm_rootdirblk) { + /* + * scn should be 0 in this case, + * but we silently ignore the error. + */ + break; + } + + vput(DETOV(dep)); + brelse(bp); + bp = NULL; + /* NOTE: deget() clears dep on error */ + if ((error = deget(pmp, scn, 0, &dep)) != 0) + break; + } +out:; + if (bp) + brelse(bp); + if (error == ENOTDIR) + printf("doscheckpath(): .. 
not a directory?\n"); + if (dep != NULL) + vput(DETOV(dep)); + return (error); +} + +/* + * Read in the disk block containing the directory entry (dirclu, dirofs) + * and return the address of the buf header, and the address of the + * directory entry within the block. + */ +int +readep(pmp, dirclust, diroffset, bpp, epp) + struct msdosfsmount *pmp; + u_long dirclust, diroffset; + struct buf **bpp; + struct direntry **epp; +{ + int error; + daddr_t bn; + int blsize; + + blsize = pmp->pm_bpcluster; + if (dirclust == MSDOSFSROOT + && de_blk(pmp, diroffset + blsize) > pmp->pm_rootdirsize) + blsize = de_bn2off(pmp, pmp->pm_rootdirsize) & pmp->pm_crbomask; + bn = detobn(pmp, dirclust, diroffset); + if ((error = bread(pmp->pm_devvp, bn, blsize, NOCRED, bpp)) != 0) { + brelse(*bpp); + *bpp = NULL; + return (error); + } + if (epp) + *epp = bptoep(pmp, *bpp, diroffset); + return (0); +} + +/* + * Read in the disk block containing the directory entry dep came from and + * return the address of the buf header, and the address of the directory + * entry within the block. + */ +int +readde(dep, bpp, epp) + struct denode *dep; + struct buf **bpp; + struct direntry **epp; +{ + + return (readep(dep->de_pmp, dep->de_dirclust, dep->de_diroffset, + bpp, epp)); +} + +/* + * Remove a directory entry. At this point the file represented by the + * directory entry to be removed is still full length until noone has it + * open. When the file no longer being used msdosfs_inactive() is called + * and will truncate the file to 0 length. When the vnode containing the + * denode is needed for some other purpose by VFS it will call + * msdosfs_reclaim() which will remove the denode from the denode cache. 
+ */ +int +removede(pdep, dep) + struct denode *pdep; /* directory where the entry is removed */ + struct denode *dep; /* file to be removed */ +{ + int error; + struct direntry *ep; + struct buf *bp; + daddr_t bn; + int blsize; + struct msdosfsmount *pmp = pdep->de_pmp; + u_long offset = pdep->de_fndoffset; + +#ifdef MSDOSFS_DEBUG + printf("removede(): filename %s, dep %p, offset %08lx\n", + dep->de_Name, dep, offset); +#endif + + dep->de_refcnt--; + /* pre-increment so the first pass of the loop starts at de_fndoffset */ + offset += sizeof(struct direntry); + do { + offset -= sizeof(struct direntry); + error = pcbmap(pdep, de_cluster(pmp, offset), &bn, 0, &blsize); + if (error) + return error; + error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); + if (error) { + brelse(bp); + return error; + } + ep = bptoep(pmp, bp, offset); + /* + * Check whether, if we came here the second time, i.e. + * when underflowing into the previous block, the last + * entry in this block is a longfilename entry, too. + */ + if (ep->deAttributes != ATTR_WIN95 + && offset != pdep->de_fndoffset) { + brelse(bp); + break; + } + offset += sizeof(struct direntry); + while (1) { + /* + * We are a bit aggressive here in that we delete any Win95 + * entries preceding this entry, not just the ones we "own". + * Since these presumably aren't valid anyway, + * there should be no harm. 
+ */ + offset -= sizeof(struct direntry); + /* + * Parsed as (ep--)->deName[0]: mark the current slot + * deleted, then step ep back to the preceding slot, + * which is the one examined by the test below. + */ + ep--->deName[0] = SLOT_DELETED; + if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) + || !(offset & pmp->pm_crbomask) + || ep->deAttributes != ATTR_WIN95) + break; + } + if ((error = bwrite(bp)) != 0) + return error; + } while (!(pmp->pm_flags & MSDOSFSMNT_NOWIN95) + && !(offset & pmp->pm_crbomask) + && offset); + return 0; +} + +/* + * Create a unique DOS name in directory dep. + * + * dep - directory searched for an existing entry with the same name + * cnp - componentname holding the Unix name to convert + * cp - buffer handed to unix2dosfn() for the DOS name; it is compared + * against directory entries as an 11-byte name + * + * On a MSDOSFSMNT_SHORTNAME mount the name is converted once with + * generation 0 and the directory is not searched. + * + * Returns 0 when the name in cp was not found in the directory, EINVAL + * if unix2dosfn() fails on the first attempt, EEXIST if it fails for a + * later generation number, or the error from pcbmap() or bread(). + */ +int +uniqdosname(dep, cnp, cp) + struct denode *dep; + struct componentname *cnp; + u_char *cp; +{ + struct msdosfsmount *pmp = dep->de_pmp; + struct direntry *dentp; + int gen; + int blsize; + u_long cn; + daddr_t bn; + struct buf *bp; + int error; + + if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) + return (unix2dosfn((const u_char *)cnp->cn_nameptr, cp, + cnp->cn_namelen, 0, + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d, + pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu) ? + 0 : EINVAL); + + for (gen = 1;; gen++) { + /* + * Generate DOS name with generation number + */ + if (!unix2dosfn((const u_char *)cnp->cn_nameptr, cp, + cnp->cn_namelen, gen, + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, pmp->pm_u2d, + pmp->pm_flags & MSDOSFSMNT_ULTABLE, pmp->pm_lu)) + return gen == 1 ? 
EINVAL : EEXIST; + + /* + * Now look for a dir entry with this exact name + */ + for (cn = error = 0; !error; cn++) { + if ((error = pcbmap(dep, cn, &bn, 0, &blsize)) != 0) { + if (error == E2BIG) /* EOF reached and not found */ + return 0; + return error; + } + error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); + if (error) { + brelse(bp); + return error; + } + for (dentp = (struct direntry *)bp->b_data; + (char *)dentp < bp->b_data + blsize; + dentp++) { + if (dentp->deName[0] == SLOT_EMPTY) { + /* + * Last used entry and not found + */ + brelse(bp); + return 0; + } + /* + * Ignore volume labels and Win95 entries + */ + if (dentp->deAttributes & ATTR_VOLUME) + continue; + if (!bcmp(dentp->deName, cp, 11)) { + /* + * Name is taken: setting error ends the + * directory scan so the outer loop tries + * the next generation number. + */ + error = EEXIST; + break; + } + } + brelse(bp); + } + } +} + +/* + * Find any Win'95 long filename entry in directory dep. + * + * Returns 1 if a Win'95 entry is found, or if no live (non-deleted) + * entry at all was seen before the end of the directory or an error. + * Returns 0 if only non-Win'95 entries were seen. + */ +int +findwin95(dep) + struct denode *dep; +{ + struct msdosfsmount *pmp = dep->de_pmp; + struct direntry *dentp; + int blsize, win95; + u_long cn; + daddr_t bn; + struct buf *bp; + + win95 = 1; + /* + * Read through the directory looking for Win'95 entries + * Note: Error currently handled just as EOF XXX + */ + for (cn = 0;; cn++) { + if (pcbmap(dep, cn, &bn, 0, &blsize)) + return (win95); + if (bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp)) { + brelse(bp); + return (win95); + } + for (dentp = (struct direntry *)bp->b_data; + (char *)dentp < bp->b_data + blsize; + dentp++) { + if (dentp->deName[0] == SLOT_EMPTY) { + /* + * Last used entry and not found + */ + brelse(bp); + return (win95); + } + if (dentp->deName[0] == SLOT_DELETED) { + /* + * Ignore deleted files + * Note: might be an indication of Win'95 anyway XXX + */ + continue; + } + if (dentp->deAttributes == ATTR_WIN95) { + brelse(bp); + return 1; + } + /* a live entry that is not a Win'95 slot was seen */ + win95 = 0; + } + brelse(bp); + } +} diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c new file mode 100644 index 0000000..bca552c --- /dev/null +++ b/sys/fs/msdosfs/msdosfs_vfsops.c @@ -0,0 
+1,1019 @@ +/* $Id: msdosfs_vfsops.c,v 1.39 1998/12/07 21:58:35 archie Exp $ */ +/* $NetBSD: msdosfs_vfsops.c,v 1.51 1997/11/17 15:36:58 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <miscfs/specfs/specdev.h> /* XXX */ /* defines v_rdev */ +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/fcntl.h> +#include <sys/malloc.h> +#include <sys/stat.h> /* defines ALLPERMS */ + +#include <msdosfs/bpb.h> +#include <msdosfs/bootsect.h> +#include <msdosfs/direntry.h> +#include <msdosfs/denode.h> +#include <msdosfs/msdosfsmount.h> +#include <msdosfs/fat.h> + +MALLOC_DEFINE(M_MSDOSFSMNT, "MSDOSFS mount", "MSDOSFS mount structure"); +static MALLOC_DEFINE(M_MSDOSFSFAT, "MSDOSFS FAT", "MSDOSFS file allocation table"); + +static int update_mp __P((struct mount *mp, struct msdosfs_args *argp)); +static int mountmsdosfs __P((struct vnode *devvp, struct mount *mp, + struct proc *p, struct msdosfs_args *argp)); +static int msdosfs_fhtovp __P((struct mount *, struct fid *, + struct sockaddr *, struct vnode **, int *, + struct ucred **)); +static int msdosfs_mount __P((struct mount *, char *, caddr_t, + struct nameidata *, struct proc *)); +static int msdosfs_quotactl __P((struct mount *, int, uid_t, caddr_t, + struct proc *)); +static int msdosfs_root __P((struct mount *, struct vnode **)); +static int msdosfs_start __P((struct mount *, int, struct proc *)); +static int msdosfs_statfs __P((struct mount *, struct statfs *, + struct proc *)); +static int msdosfs_sync __P((struct 
mount *, int, struct ucred *, + struct proc *)); +static int msdosfs_unmount __P((struct mount *, int, struct proc *)); +static int msdosfs_vget __P((struct mount *mp, ino_t ino, + struct vnode **vpp)); +static int msdosfs_vptofh __P((struct vnode *, struct fid *)); + +/* + * Copy the mount arguments (gid, uid, mask, option flags and, when the + * matching flags are set, the translation tables) from argp into the + * msdosfsmount of mp, then settle on short or long filename handling. + * + * Returns 0 on success or the error from msdosfs_root(). + */ +static int +update_mp(mp, argp) + struct mount *mp; + struct msdosfs_args *argp; +{ + struct msdosfsmount *pmp = VFSTOMSDOSFS(mp); + int error; + + pmp->pm_gid = argp->gid; + pmp->pm_uid = argp->uid; + pmp->pm_mask = argp->mask & ALLPERMS; + pmp->pm_flags |= argp->flags & MSDOSFSMNT_MNTOPT; + if (pmp->pm_flags & MSDOSFSMNT_U2WTABLE) { + bcopy(argp->u2w, pmp->pm_u2w, sizeof(pmp->pm_u2w)); + bcopy(argp->d2u, pmp->pm_d2u, sizeof(pmp->pm_d2u)); + bcopy(argp->u2d, pmp->pm_u2d, sizeof(pmp->pm_u2d)); + } + if (pmp->pm_flags & MSDOSFSMNT_ULTABLE) { + bcopy(argp->ul, pmp->pm_ul, sizeof(pmp->pm_ul)); + bcopy(argp->lu, pmp->pm_lu, sizeof(pmp->pm_lu)); + } + +#ifndef __FreeBSD__ + /* + * GEMDOS knows nothing (yet) about win95 + */ + if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS) + pmp->pm_flags |= MSDOSFSMNT_NOWIN95; +#endif + + if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) + pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; + else if (!(pmp->pm_flags & + (MSDOSFSMNT_SHORTNAME | MSDOSFSMNT_LONGNAME))) { + struct vnode *rootvp; + + /* + * Try to divine whether to support Win'95 long filenames: + * FAT32 always gets long names, otherwise the root directory + * is scanned with findwin95(). + */ + if (FAT32(pmp)) + pmp->pm_flags |= MSDOSFSMNT_LONGNAME; + else { + if ((error = msdosfs_root(mp, &rootvp)) != 0) + return error; + pmp->pm_flags |= findwin95(VTODE(rootvp)) + ? MSDOSFSMNT_LONGNAME + : MSDOSFSMNT_SHORTNAME; + vput(rootvp); + } + } + return 0; +} + +#ifndef __FreeBSD__ +/* + * Mount the filesystem on rootdev as the root filesystem, using default + * arguments (no flags, uid 0, gid 0, mask 0777). + * Returns 0 on success, ENODEV if the root device is not a disk, or the + * error from mountmsdosfs(), update_mp() or vfs_lock(). + */ +int +msdosfs_mountroot() +{ + register struct mount *mp; + struct proc *p = curproc; /* XXX */ + size_t size; + int error; + struct msdosfs_args args; + + if (root_device->dv_class != DV_DISK) + return (ENODEV); + + /* + * Get a vnode for rootdev. 
+ */ + if (bdevvp(rootdev, &rootvp)) + panic("msdosfs_mountroot: can't setup rootvp"); + + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &msdosfs_vfsops; + mp->mnt_flag = 0; + LIST_INIT(&mp->mnt_vnodelist); + + args.flags = 0; + args.uid = 0; + args.gid = 0; + args.mask = 0777; + + if ((error = mountmsdosfs(rootvp, mp, p, &args)) != 0) { + free(mp, M_MOUNT); + return (error); + } + + /* from here on a failure must also undo the mount */ + if ((error = update_mp(mp, &args)) != 0) { + (void)msdosfs_unmount(mp, 0, p); + free(mp, M_MOUNT); + return (error); + } + + if ((error = vfs_lock(mp)) != 0) { + (void)msdosfs_unmount(mp, 0, p); + free(mp, M_MOUNT); + return (error); + } + + CIRCLEQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mp->mnt_vnodecovered = NULLVP; + (void) copystr("/", mp->mnt_stat.f_mntonname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)msdosfs_statfs(mp, &mp->mnt_stat, p); + vfs_unlock(mp); + return (0); +} +#endif + +/* + * mp - path - addr in user space of mount point (ie /usr or whatever) + * data - addr in user space of mount params including the name of the block + * special file to treat as a filesystem. 
+ */ +static int +msdosfs_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; /* vnode for blk device to mount */ + struct msdosfs_args args; /* will hold data from mount request */ + /* msdosfs specific mount control block */ + struct msdosfsmount *pmp = NULL; + size_t size; + int error, flags; + mode_t accessmode; + + error = copyin(data, (caddr_t)&args, sizeof(struct msdosfs_args)); + if (error) + return (error); + /* arguments without the expected magic value get no option flags */ + if (args.magic != MSDOSFS_ARGSMAGIC) + args.flags = 0; + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. + */ + if (mp->mnt_flag & MNT_UPDATE) { + pmp = VFSTOMSDOSFS(mp); + error = 0; + /* going from read/write to read-only: flush files open for writing */ + if (!(pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_flag & MNT_RDONLY)) { + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + error = vflush(mp, NULLVP, flags); + } + if (!error && (mp->mnt_flag & MNT_RELOAD)) + /* not yet implemented */ + error = EOPNOTSUPP; + if (error) + return (error); + if ((pmp->pm_flags & MSDOSFSMNT_RONLY) && (mp->mnt_kern_flag & MNTK_WANTRDWR)) { + /* + * If upgrade to read-write by non-root, then verify + * that user has necessary permissions on the device. + */ + if (p->p_ucred->cr_uid != 0) { + devvp = pmp->pm_devvp; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_ACCESS(devvp, VREAD | VWRITE, + p->p_ucred, p); + if (error) { + VOP_UNLOCK(devvp, 0, p); + return (error); + } + VOP_UNLOCK(devvp, 0, p); + } + pmp->pm_flags &= ~MSDOSFSMNT_RONLY; + } + if (args.fspec == 0) { +#ifdef __notyet__ /* doesn't work correctly with current mountd XXX */ + if (args.flags & MSDOSFSMNT_MNTOPT) { + pmp->pm_flags &= ~MSDOSFSMNT_MNTOPT; + pmp->pm_flags |= args.flags & MSDOSFSMNT_MNTOPT; + if (pmp->pm_flags & MSDOSFSMNT_NOWIN95) + pmp->pm_flags |= MSDOSFSMNT_SHORTNAME; + } +#endif + /* + * Process export requests. 
+ */ + return (vfs_export(mp, &pmp->pm_export, &args.export)); + } + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. + */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + error = namei(ndp); + if (error) + return (error); + devvp = ndp->ni_vp; + + if (devvp->v_type != VBLK) { + vrele(devvp); + return (ENOTBLK); + } + if (major(devvp->v_rdev) >= nblkdev || + bdevsw[major(devvp->v_rdev)] == NULL) { + vrele(devvp); + return (ENXIO); + } + /* + * If mount by non-root, then verify that user has necessary + * permissions on the device. + */ + if (p->p_ucred->cr_uid != 0) { + accessmode = VREAD; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + accessmode |= VWRITE; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_ACCESS(devvp, accessmode, p->p_ucred, p); + if (error) { + vput(devvp); + return (error); + } + VOP_UNLOCK(devvp, 0, p); + } + if ((mp->mnt_flag & MNT_UPDATE) == 0) { + error = mountmsdosfs(devvp, mp, p, &args); +#ifdef MSDOSFS_DEBUG /* only needed for the printf below */ + pmp = VFSTOMSDOSFS(mp); +#endif + } else { + /* + * An update may only name the device that is already + * mounted; in that case the vnode just looked up is not + * needed any further and our reference to it is dropped. + */ + if (devvp != pmp->pm_devvp) + error = EINVAL; /* XXX needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + + error = update_mp(mp, &args); + if (error) { + msdosfs_unmount(mp, MNT_FORCE, p); + return error; + } + + /* record mount point and device names; copy errors are ignored */ + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void) msdosfs_statfs(mp, &mp->mnt_stat, p); +#ifdef MSDOSFS_DEBUG + printf("msdosfs_mount(): mp %p, pmp %p, inusemap %p\n", mp, pmp, pmp->pm_inusemap); +#endif + return (0); +} + +static int +mountmsdosfs(devvp, mp, p, argp) + struct vnode *devvp; + struct mount *mp; + struct proc *p; + struct msdosfs_args *argp; +{ + struct 
msdosfsmount *pmp; + struct buf *bp; + dev_t dev = devvp->v_rdev; +#ifndef __FreeBSD__ + struct partinfo dpart; + int bsize = 0, dtype = 0, tmp; +#endif + union bootsector *bsp; + struct byte_bpb33 *b33; + struct byte_bpb50 *b50; + struct byte_bpb710 *b710; + u_int8_t SecPerClust; + int ronly, error; + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + error = vfs_mountedon(devvp); + if (error) + return (error); + if (vcount(devvp) > 1 && devvp != rootvp) + return (EBUSY); + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY, p); + error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0); + VOP_UNLOCK(devvp, 0, p); + if (error) + return (error); + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); + if (error) + return (error); + + bp = NULL; /* both used in error_exit */ + pmp = NULL; + +#ifndef __FreeBSD__ + if (argp->flags & MSDOSFSMNT_GEMDOSFS) { + /* + * We need the disklabel to calculate the size of a FAT entry + * later on. Also make sure the partition contains a filesystem + * of type FS_MSDOS. This doesn't work for floppies, so we have + * to check for them too. + * + * At least some parts of the msdos fs driver seem to assume + * that the size of a disk block will always be 512 bytes. + * Let's check it... + */ + error = VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, + FREAD, NOCRED, p); + if (error) + goto error_exit; + tmp = dpart.part->p_fstype; + dtype = dpart.disklab->d_type; + bsize = dpart.disklab->d_secsize; + if (bsize != 512 || (dtype!=DTYPE_FLOPPY && tmp!=FS_MSDOS)) { + error = EINVAL; + goto error_exit; + } + } +#endif + + /* + * Read the boot sector of the filesystem, and then check the + * boot signature. If not a dos boot sector then error out. 
+ */ +#ifdef PC98 + error = bread(devvp, 0, 1024, NOCRED, &bp); +#else + error = bread(devvp, 0, 512, NOCRED, &bp); +#endif + if (error) + goto error_exit; + bp->b_flags |= B_AGE; + bsp = (union bootsector *)bp->b_data; + b33 = (struct byte_bpb33 *)bsp->bs33.bsBPB; + b50 = (struct byte_bpb50 *)bsp->bs50.bsBPB; + b710 = (struct byte_bpb710 *)bsp->bs710.bsPBP; + +#ifndef __FreeBSD__ + if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { +#endif +#ifdef PC98 + if ((bsp->bs50.bsBootSectSig0 != BOOTSIG0 + || bsp->bs50.bsBootSectSig1 != BOOTSIG1) + && (bsp->bs50.bsBootSectSig0 != 0 /* PC98 DOS 3.3x */ + || bsp->bs50.bsBootSectSig1 != 0) + && (bsp->bs50.bsBootSectSig0 != 0x90 /* PC98 DOS 5.0 */ + || bsp->bs50.bsBootSectSig1 != 0x3d) + && (bsp->bs50.bsBootSectSig0 != 0x46 /* PC98 DOS 3.3B */ + || bsp->bs50.bsBootSectSig1 != 0xfa)) { +#else + if (bsp->bs50.bsBootSectSig0 != BOOTSIG0 + || bsp->bs50.bsBootSectSig1 != BOOTSIG1) { +#endif + error = EINVAL; + printf("mountmsdosfs(): bad signature\n"); + goto error_exit; + } +#ifndef __FreeBSD__ + } +#endif + + pmp = malloc(sizeof *pmp, M_MSDOSFSMNT, M_WAITOK); + bzero((caddr_t)pmp, sizeof *pmp); + pmp->pm_mountp = mp; + + /* + * Compute several useful quantities from the bpb in the + * bootsector. Copy in the dos 5 variant of the bpb then fix up + * the fields that are different between dos 5 and dos 3.3. 
+ */ + SecPerClust = b50->bpbSecPerClust; + pmp->pm_BytesPerSec = getushort(b50->bpbBytesPerSec); + pmp->pm_ResSectors = getushort(b50->bpbResSectors); + pmp->pm_FATs = b50->bpbFATs; + pmp->pm_RootDirEnts = getushort(b50->bpbRootDirEnts); + pmp->pm_Sectors = getushort(b50->bpbSectors); + pmp->pm_FATsecs = getushort(b50->bpbFATsecs); + pmp->pm_SecPerTrack = getushort(b50->bpbSecPerTrack); + pmp->pm_Heads = getushort(b50->bpbHeads); + pmp->pm_Media = b50->bpbMedia; + +#ifndef __FreeBSD__ + if (!(argp->flags & MSDOSFSMNT_GEMDOSFS)) { +#endif + /* XXX - We should probably check more values here */ + if (!pmp->pm_BytesPerSec || !SecPerClust + || !pmp->pm_Heads || pmp->pm_Heads > 255 +#ifdef PC98 + || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 255) { +#else + || !pmp->pm_SecPerTrack || pmp->pm_SecPerTrack > 63) { +#endif + error = EINVAL; + printf("mountmsdosfs(): bad bpb\n"); + goto error_exit; + } +#ifndef __FreeBSD__ + } +#endif + + if (pmp->pm_Sectors == 0) { + pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs); + pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors); + } else { + pmp->pm_HiddenSects = getushort(b33->bpbHiddenSecs); + pmp->pm_HugeSectors = pmp->pm_Sectors; + } + if (pmp->pm_HugeSectors > 0xffffffff / + (pmp->pm_BytesPerSec / sizeof(struct direntry)) + 1) { + /* + * We cannot deal currently with this size of disk + * due to fileid limitations (see msdosfs_getattr and + * msdosfs_readdir) + */ + error = EINVAL; + printf("mountmsdosfs(): disk too big, sorry\n"); + goto error_exit; + } + + if (pmp->pm_RootDirEnts == 0) { + if (bsp->bs710.bsBootSectSig2 != BOOTSIG2 + || bsp->bs710.bsBootSectSig3 != BOOTSIG3 + || pmp->pm_Sectors + || pmp->pm_FATsecs + || getushort(b710->bpbFSVers)) { + error = EINVAL; + printf("mountmsdosfs(): bad FAT32 filesystem\n"); + goto error_exit; + } + pmp->pm_fatmask = FAT32_MASK; + pmp->pm_fatmult = 4; + pmp->pm_fatdiv = 1; + pmp->pm_FATsecs = getulong(b710->bpbBigFATsecs); + if (getushort(b710->bpbExtFlags) & FATMIRROR) + 
pmp->pm_curfat = getushort(b710->bpbExtFlags) & FATNUM; + else + pmp->pm_flags |= MSDOSFS_FATMIRROR; + } else + pmp->pm_flags |= MSDOSFS_FATMIRROR; + +#ifndef __FreeBSD__ + if (argp->flags & MSDOSFSMNT_GEMDOSFS) { + if (FAT32(pmp)) { + /* + * GEMDOS doesn't know fat32. + */ + error = EINVAL; + goto error_exit; + } + + /* + * Check a few values (could do some more): + * - logical sector size: power of 2, >= block size + * - sectors per cluster: power of 2, >= 1 + * - number of sectors: >= 1, <= size of partition + */ + if ( (SecPerClust == 0) + || (SecPerClust & (SecPerClust - 1)) + || (pmp->pm_BytesPerSec < bsize) + || (pmp->pm_BytesPerSec & (pmp->pm_BytesPerSec - 1)) + || (pmp->pm_HugeSectors == 0) + || (pmp->pm_HugeSectors * (pmp->pm_BytesPerSec / bsize) + > dpart.part->p_size) + ) { + error = EINVAL; + goto error_exit; + } + /* + * XXX - Many parts of the msdos fs driver seem to assume that + * the number of bytes per logical sector (BytesPerSec) will + * always be the same as the number of bytes per disk block + * Let's pretend it is. 
+ */ + tmp = pmp->pm_BytesPerSec / bsize; + pmp->pm_BytesPerSec = bsize; + pmp->pm_HugeSectors *= tmp; + pmp->pm_HiddenSects *= tmp; + pmp->pm_ResSectors *= tmp; + pmp->pm_Sectors *= tmp; + pmp->pm_FATsecs *= tmp; + SecPerClust *= tmp; + } +#endif + pmp->pm_fatblk = pmp->pm_ResSectors; + if (FAT32(pmp)) { + pmp->pm_rootdirblk = getulong(b710->bpbRootClust); + pmp->pm_firstcluster = pmp->pm_fatblk + + (pmp->pm_FATs * pmp->pm_FATsecs); + pmp->pm_fsinfo = getushort(b710->bpbFSInfo); + } else { + pmp->pm_rootdirblk = pmp->pm_fatblk + + (pmp->pm_FATs * pmp->pm_FATsecs); + pmp->pm_rootdirsize = (pmp->pm_RootDirEnts * sizeof(struct direntry) + + pmp->pm_BytesPerSec - 1) + / pmp->pm_BytesPerSec;/* in sectors */ + pmp->pm_firstcluster = pmp->pm_rootdirblk + pmp->pm_rootdirsize; + } + + pmp->pm_nmbrofclusters = (pmp->pm_HugeSectors - pmp->pm_firstcluster) / + SecPerClust; + pmp->pm_maxcluster = pmp->pm_nmbrofclusters + 1; + pmp->pm_fatsize = pmp->pm_FATsecs * pmp->pm_BytesPerSec; + +#ifndef __FreeBSD__ + if (argp->flags & MSDOSFSMNT_GEMDOSFS) { + if ((pmp->pm_nmbrofclusters <= (0xff0 - 2)) + && ((dtype == DTYPE_FLOPPY) || ((dtype == DTYPE_VNODE) + && ((pmp->pm_Heads == 1) || (pmp->pm_Heads == 2)))) + ) { + pmp->pm_fatmask = FAT12_MASK; + pmp->pm_fatmult = 3; + pmp->pm_fatdiv = 2; + } else { + pmp->pm_fatmask = FAT16_MASK; + pmp->pm_fatmult = 2; + pmp->pm_fatdiv = 1; + } + } else +#endif + if (pmp->pm_fatmask == 0) { + if (pmp->pm_maxcluster + <= ((CLUST_RSRVD - CLUST_FIRST) & FAT12_MASK)) { + /* + * This will usually be a floppy disk. This size makes + * sure that one fat entry will not be split across + * multiple blocks. 
+ */ + pmp->pm_fatmask = FAT12_MASK; + pmp->pm_fatmult = 3; + pmp->pm_fatdiv = 2; + } else { + pmp->pm_fatmask = FAT16_MASK; + pmp->pm_fatmult = 2; + pmp->pm_fatdiv = 1; + } + } + if (FAT12(pmp)) + pmp->pm_fatblocksize = 3 * pmp->pm_BytesPerSec; + else + pmp->pm_fatblocksize = DFLTBSIZE; + + pmp->pm_fatblocksec = pmp->pm_fatblocksize / pmp->pm_BytesPerSec; + pmp->pm_bnshift = ffs(pmp->pm_BytesPerSec) - 1; + + /* + * Compute mask and shift value for isolating cluster relative byte + * offsets and cluster numbers from a file offset. + */ + pmp->pm_bpcluster = SecPerClust * pmp->pm_BytesPerSec; + pmp->pm_crbomask = pmp->pm_bpcluster - 1; + pmp->pm_cnshift = ffs(pmp->pm_bpcluster) - 1; + + /* + * Check for valid cluster size + * must be a power of 2 + */ + if (pmp->pm_bpcluster ^ (1 << pmp->pm_cnshift)) { + error = EINVAL; + goto error_exit; + } + + /* + * Release the bootsector buffer. + */ + brelse(bp); + bp = NULL; + + /* + * Check FSInfo. + */ + if (pmp->pm_fsinfo) { + struct fsinfo *fp; + + if ((error = bread(devvp, pmp->pm_fsinfo, 1024, NOCRED, &bp)) != 0) + goto error_exit; + fp = (struct fsinfo *)bp->b_data; + if (!bcmp(fp->fsisig1, "RRaA", 4) + && !bcmp(fp->fsisig2, "rrAa", 4) + && !bcmp(fp->fsisig3, "\0\0\125\252", 4) + && !bcmp(fp->fsisig4, "\0\0\125\252", 4)) + pmp->pm_nxtfree = getulong(fp->fsinxtfree); + else + pmp->pm_fsinfo = 0; + brelse(bp); + bp = NULL; + } + + /* + * Check and validate (or perhaps invalidate?) the fsinfo structure? XXX + */ + + /* + * Allocate memory for the bitmap of allocated clusters, and then + * fill it in. + */ + pmp->pm_inusemap = malloc(((pmp->pm_maxcluster + N_INUSEBITS - 1) + / N_INUSEBITS) + * sizeof(*pmp->pm_inusemap), + M_MSDOSFSFAT, M_WAITOK); + + /* + * fillinusemap() needs pm_devvp. + */ + pmp->pm_dev = dev; + pmp->pm_devvp = devvp; + + /* + * Have the inuse map filled in. 
+ */ + if ((error = fillinusemap(pmp)) != 0) + goto error_exit; + + /* + * If they want fat updates to be synchronous then let them suffer + * the performance degradation in exchange for the on disk copy of + * the fat being correct just about all the time. I suppose this + * would be a good thing to turn on if the kernel is still flakey. + */ + if (mp->mnt_flag & MNT_SYNCHRONOUS) + pmp->pm_flags |= MSDOSFSMNT_WAITONFAT; + + /* + * Finish up. + */ + if (ronly) + pmp->pm_flags |= MSDOSFSMNT_RONLY; + else + pmp->pm_fmod = 1; + mp->mnt_data = (qaddr_t) pmp; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; + mp->mnt_flag |= MNT_LOCAL; + devvp->v_specmountpoint = mp; + + return 0; + +error_exit: + if (bp) + brelse(bp); + (void) VOP_CLOSE(devvp, ronly ? FREAD : FREAD | FWRITE, NOCRED, p); + if (pmp) { + if (pmp->pm_inusemap) + free(pmp->pm_inusemap, M_MSDOSFSFAT); + free(pmp, M_MSDOSFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return (error); +} + +static int +msdosfs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + + return (0); +} + +/* + * Unmount the filesystem described by mp. 
+ */
+static int
+msdosfs_unmount(mp, mntflags, p)
+	struct mount *mp;
+	int mntflags;
+	struct proc *p;
+{
+	struct msdosfsmount *pmp;
+	int error, flags;
+
+	/* Translate MNT_FORCE into the FORCECLOSE flag handed to vflush(). */
+	flags = 0;
+	if (mntflags & MNT_FORCE)
+		flags |= FORCECLOSE;
+	error = vflush(mp, NULLVP, flags);
+	if (error)
+		return error;
+	pmp = VFSTOMSDOSFS(mp);
+	/* Detach this mount from the device vnode before closing it. */
+	pmp->pm_devvp->v_specmountpoint = NULL;
+#ifdef MSDOSFS_DEBUG
+	{
+		struct vnode *vp = pmp->pm_devvp;
+
+		printf("msdosfs_umount(): just before calling VOP_CLOSE()\n");
+		printf("flag %08lx, usecount %d, writecount %d, holdcnt %ld\n",
+		    vp->v_flag, vp->v_usecount, vp->v_writecount, vp->v_holdcnt);
+		printf("lastr %d, id %lu, mount %p, op %p\n",
+		    vp->v_lastr, vp->v_id, vp->v_mount, vp->v_op);
+		printf("freef %p, freeb %p, mount %p\n",
+		    vp->v_freelist.tqe_next, vp->v_freelist.tqe_prev,
+		    vp->v_mount);
+		printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
+		    TAILQ_FIRST(&vp->v_cleanblkhd),
+		    TAILQ_FIRST(&vp->v_dirtyblkhd),
+		    vp->v_numoutput, vp->v_type);
+		printf("union %p, tag %d, data[0] %08x, data[1] %08x\n",
+		    vp->v_socket, vp->v_tag,
+		    ((u_int *)vp->v_data)[0],
+		    ((u_int *)vp->v_data)[1]);
+	}
+#endif
+	/*
+	 * Close the device with the access mode implied by the read-only
+	 * flag, then drop the vnode reference and free the per-mount state.
+	 * The teardown is unconditional; the VOP_CLOSE() status is only
+	 * reported to the caller.
+	 */
+	error = VOP_CLOSE(pmp->pm_devvp,
+		    (pmp->pm_flags&MSDOSFSMNT_RONLY) ? FREAD : FREAD | FWRITE,
+		    NOCRED, p);
+	vrele(pmp->pm_devvp);
+	free(pmp->pm_inusemap, M_MSDOSFSFAT);
+	free(pmp, M_MSDOSFSMNT);
+	mp->mnt_data = (qaddr_t)0;
+	mp->mnt_flag &= ~MNT_LOCAL;
+	return (error);
+}
+
+/*
+ * Return the vnode of the filesystem's root directory in *vpp.  The root
+ * denode is obtained from deget() using the MSDOSFSROOT/MSDOSFSROOT_OFS
+ * pair; a deget() failure is returned unchanged and *vpp is not written.
+ */
+static int
+msdosfs_root(mp, vpp)
+	struct mount *mp;
+	struct vnode **vpp;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct denode *ndep;
+	int error;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_root(); mp %p, pmp %p\n", mp, pmp);
+#endif
+	error = deget(pmp, MSDOSFSROOT, MSDOSFSROOT_OFS, &ndep);
+	if (error)
+		return (error);
+	*vpp = DETOV(ndep);
+	return (0);
+}
+
+/*
+ * Quota control is not implemented: every request gets EOPNOTSUPP.
+ */
+static int
+msdosfs_quotactl(mp, cmds, uid, arg, p)
+	struct mount *mp;
+	int cmds;
+	uid_t uid;
+	caddr_t arg;
+	struct proc *p;
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Fill *sbp with filesystem statistics.  Block counts are expressed in
+ * clusters (f_bsize is the cluster size).  The type number and mount
+ * names are copied from the mount only when sbp is not the mount's own
+ * mnt_stat.  Always returns 0.
+ */
+static int
+msdosfs_statfs(mp, sbp, p)
+	struct mount *mp;
+	struct statfs *sbp;
+	struct proc *p;
+{
+	struct msdosfsmount *pmp;
+
+	pmp = VFSTOMSDOSFS(mp);
+	sbp->f_bsize = pmp->pm_bpcluster;
+	sbp->f_iosize = pmp->pm_bpcluster;
+	sbp->f_blocks = pmp->pm_nmbrofclusters;
+	sbp->f_bfree = pmp->pm_freeclustercount;
+	sbp->f_bavail = pmp->pm_freeclustercount;
+	sbp->f_files = pmp->pm_RootDirEnts;			/* XXX */
+	sbp->f_ffree = 0;	/* what to put in here? */
+	if (sbp != &mp->mnt_stat) {
+		sbp->f_type = mp->mnt_vfc->vfc_typenum;
+		bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN);
+		bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN);
+	}
+	strncpy(sbp->f_fstypename, mp->mnt_vfc->vfc_name, MFSNAMELEN);
+	return (0);
+}
+
+/*
+ * Write back the vnodes of this mount that need it, then (unless the
+ * request is MNT_LAZY) fsync the device vnode as well.  Individual
+ * VOP_FSYNC() failures do not stop the walk; the last error seen is
+ * returned, or 0 when there was none.
+ */
+static int
+msdosfs_sync(mp, waitfor, cred, p)
+	struct mount *mp;
+	int waitfor;
+	struct ucred *cred;
+	struct proc *p;
+{
+	struct vnode *vp, *nvp;
+	struct denode *dep;
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	int error, allerror = 0;
+
+	/*
+	 * If we ever switch to not updating all of the fats all the time,
+	 * this would be the place to update them from the first one.
+	 */
+	/* The else below pairs with the inner if (read-only check). */
+	if (pmp->pm_fmod != 0)
+		if (pmp->pm_flags & MSDOSFSMNT_RONLY)
+			panic("msdosfs_sync: rofs mod");
+		else {
+			/* update fats here */
+		}
+	/*
+	 * Write back each (modified) denode.
+	 */
+	simple_lock(&mntvnode_slock);
+loop:
+	for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) {
+		/*
+		 * If the vnode that we are about to sync is no longer
+		 * associated with this mount point, start over.
+		 */
+		if (vp->v_mount != mp)
+			goto loop;
+
+		simple_lock(&vp->v_interlock);
+		/* Pick up the successor now, while the list lock is held. */
+		nvp = vp->v_mntvnodes.le_next;
+		dep = VTODE(vp);
+		/*
+		 * Skip vnodes with no type, and vnodes that have no pending
+		 * denode flags and either no dirty buffers or a lazy request.
+		 */
+		if (vp->v_type == VNON ||
+		    ((dep->de_flag &
+		    (DE_ACCESS | DE_CREATE | DE_UPDATE | DE_MODIFIED)) == 0 &&
+		    (TAILQ_EMPTY(&vp->v_dirtyblkhd) || waitfor == MNT_LAZY))) {
+			simple_unlock(&vp->v_interlock);
+			continue;
+		}
+		/*
+		 * The list lock is dropped before vget(); the vnode interlock
+		 * is still held here and is passed along via LK_INTERLOCK.
+		 */
+		simple_unlock(&mntvnode_slock);
+		error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, p);
+		if (error) {
+			/* Re-take the list lock; ENOENT restarts the walk. */
+			simple_lock(&mntvnode_slock);
+			if (error == ENOENT)
+				goto loop;
+			continue;
+		}
+		error = VOP_FSYNC(vp, cred, waitfor, p);
+		if (error)
+			allerror = error;
+		VOP_UNLOCK(vp, 0, p);
+		vrele(vp);
+		simple_lock(&mntvnode_slock);
+	}
+	simple_unlock(&mntvnode_slock);
+
+	/*
+	 * Flush filesystem control info.
+	 */
+	if (waitfor != MNT_LAZY) {
+		vn_lock(pmp->pm_devvp, LK_EXCLUSIVE | LK_RETRY, p);
+		error = VOP_FSYNC(pmp->pm_devvp, cred, waitfor, p);
+		if (error)
+			allerror = error;
+		VOP_UNLOCK(pmp->pm_devvp, 0, p);
+	}
+	return (allerror);
+}
+
+/*
+ * Translate the file handle fhp into a vnode.  The requester address nam
+ * must match an export entry of this mount (vfs_export_lookup()), else
+ * EACCES is returned.  The handle is read as a struct defid whose
+ * directory cluster and offset are resolved by deget().  On success the
+ * export flags and anonymous credential of the matching entry are
+ * returned through exflagsp and credanonp; on a deget() failure *vpp is
+ * set to NULLVP.
+ */
+static int
+msdosfs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp)
+	struct mount *mp;
+	struct fid *fhp;
+	struct sockaddr *nam;
+	struct vnode **vpp;
+	int *exflagsp;
+	struct ucred **credanonp;
+{
+	struct msdosfsmount *pmp = VFSTOMSDOSFS(mp);
+	struct defid *defhp = (struct defid *) fhp;
+	struct denode *dep;
+	struct netcred *np;
+	int error;
+
+	np = vfs_export_lookup(mp, &pmp->pm_export, nam);
+	if (np == NULL)
+		return (EACCES);
+	error = deget(pmp, defhp->defid_dirclust, defhp->defid_dirofs, &dep);
+	if (error) {
+		*vpp = NULLVP;
+		return (error);
+	}
+	*vpp = DETOV(dep);
+	*exflagsp = np->netc_exflags;
+	*credanonp = &np->netc_anon;
+	return (0);
+}
+
+/*
+ * Build the file handle for vp in *fhp: a struct defid holding the
+ * denode's directory cluster and directory offset.  No generation number
+ * is recorded (the assignment is commented out).  Always returns 0.
+ */
+static int
+msdosfs_vptofh(vp, fhp)
+	struct vnode *vp;
+	struct fid *fhp;
+{
+	struct denode *dep;
+	struct defid *defhp;
+
+	dep = VTODE(vp);
+	defhp = (struct defid *)fhp;
+	defhp->defid_len = sizeof(struct defid);
+	defhp->defid_dirclust = dep->de_dirclust;
+	defhp->defid_dirofs = dep->de_diroffset;
+	/* defhp->defid_gen = dep->de_gen; */
+	return (0);
+}
+
+/*
+ * Lookup of a vnode by inode number is not implemented: EOPNOTSUPP.
+ */
+static int
+msdosfs_vget(mp, ino, vpp)
+	struct mount *mp;
+	ino_t ino;
+	struct vnode **vpp;
+{
+	return EOPNOTSUPP;
+}
+
+/*
+ * Filesystem operations vector for msdosfs.  The initializer is
+ * positional, so the order of the entries must follow the member order
+ * of struct vfsops.  It is handed to VFS_SET() below under the name
+ * "msdos".
+ */
+static struct vfsops msdosfs_vfsops = {
+	msdosfs_mount,
+	msdosfs_start,
+	msdosfs_unmount,
+	msdosfs_root,
+	msdosfs_quotactl,
+	msdosfs_statfs,
+	msdosfs_sync,
+	msdosfs_vget,
+	msdosfs_fhtovp,
+	msdosfs_vptofh,
+	msdosfs_init
+};
+
+VFS_SET(msdosfs_vfsops, msdos, 0);
diff --git a/sys/fs/msdosfs/msdosfs_vnops.c b/sys/fs/msdosfs/msdosfs_vnops.c
new file mode 100644
index 0000000..36aa91d
--- /dev/null
+++ b/sys/fs/msdosfs/msdosfs_vnops.c
@@ -0,0 +1,1983 @@
+/*	$Id: msdosfs_vnops.c,v 1.80 1998/12/07 21:58:35 archie Exp $ */
+/*	$NetBSD: msdosfs_vnops.c,v 1.68 1998/02/10 14:10:04 mrg Exp $
*/ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. 
+ * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/resourcevar.h> /* defines plimit structure in proc struct */ +#include <sys/kernel.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <miscfs/specfs/specdev.h> /* XXX */ /* defines v_rdev */ +#include <sys/malloc.h> +#include <sys/dirent.h> +#include <sys/signalvar.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_zone.h> +#include <vm/vnode_pager.h> + +#include <msdosfs/bpb.h> +#include <msdosfs/direntry.h> +#include <msdosfs/denode.h> +#include <msdosfs/msdosfsmount.h> +#include <msdosfs/fat.h> + +/* + * Prototypes for MSDOSFS vnode operations + */ +static int msdosfs_create __P((struct vop_create_args *)); +static int msdosfs_mknod __P((struct vop_mknod_args *)); +static int msdosfs_close __P((struct vop_close_args *)); +static int msdosfs_access __P((struct vop_access_args *)); +static int msdosfs_getattr __P((struct vop_getattr_args *)); +static int msdosfs_setattr __P((struct vop_setattr_args *)); +static int msdosfs_read __P((struct vop_read_args *)); +static int msdosfs_write __P((struct vop_write_args *)); +static int msdosfs_fsync __P((struct vop_fsync_args *)); +static int msdosfs_remove __P((struct vop_remove_args *)); +static int msdosfs_link __P((struct vop_link_args *)); +static int msdosfs_rename __P((struct vop_rename_args *)); +static int msdosfs_mkdir __P((struct vop_mkdir_args *)); +static int msdosfs_rmdir __P((struct vop_rmdir_args *)); +static int msdosfs_symlink __P((struct vop_symlink_args *)); +static int 
msdosfs_readdir __P((struct vop_readdir_args *)); +static int msdosfs_abortop __P((struct vop_abortop_args *)); +static int msdosfs_bmap __P((struct vop_bmap_args *)); +static int msdosfs_strategy __P((struct vop_strategy_args *)); +static int msdosfs_print __P((struct vop_print_args *)); +static int msdosfs_pathconf __P((struct vop_pathconf_args *ap)); +static int msdosfs_getpages __P((struct vop_getpages_args *)); +static int msdosfs_putpages __P((struct vop_putpages_args *)); + +/* + * Some general notes: + * + * In the ufs filesystem the inodes, superblocks, and indirect blocks are + * read/written using the vnode for the filesystem. Blocks that represent + * the contents of a file are read/written using the vnode for the file + * (including directories when they are read/written as files). This + * presents problems for the dos filesystem because data that should be in + * an inode (if dos had them) resides in the directory itself. Since we + * must update directory entries without the benefit of having the vnode + * for the directory we must use the vnode for the filesystem. This means + * that when a directory is actually read/written (via read, write, or + * readdir, or seek) we must use the vnode for the filesystem instead of + * the vnode for the directory as would happen in ufs. This is to ensure we + * retrieve the correct block from the buffer cache since the hash value is + * based upon the vnode address and the desired block number. + */ + +/* + * Create a regular file. On entry the directory to contain the file being + * created is locked. We must release before we return. We must also free + * the pathname buffer pointed at by cnp->cn_pnbuf, always on error, or + * only if the SAVESTART bit in cn_flags is clear on success. 
+ */
+static int
+msdosfs_create(ap)
+	struct vop_create_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct denode ndirent;
+	struct denode *dep;
+	struct denode *pdep = VTODE(ap->a_dvp);
+	struct timespec ts;
+	int error;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_create(cnp %p, vap %p\n", cnp, ap->a_vap);
+#endif
+
+	/*
+	 * If this is the root directory and there is no space left we
+	 * can't do anything. This is because the root directory can not
+	 * change size.
+	 */
+	if (pdep->de_StartCluster == MSDOSFSROOT
+	    && pdep->de_fndoffset >= pdep->de_FileSize) {
+		error = ENOSPC;
+		goto bad;
+	}
+
+	/*
+	 * Create a directory entry for the file, then call createde() to
+	 * have it installed. NOTE: DOS files are always executable. We
+	 * use the absence of the owner write bit to make the file
+	 * readonly.
+	 */
+#ifdef DIAGNOSTIC
+	if ((cnp->cn_flags & HASBUF) == 0)
+		panic("msdosfs_create: no name");
+#endif
+	/* Build the new entry in a scratch denode on the stack. */
+	bzero(&ndirent, sizeof(ndirent));
+	error = uniqdosname(pdep, cnp, ndirent.de_Name);
+	if (error)
+		goto bad;
+
+	ndirent.de_Attributes = (ap->a_vap->va_mode & VWRITE) ?
+				ATTR_ARCHIVE : ATTR_ARCHIVE | ATTR_READONLY;
+	ndirent.de_LowerCase = 0;
+	ndirent.de_StartCluster = 0;
+	ndirent.de_FileSize = 0;
+	ndirent.de_dev = pdep->de_dev;
+	ndirent.de_devvp = pdep->de_devvp;
+	ndirent.de_pmp = pdep->de_pmp;
+	ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE;
+	getnanotime(&ts);
+	DETIMES(&ndirent, &ts, &ts, &ts);
+	error = createde(&ndirent, pdep, &dep, cnp);
+	if (error)
+		goto bad;
+	/* On success the name buffer is kept only if SAVESTART is set. */
+	if ((cnp->cn_flags & SAVESTART) == 0)
+		zfree(namei_zone, cnp->cn_pnbuf);
+	*ap->a_vpp = DETOV(dep);
+	return (0);
+
+bad:
+	/* Every failure path releases the pathname buffer. */
+	zfree(namei_zone, cnp->cn_pnbuf);
+	return (error);
+}
+
+/*
+ * Dispatch a mknod request on the requested vnode type: directories go to
+ * msdosfs_mkdir(), regular files to msdosfs_create().  The argument
+ * structure is passed on with a cast only.  Any other type frees the
+ * pathname buffer and fails with EINVAL.
+ */
+static int
+msdosfs_mknod(ap)
+	struct vop_mknod_args /* {
+		struct vnode *a_dvp;
+		struct vnode **a_vpp;
+		struct componentname *a_cnp;
+		struct vattr *a_vap;
+	} */ *ap;
+{
+
+	switch (ap->a_vap->va_type) {
+	case VDIR:
+		return (msdosfs_mkdir((struct vop_mkdir_args *)ap));
+
+	case VREG:
+		return (msdosfs_create((struct vop_create_args *)ap));
+
+	default:
+		zfree(namei_zone, ap->a_cnp->cn_pnbuf);
+		return (EINVAL);
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * Close a file.  While holding the vnode interlock, DETIMES() is applied
+ * to the denode with the current time, but only if the vnode has more
+ * than one user.  Always returns 0.
+ */
+static int
+msdosfs_close(ap)
+	struct vop_close_args /* {
+		struct vnode *a_vp;
+		int a_fflag;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(vp);
+	struct timespec ts;
+
+	simple_lock(&vp->v_interlock);
+	if (vp->v_usecount > 1) {
+		getnanotime(&ts);
+		DETIMES(dep, &ts, &ts, &ts);
+	}
+	simple_unlock(&vp->v_interlock);
+	return 0;
+}
+
+/*
+ * Check whether cred may access the file in the way a_mode asks for.
+ *
+ * The file's permission bits are synthesized: read and execute for
+ * everyone, write for everyone unless ATTR_READONLY is set, all of it
+ * masked by the mount's pm_mask.  Ownership is that of the mount
+ * (pm_uid/pm_gid).  Returns 0, EROFS for a write request on a read-only
+ * mount, or EACCES.
+ */
+static int
+msdosfs_access(ap)
+	struct vop_access_args /* {
+		struct vnode *a_vp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct ucred *cred = ap->a_cred;
+	mode_t mask, file_mode, mode = ap->a_mode;
+	register gid_t *gp;
+	int i;
+
+	file_mode = (S_IXUSR|S_IXGRP|S_IXOTH) | (S_IRUSR|S_IRGRP|S_IROTH) |
+	    ((dep->de_Attributes & ATTR_READONLY) ? 0 : (S_IWUSR|S_IWGRP|S_IWOTH));
+	file_mode &= pmp->pm_mask;
+
+	/*
+	 * Disallow write attempts on read-only file systems;
+	 * unless the file is a socket, fifo, or a block or
+	 * character device resident on the file system.
+	 */
+	if (mode & VWRITE) {
+		switch (vp->v_type) {
+		case VDIR:
+		case VLNK:
+		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* User id 0 always gets access. */
+	if (cred->cr_uid == 0)
+		return 0;
+
+	mask = 0;
+
+	/* Otherwise, check the owner. */
+	if (cred->cr_uid == pmp->pm_uid) {
+		if (mode & VEXEC)
+			mask |= S_IXUSR;
+		if (mode & VREAD)
+			mask |= S_IRUSR;
+		if (mode & VWRITE)
+			mask |= S_IWUSR;
+		return (file_mode & mask) == mask ? 0 : EACCES;
+	}
+
+	/* Otherwise, check the groups. */
+	for (i = 0, gp = cred->cr_groups; i < cred->cr_ngroups; i++, gp++)
+		if (pmp->pm_gid == *gp) {
+			if (mode & VEXEC)
+				mask |= S_IXGRP;
+			if (mode & VREAD)
+				mask |= S_IRGRP;
+			if (mode & VWRITE)
+				mask |= S_IWGRP;
+			return (file_mode & mask) == mask ? 0 : EACCES;
+		}
+
+	/* Otherwise, check everyone else. */
+	if (mode & VEXEC)
+		mask |= S_IXOTH;
+	if (mode & VREAD)
+		mask |= S_IROTH;
+	if (mode & VWRITE)
+		mask |= S_IWOTH;
+	return (file_mode & mask) == mask ? 0 : EACCES;
+}
+
+/*
+ * Fill *a_vap with the attributes of the file.
+ *
+ * uid, gid and the permission mask come from the mount; the mode is
+ * rwx for everyone, without the write bits when ATTR_READONLY is set.
+ * The link count is always 1.  Access and change times are taken from
+ * the directory entry only on MSDOSFSMNT_LONGNAME mounts, otherwise they
+ * mirror the modification time.  Always returns 0.
+ */
+static int
+msdosfs_getattr(ap)
+	struct vop_getattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct vattr *vap = ap->a_vap;
+	mode_t mode;
+	struct timespec ts;
+	u_long dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry);
+	u_long fileid;
+
+	getnanotime(&ts);
+	DETIMES(dep, &ts, &ts, &ts);
+	vap->va_fsid = dep->de_dev;
+	/*
+	 * The following computation of the fileid must be the same as that
+	 * used in msdosfs_readdir() to compute d_fileno. If not, pwd
+	 * doesn't work.
+	 */
+	if (dep->de_Attributes & ATTR_DIRECTORY) {
+		/* Directories: derived from their own first cluster. */
+		fileid = cntobn(pmp, dep->de_StartCluster) * dirsperblk;
+		if (dep->de_StartCluster == MSDOSFSROOT)
+			fileid = 1;
+	} else {
+		/* Files: position of their entry in the parent directory. */
+		fileid = cntobn(pmp, dep->de_dirclust) * dirsperblk;
+		if (dep->de_dirclust == MSDOSFSROOT)
+			fileid = roottobn(pmp, 0) * dirsperblk;
+		fileid += dep->de_diroffset / sizeof(struct direntry);
+	}
+	vap->va_fileid = fileid;
+	if ((dep->de_Attributes & ATTR_READONLY) == 0)
+		mode = S_IRWXU|S_IRWXG|S_IRWXO;
+	else
+		mode = S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH;
+	vap->va_mode = mode & pmp->pm_mask;
+	vap->va_uid = pmp->pm_uid;
+	vap->va_gid = pmp->pm_gid;
+	vap->va_nlink = 1;
+	vap->va_rdev = 0;
+	vap->va_size = dep->de_FileSize;
+	dos2unixtime(dep->de_MDate, dep->de_MTime, 0, &vap->va_mtime);
+	if (pmp->pm_flags & MSDOSFSMNT_LONGNAME) {
+		dos2unixtime(dep->de_ADate, 0, 0, &vap->va_atime);
+		dos2unixtime(dep->de_CDate, dep->de_CTime, dep->de_CHun, &vap->va_ctime);
+	} else {
+		vap->va_atime = vap->va_mtime;
+		vap->va_ctime = vap->va_mtime;
+	}
+	/* SF_ARCHIVED is the inverse of the DOS archive attribute. */
+	vap->va_flags = 0;
+	if ((dep->de_Attributes & ATTR_ARCHIVE) == 0)
+		vap->va_flags |= SF_ARCHIVED;
+	vap->va_gen = 0;
+	vap->va_blocksize = pmp->pm_bpcluster;
+	/* File size rounded up to a whole number of clusters. */
+	vap->va_bytes =
+	    (dep->de_FileSize + pmp->pm_crbomask) & ~pmp->pm_crbomask;
+	vap->va_type = ap->a_vp->v_type;
+	vap->va_filerev = dep->de_modrev;
+	return (0);
+}
+
+/*
+ * Change the attributes of a file.
+ *
+ * The settable groups are handled in order: flags, owner/group, size,
+ * times, mode.  Each group is skipped while its vattr fields are VNOVAL.
+ * Every group that is processed is refused with EROFS on a read-only
+ * mount (for the size this applies to regular files and symlinks).
+ * Since an error returns immediately, in-core changes made by an earlier
+ * group are not written by this call.  On success the directory entry is
+ * pushed out with deupdat().
+ */
+static int
+msdosfs_setattr(ap)
+	struct vop_setattr_args /* {
+		struct vnode *a_vp;
+		struct vattr *a_vap;
+		struct ucred *a_cred;
+		struct proc *a_p;
+	} */ *ap;
+{
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(ap->a_vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct vattr *vap = ap->a_vap;
+	struct ucred *cred = ap->a_cred;
+	int error = 0;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_setattr(): vp %p, vap %p, cred %p, p %p\n",
+	    ap->a_vp, vap, cred, ap->a_p);
+#endif
+
+	/*
+	 * Check for unsettable attributes.
+	 */
+	if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) ||
+	    (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) ||
+	    (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) ||
+	    (vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) {
+#ifdef MSDOSFS_DEBUG
+		printf("msdosfs_setattr(): returning EINVAL\n");
+		printf(" va_type %d, va_nlink %x, va_fsid %lx, va_fileid %lx\n",
+		    vap->va_type, vap->va_nlink, vap->va_fsid, vap->va_fileid);
+		printf(" va_blocksize %lx, va_rdev %x, va_bytes %qx, va_gen %lx\n",
+		    vap->va_blocksize, vap->va_rdev, vap->va_bytes, vap->va_gen);
+		printf(" va_uid %x, va_gid %x\n",
+		    vap->va_uid, vap->va_gid);
+#endif
+		return (EINVAL);
+	}
+	if (vap->va_flags != VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		if (cred->cr_uid != pmp->pm_uid &&
+		    (error = suser(cred, &ap->a_p->p_acflag)))
+			return (error);
+		/*
+		 * We are very inconsistent about handling unsupported
+		 * attributes. We ignored the access time and the
+		 * read and execute bits. We were strict for the other
+		 * attributes.
+		 *
+		 * Here we are strict, stricter than ufs in not allowing
+		 * users to attempt to set SF_SETTABLE bits or anyone to
+		 * set unsupported bits. However, we ignore attempts to
+		 * set ATTR_ARCHIVE for directories `cp -pr' from a more
+		 * sensible file system attempts it a lot.
+		 */
+		if (cred->cr_uid != 0) {
+			if (vap->va_flags & SF_SETTABLE)
+				return EPERM;
+		}
+		if (vap->va_flags & ~SF_ARCHIVED)
+			return EOPNOTSUPP;
+		if (vap->va_flags & SF_ARCHIVED)
+			dep->de_Attributes &= ~ATTR_ARCHIVE;
+		else if (!(dep->de_Attributes & ATTR_DIRECTORY))
+			dep->de_Attributes |= ATTR_ARCHIVE;
+		dep->de_flag |= DE_MODIFIED;
+	}
+
+	/*
+	 * Ownership cannot really change: after the permission check, the
+	 * only accepted request is one that names the mount's own uid/gid.
+	 */
+	if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) {
+		uid_t uid;
+		gid_t gid;
+
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		uid = vap->va_uid;
+		if (uid == (uid_t)VNOVAL)
+			uid = pmp->pm_uid;
+		gid = vap->va_gid;
+		if (gid == (gid_t)VNOVAL)
+			gid = pmp->pm_gid;
+		if ((cred->cr_uid != pmp->pm_uid || uid != pmp->pm_uid ||
+		    (gid != pmp->pm_gid && !groupmember(gid, cred))) &&
+		    (error = suser(cred, &ap->a_p->p_acflag)))
+			return error;
+		if (uid != pmp->pm_uid || gid != pmp->pm_gid)
+			return EINVAL;
+	}
+
+	if (vap->va_size != VNOVAL) {
+		/*
+		 * Disallow write attempts on read-only file systems;
+		 * unless the file is a socket, fifo, or a block or
+		 * character device resident on the file system.
+		 */
+		switch (vp->v_type) {
+		case VDIR:
+			return (EISDIR);
+			/* NOT REACHED */
+		case VLNK:
+		case VREG:
+			if (vp->v_mount->mnt_flag & MNT_RDONLY)
+				return (EROFS);
+			break;
+		default:
+			break;
+		}
+		error = detrunc(dep, vap->va_size, 0, cred, ap->a_p);
+		if (error)
+			return error;
+	}
+	if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		/*
+		 * A caller that is neither the mount owner nor the superuser
+		 * is still let through when VA_UTIMES_NULL is set and it has
+		 * write access to the file.
+		 */
+		if (cred->cr_uid != pmp->pm_uid &&
+		    (error = suser(cred, &ap->a_p->p_acflag)) &&
+		    ((vap->va_vaflags & VA_UTIMES_NULL) == 0 ||
+		    (error = VOP_ACCESS(ap->a_vp, VWRITE, cred, ap->a_p))))
+			return (error);
+		if (vp->v_type != VDIR) {
+			if ((pmp->pm_flags & MSDOSFSMNT_NOWIN95) == 0 &&
+			    vap->va_atime.tv_sec != VNOVAL)
+				unix2dostime(&vap->va_atime, &dep->de_ADate, NULL, NULL);
+			if (vap->va_mtime.tv_sec != VNOVAL)
+				unix2dostime(&vap->va_mtime, &dep->de_MDate, &dep->de_MTime, NULL);
+			dep->de_Attributes |= ATTR_ARCHIVE;
+			dep->de_flag |= DE_MODIFIED;
+		}
+	}
+	/*
+	 * DOS files only have the ability to have their writability
+	 * attribute set, so we use the owner write bit to set the readonly
+	 * attribute.
+	 */
+	if (vap->va_mode != (mode_t)VNOVAL) {
+		if (vp->v_mount->mnt_flag & MNT_RDONLY)
+			return (EROFS);
+		if (cred->cr_uid != pmp->pm_uid &&
+		    (error = suser(cred, &ap->a_p->p_acflag)))
+			return (error);
+		if (vp->v_type != VDIR) {
+			/* We ignore the read and execute bits. */
+			if (vap->va_mode & VWRITE)
+				dep->de_Attributes &= ~ATTR_READONLY;
+			else
+				dep->de_Attributes |= ATTR_READONLY;
+			dep->de_flag |= DE_MODIFIED;
+		}
+	}
+	return (deupdat(dep, 1));
+}
+
+/*
+ * Read from a file or directory, one cluster per iteration, until the
+ * request is satisfied, end of file is reached or an error occurs.
+ *
+ * Directories are read through the device vnode after pcbmap() has
+ * translated the cluster number; regular files are read through their
+ * own vnode, with one cluster of read-ahead when the access looks
+ * sequential.  A read that starts at or beyond end of file returns 0
+ * without transferring anything.
+ */
+static int
+msdosfs_read(ap)
+	struct vop_read_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int error = 0;
+	off_t left;
+	int blsize;
+	int isadir;
+	long n;
+	long on;
+	daddr_t lbn;
+	daddr_t rablock;
+	int rasize;
+	struct buf *bp;
+	struct vnode *vp = ap->a_vp;
+	struct denode *dep = VTODE(vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct uio *uio = ap->a_uio;
+
+	/*
+	 * If they didn't ask for any data, then we are done.
+	 */
+	if (uio->uio_resid == 0)
+		return (0);
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	isadir = dep->de_Attributes & ATTR_DIRECTORY;
+	do {
+		lbn = de_cluster(pmp, uio->uio_offset);
+		on = uio->uio_offset & pmp->pm_crbomask;
+		n = min((u_long) (pmp->pm_bpcluster - on), uio->uio_resid);
+		/*
+		 * Compare and subtract in off_t.  The distance to end of
+		 * file used to be squeezed into an int, so an offset far
+		 * enough beyond EOF could truncate to a positive value and
+		 * slip past the end-of-file test.
+		 */
+		if (uio->uio_offset >= (off_t)dep->de_FileSize)
+			return (0);
+		left = (off_t)dep->de_FileSize - uio->uio_offset;
+		if (left < n)
+			n = left;
+		/* convert cluster # to block # if a directory */
+		if (isadir) {
+			error = pcbmap(dep, lbn, &lbn, 0, &blsize);
+			if (error)
+				return (error);
+		}
+		/*
+		 * If we are operating on a directory file then be sure to
+		 * do i/o with the vnode for the filesystem instead of the
+		 * vnode for the directory.
+		 */
+		if (isadir) {
+			error = bread(pmp->pm_devvp, lbn, blsize, NOCRED, &bp);
+		} else {
+			rablock = lbn + 1;
+			if (vp->v_lastr + 1 == lbn &&
+			    de_cn2off(pmp, rablock) < dep->de_FileSize) {
+				rasize = pmp->pm_bpcluster;
+				error = breadn(vp, lbn, pmp->pm_bpcluster,
+				    &rablock, &rasize, 1, NOCRED, &bp);
+			} else
+				error = bread(vp, lbn, pmp->pm_bpcluster,
+				    NOCRED, &bp);
+			vp->v_lastr = lbn;
+		}
+		/* Only consult the buffer's residual after a good read. */
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		n = min(n, pmp->pm_bpcluster - bp->b_resid);
+		error = uiomove(bp->b_data + on, (int) n, uio);
+		brelse(bp);
+	} while (error == 0 && uio->uio_resid > 0 && n != 0);
+	if (!isadir && !(vp->v_mount->mnt_flag & MNT_NOATIME))
+		dep->de_flag |= DE_ACCESS;
+	return (error);
+}
+
+/*
+ * Write data to a regular file.  A directory vnode is refused with
+ * EISDIR; any other vnode type panics.
+ *
+ * The file is first extended to cover the whole request (filling any gap
+ * left by a seek past end of file), then the data is copied cluster by
+ * cluster.  If an error occurs and IO_UNIT is set, the file is truncated
+ * back to its old size and the uio is restored; without IO_UNIT a partial
+ * write is reported as success.
+ */
+static int
+msdosfs_write(ap)
+	struct vop_write_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		int a_ioflag;
+		struct ucred *a_cred;
+	} */ *ap;
+{
+	int n;
+	int croffset;
+	int resid;
+	u_long osize;
+	int error = 0;
+	u_long count;
+	daddr_t bn, lastcn;
+	struct buf *bp;
+	int ioflag = ap->a_ioflag;
+	struct uio *uio = ap->a_uio;
+	struct proc *p = uio->uio_procp;
+	struct vnode *vp = ap->a_vp;
+	struct vnode *thisvp;
+	struct denode *dep = VTODE(vp);
+	struct msdosfsmount *pmp = dep->de_pmp;
+	struct ucred *cred = ap->a_cred;
+
+#ifdef MSDOSFS_DEBUG
+	printf("msdosfs_write(vp %p, uio %p, ioflag %x, cred %p\n",
+	    vp, uio, ioflag, cred);
+	printf("msdosfs_write(): diroff %lu, dirclust %lu, startcluster %lu\n",
+	    dep->de_diroffset, dep->de_dirclust, dep->de_StartCluster);
+#endif
+
+	switch (vp->v_type) {
+	case VREG:
+		if (ioflag & IO_APPEND)
+			uio->uio_offset = dep->de_FileSize;
+		thisvp = vp;
+		break;
+	case VDIR:
+		return EISDIR;
+	default:
+		panic("msdosfs_write(): bad file type");
+	}
+
+	if (uio->uio_offset < 0)
+		return (EINVAL);
+
+	if (uio->uio_resid == 0)
+		return (0);
+
+	/*
+	 * If they've exceeded their filesize limit, tell them about it.
+	 */
+	if (p &&
+	    ((uio->uio_offset + uio->uio_resid) >
+	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur)) {
+		psignal(p, SIGXFSZ);
+		return (EFBIG);
+	}
+
+	/*
+	 * If the offset we are starting the write at is beyond the end of
+	 * the file, then they've done a seek. Unix filesystems allow
+	 * files with holes in them, DOS doesn't so we must fill the hole
+	 * with zeroed blocks.
+	 */
+	if (uio->uio_offset > dep->de_FileSize) {
+		error = deextend(dep, uio->uio_offset, cred);
+		if (error)
+			return (error);
+	}
+
+	/*
+	 * Remember some values in case the write fails.
+	 */
+	resid = uio->uio_resid;
+	osize = dep->de_FileSize;
+
+	/*
+	 * If we write beyond the end of the file, extend it to its ultimate
+	 * size ahead of the time to hopefully get a contiguous area.
+	 */
+	if (uio->uio_offset + resid > osize) {
+		count = de_clcount(pmp, uio->uio_offset + resid) -
+			de_clcount(pmp, osize);
+		error = extendfile(dep, count, NULL, NULL, 0);
+		/* ENOSPC is tolerated unless the write must be atomic. */
+		if (error &&  (error != ENOSPC || (ioflag & IO_UNIT)))
+			goto errexit;
+		lastcn = dep->de_fc[FC_LASTFC].fc_frcn;
+	} else
+		lastcn = de_clcount(pmp, osize) - 1;
+
+	do {
+		/* Stop once the clusters that could be allocated run out. */
+		if (de_cluster(pmp, uio->uio_offset) > lastcn) {
+			error = ENOSPC;
+			break;
+		}
+
+		croffset = uio->uio_offset & pmp->pm_crbomask;
+		n = min(uio->uio_resid, pmp->pm_bpcluster - croffset);
+		if (uio->uio_offset + n > dep->de_FileSize) {
+			dep->de_FileSize = uio->uio_offset + n;
+			/* The object size needs to be set before buffer is allocated */
+			vnode_pager_setsize(vp, dep->de_FileSize);
+		}
+
+		bn = de_cluster(pmp, uio->uio_offset);
+		if ((uio->uio_offset & pmp->pm_crbomask) == 0
+		    && (de_cluster(pmp, uio->uio_offset + uio->uio_resid)
+			> de_cluster(pmp, uio->uio_offset)
+			|| uio->uio_offset + uio->uio_resid >= dep->de_FileSize)) {
+			/*
+			 * If either the whole cluster gets written,
+			 * or we write the cluster from its start beyond EOF,
+			 * then no need to read data from disk.
+			 */
+			bp = getblk(thisvp, bn, pmp->pm_bpcluster, 0, 0);
+			clrbuf(bp);
+			/*
+			 * Do the bmap now, since pcbmap needs buffers
+			 * for the fat table. (see msdosfs_strategy)
+			 */
+			if (bp->b_blkno == bp->b_lblkno) {
+				error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno,
+				     0, 0);
+				if (error)
+					bp->b_blkno = -1;
+			}
+			if (bp->b_blkno == -1) {
+				brelse(bp);
+				if (!error)
+					error = EIO;		/* XXX */
+				break;
+			}
+		} else {
+			/*
+			 * The block we need to write into exists, so read it in.
+			 */
+			error = bread(thisvp, bn, pmp->pm_bpcluster, cred, &bp);
+			if (error) {
+				brelse(bp);
+				break;
+			}
+		}
+
+		/*
+		 * Should these vnode_pager_* functions be done on dir
+		 * files?
+		 */
+
+		/*
+		 * Copy the data from user space into the buf header.
+		 */
+		error = uiomove(bp->b_data + croffset, n, uio);
+
+		/*
+		 * If they want this synchronous then write it and wait for
+		 * it. Otherwise, if on a cluster boundary write it
+		 * asynchronously so we can move on to the next block
+		 * without delay. Otherwise do a delayed write because we
+		 * may want to write some more into the block later.
+		 */
+		if (ioflag & IO_SYNC)
+			(void) bwrite(bp);
+		else if (n + croffset == pmp->pm_bpcluster)
+			bawrite(bp);
+		else
+			bdwrite(bp);
+		dep->de_flag |= DE_UPDATE;
+	} while (error == 0 && uio->uio_resid > 0);
+
+	/*
+	 * If the write failed and they want us to, truncate the file back
+	 * to the size it was before the write was attempted.
+	 */
+errexit:
+	if (error) {
+		if (ioflag & IO_UNIT) {
+			detrunc(dep, osize, ioflag & IO_SYNC, NOCRED, NULL);
+			uio->uio_offset -= resid - uio->uio_resid;
+			uio->uio_resid = resid;
+		} else {
+			/* Keep what was written; a partial write succeeds. */
+			detrunc(dep, dep->de_FileSize, ioflag & IO_SYNC, NOCRED, NULL);
+			if (uio->uio_resid != resid)
+				error = 0;
+		}
+	} else if (ioflag & IO_SYNC)
+		error = deupdat(dep, 1);
+	return (error);
+}
+
+/*
+ * Flush the blocks of a file to disk.
+ *
+ * This function is worthless for vnodes that represent directories. Maybe we
+ * could just do a sync if they try an fsync on a directory file.
+ */ +static int +msdosfs_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + int s; + struct buf *bp, *nbp; + + /* + * Flush all dirty buffers associated with a vnode. + */ +loop: + s = splbio(); + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("msdosfs_fsync: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + (void) bwrite(bp); + goto loop; + } + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "msdosfsn", 0); + } +#ifdef DIAGNOSTIC + if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { + vprint("msdosfs_fsync: dirty", vp); + goto loop; + } +#endif + splx(s); + return (deupdat(VTODE(vp), ap->a_waitfor == MNT_WAIT)); +} + +static int +msdosfs_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct denode *dep = VTODE(ap->a_vp); + struct denode *ddep = VTODE(ap->a_dvp); + int error; + + if (ap->a_vp->v_type == VDIR) + error = EPERM; + else + error = removede(ddep, dep); +#ifdef MSDOSFS_DEBUG + printf("msdosfs_remove(), dep %p, v_usecount %d\n", dep, ap->a_vp->v_usecount); +#endif + return (error); +} + +/* + * DOS filesystems don't know what links are. But since we already called + * msdosfs_lookup() with create and lockparent, the parent is locked so we + * have to free it before we return the error. + */ +static int +msdosfs_link(ap) + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + VOP_ABORTOP(ap->a_tdvp, ap->a_cnp); + return (EOPNOTSUPP); +} + +/* + * Renames on files require moving the denode to a new hash queue since the + * denode's location is used to compute which hash queue to put the file + * in. 
Unless it is a rename in place. For example "mv a b". + * + * What follows is the basic algorithm: + * + * if (file move) { + * if (dest file exists) { + * remove dest file + * } + * if (dest and src in same directory) { + * rewrite name in existing directory slot + * } else { + * write new entry in dest directory + * update offset and dirclust in denode + * move denode to new hash chain + * clear old directory entry + * } + * } else { + * directory move + * if (dest directory exists) { + * if (dest is not empty) { + * return ENOTEMPTY + * } + * remove dest directory + * } + * if (dest and src in same directory) { + * rewrite name in existing entry + * } else { + * be sure dest is not a child of src directory + * write entry in dest directory + * update "." and ".." in moved directory + * clear old directory entry for moved directory + * } + * } + * + * On entry: + * source's parent directory is unlocked + * source file or directory is unlocked + * destination's parent directory is locked + * destination file or directory is locked if it exists + * + * On exit: + * all denodes should be released + * + * Notes: + * I'm not sure how the memory containing the pathnames pointed at by the + * componentname structures is freed, there may be some memory bleeding + * for each rename done. 
+ */ +static int +msdosfs_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + struct vnode *tvp = ap->a_tvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + struct proc *p = fcnp->cn_proc; + struct denode *ip, *xp, *dp, *zp; + u_char toname[11], oldname[11]; + u_long from_diroffset, to_diroffset; + u_char to_count; + int doingdirectory = 0, newparent = 0; + int error; + u_long cn; + daddr_t bn; + struct denode *fddep; /* from file's parent directory */ + struct denode *fdep; /* from file or directory */ + struct denode *tddep; /* to file's parent directory */ + struct denode *tdep; /* to file or directory */ + struct msdosfsmount *pmp; + struct direntry *dotdotp; + struct buf *bp; + + fddep = VTODE(ap->a_fdvp); + fdep = VTODE(ap->a_fvp); + tddep = VTODE(ap->a_tdvp); + tdep = tvp ? VTODE(tvp) : NULL; + pmp = fddep->de_pmp; + + pmp = VFSTOMSDOSFS(fdvp->v_mount); + +#ifdef DIAGNOSTIC + if ((tcnp->cn_flags & HASBUF) == 0 || + (fcnp->cn_flags & HASBUF) == 0) + panic("msdosfs_rename: no name"); +#endif + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; +abortit: + VOP_ABORTOP(tdvp, tcnp); + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + VOP_ABORTOP(fdvp, fcnp); + vrele(fdvp); + vrele(fvp); + return (error); + } + + /* + * If source and dest are the same, do nothing. + */ + if (tvp == fvp) { + error = 0; + goto abortit; + } + + error = vn_lock(fvp, LK_EXCLUSIVE, p); + if (error) + goto abortit; + dp = VTODE(fdvp); + ip = VTODE(fvp); + + /* + * Be sure we are not renaming ".", "..", or an alias of ".".
This + * leads to a crippled directory tree. It's pretty tough to do a + * "ls" or "pwd" with the "." directory entry missing, and "cd .." + * doesn't work if the ".." entry is missing. + */ + if (ip->de_Attributes & ATTR_DIRECTORY) { + /* + * Avoid ".", "..", and aliases of "." for obvious reasons. + */ + if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || + dp == ip || + (fcnp->cn_flags & ISDOTDOT) || + (tcnp->cn_flags & ISDOTDOT) || + (ip->de_flag & DE_RENAME)) { + VOP_UNLOCK(fvp, 0, p); + error = EINVAL; + goto abortit; + } + ip->de_flag |= DE_RENAME; + doingdirectory++; + } + + /* + * When the target exists, both the directory + * and target vnodes are returned locked. + */ + dp = VTODE(tdvp); + xp = tvp ? VTODE(tvp) : NULL; + /* + * Remember direntry place to use for destination + */ + to_diroffset = dp->de_fndoffset; + to_count = dp->de_fndcnt; + + /* + * If ".." must be changed (ie the directory gets a new + * parent) then the source directory must not be in the + * directory hierarchy above the target, as this would + * orphan everything below the source directory. Also + * the user must have write permission in the source so + * as to be able to change "..". We must repeat the call + * to namei, as the parent directory is unlocked by the + * call to doscheckpath(). + */ + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_proc); + VOP_UNLOCK(fvp, 0, p); + if (VTODE(fdvp)->de_StartCluster != VTODE(tdvp)->de_StartCluster) + newparent = 1; + vrele(fdvp); + if (doingdirectory && newparent) { + if (error) /* write access check above */ + goto bad; + if (xp != NULL) + vput(tvp); + /* + * doscheckpath() vput()'s dp, + * so we have to do a relookup afterwards + */ + error = doscheckpath(ip, dp); + if (error) + goto out; + if ((tcnp->cn_flags & SAVESTART) == 0) + panic("msdosfs_rename: lost to startdir"); + error = relookup(tdvp, &tvp, tcnp); + if (error) + goto out; + dp = VTODE(tdvp); + xp = tvp ? 
VTODE(tvp) : NULL; + } + + if (xp != NULL) { + /* + * Target must be empty if a directory and have no links + * to it. Also, ensure source and target are compatible + * (both directories, or both not directories). + */ + if (xp->de_Attributes & ATTR_DIRECTORY) { + if (!dosdirempty(xp)) { + error = ENOTEMPTY; + goto bad; + } + if (!doingdirectory) { + error = ENOTDIR; + goto bad; + } + cache_purge(tdvp); + } else if (doingdirectory) { + error = EISDIR; + goto bad; + } + error = removede(dp, xp); + if (error) + goto bad; + vput(tvp); + xp = NULL; + } + + /* + * Convert the filename in tcnp into a dos filename. We copy this + * into the denode and directory entry for the destination + * file/directory. + */ + error = uniqdosname(VTODE(tdvp), tcnp, toname); + if (error) /* NOTE(review): abortit returns without clearing DE_RENAME, unlike the bad/out exits - confirm this target is intended */ + goto abortit; + + /* + * Since from wasn't locked at various places above, + * have to do a relookup here. + */ + fcnp->cn_flags &= ~MODMASK; + fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; + if ((fcnp->cn_flags & SAVESTART) == 0) + panic("msdosfs_rename: lost from startdir"); + if (!newparent) + VOP_UNLOCK(tdvp, 0, p); + (void) relookup(fdvp, &fvp, fcnp); + if (fvp == NULL) { + /* + * From name has disappeared. + */ + if (doingdirectory) + panic("rename: lost dir entry"); + vrele(ap->a_fvp); + if (newparent) + VOP_UNLOCK(tdvp, 0, p); + vrele(tdvp); + return 0; + } + xp = VTODE(fvp); + zp = VTODE(fdvp); + from_diroffset = zp->de_fndoffset; + + /* + * Ensure that the directory entry still exists and has not + * changed till now. If the source is a file the entry may + * have been unlinked or renamed. In either case there is + * no further work to be done. If the source is a directory + * then it cannot have been rmdir'ed or renamed; this is + * prohibited by the DE_RENAME flag.
+ */ + if (xp != ip) { + if (doingdirectory) + panic("rename: lost dir entry"); + vrele(ap->a_fvp); + VOP_UNLOCK(fvp, 0, p); + if (newparent) + VOP_UNLOCK(fdvp, 0, p); + xp = NULL; + } else { + vrele(fvp); + xp = NULL; + + /* + * First write a new entry in the destination + * directory and mark the entry in the source directory + * as deleted. Then move the denode to the correct hash + * chain for its new location in the filesystem. And, if + * we moved a directory, then update its .. entry to point + * to the new parent directory. + */ + bcopy(ip->de_Name, oldname, 11); + bcopy(toname, ip->de_Name, 11); /* update denode */ + dp->de_fndoffset = to_diroffset; + dp->de_fndcnt = to_count; + error = createde(ip, dp, (struct denode **)0, tcnp); + if (error) { + bcopy(oldname, ip->de_Name, 11); + if (newparent) + VOP_UNLOCK(fdvp, 0, p); + VOP_UNLOCK(fvp, 0, p); + goto bad; + } + ip->de_refcnt++; + zp->de_fndoffset = from_diroffset; + error = removede(zp, ip); + if (error) { + /* XXX should really panic here, fs is corrupt */ + if (newparent) + VOP_UNLOCK(fdvp, 0, p); + VOP_UNLOCK(fvp, 0, p); + goto bad; + } + if (!doingdirectory) { + error = pcbmap(dp, de_cluster(pmp, to_diroffset), 0, + &ip->de_dirclust, 0); + if (error) { + /* XXX should really panic here, fs is corrupt */ + if (newparent) + VOP_UNLOCK(fdvp, 0, p); + VOP_UNLOCK(fvp, 0, p); + goto bad; + } + if (ip->de_dirclust == MSDOSFSROOT) + ip->de_diroffset = to_diroffset; + else + ip->de_diroffset = to_diroffset & pmp->pm_crbomask; + } + reinsert(ip); + if (newparent) + VOP_UNLOCK(fdvp, 0, p); + } + + /* + * If we moved a directory to a new parent directory, then we must + * fixup the ".." entry in the moved directory. + */ + if (doingdirectory && newparent) { + cn = ip->de_StartCluster; + if (cn == MSDOSFSROOT) { + /* this should never happen */ + panic("msdosfs_rename(): updating .. 
in root directory?"); + } else + bn = cntobn(pmp, cn); + error = bread(pmp->pm_devvp, bn, pmp->pm_bpcluster, + NOCRED, &bp); + if (error) { + /* XXX should really panic here, fs is corrupt */ + brelse(bp); + VOP_UNLOCK(fvp, 0, p); + goto bad; + } + dotdotp = (struct direntry *)bp->b_data + 1; + putushort(dotdotp->deStartCluster, dp->de_StartCluster); + if (FAT32(pmp)) + putushort(dotdotp->deHighClust, dp->de_StartCluster >> 16); + error = bwrite(bp); + if (error) { + /* XXX should really panic here, fs is corrupt */ + VOP_UNLOCK(fvp, 0, p); + goto bad; + } + } + + VOP_UNLOCK(fvp, 0, p); +bad: + if (xp) + vput(tvp); + vput(tdvp); +out: + ip->de_flag &= ~DE_RENAME; + vrele(fdvp); + vrele(fvp); + return (error); + +} + +/* Template for the first two entries ("." and "..") copied into a newly created directory cluster by msdosfs_mkdir(). */ +static struct { + struct direntry dot; + struct direntry dotdot; +} dosdirtemplate = { + { ". ", " ", /* the . entry */ + ATTR_DIRECTORY, /* file attribute */ + 0, /* reserved */ + 0, { 0, 0 }, { 0, 0 }, /* create time & date */ + { 0, 0 }, /* access date */ + { 0, 0 }, /* high bits of start cluster */ + { 210, 4 }, { 210, 4 }, /* modify time & date */ + { 0, 0 }, /* startcluster */ + { 0, 0, 0, 0 } /* filesize */ + }, + { ".. ", " ", /* the .. 
entry */ + ATTR_DIRECTORY, /* file attribute */ + 0, /* reserved */ + 0, { 0, 0 }, { 0, 0 }, /* create time & date */ + { 0, 0 }, /* access date */ + { 0, 0 }, /* high bits of start cluster */ + { 210, 4 }, { 210, 4 }, /* modify time & date */ + { 0, 0 }, /* startcluster */ + { 0, 0, 0, 0 } /* filesize */ + } +}; + +static int +msdosfs_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struvt vnode **a_vpp; + struvt componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct componentname *cnp = ap->a_cnp; + struct denode *dep; + struct denode *pdep = VTODE(ap->a_dvp); + struct direntry *denp; + struct msdosfsmount *pmp = pdep->de_pmp; + struct buf *bp; + u_long newcluster, pcl; + int bn; + int error; + struct denode ndirent; + struct timespec ts; + + /* + * If this is the root directory and there is no space left we + * can't do anything. This is because the root directory can not + * change size. + */ + if (pdep->de_StartCluster == MSDOSFSROOT + && pdep->de_fndoffset >= pdep->de_FileSize) { + error = ENOSPC; + goto bad2; + } + + /* + * Allocate a cluster to hold the about to be created directory. + */ + error = clusteralloc(pmp, 0, 1, CLUST_EOFE, &newcluster, NULL); + if (error) + goto bad2; + + bzero(&ndirent, sizeof(ndirent)); + ndirent.de_pmp = pmp; + ndirent.de_flag = DE_ACCESS | DE_CREATE | DE_UPDATE; + getnanotime(&ts); + DETIMES(&ndirent, &ts, &ts, &ts); + + /* + * Now fill the cluster with the "." and ".." entries. And write + * the cluster to disk. This way it is there for the parent + * directory to be pointing at if there were a crash. 
+ */ + bn = cntobn(pmp, newcluster); + /* always succeeds */ + bp = getblk(pmp->pm_devvp, bn, pmp->pm_bpcluster, 0, 0); + bzero(bp->b_data, pmp->pm_bpcluster); + bcopy(&dosdirtemplate, bp->b_data, sizeof dosdirtemplate); + denp = (struct direntry *)bp->b_data; + putushort(denp[0].deStartCluster, newcluster); + putushort(denp[0].deCDate, ndirent.de_CDate); + putushort(denp[0].deCTime, ndirent.de_CTime); + denp[0].deCHundredth = ndirent.de_CHun; + putushort(denp[0].deADate, ndirent.de_ADate); + putushort(denp[0].deMDate, ndirent.de_MDate); + putushort(denp[0].deMTime, ndirent.de_MTime); + pcl = pdep->de_StartCluster; + if (FAT32(pmp) && pcl == pmp->pm_rootdirblk) + pcl = 0; + putushort(denp[1].deStartCluster, pcl); + putushort(denp[1].deCDate, ndirent.de_CDate); + putushort(denp[1].deCTime, ndirent.de_CTime); + denp[1].deCHundredth = ndirent.de_CHun; + putushort(denp[1].deADate, ndirent.de_ADate); + putushort(denp[1].deMDate, ndirent.de_MDate); + putushort(denp[1].deMTime, ndirent.de_MTime); + if (FAT32(pmp)) { + putushort(denp[0].deHighClust, newcluster >> 16); + putushort(denp[1].deHighClust, pdep->de_StartCluster >> 16); + } + + error = bwrite(bp); + if (error) + goto bad; + + /* + * Now build up a directory entry pointing to the newly allocated + * cluster. This will be written to an empty slot in the parent + * directory. 
+ */ +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("msdosfs_mkdir: no name"); +#endif + error = uniqdosname(pdep, cnp, ndirent.de_Name); + if (error) + goto bad; + + ndirent.de_Attributes = ATTR_DIRECTORY; + ndirent.de_LowerCase = 0; + ndirent.de_StartCluster = newcluster; + ndirent.de_FileSize = 0; + ndirent.de_dev = pdep->de_dev; + ndirent.de_devvp = pdep->de_devvp; + error = createde(&ndirent, pdep, &dep, cnp); + if (error) + goto bad; + if ((cnp->cn_flags & SAVESTART) == 0) + zfree(namei_zone, cnp->cn_pnbuf); + *ap->a_vpp = DETOV(dep); + return (0); + +bad: + clusterfree(pmp, newcluster, NULL); +bad2: + zfree(namei_zone, cnp->cn_pnbuf); + return (error); +} + +static int +msdosfs_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vnode *dvp = ap->a_dvp; + register struct componentname *cnp = ap->a_cnp; + register struct denode *ip, *dp; + struct proc *p = cnp->cn_proc; + int error; + + ip = VTODE(vp); + dp = VTODE(dvp); + + /* + * Verify the directory is empty (and valid). + * (Rmdir ".." won't be valid since + * ".." will contain a reference to + * the current directory and thus be + * non-empty.) + */ + error = 0; + if (!dosdirempty(ip) || ip->de_flag & DE_RENAME) { + error = ENOTEMPTY; + goto out; + } + /* + * Delete the entry from the directory. For dos filesystems this + * gets rid of the directory entry on disk, the in memory copy + * still exists but the de_refcnt is <= 0. This prevents it from + * being found by deget(). When the vput() on dep is done we give + * up access and eventually msdosfs_reclaim() will be called which + * will remove it from the denode cache. + */ + error = removede(dp, ip); + if (error) + goto out; + /* + * This is where we decrement the link count in the parent + * directory. Since dos filesystems don't do this we just purge + * the name cache. 
+ */ + cache_purge(dvp); + VOP_UNLOCK(dvp, 0, p); + /* + * Truncate the directory that is being deleted. + */ + error = detrunc(ip, (u_long)0, IO_SYNC, cnp->cn_cred, p); + cache_purge(vp); + + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); +out: + return (error); +} + +/* + * DOS filesystems don't know what symlinks are. + * The pathname buffer is freed and EOPNOTSUPP is returned. + */ +static int +msdosfs_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + zfree(namei_zone, ap->a_cnp->cn_pnbuf); + /* VOP_ABORTOP(ap->a_dvp, ap->a_cnp); ??? */ + return (EOPNOTSUPP); +} + /* + * Read directory entries from a_vp into a_uio as struct dirent records. + * For the root directory the "." and ".." entries are synthesized. + * When a_ncookies is non-NULL an array of seek cookies is allocated and + * returned through a_cookies; *a_eofflag is set when a_eofflag is non-NULL. + */ +static int +msdosfs_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *a_ncookies; + u_long **a_cookies; + } */ *ap; +{ + int error = 0; + int diff; + long n; + int blsize; + long on; + u_long cn; + u_long fileno; + u_long dirsperblk; + long bias = 0; + daddr_t bn, lbn; + struct buf *bp; + struct denode *dep = VTODE(ap->a_vp); + struct msdosfsmount *pmp = dep->de_pmp; + struct direntry *dentp; + struct dirent dirbuf; + struct uio *uio = ap->a_uio; + u_long *cookies = NULL; + int ncookies = 0; + off_t offset, off; + int chksum = -1; + +#ifdef MSDOSFS_DEBUG + printf("msdosfs_readdir(): vp %p, uio %p, cred %p, eofflagp %p\n", + ap->a_vp, uio, ap->a_cred, ap->a_eofflag); +#endif + + /* + * msdosfs_readdir() won't operate properly on regular files since + * it does i/o only with the filesystem vnode, and hence can + * retrieve the wrong block from the buffer cache for a plain file. + * So, fail attempts to readdir() on a plain file.
+ */ + if ((dep->de_Attributes & ATTR_DIRECTORY) == 0) + return (ENOTDIR); + + /* + * To be safe, initialize dirbuf + */ + bzero(dirbuf.d_name, sizeof(dirbuf.d_name)); + + /* + * If the user buffer is smaller than the size of one dos directory + * entry or the file offset is not a multiple of the size of a + * directory entry, then we fail the read. + */ + off = offset = uio->uio_offset; + if (uio->uio_resid < sizeof(struct direntry) || + (offset & (sizeof(struct direntry) - 1))) + return (EINVAL); + + if (ap->a_ncookies) { + ncookies = uio->uio_resid / 16; + MALLOC(cookies, u_long *, ncookies * sizeof(u_long), M_TEMP, + M_WAITOK); + *ap->a_cookies = cookies; + *ap->a_ncookies = ncookies; + } + + dirsperblk = pmp->pm_BytesPerSec / sizeof(struct direntry); + + /* + * If they are reading from the root directory then, we simulate + * the . and .. entries since these don't exist in the root + * directory. We also set the offset bias to make up for having to + * simulate these entries. By this I mean that at file offset 64 we + * read the first entry in the root directory that lives on disk. + */ + if (dep->de_StartCluster == MSDOSFSROOT + || (FAT32(pmp) && dep->de_StartCluster == pmp->pm_rootdirblk)) { +#if 0 + printf("msdosfs_readdir(): going after . or .. 
in root dir, offset %d\n", + offset); +#endif + bias = 2 * sizeof(struct direntry); + if (offset < bias) { + for (n = (int)offset / sizeof(struct direntry); + n < 2; n++) { + if (FAT32(pmp)) + dirbuf.d_fileno = cntobn(pmp, + pmp->pm_rootdirblk) + * dirsperblk; + else + dirbuf.d_fileno = 1; + dirbuf.d_type = DT_DIR; + switch (n) { + case 0: + dirbuf.d_namlen = 1; + strcpy(dirbuf.d_name, "."); + break; + case 1: + dirbuf.d_namlen = 2; + strcpy(dirbuf.d_name, ".."); + break; + } + dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); + if (uio->uio_resid < dirbuf.d_reclen) + goto out; + error = uiomove((caddr_t) &dirbuf, + dirbuf.d_reclen, uio); + if (error) + goto out; + offset += sizeof(struct direntry); + off = offset; + if (cookies) { + *cookies++ = offset; + if (--ncookies <= 0) + goto out; + } + } + } + } + + off = offset; + while (uio->uio_resid > 0) { + lbn = de_cluster(pmp, offset - bias); + on = (offset - bias) & pmp->pm_crbomask; + n = min(pmp->pm_bpcluster - on, uio->uio_resid); + diff = dep->de_FileSize - (offset - bias); + if (diff <= 0) + break; + n = min(n, diff); + error = pcbmap(dep, lbn, &bn, &cn, &blsize); + if (error) + break; + error = bread(pmp->pm_devvp, bn, blsize, NOCRED, &bp); + if (error) { /* NOTE(review): this path returns without passing through out:, so *a_ncookies and uio_offset are not updated - confirm intended */ + brelse(bp); + return (error); + } + n = min(n, blsize - bp->b_resid); + + /* + * Convert from dos directory entries to fs-independent + * directory entries. + */ + for (dentp = (struct direntry *)(bp->b_data + on); + (char *)dentp < bp->b_data + on + n; + dentp++, offset += sizeof(struct direntry)) { +#if 0 + printf("rd: dentp %08x prev %08x crnt %08x deName %02x attr %02x\n", + dentp, prev, crnt, dentp->deName[0], dentp->deAttributes); +#endif + /* + * If this is an unused entry, we can stop. + */ + if (dentp->deName[0] == SLOT_EMPTY) { + brelse(bp); + goto out; + } + /* + * Skip deleted entries.
+ */ + if (dentp->deName[0] == SLOT_DELETED) { + chksum = -1; + continue; + } + + /* + * Handle Win95 long directory entries + */ + if (dentp->deAttributes == ATTR_WIN95) { + if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME) + continue; + chksum = win2unixfn((struct winentry *)dentp, + &dirbuf, chksum, + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, + pmp->pm_u2w); + continue; + } + + /* + * Skip volume labels + */ + if (dentp->deAttributes & ATTR_VOLUME) { + chksum = -1; + continue; + } + /* + * This computation of d_fileno must match + * the computation of va_fileid in + * msdosfs_getattr. + */ + if (dentp->deAttributes & ATTR_DIRECTORY) { + fileno = getushort(dentp->deStartCluster); + if (FAT32(pmp)) + fileno |= getushort(dentp->deHighClust) << 16; + /* if this is the root directory */ + if (fileno == MSDOSFSROOT) + if (FAT32(pmp)) + fileno = cntobn(pmp, + pmp->pm_rootdirblk) + * dirsperblk; + else + fileno = 1; + else + fileno = cntobn(pmp, fileno) * dirsperblk; + dirbuf.d_fileno = fileno; + dirbuf.d_type = DT_DIR; + } else { + dirbuf.d_fileno = offset / sizeof(struct direntry); + dirbuf.d_type = DT_REG; + } + if (chksum != winChksum(dentp->deName)) + dirbuf.d_namlen = dos2unixfn(dentp->deName, + (u_char *)dirbuf.d_name, + dentp->deLowerCase | + ((pmp->pm_flags & MSDOSFSMNT_SHORTNAME) ? 
+ (LCASE_BASE | LCASE_EXT) : 0), + pmp->pm_flags & MSDOSFSMNT_U2WTABLE, + pmp->pm_d2u, + pmp->pm_flags & MSDOSFSMNT_ULTABLE, + pmp->pm_ul); + else + dirbuf.d_name[dirbuf.d_namlen] = 0; + chksum = -1; + dirbuf.d_reclen = GENERIC_DIRSIZ(&dirbuf); + if (uio->uio_resid < dirbuf.d_reclen) { + brelse(bp); + goto out; + } + error = uiomove((caddr_t) &dirbuf, + dirbuf.d_reclen, uio); + if (error) { + brelse(bp); + goto out; + } + if (cookies) { + *cookies++ = offset + sizeof(struct direntry); + if (--ncookies <= 0) { + brelse(bp); + goto out; + } + } + off = offset + sizeof(struct direntry); + } + brelse(bp); + } +out: + /* Subtract unused cookies */ + if (ap->a_ncookies) + *ap->a_ncookies -= ncookies; + + uio->uio_offset = off; + + /* + * Set the eofflag (NFS uses it) + */ + if (ap->a_eofflag) + if (dep->de_FileSize - (offset - bias) <= 0) + *ap->a_eofflag = 1; + else + *ap->a_eofflag = 0; + + return (error); +} + /* + * Abort a pending operation: the pathname buffer is freed only when + * HASBUF is set and SAVESTART is clear. Always returns 0. + */ +static int +msdosfs_abortop(ap) + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap; +{ + if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) + zfree(namei_zone, ap->a_cnp->cn_pnbuf); + return (0); +} + +/* + * vp - address of the vnode for the file + * bn - which cluster we are interested in mapping to a filesystem block number. + * vpp - returns the vnode for the block special file holding the filesystem + * containing the file of interest + * bnp - address of where to return the filesystem relative block number + */ +static int +msdosfs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + struct denode *dep = VTODE(ap->a_vp); + + if (ap->a_vpp != NULL) + *ap->a_vpp = dep->de_devvp; + if (ap->a_bnp == NULL) + return (0); + if (ap->a_runp) { + /* + * Sequential clusters should be counted here.
+ */ + *ap->a_runp = 0; + } + if (ap->a_runb) { + *ap->a_runb = 0; + } + return (pcbmap(dep, ap->a_bn, ap->a_bnp, 0, 0)); +} + +static int +msdosfs_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + struct denode *dep = VTODE(bp->b_vp); + struct vnode *vp; + int error = 0; + + if (bp->b_vp->v_type == VBLK || bp->b_vp->v_type == VCHR) + panic("msdosfs_strategy: spec"); + /* + * If we don't already know the filesystem relative block number + * then get it using pcbmap(). If pcbmap() returns the block + * number as -1 then we've got a hole in the file. DOS filesystems + * don't allow files with holes, so we shouldn't ever see this. + */ + if (bp->b_blkno == bp->b_lblkno) { + error = pcbmap(dep, bp->b_lblkno, &bp->b_blkno, 0, 0); + if (error) { + bp->b_error = error; + bp->b_flags |= B_ERROR; + biodone(bp); + return (error); + } + if ((long)bp->b_blkno == -1) + vfs_bio_clrbuf(bp); + } + if (bp->b_blkno == -1) { + biodone(bp); + return (0); + } + /* + * Read/write the block from/to the disk that contains the desired + * file block. + */ + vp = dep->de_devvp; + bp->b_dev = vp->v_rdev; + VOP_STRATEGY(vp, bp); + return (0); +} + +static int +msdosfs_print(ap) + struct vop_print_args /* { + struct vnode *vp; + } */ *ap; +{ + struct denode *dep = VTODE(ap->a_vp); + + printf( + "tag VT_MSDOSFS, startcluster %lu, dircluster %lu, diroffset %lu ", + dep->de_StartCluster, dep->de_dirclust, dep->de_diroffset); + printf(" dev %d, %d", major(dep->de_dev), minor(dep->de_dev)); + lockmgr_printinfo(&dep->de_lock); + printf("\n"); + return (0); +} + +static int +msdosfs_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + struct msdosfsmount *pmp = VTODE(ap->a_vp)->de_pmp; + + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = 1; + return (0); + case _PC_NAME_MAX: + *ap->a_retval = pmp->pm_flags & MSDOSFSMNT_LONGNAME ? 
WIN_MAXLEN : 12; + return (0); + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; + return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); + case _PC_NO_TRUNC: + *ap->a_retval = 0; + return (0); + default: + return (EINVAL); + } + /* NOTREACHED */ +} + +/* + * get page routine + * + * XXX By default, wimp out... note that a_offset is ignored (and always + * XXX has been). + */ +int +msdosfs_getpages(ap) + struct vop_getpages_args *ap; +{ + return vnode_pager_generic_getpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_reqpage); +} + +/* + * put page routine + * + * XXX By default, wimp out... note that a_offset is ignored (and always + * XXX has been). + */ +int +msdosfs_putpages(ap) + struct vop_putpages_args *ap; +{ + return vnode_pager_generic_putpages(ap->a_vp, ap->a_m, ap->a_count, + ap->a_sync, ap->a_rtvals); +} + +/* Global vfs data structures for msdosfs */ +vop_t **msdosfs_vnodeop_p; +static struct vnodeopv_entry_desc msdosfs_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_abortop_desc, (vop_t *) msdosfs_abortop }, + { &vop_access_desc, (vop_t *) msdosfs_access }, + { &vop_bmap_desc, (vop_t *) msdosfs_bmap }, + { &vop_cachedlookup_desc, (vop_t *) msdosfs_lookup }, + { &vop_close_desc, (vop_t *) msdosfs_close }, + { &vop_create_desc, (vop_t *) msdosfs_create }, + { &vop_fsync_desc, (vop_t *) msdosfs_fsync }, + { &vop_getattr_desc, (vop_t *) msdosfs_getattr }, + { &vop_inactive_desc, (vop_t *) msdosfs_inactive }, + { &vop_islocked_desc, (vop_t *) vop_stdislocked }, + { &vop_link_desc, (vop_t *) msdosfs_link }, + { &vop_lock_desc, (vop_t *) vop_stdlock }, + { &vop_lookup_desc, (vop_t *) vfs_cache_lookup }, + { &vop_mkdir_desc, (vop_t *) msdosfs_mkdir }, + { &vop_mknod_desc, (vop_t *) msdosfs_mknod }, + { &vop_pathconf_desc, (vop_t *) msdosfs_pathconf }, + { &vop_print_desc, (vop_t *) msdosfs_print }, + { &vop_read_desc, (vop_t *) msdosfs_read }, + { &vop_readdir_desc, (vop_t *) msdosfs_readdir }, + { 
&vop_reclaim_desc, (vop_t *) msdosfs_reclaim }, + { &vop_remove_desc, (vop_t *) msdosfs_remove }, + { &vop_rename_desc, (vop_t *) msdosfs_rename }, + { &vop_rmdir_desc, (vop_t *) msdosfs_rmdir }, + { &vop_setattr_desc, (vop_t *) msdosfs_setattr }, + { &vop_strategy_desc, (vop_t *) msdosfs_strategy }, + { &vop_symlink_desc, (vop_t *) msdosfs_symlink }, + { &vop_unlock_desc, (vop_t *) vop_stdunlock }, + { &vop_write_desc, (vop_t *) msdosfs_write }, + { &vop_getpages_desc, (vop_t *) msdosfs_getpages }, + { &vop_putpages_desc, (vop_t *) msdosfs_putpages }, + { NULL, NULL } +}; +static struct vnodeopv_desc msdosfs_vnodeop_opv_desc = + { &msdosfs_vnodeop_p, msdosfs_vnodeop_entries }; + +VNODEOP_SET(msdosfs_vnodeop_opv_desc); diff --git a/sys/fs/msdosfs/msdosfsmount.h b/sys/fs/msdosfs/msdosfsmount.h new file mode 100644 index 0000000..31a2a67 --- /dev/null +++ b/sys/fs/msdosfs/msdosfsmount.h @@ -0,0 +1,239 @@ +/* $Id: msdosfsmount.h,v 1.15 1998/02/23 09:39:29 ache Exp $ */ +/* $NetBSD: msdosfsmount.h,v 1.17 1997/11/17 15:37:07 ws Exp $ */ + +/*- + * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank. + * Copyright (C) 1994, 1995, 1997 TooLs GmbH. + * All rights reserved. + * Original code by Paul Popelka (paulp@uts.amdahl.com) (see below). + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. 
The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Written by Paul Popelka (paulp@uts.amdahl.com) + * + * You can do anything you want with this software, just don't say you wrote + * it, and don't remove this notice. + * + * This software is provided "as is". + * + * The author supplies this software to be publicly redistributed on the + * understanding that the author is not responsible for the correct + * functioning of this software in any circumstances and is not liable for + * any damages caused by this software. + * + * October 1992 + */ + +#ifndef _MSDOSFS_MSDOSFSMOUNT_H_ +#define _MSDOSFS_MSDOSFSMOUNT_H_ + +#ifdef KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_MSDOSFSMNT); +#endif + +/* + * Layout of the mount control block for a msdos file system. 
+ */ +struct msdosfsmount { + struct mount *pm_mountp;/* vfs mount struct for this fs */ + dev_t pm_dev; /* block special device mounted */ + uid_t pm_uid; /* uid to set as owner of the files */ + gid_t pm_gid; /* gid to set as owner of the files */ + mode_t pm_mask; /* mask to and with file protection bits */ + struct vnode *pm_devvp; /* vnode for block device mntd */ + struct bpb50 pm_bpb; /* BIOS parameter blk for this fs */ + u_long pm_FATsecs; /* actual number of fat sectors */ + u_long pm_fatblk; /* block # of first FAT */ + u_long pm_rootdirblk; /* block # (cluster # for FAT32) of root directory number */ + u_long pm_rootdirsize; /* size in blocks (not clusters) */ + u_long pm_firstcluster; /* block number of first cluster */ + u_long pm_nmbrofclusters; /* # of clusters in filesystem */ + u_long pm_maxcluster; /* maximum cluster number */ + u_long pm_freeclustercount; /* number of free clusters */ + u_long pm_cnshift; /* shift file offset right this amount to get a cluster number */ + u_long pm_crbomask; /* and a file offset with this mask to get cluster rel offset */ + u_long pm_bnshift; /* shift file offset right this amount to get a block number */ + u_long pm_bpcluster; /* bytes per cluster */ + u_long pm_fmod; /* ~0 if fs is modified, this can rollover to 0 */ + u_long pm_fatblocksize; /* size of fat blocks in bytes */ + u_long pm_fatblocksec; /* size of fat blocks in sectors */ + u_long pm_fatsize; /* size of fat in bytes */ + u_long pm_fatmask; /* mask to use for fat numbers */ + u_long pm_fsinfo; /* fsinfo block number */ + u_long pm_nxtfree; /* next free cluster in fsinfo block */ + u_int pm_fatmult; /* these 2 values are used in fat */ + u_int pm_fatdiv; /* offset computation */ + u_int pm_curfat; /* current fat for FAT32 (0 otherwise) */ + u_int *pm_inusemap; /* ptr to bitmap of in-use clusters */ + u_int pm_flags; /* see below */ + struct netexport pm_export; /* export information */ + u_int16_t pm_u2w[128]; /* Local->Unicode table */ + u_int8_t 
pm_ul[128]; /* Local upper->lower table */ + u_int8_t pm_lu[128]; /* Local lower->upper table */ + u_int8_t pm_d2u[128]; /* DOS->local table */ + u_int8_t pm_u2d[128]; /* Local->DOS table */ +}; +/* Byte offset in FAT on filesystem pmp, cluster cn */ +#define FATOFS(pmp, cn) ((cn) * (pmp)->pm_fatmult / (pmp)->pm_fatdiv) + + +/* + * Convert a struct mount pointer to the msdosfsmount stored in its mnt_data. + * The argument is parenthesized so that any pointer expression may be passed. + */ +#define VFSTOMSDOSFS(mp) ((struct msdosfsmount *)(mp)->mnt_data) + +/* Number of bits in one pm_inusemap item: */ +#define N_INUSEBITS (8 * sizeof(u_int)) + +/* + * Shorthand for fields in the bpb contained in the msdosfsmount structure. + */ +#define pm_BytesPerSec pm_bpb.bpbBytesPerSec +#define pm_ResSectors pm_bpb.bpbResSectors +#define pm_FATs pm_bpb.bpbFATs +#define pm_RootDirEnts pm_bpb.bpbRootDirEnts +#define pm_Sectors pm_bpb.bpbSectors +#define pm_Media pm_bpb.bpbMedia +#define pm_SecPerTrack pm_bpb.bpbSecPerTrack +#define pm_Heads pm_bpb.bpbHeads +#define pm_HiddenSects pm_bpb.bpbHiddenSecs +#define pm_HugeSectors pm_bpb.bpbHugeSectors + +/* + * Convert pointer to buffer -> pointer to direntry + */ +#define bptoep(pmp, bp, dirofs) \ + ((struct direntry *)(((bp)->b_data) \ + + ((dirofs) & (pmp)->pm_crbomask))) + +/* + * Convert block number to cluster number + */ +#define de_bn2cn(pmp, bn) \ + ((bn) >> ((pmp)->pm_cnshift - (pmp)->pm_bnshift)) + +/* + * Convert cluster number to block number + */ +#define de_cn2bn(pmp, cn) \ + ((cn) << ((pmp)->pm_cnshift - (pmp)->pm_bnshift)) + +/* + * Convert file offset to cluster number + */ +#define de_cluster(pmp, off) \ + ((off) >> (pmp)->pm_cnshift) + +/* + * Clusters required to hold size bytes + */ +#define de_clcount(pmp, size) \ + (((size) + (pmp)->pm_bpcluster - 1) >> (pmp)->pm_cnshift) + +/* + * Convert file offset to block number + */ +#define de_blk(pmp, off) \ + (de_cn2bn(pmp, de_cluster((pmp), (off)))) + +/* + * Convert cluster number to file offset + */ +#define de_cn2off(pmp, cn) \ + ((cn) << (pmp)->pm_cnshift) + +/* + * Convert block number to file offset + */ +#define de_bn2off(pmp, 
bn) \ + ((bn) << (pmp)->pm_bnshift) +/* + * Map a cluster number into a filesystem relative block number. + */ +#define cntobn(pmp, cn) \ + (de_cn2bn((pmp), (cn)-CLUST_FIRST) + (pmp)->pm_firstcluster) + +/* + * Calculate block number for directory entry in root dir, offset dirofs + */ +#define roottobn(pmp, dirofs) \ + (de_blk((pmp), (dirofs)) + (pmp)->pm_rootdirblk) + +/* + * Calculate block number for directory entry at cluster dirclu, offset + * dirofs + */ +#define detobn(pmp, dirclu, dirofs) \ + ((dirclu) == MSDOSFSROOT \ + ? roottobn((pmp), (dirofs)) \ + : cntobn((pmp), (dirclu))) + +int msdosfs_init __P((struct vfsconf *vfsp)); +int msdosfs_mountroot __P((void)); + +#endif /* KERNEL */ + +/* + * Arguments to mount MSDOS filesystems. + */ +struct msdosfs_args { + char *fspec; /* blocks special holding the fs to mount */ + struct export_args export; /* network export information */ + uid_t uid; /* uid that owns msdosfs files */ + gid_t gid; /* gid that owns msdosfs files */ + mode_t mask; /* mask to be applied for msdosfs perms */ + int flags; /* see below */ + int magic; /* version number */ + u_int16_t u2w[128]; /* Local->Unicode table */ + u_int8_t ul[128]; /* Local upper->lower table */ + u_int8_t lu[128]; /* Local lower->upper table */ + u_int8_t d2u[128]; /* DOS->local table */ + u_int8_t u2d[128]; /* Local->DOS table */ +}; + +/* + * Msdosfs mount options: + */ +#define MSDOSFSMNT_SHORTNAME 1 /* Force old DOS short names only */ +#define MSDOSFSMNT_LONGNAME 2 /* Force Win'95 long names */ +#define MSDOSFSMNT_NOWIN95 4 /* Completely ignore Win95 entries */ +#ifndef __FreeBSD__ +#define MSDOSFSMNT_GEMDOSFS 8 /* This is a gemdos-flavour */ +#endif +#define MSDOSFSMNT_U2WTABLE 0x10 /* Local->Unicode and local<->DOS */ + /* tables loaded */ +#define MSDOSFSMNT_ULTABLE 0x20 /* Local upper<->lower table loaded */ +/* All flags above: */ +#define MSDOSFSMNT_MNTOPT \ + (MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \ + 
/*|MSDOSFSMNT_GEMDOSFS*/|MSDOSFSMNT_U2WTABLE|MSDOSFSMNT_ULTABLE) +#define MSDOSFSMNT_RONLY 0x80000000 /* mounted read-only */ +#define MSDOSFSMNT_WAITONFAT 0x40000000 /* mounted synchronous */ +#define MSDOSFS_FATMIRROR 0x20000000 /* FAT is mirrored */ + +#define MSDOSFS_ARGSMAGIC 0xe4eff300 + +#endif /* !_MSDOSFS_MSDOSFSMOUNT_H_ */ diff --git a/sys/fs/nullfs/null.h b/sys/fs/nullfs/null.h new file mode 100644 index 0000000..7d46a11 --- /dev/null +++ b/sys/fs/nullfs/null.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)null.h 8.3 (Berkeley) 8/20/94 + * + * $Id: null.h,v 1.7 1997/05/25 04:50:02 peter Exp $ + */ + +struct null_args { + char *target; /* Target of loopback */ +}; + +struct null_mount { + struct mount *nullm_vfs; + struct vnode *nullm_rootvp; /* Reference to root null_node */ +}; + +#ifdef KERNEL +/* + * A cache of vnode references + */ +struct null_node { + LIST_ENTRY(null_node) null_hash; /* Hash list */ + struct vnode *null_lowervp; /* VREFed once */ + struct vnode *null_vnode; /* Back pointer */ +}; + +extern int nullfs_init __P((struct vfsconf *vfsp)); +extern int null_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp)); + +#define MOUNTTONULLMOUNT(mp) ((struct null_mount *)((mp)->mnt_data)) +#define VTONULL(vp) ((struct null_node *)(vp)->v_data) +#define NULLTOV(xp) ((xp)->null_vnode) +#ifdef NULLFS_DIAGNOSTIC +extern struct vnode *null_checkvp __P((struct vnode *vp, char *fil, int lno)); +#define NULLVPTOLOWERVP(vp) null_checkvp((vp), __FILE__, __LINE__) +#else +#define NULLVPTOLOWERVP(vp) (VTONULL(vp)->null_lowervp) +#endif + +extern int null_bypass __P((struct vop_generic_args *ap)); + +extern vop_t **null_vnodeop_p; +#endif /* KERNEL */ diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c new file mode 100644 index 0000000..603f418 --- /dev/null +++ b/sys/fs/nullfs/null_subr.c @@ -0,0 +1,303 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. 
All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)null_subr.c 8.7 (Berkeley) 5/14/95 + * + * $Id: null_subr.c,v 1.18 1998/07/15 02:32:18 bde Exp $ + */ + +#include "opt_debug_nullfs.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <miscfs/nullfs/null.h> + +#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */ +#define NNULLNODECACHE 16 + +/* + * Null layer cache: + * Each cache entry holds a reference to the lower vnode + * along with a pointer to the alias vnode. When an + * entry is added the lower vnode is VREF'd. When the + * alias is removed the lower vnode is vrele'd. + */ + +#define NULL_NHASH(vp) \ + (&null_node_hashtbl[(((uintptr_t)vp)>>LOG2_SIZEVNODE) & null_node_hash]) +static LIST_HEAD(null_node_hashhead, null_node) *null_node_hashtbl; +static u_long null_node_hash; + +static int null_node_alloc __P((struct mount *mp, struct vnode *lowervp, + struct vnode **vpp)); +static struct vnode * + null_node_find __P((struct mount *mp, struct vnode *lowervp)); + +/* + * Initialise cache headers + */ +int +nullfs_init(vfsp) + struct vfsconf *vfsp; +{ + +#ifdef NULLFS_DIAGNOSTIC + printf("nullfs_init\n"); /* printed during system boot */ +#endif + null_node_hashtbl = hashinit(NNULLNODECACHE, M_CACHE, &null_node_hash); + return (0); +} + +/* + * Return a VREF'ed alias for lower vnode if already exists, else 0. + */ +static struct vnode * +null_node_find(mp, lowervp) + struct mount *mp; + struct vnode *lowervp; +{ + struct proc *p = curproc; /* XXX */ + struct null_node_hashhead *hd; + struct null_node *a; + struct vnode *vp; + + /* + * Find hash base, and then search the (two-way) linked + * list looking for a null_node structure which is referencing + * the lower vnode. If found, the increment the null_node + * reference count (but NOT the lower vnode's VREF counter). 
+ */ + hd = NULL_NHASH(lowervp); +loop: + for (a = hd->lh_first; a != 0; a = a->null_hash.le_next) { + if (a->null_lowervp == lowervp && NULLTOV(a)->v_mount == mp) { + vp = NULLTOV(a); + /* + * We need vget for the VXLOCK + * stuff, but we don't want to lock + * the lower node. + */ + if (vget(vp, 0, p)) { + printf ("null_node_find: vget failed.\n"); + goto loop; + }; + return (vp); + } + } + + return NULLVP; +} + + +/* + * Make a new null_node node. + * *vpp receives the alias vnode, lowervp is the lower vnode. + * Maintain a reference to (lowervp). + * Returns 0 on success, or the error from getnewvnode(). + */ +static int +null_node_alloc(mp, lowervp, vpp) + struct mount *mp; + struct vnode *lowervp; + struct vnode **vpp; +{ + struct null_node_hashhead *hd; + struct null_node *xp; + struct vnode *othervp, *vp; + int error; + + /* + * Do the MALLOC before the getnewvnode since doing so afterward + * might cause a bogus v_data pointer to get dereferenced + * elsewhere if MALLOC should block. + */ + MALLOC(xp, struct null_node *, sizeof(struct null_node), M_TEMP, M_WAITOK); + + error = getnewvnode(VT_NULL, mp, null_vnodeop_p, vpp); + if (error) { + FREE(xp, M_TEMP); + return (error); + } + vp = *vpp; + + vp->v_type = lowervp->v_type; + xp->null_vnode = vp; + vp->v_data = xp; + xp->null_lowervp = lowervp; + /* + * Before we insert our new node onto the hash chains, + * check to see if someone else has beaten us to it. + * (We could have slept in MALLOC.) + */ + othervp = null_node_find(mp, lowervp); + if (othervp) { + /* + * Lost the race: hand back the existing alias instead. + * NOTE(review): vp->v_data still points at the xp freed + * below; confirm nothing dereferences it when the + * discarded vnode is recycled. + */ + FREE(xp, M_TEMP); + vp->v_type = VBAD; /* node is discarded */ + vp->v_usecount = 0; /* XXX */ + *vpp = othervp; + return 0; + }; + VREF(lowervp); /* Extra VREF will be vrele'd in null_node_create */ + hd = NULL_NHASH(lowervp); + LIST_INSERT_HEAD(hd, xp, null_hash); + return 0; +} + + +/* + * Try to find an existing null_node vnode referring + * to it, otherwise make a new null_node vnode which + * contains a reference to the lower vnode. 
+ */ +int +null_node_create(mp, lowervp, newvpp) + struct mount *mp; + struct vnode *lowervp; + struct vnode **newvpp; +{ + struct vnode *aliasvp; + + aliasvp = null_node_find(mp, lowervp); + if (aliasvp) { + /* + * null_node_find has taken another reference + * to the alias vnode. + */ +#ifdef NULLFS_DIAGNOSTIC + vprint("null_node_create: exists", aliasvp); +#endif + /* VREF(aliasvp); --- done in null_node_find */ + } else { + int error; + + /* + * Get new vnode. + */ +#ifdef NULLFS_DIAGNOSTIC + printf("null_node_create: create new alias vnode\n"); +#endif + + /* + * Make new vnode reference the null_node. + */ + error = null_node_alloc(mp, lowervp, &aliasvp); + if (error) + return error; + + /* + * aliasvp is already VREF'd by getnewvnode() + */ + } + + vrele(lowervp); + +#ifdef DIAGNOSTIC + if (lowervp->v_usecount < 1) { + /* Should never happen... */ + vprint ("null_node_create: alias ", aliasvp); + vprint ("null_node_create: lower ", lowervp); + panic ("null_node_create: lower has 0 usecount."); + }; +#endif + +#ifdef NULLFS_DIAGNOSTIC + vprint("null_node_create: alias", aliasvp); + vprint("null_node_create: lower", lowervp); +#endif + + *newvpp = aliasvp; + return (0); +} + +#ifdef NULLFS_DIAGNOSTIC +#include "opt_ddb.h" + +#ifdef DDB +#define null_checkvp_barrier 1 +#else +#define null_checkvp_barrier 0 +#endif + +struct vnode * +null_checkvp(vp, fil, lno) + struct vnode *vp; + char *fil; + int lno; +{ + struct null_node *a = VTONULL(vp); +#ifdef notyet + /* + * Can't do this check because vop_reclaim runs + * with a funny vop vector. 
+ */ + if (vp->v_op != null_vnodeop_p) { + printf ("null_checkvp: on non-null-node\n"); + while (null_checkvp_barrier) /*WAIT*/ ; + panic("null_checkvp"); + }; +#endif + if (a->null_lowervp == NULLVP) { + /* Should never happen */ + int i; u_long *p; + printf("vp = %p, ZERO ptr\n", (void *)vp); + for (p = (u_long *) a, i = 0; i < 8; i++) + printf(" %lx", p[i]); + printf("\n"); + /* wait for debugger */ + while (null_checkvp_barrier) /*WAIT*/ ; + panic("null_checkvp"); + } + if (a->null_lowervp->v_usecount < 1) { + int i; u_long *p; + printf("vp = %p, unref'ed lowervp\n", (void *)vp); + for (p = (u_long *) a, i = 0; i < 8; i++) + printf(" %lx", p[i]); + printf("\n"); + /* wait for debugger */ + while (null_checkvp_barrier) /*WAIT*/ ; + panic ("null with unref'ed lowervp"); + }; +#ifdef notyet + printf("null %x/%d -> %x/%d [%s, %d]\n", + NULLTOV(a), NULLTOV(a)->v_usecount, + a->null_lowervp, a->null_lowervp->v_usecount, + fil, lno); +#endif + return a->null_lowervp; +} +#endif diff --git a/sys/fs/nullfs/null_vfsops.c b/sys/fs/nullfs/null_vfsops.c new file mode 100644 index 0000000..4ead5bd --- /dev/null +++ b/sys/fs/nullfs/null_vfsops.c @@ -0,0 +1,425 @@ +/* + * Copyright (c) 1992, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)null_vfsops.c 8.2 (Berkeley) 1/21/94 + * + * @(#)lofs_vfsops.c 1.2 (Berkeley) 6/18/92 + * $Id: null_vfsops.c,v 1.27 1998/07/30 17:40:45 bde Exp $ + */ + +/* + * Null Layer + * (See null_vnops.c for a description of what this does.) 
+ */ + +#include "opt_debug_nullfs.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <miscfs/nullfs/null.h> + +static MALLOC_DEFINE(M_NULLFSMNT, "NULLFS mount", "NULLFS mount structure"); + +static int nullfs_fhtovp __P((struct mount *mp, struct fid *fidp, + struct sockaddr *nam, struct vnode **vpp, + int *exflagsp, struct ucred **credanonp)); +static int nullfs_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int nullfs_quotactl __P((struct mount *mp, int cmd, uid_t uid, + caddr_t arg, struct proc *p)); +static int nullfs_root __P((struct mount *mp, struct vnode **vpp)); +static int nullfs_start __P((struct mount *mp, int flags, struct proc *p)); +static int nullfs_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); +static int nullfs_sync __P((struct mount *mp, int waitfor, + struct ucred *cred, struct proc *p)); +static int nullfs_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); +static int nullfs_vget __P((struct mount *mp, ino_t ino, + struct vnode **vpp)); +static int nullfs_vptofh __P((struct vnode *vp, struct fid *fhp)); + +/* + * Mount null layer + */ +static int +nullfs_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + int error = 0; + struct null_args args; + struct vnode *lowerrootvp, *vp; + struct vnode *nullm_rootvp; + struct null_mount *xmp; + u_int size; + int isvnunlocked = 0; + +#ifdef NULLFS_DIAGNOSTIC + printf("nullfs_mount(mp = %p)\n", (void *)mp); +#endif + + /* + * Update is a no-op + */ + if (mp->mnt_flag & MNT_UPDATE) { + return (EOPNOTSUPP); + /* return VFS_MOUNT(MOUNTTONULLMOUNT(mp)->nullm_vfs, path, data, ndp, p);*/ + } + + /* + * Get argument + */ + error = copyin(data, (caddr_t)&args, sizeof(struct null_args)); + 
if (error) + return (error); + + /* + * Unlock lower node to avoid deadlock. + * (XXX) VOP_ISLOCKED is needed? + */ + if ((mp->mnt_vnodecovered->v_op == null_vnodeop_p) && + VOP_ISLOCKED(mp->mnt_vnodecovered)) { + VOP_UNLOCK(mp->mnt_vnodecovered, 0, p); + isvnunlocked = 1; + } + /* + * Find lower node + */ + NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF, + UIO_USERSPACE, args.target, p); + error = namei(ndp); + /* + * Re-lock vnode. + */ + if (isvnunlocked && !VOP_ISLOCKED(mp->mnt_vnodecovered)) + vn_lock(mp->mnt_vnodecovered, LK_EXCLUSIVE | LK_RETRY, p); + + if (error) + return (error); + + /* + * Sanity check on lower vnode + */ + lowerrootvp = ndp->ni_vp; + + vrele(ndp->ni_dvp); + ndp->ni_dvp = NULLVP; + + /* + * Check multi null mount to avoid `lock against myself' panic. + */ + if (lowerrootvp == VTONULL(mp->mnt_vnodecovered)->null_lowervp) { +#ifdef DIAGNOSTIC + printf("nullfs_mount: multi null mount?\n"); +#endif + return (EDEADLK); + } + + xmp = (struct null_mount *) malloc(sizeof(struct null_mount), + M_NULLFSMNT, M_WAITOK); /* XXX */ + + /* + * Save reference to underlying FS + */ + xmp->nullm_vfs = lowerrootvp->v_mount; + + /* + * Save reference. Each mount also holds + * a reference on the root vnode. + */ + error = null_node_create(mp, lowerrootvp, &vp); + /* + * Unlock the node (either the lower or the alias) + */ + VOP_UNLOCK(vp, 0, p); + /* + * Make sure the node alias worked + */ + if (error) { + vrele(lowerrootvp); + free(xmp, M_NULLFSMNT); /* XXX */ + return (error); + } + + /* + * Keep a held reference to the root vnode. + * It is vrele'd in nullfs_unmount. 
+ */ + nullm_rootvp = vp; + nullm_rootvp->v_flag |= VROOT; + xmp->nullm_rootvp = nullm_rootvp; + if (NULLVPTOLOWERVP(nullm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_data = (qaddr_t) xmp; + vfs_getnewfsid(mp); + + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + (void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)nullfs_statfs(mp, &mp->mnt_stat, p); +#ifdef NULLFS_DIAGNOSTIC + printf("nullfs_mount: lower %s, alias at %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); +#endif + return (0); +} + +/* + * VFS start. Nothing needed here - the start routine + * on the underlying filesystem will have been called + * when that filesystem was mounted. + */ +static int +nullfs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + return (0); + /* return VFS_START(MOUNTTONULLMOUNT(mp)->nullm_vfs, flags, p); */ +} + +/* + * Free reference to null layer + */ +static int +nullfs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + struct vnode *nullm_rootvp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; + int error; + int flags = 0; + +#ifdef NULLFS_DIAGNOSTIC + printf("nullfs_unmount(mp = %p)\n", (void *)mp); +#endif + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + /* + * Clear out buffer cache. I don't think we + * ever get anything cached at this level at the + * moment, but who knows... 
+ */ +#if 0 + mntflushbuf(mp, 0); + if (mntinvalbuf(mp, 1)) + return (EBUSY); +#endif + if (nullm_rootvp->v_usecount > 1) + return (EBUSY); + error = vflush(mp, nullm_rootvp, flags); + if (error) + return (error); + +#ifdef NULLFS_DIAGNOSTIC + vprint("alias root of lower", nullm_rootvp); +#endif + /* + * Release reference on underlying root vnode + */ + vrele(nullm_rootvp); + /* + * And blow it away for future re-use + */ + vgone(nullm_rootvp); + /* + * Finally, throw away the null_mount structure + */ + free(mp->mnt_data, M_NULLFSMNT); /* XXX */ + mp->mnt_data = 0; + return 0; +} + +static int +nullfs_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + +#ifdef NULLFS_DIAGNOSTIC + printf("nullfs_root(mp = %p, vp = %p->%p)\n", (void *)mp, + (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp, + (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)); +#endif + + /* + * Return locked reference to root. + */ + vp = MOUNTTONULLMOUNT(mp)->nullm_rootvp; + VREF(vp); + if (VOP_ISLOCKED(vp)) { + /* + * XXX + * Should we check type of node? 
+ */ +#ifdef DIAGNOSTIC + printf("nullfs_root: multi null mount?\n"); +#endif + vrele(vp); + return (EDEADLK); + } else + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + *vpp = vp; + return 0; +} + +static int +nullfs_quotactl(mp, cmd, uid, arg, p) + struct mount *mp; + int cmd; + uid_t uid; + caddr_t arg; + struct proc *p; +{ + return VFS_QUOTACTL(MOUNTTONULLMOUNT(mp)->nullm_vfs, cmd, uid, arg, p); +} + +static int +nullfs_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + int error; + struct statfs mstat; + +#ifdef NULLFS_DIAGNOSTIC + printf("nullfs_statfs(mp = %p, vp = %p->%p)\n", (void *)mp, + (void *)MOUNTTONULLMOUNT(mp)->nullm_rootvp, + (void *)NULLVPTOLOWERVP(MOUNTTONULLMOUNT(mp)->nullm_rootvp)); +#endif + + bzero(&mstat, sizeof(mstat)); + + error = VFS_STATFS(MOUNTTONULLMOUNT(mp)->nullm_vfs, &mstat, p); + if (error) + return (error); + + /* now copy across the "interesting" information and fake the rest */ + sbp->f_type = mstat.f_type; + sbp->f_flags = mstat.f_flags; + sbp->f_bsize = mstat.f_bsize; + sbp->f_iosize = mstat.f_iosize; + sbp->f_blocks = mstat.f_blocks; + sbp->f_bfree = mstat.f_bfree; + sbp->f_bavail = mstat.f_bavail; + sbp->f_files = mstat.f_files; + sbp->f_ffree = mstat.f_ffree; + if (sbp != &mp->mnt_stat) { + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return (0); +} + +static int +nullfs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + /* + * XXX - Assumes no data cached at null layer. 
+ */ + return (0); +} + +static int +nullfs_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + + return VFS_VGET(MOUNTTONULLMOUNT(mp)->nullm_vfs, ino, vpp); +} + +static int +nullfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp) + struct mount *mp; + struct fid *fidp; + struct sockaddr *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred**credanonp; +{ + + return VFS_FHTOVP(MOUNTTONULLMOUNT(mp)->nullm_vfs, fidp, nam, + vpp, exflagsp, credanonp); +} + +static int +nullfs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + return VFS_VPTOFH(NULLVPTOLOWERVP(vp), fhp); +} + +static struct vfsops null_vfsops = { + nullfs_mount, + nullfs_start, + nullfs_unmount, + nullfs_root, + nullfs_quotactl, + nullfs_statfs, + nullfs_sync, + nullfs_vget, + nullfs_fhtovp, + nullfs_vptofh, + nullfs_init, +}; + +VFS_SET(null_vfsops, null, VFCF_LOOPBACK); diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c new file mode 100644 index 0000000..db5c341 --- /dev/null +++ b/sys/fs/nullfs/null_vnops.c @@ -0,0 +1,669 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * John Heidemann of the UCLA Ficus project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)null_vnops.c 8.6 (Berkeley) 5/27/95 + * + * Ancestors: + * @(#)lofs_vnops.c 1.2 (Berkeley) 6/18/92 + * $Id: null_vnops.c,v 1.31 1999/01/27 22:42:06 dillon Exp $ + * ...and... + * @(#)null_vnodeops.c 1.20 92/07/07 UCLA Ficus project + * + * $Id: null_vnops.c,v 1.31 1999/01/27 22:42:06 dillon Exp $ + */ + +/* + * Null Layer + * + * (See mount_null(8) for more information.) + * + * The null layer duplicates a portion of the file system + * name space under a new name. In this respect, it is + * similar to the loopback file system. It differs from + * the loopback fs in two respects: it is implemented using + * a stackable layers techniques, and its "null-node"s stack above + * all lower-layer vnodes, not just over directory vnodes. 
+ * + * The null layer has two purposes. First, it serves as a demonstration + * of layering by providing a layer which does nothing. (It actually + * does everything the loopback file system does, which is slightly + * more than nothing.) Second, the null layer can serve as a prototype + * layer. Since it provides all necessary layer framework, + * new file system layers can be created very easily by starting + * with a null layer. + * + * The remainder of this man page examines the null layer as a basis + * for constructing new layers. + * + * + * INSTANTIATING NEW NULL LAYERS + * + * New null layers are created with mount_null(8). + * Mount_null(8) takes two arguments, the pathname + * of the lower vfs (target-pn) and the pathname where the null + * layer will appear in the namespace (alias-pn). After + * the null layer is put into place, the contents + * of target-pn subtree will be aliased under alias-pn. + * + * + * OPERATION OF A NULL LAYER + * + * The null layer is the minimum file system layer, + * simply bypassing all possible operations to the lower layer + * for processing there. The majority of its activity centers + * on the bypass routine, through which nearly all vnode operations + * pass. + * + * The bypass routine accepts arbitrary vnode operations for + * handling by the lower layer. It begins by examining vnode + * operation arguments and replacing any null-nodes by their + * lower-layer equivalents. It then invokes the operation + * on the lower layer. Finally, it replaces the null-nodes + * in the arguments and, if a vnode is returned by the operation, + * stacks a null-node on top of the returned vnode. + * + * Although bypass handles most operations, vop_getattr, vop_lock, + * vop_unlock, vop_inactive, vop_reclaim, and vop_print are not + * bypassed. Vop_getattr must change the fsid being returned. + * Vop_lock and vop_unlock must handle any locking for the + * current vnode as well as pass the lock request down. 
+ * Vop_inactive and vop_reclaim are not bypassed so that + * they can handle freeing null-layer specific data. Vop_print + * is not bypassed to avoid excessive debugging information. + * Also, certain vnode operations change the locking state within + * the operation (create, mknod, remove, link, rename, mkdir, rmdir, + * and symlink). Ideally these operations should not change the + * lock state, but should be changed to let the caller of the + * function unlock them. Otherwise all intermediate vnode layers + * (such as union, umapfs, etc) must catch these functions to do + * the necessary locking at their layer. + * + * + * INSTANTIATING VNODE STACKS + * + * Mounting associates the null layer with a lower layer, + * in effect stacking two VFSes. Vnode stacks are instead + * created on demand as files are accessed. + * + * The initial mount creates a single vnode stack for the + * root of the new null layer. All other vnode stacks + * are created as a result of vnode operations on + * this or other null vnode stacks. + * + * New vnode stacks come into existence as a result of + * an operation which returns a vnode. + * The bypass routine stacks a null-node above the new + * vnode before returning it to the caller. + * + * For example, imagine mounting a null layer with + * "mount_null /usr/include /dev/layer/null". + * Changing directory to /dev/layer/null will assign + * the root null-node (which was created when the null layer was mounted). + * Now consider opening "sys". A vop_lookup would be + * done on the root null-node. This operation would bypass through + * to the lower layer which would return a vnode representing + * the UFS "sys". Null_bypass then builds a null-node + * aliasing the UFS "sys" and returns this to the caller. + * Later operations on the null-node "sys" will repeat this + * process when constructing other vnode stacks. 
+ * + * + * CREATING OTHER FILE SYSTEM LAYERS + * + * One of the easiest ways to construct new file system layers is to make + * a copy of the null layer, rename all files and variables, and + * then begin modifying the copy. Sed can be used to easily rename + * all variables. + * + * The umap layer is an example of a layer descended from the + * null layer. + * + * + * INVOKING OPERATIONS ON LOWER LAYERS + * + * There are two techniques to invoke operations on a lower layer + * when the operation cannot be completely bypassed. Each method + * is appropriate in different situations. In both cases, + * it is the responsibility of the aliasing layer to make + * the operation arguments "correct" for the lower layer + * by mapping any vnode arguments to the lower layer. + * + * The first approach is to call the aliasing layer's bypass routine. + * This method is most suitable when you wish to invoke the operation + * currently being handled on the lower layer. It has the advantage + * that the bypass routine already must do argument mapping. + * An example of this is null_getattr in the null layer. + * + * A second approach is to directly invoke vnode operations on + * the lower layer with the VOP_OPERATIONNAME interface. + * The advantage of this method is that it is easy to invoke + * arbitrary operations on the lower layer. The disadvantage + * is that vnode arguments must be manually mapped. 
+ * + */ + +#include "opt_debug_nullfs.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/buf.h> +#include <miscfs/nullfs/null.h> + +static int null_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ +SYSCTL_INT(_debug, OID_AUTO, nullfs_bug_bypass, CTLFLAG_RW, + &null_bug_bypass, 0, ""); + +static int null_access __P((struct vop_access_args *ap)); +static int null_bwrite __P((struct vop_bwrite_args *ap)); +static int null_getattr __P((struct vop_getattr_args *ap)); +static int null_inactive __P((struct vop_inactive_args *ap)); +static int null_lock __P((struct vop_lock_args *ap)); +static int null_lookup __P((struct vop_lookup_args *ap)); +static int null_print __P((struct vop_print_args *ap)); +static int null_reclaim __P((struct vop_reclaim_args *ap)); +static int null_setattr __P((struct vop_setattr_args *ap)); +static int null_strategy __P((struct vop_strategy_args *ap)); +static int null_unlock __P((struct vop_unlock_args *ap)); + +/* + * This is the 10-Apr-92 bypass routine. + * This version has been optimized for speed, throwing away some + * safety checks. It should still always work, but it's not as + * robust to programmer errors. + * Define SAFETY to include some error checking code. + * + * In general, we map all vnodes going down and unmap them on the way back. + * As an exception to this, vnodes can be marked "unmapped" by setting + * the Nth bit in operation's vdesc_flags. + * + * Also, some BSD vnode operations have the side effect of vrele'ing + * their arguments. With stacking, the reference counts are held + * by the upper node, not the lower one, so we must handle these + * side-effects here. This is not of concern in Sun-derived systems + * since there are no such side-effects. 
+ * + * This makes the following assumptions: + * - only one returned vpp + * - no INOUT vpp's (Sun's vop_open has one of these) + * - the vnode operation vector of the first vnode should be used + * to determine what implementation of the op should be invoked + * - all mapped vnodes are of our vnode-type (NEEDSWORK: + * problems on rmdir'ing mount points and renaming?) + */ +int +null_bypass(ap) + struct vop_generic_args /* { + struct vnodeop_desc *a_desc; + <other random data follows, presumably> + } */ *ap; +{ + register struct vnode **this_vp_p; + int error; + struct vnode *old_vps[VDESC_MAX_VPS]; + struct vnode **vps_p[VDESC_MAX_VPS]; + struct vnode ***vppp; + struct vnodeop_desc *descp = ap->a_desc; + int reles, i; + + if (null_bug_bypass) + printf ("null_bypass: %s\n", descp->vdesc_name); + +#ifdef SAFETY + /* + * We require at least one vp. + */ + if (descp->vdesc_vp_offsets == NULL || + descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) + panic ("null_bypass: no vp's in map."); +#endif + + /* + * Map the vnodes going in. + * Later, we'll invoke the operation based on + * the first mapped vnode's operation vector. + */ + reles = descp->vdesc_flags; + for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { + if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) + break; /* bail out at end of list */ + vps_p[i] = this_vp_p = + VOPARG_OFFSETTO(struct vnode**,descp->vdesc_vp_offsets[i],ap); + /* + * We're not guaranteed that any but the first vnode + * are of our type. Check for and don't map any + * that aren't. (We must always map first vp or vclean fails.) + */ + if (i && (*this_vp_p == NULLVP || + (*this_vp_p)->v_op != null_vnodeop_p)) { + old_vps[i] = NULLVP; + } else { + old_vps[i] = *this_vp_p; + *(vps_p[i]) = NULLVPTOLOWERVP(*this_vp_p); + /* + * XXX - Several operations have the side effect + * of vrele'ing their vp's. We must account for + * that. (This should go away in the future.) 
+ */ + if (reles & 1) + VREF(*this_vp_p); + } + + } + + /* + * Call the operation on the lower layer + * with the modified argument structure. + */ + error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); + + /* + * Maintain the illusion of call-by-value + * by restoring vnodes in the argument structure + * to their original value. + */ + reles = descp->vdesc_flags; + for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { + if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) + break; /* bail out at end of list */ + if (old_vps[i]) { + *(vps_p[i]) = old_vps[i]; + if (reles & 1) + vrele(*(vps_p[i])); + } + } + + /* + * Map the possible out-going vpp + * (Assumes that the lower layer always returns + * a VREF'ed vpp unless it gets an error.) + */ + if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && + !(descp->vdesc_flags & VDESC_NOMAP_VPP) && + !error) { + /* + * XXX - even though some ops have vpp returned vp's, + * several ops actually vrele this before returning. + * We must avoid these ops. + * (This should go away when these ops are regularized.) + */ + if (descp->vdesc_flags & VDESC_VPP_WILLRELE) + goto out; + vppp = VOPARG_OFFSETTO(struct vnode***, + descp->vdesc_vpp_offset,ap); + if (*vppp) + error = null_node_create(old_vps[0]->v_mount, **vppp, *vppp); + } + + out: + return (error); +} + +/* + * We have to carry on the locking protocol on the null layer vnodes + * as we progress through the tree. We also have to enforce read-only + * if this layer is mounted read-only. 
+ */ +static int +null_lookup(ap) + struct vop_lookup_args /* { + struct vnode * a_dvp; + struct vnode ** a_vpp; + struct componentname * a_cnp; + } */ *ap; +{ + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + int flags = cnp->cn_flags; + struct vop_lock_args lockargs; + struct vop_unlock_args unlockargs; + struct vnode *dvp, *vp; + int error; + + /* + * On a read-only mount, refuse DELETE and RENAME of the last + * component before the lower layer is consulted. + */ + if ((flags & ISLASTCN) && (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); + error = null_bypass((struct vop_generic_args *)ap); + /* + * If the lower layer answered EJUSTRETURN for the last component + * of a CREATE or RENAME, turn that into EROFS on a read-only mount. + */ + if (error == EJUSTRETURN && (flags & ISLASTCN) && + (ap->a_dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) + error = EROFS; + /* + * We must do the same locking and unlocking at this layer as + * is done in the layers below us. We could figure this out + * based on the error return and the LASTCN, LOCKPARENT, and + * LOCKLEAF flags. However, it is more expedient to just find + * out the state of the lower level vnodes and set ours to the + * same state. + */ + dvp = ap->a_dvp; + vp = *ap->a_vpp; + /* Parent and result are the same vnode: no lock state to adjust. */ + if (dvp == vp) + return (error); + /* Parent is reported unlocked: drop this layer's lock on it as well. */ + if (!VOP_ISLOCKED(dvp)) { + unlockargs.a_vp = dvp; + unlockargs.a_flags = 0; + unlockargs.a_p = p; + vop_nounlock(&unlockargs); + } + /* Result vnode is reported locked: take this layer's lock on it as well. */ + if (vp != NULLVP && VOP_ISLOCKED(vp)) { + lockargs.a_vp = vp; + lockargs.a_flags = LK_SHARED; + lockargs.a_p = p; + vop_nolock(&lockargs); + } + return (error); +} + +/* + * Setattr call. Disallow write attempts if the layer is mounted read-only. 
+ */ +int +null_setattr(ap) + struct vop_setattr_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + + if ((vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL) && + (vp->v_mount->mnt_flag & MNT_RDONLY)) + return (EROFS); + if (vap->va_size != VNOVAL) { + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VCHR: + case VBLK: + case VSOCK: + case VFIFO: + if (vap->va_flags != VNOVAL) + return (EOPNOTSUPP); + return (0); + case VREG: + case VLNK: + default: + /* + * Disallow write attempts if the filesystem is + * mounted read-only. + */ + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + } + } + return (null_bypass((struct vop_generic_args *)ap)); +} + +/* + * We handle getattr only to change the fsid. + */ +static int +null_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + int error; + + if ((error = null_bypass((struct vop_generic_args *)ap)) != 0) + return (error); + /* Requires that arguments be restored. */ + ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + return (0); +} + +static int +null_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + mode_t mode = ap->a_mode; + + /* + * Disallow write attempts on read-only layers; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. 
+ */ + if (mode & VWRITE) { + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + break; + default: + break; + } + } + return (null_bypass((struct vop_generic_args *)ap)); +} + +/* + * We need to process our own vnode lock and then clear the + * interlock flag as it applies only to our vnode, not the + * vnodes below us on the stack. + */ +static int +null_lock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + + vop_nolock(ap); + if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + ap->a_flags &= ~LK_INTERLOCK; + return (null_bypass((struct vop_generic_args *)ap)); +} + +/* + * We need to process our own vnode unlock and then clear the + * interlock flag as it applies only to our vnode, not the + * vnodes below us on the stack. + */ +static int +null_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + vop_nounlock(ap); + ap->a_flags &= ~LK_INTERLOCK; + return (null_bypass((struct vop_generic_args *)ap)); +} + +static int +null_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct null_node *xp = VTONULL(vp); + struct vnode *lowervp = xp->null_lowervp; + /* + * Do nothing (and _don't_ bypass). + * Wait to vrele lowervp until reclaim, + * so that until then our null_node is in the + * cache and reusable. + * We still have to tell the lower layer the vnode + * is now inactive though. + * + * NEEDSWORK: Someday, consider inactive'ing + * the lowervp and then trying to reactivate it + * with capabilities (v_id) + * like they do in the name lookup cache code. + * That's too much work for now. 
+ */ + VOP_INACTIVE(lowervp, ap->a_p); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + +static int +null_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct null_node *xp = VTONULL(vp); + struct vnode *lowervp = xp->null_lowervp; + + /* + * Note: in vop_reclaim, vp->v_op == dead_vnodeop_p, + * so we can't call VOPs on ourself. + */ + /* After this assignment, this node will not be re-used. */ + xp->null_lowervp = NULLVP; + LIST_REMOVE(xp, null_hash); + FREE(vp->v_data, M_TEMP); + vp->v_data = NULL; + vrele (lowervp); + return (0); +} + +static int +null_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + printf ("\ttag VT_NULLFS, vp=%p, lowervp=%p\n", vp, NULLVPTOLOWERVP(vp)); + return (0); +} + +/* + * XXX - vop_strategy must be hand coded because it has no + * vnode in its arguments. + * This goes away with a merged VM/buffer cache. + */ +static int +null_strategy(ap) + struct vop_strategy_args /* { + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + int error; + struct vnode *savedvp; + + savedvp = bp->b_vp; + bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); + + error = VOP_STRATEGY(bp->b_vp, bp); + + bp->b_vp = savedvp; + + return (error); +} + +/* + * XXX - like vop_strategy, vop_bwrite must be hand coded because it has no + * vnode in its arguments. + * This goes away with a merged VM/buffer cache. 
+ */ +static int +null_bwrite(ap) + struct vop_bwrite_args /* { + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + int error; + struct vnode *savedvp; + + savedvp = bp->b_vp; + bp->b_vp = NULLVPTOLOWERVP(bp->b_vp); + + error = VOP_BWRITE(bp); + + bp->b_vp = savedvp; + + return (error); +} + +/* + * Global vfs data structures + */ +vop_t **null_vnodeop_p; +static struct vnodeopv_entry_desc null_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) null_bypass }, + { &vop_access_desc, (vop_t *) null_access }, + { &vop_bwrite_desc, (vop_t *) null_bwrite }, + { &vop_getattr_desc, (vop_t *) null_getattr }, + { &vop_inactive_desc, (vop_t *) null_inactive }, + { &vop_lock_desc, (vop_t *) null_lock }, + { &vop_lookup_desc, (vop_t *) null_lookup }, + { &vop_print_desc, (vop_t *) null_print }, + { &vop_reclaim_desc, (vop_t *) null_reclaim }, + { &vop_setattr_desc, (vop_t *) null_setattr }, + { &vop_strategy_desc, (vop_t *) null_strategy }, + { &vop_unlock_desc, (vop_t *) null_unlock }, + { NULL, NULL } +}; +static struct vnodeopv_desc null_vnodeop_opv_desc = + { &null_vnodeop_p, null_vnodeop_entries }; + +VNODEOP_SET(null_vnodeop_opv_desc); diff --git a/sys/fs/portalfs/portal.h b/sys/fs/portalfs/portal.h new file mode 100644 index 0000000..d60826e --- /dev/null +++ b/sys/fs/portalfs/portal.h @@ -0,0 +1,71 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)portal.h 8.4 (Berkeley) 1/21/94 + * + * $Id: portal.h,v 1.4 1997/02/22 09:40:24 peter Exp $ + */ + +struct portal_args { + char *pa_config; /* Config file */ + int pa_socket; /* Socket to server */ +}; + +struct portal_cred { + int pcr_flag; /* File open mode */ + uid_t pcr_uid; /* From ucred */ + short pcr_ngroups; /* From ucred */ + gid_t pcr_groups[NGROUPS]; /* From ucred */ +}; + +#ifdef KERNEL +struct portalmount { + struct vnode *pm_root; /* Root node */ + struct file *pm_server; /* Held reference to server socket */ +}; + +struct portalnode { + int pt_size; /* Length of Arg */ + char *pt_arg; /* Arg to send to server */ + int pt_fileid; /* cookie */ +}; + +#define VFSTOPORTAL(mp) ((struct portalmount *)((mp)->mnt_data)) +#define VTOPORTAL(vp) ((struct portalnode *)(vp)->v_data) + +#define PORTAL_ROOTFILEID 2 + +extern vop_t **portal_vnodeop_p; +#endif /* KERNEL */ diff --git a/sys/fs/portalfs/portal_vfsops.c b/sys/fs/portalfs/portal_vfsops.c new file mode 100644 index 0000000..633bf77 --- /dev/null +++ b/sys/fs/portalfs/portal_vfsops.c @@ -0,0 +1,292 @@ +/* + * Copyright (c) 1992, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)portal_vfsops.c 8.11 (Berkeley) 5/14/95 + * + * $Id: portal_vfsops.c,v 1.21 1998/05/06 05:29:35 msmith Exp $ + */ + +/* + * Portal Filesystem + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/filedesc.h> +#include <sys/file.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/protosw.h> +#include <sys/domain.h> +#include <miscfs/portal/portal.h> + +static MALLOC_DEFINE(M_PORTALFSMNT, "PORTAL mount", "PORTAL mount structure"); + +static int portal_init __P((struct vfsconf *)); +static int portal_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int portal_start __P((struct mount *mp, int flags, struct proc *p)); +static int portal_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); +static int portal_root __P((struct mount *mp, struct vnode **vpp)); +static int portal_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); + +static int +portal_init(vfsp) + struct vfsconf *vfsp; +{ + + return (0); +} + +/* + * Mount a portal filesystem. The mount arguments (struct portal_args) + * carry the descriptor of the socket to the portal server. + */ +static int +portal_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct file *fp; + struct portal_args args; + struct portalmount *fmp; + struct socket *so; + struct vnode *rvp; + struct portalnode *pn; + u_int size; + int error; + + /* + * Mount updates are not supported + */ + if (mp->mnt_flag & MNT_UPDATE) + return (EOPNOTSUPP); + + error = copyin(data, (caddr_t) &args, sizeof(struct portal_args)); + if (error) + return (error); + + error = getsock(p->p_fd, args.pa_socket, &fp); + if (error) + return (error); + so = (struct socket *) fp->f_data; + /* Only a socket in the AF_UNIX domain is accepted as the server. */ + if (so->so_proto->pr_domain->dom_family != AF_UNIX) + return (ESOCKTNOSUPPORT); + + MALLOC(pn, struct portalnode *, sizeof(struct portalnode), 
+ M_TEMP, M_WAITOK); + + MALLOC(fmp, struct portalmount *, sizeof(struct portalmount), + M_PORTALFSMNT, M_WAITOK); /* XXX */ + + error = getnewvnode(VT_PORTAL, mp, portal_vnodeop_p, &rvp); /* XXX */ + if (error) { + FREE(fmp, M_PORTALFSMNT); + FREE(pn, M_TEMP); + return (error); + } + + rvp->v_data = pn; + rvp->v_type = VDIR; + rvp->v_flag |= VROOT; + VTOPORTAL(rvp)->pt_arg = 0; + VTOPORTAL(rvp)->pt_size = 0; + VTOPORTAL(rvp)->pt_fileid = PORTAL_ROOTFILEID; + fmp->pm_root = rvp; + fmp->pm_server = fp; fp->f_count++; + + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_data = (qaddr_t) fmp; + vfs_getnewfsid(mp); + + (void)copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + (void)copyinstr(args.pa_config, + mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + +#ifdef notdef + bzero(mp->mnt_stat.f_mntfromname, MNAMELEN); + bcopy("portal", mp->mnt_stat.f_mntfromname, sizeof("portal")); +#endif + + (void)portal_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +static int +portal_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + + return (0); +} + +static int +portal_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + struct vnode *rootvp = VFSTOPORTAL(mp)->pm_root; + int error, flags = 0; + + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + /* + * Clear out buffer cache. I don't think we + * ever get anything cached at this level at the + * moment, but who knows... + */ +#ifdef notyet + mntflushbuf(mp, 0); + if (mntinvalbuf(mp, 1)) + return (EBUSY); +#endif + if (rootvp->v_usecount > 1) + return (EBUSY); + error = vflush(mp, rootvp, flags); + if (error) + return (error); + + /* + * Release reference on underlying root vnode + */ + vrele(rootvp); + /* + * And blow it away for future re-use + */ + vgone(rootvp); + /* + * Shutdown the socket. 
This will cause the select in the + * daemon to wake up, and then the accept will get ECONNABORTED + * which it interprets as a request to go and bury itself. + */ + soshutdown((struct socket *) VFSTOPORTAL(mp)->pm_server->f_data, 2); + /* + * Discard reference to underlying file. Must call closef because + * this may be the last reference. + */ + closef(VFSTOPORTAL(mp)->pm_server, (struct proc *) 0); + /* + * Finally, throw away the portalmount structure + */ + free(mp->mnt_data, M_PORTALFSMNT); /* XXX */ + mp->mnt_data = 0; + return (0); +} + +static int +portal_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + + /* + * Return locked reference to root. + */ + vp = VFSTOPORTAL(mp)->pm_root; + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + *vpp = vp; + return (0); +} + +static int +portal_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + + sbp->f_flags = 0; + sbp->f_bsize = DEV_BSIZE; + sbp->f_iosize = DEV_BSIZE; + sbp->f_blocks = 2; /* 1K to keep df happy */ + sbp->f_bfree = 0; + sbp->f_bavail = 0; + sbp->f_files = 1; /* Allow for "." 
*/ + sbp->f_ffree = 0; /* See comments above */ + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return (0); +} + +#define portal_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define portal_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define portal_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))nullop) +#define portal_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define portal_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define portal_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) + +static struct vfsops portal_vfsops = { + portal_mount, + portal_start, + portal_unmount, + portal_root, + portal_quotactl, + portal_statfs, + portal_sync, + portal_vget, + portal_fhtovp, + portal_vptofh, + portal_init, +}; + +VFS_SET(portal_vfsops, portal, VFCF_SYNTHETIC); diff --git a/sys/fs/portalfs/portal_vnops.c b/sys/fs/portalfs/portal_vnops.c new file mode 100644 index 0000000..819d636 --- /dev/null +++ b/sys/fs/portalfs/portal_vnops.c @@ -0,0 +1,607 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)portal_vnops.c 8.14 (Berkeley) 5/21/95 + * + * $Id: portal_vnops.c,v 1.34 1998/12/07 21:58:32 archie Exp $ + */ + +/* + * Portal Filesystem + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/sysproto.h> +#include <sys/kernel.h> +#include <sys/time.h> +#include <sys/proc.h> +#include <sys/filedesc.h> +#include <sys/vnode.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <sys/namei.h> +#include <sys/mbuf.h> +#include <sys/socket.h> +#include <sys/socketvar.h> +#include <sys/un.h> +#include <sys/unpcb.h> +#include <miscfs/portal/portal.h> + +static int portal_fileid = PORTAL_ROOTFILEID+1; + +static int portal_badop __P((void)); +static void portal_closefd __P((struct proc *p, int fd)); +static int portal_connect __P((struct socket *so, struct socket *so2)); +static int portal_getattr __P((struct vop_getattr_args *ap)); +static int portal_inactive __P((struct vop_inactive_args *ap)); +static int portal_lookup __P((struct vop_lookup_args *ap)); +static int portal_open __P((struct vop_open_args *ap)); +static int portal_print __P((struct vop_print_args *ap)); +static int portal_readdir __P((struct vop_readdir_args *ap)); +static int portal_reclaim __P((struct vop_reclaim_args *ap)); +static int portal_setattr __P((struct vop_setattr_args *ap)); + +static void +portal_closefd(p, fd) + struct proc *p; + int fd; +{ + int error; + struct close_args ua; + + ua.fd = fd; + error = close(p, &ua); + /* + * We should never get an error, and there isn't anything + * we could do if we got one, so just print a message. + */ + if (error) + printf("portal_closefd: error = %d\n", error); +} + +/* + * vp is the current namei directory + * cnp is the name to locate in that directory... 
+ */
+/*
+ * On success *a_vpp is set and 0 is returned.  Looking up "." yields the
+ * directory itself with an extra reference; any other name yields a new
+ * VREG vnode whose portalnode holds a private copy of the name string
+ * (everything from cn_nameptr up to and including the terminating NUL).
+ * DELETE and RENAME lookups fail with EROFS; a getnewvnode() failure is
+ * returned unchanged.
+ */
+static int
+portal_lookup(ap)
+	struct vop_lookup_args /* {
+		struct vnode * a_dvp;
+		struct vnode ** a_vpp;
+		struct componentname * a_cnp;
+	} */ *ap;
+{
+	struct componentname *cnp = ap->a_cnp;
+	struct vnode **vpp = ap->a_vpp;
+	struct vnode *dvp = ap->a_dvp;
+	char *pname = cnp->cn_nameptr;
+	struct portalnode *pt;
+	int error;
+	struct vnode *fvp = 0;
+	char *path;
+	int size;
+
+	*vpp = NULLVP;
+
+	/* Names can be neither removed nor renamed here. */
+	if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)
+		return (EROFS);
+
+	/* "." is the directory being searched: hand it back with a new ref. */
+	if (cnp->cn_namelen == 1 && *pname == '.') {
+		*vpp = dvp;
+		VREF(dvp);
+		/*VOP_LOCK(dvp);*/
+		return (0);
+	}
+
+	/*
+	 * Do the MALLOC before the getnewvnode since doing so afterward
+	 * might cause a bogus v_data pointer to get dereferenced
+	 * elsewhere if MALLOC should block.
+	 */
+	MALLOC(pt, struct portalnode *, sizeof(struct portalnode),
+		M_TEMP, M_WAITOK);
+
+	error = getnewvnode(VT_PORTAL, dvp->v_mount, portal_vnodeop_p, &fvp);
+	if (error) {
+		FREE(pt, M_TEMP);
+		goto bad;
+	}
+	fvp->v_type = VREG;
+	fvp->v_data = pt;
+	/*
+	 * Save all of the remaining pathname and
+	 * advance the namei next pointer to the end
+	 * of the string.
+	 */
+	/* Hand-rolled string length: size counts characters before the NUL. */
+	for (size = 0, path = pname; *path; path++)
+		size++;
+	/* Number of characters that follow the current component. */
+	cnp->cn_consume = size - cnp->cn_namelen;
+
+	/* pt_size counts the terminating NUL, which bcopy copies as well. */
+	pt->pt_arg = malloc(size+1, M_TEMP, M_WAITOK);
+	pt->pt_size = size+1;
+	bcopy(pname, pt->pt_arg, pt->pt_size);
+	/* File ids are handed out sequentially from a file-scope counter. */
+	pt->pt_fileid = portal_fileid++;
+
+	*vpp = fvp;
+	/*VOP_LOCK(fvp);*/
+	return (0);
+
+	/*
+	 * Only reached when getnewvnode() fails; fvp is released if it was
+	 * nevertheless set.
+	 */
+bad:;
+	if (fvp)
+		vrele(fvp);
+	return (error);
+}
+
+static int
+portal_connect(so, so2)
+	struct socket *so;
+	struct socket *so2;
+{
+	/* from unp_connect, bypassing the namei stuff... 
*/ + struct socket *so3; + struct unpcb *unp2; + struct unpcb *unp3; + + if (so2 == 0) + return (ECONNREFUSED); + + if (so->so_type != so2->so_type) + return (EPROTOTYPE); + + if ((so2->so_options & SO_ACCEPTCONN) == 0) + return (ECONNREFUSED); + + if ((so3 = sonewconn(so2, 0)) == 0) + return (ECONNREFUSED); + + unp2 = sotounpcb(so2); + unp3 = sotounpcb(so3); + if (unp2->unp_addr) + unp3->unp_addr = (struct sockaddr_un *) + dup_sockaddr((struct sockaddr *)unp2->unp_addr, 0); + so2 = so3; + + return (unp_connect2(so, so2)); +} + +static int +portal_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct socket *so = 0; + struct portalnode *pt; + struct proc *p = ap->a_p; + struct vnode *vp = ap->a_vp; + int s; + struct uio auio; + struct iovec aiov[2]; + int res; + struct mbuf *cm = 0; + struct cmsghdr *cmsg; + int newfds; + int *ip; + int fd; + int error; + int len; + struct portalmount *fmp; + struct file *fp; + struct portal_cred pcred; + + /* + * Nothing to do when opening the root node. + */ + if (vp->v_flag & VROOT) + return (0); + + /* + * Can't be opened unless the caller is set up + * to deal with the side effects. Check for this + * by testing whether the p_dupfd has been set. + */ + if (p->p_dupfd >= 0) + return (ENODEV); + + pt = VTOPORTAL(vp); + fmp = VFSTOPORTAL(vp->v_mount); + + /* + * Create a new socket. 
+ */ + error = socreate(AF_UNIX, &so, SOCK_STREAM, 0, ap->a_p); + if (error) + goto bad; + + /* + * Reserve some buffer space + */ + res = pt->pt_size + sizeof(pcred) + 512; /* XXX */ + error = soreserve(so, res, res); + if (error) + goto bad; + + /* + * Kick off connection + */ + error = portal_connect(so, (struct socket *)fmp->pm_server->f_data); + if (error) + goto bad; + + /* + * Wait for connection to complete + */ + /* + * XXX: Since the mount point is holding a reference on the + * underlying server socket, it is not easy to find out whether + * the server process is still running. To handle this problem + * we loop waiting for the new socket to be connected (something + * which will only happen if the server is still running) or for + * the reference count on the server socket to drop to 1, which + * will happen if the server dies. Sleep for 5 second intervals + * and keep polling the reference count. XXX. + */ + s = splnet(); + while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { + if (fmp->pm_server->f_count == 1) { + error = ECONNREFUSED; + splx(s); + goto bad; + } + (void) tsleep((caddr_t) &so->so_timeo, PSOCK, "portalcon", 5 * hz); + } + splx(s); + + if (so->so_error) { + error = so->so_error; + goto bad; + } + + /* + * Set miscellaneous flags + */ + so->so_rcv.sb_timeo = 0; + so->so_snd.sb_timeo = 0; + so->so_rcv.sb_flags |= SB_NOINTR; + so->so_snd.sb_flags |= SB_NOINTR; + + + pcred.pcr_flag = ap->a_mode; + pcred.pcr_uid = ap->a_cred->cr_uid; + pcred.pcr_ngroups = ap->a_cred->cr_ngroups; + bcopy(ap->a_cred->cr_groups, pcred.pcr_groups, NGROUPS * sizeof(gid_t)); + aiov[0].iov_base = (caddr_t) &pcred; + aiov[0].iov_len = sizeof(pcred); + aiov[1].iov_base = pt->pt_arg; + aiov[1].iov_len = pt->pt_size; + auio.uio_iov = aiov; + auio.uio_iovcnt = 2; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_procp = p; + auio.uio_offset = 0; + auio.uio_resid = aiov[0].iov_len + aiov[1].iov_len; + + error = sosend(so, (struct sockaddr 
*) 0, &auio, + (struct mbuf *) 0, (struct mbuf *) 0, 0, p); + if (error) + goto bad; + + len = auio.uio_resid = sizeof(int); + do { + struct mbuf *m = 0; + int flags = MSG_WAITALL; + error = soreceive(so, (struct sockaddr **) 0, &auio, + &m, &cm, &flags); + if (error) + goto bad; + + /* + * Grab an error code from the mbuf. + */ + if (m) { + m = m_pullup(m, sizeof(int)); /* Needed? */ + if (m) { + error = *(mtod(m, int *)); + m_freem(m); + } else { + error = EINVAL; + } + } else { + if (cm == 0) { + error = ECONNRESET; /* XXX */ +#ifdef notdef + break; +#endif + } + } + } while (cm == 0 && auio.uio_resid == len && !error); + + if (cm == 0) + goto bad; + + if (auio.uio_resid) { + error = 0; +#ifdef notdef + error = EMSGSIZE; + goto bad; +#endif + } + + /* + * XXX: Break apart the control message, and retrieve the + * received file descriptor. Note that more than one descriptor + * may have been received, or that the rights chain may have more + * than a single mbuf in it. What to do? + */ + cmsg = mtod(cm, struct cmsghdr *); + newfds = (cmsg->cmsg_len - sizeof(*cmsg)) / sizeof (int); + if (newfds == 0) { + error = ECONNREFUSED; + goto bad; + } + /* + * At this point the rights message consists of a control message + * header, followed by a data region containing a vector of + * integer file descriptors. The fds were allocated by the action + * of receiving the control message. + */ + ip = (int *) (cmsg + 1); + fd = *ip++; + if (newfds > 1) { + /* + * Close extra fds. + */ + int i; + printf("portal_open: %d extra fds\n", newfds - 1); + for (i = 1; i < newfds; i++) { + portal_closefd(p, *ip); + ip++; + } + } + + /* + * Check that the mode the file is being opened for is a subset + * of the mode of the existing descriptor. 
+ */ + fp = p->p_fd->fd_ofiles[fd]; + if (((ap->a_mode & (FREAD|FWRITE)) | fp->f_flag) != fp->f_flag) { + portal_closefd(p, fd); + error = EACCES; + goto bad; + } + + /* + * Save the dup fd in the proc structure then return the + * special error code (ENXIO) which causes magic things to + * happen in vn_open. The whole concept is, well, hmmm. + */ + p->p_dupfd = fd; + error = ENXIO; + +bad:; + /* + * And discard the control message. + */ + if (cm) { + m_freem(cm); + } + + if (so) { + soshutdown(so, 2); + soclose(so); + } + return (error); +} + +static int +portal_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + + bzero(vap, sizeof(*vap)); + vattr_null(vap); + vap->va_uid = 0; + vap->va_gid = 0; + vap->va_fsid = vp->v_mount->mnt_stat.f_fsid.val[0]; + vap->va_size = DEV_BSIZE; + vap->va_blocksize = DEV_BSIZE; + nanotime(&vap->va_atime); + vap->va_mtime = vap->va_atime; + vap->va_ctime = vap->va_ctime; + vap->va_gen = 0; + vap->va_flags = 0; + vap->va_rdev = 0; + /* vap->va_qbytes = 0; */ + vap->va_bytes = 0; + /* vap->va_qsize = 0; */ + if (vp->v_flag & VROOT) { + vap->va_type = VDIR; + vap->va_mode = S_IRUSR|S_IWUSR|S_IXUSR| + S_IRGRP|S_IWGRP|S_IXGRP| + S_IROTH|S_IWOTH|S_IXOTH; + vap->va_nlink = 2; + vap->va_fileid = 2; + } else { + vap->va_type = VREG; + vap->va_mode = S_IRUSR|S_IWUSR| + S_IRGRP|S_IWGRP| + S_IROTH|S_IWOTH; + vap->va_nlink = 1; + vap->va_fileid = VTOPORTAL(vp)->pt_fileid; + } + return (0); +} + +static int +portal_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + /* + * Can't mess with the root vnode + */ + if (ap->a_vp->v_flag & VROOT) + return (EACCES); + + if (ap->a_vap->va_flags != VNOVAL) + return (EOPNOTSUPP); + + return (0); +} + +/* + * Fake readdir, just return empty directory. 
+ * It is hard to deal with '.' and '..' so don't bother.
+ */
+/*
+ * Returns 0 without touching a_uio, so the caller sees no entries.
+ * Panics if the caller asks for directory cookies.
+ */
+static int
+portal_readdir(ap)
+	struct vop_readdir_args /* {
+		struct vnode *a_vp;
+		struct uio *a_uio;
+		struct ucred *a_cred;
+		int *a_eofflag;
+		u_long *a_cookies;
+		int a_ncookies;
+	} */ *ap;
+{
+
+	/*
+	 * We don't allow exporting portal mounts, and currently local
+	 * requests do not need cookies.
+	 */
+	if (ap->a_ncookies)
+		panic("portal_readdir: not hungry");
+
+	return (0);
+}
+
+/*
+ * Inactive operation: unlocks the vnode and returns 0.  No per-node
+ * state is released here; that is left to portal_reclaim().
+ */
+static int
+portal_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct proc *a_p;
+	} */ *ap;
+{
+
+	VOP_UNLOCK(ap->a_vp, 0, ap->a_p);
+	return (0);
+}
+
+/*
+ * Reclaim operation: frees the saved name string (pt_arg) if there is
+ * one, then the portalnode itself, and clears v_data.  Always returns 0.
+ */
+static int
+portal_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+	struct portalnode *pt = VTOPORTAL(ap->a_vp);
+
+	if (pt->pt_arg) {
+		free((caddr_t) pt->pt_arg, M_TEMP);
+		pt->pt_arg = 0;
+	}
+	/* pt and v_data refer to the same allocation (see VTOPORTAL above). */
+	FREE(ap->a_vp->v_data, M_TEMP);
+	ap->a_vp->v_data = 0;
+
+	return (0);
+}
+
+
+/*
+ * Print out the contents of a Portal vnode.
+ */
+/* ARGSUSED */
+static int
+portal_print(ap)
+	struct vop_print_args /* {
+		struct vnode *a_vp;
+	} */ *ap;
+{
+
+	/* Only a fixed tag line is printed; a_vp is not examined. */
+	printf("tag VT_PORTAL, portal vnode\n");
+	return (0);
+}
+
+
+/*
+ * Portal "should never get here" operation
+ */
+/* Installed for the bmap operation in the table below; it panics. */
+static int
+portal_badop()
+{
+
+	panic("portal: bad op");
+	/* NOTREACHED */
+}
+
+/*
+ * Vnode operations vector for portal vnodes.  The vop_default_desc entry
+ * names vop_defaultop as the handler for operations not listed here.
+ */
+vop_t **portal_vnodeop_p;
+static struct vnodeopv_entry_desc portal_vnodeop_entries[] = {
+	{ &vop_default_desc,		(vop_t *) vop_defaultop },
+	{ &vop_access_desc,		(vop_t *) vop_null },
+	{ &vop_bmap_desc,		(vop_t *) portal_badop },
+	{ &vop_getattr_desc,		(vop_t *) portal_getattr },
+	{ &vop_inactive_desc,		(vop_t *) portal_inactive },
+	{ &vop_lookup_desc,		(vop_t *) portal_lookup },
+	{ &vop_open_desc,		(vop_t *) portal_open },
+	{ &vop_pathconf_desc,		(vop_t *) vop_stdpathconf },
+	{ &vop_print_desc,		(vop_t *) portal_print },
+	{ &vop_readdir_desc,		(vop_t *) portal_readdir },
+	{ &vop_reclaim_desc,		(vop_t *) portal_reclaim },
+	{ &vop_setattr_desc,		(vop_t *) portal_setattr },
+ { NULL, NULL } +}; +static struct vnodeopv_desc portal_vnodeop_opv_desc = + { &portal_vnodeop_p, portal_vnodeop_entries }; + +VNODEOP_SET(portal_vnodeop_opv_desc); diff --git a/sys/fs/procfs/README b/sys/fs/procfs/README new file mode 100644 index 0000000..5f1b6cc --- /dev/null +++ b/sys/fs/procfs/README @@ -0,0 +1,113 @@ +saute procfs lyonnais + +procfs supports two levels of directory. the filesystem root +directory contains a representation of the system process table. +this consists of an entry for each active and zombie process, and +an additional entry "curproc" which always represents the process +making the lookup request. + +each of the sub-directories contains several files. these files +are used to control and interrogate processes. the files implemented +are: + + file - xxx. the exec'ed file. + + status - r/o. returns process status. + + ctl - w/o. sends a control message to the process. + for example: + echo hup > /proc/curproc/note + will send a SIGHUP to the shell. + whereas + echo attach > /proc/1293/ctl + would set up process 1293 for debugging. + see below for more details. + + mem - r/w. virtual memory image of the process. + parts of the address space are readable + only if they exist in the target process. + a more reasonable alternative might be + to return zero pages instead of an error. + comments? + + note - w/o. writing a string here sends the + equivalent note to the process. + [ not implemented. ] + + notepg - w/o. the same as note, but sends to all + members of the process group. + [ not implemented. ] + + regs - r/w. process register set. this can be read + or written any time even if the process + is not stopped. since the bsd kernel + is single-processor, this implementation + will get the "right" register values. + a multi-proc kernel would need to do some + synchronisation. 
+ +this then looks like: + +% ls -li /proc +total 0 + 9 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 0 + 17 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 1 + 89 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 10 + 25 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 2 +2065 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 257 +2481 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 309 + 265 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 32 +3129 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 390 +3209 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 400 +3217 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 401 +3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 408 + 393 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 48 + 409 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 50 + 465 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 57 + 481 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 59 + 537 dr-xr-xr-x 2 root kmem 0 Sep 21 15:06 66 + 545 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 67 + 657 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 81 + 665 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 82 + 673 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 83 + 681 dr-xr-xr-x 2 root wheel 0 Sep 21 15:06 84 +3273 dr-xr-xr-x 2 jsp staff 0 Sep 21 15:06 curproc +% ls -li /proc/curproc +total 408 +3341 --w------- 1 jsp staff 0 Sep 21 15:06 ctl +1554 -r-xr-xr-x 1 bin bin 90112 Mar 29 04:52 file +3339 -rw------- 1 jsp staff 118784 Sep 21 15:06 mem +3343 --w------- 1 jsp staff 0 Sep 21 15:06 note +3344 --w------- 1 jsp staff 0 Sep 21 15:06 notepg +3340 -rw------- 1 jsp staff 0 Sep 21 15:06 regs +3342 -r--r--r-- 1 jsp staff 0 Sep 21 15:06 status +% df /proc/curproc /proc/curproc/file +Filesystem 512-blocks Used Avail Capacity Mounted on +proc 2 2 0 100% /proc +/dev/wd0a 16186 13548 1018 93% / +% cat /proc/curproc/status +cat 446 439 400 81 12,0 ctty 748620684 270000 0 0 0 20000 nochan 11 20 20 20 0 21 117 + + + +the basic sequence of commands written to "ctl" would be + + attach - this stops the target process and + arranges for the sending process + to become the debug control process + wait - wait for the target process to come to + a steady 
state ready for debugging. + step - single step, with no signal delivery. + run - continue running, with no signal delivery, + until next trap or breakpoint. + <signame> - deliver signal <signame> and continue running. + detach - continue execution of the target process + and remove it from control by the debug process + +in a normal debugging environment, where the target is fork/exec'd by +the debugger, the debugger should fork and the child should stop itself +(with a self-inflicted SIGSTOP). the parent should do a "wait" then an +"attach". as before, the child will hit a breakpoint on the first +instruction in any newly exec'd image. + +$Id$ diff --git a/sys/fs/procfs/procfs.h b/sys/fs/procfs/procfs.h new file mode 100644 index 0000000..619e1b2 --- /dev/null +++ b/sys/fs/procfs/procfs.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs.h 8.9 (Berkeley) 5/14/95 + * + * From: + * $Id: procfs.h,v 1.20 1998/07/07 04:08:44 bde Exp $ + */ + +/* + * The different types of node in a procfs filesystem + */ +typedef enum { + Proot, /* the filesystem root */ + Pcurproc, /* symbolic link for curproc */ + Pproc, /* a process-specific sub-directory */ + Pfile, /* the executable file */ + Pmem, /* the process's memory image */ + Pregs, /* the process's register set */ + Pfpregs, /* the process's FP register set */ + Pctl, /* process control */ + Pstatus, /* process status */ + Pnote, /* process notifier */ + Pnotepg, /* process group notifier */ + Pmap, /* memory map */ + Ptype, /* executable type */ + Pcmdline /* command line */ +} pfstype; + +/* + * control data for the proc file system. 
+ */ +struct pfsnode { + struct pfsnode *pfs_next; /* next on list */ + struct vnode *pfs_vnode; /* vnode associated with this pfsnode */ + pfstype pfs_type; /* type of procfs node */ + pid_t pfs_pid; /* associated process */ + u_short pfs_mode; /* mode bits for stat() */ + u_long pfs_flags; /* open flags */ + u_long pfs_fileno; /* unique file id */ + pid_t pfs_lockowner; /* pfs lock owner */ +}; + +#define PROCFS_NOTELEN 64 /* max length of a note (/proc/$pid/note) */ +#define PROCFS_CTLLEN 8 /* max length of a ctl msg (/proc/$pid/ctl */ + +/* + * Kernel stuff follows + */ +#ifdef KERNEL +#define CNEQ(cnp, s, len) \ + ((cnp)->cn_namelen == (len) && \ + (bcmp((s), (cnp)->cn_nameptr, (len)) == 0)) + +#define KMEM_GROUP 2 + +/* + * Check to see whether access to target process is allowed + * Evaluates to 1 if access is allowed. + */ +#define CHECKIO(p1, p2) \ + ((((p1)->p_cred->pc_ucred->cr_uid == (p2)->p_cred->p_ruid) && \ + ((p1)->p_cred->p_ruid == (p2)->p_cred->p_ruid) && \ + ((p1)->p_cred->p_svuid == (p2)->p_cred->p_ruid) && \ + ((p2)->p_flag & P_SUGID) == 0) || \ + (suser((p1)->p_cred->pc_ucred, &(p1)->p_acflag) == 0)) + +/* + * Format of a directory entry in /proc, ... + * This must map onto struct dirent (see <dirent.h>) + */ +#define PROCFS_NAMELEN 8 +struct pfsdent { + u_int32_t d_fileno; + u_int16_t d_reclen; + u_int8_t d_type; + u_int8_t d_namlen; + char d_name[PROCFS_NAMELEN]; +}; +#define UIO_MX sizeof(struct pfsdent) +#define PROCFS_FILENO(pid, type) \ + (((type) < Pproc) ? 
\ + ((type) + 2) : \ + ((((pid)+1) << 4) + ((int) (type)))) + +/* + * Convert between pfsnode vnode + */ +#define VTOPFS(vp) ((struct pfsnode *)(vp)->v_data) +#define PFSTOV(pfs) ((pfs)->pfs_vnode) + +typedef struct vfs_namemap vfs_namemap_t; +struct vfs_namemap { + const char *nm_name; + int nm_val; +}; + +int vfs_getuserstr __P((struct uio *, char *, int *)); +vfs_namemap_t *vfs_findname __P((vfs_namemap_t *, char *, int)); + +/* <machine/reg.h> */ +struct reg; +struct fpreg; + +#define PFIND(pid) ((pid) ? pfind(pid) : &proc0) + +void procfs_exit __P((struct proc *)); +int procfs_freevp __P((struct vnode *)); +int procfs_allocvp __P((struct mount *, struct vnode **, long, pfstype)); +struct vnode *procfs_findtextvp __P((struct proc *)); +int procfs_sstep __P((struct proc *)); +void procfs_fix_sstep __P((struct proc *)); +int procfs_read_regs __P((struct proc *, struct reg *)); +int procfs_write_regs __P((struct proc *, struct reg *)); +int procfs_read_fpregs __P((struct proc *, struct fpreg *)); +int procfs_write_fpregs __P((struct proc *, struct fpreg *)); +int procfs_donote __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_doregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_dofpregs __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_domem __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_doctl __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_dostatus __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_domap __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_dotype __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); +int procfs_docmdline __P((struct proc *, struct proc *, struct pfsnode *pfsp, struct uio *uio)); + +/* Return 1 if process has special kernel digging privileges 
*/ +int procfs_kmemaccess __P((struct proc *)); + +/* functions to check whether or not files should be displayed */ +int procfs_validfile __P((struct proc *)); +int procfs_validfpregs __P((struct proc *)); +int procfs_validregs __P((struct proc *)); +int procfs_validmap __P((struct proc *)); +int procfs_validtype __P((struct proc *)); + +#define PROCFS_LOCKED 0x01 +#define PROCFS_WANT 0x02 + +extern vop_t **procfs_vnodeop_p; + +int procfs_root __P((struct mount *, struct vnode **)); +int procfs_rw __P((struct vop_read_args *)); +#endif /* KERNEL */ diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c new file mode 100644 index 0000000..21724e5 --- /dev/null +++ b/sys/fs/procfs/procfs_ctl.c @@ -0,0 +1,315 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs_ctl.c 8.4 (Berkeley) 6/15/94 + * + * From: + * $Id: procfs_ctl.c,v 1.16 1997/04/27 21:32:21 alex Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/ptrace.h> +#include <sys/signalvar.h> +#include <miscfs/procfs/procfs.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#ifndef FIX_SSTEP +#define FIX_SSTEP(p) +#endif + +/* + * True iff process (p) is in trace wait state + * relative to process (curp) + */ +#define TRACE_WAIT_P(curp, p) \ + ((p)->p_stat == SSTOP && \ + (p)->p_pptr == (curp) && \ + ((p)->p_flag & P_TRACED)) + +#define PROCFS_CTL_ATTACH 1 +#define PROCFS_CTL_DETACH 2 +#define PROCFS_CTL_STEP 3 +#define PROCFS_CTL_RUN 4 +#define PROCFS_CTL_WAIT 5 + +static vfs_namemap_t ctlnames[] = { + /* special /proc commands */ + { "attach", PROCFS_CTL_ATTACH }, + { "detach", PROCFS_CTL_DETACH }, + { "step", PROCFS_CTL_STEP }, + { "run", PROCFS_CTL_RUN }, + { "wait", PROCFS_CTL_WAIT }, + { 0 }, +}; + +static vfs_namemap_t signames[] = { + /* regular signal names */ + { "hup", SIGHUP }, { "int", SIGINT }, + { "quit", SIGQUIT }, { "ill", SIGILL }, + { "trap", SIGTRAP }, { "abrt", SIGABRT }, + { "iot", 
SIGIOT }, { "emt", SIGEMT }, + { "fpe", SIGFPE }, { "kill", SIGKILL }, + { "bus", SIGBUS }, { "segv", SIGSEGV }, + { "sys", SIGSYS }, { "pipe", SIGPIPE }, + { "alrm", SIGALRM }, { "term", SIGTERM }, + { "urg", SIGURG }, { "stop", SIGSTOP }, + { "tstp", SIGTSTP }, { "cont", SIGCONT }, + { "chld", SIGCHLD }, { "ttin", SIGTTIN }, + { "ttou", SIGTTOU }, { "io", SIGIO }, + { "xcpu", SIGXCPU }, { "xfsz", SIGXFSZ }, + { "vtalrm", SIGVTALRM }, { "prof", SIGPROF }, + { "winch", SIGWINCH }, { "info", SIGINFO }, + { "usr1", SIGUSR1 }, { "usr2", SIGUSR2 }, + { 0 }, +}; + +static int procfs_control __P((struct proc *curp, struct proc *p, int op)); + +static int +procfs_control(curp, p, op) + struct proc *curp; + struct proc *p; + int op; +{ + int error; + + /* + * Attach - attaches the target process for debugging + * by the calling process. + */ + if (op == PROCFS_CTL_ATTACH) { + /* check whether already being traced */ + if (p->p_flag & P_TRACED) + return (EBUSY); + + /* can't trace yourself! */ + if (p->p_pid == curp->p_pid) + return (EINVAL); + + /* can't trace init when securelevel > 0 */ + if (securelevel > 0 && p->p_pid == 1) + return (EPERM); + + /* + * Go ahead and set the trace flag. + * Save the old parent (it's reset in + * _DETACH, and also in kern_exit.c:wait4() + * Reparent the process so that the tracing + * proc gets to see all the action. + * Stop the target. + */ + p->p_flag |= P_TRACED; + faultin(p); + p->p_xstat = 0; /* XXX ? */ + if (p->p_pptr != curp) { + p->p_oppid = p->p_pptr->p_pid; + proc_reparent(p, curp); + } + psignal(p, SIGSTOP); + return (0); + } + + /* + * Target process must be stopped, owned by (curp) and + * be set up for tracing (P_TRACED flag set). + * Allow DETACH to take place at any time for sanity. + * Allow WAIT any time, of course. 
+ */ + switch (op) { + case PROCFS_CTL_DETACH: + case PROCFS_CTL_WAIT: + break; + + default: + if (!TRACE_WAIT_P(curp, p)) + return (EBUSY); + } + + +#ifdef FIX_SSTEP + /* + * do single-step fixup if needed + */ + FIX_SSTEP(p); +#endif + + /* + * Don't deliver any signal by default. + * To continue with a signal, just send + * the signal name to the ctl file + */ + p->p_xstat = 0; + + switch (op) { + /* + * Detach. Cleans up the target process, reparent it if possible + * and set it running once more. + */ + case PROCFS_CTL_DETACH: + /* if not being traced, then this is a painless no-op */ + if ((p->p_flag & P_TRACED) == 0) + return (0); + + /* not being traced any more */ + p->p_flag &= ~P_TRACED; + + /* remove pending SIGTRAP, else the process will die */ + p->p_siglist &= ~sigmask (SIGTRAP); + + /* give process back to original parent */ + if (p->p_oppid != p->p_pptr->p_pid) { + struct proc *pp; + + pp = pfind(p->p_oppid); + if (pp) + proc_reparent(p, pp); + } + + p->p_oppid = 0; + p->p_flag &= ~P_WAITED; /* XXX ? */ + wakeup((caddr_t) curp); /* XXX for CTL_WAIT below ? */ + + break; + + /* + * Step. Let the target process execute a single instruction. + */ + case PROCFS_CTL_STEP: + PHOLD(p); + error = procfs_sstep(p); + PRELE(p); + if (error) + return (error); + break; + + /* + * Run. Let the target process continue running until a breakpoint + * or some other trap. + */ + case PROCFS_CTL_RUN: + break; + + /* + * Wait for the target process to stop. 
+ * If the target is not being traced then just wait + * to enter + */ + case PROCFS_CTL_WAIT: + error = 0; + if (p->p_flag & P_TRACED) { + while (error == 0 && + (p->p_stat != SSTOP) && + (p->p_flag & P_TRACED) && + (p->p_pptr == curp)) { + error = tsleep((caddr_t) p, + PWAIT|PCATCH, "procfsx", 0); + } + if (error == 0 && !TRACE_WAIT_P(curp, p)) + error = EBUSY; + } else { + while (error == 0 && p->p_stat != SSTOP) { + error = tsleep((caddr_t) p, + PWAIT|PCATCH, "procfs", 0); + } + } + return (error); + + default: + panic("procfs_control"); + } + + if (p->p_stat == SSTOP) + setrunnable(p); + return (0); +} + +int +procfs_doctl(curp, p, pfs, uio) + struct proc *curp; + struct pfsnode *pfs; + struct uio *uio; + struct proc *p; +{ + int xlen; + int error; + char msg[PROCFS_CTLLEN+1]; + vfs_namemap_t *nm; + + if (uio->uio_rw != UIO_WRITE) + return (EOPNOTSUPP); + + xlen = PROCFS_CTLLEN; + error = vfs_getuserstr(uio, msg, &xlen); + if (error) + return (error); + + /* + * Map signal names into signal generation + * or debug control. Unknown commands and/or signals + * return EOPNOTSUPP. + * + * Sending a signal while the process is being debugged + * also has the side effect of letting the target continue + * to run. There is no way to single-step a signal delivery. + */ + error = EOPNOTSUPP; + + nm = vfs_findname(ctlnames, msg, xlen); + if (nm) { + error = procfs_control(curp, p, nm->nm_val); + } else { + nm = vfs_findname(signames, msg, xlen); + if (nm) { + if (TRACE_WAIT_P(curp, p)) { + p->p_xstat = nm->nm_val; +#ifdef FIX_SSTEP + FIX_SSTEP(p); +#endif + setrunnable(p); + } else { + psignal(p, nm->nm_val); + } + error = 0; + } + } + + return (error); +} diff --git a/sys/fs/procfs/procfs_fpregs.c b/sys/fs/procfs/procfs_fpregs.c new file mode 100644 index 0000000..14c3fd3 --- /dev/null +++ b/sys/fs/procfs/procfs_fpregs.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. 
All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)procfs_fpregs.c 8.2 (Berkeley) 6/15/94 + * + * From: + * $Id: procfs_fpregs.c,v 1.7 1997/08/02 14:32:11 bde Exp $ + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <machine/reg.h> +#include <miscfs/procfs/procfs.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> + +int +procfs_dofpregs(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + int error; + struct fpreg r; + char *kv; + int kl; + + if (!CHECKIO(curp, p)) + return EPERM; + kl = sizeof(r); + kv = (char *) &r; + + kv += uio->uio_offset; + kl -= uio->uio_offset; + if (kl > uio->uio_resid) + kl = uio->uio_resid; + + PHOLD(p); + + if (kl < 0) + error = EINVAL; + else + error = procfs_read_fpregs(p, &r); + if (error == 0) + error = uiomove(kv, kl, uio); + if (error == 0 && uio->uio_rw == UIO_WRITE) { + if (p->p_stat != SSTOP) + error = EBUSY; + else + error = procfs_write_fpregs(p, &r); + } + PRELE(p); + + uio->uio_offset = 0; + return (error); +} + +int +procfs_validfpregs(p) + struct proc *p; +{ + return ((p->p_flag & P_SYSTEM) == 0); +} diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c new file mode 100644 index 0000000..c6b8966 --- /dev/null +++ b/sys/fs/procfs/procfs_map.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94 + * + * $Id: procfs_map.c,v 1.18 1998/12/04 22:54:51 archie Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <miscfs/procfs/procfs.h> + +#include <vm/vm.h> +#include <vm/vm_prot.h> +#include <sys/lock.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_page.h> +#include <vm/vm_object.h> + + +#define MEBUFFERSIZE 256 + +/* + * The map entries can *almost* be read with programs like cat. 
However, + * large maps need special programs to read. It is not easy to implement + * a program that can sense the required size of the buffer, and then + * subsequently do a read with the appropriate size. This operation cannot + * be atomic. The best that we can do is to allow the program to do a read + * with an arbitrarily large buffer, and return as much as we can. We can + * return an error code if the buffer is too small (EFBIG), then the program + * can try a bigger buffer. + */ +int +procfs_domap(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + int len; + int error; + vm_map_t map = &p->p_vmspace->vm_map; + pmap_t pmap = &p->p_vmspace->vm_pmap; + vm_map_entry_t entry; + char mebuffer[MEBUFFERSIZE]; + + if (uio->uio_rw != UIO_READ) + return (EOPNOTSUPP); + + if (uio->uio_offset != 0) + return (0); + + error = 0; + if (map != &curproc->p_vmspace->vm_map) + vm_map_lock_read(map); + for (entry = map->header.next; + ((uio->uio_resid > 0) && (entry != &map->header)); + entry = entry->next) { + vm_object_t obj, tobj, lobj; + int ref_count, shadow_count, flags; + vm_offset_t addr; + int resident, privateresident; + char *type; + + if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) + continue; + + obj = entry->object.vm_object; + if (obj && (obj->shadow_count == 1)) + privateresident = obj->resident_page_count; + else + privateresident = 0; + + resident = 0; + addr = entry->start; + while (addr < entry->end) { + if (pmap_extract( pmap, addr)) + resident++; + addr += PAGE_SIZE; + } + + for( lobj = tobj = obj; tobj; tobj = tobj->backing_object) + lobj = tobj; + + if (lobj) { + switch(lobj->type) { + +default: +case OBJT_DEFAULT: + type = "default"; + break; +case OBJT_VNODE: + type = "vnode"; + break; +case OBJT_SWAP: + type = "swap"; + break; +case OBJT_DEVICE: + type = "device"; + break; + } + + flags = obj->flags; + ref_count = obj->ref_count; + shadow_count = obj->shadow_count; + } else { + type 
= "none"; + flags = 0; + ref_count = 0; + shadow_count = 0; + } + + + /* + * format: + * start, end, resident, private resident, cow, access, type. + */ + snprintf(mebuffer, sizeof(mebuffer), + "0x%x 0x%x %d %d %p %s%s%s %d %d 0x%x %s %s %s\n", + entry->start, entry->end, + resident, privateresident, obj, + (entry->protection & VM_PROT_READ)?"r":"-", + (entry->protection & VM_PROT_WRITE)?"w":"-", + (entry->protection & VM_PROT_EXECUTE)?"x":"-", + ref_count, shadow_count, flags, + (entry->eflags & MAP_ENTRY_COW)?"COW":"NCOW", + (entry->eflags & MAP_ENTRY_NEEDS_COPY)?"NC":"NNC", + type); + + len = strlen(mebuffer); + if (len > uio->uio_resid) { + error = EFBIG; + break; + } + error = uiomove(mebuffer, len, uio); + if (error) + break; + } + if (map != &curproc->p_vmspace->vm_map) + vm_map_unlock_read(map); + return error; +} + +int +procfs_validmap(p) + struct proc *p; +{ + return ((p->p_flag & P_SYSTEM) == 0); +} diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c new file mode 100644 index 0000000..22d8f74 --- /dev/null +++ b/sys/fs/procfs/procfs_mem.c @@ -0,0 +1,342 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 Sean Eric Fagan + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry and Sean Eric Fagan. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)procfs_mem.c 8.5 (Berkeley) 6/15/94 + * + * $Id: procfs_mem.c,v 1.34 1998/07/15 02:32:19 bde Exp $ + */ + +/* + * This is a lightly hacked and merged version + * of sef's pread/pwrite functions + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <miscfs/procfs/procfs.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_prot.h> +#include <sys/lock.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_kern.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_extern.h> +#include <sys/user.h> +#include <sys/ptrace.h> + +static int procfs_rwmem __P((struct proc *curp, + struct proc *p, struct uio *uio)); + +static int +procfs_rwmem(curp, p, uio) + struct proc *curp; + struct proc *p; + struct uio *uio; +{ + int error; + int writing; + struct vmspace *vm; + vm_map_t map; + vm_object_t object = NULL; + vm_offset_t pageno = 0; /* page number */ + vm_prot_t reqprot; + vm_offset_t kva; + + /* + * if the vmspace is in the midst of being deallocated or the + * process is exiting, don't try to grab anything. The page table + * usage in that process can be messed up. + */ + vm = p->p_vmspace; + if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1)) + return EFAULT; + ++vm->vm_refcnt; + /* + * The map we want... + */ + map = &vm->vm_map; + + writing = uio->uio_rw == UIO_WRITE; + reqprot = writing ? (VM_PROT_WRITE | VM_PROT_OVERRIDE_WRITE) : VM_PROT_READ; + + kva = kmem_alloc_pageable(kernel_map, PAGE_SIZE); + + /* + * Only map in one page at a time. We don't have to, but it + * makes things easier. This way is trivial - right? + */ + do { + vm_map_t tmap; + vm_offset_t uva; + int page_offset; /* offset into page */ + vm_map_entry_t out_entry; + vm_prot_t out_prot; + boolean_t wired; + vm_pindex_t pindex; + u_int len; + vm_page_t m; + + object = NULL; + + uva = (vm_offset_t) uio->uio_offset; + + /* + * Get the page number of this segment. 
+ */ + pageno = trunc_page(uva); + page_offset = uva - pageno; + + /* + * How many bytes to copy + */ + len = min(PAGE_SIZE - page_offset, uio->uio_resid); + + if (uva >= VM_MAXUSER_ADDRESS) { + vm_offset_t tkva; + + if (writing || + uva >= VM_MAXUSER_ADDRESS + UPAGES * PAGE_SIZE || + (ptrace_read_u_check(p, + uva - (vm_offset_t) VM_MAXUSER_ADDRESS, + (size_t) len) && + !procfs_kmemaccess(curp))) { + error = 0; + break; + } + + /* we are reading the "U area", force it into core */ + PHOLD(p); + + /* sanity check */ + if (!(p->p_flag & P_INMEM)) { + /* aiee! */ + PRELE(p); + error = EFAULT; + break; + } + + /* populate the ptrace/procfs area */ + p->p_addr->u_kproc.kp_proc = *p; + fill_eproc (p, &p->p_addr->u_kproc.kp_eproc); + + /* locate the in-core address */ + tkva = (uintptr_t)p->p_addr + uva - VM_MAXUSER_ADDRESS; + + /* transfer it */ + error = uiomove((caddr_t)tkva, len, uio); + + /* let the pages go */ + PRELE(p); + + continue; + } + + /* + * Fault the page on behalf of the process + */ + error = vm_fault(map, pageno, reqprot, FALSE); + if (error) { + error = EFAULT; + break; + } + + /* + * Now we need to get the page. out_entry, out_prot, wired, + * and single_use aren't used. One would think the vm code + * would be a *bit* nicer... We use tmap because + * vm_map_lookup() can change the map argument. + */ + tmap = map; + error = vm_map_lookup(&tmap, pageno, reqprot, + &out_entry, &object, &pindex, &out_prot, + &wired); + + if (error) { + error = EFAULT; + + /* + * Make sure that there is no residue in 'object' from + * an error return on vm_map_lookup. 
+ */ + object = NULL; + + break; + } + + m = vm_page_lookup(object, pindex); + + /* Allow fallback to backing objects if we are reading */ + + while (m == NULL && !writing && object->backing_object) { + + pindex += OFF_TO_IDX(object->backing_object_offset); + object = object->backing_object; + + m = vm_page_lookup(object, pindex); + } + + if (m == NULL) { + error = EFAULT; + + /* + * Make sure that there is no residue in 'object' from + * an error return on vm_map_lookup. + */ + object = NULL; + + vm_map_lookup_done(tmap, out_entry); + + break; + } + + /* + * Wire the page into memory + */ + vm_page_wire(m); + + /* + * We're done with tmap now. + * But reference the object first, so that we won't loose + * it. + */ + vm_object_reference(object); + vm_map_lookup_done(tmap, out_entry); + + pmap_kenter(kva, VM_PAGE_TO_PHYS(m)); + + /* + * Now do the i/o move. + */ + error = uiomove((caddr_t)(kva + page_offset), len, uio); + + pmap_kremove(kva); + + /* + * release the page and the object + */ + vm_page_unwire(m, 1); + vm_object_deallocate(object); + + object = NULL; + + } while (error == 0 && uio->uio_resid > 0); + + if (object) + vm_object_deallocate(object); + + kmem_free(kernel_map, kva, PAGE_SIZE); + vmspace_free(vm); + return (error); +} + +/* + * Copy data in and out of the target process. + * We do this by mapping the process's page into + * the kernel and then doing a uiomove direct + * from the kernel address space. + */ +int +procfs_domem(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + + if (uio->uio_resid == 0) + return (0); + + /* + * XXX + * We need to check for KMEM_GROUP because ps is sgid kmem; + * not allowing it here causes ps to not work properly. Arguably, + * this is a bug with what ps does. We only need to do this + * for Pmem nodes, and only if it's reading. This is still not + * good, as it may still be possible to grab illicit data if + * a process somehow gets to be KMEM_GROUP. 
Note that this also + * means that KMEM_GROUP can't change without editing procfs.h! + * All in all, quite yucky. + */ + + if (!CHECKIO(curp, p) && + !(uio->uio_rw == UIO_READ && + procfs_kmemaccess(curp))) + return EPERM; + + return (procfs_rwmem(curp, p, uio)); +} + +/* + * Given process (p), find the vnode from which + * its text segment is being executed. + * + * It would be nice to grab this information from + * the VM system, however, there is no sure-fire + * way of doing that. Instead, fork(), exec() and + * wait() all maintain the p_textvp field in the + * process proc structure which contains a held + * reference to the exec'ed vnode. + */ +struct vnode * +procfs_findtextvp(p) + struct proc *p; +{ + + return (p->p_textvp); +} + +int procfs_kmemaccess(curp) + struct proc *curp; +{ + int i; + struct ucred *cred; + + cred = curp->p_cred->pc_ucred; + if (suser(cred, &curp->p_acflag)) + return 1; + + for (i = 0; i < cred->cr_ngroups; i++) + if (cred->cr_groups[i] == KMEM_GROUP) + return 1; + + return 0; +} diff --git a/sys/fs/procfs/procfs_note.c b/sys/fs/procfs/procfs_note.c new file mode 100644 index 0000000..8bfde33 --- /dev/null +++ b/sys/fs/procfs/procfs_note.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)procfs_note.c 8.2 (Berkeley) 1/21/94 + * + * $Id: procfs_note.c,v 1.4 1997/02/22 09:40:28 peter Exp $ + */ + +#include <sys/param.h> +#include <sys/vnode.h> +#include <miscfs/procfs/procfs.h> + +int +procfs_donote(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + int xlen; + int error; + char note[PROCFS_NOTELEN+1]; + + if (uio->uio_rw != UIO_WRITE) + return (EINVAL); + + xlen = PROCFS_NOTELEN; + error = vfs_getuserstr(uio, note, &xlen); + if (error) + return (error); + + /* send to process's notify function */ + return (EOPNOTSUPP); +} diff --git a/sys/fs/procfs/procfs_regs.c b/sys/fs/procfs/procfs_regs.c new file mode 100644 index 0000000..d215d44 --- /dev/null +++ b/sys/fs/procfs/procfs_regs.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs_regs.c 8.4 (Berkeley) 6/15/94 + * + * From: + * $Id: procfs_regs.c,v 1.7 1997/08/02 14:32:16 bde Exp $ + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <machine/reg.h> +#include <miscfs/procfs/procfs.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> + +int +procfs_doregs(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + int error; + struct reg r; + char *kv; + int kl; + + if (!CHECKIO(curp, p)) + return EPERM; + kl = sizeof(r); + kv = (char *) &r; + + kv += uio->uio_offset; + kl -= uio->uio_offset; + if (kl > uio->uio_resid) + kl = uio->uio_resid; + + PHOLD(p); + + if (kl < 0) + error = EINVAL; + else + error = procfs_read_regs(p, &r); + if (error == 0) + error = uiomove(kv, kl, uio); + if (error == 0 && uio->uio_rw == UIO_WRITE) { + if (p->p_stat != SSTOP) + error = EBUSY; + else + error = procfs_write_regs(p, &r); + } + PRELE(p); + + uio->uio_offset = 0; + return (error); +} + +int +procfs_validregs(p) + struct proc *p; +{ + return ((p->p_flag & P_SYSTEM) == 0); +} diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c new file mode 100644 index 
0000000..3176a64 --- /dev/null +++ b/sys/fs/procfs/procfs_status.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs_status.c 8.4 (Berkeley) 6/15/94 + * + * From: + * $Id: procfs_status.c,v 1.11 1998/07/11 07:45:45 bde Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/tty.h> +#include <sys/resourcevar.h> +#include <miscfs/procfs/procfs.h> + +int +procfs_dostatus(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + struct session *sess; + struct tty *tp; + struct ucred *cr; + char *ps; + char *sep; + int pid, ppid, pgid, sid; + int i; + int xlen; + int error; + char psbuf[256]; /* XXX - conservative */ + + if (uio->uio_rw != UIO_READ) + return (EOPNOTSUPP); + + pid = p->p_pid; + ppid = p->p_pptr ? p->p_pptr->p_pid : 0, + pgid = p->p_pgrp->pg_id; + sess = p->p_pgrp->pg_session; + sid = sess->s_leader ? sess->s_leader->p_pid : 0; + +/* comm pid ppid pgid sid maj,min ctty,sldr start ut st wmsg + euid ruid rgid,egid,groups[1 .. 
NGROUPS] +*/ + ps = psbuf; + bcopy(p->p_comm, ps, MAXCOMLEN); + ps[MAXCOMLEN] = '\0'; + ps += strlen(ps); + ps += sprintf(ps, " %d %d %d %d ", pid, ppid, pgid, sid); + + if ((p->p_flag&P_CONTROLT) && (tp = sess->s_ttyp)) + ps += sprintf(ps, "%d,%d ", major(tp->t_dev), minor(tp->t_dev)); + else + ps += sprintf(ps, "%d,%d ", -1, -1); + + sep = ""; + if (sess->s_ttyvp) { + ps += sprintf(ps, "%sctty", sep); + sep = ","; + } + if (SESS_LEADER(p)) { + ps += sprintf(ps, "%ssldr", sep); + sep = ","; + } + if (*sep != ',') + ps += sprintf(ps, "noflags"); + + if (p->p_flag & P_INMEM) + ps += sprintf(ps, " %ld,%ld", + p->p_stats->p_start.tv_sec, + p->p_stats->p_start.tv_usec); + else + ps += sprintf(ps, " -1,-1"); + + { + struct timeval ut, st; + + calcru(p, &ut, &st, (void *) 0); + ps += sprintf(ps, " %ld,%ld %ld,%ld", + ut.tv_sec, + ut.tv_usec, + st.tv_sec, + st.tv_usec); + } + + ps += sprintf(ps, " %s", + (p->p_wchan && p->p_wmesg) ? p->p_wmesg : "nochan"); + + cr = p->p_ucred; + + ps += sprintf(ps, " %lu %lu %lu", + (u_long)cr->cr_uid, + (u_long)p->p_cred->p_ruid, + (u_long)p->p_cred->p_rgid); + + /* egid (p->p_cred->p_svgid) is equal to cr_ngroups[0] + see also getegid(2) in /sys/kern/kern_prot.c */ + + for (i = 0; i < cr->cr_ngroups; i++) + ps += sprintf(ps, ",%lu", (u_long)cr->cr_groups[i]); + ps += sprintf(ps, "\n"); + + xlen = ps - psbuf; + xlen -= uio->uio_offset; + ps = psbuf + uio->uio_offset; + xlen = imin(xlen, uio->uio_resid); + if (xlen <= 0) + error = 0; + else + error = uiomove(ps, xlen, uio); + + return (error); +} + +int +procfs_docmdline(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + char *ps; + int xlen; + int error; + char psbuf[256]; + + if (uio->uio_rw != UIO_READ) + return (EOPNOTSUPP); + + /* + * For now, this is a hack. To implement this fully would require + * groping around in the process address space to follow argv etc. 
+ */ + ps = psbuf; + bcopy(p->p_comm, ps, MAXCOMLEN); + ps[MAXCOMLEN] = '\0'; + ps += strlen(ps); + + ps += sprintf(ps, "\n"); + + xlen = ps - psbuf; + xlen -= uio->uio_offset; + ps = psbuf + uio->uio_offset; + xlen = min(xlen, uio->uio_resid); + if (xlen <= 0) + error = 0; + else + error = uiomove(ps, xlen, uio); + return (error); +} diff --git a/sys/fs/procfs/procfs_subr.c b/sys/fs/procfs/procfs_subr.c new file mode 100644 index 0000000..98e3687 --- /dev/null +++ b/sys/fs/procfs/procfs_subr.c @@ -0,0 +1,392 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs_subr.c 8.6 (Berkeley) 5/14/95 + * + * $Id: procfs_subr.c,v 1.22 1999/01/05 03:53:06 peter Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/malloc.h> +#include <miscfs/procfs/procfs.h> + +static struct pfsnode *pfshead; +static int pfsvplock; + +/* + * allocate a pfsnode/vnode pair. the vnode is + * referenced, but not locked. + * + * the pid, pfs_type, and mount point uniquely + * identify a pfsnode. the mount point is needed + * because someone might mount this filesystem + * twice. + * + * all pfsnodes are maintained on a singly-linked + * list. new nodes are only allocated when they cannot + * be found on this list. entries on the list are + * removed when the vfs reclaim entry is called. + * + * a single lock is kept for the entire list. this is + * needed because the getnewvnode() function can block + * waiting for a vnode to become free, in which case there + * may be more than one process trying to get the same + * vnode. this lock is only taken if we are going to + * call getnewvnode, since the kernel itself is single-threaded. + * + * if an entry is found on the list, then call vget() to + * take a reference. this is done because there may be + * zero references to it and so it needs to be removed from + * the vnode free list.
+ */ +int +procfs_allocvp(mp, vpp, pid, pfs_type) + struct mount *mp; + struct vnode **vpp; + long pid; + pfstype pfs_type; +{ + struct proc *p = curproc; /* XXX */ + struct pfsnode *pfs; + struct vnode *vp; + struct pfsnode **pp; + int error; + +loop: + for (pfs = pfshead; pfs != 0; pfs = pfs->pfs_next) { + vp = PFSTOV(pfs); + if (pfs->pfs_pid == pid && + pfs->pfs_type == pfs_type && + vp->v_mount == mp) { + if (vget(vp, 0, p)) + goto loop; + *vpp = vp; + return (0); + } + } + + /* + * otherwise lock the vp list while we call getnewvnode + * since that can block. + */ + if (pfsvplock & PROCFS_LOCKED) { + pfsvplock |= PROCFS_WANT; + (void) tsleep((caddr_t) &pfsvplock, PINOD, "pfsavp", 0); + goto loop; + } + pfsvplock |= PROCFS_LOCKED; + + /* + * Do the MALLOC before the getnewvnode since doing so afterward + * might cause a bogus v_data pointer to get dereferenced + * elsewhere if MALLOC should block. + */ + MALLOC(pfs, struct pfsnode *, sizeof(struct pfsnode), M_TEMP, M_WAITOK); + + if ((error = getnewvnode(VT_PROCFS, mp, procfs_vnodeop_p, vpp)) != 0) { + FREE(pfs, M_TEMP); + goto out; + } + vp = *vpp; + + vp->v_data = pfs; + + pfs->pfs_next = 0; + pfs->pfs_pid = (pid_t) pid; + pfs->pfs_type = pfs_type; + pfs->pfs_vnode = vp; + pfs->pfs_flags = 0; + pfs->pfs_lockowner = 0; + pfs->pfs_fileno = PROCFS_FILENO(pid, pfs_type); + + switch (pfs_type) { + case Proot: /* /proc = dr-xr-xr-x */ + pfs->pfs_mode = (VREAD|VEXEC) | + (VREAD|VEXEC) >> 3 | + (VREAD|VEXEC) >> 6; + vp->v_type = VDIR; + vp->v_flag = VROOT; + break; + + case Pcurproc: /* /proc/curproc = lr--r--r-- */ + pfs->pfs_mode = (VREAD) | + (VREAD >> 3) | + (VREAD >> 6); + vp->v_type = VLNK; + break; + + case Pproc: + pfs->pfs_mode = (VREAD|VEXEC) | + (VREAD|VEXEC) >> 3 | + (VREAD|VEXEC) >> 6; + vp->v_type = VDIR; + break; + + case Pfile: + case Pmem: + pfs->pfs_mode = (VREAD|VWRITE) | + (VREAD) >> 3;; + vp->v_type = VREG; + break; + + case Pregs: + case Pfpregs: + pfs->pfs_mode = (VREAD|VWRITE); + vp->v_type = 
VREG; + break; + + case Pctl: + case Pnote: + case Pnotepg: + pfs->pfs_mode = (VWRITE); + vp->v_type = VREG; + break; + + case Ptype: + case Pmap: + case Pstatus: + case Pcmdline: + pfs->pfs_mode = (VREAD) | + (VREAD >> 3) | + (VREAD >> 6); + vp->v_type = VREG; + break; + + default: + panic("procfs_allocvp"); + } + + /* add to procfs vnode list */ + for (pp = &pfshead; *pp; pp = &(*pp)->pfs_next) + continue; + *pp = pfs; + +out: + pfsvplock &= ~PROCFS_LOCKED; + + if (pfsvplock & PROCFS_WANT) { + pfsvplock &= ~PROCFS_WANT; + wakeup((caddr_t) &pfsvplock); + } + + return (error); +} + +int +procfs_freevp(vp) + struct vnode *vp; +{ + struct pfsnode **pfspp; + struct pfsnode *pfs = VTOPFS(vp); + + for (pfspp = &pfshead; *pfspp != 0; pfspp = &(*pfspp)->pfs_next) { + if (*pfspp == pfs) { + *pfspp = pfs->pfs_next; + break; + } + } + + FREE(vp->v_data, M_TEMP); + vp->v_data = 0; + return (0); +} + +int +procfs_rw(ap) + struct vop_read_args *ap; +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + struct proc *curp = uio->uio_procp; + struct pfsnode *pfs = VTOPFS(vp); + struct proc *p; + int rtval; + + p = PFIND(pfs->pfs_pid); + if (p == 0) + return (EINVAL); + if (p->p_pid == 1 && securelevel > 0 && uio->uio_rw == UIO_WRITE) + return (EACCES); + + while (pfs->pfs_lockowner) { + tsleep(&pfs->pfs_lockowner, PRIBIO, "pfslck", 0); + } + pfs->pfs_lockowner = curproc->p_pid; + + switch (pfs->pfs_type) { + case Pnote: + case Pnotepg: + rtval = procfs_donote(curp, p, pfs, uio); + break; + + case Pregs: + rtval = procfs_doregs(curp, p, pfs, uio); + break; + + case Pfpregs: + rtval = procfs_dofpregs(curp, p, pfs, uio); + break; + + case Pctl: + rtval = procfs_doctl(curp, p, pfs, uio); + break; + + case Pstatus: + rtval = procfs_dostatus(curp, p, pfs, uio); + break; + + case Pmap: + rtval = procfs_domap(curp, p, pfs, uio); + break; + + case Pmem: + rtval = procfs_domem(curp, p, pfs, uio); + break; + + case Ptype: + rtval = procfs_dotype(curp, p, pfs, uio); + break; + + 
case Pcmdline: + rtval = procfs_docmdline(curp, p, pfs, uio); + break; + + default: + rtval = EOPNOTSUPP; + break; + } + pfs->pfs_lockowner = 0; + wakeup(&pfs->pfs_lockowner); + return rtval; +} + +/* + * Get a string from userland into (buf). Strip a trailing + * nl character (to allow easy access from the shell). + * The buffer should be *buflenp + 1 chars long. vfs_getuserstr + * will automatically add a nul char at the end. + * + * Returns 0 on success or the following errors + * + * EINVAL: file offset is non-zero. + * EMSGSIZE: message is longer than kernel buffer + * EFAULT: user i/o buffer is not addressable + */ +int +vfs_getuserstr(uio, buf, buflenp) + struct uio *uio; + char *buf; + int *buflenp; +{ + int xlen; + int error; + + if (uio->uio_offset != 0) + return (EINVAL); + + xlen = *buflenp; + + /* must be able to read the whole string in one go */ + if (xlen < uio->uio_resid) + return (EMSGSIZE); + xlen = uio->uio_resid; + + if ((error = uiomove(buf, xlen, uio)) != 0) + return (error); + + /* allow multiple writes without seeks */ + uio->uio_offset = 0; + + /* cleanup string and remove trailing newline */ + buf[xlen] = '\0'; + xlen = strlen(buf); + if (xlen > 0 && buf[xlen-1] == '\n') + buf[--xlen] = '\0'; + *buflenp = xlen; + + return (0); +} + +vfs_namemap_t * +vfs_findname(nm, buf, buflen) + vfs_namemap_t *nm; + char *buf; + int buflen; +{ + + for (; nm->nm_name; nm++) + if (bcmp(buf, nm->nm_name, buflen+1) == 0) + return (nm); + + return (0); +} + +void +procfs_exit(struct proc *p) +{ + struct pfsnode *pfs; + pid_t pid = p->p_pid; + + /* + * The reason for this loop is not obvious -- basicly, + * procfs_freevp(), which is called via vgone() (eventually), + * removes the specified procfs node from the pfshead list. + * It does this by *pfsp = pfs->pfs_next, meaning that it + * overwrites the node. So when we do pfs = pfs->next, we + * end up skipping the node that replaces the one that was + * vgone'd. 
Since it may have been the last one on the list, + * it may also have been set to null -- but *our* pfs pointer, + * here, doesn't see this. So the loop starts from the beginning + * again. + * + * This is not a for() loop because the final event + * would be "pfs = pfs->pfs_next"; in the case where + * pfs is set to pfshead again, that would mean that + * pfshead is skipped over. + * + */ + pfs = pfshead; + while (pfs) { + if (pfs->pfs_pid == pid) { + vgone(PFSTOV(pfs)); + pfs = pfshead; + } else + pfs = pfs->pfs_next; + } +} diff --git a/sys/fs/procfs/procfs_type.c b/sys/fs/procfs/procfs_type.c new file mode 100644 index 0000000..8f85c54 --- /dev/null +++ b/sys/fs/procfs/procfs_type.c @@ -0,0 +1,85 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $Id: procfs_type.c,v 1.4 1997/03/24 11:24:42 bde Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/sysent.h> +#include <sys/vnode.h> +#include <miscfs/procfs/procfs.h> + +int +procfs_dotype(curp, p, pfs, uio) + struct proc *curp; + struct proc *p; + struct pfsnode *pfs; + struct uio *uio; +{ + int len; + int error; + /* + * buffer for emulation type + */ + char mebuffer[256]; + char *none = "Not Available"; + + if (uio->uio_rw != UIO_READ) + return (EOPNOTSUPP); + + if (uio->uio_offset != 0) + return (0); + + if (p && p->p_sysent && p->p_sysent->sv_name) { + len = strlen(p->p_sysent->sv_name); + bcopy(p->p_sysent->sv_name, mebuffer, len); + } else { + len = strlen(none); + bcopy(none, mebuffer, len); + } + mebuffer[len++] = '\n'; + error = uiomove(mebuffer, len, uio); + return error; +} + +int +procfs_validtype(p) + struct proc *p; +{ + return ((p->p_flag & P_SYSTEM) == 0); +} diff --git a/sys/fs/procfs/procfs_vfsops.c b/sys/fs/procfs/procfs_vfsops.c new file mode 100644 index 0000000..ac1ab53 --- /dev/null +++ b/sys/fs/procfs/procfs_vfsops.c @@ -0,0 +1,215 @@ +/* + * Copyright (c) 1993 Jan-Simon Pendry + * Copyright (c) 1993 + * The 
Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)procfs_vfsops.c 8.7 (Berkeley) 5/10/95 + * + * $Id: procfs_vfsops.c,v 1.25 1998/07/27 22:47:17 alex Exp $ + */ + +/* + * procfs VFS interface + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/syslog.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <miscfs/procfs/procfs.h> + +static int procfs_init __P((struct vfsconf *vfsp)); +static int procfs_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int procfs_start __P((struct mount *mp, int flags, struct proc *p)); +static int procfs_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); +static int procfs_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); + +/* + * VFS Operations. + * + * mount system call + */ +/* ARGSUSED */ +static int +procfs_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + size_t size; + int error; + + if (UIO_MX & (UIO_MX-1)) { + log(LOG_ERR, "procfs: invalid directory entry size\n"); + return (EINVAL); + } + + if (mp->mnt_flag & MNT_UPDATE) + return (EOPNOTSUPP); + + if (mp->mnt_vfc->vfc_refcount == 1 && (error = at_exit(procfs_exit))) { + printf("procfs: cannot register procfs_exit with at_exit\n"); + return(error); + } + + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_data = 0; + vfs_getnewfsid(mp); + + (void) copyinstr(path, (caddr_t)mp->mnt_stat.f_mntonname, MNAMELEN, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + + size = sizeof("procfs") - 1; + bcopy("procfs", mp->mnt_stat.f_mntfromname, size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)procfs_statfs(mp, &mp->mnt_stat, p); + + return (0); +} + +/* + * unmount system call + */ +static int +procfs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + int error; + int flags = 0; + + if (mntflags & MNT_FORCE) + flags |= 
FORCECLOSE; + + error = vflush(mp, 0, flags); + if (error) + return (error); + + if (mp->mnt_vfc->vfc_refcount == 1) + rm_at_exit(procfs_exit); + + return (0); +} + +int +procfs_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + + return (procfs_allocvp(mp, vpp, 0, Proot)); +} + +/* ARGSUSED */ +static int +procfs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + + return (0); +} + +/* + * Get file system statistics. + */ +static int +procfs_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + sbp->f_bsize = PAGE_SIZE; + sbp->f_iosize = PAGE_SIZE; + sbp->f_blocks = 1; /* avoid divide by zero in some df's */ + sbp->f_bfree = 0; + sbp->f_bavail = 0; + sbp->f_files = maxproc; /* approx */ + sbp->f_ffree = maxproc - nprocs; /* approx */ + + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + + return (0); +} + +static int +procfs_init(vfsp) + struct vfsconf *vfsp; +{ + + return (0); +} + +#define procfs_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct sockaddr *, struct vnode **, int *, struct ucred **)))einval) +#define procfs_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define procfs_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))nullop) +#define procfs_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define procfs_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define procfs_vptofh ((int (*) __P((struct vnode *, struct fid *)))einval) + +static struct vfsops procfs_vfsops = { + procfs_mount, + procfs_start, + procfs_unmount, + procfs_root, + procfs_quotactl, + procfs_statfs, + procfs_sync, + procfs_vget, + 
procfs_fhtovp, + procfs_vptofh, + procfs_init, +}; + +VFS_SET(procfs_vfsops, procfs, VFCF_SYNTHETIC); diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c new file mode 100644 index 0000000..1aa5453 --- /dev/null +++ b/sys/fs/procfs/procfs_vnops.c @@ -0,0 +1,1013 @@ +/* + * Copyright (c) 1993, 1995 Jan-Simon Pendry + * Copyright (c) 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 + * + * $Id: procfs_vnops.c,v 1.63 1999/01/05 03:53:06 peter Exp $ + */ + +/* + * procfs vnode interface + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/time.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/fcntl.h> +#include <sys/proc.h> +#include <sys/signalvar.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/dirent.h> +#include <machine/reg.h> +#include <vm/vm_zone.h> +#include <miscfs/procfs/procfs.h> +#include <sys/pioctl.h> + +static int procfs_abortop __P((struct vop_abortop_args *)); +static int procfs_access __P((struct vop_access_args *)); +static int procfs_badop __P((void)); +static int procfs_bmap __P((struct vop_bmap_args *)); +static int procfs_close __P((struct vop_close_args *)); +static int procfs_getattr __P((struct vop_getattr_args *)); +static int procfs_inactive __P((struct vop_inactive_args *)); +static int procfs_ioctl __P((struct vop_ioctl_args *)); +static int procfs_lookup __P((struct vop_lookup_args *)); +static int procfs_open __P((struct vop_open_args *)); +static int procfs_print __P((struct vop_print_args *)); +static int procfs_readdir __P((struct vop_readdir_args *)); +static int procfs_readlink __P((struct vop_readlink_args *)); +static int procfs_reclaim __P((struct vop_reclaim_args *)); +static int procfs_setattr __P((struct vop_setattr_args *)); + +/* + * This is a list of the valid names in the + * 
process-specific sub-directories. It is + * used in procfs_lookup and procfs_readdir + */ +static struct proc_target { + u_char pt_type; + u_char pt_namlen; + char *pt_name; + pfstype pt_pfstype; + int (*pt_valid) __P((struct proc *p)); +} proc_targets[] = { +#define N(s) sizeof(s)-1, s + /* name type validp */ + { DT_DIR, N("."), Pproc, NULL }, + { DT_DIR, N(".."), Proot, NULL }, + { DT_REG, N("file"), Pfile, procfs_validfile }, + { DT_REG, N("mem"), Pmem, NULL }, + { DT_REG, N("regs"), Pregs, procfs_validregs }, + { DT_REG, N("fpregs"), Pfpregs, procfs_validfpregs }, + { DT_REG, N("ctl"), Pctl, NULL }, + { DT_REG, N("status"), Pstatus, NULL }, + { DT_REG, N("note"), Pnote, NULL }, + { DT_REG, N("notepg"), Pnotepg, NULL }, + { DT_REG, N("map"), Pmap, procfs_validmap }, + { DT_REG, N("etype"), Ptype, procfs_validtype }, + { DT_REG, N("cmdline"), Pcmdline, NULL }, +#undef N +}; +static const int nproc_targets = sizeof(proc_targets) / sizeof(proc_targets[0]); + +static pid_t atopid __P((const char *, u_int)); + +/* + * set things up for doing i/o on + * the pfsnode (vp). (vp) is locked + * on entry, and should be left locked + * on exit. + * + * for procfs we don't need to do anything + * in particular for i/o. all that is done + * is to support exclusive open on process + * memory images. 
+ */ +static int +procfs_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct pfsnode *pfs = VTOPFS(ap->a_vp); + struct proc *p1, *p2; + + p2 = PFIND(pfs->pfs_pid); + if (p2 == NULL) + return (ENOENT); + + switch (pfs->pfs_type) { + case Pmem: + if (((pfs->pfs_flags & FWRITE) && (ap->a_mode & O_EXCL)) || + ((pfs->pfs_flags & O_EXCL) && (ap->a_mode & FWRITE))) + return (EBUSY); + + p1 = ap->a_p; + if (!CHECKIO(p1, p2) && + !procfs_kmemaccess(p1)) + return (EPERM); + + if (ap->a_mode & FWRITE) + pfs->pfs_flags = ap->a_mode & (FWRITE|O_EXCL); + + return (0); + + default: + break; + } + + return (0); +} + +/* + * close the pfsnode (vp) after doing i/o. + * (vp) is not locked on entry or exit. + * + * nothing to do for procfs other than undo + * any exclusive open flag (see _open above). + */ +static int +procfs_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct pfsnode *pfs = VTOPFS(ap->a_vp); + struct proc *p; + + switch (pfs->pfs_type) { + case Pmem: + if ((ap->a_fflag & FWRITE) && (pfs->pfs_flags & O_EXCL)) + pfs->pfs_flags &= ~(FWRITE|O_EXCL); + /* + * This rather complicated-looking code is trying to + * determine if this was the last close on this particular + * vnode. While one would expect v_usecount to be 1 at + * that point, it seems that (according to John Dyson) + * the VM system will bump up the usecount. So: if the + * usecount is 2, and VOBJBUF is set, then this is really + * the last close. Otherwise, if the usecount is < 2 + * then it is definitely the last close. + * If this is the last close, then it checks to see if + * the target process has PF_LINGER set in p_pfsflags, + * if this is *not* the case, then the process' stop flags + * are cleared, and the process is woken up. 
This is + * to help prevent the case where a process has been + * told to stop on an event, but then the requesting process + * has gone away or forgotten about it. + */ + if ((ap->a_vp->v_usecount < 2) + && (p = pfind(pfs->pfs_pid)) + && !(p->p_pfsflags & PF_LINGER)) { + p->p_stops = 0; + p->p_step = 0; + wakeup(&p->p_step); + } + break; + default: + break; + } + + return (0); +} + +/* + * do an ioctl operation on a pfsnode (vp). + * (vp) is not locked on entry or exit. + */ +static int +procfs_ioctl(ap) + struct vop_ioctl_args *ap; +{ + struct pfsnode *pfs = VTOPFS(ap->a_vp); + struct proc *procp, *p; + int error; + int signo; + struct procfs_status *psp; + unsigned char flags; + + p = ap->a_p; + procp = pfind(pfs->pfs_pid); + if (procp == NULL) { + return ENOTTY; + } + + if (!CHECKIO(p, procp)) + return EPERM; + + switch (ap->a_command) { + case PIOCBIS: + procp->p_stops |= *(unsigned int*)ap->a_data; + break; + case PIOCBIC: + procp->p_stops &= ~*(unsigned int*)ap->a_data; + break; + case PIOCSFL: + /* + * NFLAGS is "non-suser flags" -- currently, only + * PFS_ISUGID ("ignore set u/g id"); + */ +#define NFLAGS (PF_ISUGID) + flags = (unsigned char)*(unsigned int*)ap->a_data; + if (flags & NFLAGS && (error = suser(p->p_ucred, &p->p_acflag))) + return error; + procp->p_pfsflags = flags; + break; + case PIOCGFL: + *(unsigned int*)ap->a_data = (unsigned int)procp->p_pfsflags; + case PIOCSTATUS: + psp = (struct procfs_status *)ap->a_data; + psp->state = (procp->p_step == 0); + psp->flags = procp->p_pfsflags; + psp->events = procp->p_stops; + if (procp->p_step) { + psp->why = procp->p_stype; + psp->val = procp->p_xstat; + } else { + psp->why = psp->val = 0; /* Not defined values */ + } + break; + case PIOCWAIT: + psp = (struct procfs_status *)ap->a_data; + if (procp->p_step == 0) { + error = tsleep(&procp->p_stype, PWAIT | PCATCH, "piocwait", 0); + if (error) + return error; + } + psp->state = 1; /* It stopped */ + psp->flags = procp->p_pfsflags; + psp->events = 
procp->p_stops; + psp->why = procp->p_stype; /* why it stopped */ + psp->val = procp->p_xstat; /* any extra info */ + break; + case PIOCCONT: /* Restart a proc */ + if (procp->p_step == 0) + return EINVAL; /* Can only start a stopped process */ + if ((signo = *(int*)ap->a_data) != 0) { + if (signo >= NSIG || signo <= 0) + return EINVAL; + psignal(procp, signo); + } + procp->p_step = 0; + wakeup(&procp->p_step); + break; + default: + return (ENOTTY); + } + return 0; +} + +/* + * do block mapping for pfsnode (vp). + * since we don't use the buffer cache + * for procfs this function should never + * be called. in any case, it's not clear + * what part of the kernel ever makes use + * of this function. for sanity, this is the + * usual no-op bmap, although returning + * (EIO) would be a reasonable alternative. + */ +static int +procfs_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + } */ *ap; +{ + + if (ap->a_vpp != NULL) + *ap->a_vpp = ap->a_vp; + if (ap->a_bnp != NULL) + *ap->a_bnp = ap->a_bn; + if (ap->a_runp != NULL) + *ap->a_runp = 0; + return (0); +} + +/* + * procfs_inactive is called when the pfsnode + * is vrele'd and the reference count goes + * to zero. (vp) will be on the vnode free + * list, so to get it back vget() must be + * used. + * + * (vp) is locked on entry, but must be unlocked on exit. + */ +static int +procfs_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + VOP_UNLOCK(vp, 0, ap->a_p); + + return (0); +} + +/* + * _reclaim is called when getnewvnode() + * wants to make use of an entry on the vnode + * free list. at this time the filesystem needs + * to free any private data and remove the node + * from any private lists. 
+ */ +static int +procfs_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + return (procfs_freevp(ap->a_vp)); +} + +/* + * _print is used for debugging. + * just print a readable description + * of (vp). + */ +static int +procfs_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct pfsnode *pfs = VTOPFS(ap->a_vp); + + printf("tag VT_PROCFS, type %d, pid %ld, mode %x, flags %lx\n", + pfs->pfs_type, (long)pfs->pfs_pid, pfs->pfs_mode, pfs->pfs_flags); + return (0); +} + +/* + * _abortop is called when operations such as + * rename and create fail. this entry is responsible + * for undoing any side-effects caused by the lookup. + * this will always include freeing the pathname buffer. + */ +static int +procfs_abortop(ap) + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap; +{ + + if ((ap->a_cnp->cn_flags & (HASBUF | SAVESTART)) == HASBUF) + zfree(namei_zone, ap->a_cnp->cn_pnbuf); + return (0); +} + +/* + * generic entry point for unsupported operations + */ +static int +procfs_badop() +{ + + return (EIO); +} + +/* + * Invent attributes for pfsnode (vp) and store + * them in (vap). + * Directories lengths are returned as zero since + * any real length would require the genuine size + * to be computed, and nothing cares anyway. + * + * this is relatively minimal for procfs. + */ +static int +procfs_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct pfsnode *pfs = VTOPFS(ap->a_vp); + struct vattr *vap = ap->a_vap; + struct proc *procp; + int error; + + /* + * First make sure that the process and its credentials + * still exist. 
+ */ + switch (pfs->pfs_type) { + case Proot: + case Pcurproc: + procp = 0; + break; + + default: + procp = PFIND(pfs->pfs_pid); + if (procp == 0 || procp->p_cred == NULL || + procp->p_ucred == NULL) + return (ENOENT); + } + + error = 0; + + /* start by zeroing out the attributes */ + VATTR_NULL(vap); + + /* next do all the common fields */ + vap->va_type = ap->a_vp->v_type; + vap->va_mode = pfs->pfs_mode; + vap->va_fileid = pfs->pfs_fileno; + vap->va_flags = 0; + vap->va_blocksize = PAGE_SIZE; + vap->va_bytes = vap->va_size = 0; + + /* + * Make all times be current TOD. + * It would be possible to get the process start + * time from the p_stat structure, but there's + * no "file creation" time stamp anyway, and the + * p_stat structure is not addressible if u. gets + * swapped out for that process. + */ + nanotime(&vap->va_ctime); + vap->va_atime = vap->va_mtime = vap->va_ctime; + + /* + * If the process has exercised some setuid or setgid + * privilege, then rip away read/write permission so + * that only root can gain access. + */ + switch (pfs->pfs_type) { + case Pctl: + case Pregs: + case Pfpregs: + if (procp->p_flag & P_SUGID) + vap->va_mode &= ~((VREAD|VWRITE)| + ((VREAD|VWRITE)>>3)| + ((VREAD|VWRITE)>>6)); + break; + case Pmem: + /* Retain group kmem readablity. */ + if (procp->p_flag & P_SUGID) + vap->va_mode &= ~(VREAD|VWRITE); + break; + default: + break; + } + + /* + * now do the object specific fields + * + * The size could be set from struct reg, but it's hardly + * worth the trouble, and it puts some (potentially) machine + * dependent data into this machine-independent code. If it + * becomes important then this function should break out into + * a per-file stat function in the corresponding .c file. + */ + + switch (pfs->pfs_type) { + case Proot: + /* + * Set nlink to 1 to tell fts(3) we don't actually know. 
+ */ + vap->va_nlink = 1; + vap->va_uid = 0; + vap->va_gid = 0; + vap->va_size = vap->va_bytes = DEV_BSIZE; + break; + + case Pcurproc: { + char buf[16]; /* should be enough */ + vap->va_nlink = 1; + vap->va_uid = 0; + vap->va_gid = 0; + vap->va_size = vap->va_bytes = + snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); + break; + } + + case Pproc: + vap->va_nlink = nproc_targets; + vap->va_uid = procp->p_ucred->cr_uid; + vap->va_gid = procp->p_ucred->cr_gid; + vap->va_size = vap->va_bytes = DEV_BSIZE; + break; + + case Pfile: + error = EOPNOTSUPP; + break; + + case Pmem: + vap->va_nlink = 1; + /* + * If we denied owner access earlier, then we have to + * change the owner to root - otherwise 'ps' and friends + * will break even though they are setgid kmem. *SIGH* + */ + if (procp->p_flag & P_SUGID) + vap->va_uid = 0; + else + vap->va_uid = procp->p_ucred->cr_uid; + vap->va_gid = KMEM_GROUP; + break; + + case Ptype: + case Pmap: + case Pregs: + vap->va_bytes = vap->va_size = sizeof(struct reg); + vap->va_nlink = 1; + vap->va_uid = procp->p_ucred->cr_uid; + vap->va_gid = procp->p_ucred->cr_gid; + break; + + case Pfpregs: + vap->va_bytes = vap->va_size = sizeof(struct fpreg); + + case Pctl: + case Pstatus: + case Pnote: + case Pnotepg: + case Pcmdline: + vap->va_nlink = 1; + vap->va_uid = procp->p_ucred->cr_uid; + vap->va_gid = procp->p_ucred->cr_gid; + break; + + default: + panic("procfs_getattr"); + } + + return (error); +} + +static int +procfs_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + + if (ap->a_vap->va_flags != VNOVAL) + return (EOPNOTSUPP); + + /* + * just fake out attribute setting + * it's not good to generate an error + * return, otherwise things like creat() + * will fail when they try to set the + * file length to 0. worse, this means + * that echo $note > /proc/$pid/note will fail. + */ + + return (0); +} + +/* + * implement access checking. 
+ * + * something very similar to this code is duplicated + * throughout the 4bsd kernel and should be moved + * into kern/vfs_subr.c sometime. + * + * actually, the check for super-user is slightly + * broken since it will allow read access to write-only + * objects. this doesn't cause any particular trouble + * but does mean that the i/o entry points need to check + * that the operation really does make sense. + */ +static int +procfs_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct vattr *vap; + struct vattr vattr; + int error; + + /* + * If you're the super-user, + * you always get access. + */ + if (ap->a_cred->cr_uid == 0) + return (0); + + vap = &vattr; + error = VOP_GETATTR(ap->a_vp, vap, ap->a_cred, ap->a_p); + if (error) + return (error); + + /* + * Access check is based on only one of owner, group, public. + * If not owner, then check group. If not a member of the + * group, then check public access. + */ + if (ap->a_cred->cr_uid != vap->va_uid) { + gid_t *gp; + int i; + + ap->a_mode >>= 3; + gp = ap->a_cred->cr_groups; + for (i = 0; i < ap->a_cred->cr_ngroups; i++, gp++) + if (vap->va_gid == *gp) + goto found; + ap->a_mode >>= 3; +found: + ; + } + + if ((vap->va_mode & ap->a_mode) == ap->a_mode) + return (0); + + return (EACCES); +} + +/* + * lookup. this is incredibly complicated in the + * general case, however for most pseudo-filesystems + * very little needs to be done. + * + * unless you want to get a migraine, just make sure your + * filesystem doesn't do any locking of its own. otherwise + * read and inwardly digest ufs_lookup(). 
+ */ +static int +procfs_lookup(ap) + struct vop_lookup_args /* { + struct vnode * a_dvp; + struct vnode ** a_vpp; + struct componentname * a_cnp; + } */ *ap; +{ + struct componentname *cnp = ap->a_cnp; + struct vnode **vpp = ap->a_vpp; + struct vnode *dvp = ap->a_dvp; + char *pname = cnp->cn_nameptr; + struct proc *curp = cnp->cn_proc; + struct proc_target *pt; + struct vnode *fvp; + pid_t pid; + struct pfsnode *pfs; + struct proc *p; + int i; + + *vpp = NULL; + + if (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME) + return (EROFS); + + if (cnp->cn_namelen == 1 && *pname == '.') { + *vpp = dvp; + VREF(dvp); + /* vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, curp); */ + return (0); + } + + pfs = VTOPFS(dvp); + switch (pfs->pfs_type) { + case Proot: + if (cnp->cn_flags & ISDOTDOT) + return (EIO); + + if (CNEQ(cnp, "curproc", 7)) + return (procfs_allocvp(dvp->v_mount, vpp, 0, Pcurproc)); + + pid = atopid(pname, cnp->cn_namelen); + if (pid == NO_PID) + break; + + p = PFIND(pid); + if (p == 0) + break; + + return (procfs_allocvp(dvp->v_mount, vpp, pid, Pproc)); + + case Pproc: + if (cnp->cn_flags & ISDOTDOT) + return (procfs_root(dvp->v_mount, vpp)); + + p = PFIND(pfs->pfs_pid); + if (p == 0) + break; + + for (pt = proc_targets, i = 0; i < nproc_targets; pt++, i++) { + if (cnp->cn_namelen == pt->pt_namlen && + bcmp(pt->pt_name, pname, cnp->cn_namelen) == 0 && + (pt->pt_valid == NULL || (*pt->pt_valid)(p))) + goto found; + } + break; + + found: + if (pt->pt_pfstype == Pfile) { + fvp = procfs_findtextvp(p); + /* We already checked that it exists. */ + VREF(fvp); + vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, curp); + *vpp = fvp; + return (0); + } + + return (procfs_allocvp(dvp->v_mount, vpp, pfs->pfs_pid, + pt->pt_pfstype)); + + default: + return (ENOTDIR); + } + + return (cnp->cn_nameiop == LOOKUP ? ENOENT : EROFS); +} + +/* + * Does this process have a text file? 
+ */ +int +procfs_validfile(p) + struct proc *p; +{ + + return (procfs_findtextvp(p) != NULLVP); +} + +/* + * readdir returns directory entries from pfsnode (vp). + * + * the strategy here with procfs is to generate a single + * directory entry at a time (struct pfsdent) and then + * copy that out to userland using uiomove. a more efficent + * though more complex implementation, would try to minimize + * the number of calls to uiomove(). for procfs, this is + * hardly worth the added code complexity. + * + * this should just be done through read() + */ +static int +procfs_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *a_ncookies; + u_long **a_cookies; + } */ *ap; +{ + struct uio *uio = ap->a_uio; + struct pfsdent d; + struct pfsdent *dp = &d; + struct pfsnode *pfs; + int count, error, i, off; + + pfs = VTOPFS(ap->a_vp); + + off = (int)uio->uio_offset; + if (off != uio->uio_offset || off < 0 || (u_int)off % UIO_MX != 0 || + uio->uio_resid < UIO_MX) + return (EINVAL); + + error = 0; + count = 0; + i = (u_int)off / UIO_MX; + + switch (pfs->pfs_type) { + /* + * this is for the process-specific sub-directories. + * all that is needed to is copy out all the entries + * from the procent[] table (top of this file). 
+ */ + case Pproc: { + struct proc *p; + struct proc_target *pt; + + p = PFIND(pfs->pfs_pid); + if (p == NULL) + break; + + for (pt = &proc_targets[i]; + uio->uio_resid >= UIO_MX && i < nproc_targets; pt++, i++) { + if (pt->pt_valid && (*pt->pt_valid)(p) == 0) + continue; + + dp->d_reclen = UIO_MX; + dp->d_fileno = PROCFS_FILENO(pfs->pfs_pid, pt->pt_pfstype); + dp->d_namlen = pt->pt_namlen; + bcopy(pt->pt_name, dp->d_name, pt->pt_namlen + 1); + dp->d_type = pt->pt_type; + + if ((error = uiomove((caddr_t)dp, UIO_MX, uio)) != 0) + break; + } + + break; + } + + /* + * this is for the root of the procfs filesystem + * what is needed is a special entry for "curproc" + * followed by an entry for each process on allproc +#ifdef PROCFS_ZOMBIE + * and zombproc. +#endif + */ + + case Proot: { +#ifdef PROCFS_ZOMBIE + int doingzomb = 0; +#endif + int pcnt = 0; + volatile struct proc *p = allproc.lh_first; + + for (; p && uio->uio_resid >= UIO_MX; i++, pcnt++) { + bzero((char *) dp, UIO_MX); + dp->d_reclen = UIO_MX; + + switch (i) { + case 0: /* `.' */ + case 1: /* `..' 
*/ + dp->d_fileno = PROCFS_FILENO(0, Proot); + dp->d_namlen = i + 1; + bcopy("..", dp->d_name, dp->d_namlen); + dp->d_name[i + 1] = '\0'; + dp->d_type = DT_DIR; + break; + + case 2: + dp->d_fileno = PROCFS_FILENO(0, Pcurproc); + dp->d_namlen = 7; + bcopy("curproc", dp->d_name, 8); + dp->d_type = DT_LNK; + break; + + default: + while (pcnt < i) { + pcnt++; + p = p->p_list.le_next; + if (!p) + goto done; + } + dp->d_fileno = PROCFS_FILENO(p->p_pid, Pproc); + dp->d_namlen = sprintf(dp->d_name, "%ld", + (long)p->p_pid); + dp->d_type = DT_REG; + p = p->p_list.le_next; + break; + } + + if ((error = uiomove((caddr_t)dp, UIO_MX, uio)) != 0) + break; + } + done: + +#ifdef PROCFS_ZOMBIE + if (p == 0 && doingzomb == 0) { + doingzomb = 1; + p = zombproc.lh_first; + goto again; + } +#endif + + break; + + } + + default: + error = ENOTDIR; + break; + } + + uio->uio_offset = i * UIO_MX; + + return (error); +} + +/* + * readlink reads the link of `curproc' + */ +static int +procfs_readlink(ap) + struct vop_readlink_args *ap; +{ + char buf[16]; /* should be enough */ + int len; + + if (VTOPFS(ap->a_vp)->pfs_fileno != PROCFS_FILENO(0, Pcurproc)) + return (EINVAL); + + len = snprintf(buf, sizeof(buf), "%ld", (long)curproc->p_pid); + + return (uiomove((caddr_t)buf, len, ap->a_uio)); +} + +/* + * convert decimal ascii to pid_t + */ +static pid_t +atopid(b, len) + const char *b; + u_int len; +{ + pid_t p = 0; + + while (len--) { + char c = *b++; + if (c < '0' || c > '9') + return (NO_PID); + p = 10 * p + (c - '0'); + if (p > PID_MAX) + return (NO_PID); + } + + return (p); +} + +/* + * procfs vnode operations. 
+ */ +vop_t **procfs_vnodeop_p; +static struct vnodeopv_entry_desc procfs_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_abortop_desc, (vop_t *) procfs_abortop }, + { &vop_access_desc, (vop_t *) procfs_access }, + { &vop_advlock_desc, (vop_t *) procfs_badop }, + { &vop_bmap_desc, (vop_t *) procfs_bmap }, + { &vop_close_desc, (vop_t *) procfs_close }, + { &vop_create_desc, (vop_t *) procfs_badop }, + { &vop_getattr_desc, (vop_t *) procfs_getattr }, + { &vop_inactive_desc, (vop_t *) procfs_inactive }, + { &vop_link_desc, (vop_t *) procfs_badop }, + { &vop_lookup_desc, (vop_t *) procfs_lookup }, + { &vop_mkdir_desc, (vop_t *) procfs_badop }, + { &vop_mknod_desc, (vop_t *) procfs_badop }, + { &vop_open_desc, (vop_t *) procfs_open }, + { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, + { &vop_print_desc, (vop_t *) procfs_print }, + { &vop_read_desc, (vop_t *) procfs_rw }, + { &vop_readdir_desc, (vop_t *) procfs_readdir }, + { &vop_readlink_desc, (vop_t *) procfs_readlink }, + { &vop_reclaim_desc, (vop_t *) procfs_reclaim }, + { &vop_remove_desc, (vop_t *) procfs_badop }, + { &vop_rename_desc, (vop_t *) procfs_badop }, + { &vop_rmdir_desc, (vop_t *) procfs_badop }, + { &vop_setattr_desc, (vop_t *) procfs_setattr }, + { &vop_symlink_desc, (vop_t *) procfs_badop }, + { &vop_write_desc, (vop_t *) procfs_rw }, + { &vop_ioctl_desc, (vop_t *) procfs_ioctl }, + { NULL, NULL } +}; +static struct vnodeopv_desc procfs_vnodeop_opv_desc = + { &procfs_vnodeop_p, procfs_vnodeop_entries }; + +VNODEOP_SET(procfs_vnodeop_opv_desc); diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c new file mode 100644 index 0000000..88290e4 --- /dev/null +++ b/sys/fs/specfs/spec_vnops.c @@ -0,0 +1,938 @@ +/* + * Copyright (c) 1989, 1993, 1995 + * The Regents of the University of California. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 + * $Id: spec_vnops.c,v 1.79 1999/01/21 08:29:07 dillon Exp $ + */ + +#include <sys/param.h> +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/buf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/fcntl.h> +#include <sys/disklabel.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_prot.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_pager.h> +#include <vm/vnode_pager.h> +#include <vm/vm_extern.h> + +#include <miscfs/specfs/specdev.h> + +static int spec_advlock __P((struct vop_advlock_args *)); +static int spec_badop __P((void)); +static int spec_bmap __P((struct vop_bmap_args *)); +static int spec_close __P((struct vop_close_args *)); +static int spec_freeblks __P((struct vop_freeblks_args *)); +static int spec_fsync __P((struct vop_fsync_args *)); +static int spec_getattr __P((struct vop_getattr_args *)); +static int spec_getpages __P((struct vop_getpages_args *)); +static int spec_inactive __P((struct vop_inactive_args *)); +static int spec_ioctl __P((struct vop_ioctl_args *)); +static int spec_lookup __P((struct vop_lookup_args *)); +static int spec_open __P((struct vop_open_args *)); +static int spec_poll __P((struct vop_poll_args *)); +static int spec_print __P((struct vop_print_args *)); +static int spec_read __P((struct vop_read_args *)); +static int spec_strategy __P((struct vop_strategy_args *)); +static int spec_write __P((struct vop_write_args *)); + +struct vnode *speclisth[SPECHSZ]; +vop_t **spec_vnodeop_p; +static struct vnodeopv_entry_desc spec_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_access_desc, (vop_t *) vop_ebadf }, + { &vop_advlock_desc, (vop_t *) spec_advlock }, + { &vop_bmap_desc, (vop_t *) spec_bmap }, + { &vop_close_desc, (vop_t *) spec_close }, + { &vop_create_desc, (vop_t *) spec_badop }, + { &vop_freeblks_desc, 
(vop_t *) spec_freeblks }, + { &vop_fsync_desc, (vop_t *) spec_fsync }, + { &vop_getattr_desc, (vop_t *) spec_getattr }, + { &vop_getpages_desc, (vop_t *) spec_getpages }, + { &vop_inactive_desc, (vop_t *) spec_inactive }, + { &vop_ioctl_desc, (vop_t *) spec_ioctl }, + { &vop_lease_desc, (vop_t *) vop_null }, + { &vop_link_desc, (vop_t *) spec_badop }, + { &vop_lookup_desc, (vop_t *) spec_lookup }, + { &vop_mkdir_desc, (vop_t *) spec_badop }, + { &vop_mknod_desc, (vop_t *) spec_badop }, + { &vop_open_desc, (vop_t *) spec_open }, + { &vop_pathconf_desc, (vop_t *) vop_stdpathconf }, + { &vop_poll_desc, (vop_t *) spec_poll }, + { &vop_print_desc, (vop_t *) spec_print }, + { &vop_read_desc, (vop_t *) spec_read }, + { &vop_readdir_desc, (vop_t *) spec_badop }, + { &vop_readlink_desc, (vop_t *) spec_badop }, + { &vop_reallocblks_desc, (vop_t *) spec_badop }, + { &vop_reclaim_desc, (vop_t *) vop_null }, + { &vop_remove_desc, (vop_t *) spec_badop }, + { &vop_rename_desc, (vop_t *) spec_badop }, + { &vop_rmdir_desc, (vop_t *) spec_badop }, + { &vop_setattr_desc, (vop_t *) vop_ebadf }, + { &vop_strategy_desc, (vop_t *) spec_strategy }, + { &vop_symlink_desc, (vop_t *) spec_badop }, + { &vop_write_desc, (vop_t *) spec_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc spec_vnodeop_opv_desc = + { &spec_vnodeop_p, spec_vnodeop_entries }; + +VNODEOP_SET(spec_vnodeop_opv_desc); + + +int +spec_vnoperate(ap) + struct vop_generic_args /* { + struct vnodeop_desc *a_desc; + <other random data follows, presumably> + } */ *ap; +{ + return (VOCALL(spec_vnodeop_p, ap->a_desc->vdesc_offset, ap)); +} + +static void spec_getpages_iodone __P((struct buf *bp)); + +/* + * Trivial lookup routine that always fails. + */ +static int +spec_lookup(ap) + struct vop_lookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + + *ap->a_vpp = NULL; + return (ENOTDIR); +} + +/* + * Open a special file. 
+ */ +/* ARGSUSED */ +static int +spec_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct proc *p = ap->a_p; + struct vnode *bvp, *vp = ap->a_vp; + dev_t bdev, dev = (dev_t)vp->v_rdev; + int maj = major(dev); + int error; + + /* + * Don't allow open if fs is mounted -nodev. + */ + if (vp->v_mount && (vp->v_mount->mnt_flag & MNT_NODEV)) + return (ENXIO); + + switch (vp->v_type) { + + case VCHR: + if ((u_int)maj >= nchrdev) + return (ENXIO); + if ( (cdevsw[maj] == NULL) || (cdevsw[maj]->d_open == NULL)) + return ENXIO; + if (ap->a_cred != FSCRED && (ap->a_mode & FWRITE)) { + /* + * When running in very secure mode, do not allow + * opens for writing of any disk character devices. + */ + if (securelevel >= 2 + && cdevsw[maj]->d_bmaj != -1 + && (cdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) + return (EPERM); + /* + * When running in secure mode, do not allow opens + * for writing of /dev/mem, /dev/kmem, or character + * devices whose corresponding block devices are + * currently mounted. + */ + if (securelevel >= 1) { + if ((bdev = chrtoblk(dev)) != NODEV && + vfinddev(bdev, VBLK, &bvp) && + bvp->v_usecount > 0 && + (error = vfs_mountedon(bvp))) + return (error); + if (iskmemdev(dev)) + return (EPERM); + } + } + if ((cdevsw[maj]->d_flags & D_TYPEMASK) == D_TTY) + vp->v_flag |= VISTTY; + VOP_UNLOCK(vp, 0, p); + error = (*cdevsw[maj]->d_open)(dev, ap->a_mode, S_IFCHR, p); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); + /* NOT REACHED */ + case VBLK: + if ((u_int)maj >= nblkdev) + return (ENXIO); + if ( (bdevsw[maj] == NULL) || (bdevsw[maj]->d_open == NULL)) + return ENXIO; + /* + * When running in very secure mode, do not allow + * opens for writing of any disk block devices. 
+ */ + if (securelevel >= 2 && ap->a_cred != FSCRED && + (ap->a_mode & FWRITE) && + (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) + return (EPERM); + + /* + * Do not allow opens of block devices that are + * currently mounted. + */ + error = vfs_mountedon(vp); + if (error) + return (error); + return ((*bdevsw[maj]->d_open)(dev, ap->a_mode, S_IFBLK, p)); + /* NOT REACHED */ + default: + break; + } + return (0); +} + +/* + * Vnode op for read + */ +/* ARGSUSED */ +static int +spec_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct uio *uio = ap->a_uio; + struct proc *p = uio->uio_procp; + struct buf *bp; + daddr_t bn, nextbn; + long bsize, bscale; + struct partinfo dpart; + int n, on; + d_ioctl_t *ioctl; + int error = 0; + dev_t dev; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("spec_read mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("spec_read proc"); +#endif + if (uio->uio_resid == 0) + return (0); + + switch (vp->v_type) { + + case VCHR: + VOP_UNLOCK(vp, 0, p); + error = (*cdevsw[major(vp->v_rdev)]->d_read) + (vp->v_rdev, uio, ap->a_ioflag); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); + + case VBLK: + if (uio->uio_offset < 0) + return (EINVAL); + bsize = BLKDEV_IOSIZE; + dev = vp->v_rdev; + if ((ioctl = bdevsw[major(dev)]->d_ioctl) != NULL && + (*ioctl)(dev, DIOCGPART, (caddr_t)&dpart, FREAD, p) == 0 && + dpart.part->p_fstype == FS_BSDFFS && + dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) + bsize = dpart.part->p_frag * dpart.part->p_fsize; + bscale = btodb(bsize); + do { + bn = btodb(uio->uio_offset) & ~(bscale - 1); + on = uio->uio_offset % bsize; + n = min((unsigned)(bsize - on), uio->uio_resid); + if (vp->v_lastr + bscale == bn) { + nextbn = bn + bscale; + error = breadn(vp, bn, (int)bsize, &nextbn, + (int *)&bsize, 1, NOCRED, &bp); + } else + 
error = bread(vp, bn, (int)bsize, NOCRED, &bp); + vp->v_lastr = bn; + n = min(n, bsize - bp->b_resid); + if (error) { + brelse(bp); + return (error); + } + error = uiomove((char *)bp->b_data + on, n, uio); + brelse(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); + + default: + panic("spec_read type"); + } + /* NOTREACHED */ +} + +/* + * Vnode op for write + */ +/* ARGSUSED */ +static int +spec_write(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct uio *uio = ap->a_uio; + struct proc *p = uio->uio_procp; + struct buf *bp; + daddr_t bn; + int bsize, blkmask; + struct partinfo dpart; + register int n, on; + int error = 0; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("spec_write mode"); + if (uio->uio_segflg == UIO_USERSPACE && uio->uio_procp != curproc) + panic("spec_write proc"); +#endif + + switch (vp->v_type) { + + case VCHR: + VOP_UNLOCK(vp, 0, p); + error = (*cdevsw[major(vp->v_rdev)]->d_write) + (vp->v_rdev, uio, ap->a_ioflag); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); + + case VBLK: + if (uio->uio_resid == 0) + return (0); + if (uio->uio_offset < 0) + return (EINVAL); + bsize = BLKDEV_IOSIZE; + if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, + (caddr_t)&dpart, FREAD, p) == 0) { + if (dpart.part->p_fstype == FS_BSDFFS && + dpart.part->p_frag != 0 && dpart.part->p_fsize != 0) + bsize = dpart.part->p_frag * + dpart.part->p_fsize; + } + blkmask = btodb(bsize) - 1; + do { + bn = btodb(uio->uio_offset) & ~blkmask; + on = uio->uio_offset % bsize; + n = min((unsigned)(bsize - on), uio->uio_resid); + if (n == bsize) + bp = getblk(vp, bn, bsize, 0, 0); + else + error = bread(vp, bn, bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + n = min(n, bsize - bp->b_resid); + error = uiomove((char *)bp->b_data + on, n, uio); + if (n + on 
== bsize) + bawrite(bp); + else + bdwrite(bp); + } while (error == 0 && uio->uio_resid > 0 && n != 0); + return (error); + + default: + panic("spec_write type"); + } + /* NOTREACHED */ +} + +/* + * Device ioctl operation. + */ +/* ARGSUSED */ +static int +spec_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + dev_t dev = ap->a_vp->v_rdev; + + switch (ap->a_vp->v_type) { + + case VCHR: + return ((*cdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, + ap->a_data, ap->a_fflag, ap->a_p)); + case VBLK: + return ((*bdevsw[major(dev)]->d_ioctl)(dev, ap->a_command, + ap->a_data, ap->a_fflag, ap->a_p)); + default: + panic("spec_ioctl"); + /* NOTREACHED */ + } +} + +/* ARGSUSED */ +static int +spec_poll(ap) + struct vop_poll_args /* { + struct vnode *a_vp; + int a_events; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register dev_t dev; + + switch (ap->a_vp->v_type) { + + case VCHR: + dev = ap->a_vp->v_rdev; + return (*cdevsw[major(dev)]->d_poll)(dev, ap->a_events, ap->a_p); + default: + return (vop_defaultop((struct vop_generic_args *)ap)); + + } +} +/* + * Synch buffers associated with a block device + */ +/* ARGSUSED */ +static int +spec_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct buf *bp; + struct buf *nbp; + int s; + + if (vp->v_type == VCHR) + return (0); + /* + * Flush all dirty buffers associated with a block device. 
+ */ +loop: + s = splbio(); + for (bp = TAILQ_FIRST(&vp->v_dirtyblkhd); bp; bp = nbp) { + nbp = TAILQ_NEXT(bp, b_vnbufs); + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("spec_fsync: not dirty"); + if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { + vfs_bio_awrite(bp); + splx(s); + } else { + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + bawrite(bp); + } + goto loop; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + (void) tsleep((caddr_t)&vp->v_numoutput, PRIBIO + 1, "spfsyn", 0); + } +#ifdef DIAGNOSTIC + if (!TAILQ_EMPTY(&vp->v_dirtyblkhd)) { + vprint("spec_fsync: dirty", vp); + splx(s); + goto loop; + } +#endif + } + splx(s); + return (0); +} + +static int +spec_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + +/* + * Just call the device strategy routine + */ +static int +spec_strategy(ap) + struct vop_strategy_args /* { + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp; + + bp = ap->a_bp; + if (((bp->b_flags & B_READ) == 0) && + (LIST_FIRST(&bp->b_dep)) != NULL && bioops.io_start) + (*bioops.io_start)(bp); + (*bdevsw[major(bp->b_dev)]->d_strategy)(bp); + return (0); +} + +static int +spec_freeblks(ap) + struct vop_freeblks_args /* { + struct vnode *a_vp; + daddr_t a_addr; + daddr_t a_length; + } */ *ap; +{ + struct cdevsw *bsw; + struct buf *bp; + + bsw = bdevsw[major(ap->a_vp->v_rdev)]; + if ((bsw->d_flags & D_CANFREE) == 0) + return (0); + bp = geteblk(ap->a_length); + bp->b_flags |= B_FREEBUF | B_BUSY; + bp->b_dev = ap->a_vp->v_rdev; + bp->b_blkno = ap->a_addr; + bp->b_offset = dbtob(ap->a_addr); + bp->b_bcount = ap->a_length; + (*bsw->d_strategy)(bp); + return (0); +} + +/* + * This is a noop, simply returning what one has been given. 
+ */ +static int +spec_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + + if (ap->a_vpp != NULL) + *ap->a_vpp = ap->a_vp; + if (ap->a_bnp != NULL) + *ap->a_bnp = ap->a_bn; + if (ap->a_runp != NULL) + *ap->a_runp = 0; + if (ap->a_runb != NULL) + *ap->a_runb = 0; + return (0); +} + +/* + * Device close routine + */ +/* ARGSUSED */ +static int +spec_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + dev_t dev = vp->v_rdev; + d_close_t *devclose; + int mode, error; + + switch (vp->v_type) { + + case VCHR: + /* + * Hack: a tty device that is a controlling terminal + * has a reference from the session structure. + * We cannot easily tell that a character device is + * a controlling terminal, unless it is the closing + * process' controlling terminal. In that case, + * if the reference count is 2 (this last descriptor + * plus the session), release the reference from the session. + */ + if (vcount(vp) == 2 && ap->a_p && + (vp->v_flag & VXLOCK) == 0 && + vp == ap->a_p->p_session->s_ttyvp) { + vrele(vp); + ap->a_p->p_session->s_ttyvp = NULL; + } + /* + * If the vnode is locked, then we are in the midst + * of forcably closing the device, otherwise we only + * close on last reference. + */ + if (vcount(vp) > 1 && (vp->v_flag & VXLOCK) == 0) + return (0); + devclose = cdevsw[major(dev)]->d_close; + mode = S_IFCHR; + break; + + case VBLK: + /* + * On last close of a block device (that isn't mounted) + * we must invalidate any in core blocks, so that + * we can, for instance, change floppy disks. 
+ */ + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p); + error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); + VOP_UNLOCK(vp, 0, ap->a_p); + if (error) + return (error); + + /* + * We do not want to really close the device if it + * is still in use unless we are trying to close it + * forcibly. Since every use (buffer, vnode, swap, cmap) + * holds a reference to the vnode, and because we mark + * any other vnodes that alias this device, when the + * sum of the reference counts on all the aliased + * vnodes descends to one, we are on last close. + */ + if ((vcount(vp) > 1) && (vp->v_flag & VXLOCK) == 0) + return (0); + + devclose = bdevsw[major(dev)]->d_close; + mode = S_IFBLK; + break; + + default: + panic("spec_close: not special"); + } + + return ((*devclose)(dev, ap->a_fflag, mode, ap->a_p)); +} + +/* + * Print out the contents of a special device vnode. + */ +static int +spec_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + printf("tag VT_NON, dev %d, %d\n", major(ap->a_vp->v_rdev), + minor(ap->a_vp->v_rdev)); + return (0); +} + +/* + * Special device advisory byte-level locks. + */ +/* ARGSUSED */ +static int +spec_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + + return (ap->a_flags & F_FLOCK ? 
EOPNOTSUPP : EINVAL); +} + +/* + * Special device bad operation + */ +static int +spec_badop() +{ + + panic("spec_badop called"); + /* NOTREACHED */ +} + +static void +spec_getpages_iodone(bp) + struct buf *bp; +{ + + bp->b_flags |= B_DONE; + wakeup(bp); +} + +static int +spec_getpages(ap) + struct vop_getpages_args *ap; +{ + vm_offset_t kva; + int error; + int i, pcount, size, s; + daddr_t blkno; + struct buf *bp; + vm_page_t m; + vm_ooffset_t offset; + int toff, nextoff, nread; + struct vnode *vp = ap->a_vp; + int blksiz; + int gotreqpage; + + error = 0; + pcount = round_page(ap->a_count) / PAGE_SIZE; + + /* + * Calculate the offset of the transfer. + */ + offset = IDX_TO_OFF(ap->a_m[0]->pindex) + ap->a_offset; + + /* XXX sanity check before we go into details. */ + /* XXX limits should be defined elsewhere. */ +#define DADDR_T_BIT 32 +#define OFFSET_MAX ((1LL << (DADDR_T_BIT + DEV_BSHIFT)) - 1) + if (offset < 0 || offset > OFFSET_MAX) { + /* XXX still no %q in kernel. */ + printf("spec_getpages: preposterous offset 0x%x%08x\n", + (u_int)((u_quad_t)offset >> 32), + (u_int)(offset & 0xffffffff)); + return (VM_PAGER_ERROR); + } + + blkno = btodb(offset); + + /* + * Round up physical size for real devices, use the + * fundamental blocksize of the fs if possible. + */ + if (vp && vp->v_mount) { + if (vp->v_type != VBLK) { + vprint("Non VBLK", vp); + } + blksiz = vp->v_mount->mnt_stat.f_bsize; + if (blksiz < DEV_BSIZE) { + blksiz = DEV_BSIZE; + } + } + else + blksiz = DEV_BSIZE; + size = (ap->a_count + blksiz - 1) & ~(blksiz - 1); + + bp = getpbuf(NULL); + kva = (vm_offset_t)bp->b_data; + + /* + * Map the pages to be read into the kva. + */ + pmap_qenter(kva, ap->a_m, pcount); + + /* Build a minimal buffer header. */ + bp->b_flags = B_BUSY | B_READ | B_CALL; + bp->b_iodone = spec_getpages_iodone; + + /* B_PHYS is not set, but it is nice to fill this in. 
*/ + bp->b_proc = curproc; + bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred; + if (bp->b_rcred != NOCRED) + crhold(bp->b_rcred); + if (bp->b_wcred != NOCRED) + crhold(bp->b_wcred); + bp->b_blkno = blkno; + bp->b_lblkno = blkno; + pbgetvp(ap->a_vp, bp); + bp->b_bcount = size; + bp->b_bufsize = size; + bp->b_resid = 0; + + cnt.v_vnodein++; + cnt.v_vnodepgsin += pcount; + + /* Do the input. */ + VOP_STRATEGY(bp->b_vp, bp); + + s = splbio(); + + /* We definitely need to be at splbio here. */ + while ((bp->b_flags & B_DONE) == 0) + tsleep(bp, PVM, "spread", 0); + + splx(s); + + if ((bp->b_flags & B_ERROR) != 0) { + if (bp->b_error) + error = bp->b_error; + else + error = EIO; + } + + nread = size - bp->b_resid; + + if (nread < ap->a_count) { + bzero((caddr_t)kva + nread, + ap->a_count - nread); + } + pmap_qremove(kva, pcount); + + + gotreqpage = 0; + for (i = 0, toff = 0; i < pcount; i++, toff = nextoff) { + nextoff = toff + PAGE_SIZE; + m = ap->a_m[i]; + + m->flags &= ~PG_ZERO; + + if (nextoff <= nread) { + m->valid = VM_PAGE_BITS_ALL; + m->dirty = 0; + } else if (toff < nread) { + int nvalid = ((nread + DEV_BSIZE - 1) - toff) & ~(DEV_BSIZE - 1); + vm_page_set_validclean(m, 0, nvalid); + } else { + m->valid = 0; + m->dirty = 0; + } + + if (i != ap->a_reqpage) { + /* + * Just in case someone was asking for this page we + * now tell them that it is ok to use. 
+ */ + if (!error || (m->valid == VM_PAGE_BITS_ALL)) { + if (m->valid) { + if (m->flags & PG_WANTED) { + vm_page_activate(m); + } else { + vm_page_deactivate(m); + } + vm_page_wakeup(m); + } else { + vm_page_free(m); + } + } else { + vm_page_free(m); + } + } else if (m->valid) { + gotreqpage = 1; + } + } + if (!gotreqpage) { + m = ap->a_m[ap->a_reqpage]; +#ifndef MAX_PERF + printf( + "spec_getpages: I/O read failure: (error code=%d)\n", + error); + printf( + " size: %d, resid: %ld, a_count: %d, valid: 0x%x\n", + size, bp->b_resid, ap->a_count, m->valid); + printf( + " nread: %d, reqpage: %d, pindex: %lu, pcount: %d\n", + nread, ap->a_reqpage, (u_long)m->pindex, pcount); +#endif + /* + * Free the buffer header back to the swap buffer pool. + */ + relpbuf(bp, NULL); + return VM_PAGER_ERROR; + } + /* + * Free the buffer header back to the swap buffer pool. + */ + relpbuf(bp, NULL); + return VM_PAGER_OK; +} + +/* ARGSUSED */ +static int +spec_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct vattr *vap = ap->a_vap; + struct partinfo dpart; + + bzero(vap, sizeof (*vap)); + + if (vp->v_type == VBLK) + vap->va_blocksize = BLKDEV_IOSIZE; + else if (vp->v_type == VCHR) + vap->va_blocksize = MAXBSIZE; + + if ((*bdevsw[major(vp->v_rdev)]->d_ioctl)(vp->v_rdev, DIOCGPART, + (caddr_t)&dpart, FREAD, ap->a_p) == 0) { + vap->va_bytes = dbtob(dpart.disklab->d_partitions + [minor(vp->v_rdev)].p_size); + vap->va_size = vap->va_bytes; + } + return (0); +} diff --git a/sys/fs/umapfs/umap.h b/sys/fs/umapfs/umap.h new file mode 100644 index 0000000..0c6ca34 --- /dev/null +++ b/sys/fs/umapfs/umap.h @@ -0,0 +1,91 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * the UCLA Ficus project. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ *	@(#)umap.h	8.4 (Berkeley) 8/20/94
+ *
+ * $Id: umap.h,v 1.8 1997/02/22 09:40:37 peter Exp $
+ */
+
+#define MAPFILEENTRIES 64	/* rows in umap_mount.info_mapdata (uid map) */
+#define GMAPFILEENTRIES 16	/* rows in umap_mount.info_gmapdata (gid map) */
+#define NOBODY 32767		/* uid substituted when no uid mapping matches */
+#define NULLGROUP 65534		/* gid substituted when no gid mapping matches */
+
+struct umap_args {		/* mount arguments, copied in by umapfs_mount() */
+	char		*target;	/* Target of loopback */
+	int 		nentries;       /* # of entries in user map array */
+	int 		gnentries;	/* # of entries in group map array */
+	u_long 		(*mapdata)[2];	/* pointer to array of user mappings */
+	u_long 		(*gmapdata)[2];	/* pointer to array of group mappings */
+};
+
+struct umap_mount {		/* per-mount state, reached via mnt_data */
+	struct mount	*umapm_vfs;	/* mount of the lower (target) filesystem */
+	struct vnode	*umapm_rootvp;	/* Reference to root umap_node */
+	int             info_nentries;  /* number of uid mappings */
+	int		info_gnentries; /* number of gid mappings */
+	u_long		info_mapdata[MAPFILEENTRIES][2]; /* mapping data for
+	    user mapping in ficus; each row is a pair of ids, column 0 is
+	    the key used by umap_findid(), column 1 the key used by
+	    umap_reverse_findid() */
+	u_long		info_gmapdata[GMAPFILEENTRIES][2]; /*mapping data for
+	    group mapping in ficus; same row layout as info_mapdata */
+};
+
+#ifdef KERNEL
+/*
+ * A cache of vnode references
+ *
+ * One umap_node hangs off v_data of each umap-layer vnode (see VTOUMAP)
+ * and ties that alias vnode to the lower-layer vnode it stands in for.
+ */
+struct umap_node {
+	LIST_ENTRY(umap_node)	umap_hash;	/* Hash list */
+	struct vnode	*umap_lowervp;	/* Aliased vnode - VREFed once */
+	struct vnode	*umap_vnode;	/* Back pointer to vnode/umap_node */
+};
+
+extern int umapfs_init __P((struct vfsconf *vfsp));
+extern int umap_node_create __P((struct mount *mp, struct vnode *target, struct vnode **vpp));
+extern u_long umap_reverse_findid __P((u_long id, u_long map[][2], int nentries));
+extern void umap_mapids __P((struct mount *v_mount, struct ucred *credp));
+
+#define	MOUNTTOUMAPMOUNT(mp) ((struct umap_mount *)((mp)->mnt_data))	/* mount -> umap_mount */
+#define	VTOUMAP(vp) ((struct umap_node *)(vp)->v_data)	/* alias vnode -> umap_node */
+#define UMAPTOV(xp) ((xp)->umap_vnode)	/* umap_node -> alias vnode */
+#ifdef UMAPFS_DIAGNOSTIC
+extern struct vnode *umap_checkvp __P((struct vnode *vp, char *fil, int lno));
+#define	UMAPVPTOLOWERVP(vp) umap_checkvp((vp), __FILE__, __LINE__)	/* checked variant */
+#else
+#define	UMAPVPTOLOWERVP(vp) (VTOUMAP(vp)->umap_lowervp)	/* alias vnode -> lower vnode */
+#endif
+
+extern vop_t **umap_vnodeop_p;
+#endif /* KERNEL */
diff --git a/sys/fs/umapfs/umap_subr.c
b/sys/fs/umapfs/umap_subr.c new file mode 100644 index 0000000..4974f03 --- /dev/null +++ b/sys/fs/umapfs/umap_subr.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 1992, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)umap_subr.c 8.9 (Berkeley) 5/14/95 + * + * $Id: umap_subr.c,v 1.15 1998/11/09 09:21:25 peter Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/malloc.h> +#include <miscfs/umapfs/umap.h> + +#define LOG2_SIZEVNODE 7 /* log2(sizeof struct vnode) */ +#define NUMAPNODECACHE 16 + +/* + * Null layer cache: + * Each cache entry holds a reference to the target vnode + * along with a pointer to the alias vnode. When an + * entry is added the target vnode is VREF'd. When the + * alias is removed the target vnode is vrele'd. 
+ */ + +#define UMAP_NHASH(vp) \ + (&umap_node_hashtbl \ + [((uintptr_t)(void *)(vp) >> LOG2_SIZEVNODE) & umap_node_hash]) +static LIST_HEAD(umap_node_hashhead, umap_node) *umap_node_hashtbl; +static u_long umap_node_hash; + +static u_long umap_findid __P((u_long id, u_long map[][2], int nentries)); +static int umap_node_alloc __P((struct mount *mp, struct vnode *lowervp, + struct vnode **vpp)); +static struct vnode * + umap_node_find __P((struct mount *mp, struct vnode *targetvp)); + +/* + * Initialise cache headers + */ +int +umapfs_init(vfsp) + struct vfsconf *vfsp; +{ + +#ifdef UMAPFS_DIAGNOSTIC + printf("umapfs_init\n"); /* printed during system boot */ +#endif + umap_node_hashtbl = hashinit(NUMAPNODECACHE, M_CACHE, &umap_node_hash); + return (0); +} + +/* + * umap_findid is called by various routines in umap_vnodeops.c to + * find a user or group id in a map. + */ +static u_long +umap_findid(id, map, nentries) + u_long id; + u_long map[][2]; + int nentries; +{ + int i; + + /* Find uid entry in map */ + i = 0; + while ((i<nentries) && ((map[i][0]) != id)) + i++; + + if (i < nentries) + return (map[i][1]); + else + return (-1); + +} + +/* + * umap_reverse_findid is called by umap_getattr() in umap_vnodeops.c to + * find a user or group id in a map, in reverse. + */ +u_long +umap_reverse_findid(id, map, nentries) + u_long id; + u_long map[][2]; + int nentries; +{ + int i; + + /* Find uid entry in map */ + i = 0; + while ((i<nentries) && ((map[i][1]) != id)) + i++; + + if (i < nentries) + return (map[i][0]); + else + return (-1); + +} + +/* + * Return alias for target vnode if already exists, else 0. 
+ */ +static struct vnode * +umap_node_find(mp, targetvp) + struct mount *mp; + struct vnode *targetvp; +{ + struct proc *p = curproc; /* XXX */ + struct umap_node_hashhead *hd; + struct umap_node *a; + struct vnode *vp; + +#ifdef UMAPFS_DIAGNOSTIC + printf("umap_node_find(mp = %x, target = %x)\n", mp, targetvp); +#endif + + /* + * Find hash base, and then search the (two-way) linked + * list looking for a umap_node structure which is referencing + * the target vnode. If found, the increment the umap_node + * reference count (but NOT the target vnode's VREF counter). + */ + hd = UMAP_NHASH(targetvp); +loop: + for (a = hd->lh_first; a != 0; a = a->umap_hash.le_next) { + if (a->umap_lowervp == targetvp && + a->umap_vnode->v_mount == mp) { + vp = UMAPTOV(a); + /* + * We need vget for the VXLOCK + * stuff, but we don't want to lock + * the lower node. + */ + if (vget(vp, 0, p)) { +#ifdef UMAPFS_DIAGNOSTIC + printf ("umap_node_find: vget failed.\n"); +#endif + goto loop; + } + return (vp); + } + } + +#ifdef UMAPFS_DIAGNOSTIC + printf("umap_node_find(%x, %x): NOT found\n", mp, targetvp); +#endif + + return (0); +} + +/* + * Make a new umap_node node. + * Vp is the alias vnode, lofsvp is the target vnode. + * Maintain a reference to (targetvp). + */ +static int +umap_node_alloc(mp, lowervp, vpp) + struct mount *mp; + struct vnode *lowervp; + struct vnode **vpp; +{ + struct umap_node_hashhead *hd; + struct umap_node *xp; + struct vnode *othervp, *vp; + int error; + + /* XXX This routine probably needs a node_alloc lock */ + + /* + * Do the MALLOC before the getnewvnode since doing so afterward + * might cause a bogus v_data pointer to get dereferenced + * elsewhere if MALLOC should block. 
+ */ + MALLOC(xp, struct umap_node *, sizeof(struct umap_node), + M_TEMP, M_WAITOK); + + error = getnewvnode(VT_UMAP, mp, umap_vnodeop_p, vpp); + if (error) { + FREE(xp, M_TEMP); + return (error); + } + vp = *vpp; + + vp->v_type = lowervp->v_type; + xp->umap_vnode = vp; + vp->v_data = xp; + xp->umap_lowervp = lowervp; + /* + * Before we insert our new node onto the hash chains, + * check to see if someone else has beaten us to it. + * (We could have slept in MALLOC.) + */ + othervp = umap_node_find(mp, lowervp); + if (othervp) { + FREE(xp, M_TEMP); + vp->v_type = VBAD; /* node is discarded */ + vp->v_usecount = 0; /* XXX */ + *vpp = othervp; + return (0); + } + VREF(lowervp); /* Extra VREF will be vrele'd in umap_node_create */ + hd = UMAP_NHASH(lowervp); + LIST_INSERT_HEAD(hd, xp, umap_hash); + return (0); +} + + +/* + * Try to find an existing umap_node vnode refering + * to it, otherwise make a new umap_node vnode which + * contains a reference to the target vnode. + */ +int +umap_node_create(mp, targetvp, newvpp) + struct mount *mp; + struct vnode *targetvp; + struct vnode **newvpp; +{ + struct vnode *aliasvp; + + aliasvp = umap_node_find(mp, targetvp); + if (aliasvp) { + /* + * Take another reference to the alias vnode + */ +#ifdef UMAPFS_DIAGNOSTIC + vprint("umap_node_create: exists", aliasvp); +#endif + /* VREF(aliasvp); */ + } else { + int error; + + /* + * Get new vnode. + */ +#ifdef UMAPFS_DIAGNOSTIC + printf("umap_node_create: create new alias vnode\n"); +#endif + /* + * Make new vnode reference the umap_node. 
+ */ + error = umap_node_alloc(mp, targetvp, &aliasvp); + if (error) + return (error); + + /* + * aliasvp is already VREF'd by getnewvnode() + */ + } + + vrele(targetvp); + +#ifdef UMAPFS_DIAGNOSTIC + vprint("umap_node_create: alias", aliasvp); + vprint("umap_node_create: target", targetvp); +#endif + + *newvpp = aliasvp; + return (0); +} + +#ifdef UMAPFS_DIAGNOSTIC +int umap_checkvp_barrier = 1; +struct vnode * +umap_checkvp(vp, fil, lno) + struct vnode *vp; + char *fil; + int lno; +{ + struct umap_node *a = VTOUMAP(vp); +#if 0 + /* + * Can't do this check because vop_reclaim runs + * with funny vop vector. + */ + if (vp->v_op != umap_vnodeop_p) { + printf ("umap_checkvp: on non-umap-node\n"); + while (umap_checkvp_barrier) /*WAIT*/ ; + panic("umap_checkvp"); + } +#endif + if (a->umap_lowervp == NULL) { + /* Should never happen */ + int i; u_long *p; + printf("vp = %x, ZERO ptr\n", vp); + for (p = (u_long *) a, i = 0; i < 8; i++) + printf(" %x", p[i]); + printf("\n"); + /* wait for debugger */ + while (umap_checkvp_barrier) /*WAIT*/ ; + panic("umap_checkvp"); + } + if (a->umap_lowervp->v_usecount < 1) { + int i; u_long *p; + printf("vp = %x, unref'ed lowervp\n", vp); + for (p = (u_long *) a, i = 0; i < 8; i++) + printf(" %x", p[i]); + printf("\n"); + /* wait for debugger */ + while (umap_checkvp_barrier) /*WAIT*/ ; + panic ("umap with unref'ed lowervp"); + } +#if 0 + printf("umap %x/%d -> %x/%d [%s, %d]\n", + a->umap_vnode, a->umap_vnode->v_usecount, + a->umap_lowervp, a->umap_lowervp->v_usecount, + fil, lno); +#endif + return (a->umap_lowervp); +} +#endif + +/* umap_mapids maps all of the ids in a credential, both user and group. 
*/ + +void +umap_mapids(v_mount, credp) + struct mount *v_mount; + struct ucred *credp; +{ + int i; + uid_t uid; + gid_t gid; + + if (credp == NOCRED) + return; + + /* Find uid entry in map */ + + uid = (uid_t) umap_findid(credp->cr_uid, + MOUNTTOUMAPMOUNT(v_mount)->info_mapdata, + MOUNTTOUMAPMOUNT(v_mount)->info_nentries); + + if (uid != -1) + credp->cr_uid = uid; + else + credp->cr_uid = (uid_t) NOBODY; + +#ifdef notdef + /* cr_gid is the same as cr_groups[0] in 4BSD */ + + /* Find gid entry in map */ + + gid = (gid_t) umap_findid(credp->cr_gid, + MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata, + MOUNTTOUMAPMOUNT(v_mount)->info_gnentries); + + if (gid != -1) + credp->cr_gid = gid; + else + credp->cr_gid = NULLGROUP; +#endif + + /* Now we must map each of the set of groups in the cr_groups + structure. */ + + i = 0; + while (credp->cr_groups[i] != 0) { + gid = (gid_t) umap_findid(credp->cr_groups[i], + MOUNTTOUMAPMOUNT(v_mount)->info_gmapdata, + MOUNTTOUMAPMOUNT(v_mount)->info_gnentries); + + if (gid != -1) + credp->cr_groups[i++] = gid; + else + credp->cr_groups[i++] = NULLGROUP; + } +} diff --git a/sys/fs/umapfs/umap_vfsops.c b/sys/fs/umapfs/umap_vfsops.c new file mode 100644 index 0000000..03b4cb4 --- /dev/null +++ b/sys/fs/umapfs/umap_vfsops.c @@ -0,0 +1,431 @@ +/* + * Copyright (c) 1992, 1993, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * the UCLA Ficus project. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)umap_vfsops.c 8.8 (Berkeley) 5/14/95 + * + * $Id: umap_vfsops.c,v 1.22 1998/05/06 05:29:36 msmith Exp $ + */ + +/* + * Umap Layer + * (See mount_umap(8) for a description of this layer.) 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <miscfs/umapfs/umap.h> + +static MALLOC_DEFINE(M_UMAPFSMNT, "UMAP mount", "UMAP mount structure"); + +static int umapfs_fhtovp __P((struct mount *mp, struct fid *fidp, + struct sockaddr *nam, struct vnode **vpp, + int *exflagsp, struct ucred **credanonp)); +static int umapfs_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +static int umapfs_quotactl __P((struct mount *mp, int cmd, uid_t uid, + caddr_t arg, struct proc *p)); +static int umapfs_root __P((struct mount *mp, struct vnode **vpp)); +static int umapfs_start __P((struct mount *mp, int flags, struct proc *p)); +static int umapfs_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); +static int umapfs_sync __P((struct mount *mp, int waitfor, + struct ucred *cred, struct proc *p)); +static int umapfs_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); +static int umapfs_vget __P((struct mount *mp, ino_t ino, + struct vnode **vpp)); +static int umapfs_vptofh __P((struct vnode *vp, struct fid *fhp)); + +/* + * Mount umap layer + */ +static int +umapfs_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct umap_args args; + struct vnode *lowerrootvp, *vp; + struct vnode *umapm_rootvp; + struct umap_mount *amp; + u_int size; + int error; +#ifdef UMAP_DIAGNOSTIC + int i; +#endif + +#ifdef UMAPFS_DIAGNOSTIC + printf("umapfs_mount(mp = %x)\n", mp); +#endif + + /* + * Update is a no-op + */ + if (mp->mnt_flag & MNT_UPDATE) { + return (EOPNOTSUPP); + /* return (VFS_MOUNT(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, path, data, ndp, p));*/ + } + + /* + * Get argument + */ + error = copyin(data, (caddr_t)&args, sizeof(struct umap_args)); + if (error) + return (error); + 
+ /* + * Find lower node + */ + NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT|LOCKLEAF, + UIO_USERSPACE, args.target, p); + error = namei(ndp); + if (error) + return (error); + + /* + * Sanity check on lower vnode + */ + lowerrootvp = ndp->ni_vp; +#ifdef UMAPFS_DIAGNOSTIC + printf("vp = %x, check for VDIR...\n", lowerrootvp); +#endif + vrele(ndp->ni_dvp); + ndp->ni_dvp = 0; + + if (lowerrootvp->v_type != VDIR) { + vput(lowerrootvp); + return (EINVAL); + } + +#ifdef UMAPFS_DIAGNOSTIC + printf("mp = %x\n", mp); +#endif + + amp = (struct umap_mount *) malloc(sizeof(struct umap_mount), + M_UMAPFSMNT, M_WAITOK); /* XXX */ + + /* + * Save reference to underlying FS + */ + amp->umapm_vfs = lowerrootvp->v_mount; + + /* + * Now copy in the number of entries and maps for umap mapping. + */ + amp->info_nentries = args.nentries; + amp->info_gnentries = args.gnentries; + error = copyin(args.mapdata, (caddr_t)amp->info_mapdata, + 2*sizeof(u_long)*args.nentries); + if (error) + return (error); + +#ifdef UMAP_DIAGNOSTIC + printf("umap_mount:nentries %d\n",args.nentries); + for (i = 0; i < args.nentries; i++) + printf(" %d maps to %d\n", amp->info_mapdata[i][0], + amp->info_mapdata[i][1]); +#endif + + error = copyin(args.gmapdata, (caddr_t)amp->info_gmapdata, + 2*sizeof(u_long)*args.gnentries); + if (error) + return (error); + +#ifdef UMAP_DIAGNOSTIC + printf("umap_mount:gnentries %d\n",args.gnentries); + for (i = 0; i < args.gnentries; i++) + printf(" group %d maps to %d\n", + amp->info_gmapdata[i][0], + amp->info_gmapdata[i][1]); +#endif + + + /* + * Save reference. Each mount also holds + * a reference on the root vnode. + */ + error = umap_node_create(mp, lowerrootvp, &vp); + /* + * Unlock the node (either the lower or the alias) + */ + VOP_UNLOCK(vp, 0, p); + /* + * Make sure the node alias worked + */ + if (error) { + vrele(lowerrootvp); + free(amp, M_UMAPFSMNT); /* XXX */ + return (error); + } + + /* + * Keep a held reference to the root vnode. + * It is vrele'd in umapfs_unmount. 
+ */ + umapm_rootvp = vp; + umapm_rootvp->v_flag |= VROOT; + amp->umapm_rootvp = umapm_rootvp; + if (UMAPVPTOLOWERVP(umapm_rootvp)->v_mount->mnt_flag & MNT_LOCAL) + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_data = (qaddr_t) amp; + vfs_getnewfsid(mp); + + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + (void) copyinstr(args.target, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)umapfs_statfs(mp, &mp->mnt_stat, p); +#ifdef UMAPFS_DIAGNOSTIC + printf("umapfs_mount: lower %s, alias at %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); +#endif + return (0); +} + +/* + * VFS start. Nothing needed here - the start routine + * on the underlying filesystem will have been called + * when that filesystem was mounted. + */ +static int +umapfs_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + return (0); + /* return (VFS_START(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, flags, p)); */ +} + +/* + * Free reference to umap layer + */ +static int +umapfs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + struct vnode *umapm_rootvp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp; + int error; + int flags = 0; + +#ifdef UMAPFS_DIAGNOSTIC + printf("umapfs_unmount(mp = %x)\n", mp); +#endif + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + /* + * Clear out buffer cache. I don't think we + * ever get anything cached at this level at the + * moment, but who knows... 
+ */ +#ifdef notyet + mntflushbuf(mp, 0); + if (mntinvalbuf(mp, 1)) + return (EBUSY); +#endif + if (umapm_rootvp->v_usecount > 1) + return (EBUSY); + error = vflush(mp, umapm_rootvp, flags); + if (error) + return (error); + +#ifdef UMAPFS_DIAGNOSTIC + vprint("alias root of lower", umapm_rootvp); +#endif + /* + * Release reference on underlying root vnode + */ + vrele(umapm_rootvp); + /* + * And blow it away for future re-use + */ + vgone(umapm_rootvp); + /* + * Finally, throw away the umap_mount structure + */ + free(mp->mnt_data, M_UMAPFSMNT); /* XXX */ + mp->mnt_data = 0; + return (0); +} + +static int +umapfs_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct proc *p = curproc; /* XXX */ + struct vnode *vp; + +#ifdef UMAPFS_DIAGNOSTIC + printf("umapfs_root(mp = %x, vp = %x->%x)\n", mp, + MOUNTTOUMAPMOUNT(mp)->umapm_rootvp, + UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp) + ); +#endif + + /* + * Return locked reference to root. + */ + vp = MOUNTTOUMAPMOUNT(mp)->umapm_rootvp; + VREF(vp); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + *vpp = vp; + return (0); +} + +static int +umapfs_quotactl(mp, cmd, uid, arg, p) + struct mount *mp; + int cmd; + uid_t uid; + caddr_t arg; + struct proc *p; +{ + return (VFS_QUOTACTL(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, cmd, uid, arg, p)); +} + +static int +umapfs_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + int error; + struct statfs mstat; + +#ifdef UMAPFS_DIAGNOSTIC + printf("umapfs_statfs(mp = %x, vp = %x->%x)\n", mp, + MOUNTTOUMAPMOUNT(mp)->umapm_rootvp, + UMAPVPTOLOWERVP(MOUNTTOUMAPMOUNT(mp)->umapm_rootvp) + ); +#endif + + bzero(&mstat, sizeof(mstat)); + + error = VFS_STATFS(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, &mstat, p); + if (error) + return (error); + + /* now copy across the "interesting" information and fake the rest */ + sbp->f_type = mstat.f_type; + sbp->f_flags = mstat.f_flags; + sbp->f_bsize = mstat.f_bsize; + sbp->f_iosize = mstat.f_iosize; + sbp->f_blocks = 
mstat.f_blocks; + sbp->f_bfree = mstat.f_bfree; + sbp->f_bavail = mstat.f_bavail; + sbp->f_files = mstat.f_files; + sbp->f_ffree = mstat.f_ffree; + if (sbp != &mp->mnt_stat) { + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return (0); +} + +static int +umapfs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + /* + * XXX - Assumes no data cached at umap layer. + */ + return (0); +} + +static int +umapfs_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + + return (VFS_VGET(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, ino, vpp)); +} + +static int +umapfs_fhtovp(mp, fidp, nam, vpp, exflagsp, credanonp) + struct mount *mp; + struct fid *fidp; + struct sockaddr *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred**credanonp; +{ + + return (VFS_FHTOVP(MOUNTTOUMAPMOUNT(mp)->umapm_vfs, fidp, nam, vpp, exflagsp,credanonp)); +} + +static int +umapfs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + return (VFS_VPTOFH(UMAPVPTOLOWERVP(vp), fhp)); +} + +static struct vfsops umap_vfsops = { + umapfs_mount, + umapfs_start, + umapfs_unmount, + umapfs_root, + umapfs_quotactl, + umapfs_statfs, + umapfs_sync, + umapfs_vget, + umapfs_fhtovp, + umapfs_vptofh, + umapfs_init, +}; + +VFS_SET(umap_vfsops, umap, VFCF_LOOPBACK); diff --git a/sys/fs/umapfs/umap_vnops.c b/sys/fs/umapfs/umap_vnops.c new file mode 100644 index 0000000..893e1e5 --- /dev/null +++ b/sys/fs/umapfs/umap_vnops.c @@ -0,0 +1,566 @@ +/* + * Copyright (c) 1992, 1993 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software donated to Berkeley by + * the UCLA Ficus project. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)umap_vnops.c 8.6 (Berkeley) 5/22/95 + * $Id: umap_vnops.c,v 1.25 1998/07/30 17:40:45 bde Exp $ + */ + +/* + * Umap Layer + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/buf.h> +#include <miscfs/umapfs/umap.h> +#include <miscfs/nullfs/null.h> + +static int umap_bug_bypass = 0; /* for debugging: enables bypass printf'ing */ +SYSCTL_INT(_debug, OID_AUTO, umapfs_bug_bypass, CTLFLAG_RW, + &umap_bug_bypass, 0, ""); + +static int umap_bwrite __P((struct vop_bwrite_args *ap)); +static int umap_bypass __P((struct vop_generic_args *ap)); +static int umap_getattr __P((struct vop_getattr_args *ap)); +static int umap_inactive __P((struct vop_inactive_args *ap)); +static int umap_lock __P((struct vop_lock_args *ap)); +static int umap_print __P((struct vop_print_args *ap)); +static int umap_reclaim __P((struct vop_reclaim_args *ap)); +static int umap_rename __P((struct vop_rename_args *ap)); +static int umap_strategy __P((struct vop_strategy_args *ap)); +static int umap_unlock __P((struct vop_unlock_args *ap)); + +/* + * This is the 10-Apr-92 bypass routine. + * See null_vnops.c:null_bypass for more details. + */ +static int +umap_bypass(ap) + struct vop_generic_args /* { + struct vnodeop_desc *a_desc; + <other random data follows, presumably> + } */ *ap; +{ + struct ucred **credpp = 0, *credp = 0; + struct ucred *savecredp = 0, *savecompcredp = 0; + struct ucred *compcredp = 0; + struct vnode **this_vp_p; + int error; + struct vnode *old_vps[VDESC_MAX_VPS]; + struct vnode *vp1 = 0; + struct vnode **vps_p[VDESC_MAX_VPS]; + struct vnode ***vppp; + struct vnodeop_desc *descp = ap->a_desc; + int reles, i; + struct componentname **compnamepp = 0; + + if (umap_bug_bypass) + printf ("umap_bypass: %s\n", descp->vdesc_name); + +#ifdef SAFETY + /* + * We require at least one vp. 
+ */ + if (descp->vdesc_vp_offsets == NULL || + descp->vdesc_vp_offsets[0] == VDESC_NO_OFFSET) + panic ("umap_bypass: no vp's in map."); +#endif + + /* + * Map the vnodes going in. + * Later, we'll invoke the operation based on + * the first mapped vnode's operation vector. + */ + reles = descp->vdesc_flags; + for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { + if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) + break; /* bail out at end of list */ + vps_p[i] = this_vp_p = + VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[i], ap); + + if (i == 0) { + vp1 = *vps_p[0]; + } + + /* + * We're not guaranteed that any but the first vnode + * are of our type. Check for and don't map any + * that aren't. (Must map first vp or vclean fails.) + */ + + if (i && (*this_vp_p)->v_op != umap_vnodeop_p) { + old_vps[i] = NULL; + } else { + old_vps[i] = *this_vp_p; + *(vps_p[i]) = UMAPVPTOLOWERVP(*this_vp_p); + if (reles & 1) + VREF(*this_vp_p); + } + + } + + /* + * Fix the credentials. (That's the purpose of this layer.) + */ + + if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) { + + credpp = VOPARG_OFFSETTO(struct ucred**, + descp->vdesc_cred_offset, ap); + + /* Save old values */ + + savecredp = (*credpp); + if (savecredp != NOCRED) + (*credpp) = crdup(savecredp); + credp = *credpp; + + if (umap_bug_bypass && credp->cr_uid != 0) + printf("umap_bypass: user was %lu, group %lu\n", + (u_long)credp->cr_uid, (u_long)credp->cr_gid); + + /* Map all ids in the credential structure. */ + + umap_mapids(vp1->v_mount, credp); + + if (umap_bug_bypass && credp->cr_uid != 0) + printf("umap_bypass: user now %lu, group %lu\n", + (u_long)credp->cr_uid, (u_long)credp->cr_gid); + } + + /* BSD often keeps a credential in the componentname structure + * for speed. If there is one, it better get mapped, too. 
+ */ + + if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) { + + compnamepp = VOPARG_OFFSETTO(struct componentname**, + descp->vdesc_componentname_offset, ap); + + compcredp = (*compnamepp)->cn_cred; + savecompcredp = compcredp; + if (savecompcredp != NOCRED) + (*compnamepp)->cn_cred = crdup(savecompcredp); + compcredp = (*compnamepp)->cn_cred; + + if (umap_bug_bypass && compcredp->cr_uid != 0) + printf( + "umap_bypass: component credit user was %lu, group %lu\n", + (u_long)compcredp->cr_uid, + (u_long)compcredp->cr_gid); + + /* Map all ids in the credential structure. */ + + umap_mapids(vp1->v_mount, compcredp); + + if (umap_bug_bypass && compcredp->cr_uid != 0) + printf( + "umap_bypass: component credit user now %lu, group %lu\n", + (u_long)compcredp->cr_uid, + (u_long)compcredp->cr_gid); + } + + /* + * Call the operation on the lower layer + * with the modified argument structure. + */ + error = VCALL(*(vps_p[0]), descp->vdesc_offset, ap); + + /* + * Maintain the illusion of call-by-value + * by restoring vnodes in the argument structure + * to their original value. + */ + reles = descp->vdesc_flags; + for (i = 0; i < VDESC_MAX_VPS; reles >>= 1, i++) { + if (descp->vdesc_vp_offsets[i] == VDESC_NO_OFFSET) + break; /* bail out at end of list */ + if (old_vps[i]) { + *(vps_p[i]) = old_vps[i]; + if (reles & 1) + vrele(*(vps_p[i])); + }; + }; + + /* + * Map the possible out-going vpp + * (Assumes that the lower layer always returns + * a VREF'ed vpp unless it gets an error.) + */ + if (descp->vdesc_vpp_offset != VDESC_NO_OFFSET && + !(descp->vdesc_flags & VDESC_NOMAP_VPP) && + !error) { + if (descp->vdesc_flags & VDESC_VPP_WILLRELE) + goto out; + vppp = VOPARG_OFFSETTO(struct vnode***, + descp->vdesc_vpp_offset, ap); + if (*vppp) + error = umap_node_create(old_vps[0]->v_mount, **vppp, *vppp); + }; + + out: + /* + * Free duplicate cred structure and restore old one. 
+ */ + if (descp->vdesc_cred_offset != VDESC_NO_OFFSET) { + if (umap_bug_bypass && credp && credp->cr_uid != 0) + printf("umap_bypass: returning-user was %lu\n", + (u_long)credp->cr_uid); + + if (savecredp != NOCRED) { + crfree(credp); + (*credpp) = savecredp; + if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0) + printf( + "umap_bypass: returning-user now %lu\n\n", + (u_long)(*credpp)->cr_uid); + } + } + + if (descp->vdesc_componentname_offset != VDESC_NO_OFFSET) { + if (umap_bug_bypass && compcredp && compcredp->cr_uid != 0) + printf( + "umap_bypass: returning-component-user was %lu\n", + (u_long)compcredp->cr_uid); + + if (savecompcredp != NOCRED) { + crfree(compcredp); + (*compnamepp)->cn_cred = savecompcredp; + if (umap_bug_bypass && credpp && (*credpp)->cr_uid != 0) + printf( + "umap_bypass: returning-component-user now %lu\n", + (u_long)compcredp->cr_uid); + } + } + + return (error); +} + + +/* + * We handle getattr to change the fsid. + */ +static int +umap_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + short uid, gid; + int error, tmpid, nentries, gnentries; + u_long (*mapdata)[2], (*gmapdata)[2]; + struct vnode **vp1p; + struct vnodeop_desc *descp = ap->a_desc; + + error = umap_bypass((struct vop_generic_args *)ap); + if (error) + return (error); + /* Requires that arguments be restored. */ + ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + + /* + * Umap needs to map the uid and gid returned by a stat + * into the proper values for this site. This involves + * finding the returned uid in the mapping information, + * translating it into the uid on the other end, + * and filling in the proper field in the vattr + * structure pointed to by ap->a_vap. The group + * is easier, since currently all groups will be + * translated to the NULLGROUP. 
+ */ + + /* Find entry in map */ + + uid = ap->a_vap->va_uid; + gid = ap->a_vap->va_gid; + if (umap_bug_bypass) + printf("umap_getattr: mapped uid = %d, mapped gid = %d\n", uid, + gid); + + vp1p = VOPARG_OFFSETTO(struct vnode**, descp->vdesc_vp_offsets[0], ap); + nentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_nentries; + mapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_mapdata); + gnentries = MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gnentries; + gmapdata = (MOUNTTOUMAPMOUNT((*vp1p)->v_mount)->info_gmapdata); + + /* Reverse map the uid for the vnode. Since it's a reverse + map, we can't use umap_mapids() to do it. */ + + tmpid = umap_reverse_findid(uid, mapdata, nentries); + + if (tmpid != -1) { + + ap->a_vap->va_uid = (uid_t) tmpid; + if (umap_bug_bypass) + printf("umap_getattr: original uid = %d\n", uid); + } else + ap->a_vap->va_uid = (uid_t) NOBODY; + + /* Reverse map the gid for the vnode. */ + + tmpid = umap_reverse_findid(gid, gmapdata, gnentries); + + if (tmpid != -1) { + + ap->a_vap->va_gid = (gid_t) tmpid; + if (umap_bug_bypass) + printf("umap_getattr: original gid = %d\n", gid); + } else + ap->a_vap->va_gid = (gid_t) NULLGROUP; + + return (0); +} + +/* + * We need to process our own vnode lock and then clear the + * interlock flag as it applies only to our vnode, not the + * vnodes below us on the stack. + */ +static int +umap_lock(ap) + struct vop_lock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + + vop_nolock(ap); + if ((ap->a_flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + ap->a_flags &= ~LK_INTERLOCK; + return (null_bypass((struct vop_generic_args *)ap)); +} + +/* + * We need to process our own vnode unlock and then clear the + * interlock flag as it applies only to our vnode, not the + * vnodes below us on the stack. 
+ */ +int +umap_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + vop_nounlock(ap); + ap->a_flags &= ~LK_INTERLOCK; + return (null_bypass((struct vop_generic_args *)ap)); +} + +static int +umap_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct umap_node *xp = VTOUMAP(vp); + struct vnode *lowervp = xp->umap_lowervp; + /* + * Deactivate the lower vnode directly (and _don't_ bypass). + * Wait to vrele lowervp until reclaim, + * so that until then our umap_node is in the + * cache and reusable. + * + */ + VOP_INACTIVE(lowervp, ap->a_p); + VOP_UNLOCK(ap->a_vp, 0, ap->a_p); + return (0); +} + +static int +umap_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct umap_node *xp = VTOUMAP(vp); + struct vnode *lowervp = xp->umap_lowervp; + + /* After this assignment, this node will not be re-used. */ + xp->umap_lowervp = NULL; + LIST_REMOVE(xp, umap_hash); + FREE(vp->v_data, M_TEMP); + vp->v_data = NULL; + vrele(lowervp); + return (0); +} + +static int +umap_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + int error; + struct vnode *savedvp; + + savedvp = bp->b_vp; + bp->b_vp = UMAPVPTOLOWERVP(bp->b_vp); + + error = VOP_STRATEGY(bp->b_vp, ap->a_bp); + + bp->b_vp = savedvp; + + return (error); +} + +static int +umap_bwrite(ap) + struct vop_bwrite_args /* { + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + int error; + struct vnode *savedvp; + + savedvp = bp->b_vp; + bp->b_vp = UMAPVPTOLOWERVP(bp->b_vp); + + error = VOP_BWRITE(ap->a_bp); + + bp->b_vp = savedvp; + + return (error); +} + + +static int +umap_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + printf("\ttag VT_UMAPFS, vp=%p, lowervp=%p\n", vp, UMAPVPTOLOWERVP(vp)); 
+ return (0); +} + +static int +umap_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + int error; + struct componentname *compnamep; + struct ucred *compcredp, *savecompcredp; + struct vnode *vp; + + /* + * Rename is irregular, having two componentname structures. + * We need to map the cred in the second structure, + * and then bypass takes care of the rest. + */ + + vp = ap->a_fdvp; + compnamep = ap->a_tcnp; + compcredp = compnamep->cn_cred; + + savecompcredp = compcredp; + compcredp = compnamep->cn_cred = crdup(savecompcredp); + + if (umap_bug_bypass && compcredp->cr_uid != 0) + printf( + "umap_rename: rename component credit user was %lu, group %lu\n", + (u_long)compcredp->cr_uid, (u_long)compcredp->cr_gid); + + /* Map all ids in the credential structure. */ + + umap_mapids(vp->v_mount, compcredp); + + if (umap_bug_bypass && compcredp->cr_uid != 0) + printf( + "umap_rename: rename component credit user now %lu, group %lu\n", + (u_long)compcredp->cr_uid, (u_long)compcredp->cr_gid); + + error = umap_bypass((struct vop_generic_args *)ap); + + /* Restore the additional mapped componentname cred structure. */ + + crfree(compcredp); + compnamep->cn_cred = savecompcredp; + + return error; +} + +/* + * Global vfs data structures + */ +/* + * XXX - strategy, bwrite are hand coded currently. They should + * go away with a merged buffer/block cache. 
+ * + */ +vop_t **umap_vnodeop_p; +static struct vnodeopv_entry_desc umap_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) umap_bypass }, + { &vop_bwrite_desc, (vop_t *) umap_bwrite }, + { &vop_getattr_desc, (vop_t *) umap_getattr }, + { &vop_inactive_desc, (vop_t *) umap_inactive }, + { &vop_lock_desc, (vop_t *) umap_lock }, + { &vop_print_desc, (vop_t *) umap_print }, + { &vop_reclaim_desc, (vop_t *) umap_reclaim }, + { &vop_rename_desc, (vop_t *) umap_rename }, + { &vop_strategy_desc, (vop_t *) umap_strategy }, + { &vop_unlock_desc, (vop_t *) umap_unlock }, + { NULL, NULL } +}; +static struct vnodeopv_desc umap_vnodeop_opv_desc = + { &umap_vnodeop_p, umap_vnodeop_entries }; + +VNODEOP_SET(umap_vnodeop_opv_desc); diff --git a/sys/fs/unionfs/union.h b/sys/fs/unionfs/union.h new file mode 100644 index 0000000..6a4aa22 --- /dev/null +++ b/sys/fs/unionfs/union.h @@ -0,0 +1,129 @@ +/* + * Copyright (c) 1994 The Regents of the University of California. + * Copyright (c) 1994 Jan-Simon Pendry. + * All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union.h 8.9 (Berkeley) 12/10/94 + * $Id: union.h,v 1.12 1998/02/26 03:23:51 kato Exp $ + */ + +struct union_args { + char *target; /* Target of loopback */ + int mntflags; /* Options on the mount */ +}; + +#define UNMNT_ABOVE 0x0001 /* Target appears above mount point */ +#define UNMNT_BELOW 0x0002 /* Target appears below mount point */ +#define UNMNT_REPLACE 0x0003 /* Target replaces mount point */ +#define UNMNT_OPMASK 0x0003 + +struct union_mount { + struct vnode *um_uppervp; + struct vnode *um_lowervp; + struct ucred *um_cred; /* Credentials of user calling mount */ + int um_cmode; /* cmask from mount process */ + int um_op; /* Operation mode */ +}; + +#ifdef KERNEL + +/* + * UN_DIRMODE is the mode bits used to create a shadow directory. 
+ */ +#define VRWXMODE (VREAD|VWRITE|VEXEC) +#define VRWMODE (VREAD|VWRITE) +#define UN_DIRMODE ((VRWXMODE)|(VRWXMODE>>3)|(VRWXMODE>>6)) +#define UN_FILEMODE ((VRWMODE)|(VRWMODE>>3)|(VRWMODE>>6)) + +/* + * A cache of vnode references + */ +struct union_node { + LIST_ENTRY(union_node) un_cache; /* Hash chain */ + struct vnode *un_vnode; /* Back pointer */ + struct vnode *un_uppervp; /* overlaying object */ + struct vnode *un_lowervp; /* underlying object */ + struct vnode *un_dirvp; /* Parent dir of uppervp */ + struct vnode *un_pvp; /* Parent vnode */ + char *un_path; /* saved component name */ + int un_hash; /* saved un_path hash value */ + int un_openl; /* # of opens on lowervp */ + unsigned int un_flags; + struct vnode **un_dircache; /* cached union stack */ + off_t un_uppersz; /* size of upper object */ + off_t un_lowersz; /* size of lower object */ +#ifdef DIAGNOSTIC + pid_t un_pid; +#endif +}; + +#define UN_WANT 0x01 +#define UN_LOCKED 0x02 +#define UN_ULOCK 0x04 /* Upper node is locked */ +#define UN_KLOCK 0x08 /* Keep upper node locked on vput */ +#define UN_CACHED 0x10 /* In union cache */ + +extern int union_allocvp __P((struct vnode **, struct mount *, + struct vnode *, struct vnode *, + struct componentname *, struct vnode *, + struct vnode *, int)); +extern int union_freevp __P((struct vnode *)); +extern struct vnode *union_dircache __P((struct vnode *, struct proc *)); +extern int union_copyup __P((struct union_node *, int, struct ucred *, + struct proc *)); +extern int union_dowhiteout __P((struct union_node *, struct ucred *, + struct proc *)); +extern int union_mkshadow __P((struct union_mount *, struct vnode *, + struct componentname *, struct vnode **)); +extern int union_mkwhiteout __P((struct union_mount *, struct vnode *, + struct componentname *, char *)); +extern int union_cn_close __P((struct vnode *, int, struct ucred *, + struct proc *)); +extern void union_removed_upper __P((struct union_node *un)); +extern struct vnode *union_lowervp 
__P((struct vnode *)); +extern void union_newsize __P((struct vnode *, off_t, off_t)); + +extern int (*union_dircheckp) __P((struct proc *, struct vnode **, + struct file *)); + +#define MOUNTTOUNIONMOUNT(mp) ((struct union_mount *)((mp)->mnt_data)) +#define VTOUNION(vp) ((struct union_node *)(vp)->v_data) +#define UNIONTOV(un) ((un)->un_vnode) +#define LOWERVP(vp) (VTOUNION(vp)->un_lowervp) +#define UPPERVP(vp) (VTOUNION(vp)->un_uppervp) +#define OTHERVP(vp) (UPPERVP(vp) ? UPPERVP(vp) : LOWERVP(vp)) + +extern vop_t **union_vnodeop_p; +extern struct vfsops union_vfsops; +#endif /* KERNEL */ diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c new file mode 100644 index 0000000..7559b6e --- /dev/null +++ b/sys/fs/unionfs/union_subr.c @@ -0,0 +1,1218 @@ +/* + * Copyright (c) 1994 Jan-Simon Pendry + * Copyright (c) 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union_subr.c 8.20 (Berkeley) 5/20/95 + * $Id: union_subr.c,v 1.35 1998/12/07 21:58:34 archie Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/filedesc.h> +#include <sys/module.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> /* for vnode_pager_setsize */ +#include <vm/vm_zone.h> +#include <miscfs/union/union.h> + +#include <sys/proc.h> + +extern int union_init __P((void)); + +/* must be power of two, otherwise change UNION_HASH() */ +#define NHASH 32 + +/* unsigned int ... 
*/ +#define UNION_HASH(u, l) \ + (((((uintptr_t) (u)) + ((uintptr_t) l)) >> 8) & (NHASH-1)) + +static LIST_HEAD(unhead, union_node) unhead[NHASH]; +static int unvplock[NHASH]; + +static void union_dircache_r __P((struct vnode *vp, struct vnode ***vppp, + int *cntp)); +static int union_list_lock __P((int ix)); +static void union_list_unlock __P((int ix)); +static int union_relookup __P((struct union_mount *um, struct vnode *dvp, + struct vnode **vpp, + struct componentname *cnp, + struct componentname *cn, char *path, + int pathlen)); +static void union_updatevp __P((struct union_node *un, + struct vnode *uppervp, + struct vnode *lowervp)); +static void union_newlower __P((struct union_node *, struct vnode *)); +static void union_newupper __P((struct union_node *, struct vnode *)); +static int union_copyfile __P((struct vnode *, struct vnode *, + struct ucred *, struct proc *)); +static int union_vn_create __P((struct vnode **, struct union_node *, + struct proc *)); +static int union_vn_close __P((struct vnode *, int, struct ucred *, + struct proc *)); + +int +union_init() +{ + int i; + + for (i = 0; i < NHASH; i++) + LIST_INIT(&unhead[i]); + bzero((caddr_t) unvplock, sizeof(unvplock)); + return (0); +} + +static int +union_list_lock(ix) + int ix; +{ + + if (unvplock[ix] & UN_LOCKED) { + unvplock[ix] |= UN_WANT; + (void) tsleep((caddr_t) &unvplock[ix], PINOD, "unllck", 0); + return (1); + } + + unvplock[ix] |= UN_LOCKED; + + return (0); +} + +static void +union_list_unlock(ix) + int ix; +{ + + unvplock[ix] &= ~UN_LOCKED; + + if (unvplock[ix] & UN_WANT) { + unvplock[ix] &= ~UN_WANT; + wakeup((caddr_t) &unvplock[ix]); + } +} + +static void +union_updatevp(un, uppervp, lowervp) + struct union_node *un; + struct vnode *uppervp; + struct vnode *lowervp; +{ + int ohash = UNION_HASH(un->un_uppervp, un->un_lowervp); + int nhash = UNION_HASH(uppervp, lowervp); + int docache = (lowervp != NULLVP || uppervp != NULLVP); + int lhash, uhash; + + /* + * Ensure locking is ordered 
from lower to higher + * to avoid deadlocks. + */ + if (nhash < ohash) { + lhash = nhash; + uhash = ohash; + } else { + lhash = ohash; + uhash = nhash; + } + + if (lhash != uhash) + while (union_list_lock(lhash)) + continue; + + while (union_list_lock(uhash)) + continue; + + if (ohash != nhash || !docache) { + if (un->un_flags & UN_CACHED) { + un->un_flags &= ~UN_CACHED; + LIST_REMOVE(un, un_cache); + } + } + + if (ohash != nhash) + union_list_unlock(ohash); + + if (un->un_lowervp != lowervp) { + if (un->un_lowervp) { + vrele(un->un_lowervp); + if (un->un_path) { + free(un->un_path, M_TEMP); + un->un_path = 0; + } + if (un->un_dirvp) { + vrele(un->un_dirvp); + un->un_dirvp = NULLVP; + } + } + un->un_lowervp = lowervp; + un->un_lowersz = VNOVAL; + } + + if (un->un_uppervp != uppervp) { + if (un->un_uppervp) + vrele(un->un_uppervp); + + un->un_uppervp = uppervp; + un->un_uppersz = VNOVAL; + } + + if (docache && (ohash != nhash)) { + LIST_INSERT_HEAD(&unhead[nhash], un, un_cache); + un->un_flags |= UN_CACHED; + } + + union_list_unlock(nhash); +} + +static void +union_newlower(un, lowervp) + struct union_node *un; + struct vnode *lowervp; +{ + + union_updatevp(un, un->un_uppervp, lowervp); +} + +static void +union_newupper(un, uppervp) + struct union_node *un; + struct vnode *uppervp; +{ + + union_updatevp(un, uppervp, un->un_lowervp); +} + +/* + * Keep track of size changes in the underlying vnodes. + * If the size changes, then callback to the vm layer + * giving priority to the upper layer size. 
+ */ +void +union_newsize(vp, uppersz, lowersz) + struct vnode *vp; + off_t uppersz, lowersz; +{ + struct union_node *un; + off_t sz; + + /* only interested in regular files */ + if (vp->v_type != VREG) + return; + + un = VTOUNION(vp); + sz = VNOVAL; + + if ((uppersz != VNOVAL) && (un->un_uppersz != uppersz)) { + un->un_uppersz = uppersz; + if (sz == VNOVAL) + sz = un->un_uppersz; + } + + if ((lowersz != VNOVAL) && (un->un_lowersz != lowersz)) { + un->un_lowersz = lowersz; + if (sz == VNOVAL) + sz = un->un_lowersz; + } + + if (sz != VNOVAL) { +#ifdef UNION_DIAGNOSTIC + printf("union: %s size now %ld\n", + uppersz != VNOVAL ? "upper" : "lower", (long) sz); +#endif + vnode_pager_setsize(vp, sz); + } +} + +/* + * allocate a union_node/vnode pair. the vnode is + * referenced and locked. the new vnode is returned + * via (vpp). (mp) is the mountpoint of the union filesystem, + * (dvp) is the parent directory where the upper layer object + * should exist (but doesn't) and (cnp) is the componentname + * information which is partially copied to allow the upper + * layer object to be created at a later time. (uppervp) + * and (lowervp) reference the upper and lower layer objects + * being mapped. either, but not both, can be nil. + * if supplied, (uppervp) is locked. + * the reference is either maintained in the new union_node + * object which is allocated, or they are vrele'd. + * + * all union_nodes are maintained on a singly-linked + * list. new nodes are only allocated when they cannot + * be found on this list. entries on the list are + * removed when the vfs reclaim entry is called. + * + * a single lock is kept for the entire list. this is + * needed because the getnewvnode() function can block + * waiting for a vnode to become free, in which case there + * may be more than one process trying to get the same + * vnode. this lock is only taken if we are going to + * call getnewvnode, since the kernel itself is single-threaded. 
+ * + * if an entry is found on the list, then call vget() to + * take a reference. this is done because there may be + * zero references to it and so it needs to be removed from + * the vnode free list. + */ +int +union_allocvp(vpp, mp, undvp, dvp, cnp, uppervp, lowervp, docache) + struct vnode **vpp; + struct mount *mp; + struct vnode *undvp; /* parent union vnode */ + struct vnode *dvp; /* may be null */ + struct componentname *cnp; /* may be null */ + struct vnode *uppervp; /* may be null */ + struct vnode *lowervp; /* may be null */ + int docache; +{ + int error; + struct union_node *un = 0; + struct vnode *xlowervp = NULLVP; + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + int hash = 0; + int vflag; + int try; + + if (uppervp == NULLVP && lowervp == NULLVP) + panic("union: unidentifiable allocation"); + + if (uppervp && lowervp && (uppervp->v_type != lowervp->v_type)) { + xlowervp = lowervp; + lowervp = NULLVP; + } + + /* detect the root vnode (and aliases) */ + vflag = 0; + if ((uppervp == um->um_uppervp) && + ((lowervp == NULLVP) || lowervp == um->um_lowervp)) { + if (lowervp == NULLVP) { + lowervp = um->um_lowervp; + if (lowervp != NULLVP) + VREF(lowervp); + } + vflag = VROOT; + } + +loop: + if (!docache) { + un = 0; + } else for (try = 0; try < 3; try++) { + switch (try) { + case 0: + if (lowervp == NULLVP) + continue; + hash = UNION_HASH(uppervp, lowervp); + break; + + case 1: + if (uppervp == NULLVP) + continue; + hash = UNION_HASH(uppervp, NULLVP); + break; + + case 2: + if (lowervp == NULLVP) + continue; + hash = UNION_HASH(NULLVP, lowervp); + break; + } + + while (union_list_lock(hash)) + continue; + + for (un = unhead[hash].lh_first; un != 0; + un = un->un_cache.le_next) { + if ((un->un_lowervp == lowervp || + un->un_lowervp == NULLVP) && + (un->un_uppervp == uppervp || + un->un_uppervp == NULLVP) && + (UNIONTOV(un)->v_mount == mp)) { + if (vget(UNIONTOV(un), 0, + cnp ? 
cnp->cn_proc : NULL)) { + union_list_unlock(hash); + goto loop; + } + break; + } + } + + union_list_unlock(hash); + + if (un) + break; + } + + if (un) { + /* + * Obtain a lock on the union_node. + * uppervp is locked, though un->un_uppervp + * may not be. this doesn't break the locking + * hierarchy since in the case that un->un_uppervp + * is not yet locked it will be vrele'd and replaced + * with uppervp. + */ + + if ((dvp != NULLVP) && (uppervp == dvp)) { + /* + * Access ``.'', so (un) will already + * be locked. Since this process has + * the lock on (uppervp) no other + * process can hold the lock on (un). + */ +#ifdef DIAGNOSTIC + if ((un->un_flags & UN_LOCKED) == 0) + panic("union: . not locked"); + else if (curproc && un->un_pid != curproc->p_pid && + un->un_pid > -1 && curproc->p_pid > -1) + panic("union: allocvp not lock owner"); +#endif + } else { + if (un->un_flags & UN_LOCKED) { + vrele(UNIONTOV(un)); + un->un_flags |= UN_WANT; + (void) tsleep((caddr_t) &un->un_flags, PINOD, "unalvp", 0); + goto loop; + } + un->un_flags |= UN_LOCKED; + +#ifdef DIAGNOSTIC + if (curproc) + un->un_pid = curproc->p_pid; + else + un->un_pid = -1; +#endif + } + + /* + * At this point, the union_node is locked, + * un->un_uppervp may not be locked, and uppervp + * is locked or nil. + */ + + /* + * Save information about the upper layer. + */ + if (uppervp != un->un_uppervp) { + union_newupper(un, uppervp); + } else if (uppervp) { + vrele(uppervp); + } + + if (un->un_uppervp) { + un->un_flags |= UN_ULOCK; + un->un_flags &= ~UN_KLOCK; + } + + /* + * Save information about the lower layer. + * This needs to keep track of pathname + * and directory information which union_vn_create + * might need. 
+ */ + if (lowervp != un->un_lowervp) { + union_newlower(un, lowervp); + if (cnp && (lowervp != NULLVP)) { + un->un_hash = cnp->cn_hash; + un->un_path = malloc(cnp->cn_namelen+1, + M_TEMP, M_WAITOK); + bcopy(cnp->cn_nameptr, un->un_path, + cnp->cn_namelen); + un->un_path[cnp->cn_namelen] = '\0'; + VREF(dvp); + un->un_dirvp = dvp; + } + } else if (lowervp) { + vrele(lowervp); + } + *vpp = UNIONTOV(un); + return (0); + } + + if (docache) { + /* + * otherwise lock the vp list while we call getnewvnode + * since that can block. + */ + hash = UNION_HASH(uppervp, lowervp); + + if (union_list_lock(hash)) + goto loop; + } + + error = getnewvnode(VT_UNION, mp, union_vnodeop_p, vpp); + if (error) { + if (uppervp) { + if (dvp == uppervp) + vrele(uppervp); + else + vput(uppervp); + } + if (lowervp) + vrele(lowervp); + + goto out; + } + + MALLOC((*vpp)->v_data, void *, sizeof(struct union_node), + M_TEMP, M_WAITOK); + + (*vpp)->v_flag |= vflag; + if (uppervp) + (*vpp)->v_type = uppervp->v_type; + else + (*vpp)->v_type = lowervp->v_type; + un = VTOUNION(*vpp); + un->un_vnode = *vpp; + un->un_uppervp = uppervp; + un->un_uppersz = VNOVAL; + un->un_lowervp = lowervp; + un->un_lowersz = VNOVAL; + un->un_pvp = undvp; + if (undvp != NULLVP) + VREF(undvp); + un->un_dircache = 0; + un->un_openl = 0; + un->un_flags = UN_LOCKED; + if (un->un_uppervp) + un->un_flags |= UN_ULOCK; +#ifdef DIAGNOSTIC + if (curproc) + un->un_pid = curproc->p_pid; + else + un->un_pid = -1; +#endif + if (cnp && (lowervp != NULLVP)) { + un->un_hash = cnp->cn_hash; + un->un_path = malloc(cnp->cn_namelen+1, M_TEMP, M_WAITOK); + bcopy(cnp->cn_nameptr, un->un_path, cnp->cn_namelen); + un->un_path[cnp->cn_namelen] = '\0'; + VREF(dvp); + un->un_dirvp = dvp; + } else { + un->un_hash = 0; + un->un_path = 0; + un->un_dirvp = 0; + } + + if (docache) { + LIST_INSERT_HEAD(&unhead[hash], un, un_cache); + un->un_flags |= UN_CACHED; + } + + if (xlowervp) + vrele(xlowervp); + +out: + if (docache) + union_list_unlock(hash); + + 
return (error); +} + +int +union_freevp(vp) + struct vnode *vp; +{ + struct union_node *un = VTOUNION(vp); + + if (un->un_flags & UN_CACHED) { + un->un_flags &= ~UN_CACHED; + LIST_REMOVE(un, un_cache); + } + + if (un->un_pvp != NULLVP) + vrele(un->un_pvp); + if (un->un_uppervp != NULLVP) + vrele(un->un_uppervp); + if (un->un_lowervp != NULLVP) + vrele(un->un_lowervp); + if (un->un_dirvp != NULLVP) + vrele(un->un_dirvp); + if (un->un_path) + free(un->un_path, M_TEMP); + + FREE(vp->v_data, M_TEMP); + vp->v_data = 0; + + return (0); +} + +/* + * copyfile. copy the vnode (fvp) to the vnode (tvp) + * using a sequence of reads and writes. both (fvp) + * and (tvp) are locked on entry and exit. + */ +static int +union_copyfile(fvp, tvp, cred, p) + struct vnode *fvp; + struct vnode *tvp; + struct ucred *cred; + struct proc *p; +{ + char *buf; + struct uio uio; + struct iovec iov; + int error = 0; + + /* + * strategy: + * allocate a buffer of size MAXBSIZE. + * loop doing reads and writes, keeping track + * of the current uio offset. + * give up at the first sign of trouble. + */ + + uio.uio_procp = p; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_offset = 0; + + VOP_UNLOCK(fvp, 0, p); /* XXX */ + VOP_LEASE(fvp, p, cred, LEASE_READ); + vn_lock(fvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ + VOP_UNLOCK(tvp, 0, p); /* XXX */ + VOP_LEASE(tvp, p, cred, LEASE_WRITE); + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); /* XXX */ + + buf = malloc(MAXBSIZE, M_TEMP, M_WAITOK); + + /* ugly loop follows... 
*/ + do { + off_t offset = uio.uio_offset; + + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + iov.iov_base = buf; + iov.iov_len = MAXBSIZE; + uio.uio_resid = iov.iov_len; + uio.uio_rw = UIO_READ; + error = VOP_READ(fvp, &uio, 0, cred); + + if (error == 0) { + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + iov.iov_base = buf; + iov.iov_len = MAXBSIZE - uio.uio_resid; + uio.uio_offset = offset; + uio.uio_rw = UIO_WRITE; + uio.uio_resid = iov.iov_len; + + if (uio.uio_resid == 0) + break; + + do { + error = VOP_WRITE(tvp, &uio, 0, cred); + } while ((uio.uio_resid > 0) && (error == 0)); + } + + } while (error == 0); + + free(buf, M_TEMP); + return (error); +} + +/* + * (un) is assumed to be locked on entry and remains + * locked on exit. + */ +int +union_copyup(un, docopy, cred, p) + struct union_node *un; + int docopy; + struct ucred *cred; + struct proc *p; +{ + int error; + struct vnode *lvp, *uvp; + + /* + * If the user does not have read permission, the vnode should not + * be copied to upper layer. 
+ */ + vn_lock(un->un_lowervp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_ACCESS(un->un_lowervp, VREAD, cred, p); + VOP_UNLOCK(un->un_lowervp, 0, p); + if (error) + return (error); + + error = union_vn_create(&uvp, un, p); + if (error) + return (error); + + /* at this point, uppervp is locked */ + union_newupper(un, uvp); + un->un_flags |= UN_ULOCK; + + lvp = un->un_lowervp; + + if (docopy) { + /* + * XXX - should not ignore errors + * from VOP_CLOSE + */ + vn_lock(lvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_OPEN(lvp, FREAD, cred, p); + if (error == 0) { + error = union_copyfile(lvp, uvp, cred, p); + VOP_UNLOCK(lvp, 0, p); + (void) VOP_CLOSE(lvp, FREAD, cred, p); + } +#ifdef UNION_DIAGNOSTIC + if (error == 0) + uprintf("union: copied up %s\n", un->un_path); +#endif + + } + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(uvp, 0, p); + union_vn_close(uvp, FWRITE, cred, p); + vn_lock(uvp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_flags |= UN_ULOCK; + + /* + * Subsequent IOs will go to the top layer, so + * call close on the lower vnode and open on the + * upper vnode to ensure that the filesystem keeps + * its reference counts right. This doesn't do + * the right thing with (cred) and (FREAD) though. + * Ignoring error returns is not right, either. + */ + if (error == 0) { + int i; + + for (i = 0; i < un->un_openl; i++) { + (void) VOP_CLOSE(lvp, FREAD, cred, p); + (void) VOP_OPEN(uvp, FREAD, cred, p); + } + un->un_openl = 0; + } + + return (error); + +} + +static int +union_relookup(um, dvp, vpp, cnp, cn, path, pathlen) + struct union_mount *um; + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; + struct componentname *cn; + char *path; + int pathlen; +{ + int error; + + /* + * A new componentname structure must be faked up because + * there is no way to know where the upper level cnp came + * from or what it is being used for. 
This must duplicate + * some of the work done by NDINIT, some of the work done + * by namei, some of the work done by lookup and some of + * the work done by VOP_LOOKUP when given a CREATE flag. + * Conclusion: Horrible. + * + * The pathname buffer will be FREEed by VOP_MKDIR. + */ + cn->cn_namelen = pathlen; + cn->cn_pnbuf = zalloc(namei_zone); + bcopy(path, cn->cn_pnbuf, cn->cn_namelen); + cn->cn_pnbuf[cn->cn_namelen] = '\0'; + + cn->cn_nameiop = CREATE; + cn->cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn->cn_proc = cnp->cn_proc; + if (um->um_op == UNMNT_ABOVE) + cn->cn_cred = cnp->cn_cred; + else + cn->cn_cred = um->um_cred; + cn->cn_nameptr = cn->cn_pnbuf; + cn->cn_hash = cnp->cn_hash; + cn->cn_consume = cnp->cn_consume; + + VREF(dvp); + error = relookup(dvp, vpp, cn); + if (!error) + vrele(dvp); + else { + zfree(namei_zone, cn->cn_pnbuf); + cn->cn_pnbuf = NULL; + } + + return (error); +} + +/* + * Create a shadow directory in the upper layer. + * The new vnode is returned locked. + * + * (um) points to the union mount structure for access to the + * mounting process's credentials. + * (dvp) is the directory in which to create the shadow directory. + * it is unlocked on entry and exit. + * (cnp) is the componentname to be created. + * (vpp) is the returned newly created shadow directory, which + * is returned locked. 
+ */ +int +union_mkshadow(um, dvp, cnp, vpp) + struct union_mount *um; + struct vnode *dvp; + struct componentname *cnp; + struct vnode **vpp; +{ + int error; + struct vattr va; + struct proc *p = cnp->cn_proc; + struct componentname cn; + + error = union_relookup(um, dvp, vpp, cnp, &cn, + cnp->cn_nameptr, cnp->cn_namelen); + if (error) + return (error); + + if (*vpp) { + VOP_ABORTOP(dvp, &cn); + VOP_UNLOCK(dvp, 0, p); + vrele(*vpp); + *vpp = NULLVP; + return (EEXIST); + } + + /* + * policy: when creating the shadow directory in the + * upper layer, create it owned by the user who did + * the mount, group from parent directory, and mode + * 777 modified by umask (ie mostly identical to the + * mkdir syscall). (jsp, kb) + */ + + VATTR_NULL(&va); + va.va_type = VDIR; + va.va_mode = um->um_cmode; + + /* VOP_LEASE: dvp is locked */ + VOP_LEASE(dvp, p, cn.cn_cred, LEASE_WRITE); + + error = VOP_MKDIR(dvp, vpp, &cn, &va); + vput(dvp); + return (error); +} + +/* + * Create a whiteout entry in the upper layer. + * + * (um) points to the union mount structure for access to the + * the mounting process's credentials. + * (dvp) is the directory in which to create the whiteout. + * it is locked on entry and exit. + * (cnp) is the componentname to be created. 
+ */ +int +union_mkwhiteout(um, dvp, cnp, path) + struct union_mount *um; + struct vnode *dvp; + struct componentname *cnp; + char *path; +{ + int error; + struct proc *p = cnp->cn_proc; + struct vnode *wvp; + struct componentname cn; + + VOP_UNLOCK(dvp, 0, p); + error = union_relookup(um, dvp, &wvp, cnp, &cn, path, strlen(path)); + if (error) { + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + return (error); + } + + if (wvp) { + VOP_ABORTOP(dvp, &cn); + vrele(dvp); + vrele(wvp); + return (EEXIST); + } + + /* VOP_LEASE: dvp is locked */ + VOP_LEASE(dvp, p, p->p_ucred, LEASE_WRITE); + + error = VOP_WHITEOUT(dvp, &cn, CREATE); + if (error) + VOP_ABORTOP(dvp, &cn); + + vrele(dvp); + + return (error); +} + +/* + * union_vn_create: creates and opens a new shadow file + * on the upper union layer. this function is similar + * in spirit to calling vn_open but it avoids calling namei(). + * the problem with calling namei is that a) it locks too many + * things, and b) it doesn't start at the "right" directory, + * whereas relookup is told where to start. + */ +static int +union_vn_create(vpp, un, p) + struct vnode **vpp; + struct union_node *un; + struct proc *p; +{ + struct vnode *vp; + struct ucred *cred = p->p_ucred; + struct vattr vat; + struct vattr *vap = &vat; + int fmode = FFLAGS(O_WRONLY|O_CREAT|O_TRUNC|O_EXCL); + int error; + int cmode = UN_FILEMODE & ~p->p_fd->fd_cmask; + struct componentname cn; + + *vpp = NULLVP; + + /* + * Build a new componentname structure (for the same + * reasons outlines in union_mkshadow). + * The difference here is that the file is owned by + * the current user, rather than by the person who + * did the mount, since the current user needs to be + * able to write the file (that's why it is being + * copied in the first place). 
+ */ + cn.cn_namelen = strlen(un->un_path); + cn.cn_pnbuf = zalloc(namei_zone); + bcopy(un->un_path, cn.cn_pnbuf, cn.cn_namelen+1); + cn.cn_nameiop = CREATE; + cn.cn_flags = (LOCKPARENT|HASBUF|SAVENAME|SAVESTART|ISLASTCN); + cn.cn_proc = p; + cn.cn_cred = p->p_ucred; + cn.cn_nameptr = cn.cn_pnbuf; + cn.cn_hash = un->un_hash; + cn.cn_consume = 0; + + VREF(un->un_dirvp); + error = relookup(un->un_dirvp, &vp, &cn); + if (error) + return (error); + vrele(un->un_dirvp); + + if (vp) { + VOP_ABORTOP(un->un_dirvp, &cn); + if (un->un_dirvp == vp) + vrele(un->un_dirvp); + else + vput(un->un_dirvp); + vrele(vp); + return (EEXIST); + } + + /* + * Good - there was no race to create the file + * so go ahead and create it. The permissions + * on the file will be 0666 modified by the + * current user's umask. Access to the file, while + * it is unioned, will require access to the top *and* + * bottom files. Access when not unioned will simply + * require access to the top-level file. + * TODO: confirm choice of access permissions. + */ + VATTR_NULL(vap); + vap->va_type = VREG; + vap->va_mode = cmode; + VOP_LEASE(un->un_dirvp, p, cred, LEASE_WRITE); + error = VOP_CREATE(un->un_dirvp, &vp, &cn, vap); + vput(un->un_dirvp); + if (error) + return (error); + + error = VOP_OPEN(vp, fmode, cred, p); + if (error) { + vput(vp); + return (error); + } + + vp->v_writecount++; + *vpp = vp; + return (0); +} + +static int +union_vn_close(vp, fmode, cred, p) + struct vnode *vp; + int fmode; + struct ucred *cred; + struct proc *p; +{ + + if (fmode & FWRITE) + --vp->v_writecount; + return (VOP_CLOSE(vp, fmode, cred, p)); +} + +void +union_removed_upper(un) + struct union_node *un; +{ + struct proc *p = curproc; /* XXX */ + struct vnode **vpp; + + /* + * Do not set the uppervp to NULLVP. If lowervp is NULLVP, + * union node will have neither uppervp nor lowervp. We remove + * the union node from cache, so that it will not be referrenced. 
+ */ +#if 0 + union_newupper(un, NULLVP); +#endif + if (un->un_dircache != 0) { + for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) + vrele(*vpp); + free(un->un_dircache, M_TEMP); + un->un_dircache = 0; + } + + if (un->un_flags & UN_CACHED) { + un->un_flags &= ~UN_CACHED; + LIST_REMOVE(un, un_cache); + } + + if (un->un_flags & UN_ULOCK) { + un->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(un->un_uppervp, 0, p); + } +} + +#if 0 +struct vnode * +union_lowervp(vp) + struct vnode *vp; +{ + struct union_node *un = VTOUNION(vp); + + if ((un->un_lowervp != NULLVP) && + (vp->v_type == un->un_lowervp->v_type)) { + if (vget(un->un_lowervp, 0) == 0) + return (un->un_lowervp); + } + + return (NULLVP); +} +#endif + +/* + * determine whether a whiteout is needed + * during a remove/rmdir operation. + */ +int +union_dowhiteout(un, cred, p) + struct union_node *un; + struct ucred *cred; + struct proc *p; +{ + struct vattr va; + + if (un->un_lowervp != NULLVP) + return (1); + + if (VOP_GETATTR(un->un_uppervp, &va, cred, p) == 0 && + (va.va_flags & OPAQUE)) + return (1); + + return (0); +} + +static void +union_dircache_r(vp, vppp, cntp) + struct vnode *vp; + struct vnode ***vppp; + int *cntp; +{ + struct union_node *un; + + if (vp->v_op != union_vnodeop_p) { + if (vppp) { + VREF(vp); + *(*vppp)++ = vp; + if (--(*cntp) == 0) + panic("union: dircache table too small"); + } else { + (*cntp)++; + } + + return; + } + + un = VTOUNION(vp); + if (un->un_uppervp != NULLVP) + union_dircache_r(un->un_uppervp, vppp, cntp); + if (un->un_lowervp != NULLVP) + union_dircache_r(un->un_lowervp, vppp, cntp); +} + +struct vnode * +union_dircache(vp, p) + struct vnode *vp; + struct proc *p; +{ + int cnt; + struct vnode *nvp; + struct vnode **vpp; + struct vnode **dircache; + struct union_node *un; + int error; + + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + dircache = VTOUNION(vp)->un_dircache; + + nvp = NULLVP; + + if (dircache == 0) { + cnt = 0; + union_dircache_r(vp, 0, &cnt); + cnt++; + dircache = (struct 
vnode **) + malloc(cnt * sizeof(struct vnode *), + M_TEMP, M_WAITOK); + vpp = dircache; + union_dircache_r(vp, &vpp, &cnt); + *vpp = NULLVP; + vpp = dircache + 1; + } else { + vpp = dircache; + do { + if (*vpp++ == VTOUNION(vp)->un_uppervp) + break; + } while (*vpp != NULLVP); + } + + if (*vpp == NULLVP) + goto out; + + vn_lock(*vpp, LK_EXCLUSIVE | LK_RETRY, p); + VREF(*vpp); + error = union_allocvp(&nvp, vp->v_mount, NULLVP, NULLVP, 0, *vpp, NULLVP, 0); + if (error) + goto out; + + VTOUNION(vp)->un_dircache = 0; + un = VTOUNION(nvp); + un->un_dircache = dircache; + +out: + VOP_UNLOCK(vp, 0, p); + return (nvp); +} + +/* + * Module glue to remove #ifdef UNION from vfs_syscalls.c + */ +static int +union_dircheck(struct proc *p, struct vnode **vp, struct file *fp) +{ + int error = 0; + + if ((*vp)->v_op == union_vnodeop_p) { + struct vnode *lvp; + + lvp = union_dircache(*vp, p); + if (lvp != NULLVP) { + struct vattr va; + + /* + * If the directory is opaque, + * then don't show lower entries + */ + error = VOP_GETATTR(*vp, &va, fp->f_cred, p); + if (va.va_flags & OPAQUE) { + vput(lvp); + lvp = NULL; + } + } + + if (lvp != NULLVP) { + error = VOP_OPEN(lvp, FREAD, fp->f_cred, p); + if (error) { + vput(lvp); + return (error); + } + VOP_UNLOCK(lvp, 0, p); + fp->f_data = (caddr_t) lvp; + fp->f_offset = 0; + error = vn_close(*vp, FREAD, fp->f_cred, p); + if (error) + return (error); + *vp = lvp; + return -1; /* goto unionread */ + } + } + if (((*vp)->v_flag & VROOT) && ((*vp)->v_mount->mnt_flag & MNT_UNION)) { + struct vnode *tvp = *vp; + *vp = (*vp)->v_mount->mnt_vnodecovered; + VREF(*vp); + fp->f_data = (caddr_t) *vp; + fp->f_offset = 0; + vrele(tvp); + return -1; /* goto unionread */ + } + return error; +} + +static int +union_modevent(module_t mod, int type, void *data) +{ + switch (type) { + case MOD_LOAD: + union_dircheckp = union_dircheck; + break; + case MOD_UNLOAD: + union_dircheckp = NULL; + break; + default: + break; + } + return 0; +} +static moduledata_t 
union_mod = { + "union_dircheck", + union_modevent, + NULL +}; +DECLARE_MODULE(union_dircheck, union_mod, SI_SUB_VFS, SI_ORDER_ANY); diff --git a/sys/fs/unionfs/union_vfsops.c b/sys/fs/unionfs/union_vfsops.c new file mode 100644 index 0000000..db4d4d3 --- /dev/null +++ b/sys/fs/unionfs/union_vfsops.c @@ -0,0 +1,555 @@ +/* + * Copyright (c) 1994, 1995 The Regents of the University of California. + * Copyright (c) 1994, 1995 Jan-Simon Pendry. + * All rights reserved. + * + * This code is derived from software donated to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)union_vfsops.c 8.20 (Berkeley) 5/20/95 + * $Id: union_vfsops.c,v 1.30 1998/09/07 13:17:02 bde Exp $ + */ + +/* + * Union Layer + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/filedesc.h> +#include <miscfs/union/union.h> + +static MALLOC_DEFINE(M_UNIONFSMNT, "UNION mount", "UNION mount structure"); + +extern int union_init __P((struct vfsconf *)); + +extern int union_fhtovp __P((struct mount *mp, struct fid *fidp, + struct mbuf *nam, struct vnode **vpp, + int *exflagsp, struct ucred **credanonp)); +static int union_mount __P((struct mount *mp, char *path, caddr_t data, + struct nameidata *ndp, struct proc *p)); +extern int union_quotactl __P((struct mount *mp, int cmd, uid_t uid, + caddr_t arg, struct proc *p)); +static int union_root __P((struct mount *mp, struct vnode **vpp)); +static int union_start __P((struct mount *mp, int flags, struct proc *p)); +static int union_statfs __P((struct mount *mp, struct statfs *sbp, + struct proc *p)); +extern int union_sync __P((struct mount *mp, int waitfor, + struct ucred *cred, struct proc *p)); +static int union_unmount __P((struct mount *mp, int mntflags, + struct proc *p)); +extern int union_vget __P((struct mount *mp, ino_t ino, + struct vnode **vpp)); +extern int union_vptofh __P((struct vnode *vp, struct fid *fhp)); + 
+/* + * Mount union filesystem + */ +static int +union_mount(mp, path, data, ndp, p) + struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + int error = 0; + struct union_args args; + struct vnode *lowerrootvp = NULLVP; + struct vnode *upperrootvp = NULLVP; + struct union_mount *um = 0; + struct ucred *cred = 0; + char *cp = 0; + int len; + u_int size; + +#ifdef UNION_DIAGNOSTIC + printf("union_mount(mp = %x)\n", mp); +#endif + + /* + * Disable clustered write, otherwise system becomes unstable. + */ + mp->mnt_flag |= MNT_NOCLUSTERW; + + /* + * Update is a no-op + */ + if (mp->mnt_flag & MNT_UPDATE) { + /* + * Need to provide. + * 1. a way to convert between rdonly and rdwr mounts. + * 2. support for nfs exports. + */ + error = EOPNOTSUPP; + goto bad; + } + + /* + * Get argument + */ + error = copyin(data, (caddr_t)&args, sizeof(struct union_args)); + if (error) + goto bad; + + lowerrootvp = mp->mnt_vnodecovered; + VREF(lowerrootvp); + + /* + * Unlock lower node to avoid deadlock. + */ + if (lowerrootvp->v_op == union_vnodeop_p) + VOP_UNLOCK(lowerrootvp, 0, p); + + /* + * Find upper node. + */ + NDINIT(ndp, LOOKUP, FOLLOW|WANTPARENT, + UIO_USERSPACE, args.target, p); + + error = namei(ndp); + if (lowerrootvp->v_op == union_vnodeop_p) + vn_lock(lowerrootvp, LK_EXCLUSIVE | LK_RETRY, p); + if (error) + goto bad; + + upperrootvp = ndp->ni_vp; + vrele(ndp->ni_dvp); + ndp->ni_dvp = NULL; + + /* + * Check multi union mount to avoid `lock myself again' panic. + */ + if (upperrootvp == VTOUNION(lowerrootvp)->un_uppervp) { +#ifdef DIAGNOSTIC + printf("union_mount: multi union mount?\n"); +#endif + error = EDEADLK; + goto bad; + } + + if (upperrootvp->v_type != VDIR) { + error = EINVAL; + goto bad; + } + + um = (struct union_mount *) malloc(sizeof(struct union_mount), + M_UNIONFSMNT, M_WAITOK); /* XXX */ + + /* + * Keep a held reference to the target vnodes. + * They are vrele'd in union_unmount. 
+ * + * Depending on the _BELOW flag, the filesystems are + * viewed in a different order. In effect, this is the + * same as providing a mount under option to the mount syscall. + */ + + um->um_op = args.mntflags & UNMNT_OPMASK; + switch (um->um_op) { + case UNMNT_ABOVE: + um->um_lowervp = lowerrootvp; + um->um_uppervp = upperrootvp; + break; + + case UNMNT_BELOW: + um->um_lowervp = upperrootvp; + um->um_uppervp = lowerrootvp; + break; + + case UNMNT_REPLACE: + vrele(lowerrootvp); + lowerrootvp = NULLVP; + um->um_uppervp = upperrootvp; + um->um_lowervp = lowerrootvp; + break; + + default: + error = EINVAL; + goto bad; + } + + /* + * Unless the mount is readonly, ensure that the top layer + * supports whiteout operations + */ + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + error = VOP_WHITEOUT(um->um_uppervp, (struct componentname *) 0, LOOKUP); + if (error) + goto bad; + } + + um->um_cred = p->p_ucred; + crhold(um->um_cred); + um->um_cmode = UN_DIRMODE &~ p->p_fd->fd_cmask; + + /* + * Depending on what you think the MNT_LOCAL flag might mean, + * you may want the && to be || on the conditional below. + * At the moment it has been defined that the filesystem is + * only local if it is all local, ie the MNT_LOCAL flag implies + * that the entire namespace is local. If you think the MNT_LOCAL + * flag implies that some of the files might be stored locally + * then you will want to change the conditional. + */ + if (um->um_op == UNMNT_ABOVE) { + if (((um->um_lowervp == NULLVP) || + (um->um_lowervp->v_mount->mnt_flag & MNT_LOCAL)) && + (um->um_uppervp->v_mount->mnt_flag & MNT_LOCAL)) + mp->mnt_flag |= MNT_LOCAL; + } + + /* + * Copy in the upper layer's RDONLY flag. This is for the benefit + * of lookup() which explicitly checks the flag, rather than asking + * the filesystem for its own opinion. This means, that an update + * mount of the underlying filesystem to go from rdonly to rdwr + * will leave the unioned view as read-only. 
+ */ + mp->mnt_flag |= (um->um_uppervp->v_mount->mnt_flag & MNT_RDONLY); + + mp->mnt_data = (qaddr_t) um; + vfs_getnewfsid(mp); + + (void) copyinstr(path, mp->mnt_stat.f_mntonname, MNAMELEN - 1, &size); + bzero(mp->mnt_stat.f_mntonname + size, MNAMELEN - size); + + switch (um->um_op) { + case UNMNT_ABOVE: + cp = "<above>:"; + break; + case UNMNT_BELOW: + cp = "<below>:"; + break; + case UNMNT_REPLACE: + cp = ""; + break; + } + len = strlen(cp); + bcopy(cp, mp->mnt_stat.f_mntfromname, len); + + cp = mp->mnt_stat.f_mntfromname + len; + len = MNAMELEN - len; + + (void) copyinstr(args.target, cp, len - 1, &size); + bzero(cp + size, len - size); + + (void)union_statfs(mp, &mp->mnt_stat, p); + +#ifdef UNION_DIAGNOSTIC + printf("union_mount: from %s, on %s\n", + mp->mnt_stat.f_mntfromname, mp->mnt_stat.f_mntonname); +#endif + return (0); + +bad: + if (um) + free(um, M_UNIONFSMNT); + if (cred) + crfree(cred); + if (upperrootvp) + vrele(upperrootvp); + if (lowerrootvp) + vrele(lowerrootvp); + return (error); +} + +/* + * VFS start. Nothing needed here - the start routine + * on the underlying filesystem(s) will have been called + * when that filesystem was mounted. + */ +static int +union_start(mp, flags, p) + struct mount *mp; + int flags; + struct proc *p; +{ + + return (0); +} + +/* + * Free reference to union layer + */ +static int +union_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct vnode *um_rootvp; + int error; + int freeing; + int flags = 0; + +#ifdef UNION_DIAGNOSTIC + printf("union_unmount(mp = %x)\n", mp); +#endif + + if (mntflags & MNT_FORCE) + flags |= FORCECLOSE; + + if ((error = union_root(mp, &um_rootvp)) != 0) + return (error); + + /* + * Keep flushing vnodes from the mount list. + * This is needed because of the un_pvp held + * reference to the parent vnode. + * If more vnodes have been freed on a given pass, + * the try again. 
The loop will iterate at most + * (d) times, where (d) is the maximum tree depth + * in the filesystem. + */ + for (freeing = 0; vflush(mp, um_rootvp, flags) != 0;) { + struct vnode *vp; + int n; + + /* count #vnodes held on mount list */ + for (n = 0, vp = mp->mnt_vnodelist.lh_first; + vp != NULLVP; + vp = vp->v_mntvnodes.le_next) + n++; + + /* if this is unchanged then stop */ + if (n == freeing) + break; + + /* otherwise try once more time */ + freeing = n; + } + + /* At this point the root vnode should have a single reference */ + if (um_rootvp->v_usecount > 1) { + vput(um_rootvp); + return (EBUSY); + } + +#ifdef UNION_DIAGNOSTIC + vprint("union root", um_rootvp); +#endif + /* + * Discard references to upper and lower target vnodes. + */ + if (um->um_lowervp) + vrele(um->um_lowervp); + vrele(um->um_uppervp); + crfree(um->um_cred); + /* + * Release reference on underlying root vnode + */ + vput(um_rootvp); + /* + * And blow it away for future re-use + */ + vgone(um_rootvp); + /* + * Finally, throw away the union_mount structure + */ + free(mp->mnt_data, M_UNIONFSMNT); /* XXX */ + mp->mnt_data = 0; + return (0); +} + +static int +union_root(mp, vpp) + struct mount *mp; + struct vnode **vpp; +{ + struct proc *p = curproc; /* XXX */ + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + int error; + int loselock; + int lockadj = 0; + + if (um->um_lowervp && um->um_op != UNMNT_BELOW && + VOP_ISLOCKED(um->um_lowervp)) { + VREF(um->um_lowervp); + VOP_UNLOCK(um->um_lowervp, 0, p); + lockadj = 1; + } + + /* + * Return locked reference to root. 
+ */ + VREF(um->um_uppervp); + if ((um->um_op == UNMNT_BELOW) && + VOP_ISLOCKED(um->um_uppervp)) { + loselock = 1; + } else { + vn_lock(um->um_uppervp, LK_EXCLUSIVE | LK_RETRY, p); + loselock = 0; + } + if (um->um_lowervp) + VREF(um->um_lowervp); + error = union_allocvp(vpp, mp, + (struct vnode *) 0, + (struct vnode *) 0, + (struct componentname *) 0, + um->um_uppervp, + um->um_lowervp, + 1); + + if (error) { + if (loselock) + vrele(um->um_uppervp); + else + vput(um->um_uppervp); + if (um->um_lowervp) + vrele(um->um_lowervp); + } else { + if (loselock) + VTOUNION(*vpp)->un_flags &= ~UN_ULOCK; + } + if (lockadj) { + vn_lock(um->um_lowervp, LK_EXCLUSIVE | LK_RETRY, p); + vrele(um->um_lowervp); + } + + return (error); +} + +static int +union_statfs(mp, sbp, p) + struct mount *mp; + struct statfs *sbp; + struct proc *p; +{ + int error; + struct union_mount *um = MOUNTTOUNIONMOUNT(mp); + struct statfs mstat; + int lbsize; + +#ifdef UNION_DIAGNOSTIC + printf("union_statfs(mp = %x, lvp = %x, uvp = %x)\n", mp, + um->um_lowervp, + um->um_uppervp); +#endif + + bzero(&mstat, sizeof(mstat)); + + if (um->um_lowervp) { + error = VFS_STATFS(um->um_lowervp->v_mount, &mstat, p); + if (error) + return (error); + } + + /* now copy across the "interesting" information and fake the rest */ +#if 0 + sbp->f_type = mstat.f_type; + sbp->f_flags = mstat.f_flags; + sbp->f_bsize = mstat.f_bsize; + sbp->f_iosize = mstat.f_iosize; +#endif + lbsize = mstat.f_bsize; + sbp->f_blocks = mstat.f_blocks; + sbp->f_bfree = mstat.f_bfree; + sbp->f_bavail = mstat.f_bavail; + sbp->f_files = mstat.f_files; + sbp->f_ffree = mstat.f_ffree; + + error = VFS_STATFS(um->um_uppervp->v_mount, &mstat, p); + if (error) + return (error); + + sbp->f_flags = mstat.f_flags; + sbp->f_bsize = mstat.f_bsize; + sbp->f_iosize = mstat.f_iosize; + + /* + * if the lower and upper blocksizes differ, then frig the + * block counts so that the sizes reported by df make some + * kind of sense. none of this makes sense though. 
+ */ + + if (mstat.f_bsize != lbsize) + sbp->f_blocks = ((off_t) sbp->f_blocks * lbsize) / mstat.f_bsize; + + /* + * The "total" fields count total resources in all layers, + * the "free" fields count only those resources which are + * free in the upper layer (since only the upper layer + * is writeable). + */ + sbp->f_blocks += mstat.f_blocks; + sbp->f_bfree = mstat.f_bfree; + sbp->f_bavail = mstat.f_bavail; + sbp->f_files += mstat.f_files; + sbp->f_ffree = mstat.f_ffree; + + if (sbp != &mp->mnt_stat) { + sbp->f_type = mp->mnt_vfc->vfc_typenum; + bcopy(&mp->mnt_stat.f_fsid, &sbp->f_fsid, sizeof(sbp->f_fsid)); + bcopy(mp->mnt_stat.f_mntonname, sbp->f_mntonname, MNAMELEN); + bcopy(mp->mnt_stat.f_mntfromname, sbp->f_mntfromname, MNAMELEN); + } + return (0); +} + +/* + * XXX - Assumes no data cached at union layer. + */ +#define union_sync ((int (*) __P((struct mount *, int, struct ucred *, \ + struct proc *)))nullop) + +#define union_fhtovp ((int (*) __P((struct mount *, struct fid *, \ + struct sockaddr *, struct vnode **, int *, struct ucred **)))eopnotsupp) +#define union_quotactl ((int (*) __P((struct mount *, int, uid_t, caddr_t, \ + struct proc *)))eopnotsupp) +#define union_sysctl ((int (*) __P((int *, u_int, void *, size_t *, void *, \ + size_t, struct proc *)))eopnotsupp) +#define union_vget ((int (*) __P((struct mount *, ino_t, struct vnode **))) \ + eopnotsupp) +#define union_vptofh ((int (*) __P((struct vnode *, struct fid *)))eopnotsupp) + +static struct vfsops union_vfsops = { + union_mount, + union_start, + union_unmount, + union_root, + union_quotactl, + union_statfs, + union_sync, + union_vget, + union_fhtovp, + union_vptofh, + union_init, +}; + +VFS_SET(union_vfsops, union, VFCF_LOOPBACK); diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c new file mode 100644 index 0000000..ba9b2a3 --- /dev/null +++ b/sys/fs/unionfs/union_vnops.c @@ -0,0 +1,1804 @@ +/* + * Copyright (c) 1992, 1993, 1994, 1995 Jan-Simon Pendry. 
+ * Copyright (c) 1992, 1993, 1994, 1995 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Jan-Simon Pendry. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)union_vnops.c 8.32 (Berkeley) 6/23/95 + * $Id: union_vnops.c,v 1.59 1998/12/14 05:00:59 dillon Exp $ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/namei.h> +#include <sys/malloc.h> +#include <sys/buf.h> +#include <sys/lock.h> +#include <miscfs/union/union.h> + +#define FIXUP(un, p) { \ + if (((un)->un_flags & UN_ULOCK) == 0) { \ + union_fixup(un, p); \ + } \ +} + +static int union_abortop __P((struct vop_abortop_args *ap)); +static int union_access __P((struct vop_access_args *ap)); +static int union_advlock __P((struct vop_advlock_args *ap)); +static int union_bmap __P((struct vop_bmap_args *ap)); +static int union_close __P((struct vop_close_args *ap)); +static int union_create __P((struct vop_create_args *ap)); +static void union_fixup __P((struct union_node *un, struct proc *p)); +static int union_fsync __P((struct vop_fsync_args *ap)); +static int union_getattr __P((struct vop_getattr_args *ap)); +static int union_inactive __P((struct vop_inactive_args *ap)); +static int union_ioctl __P((struct vop_ioctl_args *ap)); +static int union_islocked __P((struct vop_islocked_args *ap)); +static int union_lease __P((struct vop_lease_args *ap)); +static int union_link __P((struct vop_link_args *ap)); +static int union_lock __P((struct vop_lock_args *ap)); +static int union_lookup __P((struct vop_lookup_args *ap)); +static int union_lookup1 __P((struct vnode *udvp, struct vnode **dvpp, + struct vnode **vpp, + struct componentname *cnp)); +static int union_mkdir __P((struct vop_mkdir_args *ap)); +static int union_mknod __P((struct vop_mknod_args *ap)); +static int union_mmap __P((struct vop_mmap_args *ap)); +static int union_open __P((struct vop_open_args *ap)); +static int union_pathconf __P((struct vop_pathconf_args *ap)); +static int union_print __P((struct vop_print_args *ap)); +static int 
union_read __P((struct vop_read_args *ap)); +static int union_readdir __P((struct vop_readdir_args *ap)); +static int union_readlink __P((struct vop_readlink_args *ap)); +static int union_reclaim __P((struct vop_reclaim_args *ap)); +static int union_remove __P((struct vop_remove_args *ap)); +static int union_rename __P((struct vop_rename_args *ap)); +static int union_revoke __P((struct vop_revoke_args *ap)); +static int union_rmdir __P((struct vop_rmdir_args *ap)); +static int union_poll __P((struct vop_poll_args *ap)); +static int union_setattr __P((struct vop_setattr_args *ap)); +static int union_strategy __P((struct vop_strategy_args *ap)); +static int union_symlink __P((struct vop_symlink_args *ap)); +static int union_unlock __P((struct vop_unlock_args *ap)); +static int union_whiteout __P((struct vop_whiteout_args *ap)); +static int union_write __P((struct vop_read_args *ap)); + +static void +union_fixup(un, p) + struct union_node *un; + struct proc *p; +{ + + vn_lock(un->un_uppervp, LK_EXCLUSIVE | LK_RETRY, p); + un->un_flags |= UN_ULOCK; +} + +static int +union_lookup1(udvp, dvpp, vpp, cnp) + struct vnode *udvp; + struct vnode **dvpp; + struct vnode **vpp; + struct componentname *cnp; +{ + int error; + struct proc *p = cnp->cn_proc; + struct vnode *tdvp; + struct vnode *dvp; + struct mount *mp; + + dvp = *dvpp; + + /* + * If stepping up the directory tree, check for going + * back across the mount point, in which case do what + * lookup would do by stepping back down the mount + * hierarchy. + */ + if (cnp->cn_flags & ISDOTDOT) { + while ((dvp != udvp) && (dvp->v_flag & VROOT)) { + /* + * Don't do the NOCROSSMOUNT check + * at this level. By definition, + * union fs deals with namespaces, not + * filesystems. 
+ */ + tdvp = dvp; + *dvpp = dvp = dvp->v_mount->mnt_vnodecovered; + vput(tdvp); + VREF(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + } + } + + error = VOP_LOOKUP(dvp, &tdvp, cnp); + if (error) + return (error); + + /* + * The parent directory will have been unlocked, unless lookup + * found the last component. In which case, re-lock the node + * here to allow it to be unlocked again (phew) in union_lookup. + */ + if (dvp != tdvp && !(cnp->cn_flags & ISLASTCN)) + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + + dvp = tdvp; + + /* + * Lastly check if the current node is a mount point in + * which case walk up the mount hierarchy making sure not to + * bump into the root of the mount tree (ie. dvp != udvp). + */ + while (dvp != udvp && (dvp->v_type == VDIR) && + (mp = dvp->v_mountedhere)) { + + if (vfs_busy(mp, 0, 0, p)) + continue; + + error = VFS_ROOT(mp, &tdvp); + vfs_unbusy(mp, p); + if (error) { + vput(dvp); + return (error); + } + + vput(dvp); + dvp = tdvp; + } + + *vpp = dvp; + return (0); +} + +static int +union_lookup(ap) + struct vop_lookup_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + int error; + int uerror, lerror; + struct vnode *uppervp, *lowervp; + struct vnode *upperdvp, *lowerdvp; + struct vnode *dvp = ap->a_dvp; + struct union_node *dun = VTOUNION(dvp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + int lockparent = cnp->cn_flags & LOCKPARENT; + struct union_mount *um = MOUNTTOUNIONMOUNT(dvp->v_mount); + struct ucred *saved_cred = NULL; + int iswhiteout; + struct vattr va; + + + /* + * Disallow write attemps to the filesystem mounted read-only. + */ + if ((cnp->cn_flags & ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) + return (EROFS); + +#ifdef notyet + if (cnp->cn_namelen == 3 && + cnp->cn_nameptr[2] == '.' && + cnp->cn_nameptr[1] == '.' 
&& + cnp->cn_nameptr[0] == '.') { + dvp = *ap->a_vpp = LOWERVP(ap->a_dvp); + if (dvp == NULLVP) + return (ENOENT); + VREF(dvp); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + if (!lockparent || !(cnp->cn_flags & ISLASTCN)) + VOP_UNLOCK(ap->a_dvp, 0, p); + return (0); + } +#endif + + cnp->cn_flags |= LOCKPARENT; + + upperdvp = dun->un_uppervp; + lowerdvp = dun->un_lowervp; + uppervp = NULLVP; + lowervp = NULLVP; + iswhiteout = 0; + + if (cnp->cn_flags & ISDOTDOT) { + if (upperdvp != NULL) + VREF(upperdvp); + if (lowerdvp != NULL) + VREF(lowerdvp); + } + + /* + * do the lookup in the upper level. + * if that level comsumes additional pathnames, + * then assume that something special is going + * on and just return that vnode. + */ + if (upperdvp != NULLVP) { + FIXUP(dun, p); + /* + * If we're doing `..' in the underlying filesystem, + * we must drop our lock on the union node before + * going up the tree in the lower file system--if we block + * on the lowervp lock, and that's held by someone else + * coming down the tree and who's waiting for our lock, + * we would be hosed. + */ + if (cnp->cn_flags & ISDOTDOT) { + /* retain lock on underlying VP: */ + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(dvp, 0, p); + } + uerror = union_lookup1(um->um_uppervp, &upperdvp, + &uppervp, cnp); + /* + * Disallow write attemps to the filesystem mounted read-only. + */ + if (uerror == EJUSTRETURN && (cnp->cn_flags & ISLASTCN) && + (dvp->v_mount->mnt_flag & MNT_RDONLY) && + (cnp->cn_nameiop == CREATE || cnp->cn_nameiop == RENAME)) { + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + return (EROFS); + } + + if (cnp->cn_flags & ISDOTDOT) { + if (dun->un_uppervp == upperdvp) { + /* + * We got the underlying bugger back locked... + * now take back the union node lock. Since we + * hold the uppervp lock, we can diddle union + * locking flags at will. 
:) + */ + dun->un_flags |= UN_ULOCK; + } + /* + * If upperdvp got swapped out, it means we did + * some mount point magic, and we do not have + * dun->un_uppervp locked currently--so we get it + * locked here (don't set the UN_ULOCK flag). + */ + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY, p); + } + + /*if (uppervp == upperdvp) + dun->un_flags |= UN_KLOCK;*/ + + if (cnp->cn_consume != 0) { + *ap->a_vpp = uppervp; + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + error = uerror; + goto out; + } + if (uerror == ENOENT || uerror == EJUSTRETURN) { + if (cnp->cn_flags & ISWHITEOUT) { + iswhiteout = 1; + } else if (lowerdvp != NULLVP) { + lerror = VOP_GETATTR(upperdvp, &va, + cnp->cn_cred, cnp->cn_proc); + if (lerror == 0 && (va.va_flags & OPAQUE)) + iswhiteout = 1; + } + } + } else { + uerror = ENOENT; + } + + /* + * in a similar way to the upper layer, do the lookup + * in the lower layer. this time, if there is some + * component magic going on, then vput whatever we got + * back from the upper layer and return the lower vnode + * instead. + */ + if (lowerdvp != NULLVP && !iswhiteout) { + int nameiop; + + vn_lock(lowerdvp, LK_EXCLUSIVE | LK_RETRY, p); + + /* + * Only do a LOOKUP on the bottom node, since + * we won't be making changes to it anyway. + */ + nameiop = cnp->cn_nameiop; + cnp->cn_nameiop = LOOKUP; + if (um->um_op == UNMNT_BELOW) { + saved_cred = cnp->cn_cred; + cnp->cn_cred = um->um_cred; + } + /* + * We shouldn't have to worry about locking interactions + * between the lower layer and our union layer (w.r.t. + * `..' processing) because we don't futz with lowervp + * locks in the union-node instantiation code path. 
+ */ + lerror = union_lookup1(um->um_lowervp, &lowerdvp, + &lowervp, cnp); + if (um->um_op == UNMNT_BELOW) + cnp->cn_cred = saved_cred; + cnp->cn_nameiop = nameiop; + + if (lowervp != lowerdvp) + VOP_UNLOCK(lowerdvp, 0, p); + + if (cnp->cn_consume != 0 || lerror == EACCES) { + if (lerror == EACCES) + lowervp = NULLVP; + if (uppervp != NULLVP) { + if (uppervp == upperdvp) + vrele(uppervp); + else + vput(uppervp); + uppervp = NULLVP; + } + *ap->a_vpp = lowervp; + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + error = lerror; + goto out; + } + } else { + lerror = ENOENT; + if ((cnp->cn_flags & ISDOTDOT) && dun->un_pvp != NULLVP) { + lowervp = LOWERVP(dun->un_pvp); + if (lowervp != NULLVP) { + VREF(lowervp); + vn_lock(lowervp, LK_EXCLUSIVE | LK_RETRY, p); + lerror = 0; + } + } + } + + if (!lockparent) + cnp->cn_flags &= ~LOCKPARENT; + + /* + * at this point, we have uerror and lerror indicating + * possible errors with the lookups in the upper and lower + * layers. additionally, uppervp and lowervp are (locked) + * references to existing vnodes in the upper and lower layers. + * + * there are now three cases to consider. + * 1. if both layers returned an error, then return whatever + * error the upper layer generated. + * + * 2. if the top layer failed and the bottom layer succeeded + * then two subcases occur. + * a. the bottom vnode is not a directory, in which + * case just return a new union vnode referencing + * an empty top layer and the existing bottom layer. + * b. the bottom vnode is a directory, in which case + * create a new directory in the top-level and + * continue as in case 3. + * + * 3. if the top layer succeeded then return a new union + * vnode referencing whatever the new top layer and + * whatever the bottom layer returned. + */ + + *ap->a_vpp = NULLVP; + + /* case 1. */ + if ((uerror != 0) && (lerror != 0)) { + error = uerror; + goto out; + } + + /* case 2. 
*/ + if (uerror != 0 /* && (lerror == 0) */ ) { + if (lowervp->v_type == VDIR) { /* case 2b. */ + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(upperdvp, 0, p); + uerror = union_mkshadow(um, upperdvp, cnp, &uppervp); + vn_lock(upperdvp, LK_EXCLUSIVE | LK_RETRY, p); + dun->un_flags |= UN_ULOCK; + + if (uerror) { + if (lowervp != NULLVP) { + vput(lowervp); + lowervp = NULLVP; + } + error = uerror; + goto out; + } + } + } + + if (lowervp != NULLVP) + VOP_UNLOCK(lowervp, 0, p); + + error = union_allocvp(ap->a_vpp, dvp->v_mount, dvp, upperdvp, cnp, + uppervp, lowervp, 1); + + if (error) { + if (uppervp != NULLVP) + vput(uppervp); + if (lowervp != NULLVP) + vrele(lowervp); + } else { + if (*ap->a_vpp != dvp) + if (!lockparent || !(cnp->cn_flags & ISLASTCN)) + VOP_UNLOCK(dvp, 0, p); +#ifdef DIAGNOSTIC + if (cnp->cn_namelen == 1 && + cnp->cn_nameptr[0] == '.' && + *ap->a_vpp != dvp) { + panic("union_lookup returning . (%p) not same as startdir (%p)", + ap->a_vpp, dvp); + } +#endif + } + +out: + if (cnp->cn_flags & ISDOTDOT) { + if (upperdvp != NULL) + vrele(upperdvp); + if (lowerdvp != NULL) + vrele(lowerdvp); + } + + return (error); +} + +static int +union_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + + if (dvp != NULLVP) { + struct vnode *vp; + struct mount *mp; + int error; + + FIXUP(dun, p); + + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); + error = VOP_CREATE(dvp, &vp, cnp, ap->a_vap); + if (error) { + dun->un_flags |= UN_ULOCK; + return (error); + } + + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(dvp, 0, p); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, cnp, vp, + NULLVP, 1); + if (error) + vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); + return (error); + } + + return 
(EROFS); +} + +static int +union_whiteout(ap) + struct vop_whiteout_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + int a_flags; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_dvp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + + if (un->un_uppervp == NULLVP) + return (EOPNOTSUPP); + + FIXUP(un, p); + return (VOP_WHITEOUT(un->un_uppervp, cnp, ap->a_flags)); +} + +static int +union_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + + if (dvp != NULLVP) { + struct vnode *vp; + struct mount *mp; + int error; + + FIXUP(dun, p); + + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); + error = VOP_MKNOD(dvp, &vp, cnp, ap->a_vap); + if (error) { + dun->un_flags |= UN_ULOCK; + return (error); + } + + if (vp != NULLVP) { + mp = ap->a_dvp->v_mount; + VOP_UNLOCK(dvp, 0, p); + error = union_allocvp(ap->a_vpp, mp, NULLVP, NULLVP, + cnp, vp, NULLVP, 1); + if (error) + vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, p); + } else { + dun->un_flags |= UN_ULOCK; + } + return (error); + } + + return (EROFS); +} + +static int +union_open(ap) + struct vop_open_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *tvp; + int mode = ap->a_mode; + struct ucred *cred = ap->a_cred; + struct proc *p = ap->a_p; + int error; + + /* + * If there is an existing upper vp then simply open that. + */ + tvp = un->un_uppervp; + if (tvp == NULLVP) { + /* + * If the lower vnode is being opened for writing, then + * copy the file contents to the upper vnode and open that, + * otherwise can simply open the lower vnode. 
+ */ + tvp = un->un_lowervp; + if ((ap->a_mode & FWRITE) && (tvp->v_type == VREG)) { + error = union_copyup(un, (mode&O_TRUNC) == 0, cred, p); + if (error == 0) + error = VOP_OPEN(un->un_uppervp, mode, cred, p); + return (error); + } + + /* + * Just open the lower vnode + */ + un->un_openl++; + vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); + error = VOP_OPEN(tvp, mode, cred, p); + VOP_UNLOCK(tvp, 0, p); + + return (error); + } + + FIXUP(un, p); + + error = VOP_OPEN(tvp, mode, cred, p); + + return (error); +} + +static int +union_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp; + + if ((vp = un->un_uppervp) == NULLVP) { +#ifdef UNION_DIAGNOSTIC + if (un->un_openl <= 0) + panic("union: un_openl cnt"); +#endif + --un->un_openl; + vp = un->un_lowervp; + } + + ap->a_vp = vp; + return (VCALL(vp, VOFFSET(vop_close), ap)); +} + +/* + * Check access permission on the union vnode. + * The access check being enforced is to check + * against both the underlying vnode, and any + * copied vnode. This ensures that no additional + * file permissions are given away simply because + * the user caused an implicit file copy. + */ +static int +union_access(ap) + struct vop_access_args /* { + struct vnodeop_desc *a_desc; + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; + int error = EACCES; + struct vnode *vp; + struct vnode *savedvp; + + /* + * Disallow write attempts on filesystems mounted read-only. 
+ */ + if (ap->a_mode & VWRITE && (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY)) { + switch (ap->a_vp->v_type) { + case VREG: + case VDIR: + case VLNK: + return (EROFS); + default: + break; + } + } + if ((vp = un->un_uppervp) != NULLVP) { + FIXUP(un, p); + ap->a_vp = vp; + return (VCALL(vp, VOFFSET(vop_access), ap)); + } + + if ((vp = un->un_lowervp) != NULLVP) { + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + savedvp = ap->a_vp; + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_access), ap); + if (error == 0) { + struct union_mount *um = MOUNTTOUNIONMOUNT(savedvp->v_mount); + + if (um->um_op == UNMNT_BELOW) { + ap->a_cred = um->um_cred; + error = VCALL(vp, VOFFSET(vop_access), ap); + } + } + VOP_UNLOCK(vp, 0, p); + if (error) + return (error); + } + + return (error); +} + +/* + * We handle getattr only to change the fsid and + * track object sizes + */ +static int +union_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + int error; + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *vp = un->un_uppervp; + struct proc *p = ap->a_p; + struct vattr *vap; + struct vattr va; + + + /* + * Some programs walk the filesystem hierarchy by counting + * links to directories to avoid stat'ing all the time. + * This means the link count on directories needs to be "correct". + * The only way to do that is to call getattr on both layers + * and fix up the link count. The link count will not necessarily + * be accurate but will be large enough to defeat the tree walkers. + */ + + vap = ap->a_vap; + + vp = un->un_uppervp; + if (vp != NULLVP) { + /* + * It's not clear whether VOP_GETATTR is to be + * called with the vnode locked or not. stat() calls + * it with (vp) locked, and fstat calls it with + * (vp) unlocked. + * In the mean time, compensate here by checking + * the union_node's lock flag. 
+ */ + if (un->un_flags & UN_LOCKED) + FIXUP(un, p); + + error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); + if (error) + return (error); + union_newsize(ap->a_vp, vap->va_size, VNOVAL); + } + + if (vp == NULLVP) { + vp = un->un_lowervp; + } else if (vp->v_type == VDIR && un->un_lowervp != NULLVP) { + vp = un->un_lowervp; + vap = &va; + } else { + vp = NULLVP; + } + + if (vp != NULLVP) { + error = VOP_GETATTR(vp, vap, ap->a_cred, ap->a_p); + if (error) + return (error); + union_newsize(ap->a_vp, VNOVAL, vap->va_size); + } + + if ((vap != ap->a_vap) && (vap->va_type == VDIR)) + ap->a_vap->va_nlink += vap->va_nlink; + + ap->a_vap->va_fsid = ap->a_vp->v_mount->mnt_stat.f_fsid.val[0]; + return (0); +} + +static int +union_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; + struct vattr *vap = ap->a_vap; + int error; + + /* + * Disallow write attempts on filesystems mounted read-only. + */ + if ((ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) && + (vap->va_flags != VNOVAL || vap->va_uid != (uid_t)VNOVAL || + vap->va_gid != (gid_t)VNOVAL || vap->va_atime.tv_sec != VNOVAL || + vap->va_mtime.tv_sec != VNOVAL || vap->va_mode != (mode_t)VNOVAL)) + return (EROFS); + + /* + * Handle case of truncating lower object to zero size, + * by creating a zero length upper object. This is to + * handle the case of open with O_TRUNC and O_CREAT. + */ + if ((un->un_uppervp == NULLVP) && + /* assert(un->un_lowervp != NULLVP) */ + (un->un_lowervp->v_type == VREG)) { + error = union_copyup(un, (ap->a_vap->va_size != 0), + ap->a_cred, ap->a_p); + if (error) + return (error); + } + + /* + * Try to set attributes in upper layer, + * otherwise return read-only filesystem error. 
+ */ + if (un->un_uppervp != NULLVP) { + FIXUP(un, p); + error = VOP_SETATTR(un->un_uppervp, ap->a_vap, + ap->a_cred, ap->a_p); + if ((error == 0) && (ap->a_vap->va_size != VNOVAL)) + union_newsize(ap->a_vp, ap->a_vap->va_size, VNOVAL); + } else { + error = EROFS; + } + + return (error); +} + +static int +union_read(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + int error; + struct proc *p = ap->a_uio->uio_procp; + struct vnode *vp = OTHERVP(ap->a_vp); + int dolock = (vp == LOWERVP(ap->a_vp)); + + if (dolock) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + else + FIXUP(VTOUNION(ap->a_vp), p); + error = VOP_READ(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + if (dolock) + VOP_UNLOCK(vp, 0, p); + + /* + * XXX + * perhaps the size of the underlying object has changed under + * our feet. take advantage of the offset information present + * in the uio structure. + */ + if (error == 0) { + struct union_node *un = VTOUNION(ap->a_vp); + off_t cur = ap->a_uio->uio_offset; + + if (vp == un->un_uppervp) { + if (cur > un->un_uppersz) + union_newsize(ap->a_vp, cur, VNOVAL); + } else { + if (cur > un->un_lowersz) + union_newsize(ap->a_vp, VNOVAL, cur); + } + } + + return (error); +} + +static int +union_write(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + int error; + struct vnode *vp; + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_uio->uio_procp; + + vp = UPPERVP(ap->a_vp); + if (vp == NULLVP) + panic("union: missing upper layer in write"); + + FIXUP(un, p); + error = VOP_WRITE(vp, ap->a_uio, ap->a_ioflag, ap->a_cred); + + /* + * the size of the underlying object may be changed by the + * write. 
+ */ + if (error == 0) { + off_t cur = ap->a_uio->uio_offset; + + if (cur > un->un_uppersz) + union_newsize(ap->a_vp, cur, VNOVAL); + } + + return (error); +} + +static int +union_lease(ap) + struct vop_lease_args /* { + struct vnode *a_vp; + struct proc *a_p; + struct ucred *a_cred; + int a_flag; + } */ *ap; +{ + register struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_lease), ap)); +} + +static int +union_ioctl(ap) + struct vop_ioctl_args /* { + struct vnode *a_vp; + int a_command; + caddr_t a_data; + int a_fflag; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_ioctl), ap)); +} + +static int +union_poll(ap) + struct vop_poll_args /* { + struct vnode *a_vp; + int a_events; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_poll), ap)); +} + +static int +union_revoke(ap) + struct vop_revoke_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + if (UPPERVP(vp)) + VOP_REVOKE(UPPERVP(vp), ap->a_flags); + if (LOWERVP(vp)) + VOP_REVOKE(LOWERVP(vp), ap->a_flags); + vgone(vp); + return (0); +} + +static int +union_mmap(ap) + struct vop_mmap_args /* { + struct vnode *a_vp; + int a_fflags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_mmap), ap)); +} + +static int +union_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + int error = 0; + struct proc *p = ap->a_p; + struct vnode *targetvp = OTHERVP(ap->a_vp); + struct union_node *un; + + if (targetvp != NULLVP) { + int dolock = (targetvp == LOWERVP(ap->a_vp)); + + un = VTOUNION(ap->a_vp); + if 
(dolock) + vn_lock(targetvp, LK_EXCLUSIVE | LK_RETRY, p); + else { + un = VTOUNION(ap->a_vp); + if ((un->un_flags & UN_ULOCK) == 0 && + targetvp->v_data != NULL && + ((struct lock *)targetvp->v_data)->lk_lockholder + == curproc->p_pid && + VOP_ISLOCKED(targetvp) != 0) + return 0; /* XXX */ + + FIXUP(un, p); + } + + error = VOP_FSYNC(targetvp, ap->a_cred, ap->a_waitfor, p); + if (dolock) + VOP_UNLOCK(targetvp, 0, p); + } + + return (error); +} + +static int +union_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct union_node *un = VTOUNION(ap->a_vp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + int error; + + if (dun->un_uppervp == NULLVP) + panic("union remove: null upper vnode"); + + if (un->un_uppervp != NULLVP) { + struct vnode *dvp = dun->un_uppervp; + struct vnode *vp = un->un_uppervp; + + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); + FIXUP(un, p); + un->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_vp, 0, p); + + if (union_dowhiteout(un, cnp->cn_cred, p)) + cnp->cn_flags |= DOWHITEOUT; + error = VOP_REMOVE(dvp, vp, cnp); +#if 0 + /* XXX */ + if (!error) + union_removed_upper(un); +#endif + dun->un_flags |= UN_ULOCK; + un->un_flags |= UN_ULOCK; + } else { + FIXUP(dun, p); + error = union_mkwhiteout( + MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); + } + + return (error); +} + +static int +union_link(ap) + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct union_node *dun = VTOUNION(ap->a_tdvp); + struct vnode *vp; + struct vnode *tdvp; + int error = 0; + + + if (ap->a_tdvp->v_op != ap->a_vp->v_op) { + vp = ap->a_vp; + } else { + struct union_node *tun = VTOUNION(ap->a_vp); + if 
(tun->un_uppervp == NULLVP) { + vn_lock(ap->a_vp, LK_EXCLUSIVE | LK_RETRY, p); + if (dun->un_uppervp == tun->un_dirvp) { + dun->un_flags &= ~UN_ULOCK; + VOP_UNLOCK(dun->un_uppervp, 0, p); + } + error = union_copyup(tun, 1, cnp->cn_cred, p); + if (dun->un_uppervp == tun->un_dirvp) { + vn_lock(dun->un_uppervp, + LK_EXCLUSIVE | LK_RETRY, p); + dun->un_flags |= UN_ULOCK; + } + VOP_UNLOCK(ap->a_vp, 0, p); + } + vp = tun->un_uppervp; + } + + tdvp = dun->un_uppervp; + if (tdvp == NULLVP) + error = EROFS; + + if (error) + return (error); + + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_tdvp, 0, p); + + error = VOP_LINK(tdvp, vp, cnp); + + dun->un_flags |= UN_ULOCK; + + return (error); +} + +static int +union_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + int error; + + struct vnode *fdvp = ap->a_fdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *tvp = ap->a_tvp; + + if (fdvp->v_op == union_vnodeop_p) { /* always true */ + struct union_node *un = VTOUNION(fdvp); + if (un->un_uppervp == NULLVP) { + /* + * this should never happen in normal + * operation but might if there was + * a problem creating the top-level shadow + * directory. 
+ */ + error = EXDEV; + goto bad; + } + + fdvp = un->un_uppervp; + VREF(fdvp); + vrele(ap->a_fdvp); + } + + if (fvp->v_op == union_vnodeop_p) { /* always true */ + struct union_node *un = VTOUNION(fvp); + if (un->un_uppervp == NULLVP) { + /* XXX: should do a copyup */ + error = EXDEV; + goto bad; + } + + if (un->un_lowervp != NULLVP) + ap->a_fcnp->cn_flags |= DOWHITEOUT; + + fvp = un->un_uppervp; + VREF(fvp); + vrele(ap->a_fvp); + } + + if (tdvp->v_op == union_vnodeop_p) { + struct union_node *un = VTOUNION(tdvp); + if (un->un_uppervp == NULLVP) { + /* + * this should never happen in normal + * operation but might if there was + * a problem creating the top-level shadow + * directory. + */ + error = EXDEV; + goto bad; + } + + tdvp = un->un_uppervp; + VREF(tdvp); + un->un_flags |= UN_KLOCK; + vput(ap->a_tdvp); + } + + if (tvp != NULLVP && tvp->v_op == union_vnodeop_p) { + struct union_node *un = VTOUNION(tvp); + + tvp = un->un_uppervp; + if (tvp != NULLVP) { + VREF(tvp); + un->un_flags |= UN_KLOCK; + } + vput(ap->a_tvp); + } + + return (VOP_RENAME(fdvp, fvp, ap->a_fcnp, tdvp, tvp, ap->a_tcnp)); + +bad: + vrele(fdvp); + vrele(fvp); + vput(tdvp); + if (tvp != NULLVP) + vput(tvp); + + return (error); +} + +static int +union_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + + if (dvp != NULLVP) { + struct vnode *vp; + int error; + + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); + error = VOP_MKDIR(dvp, &vp, cnp, ap->a_vap); + if (error) { + dun->un_flags |= UN_ULOCK; + return (error); + } + + VOP_UNLOCK(dvp, 0, p); + error = union_allocvp(ap->a_vpp, ap->a_dvp->v_mount, ap->a_dvp, + NULLVP, cnp, vp, NULLVP, 1); + if (error) + vput(vp); + vn_lock(ap->a_dvp, LK_EXCLUSIVE| LK_RETRY, 
p); + + return (error); + } + + return (EROFS); +} + +static int +union_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct union_node *un = VTOUNION(ap->a_vp); + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + int error; + + if (dun->un_uppervp == NULLVP) + panic("union rmdir: null upper vnode"); + + if (un->un_uppervp != NULLVP) { + struct vnode *dvp = dun->un_uppervp; + struct vnode *vp = un->un_uppervp; + + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); + FIXUP(un, p); + un->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_vp, 0, p); + + if (union_dowhiteout(un, cnp->cn_cred, p)) + cnp->cn_flags |= DOWHITEOUT; + error = VOP_RMDIR(dvp, vp, ap->a_cnp); +#if 0 + /* XXX */ + if (!error) + union_removed_upper(un); +#endif + dun->un_flags |= UN_ULOCK; + un->un_flags |= UN_ULOCK; + } else { + FIXUP(dun, p); + error = union_mkwhiteout( + MOUNTTOUNIONMOUNT(UNIONTOV(dun)->v_mount), + dun->un_uppervp, ap->a_cnp, un->un_path); + } + + return (error); +} + +static int +union_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + struct union_node *dun = VTOUNION(ap->a_dvp); + struct vnode *dvp = dun->un_uppervp; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + + if (dvp != NULLVP) { + struct vnode *vp; + int error; + + FIXUP(dun, p); + dun->un_flags |= UN_KLOCK; + VOP_UNLOCK(ap->a_dvp, 0, p); + error = VOP_SYMLINK(dvp, &vp, cnp, ap->a_vap, ap->a_target); + dun->un_flags |= UN_ULOCK; + *ap->a_vpp = NULLVP; + return (error); + } + + return (EROFS); +} + +/* + * union_readdir works in concert with getdirentries and + * readdir(3) to provide a list of entries in the unioned + * directories. 
getdirentries is responsible for walking + * down the union stack. readdir(3) is responsible for + * eliminating duplicate names from the returned data stream. + */ +static int +union_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + u_long *a_cookies; + int a_ncookies; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct vnode *uvp = un->un_uppervp; + struct proc *p = ap->a_uio->uio_procp; + + if (uvp == NULLVP) + return (0); + + FIXUP(un, p); + ap->a_vp = uvp; + return (VCALL(uvp, VOFFSET(vop_readdir), ap)); +} + +static int +union_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + int error; + struct uio *uio = ap->a_uio; + struct proc *p = uio->uio_procp; + struct vnode *vp = OTHERVP(ap->a_vp); + int dolock = (vp == LOWERVP(ap->a_vp)); + + if (dolock) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + else + FIXUP(VTOUNION(ap->a_vp), p); + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_readlink), ap); + if (dolock) + VOP_UNLOCK(vp, 0, p); + + return (error); +} + +static int +union_abortop(ap) + struct vop_abortop_args /* { + struct vnode *a_dvp; + struct componentname *a_cnp; + } */ *ap; +{ + int error; + struct componentname *cnp = ap->a_cnp; + struct proc *p = cnp->cn_proc; + struct vnode *vp = OTHERVP(ap->a_dvp); + struct union_node *un = VTOUNION(ap->a_dvp); + int islocked = un->un_flags & UN_LOCKED; + int dolock = (vp == LOWERVP(ap->a_dvp)); + + if (islocked) { + if (dolock) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + else + FIXUP(VTOUNION(ap->a_dvp), p); + } + ap->a_dvp = vp; + error = VCALL(vp, VOFFSET(vop_abortop), ap); + if (islocked && dolock) + VOP_UNLOCK(vp, 0, p); + + return (error); +} + +static int +union_inactive(ap) + struct vop_inactive_args /* { + struct vnode *a_vp; + struct proc *a_p; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + struct 
union_node *un = VTOUNION(vp); + struct vnode **vpp; + + /* + * Do nothing (and _don't_ bypass). + * Wait to vrele lowervp until reclaim, + * so that until then our union_node is in the + * cache and reusable. + * + * NEEDSWORK: Someday, consider inactive'ing + * the lowervp and then trying to reactivate it + * with capabilities (v_id) + * like they do in the name lookup cache code. + * That's too much work for now. + */ + + if (un->un_dircache != 0) { + for (vpp = un->un_dircache; *vpp != NULLVP; vpp++) + vrele(*vpp); + free(un->un_dircache, M_TEMP); + un->un_dircache = 0; + } + + VOP_UNLOCK(vp, 0, p); + + if ((un->un_flags & UN_CACHED) == 0) + vgone(vp); + + return (0); +} + +static int +union_reclaim(ap) + struct vop_reclaim_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + union_freevp(ap->a_vp); + + return (0); +} + +static int +union_lock(ap) + struct vop_lock_args *ap; +{ + struct vnode *vp = ap->a_vp; + struct proc *p = ap->a_p; + int flags = ap->a_flags; + struct union_node *un; + int error; + + vop_nolock(ap); + /* + * Need to do real lockmgr-style locking here. + * in the mean time, draining won't work quite right, + * which could lead to a few race conditions. 
+ * the following test was here, but is not quite right, we + * still need to take the lock: + if ((flags & LK_TYPE_MASK) == LK_DRAIN) + return (0); + */ + flags &= ~LK_INTERLOCK; + +start: + un = VTOUNION(vp); + + if (un->un_uppervp != NULLVP) { + if (((un->un_flags & UN_ULOCK) == 0) && + (vp->v_usecount != 0)) { + error = vn_lock(un->un_uppervp, flags, p); + if (error) + return (error); + un->un_flags |= UN_ULOCK; + } +#ifdef DIAGNOSTIC + if (un->un_flags & UN_KLOCK) { + vprint("dangling upper lock", vp); + panic("union: dangling upper lock"); + } +#endif + } + + if (un->un_flags & UN_LOCKED) { +#ifdef DIAGNOSTIC + if (curproc && un->un_pid == curproc->p_pid && + un->un_pid > -1 && curproc->p_pid > -1) + panic("union: locking against myself"); +#endif + un->un_flags |= UN_WANT; + tsleep((caddr_t)&un->un_flags, PINOD, "unionlk2", 0); + goto start; + } + +#ifdef DIAGNOSTIC + if (curproc) + un->un_pid = curproc->p_pid; + else + un->un_pid = -1; +#endif + + un->un_flags |= UN_LOCKED; + return (0); +} + +/* + * When operations want to vput() a union node yet retain a lock on + * the upper vnode (say, to do some further operations like link(), + * mkdir(), ...), they set UN_KLOCK on the union node, then call + * vput() which calls VOP_UNLOCK() and comes here. union_unlock() + * unlocks the union node (leaving the upper vnode alone), clears the + * KLOCK flag, and then returns to vput(). The caller then does whatever + * is left to do with the upper vnode, and ensures that it gets unlocked. + * + * If UN_KLOCK isn't set, then the upper vnode is unlocked here. 
+ */ +static int +union_unlock(ap) + struct vop_unlock_args /* { + struct vnode *a_vp; + int a_flags; + struct proc *a_p; + } */ *ap; +{ + struct union_node *un = VTOUNION(ap->a_vp); + struct proc *p = ap->a_p; + +#ifdef DIAGNOSTIC + if ((un->un_flags & UN_LOCKED) == 0) + panic("union: unlock unlocked node"); + if (curproc && un->un_pid != curproc->p_pid && + curproc->p_pid > -1 && un->un_pid > -1) + panic("union: unlocking other process's union node"); +#endif + + un->un_flags &= ~UN_LOCKED; + + if ((un->un_flags & (UN_ULOCK|UN_KLOCK)) == UN_ULOCK) + VOP_UNLOCK(un->un_uppervp, 0, p); + + un->un_flags &= ~(UN_ULOCK|UN_KLOCK); + + if (un->un_flags & UN_WANT) { + un->un_flags &= ~UN_WANT; + wakeup((caddr_t) &un->un_flags); + } + +#ifdef DIAGNOSTIC + un->un_pid = 0; +#endif + vop_nounlock(ap); + + return (0); +} + +static int +union_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct vnode **a_vpp; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + int error; + struct proc *p = curproc; /* XXX */ + struct vnode *vp = OTHERVP(ap->a_vp); + int dolock = (vp == LOWERVP(ap->a_vp)); + + if (dolock) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + else + FIXUP(VTOUNION(ap->a_vp), p); + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_bmap), ap); + if (dolock) + VOP_UNLOCK(vp, 0, p); + + return (error); +} + +static int +union_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + printf("\ttag VT_UNION, vp=%p, uppervp=%p, lowervp=%p\n", + vp, UPPERVP(vp), LOWERVP(vp)); + if (UPPERVP(vp) != NULLVP) + vprint("union: upper", UPPERVP(vp)); + if (LOWERVP(vp) != NULLVP) + vprint("union: lower", LOWERVP(vp)); + + return (0); +} + +static int +union_islocked(ap) + struct vop_islocked_args /* { + struct vnode *a_vp; + } */ *ap; +{ + + return ((VTOUNION(ap->a_vp)->un_flags & UN_LOCKED) ? 
1 : 0); +} + +static int +union_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + int error; + struct proc *p = curproc; /* XXX */ + struct vnode *vp = OTHERVP(ap->a_vp); + int dolock = (vp == LOWERVP(ap->a_vp)); + + if (dolock) + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + else + FIXUP(VTOUNION(ap->a_vp), p); + ap->a_vp = vp; + error = VCALL(vp, VOFFSET(vop_pathconf), ap); + if (dolock) + VOP_UNLOCK(vp, 0, p); + + return (error); +} + +static int +union_advlock(ap) + struct vop_advlock_args /* { + struct vnode *a_vp; + caddr_t a_id; + int a_op; + struct flock *a_fl; + int a_flags; + } */ *ap; +{ + register struct vnode *ovp = OTHERVP(ap->a_vp); + + ap->a_vp = ovp; + return (VCALL(ovp, VOFFSET(vop_advlock), ap)); +} + + +/* + * XXX - vop_strategy must be hand coded because it has no + * vnode in its arguments. + * This goes away with a merged VM/buffer cache. + */ +static int +union_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + int error; + struct vnode *savedvp; + + savedvp = bp->b_vp; + bp->b_vp = OTHERVP(bp->b_vp); + +#ifdef DIAGNOSTIC + if (bp->b_vp == NULLVP) + panic("union_strategy: nil vp"); + if (((bp->b_flags & B_READ) == 0) && + (bp->b_vp == LOWERVP(savedvp))) + panic("union_strategy: writing to lowervp"); +#endif + + error = VOP_STRATEGY(bp->b_vp, bp); + bp->b_vp = savedvp; + + return (error); +} + +/* + * Global vfs data structures + */ +vop_t **union_vnodeop_p; +static struct vnodeopv_entry_desc union_vnodeop_entries[] = { + { &vop_default_desc, (vop_t *) vop_defaultop }, + { &vop_abortop_desc, (vop_t *) union_abortop }, + { &vop_access_desc, (vop_t *) union_access }, + { &vop_advlock_desc, (vop_t *) union_advlock }, + { &vop_bmap_desc, (vop_t *) union_bmap }, + { &vop_close_desc, (vop_t *) union_close }, + { &vop_create_desc, (vop_t *) union_create }, + { &vop_fsync_desc, (vop_t *) union_fsync }, + { 
&vop_getattr_desc, (vop_t *) union_getattr }, + { &vop_inactive_desc, (vop_t *) union_inactive }, + { &vop_ioctl_desc, (vop_t *) union_ioctl }, + { &vop_islocked_desc, (vop_t *) union_islocked }, + { &vop_lease_desc, (vop_t *) union_lease }, + { &vop_link_desc, (vop_t *) union_link }, + { &vop_lock_desc, (vop_t *) union_lock }, + { &vop_lookup_desc, (vop_t *) union_lookup }, + { &vop_mkdir_desc, (vop_t *) union_mkdir }, + { &vop_mknod_desc, (vop_t *) union_mknod }, + { &vop_mmap_desc, (vop_t *) union_mmap }, + { &vop_open_desc, (vop_t *) union_open }, + { &vop_pathconf_desc, (vop_t *) union_pathconf }, + { &vop_poll_desc, (vop_t *) union_poll }, + { &vop_print_desc, (vop_t *) union_print }, + { &vop_read_desc, (vop_t *) union_read }, + { &vop_readdir_desc, (vop_t *) union_readdir }, + { &vop_readlink_desc, (vop_t *) union_readlink }, + { &vop_reclaim_desc, (vop_t *) union_reclaim }, + { &vop_remove_desc, (vop_t *) union_remove }, + { &vop_rename_desc, (vop_t *) union_rename }, + { &vop_revoke_desc, (vop_t *) union_revoke }, + { &vop_rmdir_desc, (vop_t *) union_rmdir }, + { &vop_setattr_desc, (vop_t *) union_setattr }, + { &vop_strategy_desc, (vop_t *) union_strategy }, + { &vop_symlink_desc, (vop_t *) union_symlink }, + { &vop_unlock_desc, (vop_t *) union_unlock }, + { &vop_whiteout_desc, (vop_t *) union_whiteout }, + { &vop_write_desc, (vop_t *) union_write }, + { NULL, NULL } +}; +static struct vnodeopv_desc union_vnodeop_opv_desc = + { &union_vnodeop_p, union_vnodeop_entries }; + +VNODEOP_SET(union_vnodeop_opv_desc); |