diff options
Diffstat (limited to 'sys/fs')
-rw-r--r-- | sys/fs/ext2fs/ext2_alloc.c | 973 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_balloc.c | 292 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_bmap.c | 334 | ||||
-rwxr-xr-x | sys/fs/ext2fs/ext2_dinode.h | 78 | ||||
-rwxr-xr-x | sys/fs/ext2fs/ext2_dir.h | 81 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_extern.h | 93 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_inode.c | 537 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_inode_cnv.c | 138 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_lookup.c | 1101 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_mount.h | 79 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_readwrite.c | 309 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_subr.c | 120 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_vfsops.c | 1071 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_vnops.c | 1676 | ||||
-rwxr-xr-x | sys/fs/ext2fs/ext2fs.h | 329 | ||||
-rw-r--r-- | sys/fs/ext2fs/fs.h | 152 | ||||
-rw-r--r-- | sys/fs/ext2fs/inode.h | 170 |
17 files changed, 7533 insertions, 0 deletions
diff --git a/sys/fs/ext2fs/ext2_alloc.c b/sys/fs/ext2fs/ext2_alloc.c new file mode 100644 index 0000000..1095100 --- /dev/null +++ b/sys/fs/ext2fs/ext2_alloc.c @@ -0,0 +1,973 @@ +/*- + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/conf.h> +#include <sys/vnode.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/syslog.h> +#include <sys/buf.h> + +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extern.h> + +static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); +static u_long ext2_dirpref(struct inode *); +static void ext2_fserr(struct m_ext2fs *, uid_t, char *); +static u_long ext2_hashalloc(struct inode *, int, long, int, + daddr_t (*)(struct inode *, int, daddr_t, + int)); +static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); +static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); +/* + * Allocate a block in the file system. + * + * A preference may be optionally specified. If a preference is given + * the following hierarchy is used to allocate a block: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate a block in the same cylinder group. + * 4) quadradically rehash into other cylinder groups, until an + * available block is located. + * If no block preference is given the following hierarchy is used + * to allocate a block: + * 1) allocate a block in the cylinder group that contains the + * inode for the file. + * 2) quadradically rehash into other cylinder groups, until an + * available block is located. + * + * A preference may be optionally specified. If a preference is given + * the following hierarchy is used to allocate a block: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. + * 3) allocate a block in the same cylinder group. + * 4) quadradically rehash into other cylinder groups, until an + * available block is located. 
+ * If no block preference is given the following hierarchy is used + * to allocate a block: + * 1) allocate a block in the cylinder group that contains the + * inode for the file. + * 2) quadradically rehash into other cylinder groups, until an + * available block is located. + */ + +int +ext2_alloc(ip, lbn, bpref, size, cred, bnp) + struct inode *ip; + int32_t lbn, bpref; + int size; + struct ucred *cred; + int32_t *bnp; +{ + struct m_ext2fs *fs; + struct ext2mount *ump; + int32_t bno; + int cg; + *bnp = 0; + fs = ip->i_e2fs; + ump = ip->i_ump; + mtx_assert(EXT2_MTX(ump), MA_OWNED); +#ifdef DIAGNOSTIC + if ((u_int)size > fs->e2fs_bsize || blkoff(fs, size) != 0) { + vn_printf(ip->i_devvp, "bsize = %lu, size = %d, fs = %s\n", + (long unsigned int)fs->e2fs_bsize, size, fs->e2fs_fsmnt); + panic("ext2_alloc: bad size"); + } + if (cred == NOCRED) + panic("ext2_alloc: missing credential"); +#endif /* DIAGNOSTIC */ + if (size == fs->e2fs_bsize && fs->e2fs->e2fs_fbcount == 0) + goto nospace; + if (cred->cr_uid != 0 && + fs->e2fs->e2fs_fbcount < fs->e2fs->e2fs_rbcount) + goto nospace; + if (bpref >= fs->e2fs->e2fs_bcount) + bpref = 0; + if (bpref == 0) + cg = ino_to_cg(fs, ip->i_number); + else + cg = dtog(fs, bpref); + bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, + ext2_alloccg); + if (bno > 0) { + ip->i_blocks += btodb(fs->e2fs_bsize); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bnp = bno; + return (0); + } +nospace: + EXT2_UNLOCK(ump); + ext2_fserr(fs, cred->cr_uid, "file system full"); + uprintf("\n%s: write failed, file system is full\n", fs->e2fs_fsmnt); + return (ENOSPC); +} + +/* + * Reallocate a sequence of blocks into a contiguous sequence of blocks. + * + * The vnode and an array of buffer pointers for a range of sequential + * logical blocks to be made contiguous is given. 
The allocator attempts + * to find a range of sequential blocks starting as close as possible to + * an fs_rotdelay offset from the end of the allocation for the logical + * block immediately preceding the current range. If successful, the + * physical block numbers in the buffer pointers and in the inode are + * changed to reflect the new allocation. If unsuccessful, the allocation + * is left unchanged. The success in doing the reallocation is returned. + * Note that the error return is not reflected back to the user. Rather + * the previous block allocation will be used. + */ + +#ifdef FANCY_REALLOC +#include <sys/sysctl.h> +static int doasyncfree = 1; +static int doreallocblks = 1; + +#ifdef OPT_DEBUG +SYSCTL_INT(_debug, 14, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, ""); +#endif /* OPT_DEBUG */ +#endif + +int +ext2_reallocblks(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ +#ifndef FANCY_REALLOC +/* printf("ext2_reallocblks not implemented\n"); */ +return ENOSPC; +#else + + struct m_ext2fs *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + int32_t *bap, *sbap, *ebap = 0; + struct ext2mount *ump; + struct cluster_save *buflist; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int32_t start_lbn, end_lbn, soff, newblk, blkno =0; + int i, len, start_lvl, end_lvl, pref, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_e2fs; + ump = ip->i_ump; +#ifdef UNKLAR + if (fs->fs_contigsumsize <= 0) + return (ENOSPC); +#endif + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#ifdef DIAGNOSTIC + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ext2_reallocblks: non-cluster"); +#endif + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous 
cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ext2_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ext2_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (int32_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * Find the preferred location for the cluster. + */ + EXT2_LOCK(ump); + pref = ext2_blkpref(ip, start_lbn, soff, sbap, blkno); + /* + * If the block range spans two block maps, get the second map. + */ + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#ifdef DIAGNOSTIC + if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) + panic("ext2_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){ + EXT2_UNLOCK(ump); + goto fail; + } + ebap = (int32_t *)ebp->b_data; + } + /* + * Search the block map looking for an allocation of the desired size. + */ + if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref, + len, ext2_clusteralloc)) == 0){ + EXT2_UNLOCK(ump); + goto fail; + } + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. 
+ */ + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { + if (i == ssize) + bap = ebap; + soff = -i; +#ifdef DIAGNOSTIC + if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) + panic("ext2_reallocblks: alloc mismatch"); +#endif + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. + */ + if (sbap != &ip->i_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + ext2_update(vp, 1); + } + if (ssize < len) { + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + } + /* + * Last, free the old blocks and assign the new blocks to the buffers. + */ + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { + ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->e2fs_bsize); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); + } + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_db[0]) + brelse(sbp); + return (ENOSPC); + +#endif /* FANCY_REALLOC */ +} + +/* + * Allocate an inode in the file system. 
+ * + */ +int +ext2_valloc(pvp, mode, cred, vpp) + struct vnode *pvp; + int mode; + struct ucred *cred; + struct vnode **vpp; +{ + struct inode *pip; + struct m_ext2fs *fs; + struct inode *ip; + struct ext2mount *ump; + ino_t ino, ipref; + int i, error, cg; + + *vpp = NULL; + pip = VTOI(pvp); + fs = pip->i_e2fs; + ump = pip->i_ump; + + EXT2_LOCK(ump); + if (fs->e2fs->e2fs_ficount == 0) + goto noinodes; + /* + * If it is a directory then obtain a cylinder group based on + * ext2_dirpref else obtain it using ino_to_cg. The preferred inode is + * always the next inode. + */ + if((mode & IFMT) == IFDIR) { + cg = ext2_dirpref(pip); + if (fs->e2fs_contigdirs[cg] < 255) + fs->e2fs_contigdirs[cg]++; + } else { + cg = ino_to_cg(fs, pip->i_number); + if (fs->e2fs_contigdirs[cg] > 0) + fs->e2fs_contigdirs[cg]--; + } + ipref = cg * fs->e2fs->e2fs_ipg + 1; + ino = (ino_t)ext2_hashalloc(pip, cg, (long)ipref, mode, ext2_nodealloccg); + + if (ino == 0) + goto noinodes; + error = VFS_VGET(pvp->v_mount, ino, LK_EXCLUSIVE, vpp); + if (error) { + ext2_vfree(pvp, ino, mode); + return (error); + } + ip = VTOI(*vpp); + + /* + the question is whether using VGET was such good idea at all - + Linux doesn't read the old inode in when it's allocating a + new one. I will set at least i_size & i_blocks the zero. + */ + ip->i_mode = 0; + ip->i_size = 0; + ip->i_blocks = 0; + ip->i_flags = 0; + /* now we want to make sure that the block pointers are zeroed out */ + for (i = 0; i < NDADDR; i++) + ip->i_db[i] = 0; + for (i = 0; i < NIADDR; i++) + ip->i_ib[i] = 0; + + /* + * Set up a new generation number for this inode. 
+ * XXX check if this makes sense in ext2 + */ + if (ip->i_gen == 0 || ++ip->i_gen == 0) + ip->i_gen = random() / 2 + 1; +/* +printf("ext2_valloc: allocated inode %d\n", ino); +*/ + return (0); +noinodes: + EXT2_UNLOCK(ump); + ext2_fserr(fs, cred->cr_uid, "out of inodes"); + uprintf("\n%s: create/symlink failed, no inodes free\n", fs->e2fs_fsmnt); + return (ENOSPC); +} + +/* + * Find a cylinder to place a directory. + * + * The policy implemented by this algorithm is to allocate a + * directory inode in the same cylinder group as its parent + * directory, but also to reserve space for its files inodes + * and data. Restrict the number of directories which may be + * allocated one after another in the same cylinder group + * without intervening allocation of files. + * + * If we allocate a first level directory then force allocation + * in another cylinder group. + * + */ +static u_long +ext2_dirpref(struct inode *pip) +{ + struct m_ext2fs *fs; + int cg, prefcg, dirsize, cgsize; + int avgifree, avgbfree, avgndir, curdirsize; + int minifree, minbfree, maxndir; + int mincg, minndir; + int maxcontigdirs; + + mtx_assert(EXT2_MTX(pip->i_ump), MA_OWNED); + fs = pip->i_e2fs; + + avgifree = fs->e2fs->e2fs_ficount / fs->e2fs_gcount; + avgbfree = fs->e2fs->e2fs_fbcount / fs->e2fs_gcount; + avgndir = fs->e2fs_total_dir / fs->e2fs_gcount; + + /* + * Force allocation in another cg if creating a first level dir. 
+ */ + ASSERT_VOP_LOCKED(ITOV(pip), "ext2fs_dirpref"); + if (ITOV(pip)->v_vflag & VV_ROOT) { + prefcg = arc4random() % fs->e2fs_gcount; + mincg = prefcg; + minndir = fs->e2fs_ipg; + for (cg = prefcg; cg < fs->e2fs_gcount; cg++) + if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && + fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && + fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { + mincg = cg; + minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; + } + for (cg = 0; cg < prefcg; cg++) + if (fs->e2fs_gd[cg].ext2bgd_ndirs < minndir && + fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree && + fs->e2fs_gd[cg].ext2bgd_nbfree >= avgbfree) { + mincg = cg; + minndir = fs->e2fs_gd[cg].ext2bgd_ndirs; + } + + return (mincg); + } + + /* + * Count various limits which used for + * optimal allocation of a directory inode. + */ + maxndir = min(avgndir + fs->e2fs_ipg / 16, fs->e2fs_ipg); + minifree = avgifree - avgifree / 4; + if (minifree < 1) + minifree = 1; + minbfree = avgbfree - avgbfree / 4; + if (minbfree < 1) + minbfree = 1; + cgsize = fs->e2fs_fsize * fs->e2fs_fpg; + dirsize = AVGDIRSIZE; + curdirsize = avgndir ? (cgsize - avgbfree * fs->e2fs_bsize) / avgndir : 0; + if (dirsize < curdirsize) + dirsize = curdirsize; + if (dirsize <= 0) + maxcontigdirs = 0; /* dirsize overflowed */ + else + maxcontigdirs = min((avgbfree * fs->e2fs_bsize) / dirsize, 255); + maxcontigdirs = min(maxcontigdirs, fs->e2fs_ipg / AFPDIR); + if (maxcontigdirs == 0) + maxcontigdirs = 1; + + /* + * Limit number of dirs in one cg and reserve space for + * regular files, but only if we have no deficit in + * inodes or space. 
+ */ + prefcg = ino_to_cg(fs, pip->i_number); + for (cg = prefcg; cg < fs->e2fs_gcount; cg++) + if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && + fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && + fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { + if (fs->e2fs_contigdirs[cg] < maxcontigdirs) + return (cg); + } + for (cg = 0; cg < prefcg; cg++) + if (fs->e2fs_gd[cg].ext2bgd_ndirs < maxndir && + fs->e2fs_gd[cg].ext2bgd_nifree >= minifree && + fs->e2fs_gd[cg].ext2bgd_nbfree >= minbfree) { + if (fs->e2fs_contigdirs[cg] < maxcontigdirs) + return (cg); + } + /* + * This is a backstop when we have deficit in space. + */ + for (cg = prefcg; cg < fs->e2fs_gcount; cg++) + if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) + return (cg); + for (cg = 0; cg < prefcg; cg++) + if (fs->e2fs_gd[cg].ext2bgd_nifree >= avgifree) + break; + return (cg); +} + +/* + * Select the desired position for the next block in a file. + * + * we try to mimic what Remy does in inode_getblk/block_getblk + * + * we note: blocknr == 0 means that we're about to allocate either + * a direct block or a pointer block at the first level of indirection + * (In other words, stuff that will go in i_db[] or i_ib[]) + * + * blocknr != 0 means that we're allocating a block that is none + * of the above. 
Then, blocknr tells us the number of the block + * that will hold the pointer + */ +int32_t +ext2_blkpref(ip, lbn, indx, bap, blocknr) + struct inode *ip; + int32_t lbn; + int indx; + int32_t *bap; + int32_t blocknr; +{ + int tmp; + mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); + + /* if the next block is actually what we thought it is, + then set the goal to what we thought it should be + */ + if(ip->i_next_alloc_block == lbn && ip->i_next_alloc_goal != 0) + return ip->i_next_alloc_goal; + + /* now check whether we were provided with an array that basically + tells us previous blocks to which we want to stay closeby + */ + if(bap) + for (tmp = indx - 1; tmp >= 0; tmp--) + if (bap[tmp]) + return bap[tmp]; + + /* else let's fall back to the blocknr, or, if there is none, + follow the rule that a block should be allocated near its inode + */ + return blocknr ? blocknr : + (int32_t)(ip->i_block_group * + EXT2_BLOCKS_PER_GROUP(ip->i_e2fs)) + + ip->i_e2fs->e2fs->e2fs_first_dblock; +} + +/* + * Implement the cylinder overflow algorithm. + * + * The policy implemented by this algorithm is: + * 1) allocate the block in its requested cylinder group. + * 2) quadradically rehash on the cylinder group number. + * 3) brute force search for a free block. 
+ */ +static u_long +ext2_hashalloc(struct inode *ip, int cg, long pref, int size, + daddr_t (*allocator)(struct inode *, int, daddr_t, int)) +{ + struct m_ext2fs *fs; + ino_t result; + int i, icg = cg; + + mtx_assert(EXT2_MTX(ip->i_ump), MA_OWNED); + fs = ip->i_e2fs; + /* + * 1: preferred cylinder group + */ + result = (*allocator)(ip, cg, pref, size); + if (result) + return (result); + /* + * 2: quadratic rehash + */ + for (i = 1; i < fs->e2fs_gcount; i *= 2) { + cg += i; + if (cg >= fs->e2fs_gcount) + cg -= fs->e2fs_gcount; + result = (*allocator)(ip, cg, 0, size); + if (result) + return (result); + } + /* + * 3: brute force search + * Note that we start at i == 2, since 0 was checked initially, + * and 1 is always checked in the quadratic rehash. + */ + cg = (icg + 2) % fs->e2fs_gcount; + for (i = 2; i < fs->e2fs_gcount; i++) { + result = (*allocator)(ip, cg, 0, size); + if (result) + return (result); + cg++; + if (cg == fs->e2fs_gcount) + cg = 0; + } + return (0); +} + +/* + * Determine whether a block can be allocated. + * + * Check to see if a block of the appropriate size is available, + * and if it is, allocate it. 
+ */ +static daddr_t +ext2_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) +{ + struct m_ext2fs *fs; + struct buf *bp; + struct ext2mount *ump; + int error, bno, start, end, loc; + char *bbp; + /* XXX ondisk32 */ + fs = ip->i_e2fs; + ump = ip->i_ump; + if (fs->e2fs_gd[cg].ext2bgd_nbfree == 0) + return (0); + EXT2_UNLOCK(ump); + error = bread(ip->i_devvp, fsbtodb(fs, + fs->e2fs_gd[cg].ext2bgd_b_bitmap), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + EXT2_LOCK(ump); + return (0); + } + bbp = (char *)bp->b_data; + + if (dtog(fs, bpref) != cg) + bpref = 0; + if (bpref != 0) { + bpref = dtogd(fs, bpref); + /* + * if the requested block is available, use it + */ + if (isclr(bbp, bpref)) { + bno = bpref; + goto gotit; + } + } + /* + * no blocks in the requested cylinder, so take next + * available one in this cylinder group. + * first try to get 8 contigous blocks, then fall back to a single + * block. + */ + if (bpref) + start = dtogd(fs, bpref) / NBBY; + else + start = 0; + end = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; + for (loc = start; loc < end; loc++) { + if (bbp[loc] == 0) { + bno = loc * NBBY; + goto gotit; + } + } + for (loc = 0; loc < start; loc++) { + if (bbp[loc] == 0) { + bno = loc * NBBY; + goto gotit; + } + } + + bno = ext2_mapsearch(fs, bbp, bpref); + if (bno < 0){ + brelse(bp); + EXT2_LOCK(ump); + return (0); + } +gotit: +#ifdef DIAGNOSTIC + if (isset(bbp, (daddr_t)bno)) { + printf("ext2fs_alloccgblk: cg=%d bno=%d fs=%s\n", + cg, bno, fs->e2fs_fsmnt); + panic("ext2fs_alloccg: dup alloc"); + } +#endif + setbit(bbp, (daddr_t)bno); + EXT2_LOCK(ump); + fs->e2fs->e2fs_fbcount--; + fs->e2fs_gd[cg].ext2bgd_nbfree--; + fs->e2fs_fmod = 1; + EXT2_UNLOCK(ump); + bdwrite(bp); + return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); +} + +/* + * Determine whether an inode can be allocated. + * + * Check to see if an inode is available, and if it is, + * allocate it using tode in the specified cylinder group. 
+ */ +static daddr_t +ext2_nodealloccg(struct inode *ip, int cg, daddr_t ipref, int mode) +{ + struct m_ext2fs *fs; + struct buf *bp; + struct ext2mount *ump; + int error, start, len, loc, map, i; + char *ibp; + ipref--; /* to avoid a lot of (ipref -1) */ + if (ipref == -1) + ipref = 0; + fs = ip->i_e2fs; + ump = ip->i_ump; + if (fs->e2fs_gd[cg].ext2bgd_nifree == 0) + return (0); + EXT2_UNLOCK(ump); + error = bread(ip->i_devvp, fsbtodb(fs, + fs->e2fs_gd[cg].ext2bgd_i_bitmap), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + EXT2_LOCK(ump); + return (0); + } + ibp = (char *)bp->b_data; + if (ipref) { + ipref %= fs->e2fs->e2fs_ipg; + if (isclr(ibp, ipref)) + goto gotit; + } + start = ipref / NBBY; + len = howmany(fs->e2fs->e2fs_ipg - ipref, NBBY); + loc = skpc(0xff, len, &ibp[start]); + if (loc == 0) { + len = start + 1; + start = 0; + loc = skpc(0xff, len, &ibp[0]); + if (loc == 0) { + printf("cg = %d, ipref = %lld, fs = %s\n", + cg, (long long)ipref, fs->e2fs_fsmnt); + panic("ext2fs_nodealloccg: map corrupted"); + /* NOTREACHED */ + } + } + i = start + len - loc; + map = ibp[i]; + ipref = i * NBBY; + for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) { + if ((map & i) == 0) { + goto gotit; + } + } + printf("fs = %s\n", fs->e2fs_fsmnt); + panic("ext2fs_nodealloccg: block not in map"); + /* NOTREACHED */ +gotit: + setbit(ibp, ipref); + EXT2_LOCK(ump); + fs->e2fs_gd[cg].ext2bgd_nifree--; + fs->e2fs->e2fs_ficount--; + fs->e2fs_fmod = 1; + if ((mode & IFMT) == IFDIR) { + fs->e2fs_gd[cg].ext2bgd_ndirs++; + fs->e2fs_total_dir++; + } + EXT2_UNLOCK(ump); + bdwrite(bp); + return (cg * fs->e2fs->e2fs_ipg + ipref +1); +} + +/* + * Free a block or fragment. 
+ * + */ +void +ext2_blkfree(ip, bno, size) + struct inode *ip; + int32_t bno; + long size; +{ + struct m_ext2fs *fs; + struct buf *bp; + struct ext2mount *ump; + int cg, error; + char *bbp; + + fs = ip->i_e2fs; + ump = ip->i_ump; + cg = dtog(fs, bno); + if ((u_int)bno >= fs->e2fs->e2fs_bcount) { + printf("bad block %lld, ino %llu\n", (long long)bno, + (unsigned long long)ip->i_number); + ext2_fserr(fs, ip->i_uid, "bad block"); + return; + } + error = bread(ip->i_devvp, + fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return; + } + bbp = (char *)bp->b_data; + bno = dtogd(fs, bno); + if (isclr(bbp, bno)) { + printf("block = %lld, fs = %s\n", + (long long)bno, fs->e2fs_fsmnt); + panic("blkfree: freeing free block"); + } + clrbit(bbp, bno); + EXT2_LOCK(ump); + fs->e2fs->e2fs_fbcount++; + fs->e2fs_gd[cg].ext2bgd_nbfree++; + fs->e2fs_fmod = 1; + EXT2_UNLOCK(ump); + bdwrite(bp); +} + +/* + * Free an inode. + * + */ +int +ext2_vfree(pvp, ino, mode) + struct vnode *pvp; + ino_t ino; + int mode; +{ + struct m_ext2fs *fs; + struct inode *pip; + struct buf *bp; + struct ext2mount *ump; + int error, cg; + char * ibp; +/* mode_t save_i_mode; */ + + pip = VTOI(pvp); + fs = pip->i_e2fs; + ump = pip->i_ump; + if ((u_int)ino > fs->e2fs_ipg * fs->e2fs_gcount) + panic("ext2_vfree: range: devvp = %p, ino = %d, fs = %s", + pip->i_devvp, ino, fs->e2fs_fsmnt); + + cg = ino_to_cg(fs, ino); + error = bread(pip->i_devvp, + fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_i_bitmap), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (0); + } + ibp = (char *)bp->b_data; + ino = (ino - 1) % fs->e2fs->e2fs_ipg; + if (isclr(ibp, ino)) { + printf("ino = %llu, fs = %s\n", + (unsigned long long)ino, fs->e2fs_fsmnt); + if (fs->e2fs_ronly == 0) + panic("ifree: freeing free inode"); + } + clrbit(ibp, ino); + EXT2_LOCK(ump); + fs->e2fs->e2fs_ficount++; + fs->e2fs_gd[cg].ext2bgd_nifree++; + if ((mode & IFMT) == IFDIR) { + 
fs->e2fs_gd[cg].ext2bgd_ndirs--; + fs->e2fs_total_dir--; + } + fs->e2fs_fmod = 1; + EXT2_UNLOCK(ump); + bdwrite(bp); + return (0); +} + +/* + * Find a block in the specified cylinder group. + * + * It is a panic if a request is made to find a block if none are + * available. + */ +static daddr_t +ext2_mapsearch(struct m_ext2fs *fs, char *bbp, daddr_t bpref) +{ + daddr_t bno; + int start, len, loc, i, map; + + /* + * find the fragment by searching through the free block + * map for an appropriate bit pattern + */ + if (bpref) + start = dtogd(fs, bpref) / NBBY; + else + start = 0; + len = howmany(fs->e2fs->e2fs_fpg, NBBY) - start; + loc = skpc(0xff, len, &bbp[start]); + if (loc == 0) { + len = start + 1; + start = 0; + loc = skpc(0xff, len, &bbp[start]); + if (loc == 0) { + printf("start = %d, len = %d, fs = %s\n", + start, len, fs->e2fs_fsmnt); + panic("ext2fs_alloccg: map corrupted"); + /* NOTREACHED */ + } + } + i = start + len - loc; + map = bbp[i]; + bno = i * NBBY; + for (i = 1; i < (1 << NBBY); i <<= 1, bno++) { + if ((map & i) == 0) + return (bno); + } + printf("fs = %s\n", fs->e2fs_fsmnt); + panic("ext2fs_mapsearch: block not in map"); + /* NOTREACHED */ +} + +/* + * Fserr prints the name of a file system with an error diagnostic. 
+ * + * The form of the error message is: + * fs: error message + */ +static void +ext2_fserr(fs, uid, cp) + struct m_ext2fs *fs; + uid_t uid; + char *cp; +{ + + log(LOG_ERR, "uid %u on %s: %s\n", uid, fs->e2fs_fsmnt, cp); +} + +int +cg_has_sb(int i) +{ + int a3, a5, a7; + + if (i == 0 || i == 1) + return 1; + for (a3 = 3, a5 = 5, a7 = 7; + a3 <= i || a5 <= i || a7 <= i; + a3 *= 3, a5 *= 5, a7 *= 7) + if (i == a3 || i == a5 || i == a7) + return 1; + return 0; +} diff --git a/sys/fs/ext2fs/ext2_balloc.c b/sys/fs/ext2fs/ext2_balloc.c new file mode 100644 index 0000000..124ac32 --- /dev/null +++ b/sys/fs/ext2fs/ext2_balloc.c @@ -0,0 +1,292 @@ +/*- + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_balloc.c 8.4 (Berkeley) 9/23/93 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/lock.h> +#include <sys/ucred.h> +#include <sys/vnode.h> + +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extern.h> +#include <fs/ext2fs/ext2_mount.h> +/* + * Balloc defines the structure of file system storage + * by allocating the physical blocks on a device given + * the inode and the logical block number in a file. + */ +int +ext2_balloc(ip, lbn, size, cred, bpp, flags) + struct inode *ip; + int32_t lbn; + int size; + struct ucred *cred; + struct buf **bpp; + int flags; +{ + struct m_ext2fs *fs; + struct ext2mount *ump; + int32_t nb; + struct buf *bp, *nbp; + struct vnode *vp = ITOV(ip); + struct indir indirs[NIADDR + 2]; + int32_t newb, *bap, pref; + int osize, nsize, num, i, error; + + *bpp = NULL; + if (lbn < 0) + return (EFBIG); + fs = ip->i_e2fs; + ump = ip->i_ump; + + /* + * check if this is a sequential block allocation. 
+ * If so, increment next_alloc fields to allow ext2_blkpref + * to make a good guess + */ + if (lbn == ip->i_next_alloc_block + 1) { + ip->i_next_alloc_block++; + ip->i_next_alloc_goal++; + } + + /* + * The first NDADDR blocks are direct blocks + */ + if (lbn < NDADDR) { + nb = ip->i_db[lbn]; + /* no new block is to be allocated, and no need to expand + the file */ + if (nb != 0 && ip->i_size >= (lbn + 1) * fs->e2fs_bsize) { + error = bread(vp, lbn, fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + *bpp = bp; + return (0); + } + if (nb != 0) { + /* + * Consider need to reallocate a fragment. + */ + osize = fragroundup(fs, blkoff(fs, ip->i_size)); + nsize = fragroundup(fs, size); + if (nsize <= osize) { + error = bread(vp, lbn, osize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bp->b_blkno = fsbtodb(fs, nb); + } else { + /* Godmar thinks: this shouldn't happen w/o fragments */ + printf("nsize %d(%d) > osize %d(%d) nb %d\n", + (int)nsize, (int)size, (int)osize, + (int)ip->i_size, (int)nb); + panic( + "ext2_balloc: Something is terribly wrong"); +/* + * please note there haven't been any changes from here on - + * FFS seems to work. + */ + } + } else { + if (ip->i_size < (lbn + 1) * fs->e2fs_bsize) + nsize = fragroundup(fs, size); + else + nsize = fs->e2fs_bsize; + EXT2_LOCK(ump); + error = ext2_alloc(ip, lbn, + ext2_blkpref(ip, lbn, (int)lbn, &ip->i_db[0], 0), + nsize, cred, &newb); + if (error) + return (error); + bp = getblk(vp, lbn, nsize, 0, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + if (flags & B_CLRBUF) + vfs_bio_clrbuf(bp); + } + ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bpp = bp; + return (0); + } + /* + * Determine the number of levels of indirection. 
+ */ + pref = 0; + if ((error = ext2_getlbns(vp, lbn, indirs, &num)) != 0) + return(error); +#ifdef DIAGNOSTIC + if (num < 1) + panic ("ext2_balloc: ext2_getlbns returned indirect block"); +#endif + /* + * Fetch the first indirect block allocating if necessary. + */ + --num; + nb = ip->i_ib[indirs[0].in_off]; + if (nb == 0) { + EXT2_LOCK(ump); + pref = ext2_blkpref(ip, lbn, indirs[0].in_off + + EXT2_NDIR_BLOCKS, &ip->i_db[0], 0); + if ((error = ext2_alloc(ip, lbn, pref, + (int)fs->e2fs_bsize, cred, &newb))) + return (error); + nb = newb; + bp = getblk(vp, indirs[1].in_lbn, fs->e2fs_bsize, 0, 0, 0); + bp->b_blkno = fsbtodb(fs, newb); + vfs_bio_clrbuf(bp); + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if ((error = bwrite(bp)) != 0) { + ext2_blkfree(ip, nb, fs->e2fs_bsize); + return (error); + } + ip->i_ib[indirs[0].in_off] = newb; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } + /* + * Fetch through the indirect blocks, allocating as necessary. + */ + for (i = 1;;) { + error = bread(vp, + indirs[i].in_lbn, (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + brelse(bp); + return (error); + } + bap = (int32_t *)bp->b_data; + nb = bap[indirs[i].in_off]; + if (i == num) + break; + i += 1; + if (nb != 0) { + bqrelse(bp); + continue; + } + EXT2_LOCK(ump); + if (pref == 0) + pref = ext2_blkpref(ip, lbn, indirs[i].in_off, bap, + bp->b_lblkno); + error = ext2_alloc(ip, lbn, pref, (int)fs->e2fs_bsize, cred, &newb); + if (error) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, indirs[i].in_lbn, fs->e2fs_bsize, 0, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + vfs_bio_clrbuf(nbp); + /* + * Write synchronously so that indirect blocks + * never point at garbage. + */ + if ((error = bwrite(nbp)) != 0) { + ext2_blkfree(ip, nb, fs->e2fs_bsize); + EXT2_UNLOCK(ump); + brelse(bp); + return (error); + } + bap[indirs[i - 1].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. 
+ */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->e2fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + } + /* + * Get the data block, allocating if necessary. + */ + if (nb == 0) { + EXT2_LOCK(ump); + pref = ext2_blkpref(ip, lbn, indirs[i].in_off, &bap[0], + bp->b_lblkno); + if ((error = ext2_alloc(ip, + lbn, pref, (int)fs->e2fs_bsize, cred, &newb)) != 0) { + brelse(bp); + return (error); + } + nb = newb; + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + if (flags & B_CLRBUF) + vfs_bio_clrbuf(nbp); + bap[indirs[i].in_off] = nb; + /* + * If required, write synchronously, otherwise use + * delayed write. + */ + if (flags & B_SYNC) { + bwrite(bp); + } else { + if (bp->b_bufsize == fs->e2fs_bsize) + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + *bpp = nbp; + return (0); + } + brelse(bp); + if (flags & B_CLRBUF) { + error = bread(vp, lbn, (int)fs->e2fs_bsize, NOCRED, &nbp); + if (error) { + brelse(nbp); + return (error); + } + } else { + nbp = getblk(vp, lbn, fs->e2fs_bsize, 0, 0, 0); + nbp->b_blkno = fsbtodb(fs, nb); + } + *bpp = nbp; + return (0); +} + diff --git a/sys/fs/ext2fs/ext2_bmap.c b/sys/fs/ext2fs/ext2_bmap.c new file mode 100644 index 0000000..c742188 --- /dev/null +++ b/sys/fs/ext2fs/ext2_bmap.c @@ -0,0 +1,334 @@ +/*- + * Copyright (c) 1989, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/resourcevar.h> +#include <sys/stat.h> + +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/ext2_extern.h> + +/* + * Bmap converts the logical block number of a file to its physical block + * number on the disk. The conversion is done by using the logical block + * number to index into the array of block pointers described by the dinode. 
+ */ +int +ext2_bmap(ap) + struct vop_bmap_args /* { + struct vnode *a_vp; + daddr_t a_bn; + struct bufobj **a_bop; + daddr_t *a_bnp; + int *a_runp; + int *a_runb; + } */ *ap; +{ + int32_t blkno; + int error; + + /* + * Check for underlying vnode requests and ensure that logical + * to physical mapping is requested. + */ + if (ap->a_bop != NULL) + *ap->a_bop = &VTOI(ap->a_vp)->i_devvp->v_bufobj; + if (ap->a_bnp == NULL) + return (0); + + error = ext2_bmaparray(ap->a_vp, ap->a_bn, &blkno, + ap->a_runp, ap->a_runb); + *ap->a_bnp = blkno; + return (error); +} + +/* + * Indirect blocks are now on the vnode for the file. They are given negative + * logical block numbers. Indirect blocks are addressed by the negative + * address of the first data block to which they point. Double indirect blocks + * are addressed by one less than the address of the first indirect block to + * which they point. Triple indirect blocks are addressed by one less than + * the address of the first double indirect block to which they point. + * + * ext2_bmaparray does the bmap conversion, and if requested returns the + * array of logical blocks which must be traversed to get to a block. + * Each entry contains the offset into that block that gets you to the + * next block and the disk address of the block (if it is assigned). 
+ */ + +int +ext2_bmaparray(vp, bn, bnp, runp, runb) + struct vnode *vp; + int32_t bn; + int32_t *bnp; + int *runp; + int *runb; +{ + struct inode *ip; + struct buf *bp; + struct ext2mount *ump; + struct mount *mp; + struct vnode *devvp; + struct indir a[NIADDR+1], *ap; + int32_t daddr; + long metalbn; + int error, num, maxrun = 0, bsize; + int *nump; + + ap = NULL; + ip = VTOI(vp); + mp = vp->v_mount; + ump = VFSTOEXT2(mp); + devvp = ump->um_devvp; + + bsize = EXT2_BLOCK_SIZE(ump->um_e2fs); + + if (runp) { + maxrun = mp->mnt_iosize_max / bsize - 1; + *runp = 0; + } + + if (runb) { + *runb = 0; + } + + + ap = a; + nump = # + error = ext2_getlbns(vp, bn, ap, nump); + if (error) + return (error); + + num = *nump; + if (num == 0) { + *bnp = blkptrtodb(ump, ip->i_db[bn]); + if (*bnp == 0) { + *bnp = -1; + } else if (runp) { + int32_t bnb = bn; + for (++bn; bn < NDADDR && *runp < maxrun && + is_sequential(ump, ip->i_db[bn - 1], ip->i_db[bn]); + ++bn, ++*runp); + bn = bnb; + if (runb && (bn > 0)) { + for (--bn; (bn >= 0) && (*runb < maxrun) && + is_sequential(ump, ip->i_db[bn], + ip->i_db[bn+1]); + --bn, ++*runb); + } + } + return (0); + } + + + /* Get disk address out of indirect block array */ + daddr = ip->i_ib[ap->in_off]; + + for (bp = NULL, ++ap; --num; ++ap) { + /* + * Exit the loop if there is no disk address assigned yet and + * the indirect block isn't in the cache, or if we were + * looking for an indirect block and we've found it. + */ + + metalbn = ap->in_lbn; + if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) + break; + /* + * If we get here, we've either got the block in the cache + * or we have a disk address for it, go fetch it. 
+ */ + if (bp) + bqrelse(bp); + + ap->in_exists = 1; + bp = getblk(vp, metalbn, bsize, 0, 0, 0); + if ((bp->b_flags & B_CACHE) == 0) { +#ifdef DIAGNOSTIC + if (!daddr) + panic("ufs_bmaparray: indirect block not in cache"); +#endif + bp->b_blkno = blkptrtodb(ump, daddr); + bp->b_iocmd = BIO_READ; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + curthread->td_ru.ru_inblock++; + error = bufwait(bp); + if (error) { + brelse(bp); + return (error); + } + } + + daddr = ((int32_t *)bp->b_data)[ap->in_off]; + if (num == 1 && daddr && runp) { + for (bn = ap->in_off + 1; + bn < MNINDIR(ump) && *runp < maxrun && + is_sequential(ump, + ((int32_t *)bp->b_data)[bn - 1], + ((int32_t *)bp->b_data)[bn]); + ++bn, ++*runp); + bn = ap->in_off; + if (runb && bn) { + for(--bn; bn >= 0 && *runb < maxrun && + is_sequential(ump, ((int32_t *)bp->b_data)[bn], + ((int32_t *)bp->b_data)[bn+1]); + --bn, ++*runb); + } + } + } + if (bp) + bqrelse(bp); + + /* + * Since this is FFS independent code, we are out of scope for the + * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they + * will fall in the range 1..um_seqinc, so we use that test and + * return a request for a zeroed out buffer if attempts are made + * to read a BLK_NOCOPY or BLK_SNAP block. + */ + if ((ip->i_flags & SF_SNAPSHOT) && daddr > 0 && daddr < ump->um_seqinc){ + *bnp = -1; + return (0); + } + *bnp = blkptrtodb(ump, daddr); + if (*bnp == 0) { + *bnp = -1; + } + return (0); +} + +/* + * Create an array of logical block number/offset pairs which represent the + * path of indirect blocks required to access a data block. The first "pair" + * contains the logical block number of the appropriate single, double or + * triple indirect block and the offset into the inode indirect block array. 
+ * Note, the logical block number of the inode single/double/triple indirect + * block appears twice in the array, once with the offset into the i_ib and + * once with the offset into the page itself. + */ +int +ext2_getlbns(vp, bn, ap, nump) + struct vnode *vp; + int32_t bn; + struct indir *ap; + int *nump; +{ + long blockcnt, metalbn, realbn; + struct ext2mount *ump; + int i, numlevels, off; + int64_t qblockcnt; + + ump = VFSTOEXT2(vp->v_mount); + if (nump) + *nump = 0; + numlevels = 0; + realbn = bn; + if ((long)bn < 0) + bn = -(long)bn; + + /* The first NDADDR blocks are direct blocks. */ + if (bn < NDADDR) + return (0); + + /* + * Determine the number of levels of indirection. After this loop + * is done, blockcnt indicates the number of data blocks possible + * at the previous level of indirection, and NIADDR - i is the number + * of levels of indirection needed to locate the requested block. + */ + for (blockcnt = 1, i = NIADDR, bn -= NDADDR;; i--, bn -= blockcnt) { + if (i == 0) + return (EFBIG); + /* + * Use int64_t's here to avoid overflow for triple indirect + * blocks when longs have 32 bits and the block size is more + * than 4K. + */ + qblockcnt = (int64_t)blockcnt * MNINDIR(ump); + if (bn < qblockcnt) + break; + blockcnt = qblockcnt; + } + + /* Calculate the address of the first meta-block. */ + if (realbn >= 0) + metalbn = -(realbn - bn + NIADDR - i); + else + metalbn = -(-realbn - bn + NIADDR - i); + + /* + * At each iteration, off is the offset into the bap array which is + * an array of disk addresses at the current level of indirection. + * The logical block number and the offset in that block are stored + * into the argument array. + */ + ap->in_lbn = metalbn; + ap->in_off = off = NIADDR - i; + ap->in_exists = 0; + ap++; + for (++numlevels; i <= NIADDR; i++) { + /* If searching for a meta-data block, quit when found. 
*/ + if (metalbn == realbn) + break; + + off = (bn / blockcnt) % MNINDIR(ump); + + ++numlevels; + ap->in_lbn = metalbn; + ap->in_off = off; + ap->in_exists = 0; + ++ap; + + metalbn -= -1 + off * blockcnt; + blockcnt /= MNINDIR(ump); + } + if (nump) + *nump = numlevels; + return (0); +} diff --git a/sys/fs/ext2fs/ext2_dinode.h b/sys/fs/ext2fs/ext2_dinode.h new file mode 100755 index 0000000..3e45060 --- /dev/null +++ b/sys/fs/ext2fs/ext2_dinode.h @@ -0,0 +1,78 @@ +/*- + * Copyright (c) 2009 Aditya Sarawgi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _FS_EXT2FS_EXT2_DINODE_H_
+#define _FS_EXT2FS_EXT2_DINODE_H_
+
+/*
+ * e2di_size_high is a second name for the e2di_dacl slot of
+ * struct ext2fs_dinode below.
+ * NOTE(review): presumably the upper 32 bits of the file size for inodes
+ * that do not use the dir ACL field - confirm against the users.
+ */
+#define e2di_size_high	e2di_dacl
+
+/*
+ * Inode flags
+ * The current implementation uses only EXT2_IMMUTABLE and EXT2_APPEND flags
+ */
+#define EXT2_SECRM		0x00000001 /* Secure deletion */
+#define EXT2_UNRM		0x00000002 /* Undelete */
+#define EXT2_COMPR		0x00000004 /* Compress file */
+#define EXT2_SYNC		0x00000008 /* Synchronous updates */
+#define EXT2_IMMUTABLE		0x00000010 /* Immutable file */
+#define EXT2_APPEND		0x00000020 /* writes to file may only append */
+#define EXT2_NODUMP		0x00000040 /* do not dump file */
+#define EXT2_NOATIME		0x00000080 /* do not update atime */
+
+
+/*
+ * Structure of an inode on the disk
+ *
+ * The number in front of each field comment is the field's byte offset.
+ * Going by those offsets, e2di_blocks occupies bytes 40..99, i.e. 15
+ * four-byte entries, and the structure is 128 bytes long.
+ */
+struct ext2fs_dinode {
+	u_int16_t	e2di_mode;	/* 0: IFMT, permissions; see below. */
+	u_int16_t	e2di_uid;	/* 2: Owner UID */
+	u_int32_t	e2di_size;	/* 4: Size (in bytes) */
+	u_int32_t	e2di_atime;	/* 8: Access time */
+	/* NOTE(review): likely the inode change time, not creation - confirm */
+	u_int32_t	e2di_ctime;	/* 12: Create time */
+	u_int32_t	e2di_mtime;	/* 16: Modification time */
+	u_int32_t	e2di_dtime;	/* 20: Deletion time */
+	u_int16_t	e2di_gid;	/* 24: Owner GID */
+	u_int16_t	e2di_nlink;	/* 26: File link count */
+	u_int32_t	e2di_nblock;	/* 28: Blocks count */
+	u_int32_t	e2di_flags;	/* 32: Status flags (chflags) */
+	u_int32_t	e2di_linux_reserved1; /* 36 */
+	u_int32_t	e2di_blocks[EXT2_N_BLOCKS]; /* 40: disk blocks */
+	u_int32_t	e2di_gen;	/* 100: generation number */
+	u_int32_t	e2di_facl;	/* 104: file ACL (not implemented) */
+	/* Also reachable as e2di_size_high, see the #define above. */
+	u_int32_t	e2di_dacl;	/* 108: dir ACL (not implemented) */
+	u_int32_t	e2di_faddr;	/* 112: fragment address */
+	u_int8_t	e2di_nfrag;	/* 116: fragment number */
+	u_int8_t	e2di_fsize;	/* 117: fragment size */
+	u_int16_t	e2di_linux_reserved2; /* 118 */
+	u_int16_t	e2di_uid_high;	/* 120: Owner UID top 16 bits */
+	u_int16_t	e2di_gid_high;	/* 122: Owner GID top 16 bits */
+	u_int32_t	e2di_linux_reserved3; /* 124 */
+};
+
+#endif /* _FS_EXT2FS_EXT2_DINODE_H_ */
+
diff --git
a/sys/fs/ext2fs/ext2_dir.h b/sys/fs/ext2fs/ext2_dir.h new file mode 100755 index 0000000..0676268 --- /dev/null +++ b/sys/fs/ext2fs/ext2_dir.h @@ -0,0 +1,81 @@ +/*- + * Copyright (c) 2009 Aditya Sarawgi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _FS_EXT2FS_EXT2_DIR_H_
+#define _FS_EXT2FS_EXT2_DIR_H_
+
+/*
+ * Structure of a directory entry
+ *
+ * The three fixed fields in front of the name take 4 + 2 + 2 = 8 bytes,
+ * which is the constant 8 used by EXT2_DIR_REC_LEN() below.
+ */
+#define EXT2FS_MAXNAMLEN	255
+
+struct ext2fs_direct {
+	u_int32_t e2d_ino;		/* inode number of entry */
+	u_int16_t e2d_reclen;		/* length of this record */
+	u_int16_t e2d_namlen;		/* length of string in d_name */
+	char e2d_name[EXT2FS_MAXNAMLEN];/* name with length<=EXT2FS_MAXNAMLEN */
+};
+/*
+ * The new version of the directory entry. Since EXT2 structures are
+ * stored in intel byte order, and the name_len field could never be
+ * bigger than 255 chars, it's safe to reclaim the extra byte for the
+ * file_type field.
+ *
+ * The 16-bit e2d_namlen of struct ext2fs_direct becomes an 8-bit length
+ * followed by an 8-bit type, so the fixed part is still 8 bytes.
+ */
+struct ext2fs_direct_2 {
+	u_int32_t e2d_ino;		/* inode number of entry */
+	u_int16_t e2d_reclen;		/* length of this record */
+	u_int8_t e2d_namlen;		/* length of string in d_name */
+	u_int8_t e2d_type;		/* file type */
+	char e2d_name[EXT2FS_MAXNAMLEN];/* name with length<=EXT2FS_MAXNAMLEN */
+};
+/*
+ * Ext2 directory file types. Only the low 3 bits are used. The
+ * other bits are reserved for now.
+ */
+#define EXT2_FT_UNKNOWN		0
+#define EXT2_FT_REG_FILE	1
+#define EXT2_FT_DIR		2
+#define EXT2_FT_CHRDEV		3
+#define EXT2_FT_BLKDEV		4
+#define EXT2_FT_FIFO		5
+#define EXT2_FT_SOCK		6
+#define EXT2_FT_SYMLINK		7
+
+/* One more than the largest type value defined above. */
+#define EXT2_FT_MAX		8
+
+/*
+ * EXT2_DIR_PAD defines the directory entries boundaries
+ *
+ * NOTE: It must be a multiple of 4
+ *
+ * EXT2_DIR_REC_LEN(name_len) is the record size for a name of name_len
+ * bytes: 8 bytes of fixed fields plus the name, rounded up to the next
+ * multiple of EXT2_DIR_PAD.  With the values here a 1-byte name gives 12
+ * and a 4-byte name gives 12 as well.
+ */
+#define EXT2_DIR_PAD		 	4
+#define EXT2_DIR_ROUND 			(EXT2_DIR_PAD - 1)
+#define EXT2_DIR_REC_LEN(name_len)	(((name_len) + 8 + EXT2_DIR_ROUND) & \
+					 ~EXT2_DIR_ROUND)
+#endif /* !_FS_EXT2FS_EXT2_DIR_H_ */
+
diff --git a/sys/fs/ext2fs/ext2_extern.h b/sys/fs/ext2fs/ext2_extern.h
new file mode 100644
index 0000000..60905cb
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_extern.h
@@ -0,0 +1,93 @@
+/*-
+ * modified for EXT2FS support in Lites 1.1
+ *
+ * Aug 1995, Godmar Back (gback@cs.utah.edu)
+ * University of Utah, Department of Computer Science
+ */
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 + * $FreeBSD$ + */ + +#ifndef _FS_EXT2FS_EXT2_EXTERN_H_ +#define _FS_EXT2FS_EXT2_EXTERN_H_ + +struct ext2fs_dinode; +struct indir; +struct inode; +struct mount; +struct vfsconf; +struct vnode; + +int ext2_alloc(struct inode *, + int32_t, int32_t, int, struct ucred *, int32_t *); +int ext2_balloc(struct inode *, + int32_t, int, struct ucred *, struct buf **, int); +int ext2_blkatoff(struct vnode *, off_t, char **, struct buf **); +void ext2_blkfree(struct inode *, int32_t, long); +int32_t ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t); +int ext2_bmap(struct vop_bmap_args *); +int ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *); +void ext2_dirbad(struct inode *ip, doff_t offset, char *how); +void ext2_ei2i(struct ext2fs_dinode *, struct inode *); +int ext2_getlbns(struct vnode *, int32_t, struct indir *, int *); +void ext2_i2ei(struct inode *, struct ext2fs_dinode *); +void ext2_itimes(struct vnode *vp); +int ext2_reallocblks(struct vop_reallocblks_args *); +int ext2_reclaim(struct vop_reclaim_args *); +void ext2_setblock(struct m_ext2fs *, u_char *, int32_t); +int ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *); +int ext2_update(struct vnode *, int); +int ext2_valloc(struct vnode *, int, struct ucred *, struct vnode **); +int ext2_vfree(struct vnode *, ino_t, int); +int ext2_vinit(struct mount *, struct vop_vector *, struct vnode 
**vpp); +int ext2_lookup(struct vop_cachedlookup_args *); +int ext2_readdir(struct vop_readdir_args *); +void ext2_print_inode(struct inode *); +int ext2_direnter(struct inode *, + struct vnode *, struct componentname *); +int ext2_dirremove(struct vnode *, struct componentname *); +int ext2_dirrewrite(struct inode *, + struct inode *, struct componentname *); +int ext2_dirempty(struct inode *, ino_t, struct ucred *); +int ext2_checkpath(struct inode *, struct inode *, struct ucred *); +int cg_has_sb(int i); +int ext2_inactive(struct vop_inactive_args *); + +/* Flags to low-level allocation routines. */ +#define B_CLRBUF 0x01 /* Request allocated buffer be cleared. */ +#define B_SYNC 0x02 /* Do all allocations synchronously. */ +#define B_METAONLY 0x04 /* Return indirect block buffer. */ +#define B_NOWAIT 0x08 /* do not sleep to await lock */ + +extern struct vop_vector ext2_vnodeops; +extern struct vop_vector ext2_fifoops; + +#endif /* !_FS_EXT2FS_EXT2_EXTERN_H_ */ diff --git a/sys/fs/ext2fs/ext2_inode.c b/sys/fs/ext2fs/ext2_inode.c new file mode 100644 index 0000000..2cf60a7 --- /dev/null +++ b/sys/fs/ext2fs/ext2_inode.c @@ -0,0 +1,537 @@ +/*- + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_inode.c 8.5 (Berkeley) 12/30/93 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/mount.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> + +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extern.h> + +static int ext2_indirtrunc(struct inode *, int32_t, int32_t, int32_t, int, + long *); + +/* + * Update the access, modified, and inode change times as specified by the + * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. Write the inode + * to disk if the IN_MODIFIED flag is set (it may be set initially, or by + * the timestamp update). The IN_LAZYMOD flag is set to force a write + * later if not now. 
If we write now, then clear both IN_MODIFIED and + * IN_LAZYMOD to reflect the presumably successful write, and if waitfor is + * set, then wait for the write to complete. + */ +int +ext2_update(vp, waitfor) + struct vnode *vp; + int waitfor; +{ + struct m_ext2fs *fs; + struct buf *bp; + struct inode *ip; + int error; + + ASSERT_VOP_ELOCKED(vp, "ext2_update"); + ext2_itimes(vp); + ip = VTOI(vp); + if ((ip->i_flag & IN_MODIFIED) == 0 && waitfor == 0) + return (0); + ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED); + fs = ip->i_e2fs; + if(fs->e2fs_ronly) + return (0); + if ((error = bread(ip->i_devvp, + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { + brelse(bp); + return (error); + } + ext2_i2ei(ip, (struct ext2fs_dinode *)((char *)bp->b_data + + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number))); + if (waitfor && (vp->v_mount->mnt_kern_flag & MNTK_ASYNC) == 0) + return (bwrite(bp)); + else { + bdwrite(bp); + return (0); + } +} + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ +/* + * Truncate the inode oip to at most length size, freeing the + * disk blocks. 
+ *
+ *	vp	vnode of the file
+ *	length	new size in bytes; a negative value yields EINVAL
+ *	flags	IO_SYNC makes the block allocation and the write of the
+ *		affected block synchronous (B_SYNC)
+ *	cred	credentials handed to ext2_balloc() and vtruncbuf()
+ *	td	thread handed to vtruncbuf()
+ *
+ * Despite the "at most" above, the file is also grown when length is
+ * larger than the current size; that path returns EFBIG when length
+ * exceeds e2fs_maxfilesize.  When shrinking, the first error seen from
+ * ext2_update() or vtruncbuf(), or the latest one from ext2_indirtrunc(),
+ * is returned after the remaining work has been done.
+ */
+int
+ext2_truncate(vp, length, flags, cred, td)
+	struct vnode *vp;
+	off_t length;
+	int flags;
+	struct ucred *cred;
+	struct thread *td;
+{
+	struct vnode *ovp = vp;
+	int32_t lastblock;
+	struct inode *oip;
+	int32_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR];
+	int32_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR];
+	struct bufobj *bo;
+	struct m_ext2fs *fs;
+	struct buf *bp;
+	int offset, size, level;
+	long count, nblocks, blocksreleased = 0;
+	int aflags, error, i, allerror;
+	off_t osize;
+
+	oip = VTOI(ovp);
+	bo = &ovp->v_bufobj;
+
+	ASSERT_VOP_LOCKED(vp, "ext2_truncate");
+
+	if (length < 0)
+		return (EINVAL);
+
+	/*
+	 * A symlink shorter than mnt_maxsymlinklen keeps its target in
+	 * i_shortlink, so there are no blocks to free: just clear it.
+	 */
+	if (ovp->v_type == VLNK &&
+	    oip->i_size < ovp->v_mount->mnt_maxsymlinklen) {
+#ifdef DIAGNOSTIC
+		if (length != 0)
+			panic("ext2_truncate: partial truncate of symlink");
+#endif
+		bzero((char *)&oip->i_shortlink, (u_int)oip->i_size);
+		oip->i_size = 0;
+		oip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (ext2_update(ovp, 1));
+	}
+	/* Size unchanged: only the timestamps need updating. */
+	if (oip->i_size == length) {
+		oip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (ext2_update(ovp, 0));
+	}
+	fs = oip->i_e2fs;
+	osize = oip->i_size;
+	/*
+	 * Lengthen the size of the file. We must ensure that the
+	 * last byte of the file is allocated. Since the smallest
+	 * value of osize is 0, length will be at least 1.
+	 */
+	if (osize < length) {
+		if (length > oip->i_e2fs->e2fs_maxfilesize)
+			return (EFBIG);
+		vnode_pager_setsize(ovp, length);
+		offset = blkoff(fs, length - 1);
+		lbn = lblkno(fs, length - 1);
+		aflags = B_CLRBUF;
+		if (flags & IO_SYNC)
+			aflags |= B_SYNC;
+		error = ext2_balloc(oip, lbn, offset + 1, cred, &bp, aflags);
+		if (error) {
+			/* Undo the pager size change made above. */
+			vnode_pager_setsize(vp, osize);
+			return (error);
+		}
+		oip->i_size = length;
+		if (bp->b_bufsize == fs->e2fs_bsize)
+			bp->b_flags |= B_CLUSTEROK;
+		if (aflags & B_SYNC)
+			bwrite(bp);
+		else if (ovp->v_mount->mnt_flag & MNT_ASYNC)
+			bdwrite(bp);
+		else
+			bawrite(bp);
+		oip->i_flag |= IN_CHANGE | IN_UPDATE;
+		return (ext2_update(ovp, 1));
+	}
+	/*
+	 * Shorten the size of the file. If the file is not being
+	 * truncated to a block boundary, the contents of the
+	 * partial block following the end of the file must be
+	 * zeroed in case it ever becomes accessible again because
+	 * of subsequent file growth.
+	 */
+	/* I don't understand the comment above */
+	offset = blkoff(fs, length);
+	if (offset == 0) {
+		oip->i_size = length;
+	} else {
+		lbn = lblkno(fs, length);
+		aflags = B_CLRBUF;
+		if (flags & IO_SYNC)
+			aflags |= B_SYNC;
+		error = ext2_balloc(oip, lbn, offset, cred, &bp, aflags);
+		if (error)
+			return (error);
+		oip->i_size = length;
+		size = blksize(fs, oip, lbn);
+		/* Zero from the new end of file to the end of its block. */
+		bzero((char *)bp->b_data + offset, (u_int)(size - offset));
+		allocbuf(bp, size);
+		if (bp->b_bufsize == fs->e2fs_bsize)
+			bp->b_flags |= B_CLUSTEROK;
+		if (aflags & B_SYNC)
+			bwrite(bp);
+		else if (ovp->v_mount->mnt_flag & MNT_ASYNC)
+			bdwrite(bp);
+		else
+			bawrite(bp);
+	}
+	/*
+	 * Calculate index into inode's block list of
+	 * last direct and indirect blocks (if any)
+	 * which we want to keep. Lastblock is -1 when
+	 * the file is truncated to 0.
+	 */
+	lastblock = lblkno(fs, length + fs->e2fs_bsize - 1) - 1;
+	lastiblock[SINGLE] = lastblock - NDADDR;
+	lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs);
+	lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs);
+	nblocks = btodb(fs->e2fs_bsize);
+	/*
+	 * Update file and block pointers on disk before we start freeing
+	 * blocks. If we crash before free'ing blocks below, the blocks
+	 * will be returned to the free list. lastiblock values are also
+	 * normalized to -1 for calls to ext2_indirtrunc below.
+	 */
+	/*
+	 * The copy starts at i_db[0] and spans NDADDR + NIADDR entries, so
+	 * it takes in i_ib[] as well.
+	 * NOTE(review): relies on i_ib[] directly following i_db[] in
+	 * struct inode - confirm in inode.h.
+	 */
+	bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks);
+	for (level = TRIPLE; level >= SINGLE; level--)
+		if (lastiblock[level] < 0) {
+			oip->i_ib[level] = 0;
+			lastiblock[level] = -1;
+		}
+	for (i = NDADDR - 1; i > lastblock; i--)
+		oip->i_db[i] = 0;
+	oip->i_flag |= IN_CHANGE | IN_UPDATE;
+	allerror = ext2_update(ovp, 1);
+
+	/*
+	 * Having written the new inode to disk, save its new configuration
+	 * and put back the old block pointers long enough to process them.
+	 * Note that we save the new block configuration so we can check it
+	 * when we are done.
+	 */
+	bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks);
+	bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks);
+	oip->i_size = osize;
+	error = vtruncbuf(ovp, cred, td, length, (int)fs->e2fs_bsize);
+	if (error && (allerror == 0))
+		allerror = error;
+	vnode_pager_setsize(ovp, length);
+
+	/*
+	 * Indirect blocks first.
+	 */
+	indir_lbn[SINGLE] = -NDADDR;
+	indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1;
+	indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1;
+	for (level = TRIPLE; level >= SINGLE; level--) {
+		bn = oip->i_ib[level];
+		if (bn != 0) {
+			error = ext2_indirtrunc(oip, indir_lbn[level],
+			    fsbtodb(fs, bn), lastiblock[level], level, &count);
+			if (error)
+				allerror = error;
+			blocksreleased += count;
+			/* Nothing kept at this level: free the block itself. */
+			if (lastiblock[level] < 0) {
+				oip->i_ib[level] = 0;
+				ext2_blkfree(oip, bn, fs->e2fs_fsize);
+				blocksreleased += nblocks;
+			}
+		}
+		/* Part of this level survives, so all lower levels do too. */
+		if (lastiblock[level] >= 0)
+			goto done;
+	}
+
+	/*
+	 * All whole direct blocks or frags.
+	 */
+	for (i = NDADDR - 1; i > lastblock; i--) {
+		long bsize;
+
+		bn = oip->i_db[i];
+		if (bn == 0)
+			continue;
+		oip->i_db[i] = 0;
+		bsize = blksize(fs, oip, i);
+		ext2_blkfree(oip, bn, bsize);
+		blocksreleased += btodb(bsize);
+	}
+	if (lastblock < 0)
+		goto done;
+
+	/*
+	 * Finally, look for a change in size of the
+	 * last direct block; release any frags.
+	 */
+	bn = oip->i_db[lastblock];
+	if (bn != 0) {
+		long oldspace, newspace;
+
+		/*
+		 * Calculate amount of space we're giving
+		 * back as old block size minus new block size.
+		 */
+		oldspace = blksize(fs, oip, lastblock);
+		oip->i_size = length;
+		newspace = blksize(fs, oip, lastblock);
+		if (newspace == 0)
+			panic("itrunc: newspace");
+		if (oldspace - newspace > 0) {
+			/*
+			 * Block number of space to be free'd is
+			 * the old block # plus the number of frags
+			 * required for the storage we're keeping.
+			 */
+			bn += numfrags(fs, newspace);
+			ext2_blkfree(oip, bn, oldspace - newspace);
+			blocksreleased += btodb(oldspace - newspace);
+		}
+	}
+done:
+#ifdef DIAGNOSTIC
+	/* The pointers left in the inode must match what went to disk. */
+	for (level = SINGLE; level <= TRIPLE; level++)
+		if (newblks[NDADDR + level] != oip->i_ib[level])
+			panic("itrunc1");
+	for (i = 0; i < NDADDR; i++)
+		if (newblks[i] != oip->i_db[i])
+			panic("itrunc2");
+	BO_LOCK(bo);
+	if (length == 0 && (bo->bo_dirty.bv_cnt != 0 ||
+	    bo->bo_clean.bv_cnt != 0))
+		panic("itrunc3");
+	BO_UNLOCK(bo);
+#endif /* DIAGNOSTIC */
+	/*
+	 * Put back the real size.
+	 */
+	oip->i_size = length;
+	oip->i_blocks -= blocksreleased;
+	if (oip->i_blocks < 0)			/* sanity */
+		oip->i_blocks = 0;
+	oip->i_flag |= IN_CHANGE;
+	vnode_pager_setsize(ovp, length);
+	return (allerror);
+}
+
+/*
+ * Release blocks associated with the inode ip and stored in the indirect
+ * block bn. Blocks are free'd in LIFO order up to (but not including)
+ * lastbn. If level is greater than SINGLE, the block is an indirect block
+ * and recursive calls to indirtrunc must be used to cleanse other indirect
+ * blocks.
+ *
+ * NB: triple indirect blocks are untested.
+ */
+
+static int
+ext2_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
+	struct inode *ip;
+	int32_t lbn, lastbn;
+	int32_t dbn;
+	int level;
+	long *countp;
+{
+	struct buf *bp;
+	struct m_ext2fs *fs = ip->i_e2fs;
+	struct vnode *vp;
+	int32_t *bap, *copy, nb, nlbn, last;
+	long blkcount, factor;
+	int i, nblocks, blocksreleased = 0;
+	int error = 0, allerror = 0;
+
+	/*
+	 * Calculate index in current block of last
+	 * block to be kept. -1 indicates the entire
+	 * block so we need not calculate the index.
+	 */
+	factor = 1;
+	for (i = SINGLE; i < level; i++)
+		factor *= NINDIR(fs);
+	last = lastbn;
+	if (lastbn > 0)
+		last /= factor;
+	nblocks = btodb(fs->e2fs_bsize);
+	/*
+	 * Get buffer of block pointers, zero those entries corresponding
+	 * to blocks to be free'd, and update on disk copy first.
Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->e2fs_bsize, 0, 0, 0); + if (bp->b_flags & (B_DONE | B_DELWRI)) { + } else { + bp->b_iocmd = BIO_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("ext2_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; + vfs_busy_pages(bp, 0); + bp->b_iooffset = dbtob(bp->b_blkno); + bstrategy(bp); + error = bufwait(bp); + } + if (error) { + brelse(bp); + *countp = 0; + return (error); + } + + bap = (int32_t *)bp->b_data; + copy = malloc(fs->e2fs_bsize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->e2fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (int32_t)); + if (last == -1) + bp->b_flags |= B_INVAL; + error = bwrite(bp); + if (error) + allerror = error; + bap = copy; + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = bap[i]; + if (nb == 0) + continue; + if (level > SINGLE) { + if ((error = ext2_indirtrunc(ip, nlbn, + fsbtodb(fs, nb), (int32_t)-1, level - 1, &blkcount)) != 0) + allerror = error; + blocksreleased += blkcount; + } + ext2_blkfree(ip, nb, fs->e2fs_bsize); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block. 
+	 */
+	if (level > SINGLE && lastbn >= 0) {
+		last = lastbn % factor;
+		nb = bap[i];
+		if (nb != 0) {
+			if ((error = ext2_indirtrunc(ip, nlbn, fsbtodb(fs, nb),
+			    last, level - 1, &blkcount)) != 0)
+				allerror = error;
+			blocksreleased += blkcount;
+		}
+	}
+	free(copy, M_TEMP);
+	*countp = blocksreleased;
+	return (allerror);
+}
+
+/*
+ * VOP_INACTIVE handler.
+ *
+ * For an inode that is still in use (i_mode != 0):
+ *  - if its link count has dropped to zero or below, truncate it to
+ *    length 0, clear i_rdev and i_mode, and release the inode through
+ *    ext2_vfree() using the mode it had before being cleared;
+ *  - if any of IN_ACCESS/IN_CHANGE/IN_MODIFIED/IN_UPDATE is pending,
+ *    push the inode out with ext2_update().
+ * Whenever i_mode ends up 0 the vnode is handed to vrecycle().
+ *
+ * Returns the result of ext2_truncate() when it was called, else 0.
+ */
+int
+ext2_inactive(ap)
+	struct vop_inactive_args /* {
+		struct vnode *a_vp;
+		struct thread *a_td;
+	} */ *ap;
+{
+	struct vnode *vp;
+	struct inode *ip;
+	struct thread *td;
+	int error, omode;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	td = ap->a_td;
+	error = 0;
+
+	if (prtactive && vrefcnt(vp) != 0)
+		vprint("ext2_inactive: pushing active", vp);
+
+	/*
+	 * An inode with i_mode == 0 belongs to a stale file handle;
+	 * there is nothing to truncate or update for it.
+	 */
+	if (ip->i_mode != 0) {
+		if (ip->i_nlink <= 0) {
+			error = ext2_truncate(vp, (off_t)0, 0, NOCRED, td);
+			ip->i_rdev = 0;
+			/* ext2_vfree() needs the mode from before it is cleared. */
+			omode = ip->i_mode;
+			ip->i_mode = 0;
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			ext2_vfree(vp, ip->i_number, omode);
+		}
+		if ((ip->i_flag &
+		    (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) != 0)
+			ext2_update(vp, 0);
+	}
+
+	/*
+	 * If we are done with the inode, reclaim it
+	 * so that it can be reused immediately.
+	 */
+	if (ip->i_mode == 0)
+		vrecycle(vp, td);
+	return (error);
+}
+
+/*
+ * Reclaim an inode so that it can be used for other purposes.
+ *
+ * Flushes a lazily-modified inode (IN_LAZYMOD) through ext2_update(),
+ * removes the vnode from the vfs hash, frees the in-core inode hanging
+ * off v_data and destroys the vnode's VM object.  Always returns 0.
+ */
+int
+ext2_reclaim(ap)
+	struct vop_reclaim_args /* {
+		struct vnode *a_vp;
+		struct thread *a_td;
+	} */ *ap;
+{
+	struct inode *ip;
+	struct vnode *vp = ap->a_vp;
+
+	/* Diagnostic names this function, not the UFS one it was copied from. */
+	if (prtactive && vrefcnt(vp) != 0)
+		vprint("ext2_reclaim: pushing active", vp);
+	ip = VTOI(vp);
+	if (ip->i_flag & IN_LAZYMOD) {
+		/* Promote the lazy modification so ext2_update() writes it. */
+		ip->i_flag |= IN_MODIFIED;
+		ext2_update(vp, 0);
+	}
+	vfs_hash_remove(vp);
+	free(vp->v_data, M_EXT2NODE);
+	/* v_data is a pointer; clear it so the freed inode is not reused. */
+	vp->v_data = NULL;
+	vnode_destroy_vobject(vp);
+	return (0);
+}
diff --git a/sys/fs/ext2fs/ext2_inode_cnv.c b/sys/fs/ext2fs/ext2_inode_cnv.c
new file mode 100644
index 0000000..b042a5a
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_inode_cnv.c
@@ -0,0 +1,138 @@
+/*-
+ * Copyright (c) 1995 The University of Utah and
+ * the Computer Systems Laboratory at the University of Utah (CSL).
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify and distribute this software is hereby
+ * granted provided that (1) source code retains these copyright, permission,
+ * and disclaimer notices, and (2) redistributions including binaries
+ * reproduce the notices in supporting documentation, and (3) all advertising
+ * materials mentioning features or use of this software display the following
+ * acknowledgement: ``This product includes software developed by the
+ * Computer Systems Laboratory at the University of Utah.''
+ *
+ * THE UNIVERSITY OF UTAH AND CSL ALLOW FREE USE OF THIS SOFTWARE IN ITS "AS
+ * IS" CONDITION. THE UNIVERSITY OF UTAH AND CSL DISCLAIM ANY LIABILITY OF
+ * ANY KIND FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * CSL requests users of this software to return to csl-dist@cs.utah.edu any
+ * improvements that they make and grant CSL redistribution rights.
+ *
+ * Utah $Hdr$
+ * $FreeBSD$
+ */
+
+/*
+ * routines to convert on disk ext2 inodes into inodes and back
+ */
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/stat.h>
+#include <sys/vnode.h>
+
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2_dinode.h>
+
+/*
+ * Debugging aid: dump the main fields of an in-core inode with printf().
+ *
+ * The number of direct block pointers printed is derived from i_blocks:
+ * (i_blocks + 1) / 2 entries while i_blocks <= 24, otherwise 12.
+ */
+void
+ext2_print_inode( in )
+	struct inode *in;
+{
+	int i, ndb;
+
+	/* Same cast/format pairing for the inode number as ext2_dirbad(). */
+	printf("Inode: %5lu", (unsigned long)in->i_number);
+	/* The mode is printed in octal, so give it an octal prefix. */
+	printf(" Type: %10s Mode: 0%o Flags: 0x%x Version: %d\n",
+	    "n/a", in->i_mode, in->i_flags, in->i_gen);
+	/* i_size carries the high 32 size bits too; do not truncate it. */
+	printf("User: %5lu Group: %5lu Size: %ju\n",
+	    (unsigned long)in->i_uid, (unsigned long)in->i_gid,
+	    (uintmax_t)in->i_size);
+	printf("Links: %3d Blockcount: %d\n",
+	    in->i_nlink, in->i_blocks);
+	/* Separate the three timestamps and end the line before BLOCKS. */
+	printf("ctime: 0x%x ", in->i_ctime);
+	printf("atime: 0x%x ", in->i_atime);
+	printf("mtime: 0x%x\n", in->i_mtime);
+	printf("BLOCKS: ");
+	ndb = in->i_blocks <= 24 ? (in->i_blocks + 1) / 2 : 12;
+	for (i = 0; i < ndb; i++)
+		printf("%d ", in->i_db[i]);
+	printf("\n");
+}
+
+/*
+ *	raw ext2 inode to inode
+ */
+void
+ext2_ei2i(ei, ip)
+	struct ext2fs_dinode *ei;
+	struct inode *ip;
+{
+	int i;
+
+	ip->i_nlink = ei->e2di_nlink;
+	/* Godmar thinks - if the link count is zero, then the inode is
+	   unused - according to ext2 standards. Ufs marks this fact
+	   by setting i_mode to zero - why ?
+	   I can see that this might lead to problems in an undelete.
+	*/
+	ip->i_mode = ei->e2di_nlink ? ei->e2di_mode : 0;
+	ip->i_size = ei->e2di_size;
+	if (S_ISREG(ip->i_mode))
+		ip->i_size |= ((u_int64_t)ei->e2di_size_high) << 32;
+	ip->i_atime = ei->e2di_atime;
+	ip->i_mtime = ei->e2di_mtime;
+	ip->i_ctime = ei->e2di_ctime;
+	ip->i_flags = 0;
+	ip->i_flags |= (ei->e2di_flags & EXT2_APPEND) ? SF_APPEND : 0;
+	ip->i_flags |= (ei->e2di_flags & EXT2_IMMUTABLE) ? SF_IMMUTABLE : 0;
+	ip->i_flags |= (ei->e2di_flags & EXT2_NODUMP) ?
UF_NODUMP : 0;
+	ip->i_blocks = ei->e2di_nblock;
+	ip->i_gen = ei->e2di_gen;
+	ip->i_uid = ei->e2di_uid;
+	ip->i_gid = ei->e2di_gid;
+	/* XXX use memcpy */
+	for(i = 0; i < NDADDR; i++)
+		ip->i_db[i] = ei->e2di_blocks[i];
+	for(i = 0; i < NIADDR; i++)
+		ip->i_ib[i] = ei->e2di_blocks[EXT2_NDIR_BLOCKS + i];
+}
+
+/*
+ * Convert an in-core inode into a raw (on-disk) ext2 inode.
+ *
+ * ip is the source in-core inode, ei the destination on-disk inode.
+ * Only the fields assigned below are written; e2di_size_high is written
+ * for regular files only and left untouched otherwise.
+ */
+void
+ext2_i2ei(ip, ei)
+	struct inode *ip;
+	struct ext2fs_dinode *ei;
+{
+	int i;
+
+	ei->e2di_mode = ip->i_mode;
+	ei->e2di_nlink = ip->i_nlink;
+	/*
+	 * Godmar thinks: if dtime is nonzero, ext2 says this inode
+	 * has been deleted, this would correspond to a zero link count.
+	 * Hence dtime is 0 while links remain, else the mtime.
+	 */
+	ei->e2di_dtime = ei->e2di_nlink ? 0 : ip->i_mtime;
+	ei->e2di_size = ip->i_size;
+	if (S_ISREG(ip->i_mode))
+		ei->e2di_size_high = ip->i_size >> 32;
+	ei->e2di_atime = ip->i_atime;
+	ei->e2di_mtime = ip->i_mtime;
+	ei->e2di_ctime = ip->i_ctime;
+	/*
+	 * Translate the in-core flag bits one by one into their ext2
+	 * counterparts, starting from a clean slate.  (The raw copy of
+	 * ip->i_flags that used to precede this was a dead store.)
+	 */
+	ei->e2di_flags = 0;
+	ei->e2di_flags |= (ip->i_flags & SF_APPEND) ? EXT2_APPEND: 0;
+	ei->e2di_flags |= (ip->i_flags & SF_IMMUTABLE) ? EXT2_IMMUTABLE: 0;
+	ei->e2di_flags |= (ip->i_flags & UF_NODUMP) ? EXT2_NODUMP: 0;
+	ei->e2di_nblock = ip->i_blocks;
+	ei->e2di_gen = ip->i_gen;
+	ei->e2di_uid = ip->i_uid;
+	ei->e2di_gid = ip->i_gid;
+	/* XXX use memcpy */
+	/* Direct pointers first, then the indirect ones behind them. */
+	for(i = 0; i < NDADDR; i++)
+		ei->e2di_blocks[i] = ip->i_db[i];
+	for(i = 0; i < NIADDR; i++)
+		ei->e2di_blocks[EXT2_NDIR_BLOCKS + i] = ip->i_ib[i];
+}
diff --git a/sys/fs/ext2fs/ext2_lookup.c b/sys/fs/ext2fs/ext2_lookup.c
new file mode 100644
index 0000000..56963b2
--- /dev/null
+++ b/sys/fs/ext2fs/ext2_lookup.c
@@ -0,0 +1,1101 @@
+/*-
+ * modified for Lites 1.1
+ *
+ * Aug 1995, Godmar Back (gback@cs.utah.edu)
+ * University of Utah, Department of Computer Science
+ */
+/*-
+ * Copyright (c) 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ * (c) UNIX System Laboratories, Inc.
+ * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ * @(#)ufs_lookup.c 8.6 (Berkeley) 4/1/94
+ * $FreeBSD$
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/namei.h>
+#include <sys/bio.h>
+#include <sys/buf.h>
+#include <sys/endian.h>
+#include <sys/mount.h>
+#include <sys/vnode.h>
+#include <sys/malloc.h>
+#include <sys/dirent.h>
+#include <sys/sysctl.h>
+
+#include <ufs/ufs/dir.h>
+
+#include <fs/ext2fs/inode.h>
+#include <fs/ext2fs/ext2_mount.h>
+#include <fs/ext2fs/ext2_extern.h>
+#include <fs/ext2fs/ext2fs.h>
+#include <fs/ext2fs/ext2_dir.h>
+
+#ifdef DIAGNOSTIC
+static int dirchk = 1;
+#else
+static int dirchk = 0;
+#endif
+
+static SYSCTL_NODE(_vfs, OID_AUTO, e2fs, CTLFLAG_RD, 0, "EXT2FS filesystem");
+SYSCTL_INT(_vfs_e2fs, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");
+
+/*
+   DIRBLKSIZE in ffs is DEV_BSIZE (in most cases 512)
+   while it is the native blocksize in ext2fs - thus, a #define
+   is no longer appropriate
+*/
+#undef DIRBLKSIZ
+
+/* Maps an ext2 on-disk file type (EXT2_FT_*) to a dirent d_type (DT_*). */
+static u_char ext2_ft_to_dt[] = {
+	DT_UNKNOWN,		/* EXT2_FT_UNKNOWN */
+	DT_REG,			/* EXT2_FT_REG_FILE */
+	DT_DIR,			/* EXT2_FT_DIR */
+	DT_CHR,			/* EXT2_FT_CHRDEV */
+	DT_BLK,			/* EXT2_FT_BLKDEV */
+	DT_FIFO,		/* EXT2_FT_FIFO */
+	DT_SOCK,		/* EXT2_FT_SOCK */
+	DT_LNK,			/* EXT2_FT_SYMLINK */
+};
+/*
+ * Bounds-checked lookup in ext2_ft_to_dt[].  The test must be ">=":
+ * an index equal to the element count is already one past the end of
+ * the table, so it has to yield DT_UNKNOWN instead of being read.
+ */
+#define	FTTODT(ft) \
+	((ft) >= sizeof(ext2_ft_to_dt) / sizeof(ext2_ft_to_dt[0]) ? \
+	    DT_UNKNOWN : ext2_ft_to_dt[(ft)])
+
+/* Maps a dirent d_type (DT_*) to an ext2 on-disk file type (EXT2_FT_*). */
+static u_char dt_to_ext2_ft[] = {
+	EXT2_FT_UNKNOWN,	/* DT_UNKNOWN */
+	EXT2_FT_FIFO,		/* DT_FIFO */
+	EXT2_FT_CHRDEV,		/* DT_CHR */
+	EXT2_FT_UNKNOWN,	/* unused */
+	EXT2_FT_DIR,		/* DT_DIR */
+	EXT2_FT_UNKNOWN,	/* unused */
+	EXT2_FT_BLKDEV,		/* DT_BLK */
+	EXT2_FT_UNKNOWN,	/* unused */
+	EXT2_FT_REG_FILE,	/* DT_REG */
+	EXT2_FT_UNKNOWN,	/* unused */
+	EXT2_FT_SYMLINK,	/* DT_LNK */
+	EXT2_FT_UNKNOWN,	/* unused */
+	EXT2_FT_SOCK,		/* DT_SOCK */
+	EXT2_FT_UNKNOWN,	/* unused */
+	EXT2_FT_UNKNOWN,	/* DT_WHT */
+};
+/*
+ * Bounds-checked lookup in dt_to_ext2_ft[]; as with FTTODT the test is
+ * ">=" so that an index equal to the element count is rejected.
+ */
+#define	DTTOFT(dt) \
+	((dt) >= sizeof(dt_to_ext2_ft) / sizeof(dt_to_ext2_ft[0]) ?
\ + EXT2_FT_UNKNOWN : dt_to_ext2_ft[(dt)]) + +static int ext2_dirbadentry(struct vnode *dp, struct ext2fs_direct_2 *de, + int entryoffsetinblock); + +/* + * Vnode op for reading directories. + * + * The routine below assumes that the on-disk format of a directory + * is the same as that defined by <sys/dirent.h>. If the on-disk + * format changes, then it will be necessary to do a conversion + * from the on-disk format that read returns to the format defined + * by <sys/dirent.h>. + */ +/* + * this is exactly what we do here - the problem is that the conversion + * will blow up some entries by four bytes, so it can't be done in place. + * This is too bad. Right now the conversion is done entry by entry, the + * converted entry is sent via uiomove. + * + * XXX allocate a buffer, convert as many entries as possible, then send + * the whole buffer to uiomove + */ +int +ext2_readdir(ap) + struct vop_readdir_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + struct uio *uio = ap->a_uio; + int count, error; + + struct ext2fs_direct_2 *edp, *dp; + int ncookies; + struct dirent dstdp; + struct uio auio; + struct iovec aiov; + caddr_t dirbuf; + int DIRBLKSIZ = VTOI(ap->a_vp)->i_e2fs->e2fs_bsize; + int readcnt; + off_t startoffset = uio->uio_offset; + + count = uio->uio_resid; + /* + * Avoid complications for partial directory entries by adjusting + * the i/o to end at a block boundary. Don't give up (like ufs + * does) if the initial adjustment gives a negative count, since + * many callers don't supply a large enough buffer. The correct + * size is a little larger than DIRBLKSIZ to allow for expansion + * of directory entries, but some callers just use 512. 
+ */ + count -= (uio->uio_offset + count) & (DIRBLKSIZ -1); + if (count <= 0) + count += DIRBLKSIZ; + auio = *uio; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_resid = count; + auio.uio_segflg = UIO_SYSSPACE; + aiov.iov_len = count; + dirbuf = malloc(count, M_TEMP, M_WAITOK); + aiov.iov_base = dirbuf; + error = VOP_READ(ap->a_vp, &auio, 0, ap->a_cred); + if (error == 0) { + readcnt = count - auio.uio_resid; + edp = (struct ext2fs_direct_2 *)&dirbuf[readcnt]; + ncookies = 0; + bzero(&dstdp, offsetof(struct dirent, d_name)); + for (dp = (struct ext2fs_direct_2 *)dirbuf; + !error && uio->uio_resid > 0 && dp < edp; ) { + /*- + * "New" ext2fs directory entries differ in 3 ways + * from ufs on-disk ones: + * - the name is not necessarily NUL-terminated. + * - the file type field always exists and always + * follows the name length field. + * - the file type is encoded in a different way. + * + * "Old" ext2fs directory entries need no special + * conversions, since they are binary compatible + * with "new" entries having a file type of 0 (i.e., + * EXT2_FT_UNKNOWN). Splitting the old name length + * field didn't make a mess like it did in ufs, + * because ext2fs uses a machine-independent disk + * layout. 
+ */ + dstdp.d_fileno = dp->e2d_ino; + dstdp.d_type = FTTODT(dp->e2d_type); + dstdp.d_namlen = dp->e2d_namlen; + dstdp.d_reclen = GENERIC_DIRSIZ(&dstdp); + bcopy(dp->e2d_name, dstdp.d_name, dstdp.d_namlen); + bzero(dstdp.d_name + dstdp.d_namlen, + dstdp.d_reclen - offsetof(struct dirent, d_name) - + dstdp.d_namlen); + + if (dp->e2d_reclen > 0) { + if(dstdp.d_reclen <= uio->uio_resid) { + /* advance dp */ + dp = (struct ext2fs_direct_2 *) + ((char *)dp + dp->e2d_reclen); + error = + uiomove(&dstdp, dstdp.d_reclen, uio); + if (!error) + ncookies++; + } else + break; + } else { + error = EIO; + break; + } + } + /* we need to correct uio_offset */ + uio->uio_offset = startoffset + (caddr_t)dp - dirbuf; + + if (!error && ap->a_ncookies != NULL) { + u_long *cookiep, *cookies, *ecookies; + off_t off; + + if (uio->uio_segflg != UIO_SYSSPACE || uio->uio_iovcnt != 1) + panic("ext2_readdir: unexpected uio from NFS server"); + cookies = malloc(ncookies * sizeof(u_long), M_TEMP, + M_WAITOK); + off = startoffset; + for (dp = (struct ext2fs_direct_2 *)dirbuf, + cookiep = cookies, ecookies = cookies + ncookies; + cookiep < ecookies; + dp = (struct ext2fs_direct_2 *)((caddr_t) dp + dp->e2d_reclen)) { + off += dp->e2d_reclen; + *cookiep++ = (u_long) off; + } + *ap->a_ncookies = ncookies; + *ap->a_cookies = cookies; + } + } + free(dirbuf, M_TEMP); + if (ap->a_eofflag) + *ap->a_eofflag = VTOI(ap->a_vp)->i_size <= uio->uio_offset; + return (error); +} + +/* + * Convert a component of a pathname into a pointer to a locked inode. + * This is a very central and rather complicated routine. + * If the file system is not maintained in a strict tree hierarchy, + * this can result in a deadlock situation (see comments in code below). + * + * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending + * on whether the name is to be looked up, created, renamed, or deleted. 
+ * When CREATE, RENAME, or DELETE is specified, information usable in + * creating, renaming, or deleting a directory entry may be calculated. + * If flag has LOCKPARENT or'ed into it and the target of the pathname + * exists, lookup returns both the target and its parent directory locked. + * When creating or renaming and LOCKPARENT is specified, the target may + * not be ".". When deleting and LOCKPARENT is specified, the target may + * be "."., but the caller must check to ensure it does an vrele and vput + * instead of two vputs. + * + * Overall outline of ext2_lookup: + * + * search for name in directory, to found or notfound + * notfound: + * if creating, return locked directory, leaving info on available slots + * else return error + * found: + * if at end of path and deleting, return information to allow delete + * if at end of path and rewriting (RENAME and LOCKPARENT), lock target + * inode and return info to allow rewrite + * if not at end, add name to cache; if at end and neither creating + * nor deleting, add name to cache + */ +int +ext2_lookup(ap) + struct vop_cachedlookup_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + } */ *ap; +{ + struct vnode *vdp; /* vnode for directory being searched */ + struct inode *dp; /* inode for directory being searched */ + struct buf *bp; /* a buffer of directory entries */ + struct ext2fs_direct_2 *ep; /* the current directory entry */ + int entryoffsetinblock; /* offset of ep in bp's buffer */ + enum {NONE, COMPACT, FOUND} slotstatus; + doff_t slotoffset; /* offset of area with free space */ + int slotsize; /* size of area at slotoffset */ + doff_t i_diroff; /* cached i_diroff value */ + doff_t i_offset; /* cached i_offset value */ + int slotfreespace; /* amount of space free in slot */ + int slotneeded; /* size of the entry we're seeking */ + int numdirpasses; /* strategy for directory search */ + doff_t endsearch; /* offset to end directory search */ + doff_t prevoff; /* 
prev entry dp->i_offset */ + struct vnode *pdp; /* saved dp during symlink work */ + struct vnode *tdp; /* returned by VFS_VGET */ + doff_t enduseful; /* pointer past last used dir slot */ + u_long bmask; /* block offset mask */ + int namlen, error; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct ucred *cred = cnp->cn_cred; + int flags = cnp->cn_flags; + int nameiop = cnp->cn_nameiop; + ino_t ino; + int ltype; + + int DIRBLKSIZ = VTOI(ap->a_dvp)->i_e2fs->e2fs_bsize; + + bp = NULL; + slotoffset = -1; + *vpp = NULL; + vdp = ap->a_dvp; + dp = VTOI(vdp); + /* + * We now have a segment name to search for, and a directory to search. + */ + + /* + * Suppress search for slots unless creating + * file and at end of pathname, in which case + * we watch for a place to put the new file in + * case it doesn't already exist. + */ + ino = 0; + i_diroff = dp->i_diroff; + slotstatus = FOUND; + slotfreespace = slotsize = slotneeded = 0; + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN)) { + slotstatus = NONE; + slotneeded = EXT2_DIR_REC_LEN(cnp->cn_namelen); + /* was + slotneeded = (sizeof(struct direct) - MAXNAMLEN + + cnp->cn_namelen + 3) &~ 3; */ + } + + /* + * If there is cached information on a previous search of + * this directory, pick up where we last left off. + * We cache only lookups as these are the most common + * and have the greatest payoff. Caching CREATE has little + * benefit as it usually must search the entire directory + * to determine that the entry does not exist. Caching the + * location of the last DELETE or RENAME has not reduced + * profiling time and hence has been removed in the interest + * of simplicity. 
+ */ + bmask = VFSTOEXT2(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; + if (nameiop != LOOKUP || i_diroff == 0 || + i_diroff > dp->i_size) { + entryoffsetinblock = 0; + i_offset = 0; + numdirpasses = 1; + } else { + i_offset = i_diroff; + if ((entryoffsetinblock = i_offset & bmask) && + (error = ext2_blkatoff(vdp, (off_t)i_offset, NULL, + &bp))) + return (error); + numdirpasses = 2; + nchstats.ncs_2passes++; + } + prevoff = i_offset; + endsearch = roundup2(dp->i_size, DIRBLKSIZ); + enduseful = 0; + +searchloop: + while (i_offset < endsearch) { + /* + * If necessary, get the next directory block. + */ + if ((i_offset & bmask) == 0) { + if (bp != NULL) + brelse(bp); + if ((error = + ext2_blkatoff(vdp, (off_t)i_offset, NULL, + &bp)) != 0) + return (error); + entryoffsetinblock = 0; + } + /* + * If still looking for a slot, and at a DIRBLKSIZE + * boundary, have to start looking for free space again. + */ + if (slotstatus == NONE && + (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { + slotoffset = -1; + slotfreespace = 0; + } + /* + * Get pointer to next entry. + * Full validation checks are slow, so we only check + * enough to insure forward progress through the + * directory. Complete checks can be run by setting + * "vfs.e2fs.dirchk" to be true. + */ + ep = (struct ext2fs_direct_2 *) + ((char *)bp->b_data + entryoffsetinblock); + if (ep->e2d_reclen == 0 || + (dirchk && ext2_dirbadentry(vdp, ep, entryoffsetinblock))) { + int i; + ext2_dirbad(dp, i_offset, "mangled entry"); + i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); + i_offset += i; + entryoffsetinblock += i; + continue; + } + + /* + * If an appropriate sized slot has not yet been found, + * check to see if one is available. Also accumulate space + * in the current block so that we can determine if + * compaction is viable. 
+ */ + if (slotstatus != FOUND) { + int size = ep->e2d_reclen; + + if (ep->e2d_ino != 0) + size -= EXT2_DIR_REC_LEN(ep->e2d_namlen); + if (size > 0) { + if (size >= slotneeded) { + slotstatus = FOUND; + slotoffset = i_offset; + slotsize = ep->e2d_reclen; + } else if (slotstatus == NONE) { + slotfreespace += size; + if (slotoffset == -1) + slotoffset = i_offset; + if (slotfreespace >= slotneeded) { + slotstatus = COMPACT; + slotsize = i_offset + + ep->e2d_reclen - slotoffset; + } + } + } + } + + /* + * Check for a name match. + */ + if (ep->e2d_ino) { + namlen = ep->e2d_namlen; + if (namlen == cnp->cn_namelen && + !bcmp(cnp->cn_nameptr, ep->e2d_name, + (unsigned)namlen)) { + /* + * Save directory entry's inode number and + * reclen in ndp->ni_ufs area, and release + * directory buffer. + */ + ino = ep->e2d_ino; + goto found; + } + } + prevoff = i_offset; + i_offset += ep->e2d_reclen; + entryoffsetinblock += ep->e2d_reclen; + if (ep->e2d_ino) + enduseful = i_offset; + } +/* notfound: */ + /* + * If we started in the middle of the directory and failed + * to find our target, we must check the beginning as well. + */ + if (numdirpasses == 2) { + numdirpasses--; + i_offset = 0; + endsearch = i_diroff; + goto searchloop; + } + dp->i_offset = i_offset; + if (bp != NULL) + brelse(bp); + /* + * If creating, and at end of pathname and current + * directory has not been removed, then can consider + * allowing file to be created. + */ + if ((nameiop == CREATE || nameiop == RENAME) && + (flags & ISLASTCN) && dp->i_nlink != 0) { + /* + * Access for write is interpreted as allowing + * creation of files in the directory. + */ + if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0) + return (error); + /* + * Return an indication of where the new directory + * entry should be put. If we didn't find a slot, + * then set dp->i_count to 0 indicating + * that the new slot belongs at the end of the + * directory. 
If we found a slot, then the new entry + * can be put in the range from dp->i_offset to + * dp->i_offset + dp->i_count. + */ + if (slotstatus == NONE) { + dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); + dp->i_count = 0; + enduseful = dp->i_offset; + } else { + dp->i_offset = slotoffset; + dp->i_count = slotsize; + if (enduseful < slotoffset + slotsize) + enduseful = slotoffset + slotsize; + } + dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + /* + * We return with the directory locked, so that + * the parameters we set up above will still be + * valid if we actually decide to do a direnter(). + * We return ni_vp == NULL to indicate that the entry + * does not currently exist; we leave a pointer to + * the (locked) directory inode in ndp->ni_dvp. + * The pathname buffer is saved so that the name + * can be obtained later. + * + * NB - if the directory is unlocked, then this + * information cannot be used. + */ + cnp->cn_flags |= SAVENAME; + return (EJUSTRETURN); + } + /* + * Insert name into cache (as non-existent) if appropriate. + */ + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) + cache_enter(vdp, *vpp, cnp); + return (ENOENT); + +found: + if (numdirpasses == 2) + nchstats.ncs_pass2++; + /* + * Check that directory length properly reflects presence + * of this entry. + */ + if (entryoffsetinblock + EXT2_DIR_REC_LEN(ep->e2d_namlen) + > dp->i_size) { + ext2_dirbad(dp, i_offset, "i_size too small"); + dp->i_size = entryoffsetinblock+EXT2_DIR_REC_LEN(ep->e2d_namlen); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + } + brelse(bp); + + /* + * Found component in pathname. + * If the final component of path name, save information + * in the cache as to where the entry was found. + */ + if ((flags & ISLASTCN) && nameiop == LOOKUP) + dp->i_diroff = i_offset &~ (DIRBLKSIZ - 1); + dp->i_offset = i_offset; + /* + * If deleting, and at end of pathname, return + * parameters which can be used to remove file. 
+ */ + if (nameiop == DELETE && (flags & ISLASTCN)) { + /* + * Write access to directory required to delete files. + */ + if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0) + return (error); + /* + * Return pointer to current entry in dp->i_offset, + * and distance past previous entry (if there + * is a previous entry in this block) in dp->i_count. + * Save directory inode pointer in ndp->ni_dvp for dirremove(). + */ + if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) + dp->i_count = 0; + else + dp->i_count = dp->i_offset - prevoff; + if (dp->i_number == ino) { + VREF(vdp); + *vpp = vdp; + return (0); + } + if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, + &tdp)) != 0) + return (error); + /* + * If directory is "sticky", then user must own + * the directory, or the file in it, else she + * may not delete it (unless she's root). This + * implements append-only directories. + */ + if ((dp->i_mode & ISVTX) && + cred->cr_uid != 0 && + cred->cr_uid != dp->i_uid && + VTOI(tdp)->i_uid != cred->cr_uid) { + vput(tdp); + return (EPERM); + } + *vpp = tdp; + return (0); + } + + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && (flags & ISLASTCN)) { + if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_thread)) != 0) + return (error); + /* + * Careful about locking second inode. + * This can only occur if the target is ".". + */ + if (dp->i_number == ino) + return (EISDIR); + if ((error = VFS_VGET(vdp->v_mount, ino, LK_EXCLUSIVE, + &tdp)) != 0) + return (error); + *vpp = tdp; + cnp->cn_flags |= SAVENAME; + return (0); + } + + /* + * Step through the translation in the name. We do not `vput' the + * directory because we may need it again if a symbolic link + * is relative to the current directory. Instead we save it + * unlocked as "pdp". 
We must get the target inode before unlocking + * the directory to insure that the inode will not be removed + * before we get it. We prevent deadlock by always fetching + * inodes from the root, moving down the directory tree. Thus + * when following backward pointers ".." we must unlock the + * parent directory before getting the requested directory. + * There is a potential race condition here if both the current + * and parent directories are removed before the VFS_VGET for the + * inode associated with ".." returns. We hope that this occurs + * infrequently since we cannot avoid this race condition without + * implementing a sophisticated deadlock detection algorithm. + * Note also that this simple deadlock detection scheme will not + * work if the file system has any hard links other than ".." + * that point backwards in the directory structure. + */ + pdp = vdp; + if (flags & ISDOTDOT) { + ltype = VOP_ISLOCKED(pdp); + VOP_UNLOCK(pdp, 0); /* race to get the inode */ + error = VFS_VGET(vdp->v_mount, ino, cnp->cn_lkflags, &tdp); + vn_lock(pdp, ltype | LK_RETRY); + if (error != 0) + return (error); + *vpp = tdp; + } else if (dp->i_number == ino) { + VREF(vdp); /* we want ourself, ie "." */ + /* + * When we lookup "." we still can be asked to lock it + * differently. + */ + ltype = cnp->cn_lkflags & LK_TYPE_MASK; + if (ltype != VOP_ISLOCKED(vdp)) { + if (ltype == LK_EXCLUSIVE) + vn_lock(vdp, LK_UPGRADE | LK_RETRY); + else /* if (ltype == LK_SHARED) */ + vn_lock(vdp, LK_DOWNGRADE | LK_RETRY); + } + *vpp = vdp; + } else { + if ((error = VFS_VGET(vdp->v_mount, ino, cnp->cn_lkflags, + &tdp)) != 0) + return (error); + *vpp = tdp; + } + + /* + * Insert name into cache if appropriate. 
+ */ + if (cnp->cn_flags & MAKEENTRY) + cache_enter(vdp, *vpp, cnp); + return (0); +} + +void +ext2_dirbad(ip, offset, how) + struct inode *ip; + doff_t offset; + char *how; +{ + struct mount *mp; + + mp = ITOV(ip)->v_mount; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + panic("ext2_dirbad: %s: bad dir ino %lu at offset %ld: %s\n", + mp->mnt_stat.f_mntonname, (u_long)ip->i_number,(long)offset, how); + else + (void)printf("%s: bad dir ino %lu at offset %ld: %s\n", + mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how); + +} + +/* + * Do consistency checking on a directory entry: + * record length must be multiple of 4 + * entry must fit in rest of its DIRBLKSIZ block + * record must be large enough to contain entry + * name is not longer than MAXNAMLEN + * name must be as long as advertised, and null terminated + */ +/* + * changed so that it confirms to ext2_check_dir_entry + */ +static int +ext2_dirbadentry(dp, de, entryoffsetinblock) + struct vnode *dp; + struct ext2fs_direct_2 *de; + int entryoffsetinblock; +{ + int DIRBLKSIZ = VTOI(dp)->i_e2fs->e2fs_bsize; + + char * error_msg = NULL; + + if (de->e2d_reclen < EXT2_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (de->e2d_reclen % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (de->e2d_reclen < EXT2_DIR_REC_LEN(de->e2d_namlen)) + error_msg = "reclen is too small for name_len"; + else if (entryoffsetinblock + de->e2d_reclen > DIRBLKSIZ) + error_msg = "directory entry across blocks"; + /* else LATER + if (de->inode > dir->i_sb->u.ext2_sb.s_es->s_inodes_count) + error_msg = "inode out of bounds"; + */ + + if (error_msg != NULL) { + printf("bad directory entry: %s\n", error_msg); + printf("offset=%d, inode=%lu, rec_len=%u, name_len=%u\n", + entryoffsetinblock, (unsigned long)de->e2d_ino, + de->e2d_reclen, de->e2d_namlen); + } + return error_msg == NULL ? 0 : 1; +} + +/* + * Write a directory entry after a call to namei, using the parameters + * that it left in nameidata. 
The argument ip is the inode which the new + * directory entry will refer to. Dvp is a pointer to the directory to + * be written, which was left locked by namei. Remaining parameters + * (dp->i_offset, dp->i_count) indicate how the space for the new + * entry is to be obtained. + */ +int +ext2_direnter(ip, dvp, cnp) + struct inode *ip; + struct vnode *dvp; + struct componentname *cnp; +{ + struct ext2fs_direct_2 *ep, *nep; + struct inode *dp; + struct buf *bp; + struct ext2fs_direct_2 newdir; + struct iovec aiov; + struct uio auio; + u_int dsize; + int error, loc, newentrysize, spacefree; + char *dirbuf; + int DIRBLKSIZ = ip->i_e2fs->e2fs_bsize; + + +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & SAVENAME) == 0) + panic("direnter: missing name"); +#endif + dp = VTOI(dvp); + newdir.e2d_ino = ip->i_number; + newdir.e2d_namlen = cnp->cn_namelen; + if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, + EXT2F_INCOMPAT_FTYPE)) + newdir.e2d_type = DTTOFT(IFTODT(ip->i_mode)); + else + newdir.e2d_type = EXT2_FT_UNKNOWN; + bcopy(cnp->cn_nameptr, newdir.e2d_name, (unsigned)cnp->cn_namelen + 1); + newentrysize = EXT2_DIR_REC_LEN(newdir.e2d_namlen); + if (dp->i_count == 0) { + /* + * If dp->i_count is 0, then namei could find no + * space in the directory. Here, dp->i_offset will + * be on a directory block boundary and we will write the + * new entry into a fresh block. 
+ */ + if (dp->i_offset & (DIRBLKSIZ - 1)) + panic("ext2_direnter: newblk"); + auio.uio_offset = dp->i_offset; + newdir.e2d_reclen = DIRBLKSIZ; + auio.uio_resid = newentrysize; + aiov.iov_len = newentrysize; + aiov.iov_base = (caddr_t)&newdir; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_td = (struct thread *)0; + error = VOP_WRITE(dvp, &auio, IO_SYNC, cnp->cn_cred); + if (DIRBLKSIZ > + VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + /* XXX should grow with balloc() */ + panic("ext2_direnter: frag size"); + else if (!error) { + dp->i_size = roundup2(dp->i_size, DIRBLKSIZ); + dp->i_flag |= IN_CHANGE; + } + return (error); + } + + /* + * If dp->i_count is non-zero, then namei found space + * for the new entry in the range dp->i_offset to + * dp->i_offset + dp->i_count in the directory. + * To use this space, we may have to compact the entries located + * there, by copying them together towards the beginning of the + * block, leaving the free space in one usable chunk at the end. + */ + + /* + * Increase size of directory if entry eats into new space. + * This should never push the size past a new multiple of + * DIRBLKSIZE. + * + * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. + */ + if (dp->i_offset + dp->i_count > dp->i_size) + dp->i_size = dp->i_offset + dp->i_count; + /* + * Get the block containing the space for the new directory entry. + */ + if ((error = ext2_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, + &bp)) != 0) + return (error); + /* + * Find space for the new entry. In the simple case, the entry at + * offset base will have the space. If it does not, then namei + * arranged that compacting the region dp->i_offset to + * dp->i_offset + dp->i_count would yield the + * space. 
+ */ + ep = (struct ext2fs_direct_2 *)dirbuf; + dsize = EXT2_DIR_REC_LEN(ep->e2d_namlen); + spacefree = ep->e2d_reclen - dsize; + for (loc = ep->e2d_reclen; loc < dp->i_count; ) { + nep = (struct ext2fs_direct_2 *)(dirbuf + loc); + if (ep->e2d_ino) { + /* trim the existing slot */ + ep->e2d_reclen = dsize; + ep = (struct ext2fs_direct_2 *)((char *)ep + dsize); + } else { + /* overwrite; nothing there; header is ours */ + spacefree += dsize; + } + dsize = EXT2_DIR_REC_LEN(nep->e2d_namlen); + spacefree += nep->e2d_reclen - dsize; + loc += nep->e2d_reclen; + bcopy((caddr_t)nep, (caddr_t)ep, dsize); + } + /* + * Update the pointer fields in the previous entry (if any), + * copy in the new entry, and write out the block. + */ + if (ep->e2d_ino == 0) { + if (spacefree + dsize < newentrysize) + panic("ext2_direnter: compact1"); + newdir.e2d_reclen = spacefree + dsize; + } else { + if (spacefree < newentrysize) + panic("ext2_direnter: compact2"); + newdir.e2d_reclen = spacefree; + ep->e2d_reclen = dsize; + ep = (struct ext2fs_direct_2 *)((char *)ep + dsize); + } + bcopy((caddr_t)&newdir, (caddr_t)ep, (u_int)newentrysize); + error = bwrite(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + if (!error && dp->i_endoff && dp->i_endoff < dp->i_size) + error = ext2_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC, + cnp->cn_cred, cnp->cn_thread); + return (error); +} + +/* + * Remove a directory entry after a call to namei, using + * the parameters which it left in nameidata. The entry + * dp->i_offset contains the offset into the directory of the + * entry to be eliminated. The dp->i_count field contains the + * size of the previous record in the directory. If this + * is 0, the first entry is being deleted, so we need only + * zero the inode number to mark the entry as free. If the + * entry is not the first in the directory, we must reclaim + * the space of the now empty record by adding the record size + * to the size of the previous entry. 
+ */ +int +ext2_dirremove(dvp, cnp) + struct vnode *dvp; + struct componentname *cnp; +{ + struct inode *dp; + struct ext2fs_direct_2 *ep, *rep; + struct buf *bp; + int error; + + dp = VTOI(dvp); + if (dp->i_count == 0) { + /* + * First entry in block: set d_ino to zero. + */ + if ((error = + ext2_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, + &bp)) != 0) + return (error); + ep->e2d_ino = 0; + error = bwrite(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); + } + /* + * Collapse new free space into previous entry. + */ + if ((error = ext2_blkatoff(dvp, (off_t)(dp->i_offset - dp->i_count), + (char **)&ep, &bp)) != 0) + return (error); + + /* Set 'rep' to the entry being removed. */ + if (dp->i_count == 0) + rep = ep; + else + rep = (struct ext2fs_direct_2 *)((char *)ep + ep->e2d_reclen); + ep->e2d_reclen += rep->e2d_reclen; + error = bwrite(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Rewrite an existing directory entry to point at the inode + * supplied. The parameters describing the directory entry are + * set up by a call to namei. + */ +int +ext2_dirrewrite(dp, ip, cnp) + struct inode *dp, *ip; + struct componentname *cnp; +{ + struct buf *bp; + struct ext2fs_direct_2 *ep; + struct vnode *vdp = ITOV(dp); + int error; + + if ((error = ext2_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, + &bp)) != 0) + return (error); + ep->e2d_ino = ip->i_number; + if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, + EXT2F_INCOMPAT_FTYPE)) + ep->e2d_type = DTTOFT(IFTODT(ip->i_mode)); + else + ep->e2d_type = EXT2_FT_UNKNOWN; + error = bwrite(bp); + dp->i_flag |= IN_CHANGE | IN_UPDATE; + return (error); +} + +/* + * Check if a directory is empty or not. + * Inode supplied must be locked. + * + * Using a struct dirtemplate here is not precisely + * what we want, but better than using a struct direct. + * + * NB: does not handle corrupted directories. 
+ */ +int +ext2_dirempty(ip, parentino, cred) + struct inode *ip; + ino_t parentino; + struct ucred *cred; +{ + off_t off; + struct dirtemplate dbuf; + struct ext2fs_direct_2 *dp = (struct ext2fs_direct_2 *)&dbuf; + int error, count, namlen; +#define MINDIRSIZ (sizeof (struct dirtemplate) / 2) + + for (off = 0; off < ip->i_size; off += dp->e2d_reclen) { + error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, + off, UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, cred, + NOCRED, &count, (struct thread *)0); + /* + * Since we read MINDIRSIZ, residual must + * be 0 unless we're at end of file. + */ + if (error || count != 0) + return (0); + /* avoid infinite loops */ + if (dp->e2d_reclen == 0) + return (0); + /* skip empty entries */ + if (dp->e2d_ino == 0) + continue; + /* accept only "." and ".." */ + namlen = dp->e2d_namlen; + if (namlen > 2) + return (0); + if (dp->e2d_name[0] != '.') + return (0); + /* + * At this point namlen must be 1 or 2. + * 1 implies ".", 2 implies ".." if second + * char is also "." + */ + if (namlen == 1) + continue; + if (dp->e2d_name[1] == '.' && dp->e2d_ino == parentino) + continue; + return (0); + } + return (1); +} + +/* + * Check if source directory is in the path of the target directory. + * Target is supplied locked, source is unlocked. + * The target is always vput before returning. 
+ */ +int +ext2_checkpath(source, target, cred) + struct inode *source, *target; + struct ucred *cred; +{ + struct vnode *vp; + int error, rootino, namlen; + struct dirtemplate dirbuf; + + vp = ITOV(target); + if (target->i_number == source->i_number) { + error = EEXIST; + goto out; + } + rootino = ROOTINO; + error = 0; + if (target->i_number == rootino) + goto out; + + for (;;) { + if (vp->v_type != VDIR) { + error = ENOTDIR; + break; + } + error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED | IO_NOMACCHECK, cred, NOCRED, NULL, + NULL); + if (error != 0) + break; + namlen = dirbuf.dotdot_type; /* like ufs little-endian */ + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.' || + dirbuf.dotdot_name[1] != '.') { + error = ENOTDIR; + break; + } + if (dirbuf.dotdot_ino == source->i_number) { + error = EINVAL; + break; + } + if (dirbuf.dotdot_ino == rootino) + break; + vput(vp); + if ((error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, + LK_EXCLUSIVE, &vp)) != 0) { + vp = NULL; + break; + } + } + +out: + if (error == ENOTDIR) + printf("checkpath: .. not a directory\n"); + if (vp != NULL) + vput(vp); + return (error); +} diff --git a/sys/fs/ext2fs/ext2_mount.h b/sys/fs/ext2fs/ext2_mount.h new file mode 100644 index 0000000..6bc051c --- /dev/null +++ b/sys/fs/ext2fs/ext2_mount.h @@ -0,0 +1,79 @@ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufsmount.h 8.6 (Berkeley) 3/30/95 + * $FreeBSD$ + */ + +#ifndef _FS_EXT2FS_EXT2_MOUNT_H_ +#define _FS_EXT2FS_EXT2_MOUNT_H_ + +#ifdef _KERNEL + +#ifdef MALLOC_DECLARE +MALLOC_DECLARE(M_EXT2NODE); +#endif + +struct vnode; + +/* This structure describes the ext2fs specific mount structure data. 
*/ +struct ext2mount { + struct mount *um_mountp; /* filesystem vfs structure */ + struct cdev *um_dev; /* device mounted */ + struct vnode *um_devvp; /* block device mounted vnode */ + + struct m_ext2fs *um_e2fs; /* EXT2FS */ +#define em_e2fsb um_e2fs->e2fs + + u_long um_nindir; /* indirect ptrs per block */ + u_long um_bptrtodb; /* indir ptr to disk block */ + u_long um_seqinc; /* inc between seq blocks */ + + struct mtx um_lock; /* Protects ext2mount & fs */ + + struct g_consumer *um_cp; + struct bufobj *um_bo; +}; + +#define EXT2_LOCK(aa) mtx_lock(&(aa)->um_lock) +#define EXT2_UNLOCK(aa) mtx_unlock(&(aa)->um_lock) +#define EXT2_MTX(aa) (&(aa)->um_lock) + +/* Convert mount ptr to ext2fsmount ptr. */ +#define VFSTOEXT2(mp) ((struct ext2mount *)((mp)->mnt_data)) + +/* + * Macros to access file system parameters in the ufsmount structure. + * Used by ufs_bmap. + */ +#define MNINDIR(ump) ((ump)->um_nindir) +#define blkptrtodb(ump, b) ((b) << (ump)->um_bptrtodb) +#define is_sequential(ump, a, b) ((b) == (a) + ump->um_seqinc) +#endif /* _KERNEL */ + +#endif diff --git a/sys/fs/ext2fs/ext2_readwrite.c b/sys/fs/ext2fs/ext2_readwrite.c new file mode 100644 index 0000000..9c9749a --- /dev/null +++ b/sys/fs/ext2fs/ext2_readwrite.c @@ -0,0 +1,309 @@ +/*- + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ufs_readwrite.c 8.7 (Berkeley) 1/21/94 + * $FreeBSD$ + */ + +/* XXX TODO: remove these obfuscations (as in ffs_vnops.c). */ +#define BLKSIZE(a, b, c) blksize(a, b, c) +#define FS struct m_ext2fs +#define I_FS i_e2fs +#define READ ext2_read +#define READ_S "ext2_read" +#define WRITE ext2_write +#define WRITE_S "ext2_write" + +/* + * Vnode op for reading. 
+ */ +static int +READ(ap) + struct vop_read_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct inode *ip; + struct uio *uio; + FS *fs; + struct buf *bp; + daddr_t lbn, nextlbn; + off_t bytesinfile; + long size, xfersize, blkoffset; + int error, orig_resid, seqcount; + seqcount = ap->a_ioflag >> IO_SEQSHIFT; + u_short mode; + + vp = ap->a_vp; + ip = VTOI(vp); + mode = ip->i_mode; + uio = ap->a_uio; + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_READ) + panic("%s: mode", READ_S); + + if (vp->v_type == VLNK) { + if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen) + panic("%s: short symlink", READ_S); + } else if (vp->v_type != VREG && vp->v_type != VDIR) + panic("%s: type %d", READ_S, vp->v_type); +#endif + orig_resid = uio->uio_resid; + KASSERT(orig_resid >= 0, ("ext2_read: uio->uio_resid < 0")); + if (orig_resid == 0) + return (0); + KASSERT(uio->uio_offset >= 0, ("ext2_read: uio->uio_offset < 0")); + fs = ip->I_FS; + if (uio->uio_offset < ip->i_size && uio->uio_offset >= fs->e2fs_maxfilesize) + return (EOVERFLOW); + for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) + break; + lbn = lblkno(fs, uio->uio_offset); + nextlbn = lbn + 1; + size = BLKSIZE(fs, ip, lbn); + blkoffset = blkoff(fs, uio->uio_offset); + + xfersize = fs->e2fs_fsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (bytesinfile < xfersize) + xfersize = bytesinfile; + + if (lblktosize(fs, nextlbn) >= ip->i_size) + error = bread(vp, lbn, size, NOCRED, &bp); + else if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) + error = cluster_read(vp, ip->i_size, lbn, size, + NOCRED, blkoffset + uio->uio_resid, seqcount, &bp); + else if (seqcount > 1) { + int nextsize = BLKSIZE(fs, ip, nextlbn); + error = breadn(vp, lbn, + size, &nextlbn, &nextsize, 1, NOCRED, &bp); + } else + error = bread(vp, lbn, size, NOCRED, &bp); + if 
(error) { + brelse(bp); + bp = NULL; + break; + } + + /* + * We should only get non-zero b_resid when an I/O error + * has occurred, which should cause us to break above. + * However, if the short read did not cause an error, + * then we want to ensure that we do not uiomove bad + * or uninitialized data. + */ + size -= bp->b_resid; + if (size < xfersize) { + if (size == 0) + break; + xfersize = size; + } + error = uiomove((char *)bp->b_data + blkoffset, + (int)xfersize, uio); + if (error) + break; + + bqrelse(bp); + } + if (bp != NULL) + bqrelse(bp); + if ((error == 0 || uio->uio_resid != orig_resid) && + (vp->v_mount->mnt_flag & MNT_NOATIME) == 0) + ip->i_flag |= IN_ACCESS; + return (error); +} + +/* + * Vnode op for writing. + */ +static int +WRITE(ap) + struct vop_write_args /* { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp; + struct uio *uio; + struct inode *ip; + FS *fs; + struct buf *bp; + struct thread *td; + daddr_t lbn; + off_t osize; + int blkoffset, error, flags, ioflag, resid, size, seqcount, xfersize; + + ioflag = ap->a_ioflag; + seqcount = ioflag >> IO_SEQSHIFT; + uio = ap->a_uio; + vp = ap->a_vp; + ip = VTOI(vp); + +#ifdef DIAGNOSTIC + if (uio->uio_rw != UIO_WRITE) + panic("%s: mode", WRITE_S); +#endif + + switch (vp->v_type) { + case VREG: + if (ioflag & IO_APPEND) + uio->uio_offset = ip->i_size; + if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) + return (EPERM); + /* FALLTHROUGH */ + case VLNK: + break; + case VDIR: + /* XXX differs from ffs -- this is called from ext2_mkdir(). 
*/ + if ((ioflag & IO_SYNC) == 0) + panic("ext2_write: nonsync dir write"); + break; + default: + panic("ext2_write: type %p %d (%jd,%jd)", (void *)vp, + vp->v_type, (intmax_t)uio->uio_offset, + (intmax_t)uio->uio_resid); + } + + KASSERT(uio->uio_resid >= 0, ("ext2_write: uio->uio_resid < 0")); + KASSERT(uio->uio_offset >= 0, ("ext2_write: uio->uio_offset < 0")); + fs = ip->I_FS; + if ((uoff_t)uio->uio_offset + uio->uio_resid > fs->e2fs_maxfilesize) + return (EFBIG); + /* + * Maybe this should be above the vnode op call, but so long as + * file servers have no limits, I don't think it matters. + */ + td = uio->uio_td; + if (vp->v_type == VREG && td != NULL) { + PROC_LOCK(td->td_proc); + if (uio->uio_offset + uio->uio_resid > + lim_cur(td->td_proc, RLIMIT_FSIZE)) { + psignal(td->td_proc, SIGXFSZ); + PROC_UNLOCK(td->td_proc); + return (EFBIG); + } + PROC_UNLOCK(td->td_proc); + } + + resid = uio->uio_resid; + osize = ip->i_size; + flags = ioflag & IO_SYNC ? B_SYNC : 0; + + for (error = 0; uio->uio_resid > 0;) { + lbn = lblkno(fs, uio->uio_offset); + blkoffset = blkoff(fs, uio->uio_offset); + xfersize = fs->e2fs_fsize - blkoffset; + if (uio->uio_resid < xfersize) + xfersize = uio->uio_resid; + if (uio->uio_offset + xfersize > ip->i_size) + vnode_pager_setsize(vp, uio->uio_offset + xfersize); + + /* + * Avoid a data-consistency race between write() and mmap() + * by ensuring that newly allocated blocks are zeroed. The + * race can occur even in the case where the write covers + * the entire block. + */ + flags |= B_CLRBUF; + error = ext2_balloc(ip, lbn, blkoffset + xfersize, + ap->a_cred, &bp, flags); + if (error != 0) + break; + if (uio->uio_offset + xfersize > ip->i_size) + ip->i_size = uio->uio_offset + xfersize; + size = BLKSIZE(fs, ip, lbn) - bp->b_resid; + if (size < xfersize) + xfersize = size; + + error = + uiomove((char *)bp->b_data + blkoffset, (int)xfersize, uio); + if ((ioflag & IO_VMIO) && + LIST_FIRST(&bp->b_dep) == NULL) /* in ext2fs? 
*/ + bp->b_flags |= B_RELBUF; + + if (ioflag & IO_SYNC) { + (void)bwrite(bp); + } else if (xfersize + blkoffset == fs->e2fs_fsize) { + if ((vp->v_mount->mnt_flag & MNT_NOCLUSTERW) == 0) { + bp->b_flags |= B_CLUSTEROK; + cluster_write(vp, bp, ip->i_size, seqcount); + } else { + bawrite(bp); + } + } else { + bp->b_flags |= B_CLUSTEROK; + bdwrite(bp); + } + if (error || xfersize == 0) + break; + } + /* + * If we successfully wrote any data, and we are not the superuser + * we clear the setuid and setgid bits as a precaution against + * tampering. + * XXX too late, the tamperer may have opened the file while we + * were writing the data (or before). + * XXX too early, if (error && ioflag & IO_UNIT) then we will + * unwrite the data. + */ + if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) + ip->i_mode &= ~(ISUID | ISGID); + if (error) { + /* + * XXX should truncate to the last successfully written + * data if the uiomove() failed. + */ + if (ioflag & IO_UNIT) { + (void)ext2_truncate(vp, osize, + ioflag & IO_SYNC, ap->a_cred, uio->uio_td); + uio->uio_offset -= resid - uio->uio_resid; + uio->uio_resid = resid; + } + } + if (uio->uio_resid != resid) { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (ioflag & IO_SYNC) + error = ext2_update(vp, 1); + } + return (error); +} diff --git a/sys/fs/ext2fs/ext2_subr.c b/sys/fs/ext2fs/ext2_subr.c new file mode 100644 index 0000000..dcb1b7c --- /dev/null +++ b/sys/fs/ext2fs/ext2_subr.c @@ -0,0 +1,120 @@ +/*- + * modified for Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_subr.c 8.2 (Berkeley) 9/21/93 + * $FreeBSD$ + */ + +#include <sys/param.h> + +#include <sys/proc.h> +#include <sys/systm.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/lock.h> +#include <sys/ucred.h> +#include <sys/vnode.h> + +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2_extern.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/fs.h> + +#ifdef KDB +void ext2_checkoverlap(struct buf *, struct inode *); +#endif + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. 
+ */ +int +ext2_blkatoff(vp, offset, res, bpp) + struct vnode *vp; + off_t offset; + char **res; + struct buf **bpp; +{ + struct inode *ip; + struct m_ext2fs *fs; + struct buf *bp; + int32_t lbn; + int bsize, error; + + ip = VTOI(vp); + fs = ip->i_e2fs; + lbn = lblkno(fs, offset); + bsize = blksize(fs, ip, lbn); + + *bpp = NULL; + if ((error = bread(vp, lbn, bsize, NOCRED, &bp)) != 0) { + brelse(bp); + return (error); + } + if (res) + *res = (char *)bp->b_data + blkoff(fs, offset); + *bpp = bp; + return (0); +} + +#ifdef KDB +void +ext2_checkoverlap(bp, ip) + struct buf *bp; + struct inode *ip; +{ + struct buf *ebp, *ep; + int32_t start, last; + struct vnode *vp; + + ebp = &buf[nbuf]; + start = bp->b_blkno; + last = start + btodb(bp->b_bcount) - 1; + for (ep = buf; ep < ebp; ep++) { + if (ep == bp || (ep->b_flags & B_INVAL)) + continue; + vp = ip->i_devvp; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + btodb(ep->b_bcount) <= start) + continue; + vprint("Disk overlap", vp); + (void)printf("\tstart %d, end %d overlap start %lld, end %ld\n", + start, last, (long long)ep->b_blkno, + (long)(ep->b_blkno + btodb(ep->b_bcount) - 1)); + panic("Disk buffer overlap"); + } +} +#endif /* KDB */ diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c new file mode 100644 index 0000000..7690bb8 --- /dev/null +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -0,0 +1,1071 @@ +/*- + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1989, 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/conf.h> +#include <sys/fcntl.h> +#include <sys/malloc.h> +#include <sys/stat.h> +#include <sys/mutex.h> + +#include <geom/geom.h> +#include <geom/geom_vfs.h> + +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/inode.h> + +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extern.h> +#include <fs/ext2fs/ext2fs.h> + +static int ext2_flushfiles(struct mount *mp, int flags, struct thread *td); +static int ext2_mountfs(struct vnode *, struct mount *); +static int ext2_reload(struct mount *mp, struct thread *td); +static int ext2_sbupdate(struct ext2mount *, int); +static int ext2_cgupdate(struct ext2mount *, int); +static vfs_unmount_t ext2_unmount; +static vfs_root_t ext2_root; +static vfs_statfs_t ext2_statfs; +static vfs_sync_t ext2_sync; +static vfs_vget_t ext2_vget; +static vfs_fhtovp_t ext2_fhtovp; +static vfs_mount_t ext2_mount; + +MALLOC_DEFINE(M_EXT2NODE, "ext2_node", "EXT2 vnode private part"); +static MALLOC_DEFINE(M_EXT2MNT, "ext2_mount", "EXT2 mount structure"); + +static struct vfsops ext2fs_vfsops = { + .vfs_fhtovp = ext2_fhtovp, + .vfs_mount = ext2_mount, + .vfs_root = ext2_root, /* root inode via vget */ + .vfs_statfs = ext2_statfs, + .vfs_sync = ext2_sync, + .vfs_unmount = ext2_unmount, + .vfs_vget = ext2_vget, +}; + +VFS_SET(ext2fs_vfsops, ext2fs, 0); + +static int ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, + int ronly); +static int compute_sb_data(struct vnode * devvp, + struct ext2fs * es, struct m_ext2fs * fs); + +static const char *ext2_opts[] = { "from", "export", "acls", "noexec", + "noatime", "union", "suiddir", "multilabel", "nosymfollow", + "noclusterr", "noclusterw", "force", NULL }; + +/* + * VFS 
Operations. + * + * mount system call + */ +static int +ext2_mount(struct mount *mp) +{ + struct vfsoptlist *opts; + struct vnode *devvp; + struct thread *td; + struct ext2mount *ump = 0; + struct m_ext2fs *fs; + struct nameidata nd, *ndp = &nd; + accmode_t accmode; + char *path, *fspec; + int error, flags, len; + + td = curthread; + opts = mp->mnt_optnew; + + if (vfs_filteropt(opts, ext2_opts)) + return (EINVAL); + + vfs_getopt(opts, "fspath", (void **)&path, NULL); + /* Double-check the length of path.. */ + if (strlen(path) >= MAXMNTLEN - 1) + return (ENAMETOOLONG); + + fspec = NULL; + error = vfs_getopt(opts, "from", (void **)&fspec, &len); + if (!error && fspec[len - 1] != '\0') + return (EINVAL); + + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. + */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOEXT2(mp); + fs = ump->um_e2fs; + error = 0; + if (fs->e2fs_ronly == 0 && + vfs_flagopt(opts, "ro", NULL, 0)) { + error = VFS_SYNC(mp, MNT_WAIT); + if (error) + return (error); + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + error = ext2_flushfiles(mp, flags, td); + if ( error == 0 && fs->e2fs_wasvalid && ext2_cgupdate(ump, MNT_WAIT) == 0) { + fs->e2fs->e2fs_state |= E2FS_ISCLEAN; + ext2_sbupdate(ump, MNT_WAIT); + } + fs->e2fs_ronly = 1; + vfs_flagopt(opts, "ro", &mp->mnt_flag, MNT_RDONLY); + DROP_GIANT(); + g_topology_lock(); + g_access(ump->um_cp, 0, -1, 0); + g_topology_unlock(); + PICKUP_GIANT(); + } + if (!error && (mp->mnt_flag & MNT_RELOAD)) + error = ext2_reload(mp, td); + if (error) + return (error); + devvp = ump->um_devvp; + if (fs->e2fs_ronly && !vfs_flagopt(opts, "ro", NULL, 0)) { + if (ext2_check_sb_compat(fs->e2fs, devvp->v_rdev, 0)) + return (EPERM); + + /* + * If upgrade to read-write by non-root, then verify + * that user has necessary permissions on the device. 
+ */ + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + error = VOP_ACCESS(devvp, VREAD | VWRITE, + td->td_ucred, td); + if (error) + error = priv_check(td, PRIV_VFS_MOUNT_PERM); + if (error) { + VOP_UNLOCK(devvp, 0); + return (error); + } + VOP_UNLOCK(devvp, 0); + DROP_GIANT(); + g_topology_lock(); + error = g_access(ump->um_cp, 0, 1, 0); + g_topology_unlock(); + PICKUP_GIANT(); + if (error) + return (error); + + if ((fs->e2fs->e2fs_state & E2FS_ISCLEAN) == 0 || + (fs->e2fs->e2fs_state & E2FS_ERRORS)) { + if (mp->mnt_flag & MNT_FORCE) { + printf( +"WARNING: %s was not properly dismounted\n", fs->e2fs_fsmnt); + } else { + printf( +"WARNING: R/W mount of %s denied. Filesystem is not clean - run fsck\n", + fs->e2fs_fsmnt); + return (EPERM); + } + } + fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN; + (void)ext2_cgupdate(ump, MNT_WAIT); + fs->e2fs_ronly = 0; + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_RDONLY; + MNT_IUNLOCK(mp); + } + if (vfs_flagopt(opts, "export", NULL, 0)) { + /* Process export requests in vfs_mount.c. */ + return (error); + } + } + + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible disk device. + */ + if (fspec == NULL) + return (EINVAL); + NDINIT(ndp, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspec, td); + if ((error = namei(ndp)) != 0) + return (error); + NDFREE(ndp, NDF_ONLY_PNBUF); + devvp = ndp->ni_vp; + + if (!vn_isdisk(devvp, &error)) { + vput(devvp); + return (error); + } + + /* + * If mount by non-root, then verify that user has necessary + * permissions on the device. + * + * XXXRW: VOP_ACCESS() enough? 
+ */ + accmode = VREAD; + if ((mp->mnt_flag & MNT_RDONLY) == 0) + accmode |= VWRITE; + error = VOP_ACCESS(devvp, accmode, td->td_ucred, td); + if (error) + error = priv_check(td, PRIV_VFS_MOUNT_PERM); + if (error) { + vput(devvp); + return (error); + } + + if ((mp->mnt_flag & MNT_UPDATE) == 0) { + error = ext2_mountfs(devvp, mp); + } else { + if (devvp != ump->um_devvp) { + vput(devvp); + return (EINVAL); /* needs translation */ + } else + vput(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + ump = VFSTOEXT2(mp); + fs = ump->um_e2fs; + + /* + * Note that this strncpy() is ok because of a check at the start + * of ext2_mount(). + */ + strncpy(fs->e2fs_fsmnt, path, MAXMNTLEN); + fs->e2fs_fsmnt[MAXMNTLEN - 1] = '\0'; + vfs_mountedfrom(mp, fspec); + return (0); +} + +static int +ext2_check_sb_compat(struct ext2fs *es, struct cdev *dev, int ronly) +{ + + if (es->e2fs_magic != E2FS_MAGIC) { + printf("ext2fs: %s: wrong magic number %#x (expected %#x)\n", + devtoname(dev), es->e2fs_magic, E2FS_MAGIC); + return (1); + } + if (es->e2fs_rev > E2FS_REV0) { + if (es->e2fs_features_incompat & ~EXT2F_INCOMPAT_SUPP) { + printf( +"WARNING: mount of %s denied due to unsupported optional features\n", + devtoname(dev)); + return (1); + } + if (!ronly && + (es->e2fs_features_rocompat & ~EXT2F_ROCOMPAT_SUPP)) { + printf("WARNING: R/W mount of %s denied due to " + "unsupported optional features\n", devtoname(dev)); + return (1); + } + } + return (0); +} + +/* + * This computes the fields of the ext2_sb_info structure from the + * data in the ext2_super_block structure read in. 
+ */ +static int +compute_sb_data(struct vnode *devvp, struct ext2fs *es, + struct m_ext2fs *fs) +{ + int db_count, error; + int i; + int logic_sb_block = 1; /* XXX for now */ + struct buf *bp; + + fs->e2fs_bsize = EXT2_MIN_BLOCK_SIZE << es->e2fs_log_bsize; + fs->e2fs_bshift = EXT2_MIN_BLOCK_LOG_SIZE + es->e2fs_log_bsize; + fs->e2fs_fsbtodb = es->e2fs_log_bsize + 1; + fs->e2fs_qbmask = fs->e2fs_bsize - 1; + fs->e2fs_blocksize_bits = es->e2fs_log_bsize + 10; + fs->e2fs_fsize = EXT2_MIN_FRAG_SIZE << es->e2fs_log_fsize; + if (fs->e2fs_fsize) + fs->e2fs_fpb = fs->e2fs_bsize / fs->e2fs_fsize; + fs->e2fs_bpg = es->e2fs_bpg; + fs->e2fs_fpg = es->e2fs_fpg; + fs->e2fs_ipg = es->e2fs_ipg; + if (es->e2fs_rev == E2FS_REV0) { + fs->e2fs_first_inode = E2FS_REV0_FIRST_INO; + fs->e2fs_isize = E2FS_REV0_INODE_SIZE ; + } else { + fs->e2fs_first_inode = es->e2fs_first_ino; + fs->e2fs_isize = es->e2fs_inode_size; + + /* + * Simple sanity check for superblock inode size value. + */ + if (fs->e2fs_isize < E2FS_REV0_INODE_SIZE || + fs->e2fs_isize > fs->e2fs_bsize || + (fs->e2fs_isize & (fs->e2fs_isize - 1)) != 0) { + printf("EXT2-fs: invalid inode size %d\n", + fs->e2fs_isize); + return (EIO); + } + } + fs->e2fs_ipb = fs->e2fs_bsize / EXT2_INODE_SIZE(fs); + fs->e2fs_itpg = fs->e2fs_ipg /fs->e2fs_ipb; + fs->e2fs_descpb = fs->e2fs_bsize / sizeof (struct ext2_gd); + /* s_resuid / s_resgid ? */ + fs->e2fs_gcount = (es->e2fs_bcount - es->e2fs_first_dblock + + EXT2_BLOCKS_PER_GROUP(fs) - 1) / EXT2_BLOCKS_PER_GROUP(fs); + db_count = (fs->e2fs_gcount + EXT2_DESC_PER_BLOCK(fs) - 1) / + EXT2_DESC_PER_BLOCK(fs); + fs->e2fs_gdbcount = db_count; + fs->e2fs_gd = malloc(db_count * fs->e2fs_bsize, + M_EXT2MNT, M_WAITOK); + fs->e2fs_contigdirs = malloc(fs->e2fs_gcount * sizeof(*fs->e2fs_contigdirs), + M_EXT2MNT, M_WAITOK); + + /* + * Adjust logic_sb_block. + * Godmar thinks: if the blocksize is greater than 1024, then + * the superblock is logically part of block zero. 
+ */ + if(fs->e2fs_bsize > SBSIZE) + logic_sb_block = 0; + for (i = 0; i < db_count; i++) { + error = bread(devvp , + fsbtodb(fs, logic_sb_block + i + 1 ), + fs->e2fs_bsize, NOCRED, &bp); + if (error) { + free(fs->e2fs_gd, M_EXT2MNT); + brelse(bp); + return (error); + } + e2fs_cgload((struct ext2_gd *)bp->b_data, + &fs->e2fs_gd[ + i * fs->e2fs_bsize / sizeof(struct ext2_gd)], + fs->e2fs_bsize); + brelse(bp); + bp = NULL; + } + fs->e2fs_total_dir = 0; + for (i=0; i < fs->e2fs_gcount; i++){ + fs->e2fs_total_dir += fs->e2fs_gd[i].ext2bgd_ndirs; + fs->e2fs_contigdirs[i] = 0; + } + if (es->e2fs_rev == E2FS_REV0 || + (es->e2fs_features_rocompat & EXT2F_ROCOMPAT_LARGEFILE) == 0) + fs->e2fs_maxfilesize = 0x7fffffff; + else + fs->e2fs_maxfilesize = 0x7fffffffffffffff; + return (0); +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). The filesystem must + * be mounted read-only. + * + * Things to do to update the mount: + * 1) invalidate all cached meta-data. + * 2) re-read superblock from disk. + * 3) re-read summary information from disk. + * 4) invalidate all inactive vnodes. + * 5) invalidate all cached file data. + * 6) re-read inode data for all active vnodes. + * XXX we are missing some steps, in particular # 3, this has to be reviewed. + */ +static int +ext2_reload(struct mount *mp, struct thread *td) +{ + struct vnode *vp, *mvp, *devvp; + struct inode *ip; + struct buf *bp; + struct ext2fs *es; + struct m_ext2fs *fs; + int error; + + if ((mp->mnt_flag & MNT_RDONLY) == 0) + return (EINVAL); + /* + * Step 1: invalidate all cached meta-data. + */ + devvp = VFSTOEXT2(mp)->um_devvp; + vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY); + if (vinvalbuf(devvp, 0, 0, 0) != 0) + panic("ext2_reload: dirty1"); + VOP_UNLOCK(devvp, 0); + + /* + * Step 2: re-read superblock from disk. 
+ * constants have been adjusted for ext2 + */ + if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) + return (error); + es = (struct ext2fs *)bp->b_data; + if (ext2_check_sb_compat(es, devvp->v_rdev, 0) != 0) { + brelse(bp); + return (EIO); /* XXX needs translation */ + } + fs = VFSTOEXT2(mp)->um_e2fs; + bcopy(bp->b_data, fs->e2fs, sizeof(struct ext2fs)); + + if((error = compute_sb_data(devvp, es, fs)) != 0) { + brelse(bp); + return (error); + } +#ifdef UNKLAR + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; +#endif + brelse(bp); + +loop: + MNT_ILOCK(mp); + MNT_VNODE_FOREACH(vp, mp, mvp) { + VI_LOCK(vp); + if (vp->v_iflag & VI_DOOMED) { + VI_UNLOCK(vp); + continue; + } + MNT_IUNLOCK(mp); + /* + * Step 4: invalidate all cached file data. + */ + if (vget(vp, LK_EXCLUSIVE | LK_INTERLOCK, td)) { + MNT_VNODE_FOREACH_ABORT(mp, mvp); + goto loop; + } + if (vinvalbuf(vp, 0, 0, 0)) + panic("ext2_reload: dirty2"); + + /* + * Step 5: re-read inode data for all active vnodes. + */ + ip = VTOI(vp); + error = bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) { + VOP_UNLOCK(vp, 0); + vrele(vp); + MNT_VNODE_FOREACH_ABORT(mp, mvp); + return (error); + } + ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + + EXT2_INODE_SIZE(fs) * ino_to_fsbo(fs, ip->i_number)), ip); + brelse(bp); + VOP_UNLOCK(vp, 0); + vrele(vp); + MNT_ILOCK(mp); + } + MNT_IUNLOCK(mp); + return (0); +} + +/* + * Common code for mount and mountroot. + */ +static int +ext2_mountfs(struct vnode *devvp, struct mount *mp) +{ + struct ext2mount *ump; + struct buf *bp; + struct m_ext2fs *fs; + struct ext2fs *es; + struct cdev *dev = devvp->v_rdev; + struct g_consumer *cp; + struct bufobj *bo; + int error; + int ronly; + + ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); + /* XXX: use VOP_ACESS to check FS perms */ + DROP_GIANT(); + g_topology_lock(); + error = g_vfs_open(devvp, &cp, "ext2fs", ronly ? 
0 : 1); + g_topology_unlock(); + PICKUP_GIANT(); + VOP_UNLOCK(devvp, 0); + if (error) + return (error); + + /* XXX: should we check for some sectorsize or 512 instead? */ + if (((SBSIZE % cp->provider->sectorsize) != 0) || + (SBSIZE < cp->provider->sectorsize)) { + DROP_GIANT(); + g_topology_lock(); + g_vfs_close(cp); + g_topology_unlock(); + PICKUP_GIANT(); + return (EINVAL); + } + + bo = &devvp->v_bufobj; + bo->bo_private = cp; + bo->bo_ops = g_vfs_bufops; + if (devvp->v_rdev->si_iosize_max != 0) + mp->mnt_iosize_max = devvp->v_rdev->si_iosize_max; + if (mp->mnt_iosize_max > MAXPHYS) + mp->mnt_iosize_max = MAXPHYS; + + bp = NULL; + ump = NULL; + if ((error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) != 0) + goto out; + es = (struct ext2fs *)bp->b_data; + if (ext2_check_sb_compat(es, dev, ronly) != 0) { + error = EINVAL; /* XXX needs translation */ + goto out; + } + if ((es->e2fs_state & E2FS_ISCLEAN) == 0 || + (es->e2fs_state & E2FS_ERRORS)) { + if (ronly || (mp->mnt_flag & MNT_FORCE)) { + printf( +"WARNING: Filesystem was not properly dismounted\n"); + } else { + printf( +"WARNING: R/W mount denied. Filesystem is not clean - run fsck\n"); + error = EPERM; + goto out; + } + } + ump = malloc(sizeof *ump, M_EXT2MNT, M_WAITOK); + bzero((caddr_t)ump, sizeof *ump); + + /* + * I don't know whether this is the right strategy. Note that + * we dynamically allocate both an ext2_sb_info and an ext2_super_block + * while Linux keeps the super block in a locked buffer. 
+ */ + ump->um_e2fs = malloc(sizeof(struct m_ext2fs), + M_EXT2MNT, M_WAITOK); + ump->um_e2fs->e2fs = malloc(sizeof(struct ext2fs), + M_EXT2MNT, M_WAITOK); + mtx_init(EXT2_MTX(ump), "EXT2FS", "EXT2FS Lock", MTX_DEF); + bcopy(es, ump->um_e2fs->e2fs, (u_int)sizeof(struct ext2fs)); + if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) + goto out; + + brelse(bp); + bp = NULL; + fs = ump->um_e2fs; + fs->e2fs_ronly = ronly; /* ronly is set according to mnt_flags */ + + /* + * If the fs is not mounted read-only, make sure the super block is + * always written back on a sync(). + */ + fs->e2fs_wasvalid = fs->e2fs->e2fs_state & E2FS_ISCLEAN ? 1 : 0; + if (ronly == 0) { + fs->e2fs_fmod = 1; /* mark it modified */ + fs->e2fs->e2fs_state &= ~E2FS_ISCLEAN; /* set fs invalid */ + } + mp->mnt_data = ump; + mp->mnt_stat.f_fsid.val[0] = dev2udev(dev); + mp->mnt_stat.f_fsid.val[1] = mp->mnt_vfc->vfc_typenum; + mp->mnt_maxsymlinklen = EXT2_MAXSYMLINKLEN; + MNT_ILOCK(mp); + mp->mnt_flag |= MNT_LOCAL; + MNT_IUNLOCK(mp); + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + ump->um_bo = &devvp->v_bufobj; + ump->um_cp = cp; + + /* + * Setting those two parameters allowed us to use + * ufs_bmap w/o changse! + */ + ump->um_nindir = EXT2_ADDR_PER_BLOCK(fs); + ump->um_bptrtodb = fs->e2fs->e2fs_log_bsize + 1; + ump->um_seqinc = EXT2_FRAGS_PER_BLOCK(fs); + if (ronly == 0) + ext2_sbupdate(ump, MNT_WAIT); + /* + * Initialize filesystem stat information in mount struct. 
+ */ + MNT_ILOCK(mp); + mp->mnt_kern_flag |= MNTK_MPSAFE | MNTK_LOOKUP_SHARED | + MNTK_EXTENDED_SHARED; + MNT_IUNLOCK(mp); + return (0); +out: + if (bp) + brelse(bp); + if (cp != NULL) { + DROP_GIANT(); + g_topology_lock(); + g_vfs_close(cp); + g_topology_unlock(); + PICKUP_GIANT(); + } + if (ump) { + mtx_destroy(EXT2_MTX(ump)); + free(ump->um_e2fs->e2fs_gd, M_EXT2MNT); + free(ump->um_e2fs->e2fs_contigdirs, M_EXT2MNT); + free(ump->um_e2fs->e2fs, M_EXT2MNT); + free(ump->um_e2fs, M_EXT2MNT); + free(ump, M_EXT2MNT); + mp->mnt_data = NULL; + } + return (error); +} + +/* + * Unmount system call. + */ +static int +ext2_unmount(struct mount *mp, int mntflags) +{ + struct ext2mount *ump; + struct m_ext2fs *fs; + int error, flags, ronly; + + flags = 0; + if (mntflags & MNT_FORCE) { + if (mp->mnt_flag & MNT_ROOTFS) + return (EINVAL); + flags |= FORCECLOSE; + } + if ((error = ext2_flushfiles(mp, flags, curthread)) != 0) + return (error); + ump = VFSTOEXT2(mp); + fs = ump->um_e2fs; + ronly = fs->e2fs_ronly; + if (ronly == 0 && ext2_cgupdate(ump, MNT_WAIT) == 0) { + if (fs->e2fs_wasvalid) + fs->e2fs->e2fs_state |= E2FS_ISCLEAN; + ext2_sbupdate(ump, MNT_WAIT); + } + + DROP_GIANT(); + g_topology_lock(); + g_vfs_close(ump->um_cp); + g_topology_unlock(); + PICKUP_GIANT(); + vrele(ump->um_devvp); + free(fs->e2fs_gd, M_EXT2MNT); + free(fs->e2fs_contigdirs, M_EXT2MNT); + free(fs->e2fs, M_EXT2MNT); + free(fs, M_EXT2MNT); + free(ump, M_EXT2MNT); + mp->mnt_data = NULL; + MNT_ILOCK(mp); + mp->mnt_flag &= ~MNT_LOCAL; + MNT_IUNLOCK(mp); + return (error); +} + +/* + * Flush out all the files in a filesystem. + */ +static int +ext2_flushfiles(struct mount *mp, int flags, struct thread *td) +{ + int error; + + error = vflush(mp, 0, flags, td); + return (error); +} +/* + * Get file system statistics. 
+ */ +int +ext2_statfs(struct mount *mp, struct statfs *sbp) +{ + struct ext2mount *ump; + struct m_ext2fs *fs; + uint32_t overhead, overhead_per_group, ngdb; + int i, ngroups; + + ump = VFSTOEXT2(mp); + fs = ump->um_e2fs; + if (fs->e2fs->e2fs_magic != E2FS_MAGIC) + panic("ext2fs_statvfs"); + + /* + * Compute the overhead (FS structures) + */ + overhead_per_group = + 1 /* block bitmap */ + + 1 /* inode bitmap */ + + fs->e2fs_itpg; + overhead = fs->e2fs->e2fs_first_dblock + + fs->e2fs_gcount * overhead_per_group; + if (fs->e2fs->e2fs_rev > E2FS_REV0 && + fs->e2fs->e2fs_features_rocompat & EXT2F_ROCOMPAT_SPARSESUPER) { + for (i = 0, ngroups = 0; i < fs->e2fs_gcount; i++) { + if (cg_has_sb(i)) + ngroups++; + } + } else { + ngroups = fs->e2fs_gcount; + } + ngdb = fs->e2fs_gdbcount; + if (fs->e2fs->e2fs_rev > E2FS_REV0 && + fs->e2fs->e2fs_features_compat & EXT2F_COMPAT_RESIZE) + ngdb += fs->e2fs->e2fs_reserved_ngdb; + overhead += ngroups * (1 /* superblock */ + ngdb); + + sbp->f_bsize = EXT2_FRAG_SIZE(fs); + sbp->f_iosize = EXT2_BLOCK_SIZE(fs); + sbp->f_blocks = fs->e2fs->e2fs_bcount - overhead; + sbp->f_bfree = fs->e2fs->e2fs_fbcount; + sbp->f_bavail = sbp->f_bfree - fs->e2fs->e2fs_rbcount; + sbp->f_files = fs->e2fs->e2fs_icount; + sbp->f_ffree = fs->e2fs->e2fs_ficount; + return (0); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. 
+ */ +static int +ext2_sync(struct mount *mp, int waitfor) +{ + struct vnode *mvp, *vp; + struct thread *td; + struct inode *ip; + struct ext2mount *ump = VFSTOEXT2(mp); + struct m_ext2fs *fs; + int error, allerror = 0; + + td = curthread; + fs = ump->um_e2fs; + if (fs->e2fs_fmod != 0 && fs->e2fs_ronly != 0) { /* XXX */ + printf("fs = %s\n", fs->e2fs_fsmnt); + panic("ext2_sync: rofs mod"); + } + + /* + * Write back each (modified) inode. + */ + MNT_ILOCK(mp); +loop: + MNT_VNODE_FOREACH(vp, mp, mvp) { + VI_LOCK(vp); + if (vp->v_type == VNON || (vp->v_iflag & VI_DOOMED)) { + VI_UNLOCK(vp); + continue; + } + MNT_IUNLOCK(mp); + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + (vp->v_bufobj.bo_dirty.bv_cnt == 0 || + waitfor == MNT_LAZY)) { + VI_UNLOCK(vp); + MNT_ILOCK(mp); + continue; + } + error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td); + if (error) { + MNT_ILOCK(mp); + if (error == ENOENT) { + MNT_VNODE_FOREACH_ABORT_ILOCKED(mp, mvp); + goto loop; + } + continue; + } + if ((error = VOP_FSYNC(vp, waitfor, td)) != 0) + allerror = error; + VOP_UNLOCK(vp, 0); + vrele(vp); + MNT_ILOCK(mp); + } + MNT_IUNLOCK(mp); + + /* + * Force stale file system control information to be flushed. + */ + if (waitfor != MNT_LAZY) { + vn_lock(ump->um_devvp, LK_EXCLUSIVE | LK_RETRY); + if ((error = VOP_FSYNC(ump->um_devvp, waitfor, td)) != 0) + allerror = error; + VOP_UNLOCK(ump->um_devvp, 0); + } + + /* + * Write back modified superblock. + */ + if (fs->e2fs_fmod != 0) { + fs->e2fs_fmod = 0; + fs->e2fs->e2fs_wtime = time_second; + if ((error = ext2_cgupdate(ump, waitfor)) != 0) + allerror = error; + } + return (allerror); +} + +/* + * Look up an EXT2FS dinode number to find its incore vnode, otherwise read it + * in from disk. If it is in core, wait for the lock bit to clear, then + * return the inode locked. Detection and handling of mount points must be + * done by the calling routine. 
+ */ +static int +ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) +{ + struct m_ext2fs *fs; + struct inode *ip; + struct ext2mount *ump; + struct buf *bp; + struct vnode *vp; + struct cdev *dev; + struct thread *td; + int i, error; + int used_blocks; + + td = curthread; + error = vfs_hash_get(mp, ino, flags, td, vpp, NULL, NULL); + if (error || *vpp != NULL) + return (error); + + ump = VFSTOEXT2(mp); + dev = ump->um_dev; + + /* + * If this malloc() is performed after the getnewvnode() + * it might block, leaving a vnode with a NULL v_data to be + * found by ext2_sync() if a sync happens to fire right then, + * which will cause a panic because ext2_sync() blindly + * dereferences vp->v_data (as well it should). + */ + ip = malloc(sizeof(struct inode), M_EXT2NODE, M_WAITOK | M_ZERO); + + /* Allocate a new vnode/inode. */ + if ((error = getnewvnode("ext2fs", mp, &ext2_vnodeops, &vp)) != 0) { + *vpp = NULL; + free(ip, M_EXT2NODE); + return (error); + } + vp->v_data = ip; + ip->i_vnode = vp; + ip->i_e2fs = fs = ump->um_e2fs; + ip->i_ump = ump; + ip->i_number = ino; + + lockmgr(vp->v_vnlock, LK_EXCLUSIVE, NULL); + error = insmntque(vp, mp); + if (error != 0) { + free(ip, M_EXT2NODE); + *vpp = NULL; + return (error); + } + error = vfs_hash_insert(vp, ino, flags, td, vpp, NULL, NULL); + if (error || *vpp != NULL) + return (error); + + /* Read in the disk contents for the inode, copy into the inode. */ + if ((error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), + (int)fs->e2fs_bsize, NOCRED, &bp)) != 0) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). 
+ */ + brelse(bp); + vput(vp); + *vpp = NULL; + return (error); + } + /* convert ext2 inode to dinode */ + ext2_ei2i((struct ext2fs_dinode *) ((char *)bp->b_data + EXT2_INODE_SIZE(fs) * + ino_to_fsbo(fs, ino)), ip); + ip->i_block_group = ino_to_cg(fs, ino); + ip->i_next_alloc_block = 0; + ip->i_next_alloc_goal = 0; + ip->i_prealloc_count = 0; + ip->i_prealloc_block = 0; + + /* + * Now we want to make sure that block pointers for unused + * blocks are zeroed out - ext2_balloc depends on this + * although for regular files and directories only + */ + if(S_ISDIR(ip->i_mode) || S_ISREG(ip->i_mode)) { + used_blocks = (ip->i_size+fs->e2fs_bsize-1) / fs->e2fs_bsize; + for(i = used_blocks; i < EXT2_NDIR_BLOCKS; i++) + ip->i_db[i] = 0; + } +/* + ext2_print_inode(ip); +*/ + bqrelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. + * Note that the underlying vnode may have changed. + */ + if ((error = ext2_vinit(mp, &ext2_fifoops, &vp)) != 0) { + vput(vp); + *vpp = NULL; + return (error); + } + + /* + * Finish inode initialization now that aliasing has been resolved. + */ + ip->i_devvp = ump->um_devvp; + + /* + * Set up a generation number for this inode if it does not + * already have one. This should only happen on old filesystems. + */ + if (ip->i_gen == 0) { + ip->i_gen = random() / 2 + 1; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ip->i_flag |= IN_MODIFIED; + } + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - call ext2_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. 
exflagsp and credanonp + */ +static int +ext2_fhtovp(struct mount *mp, struct fid *fhp, struct vnode **vpp) +{ + struct inode *ip; + struct ufid *ufhp; + struct vnode *nvp; + struct m_ext2fs *fs; + int error; + + ufhp = (struct ufid *)fhp; + fs = VFSTOEXT2(mp)->um_e2fs; + if (ufhp->ufid_ino < ROOTINO || + ufhp->ufid_ino > fs->e2fs_gcount * fs->e2fs->e2fs_ipg) + return (ESTALE); + + error = VFS_VGET(mp, ufhp->ufid_ino, LK_EXCLUSIVE, &nvp); + if (error) { + *vpp = NULLVP; + return (error); + } + ip = VTOI(nvp); + if (ip->i_mode == 0 || + ip->i_gen != ufhp->ufid_gen || ip->i_nlink <= 0) { + vput(nvp); + *vpp = NULLVP; + return (ESTALE); + } + *vpp = nvp; + vnode_create_vobject(*vpp, 0, curthread); + return (0); +} + +/* + * Write a superblock and associated information back to disk. + */ +static int +ext2_sbupdate(struct ext2mount *mp, int waitfor) +{ + struct m_ext2fs *fs = mp->um_e2fs; + struct ext2fs *es = fs->e2fs; + struct buf *bp; + int error = 0; + + bp = getblk(mp->um_devvp, SBLOCK, SBSIZE, 0, 0, 0); + bcopy((caddr_t)es, bp->b_data, (u_int)sizeof(struct ext2fs)); + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + + /* + * The buffers for group descriptors, inode bitmaps and block bitmaps + * are not busy at this point and are (hopefully) written by the + * usual sync mechanism. No need to write them here. 
+ */ + return (error); +} +int +ext2_cgupdate(struct ext2mount *mp, int waitfor) +{ + struct m_ext2fs *fs = mp->um_e2fs; + struct buf *bp; + int i, error = 0, allerror = 0; + + allerror = ext2_sbupdate(mp, waitfor); + for (i = 0; i < fs->e2fs_gdbcount; i++) { + bp = getblk(mp->um_devvp, fsbtodb(fs, + fs->e2fs->e2fs_first_dblock + + 1 /* superblock */ + i), fs->e2fs_bsize, 0, 0, 0); + e2fs_cgsave(&fs->e2fs_gd[ + i * fs->e2fs_bsize / sizeof(struct ext2_gd)], + (struct ext2_gd *)bp->b_data, fs->e2fs_bsize); + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + } + + if (!allerror && error) + allerror = error; + return (allerror); +} +/* + * Return the root of a filesystem. + */ +static int +ext2_root(struct mount *mp, int flags, struct vnode **vpp) +{ + struct vnode *nvp; + int error; + + error = VFS_VGET(mp, (ino_t)ROOTINO, LK_EXCLUSIVE, &nvp); + if (error) + return (error); + *vpp = nvp; + return (0); +} diff --git a/sys/fs/ext2fs/ext2_vnops.c b/sys/fs/ext2fs/ext2_vnops.c new file mode 100644 index 0000000..6b0d371 --- /dev/null +++ b/sys/fs/ext2fs/ext2_vnops.c @@ -0,0 +1,1676 @@ +/*- + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ufs_vnops.c 8.7 (Berkeley) 2/3/94 + * @(#)ufs_vnops.c 8.27 (Berkeley) 5/27/95 + * $FreeBSD$ + */ + +#include "opt_suiddir.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/endian.h> +#include <sys/priv.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/unistd.h> +#include <sys/time.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/lockf.h> +#include <sys/event.h> +#include <sys/conf.h> +#include <sys/file.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vnode_pager.h> + +#include <fs/fifofs/fifo.h> + +#include <sys/signalvar.h> +#include <ufs/ufs/dir.h> + +#include <fs/ext2fs/inode.h> +#include <fs/ext2fs/ext2_mount.h> +#include <fs/ext2fs/fs.h> +#include <fs/ext2fs/ext2_extern.h> +#include <fs/ext2fs/ext2fs.h> +#include <fs/ext2fs/ext2_dir.h> + +static int ext2_makeinode(int mode, struct vnode *, struct vnode **, struct componentname *); +static void ext2_itimes_locked(struct vnode *); + +static vop_access_t ext2_access; +static int ext2_chmod(struct vnode *, int, struct ucred *, struct thread *); +static int ext2_chown(struct vnode *, uid_t, gid_t, struct ucred *, + struct thread *); +static vop_close_t ext2_close; +static vop_create_t ext2_create; +static vop_fsync_t ext2_fsync; +static vop_getattr_t ext2_getattr; +static vop_link_t ext2_link; +static vop_mkdir_t ext2_mkdir; +static vop_mknod_t ext2_mknod; +static vop_open_t ext2_open; +static vop_pathconf_t ext2_pathconf; +static vop_print_t ext2_print; +static vop_read_t ext2_read; +static vop_readlink_t ext2_readlink; +static vop_remove_t ext2_remove; +static vop_rename_t ext2_rename; +static vop_rmdir_t ext2_rmdir; +static vop_setattr_t ext2_setattr; +static vop_strategy_t ext2_strategy; +static vop_symlink_t ext2_symlink; +static vop_write_t ext2_write; +static vop_vptofh_t ext2_vptofh; 
+static vop_close_t ext2fifo_close; +static vop_kqfilter_t ext2fifo_kqfilter; + +/* Global vfs data structures for ext2. */ +struct vop_vector ext2_vnodeops = { + .vop_default = &default_vnodeops, + .vop_access = ext2_access, + .vop_bmap = ext2_bmap, + .vop_cachedlookup = ext2_lookup, + .vop_close = ext2_close, + .vop_create = ext2_create, + .vop_fsync = ext2_fsync, + .vop_getattr = ext2_getattr, + .vop_inactive = ext2_inactive, + .vop_link = ext2_link, + .vop_lookup = vfs_cache_lookup, + .vop_mkdir = ext2_mkdir, + .vop_mknod = ext2_mknod, + .vop_open = ext2_open, + .vop_pathconf = ext2_pathconf, + .vop_poll = vop_stdpoll, + .vop_print = ext2_print, + .vop_read = ext2_read, + .vop_readdir = ext2_readdir, + .vop_readlink = ext2_readlink, + .vop_reallocblks = ext2_reallocblks, + .vop_reclaim = ext2_reclaim, + .vop_remove = ext2_remove, + .vop_rename = ext2_rename, + .vop_rmdir = ext2_rmdir, + .vop_setattr = ext2_setattr, + .vop_strategy = ext2_strategy, + .vop_symlink = ext2_symlink, + .vop_write = ext2_write, + .vop_vptofh = ext2_vptofh, +}; + +struct vop_vector ext2_fifoops = { + .vop_default = &fifo_specops, + .vop_access = ext2_access, + .vop_close = ext2fifo_close, + .vop_fsync = ext2_fsync, + .vop_getattr = ext2_getattr, + .vop_inactive = ext2_inactive, + .vop_kqfilter = ext2fifo_kqfilter, + .vop_print = ext2_print, + .vop_read = VOP_PANIC, + .vop_reclaim = ext2_reclaim, + .vop_setattr = ext2_setattr, + .vop_write = VOP_PANIC, + .vop_vptofh = ext2_vptofh, +}; + +#include <fs/ext2fs/ext2_readwrite.c> + +/* + * A virgin directory (no blushing please). + * Note that the type and namlen fields are reversed relative to ext2. + * Also, we don't use `struct odirtemplate', since it would just cause + * endianness problems. + */ +static struct dirtemplate mastertemplate = { + 0, 12, 1, EXT2_FT_DIR, ".", + 0, DIRBLKSIZ - 12, 2, EXT2_FT_DIR, ".." 
+}; +static struct dirtemplate omastertemplate = { + 0, 12, 1, EXT2_FT_UNKNOWN, ".", + 0, DIRBLKSIZ - 12, 2, EXT2_FT_UNKNOWN, ".." +}; + +static void +ext2_itimes_locked(struct vnode *vp) +{ + struct inode *ip; + struct timespec ts; + + ASSERT_VI_LOCKED(vp, __func__); + + ip = VTOI(vp); + if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_UPDATE)) == 0) + return; + if ((vp->v_type == VBLK || vp->v_type == VCHR)) + ip->i_flag |= IN_LAZYMOD; + else + ip->i_flag |= IN_MODIFIED; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) { + vfs_timestamp(&ts); + if (ip->i_flag & IN_ACCESS) { + ip->i_atime = ts.tv_sec; + ip->i_atimensec = ts.tv_nsec; + } + if (ip->i_flag & IN_UPDATE) { + ip->i_mtime = ts.tv_sec; + ip->i_mtimensec = ts.tv_nsec; + ip->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) { + ip->i_ctime = ts.tv_sec; + ip->i_ctimensec = ts.tv_nsec; + } + } + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_UPDATE); +} + +void +ext2_itimes(struct vnode *vp) +{ + + VI_LOCK(vp); + ext2_itimes_locked(vp); + VI_UNLOCK(vp); +} + +/* + * Create a regular file + */ +static int +ext2_create(ap) + struct vop_create_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + int error; + + error = + ext2_makeinode(MAKEIMODE(ap->a_vap->va_type, ap->a_vap->va_mode), + ap->a_dvp, ap->a_vpp, ap->a_cnp); + if (error) + return (error); + return (0); +} + +static int +ext2_open(ap) + struct vop_open_args /* { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + + if (ap->a_vp->v_type == VBLK || ap->a_vp->v_type == VCHR) + return (EOPNOTSUPP); + + /* + * Files marked append-only must be opened for appending. + */ + if ((VTOI(ap->a_vp)->i_flags & APPEND) && + (ap->a_mode & (FWRITE | O_APPEND)) == FWRITE) + return (EPERM); + + vnode_create_vobject(ap->a_vp, VTOI(ap->a_vp)->i_size, ap->a_td); + + return (0); +} + +/* + * Close called. + * + * Update the times on the inode. 
+ */ +static int +ext2_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + VI_LOCK(vp); + if (vp->v_usecount > 1) + ext2_itimes_locked(vp); + VI_UNLOCK(vp); + return (0); +} + +static int +ext2_access(ap) + struct vop_access_args /* { + struct vnode *a_vp; + accmode_t a_accmode; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + accmode_t accmode = ap->a_accmode; + int error; + + if (vp->v_type == VBLK || vp->v_type == VCHR) + return (EOPNOTSUPP); + + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. + */ + if (accmode & VWRITE) { + switch (vp->v_type) { + case VDIR: + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + break; + default: + break; + } + } + + /* If immutable bit set, nobody gets to write it. 
*/ + if ((accmode & VWRITE) && (ip->i_flags & (SF_IMMUTABLE | SF_SNAPSHOT))) + return (EPERM); + + error = vaccess(vp->v_type, ip->i_mode, ip->i_uid, ip->i_gid, + ap->a_accmode, ap->a_cred, NULL); + return (error); +} + +static int +ext2_getattr(ap) + struct vop_getattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct vattr *vap = ap->a_vap; + + ext2_itimes(vp); + /* + * Copy from inode table + */ + vap->va_fsid = dev2udev(ip->i_devvp->v_rdev); + vap->va_fileid = ip->i_number; + vap->va_mode = ip->i_mode & ~IFMT; + vap->va_nlink = ip->i_nlink; + vap->va_uid = ip->i_uid; + vap->va_gid = ip->i_gid; + vap->va_rdev = ip->i_rdev; + vap->va_size = ip->i_size; + vap->va_atime.tv_sec = ip->i_atime; + vap->va_atime.tv_nsec = ip->i_atimensec; + vap->va_mtime.tv_sec = ip->i_mtime; + vap->va_mtime.tv_nsec = ip->i_mtimensec; + vap->va_ctime.tv_sec = ip->i_ctime; + vap->va_ctime.tv_nsec = ip->i_ctimensec; + vap->va_flags = ip->i_flags; + vap->va_gen = ip->i_gen; + vap->va_blocksize = vp->v_mount->mnt_stat.f_iosize; + vap->va_bytes = dbtob((u_quad_t)ip->i_blocks); + vap->va_type = IFTOVT(ip->i_mode); + vap->va_filerev = ip->i_modrev; + return (0); +} + +/* + * Set attribute vnode op. called from several syscalls + */ +static int +ext2_setattr(ap) + struct vop_setattr_args /* { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + } */ *ap; +{ + struct vattr *vap = ap->a_vap; + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + struct ucred *cred = ap->a_cred; + struct thread *td = curthread; + int error; + + /* + * Check for unsettable attributes. 
+ */ + if ((vap->va_type != VNON) || (vap->va_nlink != VNOVAL) || + (vap->va_fsid != VNOVAL) || (vap->va_fileid != VNOVAL) || + (vap->va_blocksize != VNOVAL) || (vap->va_rdev != VNOVAL) || + ((int)vap->va_bytes != VNOVAL) || (vap->va_gen != VNOVAL)) { + return (EINVAL); + } + if (vap->va_flags != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + /* Disallow flags not supported by ext2fs. */ + if(vap->va_flags & ~(SF_APPEND | SF_IMMUTABLE | UF_NODUMP)) + return(EOPNOTSUPP); + /* + * Callers may only modify the file flags on objects they + * have VADMIN rights for. + */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + /* + * Unprivileged processes and privileged processes in + * jail() are not permitted to unset system flags, or + * modify flags if any system flags are set. + * Privileged non-jail processes may not modify system flags + * if securelevel > 0 and any existing system flags are set. + */ + if (!priv_check_cred(cred, PRIV_VFS_SYSFLAGS, 0)) { + if (ip->i_flags + & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) { + error = securelevel_gt(cred, 0); + if (error) + return (error); + } + ip->i_flags = vap->va_flags; + } else { + if (ip->i_flags + & (SF_NOUNLINK | SF_IMMUTABLE | SF_APPEND)) + return (EPERM); + ip->i_flags &= SF_SETTABLE; + } + ip->i_flag |= IN_CHANGE; + if (vap->va_flags & (IMMUTABLE | APPEND)) + return (0); + } + if (ip->i_flags & (IMMUTABLE | APPEND)) + return (EPERM); + /* + * Go through the fields and update iff not VNOVAL. + */ + if (vap->va_uid != (uid_t)VNOVAL || vap->va_gid != (gid_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + if ((error = ext2_chown(vp, vap->va_uid, vap->va_gid, cred, + td)) != 0) + return (error); + } + if (vap->va_size != VNOVAL) { + /* + * Disallow write attempts on read-only file systems; + * unless the file is a socket, fifo, or a block or + * character device resident on the file system. 
+ */ + switch (vp->v_type) { + case VDIR: + return (EISDIR); + case VLNK: + case VREG: + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + break; + default: + break; + } + if ((error = ext2_truncate(vp, vap->va_size, 0, cred, td)) != 0) + return (error); + } + if (vap->va_atime.tv_sec != VNOVAL || vap->va_mtime.tv_sec != VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + /* + * From utimes(2): + * If times is NULL, ... The caller must be the owner of + * the file, have permission to write the file, or be the + * super-user. + * If times is non-NULL, ... The caller must be the owner of + * the file or be the super-user. + */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td)) && + ((vap->va_vaflags & VA_UTIMES_NULL) == 0 || + (error = VOP_ACCESS(vp, VWRITE, cred, td)))) + return (error); + if (vap->va_atime.tv_sec != VNOVAL) + ip->i_flag |= IN_ACCESS; + if (vap->va_mtime.tv_sec != VNOVAL) + ip->i_flag |= IN_CHANGE | IN_UPDATE; + ext2_itimes(vp); + if (vap->va_atime.tv_sec != VNOVAL) { + ip->i_atime = vap->va_atime.tv_sec; + ip->i_atimensec = vap->va_atime.tv_nsec; + } + if (vap->va_mtime.tv_sec != VNOVAL) { + ip->i_mtime = vap->va_mtime.tv_sec; + ip->i_mtimensec = vap->va_mtime.tv_nsec; + } + error = ext2_update(vp, 0); + if (error) + return (error); + } + error = 0; + if (vap->va_mode != (mode_t)VNOVAL) { + if (vp->v_mount->mnt_flag & MNT_RDONLY) + return (EROFS); + error = ext2_chmod(vp, (int)vap->va_mode, cred, td); + } + return (error); +} + +/* + * Change the mode on a file. + * Inode must be locked before calling. + */ +static int +ext2_chmod(vp, mode, cred, td) + struct vnode *vp; + int mode; + struct ucred *cred; + struct thread *td; +{ + struct inode *ip = VTOI(vp); + int error; + + /* + * To modify the permissions on a file, must possess VADMIN + * for that file. 
+ */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + /* + * Privileged processes may set the sticky bit on non-directories, + * as well as set the setgid bit on a file with a group that the + * process is not a member of. + */ + if (vp->v_type != VDIR && (mode & S_ISTXT)) { + error = priv_check_cred(cred, PRIV_VFS_STICKYFILE, 0); + if (error) + return (EFTYPE); + } + if (!groupmember(ip->i_gid, cred) && (mode & ISGID)) { + error = priv_check_cred(cred, PRIV_VFS_SETGID, 0); + if (error) + return (error); + } + ip->i_mode &= ~ALLPERMS; + ip->i_mode |= (mode & ALLPERMS); + ip->i_flag |= IN_CHANGE; + return (0); +} + +/* + * Perform chown operation on inode ip; + * inode must be locked prior to call. + */ +static int +ext2_chown(vp, uid, gid, cred, td) + struct vnode *vp; + uid_t uid; + gid_t gid; + struct ucred *cred; + struct thread *td; +{ + struct inode *ip = VTOI(vp); + uid_t ouid; + gid_t ogid; + int error = 0; + + if (uid == (uid_t)VNOVAL) + uid = ip->i_uid; + if (gid == (gid_t)VNOVAL) + gid = ip->i_gid; + /* + * To modify the ownership of a file, must possess VADMIN + * for that file. + */ + if ((error = VOP_ACCESS(vp, VADMIN, cred, td))) + return (error); + /* + * To change the owner of a file, or change the group of a file + * to a group of which we are not a member, the caller must + * have privilege. + */ + if (uid != ip->i_uid || (gid != ip->i_gid && + !groupmember(gid, cred))) { + error = priv_check_cred(cred, PRIV_VFS_CHOWN, 0); + if (error) + return (error); + } + ogid = ip->i_gid; + ouid = ip->i_uid; + ip->i_gid = gid; + ip->i_uid = uid; + ip->i_flag |= IN_CHANGE; + if ((ip->i_mode & (ISUID | ISGID)) && (ouid != uid || ogid != gid)) { + if (priv_check_cred(cred, PRIV_VFS_RETAINSUGID, 0) != 0) + ip->i_mode &= ~(ISUID | ISGID); + } + return (0); +} + +/* + * Synch an open file. 
+ */ +/* ARGSUSED */ +static int +ext2_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct thread *a_td; + } */ *ap; +{ + /* + * Flush all dirty buffers associated with a vnode, then write the + * inode back through ext2_update(); its second argument is nonzero + * only when the caller asked for MNT_WAIT. The return value of + * vop_stdfsync() is not examined. + */ + + vop_stdfsync(ap); + + return (ext2_update(ap->a_vp, ap->a_waitfor == MNT_WAIT)); +} + +/* + * Mknod vnode call: create the inode with ext2_makeinode(), record + * va_rdev when the caller supplied one, then discard the vnode and + * fetch it again by inode number through VFS_VGET(). + */ +/* ARGSUSED */ +static int +ext2_mknod(ap) + struct vop_mknod_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct vattr *vap = ap->a_vap; + struct vnode **vpp = ap->a_vpp; + struct inode *ip; + ino_t ino; + int error; + + error = ext2_makeinode(MAKEIMODE(vap->va_type, vap->va_mode), + ap->a_dvp, vpp, ap->a_cnp); + if (error) + return (error); + ip = VTOI(*vpp); + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + if (vap->va_rdev != VNOVAL) { + /* + * Want to be able to use this to make badblock + * inodes, so don't truncate the dev number. + */ + ip->i_rdev = vap->va_rdev; + } + /* + * Remove inode, then reload it through VFS_VGET so it is + * checked to see if it is an alias of an existing entry in + * the inode cache. XXX I don't believe this is necessary now. + */ + (*vpp)->v_type = VNON; + ino = ip->i_number; /* Save this before vgone() invalidates ip.
*/ + vgone(*vpp); + vput(*vpp); + error = VFS_VGET(ap->a_dvp->v_mount, ino, LK_EXCLUSIVE, vpp); + if (error) { + *vpp = NULL; + return (error); + } + return (0); +} + +/* + * Remove vnode call: fail with EPERM when the file is marked NOUNLINK, + * IMMUTABLE or APPEND, or the directory is marked APPEND; otherwise + * remove the directory entry and, if that succeeds, drop the inode's + * link count. + */ +static int +ext2_remove(ap) + struct vop_remove_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct inode *ip; + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + int error; + + ip = VTOI(vp); + if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || + (VTOI(dvp)->i_flags & APPEND)) { + error = EPERM; + goto out; + } + error = ext2_dirremove(dvp, ap->a_cnp); + if (error == 0) { + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + } +out: + return (error); +} + +/* + * link vnode call: bump the link count and write the inode with + * ext2_update() before entering the new name in tdvp; the count is + * rolled back if either step fails. Fails with EXDEV across mounts, + * EMLINK at LINK_MAX and EPERM for IMMUTABLE or APPEND files. + */ +static int +ext2_link(ap) + struct vop_link_args /* { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vnode *tdvp = ap->a_tdvp; + struct componentname *cnp = ap->a_cnp; + struct inode *ip; + int error; + +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("ext2_link: no name"); +#endif + if (tdvp->v_mount != vp->v_mount) { + error = EXDEV; + goto out; + } + ip = VTOI(vp); + if ((nlink_t)ip->i_nlink >= LINK_MAX) { + error = EMLINK; + goto out; + } + if (ip->i_flags & (IMMUTABLE | APPEND)) { + error = EPERM; + goto out; + } + ip->i_nlink++; + ip->i_flag |= IN_CHANGE; + error = ext2_update(vp, 1); + if (!error) + error = ext2_direnter(ip, tdvp, cnp); + if (error) { + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + } +out: + return (error); +} + +/* + * Rename system call. + * rename("foo", "bar"); + * is essentially + * unlink("bar"); + * link("foo", "bar"); + * unlink("foo"); + * but ``atomically''. Can't do full commit without saving state in the + * inode on disk which isn't feasible at this time. Best we can do is + * always guarantee the target exists. + * + * Basic algorithm is: + * + * 1) Bump link count on source while we're linking it to the + * target.
This also ensures the inode won't be deleted out + * from underneath us while we work (it may be truncated by + * a concurrent `trunc' or `open' for creation). + * 2) Link source to destination. If destination already exists, + * delete it first. + * 3) Unlink source reference to inode if still around. If a + * directory was moved and the parent of the destination + * is different from the source, patch the ".." entry in the + * directory. + */ +static int +ext2_rename(ap) + struct vop_rename_args /* { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + } */ *ap; +{ + struct vnode *tvp = ap->a_tvp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *fvp = ap->a_fvp; + struct vnode *fdvp = ap->a_fdvp; + struct componentname *tcnp = ap->a_tcnp; + struct componentname *fcnp = ap->a_fcnp; + struct inode *ip, *xp, *dp; + struct dirtemplate dirbuf; + int doingdirectory = 0, oldparent = 0, newparent = 0; + int error = 0; + u_char namlen; + +#ifdef DIAGNOSTIC + if ((tcnp->cn_flags & HASBUF) == 0 || + (fcnp->cn_flags & HASBUF) == 0) + panic("ext2_rename: no name"); +#endif + /* + * Check for cross-device rename. + */ + if ((fvp->v_mount != tdvp->v_mount) || + (tvp && (fvp->v_mount != tvp->v_mount))) { + error = EXDEV; +abortit: + if (tdvp == tvp) + vrele(tdvp); + else + vput(tdvp); + if (tvp) + vput(tvp); + vrele(fdvp); + vrele(fvp); + return (error); + } + + if (tvp && ((VTOI(tvp)->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) || + (VTOI(tdvp)->i_flags & APPEND))) { + error = EPERM; + goto abortit; + } + + /* + * Renaming a file to itself has no effect. The upper layers should + * not call us in that case. Temporarily just warn if they do.
+ */ + if (fvp == tvp) { + printf("ext2_rename: fvp == tvp (can't happen)\n"); + error = 0; + goto abortit; + } + + if ((error = vn_lock(fvp, LK_EXCLUSIVE)) != 0) + goto abortit; + dp = VTOI(fdvp); + ip = VTOI(fvp); + if (ip->i_nlink >= LINK_MAX) { + VOP_UNLOCK(fvp, 0); + error = EMLINK; + goto abortit; + } + if ((ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND)) + || (dp->i_flags & APPEND)) { + VOP_UNLOCK(fvp, 0); + error = EPERM; + goto abortit; + } + if ((ip->i_mode & IFMT) == IFDIR) { + /* + * Avoid ".", "..", and aliases of "." for obvious reasons. + */ + if ((fcnp->cn_namelen == 1 && fcnp->cn_nameptr[0] == '.') || + dp == ip || (fcnp->cn_flags | tcnp->cn_flags) & ISDOTDOT || + (ip->i_flag & IN_RENAME)) { + VOP_UNLOCK(fvp, 0); + error = EINVAL; + goto abortit; + } + ip->i_flag |= IN_RENAME; + oldparent = dp->i_number; + doingdirectory++; + } + vrele(fdvp); + + /* + * When the target exists, both the directory + * and target vnodes are returned locked. + */ + dp = VTOI(tdvp); + xp = NULL; + if (tvp) + xp = VTOI(tvp); + + /* + * 1) Bump link count while we're moving stuff + * around. If we crash somewhere before + * completing our work, the link count + * may be wrong, but correctable. + */ + ip->i_nlink++; + ip->i_flag |= IN_CHANGE; + if ((error = ext2_update(fvp, 1)) != 0) { + VOP_UNLOCK(fvp, 0); + goto bad; + } + + /* + * If ".." must be changed (ie the directory gets a new + * parent) then the source directory must not be in the + * directory hierarchy above the target, as this would + * orphan everything below the source directory. Also + * the user must have write permission in the source so + * as to be able to change "..". We must repeat the call + * to namei, as the parent directory is unlocked by the + * call to checkpath().
+ */ + error = VOP_ACCESS(fvp, VWRITE, tcnp->cn_cred, tcnp->cn_thread); + VOP_UNLOCK(fvp, 0); + if (oldparent != dp->i_number) + newparent = dp->i_number; + if (doingdirectory && newparent) { + if (error) /* write access check above */ + goto bad; + if (xp != NULL) + vput(tvp); + error = ext2_checkpath(ip, dp, tcnp->cn_cred); + if (error) + goto out; + VREF(tdvp); + error = relookup(tdvp, &tvp, tcnp); + if (error) + goto out; + vrele(tdvp); + dp = VTOI(tdvp); + xp = NULL; + if (tvp) + xp = VTOI(tvp); + } + /* + * 2) If target doesn't exist, link the target + * to the source and unlink the source. + * Otherwise, rewrite the target directory + * entry to reference the source inode and + * expunge the original entry's existence. + */ + if (xp == NULL) { + if (dp->i_devvp != ip->i_devvp) + panic("ext2_rename: EXDEV"); + /* + * Account for ".." in new directory. + * When source and destination have the same + * parent we don't fool with the link count. + */ + if (doingdirectory && newparent) { + if ((nlink_t)dp->i_nlink >= LINK_MAX) { + error = EMLINK; + goto bad; + } + dp->i_nlink++; + dp->i_flag |= IN_CHANGE; + error = ext2_update(tdvp, 1); + if (error) + goto bad; + } + error = ext2_direnter(ip, tdvp, tcnp); + if (error) { + if (doingdirectory && newparent) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + (void)ext2_update(tdvp, 1); + } + goto bad; + } + vput(tdvp); + } else { + if (xp->i_devvp != dp->i_devvp || xp->i_devvp != ip->i_devvp) + panic("ext2_rename: EXDEV"); + /* + * Short circuit rename(foo, foo). + */ + if (xp->i_number == ip->i_number) + panic("ext2_rename: same file"); + /* + * If the parent directory is "sticky", then the user must + * own the parent directory, or the destination of the rename, + * otherwise the destination may not be changed (except by + * root). This implements append-only directories.
+ */ + if ((dp->i_mode & S_ISTXT) && tcnp->cn_cred->cr_uid != 0 && + tcnp->cn_cred->cr_uid != dp->i_uid && + xp->i_uid != tcnp->cn_cred->cr_uid) { + error = EPERM; + goto bad; + } + /* + * Target must be empty if a directory and have no links + * to it. Also, ensure source and target are compatible + * (both directories, or both not directories). + */ + if ((xp->i_mode&IFMT) == IFDIR) { + if (! ext2_dirempty(xp, dp->i_number, tcnp->cn_cred) || + xp->i_nlink > 2) { + error = ENOTEMPTY; + goto bad; + } + if (!doingdirectory) { + error = ENOTDIR; + goto bad; + } + cache_purge(tdvp); + } else if (doingdirectory) { + error = EISDIR; + goto bad; + } + error = ext2_dirrewrite(dp, ip, tcnp); + if (error) + goto bad; + /* + * If the target directory is in the same + * directory as the source directory, + * decrement the link count on the parent + * of the target directory. + */ + if (doingdirectory && !newparent) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + } + vput(tdvp); + /* + * Adjust the link count of the target to + * reflect the dirrewrite above. If this is + * a directory it is empty and there are + * no links to it, so we can squash the inode and + * any space associated with it. We disallowed + * renaming over top of a directory with links to + * it above, as the remaining link would point to + * a directory without "." or ".." entries. + */ + xp->i_nlink--; + if (doingdirectory) { + if (--xp->i_nlink != 0) + panic("ext2_rename: linked directory"); + error = ext2_truncate(tvp, (off_t)0, IO_SYNC, + tcnp->cn_cred, tcnp->cn_thread); + } + xp->i_flag |= IN_CHANGE; + vput(tvp); + xp = NULL; + } + + /* + * 3) Unlink the source. + */ + fcnp->cn_flags &= ~MODMASK; + fcnp->cn_flags |= LOCKPARENT | LOCKLEAF; + VREF(fdvp); + error = relookup(fdvp, &fvp, fcnp); + if (error == 0) + vrele(fdvp); + if (fvp != NULL) { + xp = VTOI(fvp); + dp = VTOI(fdvp); + } else { + /* + * From name has disappeared.
+ */ + if (doingdirectory) + panic("ext2_rename: lost dir entry"); + vrele(ap->a_fvp); + return (0); + } + /* + * Ensure that the directory entry still exists and has not + * changed while the new name has been entered. If the source is + * a file then the entry may have been unlinked or renamed. In + * either case there is no further work to be done. If the source + * is a directory then it cannot have been rmdir'ed; its link + * count of three would cause a rmdir to fail with ENOTEMPTY. + * The IN_RENAME flag ensures that it cannot be moved by another + * rename. + */ + if (xp != ip) { + if (doingdirectory) + panic("ext2_rename: lost dir entry"); + } else { + /* + * If the source is a directory with a + * new parent, the link count of the old + * parent directory must be decremented + * and ".." set to point to the new parent. + */ + if (doingdirectory && newparent) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + error = vn_rdwr(UIO_READ, fvp, (caddr_t)&dirbuf, + sizeof (struct dirtemplate), (off_t)0, + UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, + tcnp->cn_cred, NOCRED, NULL, NULL); + if (error == 0) { + /* Like ufs little-endian: */ + namlen = dirbuf.dotdot_type; + if (namlen != 2 || + dirbuf.dotdot_name[0] != '.'
|| + dirbuf.dotdot_name[1] != '.') { + ext2_dirbad(xp, (doff_t)12, + "rename: mangled dir"); + } else { + dirbuf.dotdot_ino = newparent; + (void) vn_rdwr(UIO_WRITE, fvp, + (caddr_t)&dirbuf, + sizeof (struct dirtemplate), + (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED | IO_SYNC | + IO_NOMACCHECK, tcnp->cn_cred, + NOCRED, NULL, NULL); + cache_purge(fdvp); + } + } + } + error = ext2_dirremove(fdvp, fcnp); + if (!error) { + xp->i_nlink--; + xp->i_flag |= IN_CHANGE; + } + xp->i_flag &= ~IN_RENAME; + } + if (dp) + vput(fdvp); + if (xp) + vput(fvp); + vrele(ap->a_fvp); + return (error); + +bad: + if (xp) + vput(ITOV(xp)); + vput(ITOV(dp)); +out: + if (doingdirectory) + ip->i_flag &= ~IN_RENAME; + if (vn_lock(fvp, LK_EXCLUSIVE) == 0) { + ip->i_nlink--; + ip->i_flag |= IN_CHANGE; + ip->i_flag &= ~IN_RENAME; + vput(fvp); + } else + vrele(fvp); + return (error); +} + +/* + * Mkdir system call: allocate the directory inode, bump the parent's + * link count, write the initial "." and ".." block, then enter the + * new name in the parent directory. + */ +static int +ext2_mkdir(ap) + struct vop_mkdir_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + } */ *ap; +{ + struct vnode *dvp = ap->a_dvp; + struct vattr *vap = ap->a_vap; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; + struct vnode *tvp; + struct dirtemplate dirtemplate, *dtp; + int error, dmode; + +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("ext2_mkdir: no name"); +#endif + dp = VTOI(dvp); + if ((nlink_t)dp->i_nlink >= LINK_MAX) { + error = EMLINK; + goto out; + } + dmode = vap->va_mode & 0777; + dmode |= IFDIR; + /* + * Must simulate part of ext2_makeinode here to acquire the inode, + * but not have it entered in the parent directory. The entry is + * made later after writing "." and ".." entries.
+ */ + error = ext2_valloc(dvp, dmode, cnp->cn_cred, &tvp); + if (error) + goto out; + ip = VTOI(tvp); + ip->i_gid = dp->i_gid; +#ifdef SUIDDIR + { + /* + * if we are hacking owners here, (only do this where told to) + * and we are not giving it TOO root, (would subvert quotas) + * then go ahead and give it to the other user. + * The new directory also inherits the SUID bit. + * If user's UID and dir UID are the same, + * 'give it away' so that the SUID is still forced on. + */ + if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && + (dp->i_mode & ISUID) && dp->i_uid) { + dmode |= ISUID; + ip->i_uid = dp->i_uid; + } else { + ip->i_uid = cnp->cn_cred->cr_uid; + } + } +#else + ip->i_uid = cnp->cn_cred->cr_uid; +#endif + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + ip->i_mode = dmode; + tvp->v_type = VDIR; /* Rest init'd in getnewvnode(). */ + ip->i_nlink = 2; + if (cnp->cn_flags & ISWHITEOUT) + ip->i_flags |= UF_OPAQUE; + error = ext2_update(tvp, 1); + + /* + * Bump link count in parent directory + * to reflect work done below. Should + * be done before reference is created + * so reparation is possible if we crash. + */ + dp->i_nlink++; + dp->i_flag |= IN_CHANGE; + error = ext2_update(dvp, 1); + if (error) + goto bad; + + /* Initialize directory with "." and ".." from static template.
*/ + if (EXT2_HAS_INCOMPAT_FEATURE(ip->i_e2fs, + EXT2F_INCOMPAT_FTYPE)) + dtp = &mastertemplate; + else + dtp = &omastertemplate; + dirtemplate = *dtp; + dirtemplate.dot_ino = ip->i_number; + dirtemplate.dotdot_ino = dp->i_number; + /* note that in ext2 DIRBLKSIZ == blocksize, not DEV_BSIZE + * so let's just redefine it - for this function only + */ +#undef DIRBLKSIZ +#define DIRBLKSIZ VTOI(dvp)->i_e2fs->e2fs_bsize + dirtemplate.dotdot_reclen = DIRBLKSIZ - 12; + error = vn_rdwr(UIO_WRITE, tvp, (caddr_t)&dirtemplate, + sizeof (dirtemplate), (off_t)0, UIO_SYSSPACE, + IO_NODELOCKED | IO_SYNC | IO_NOMACCHECK, cnp->cn_cred, NOCRED, + NULL, NULL); + if (error) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + goto bad; + } + if (DIRBLKSIZ > VFSTOEXT2(dvp->v_mount)->um_mountp->mnt_stat.f_bsize) + /* XXX should grow with balloc() */ + panic("ext2_mkdir: blksize"); + else { + ip->i_size = DIRBLKSIZ; + ip->i_flag |= IN_CHANGE; + } + + /* Directory set up, now install its entry in the parent directory. */ + error = ext2_direnter(ip, dvp, cnp); + if (error) { + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + } +bad: + /* + * No need to do an explicit VOP_TRUNCATE here, vrele will do this + * for us because we set the link count to 0. + */ + if (error) { + ip->i_nlink = 0; + ip->i_flag |= IN_CHANGE; + vput(tvp); + } else + *ap->a_vpp = tvp; +out: + return (error); +#undef DIRBLKSIZ +#define DIRBLKSIZ DEV_BSIZE +} + +/* + * Rmdir system call. + */ +static int +ext2_rmdir(ap) + struct vop_rmdir_args /* { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct vnode *dvp = ap->a_dvp; + struct componentname *cnp = ap->a_cnp; + struct inode *ip, *dp; + int error; + + ip = VTOI(vp); + dp = VTOI(dvp); + + /* + * Verify the directory is empty (and valid). + * (Rmdir ".." won't be valid since + * ".." will contain a reference to + * the current directory and thus be + * non-empty.)
+ */ + error = 0; + if (ip->i_nlink != 2 || !ext2_dirempty(ip, dp->i_number, cnp->cn_cred)) { + error = ENOTEMPTY; + goto out; + } + if ((dp->i_flags & APPEND) + || (ip->i_flags & (NOUNLINK | IMMUTABLE | APPEND))) { + error = EPERM; + goto out; + } + /* + * Delete reference to directory before purging + * inode. If we crash in between, the directory + * will be reattached to lost+found, + */ + error = ext2_dirremove(dvp, cnp); + if (error) + goto out; + dp->i_nlink--; + dp->i_flag |= IN_CHANGE; + cache_purge(dvp); + VOP_UNLOCK(dvp, 0); + /* + * Truncate inode. The only stuff left + * in the directory is "." and "..". The + * "." reference is inconsequential since + * we're quashing it. The ".." reference + * has already been adjusted above. We've + * removed the "." reference and the reference + * in the parent directory, but there may be + * other hard links so decrement by 2 and + * worry about them later. + */ + ip->i_nlink -= 2; + error = ext2_truncate(vp, (off_t)0, IO_SYNC, cnp->cn_cred, + cnp->cn_thread); + cache_purge(ITOV(ip)); + vn_lock(dvp, LK_EXCLUSIVE | LK_RETRY); +out: + return (error); +} + +/* + * symlink -- make a symbolic link. Targets shorter than the mount's + * mnt_maxsymlinklen are copied into the inode's i_shortlink area; + * longer ones are written as file data with vn_rdwr(). The new vnode + * is released again if that write fails. + */ +static int +ext2_symlink(ap) + struct vop_symlink_args /* { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + } */ *ap; +{ + struct vnode *vp, **vpp = ap->a_vpp; + struct inode *ip; + int len, error; + + error = ext2_makeinode(IFLNK | ap->a_vap->va_mode, ap->a_dvp, + vpp, ap->a_cnp); + if (error) + return (error); + vp = *vpp; + len = strlen(ap->a_target); + if (len < vp->v_mount->mnt_maxsymlinklen) { + ip = VTOI(vp); + bcopy(ap->a_target, (char *)ip->i_shortlink, len); + ip->i_size = len; + ip->i_flag |= IN_CHANGE | IN_UPDATE; + } else + error = vn_rdwr(UIO_WRITE, vp, ap->a_target, len, (off_t)0, + UIO_SYSSPACE, IO_NODELOCKED | IO_NOMACCHECK, + ap->a_cnp->cn_cred, NOCRED, NULL, NULL); + if (error) + vput(vp); + return (error); +} + +/* + * Return
target name of a symbolic link + */ +static int +ext2_readlink(ap) + struct vop_readlink_args /* { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + int isize; + + isize = ip->i_size; + if (isize < vp->v_mount->mnt_maxsymlinklen) { + uiomove((char *)ip->i_shortlink, isize, ap->a_uio); + return (0); + } + return (VOP_READ(vp, ap->a_uio, 0, ap->a_cred)); +} + +/* + * Calculate the logical to physical mapping if not done already, + * then call the device strategy routine. + * + * In order to be able to swap to a file, the ext2_bmaparray() operation may not + * deadlock on memory. See ext2_bmap() for details. + */ +static int +ext2_strategy(ap) + struct vop_strategy_args /* { + struct vnode *a_vp; + struct buf *a_bp; + } */ *ap; +{ + struct buf *bp = ap->a_bp; + struct vnode *vp = ap->a_vp; + struct inode *ip; + struct bufobj *bo; + int32_t blkno; + int error; + + ip = VTOI(vp); + if (vp->v_type == VBLK || vp->v_type == VCHR) + panic("ext2_strategy: spec"); + if (bp->b_blkno == bp->b_lblkno) { + error = ext2_bmaparray(vp, bp->b_lblkno, &blkno, NULL, NULL); + bp->b_blkno = blkno; + if (error) { + bp->b_error = error; + bp->b_ioflags |= BIO_ERROR; + bufdone(bp); + return (0); + } + if ((long)bp->b_blkno == -1) + vfs_bio_clrbuf(bp); + } + if ((long)bp->b_blkno == -1) { + bufdone(bp); + return (0); + } + bp->b_iooffset = dbtob(bp->b_blkno); + bo = VFSTOEXT2(vp->v_mount)->um_bo; + BO_STRATEGY(bo, bp); + return (0); +} + +/* + * Print out the contents of an inode. + */ +static int +ext2_print(ap) + struct vop_print_args /* { + struct vnode *a_vp; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + struct inode *ip = VTOI(vp); + + vn_printf(ip->i_devvp, "\tino %lu", (u_long)ip->i_number); + if (vp->v_type == VFIFO) + fifo_printinfo(vp); + printf("\n"); + return (0); +} + +/* + * Close wrapper for fifos. + * + * Update the times on the inode then do device close. 
+ */ +static int +ext2fifo_close(ap) + struct vop_close_args /* { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; + struct thread *a_td; + } */ *ap; +{ + struct vnode *vp = ap->a_vp; + + VI_LOCK(vp); + if (vp->v_usecount > 1) + ext2_itimes_locked(vp); + VI_UNLOCK(vp); + return (fifo_specops.vop_close(ap)); +} + +/* + * Kqfilter wrapper for fifos. + * + * Fall through to ext2 kqfilter routines if needed + */ +static int +ext2fifo_kqfilter(ap) + struct vop_kqfilter_args *ap; +{ + int error; + + error = fifo_specops.vop_kqfilter(ap); + if (error) + error = vfs_kqfilter(ap); + return (error); +} + +/* + * Return POSIX pathconf information applicable to ext2 filesystems. + */ +static int +ext2_pathconf(ap) + struct vop_pathconf_args /* { + struct vnode *a_vp; + int a_name; + int *a_retval; + } */ *ap; +{ + + switch (ap->a_name) { + case _PC_LINK_MAX: + *ap->a_retval = LINK_MAX; + return (0); + case _PC_NAME_MAX: + *ap->a_retval = NAME_MAX; + return (0); + case _PC_PATH_MAX: + *ap->a_retval = PATH_MAX; + return (0); + case _PC_PIPE_BUF: + *ap->a_retval = PIPE_BUF; + return (0); + case _PC_CHOWN_RESTRICTED: + *ap->a_retval = 1; + return (0); + case _PC_NO_TRUNC: + *ap->a_retval = 1; + return (0); + default: + return (EINVAL); + } + /* NOTREACHED */ +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +static int +ext2_vptofh(ap) + struct vop_vptofh_args /* { + struct vnode *a_vp; + struct fid *a_fhp; + } */ *ap; +{ + struct inode *ip; + struct ufid *ufhp; + + ip = VTOI(ap->a_vp); + ufhp = (struct ufid *)ap->a_fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} + +/* + * Initialize the vnode associated with a new inode, handle aliased + * vnodes. 
+ */ +int +ext2_vinit(mntp, fifoops, vpp) + struct mount *mntp; + struct vop_vector *fifoops; + struct vnode **vpp; +{ + struct inode *ip; + struct vnode *vp; + + vp = *vpp; + ip = VTOI(vp); + vp->v_type = IFTOVT(ip->i_mode); + /* FIFOs are switched to the caller-supplied operations vector. */ + if (vp->v_type == VFIFO) + vp->v_op = fifoops; + + /* Flag the vnode of the root inode. */ + if (ip->i_number == ROOTINO) + vp->v_vflag |= VV_ROOT; + ip->i_modrev = init_va_filerev(); + *vpp = vp; + return (0); +} + +/* + * Allocate a new inode with the given mode (IFREG when no type bits + * are set), initialise its ownership from the parent directory and + * the caller's credentials, write it with ext2_update() and then + * enter it in directory dvp under the name in cnp. + * + * On success *vpp holds the new vnode and 0 is returned. If the + * inode update or the directory entry fails, the link count is set + * to zero, the vnode is released with vput() and the error is + * returned; *vpp is then left NULL. + */ +static int +ext2_makeinode(mode, dvp, vpp, cnp) + int mode; + struct vnode *dvp; + struct vnode **vpp; + struct componentname *cnp; +{ + struct inode *ip, *pdir; + struct vnode *tvp; + int error; + + pdir = VTOI(dvp); +#ifdef DIAGNOSTIC + if ((cnp->cn_flags & HASBUF) == 0) + panic("ext2_makeinode: no name"); +#endif + *vpp = NULL; + if ((mode & IFMT) == 0) + mode |= IFREG; + + error = ext2_valloc(dvp, mode, cnp->cn_cred, &tvp); + if (error) { + return (error); + } + ip = VTOI(tvp); + ip->i_gid = pdir->i_gid; +#ifdef SUIDDIR + { + /* + * if we are + * not the owner of the directory, + * and we are hacking owners here, (only do this where told to) + * and we are not giving it TOO root, (would subvert quotas) + * then go ahead and give it to the other user. + * Note that this drops off the execute bits for security. + */ + if ( (dvp->v_mount->mnt_flag & MNT_SUIDDIR) && + (pdir->i_mode & ISUID) && + (pdir->i_uid != cnp->cn_cred->cr_uid) && pdir->i_uid) { + ip->i_uid = pdir->i_uid; + mode &= ~07111; + } else { + ip->i_uid = cnp->cn_cred->cr_uid; + } + } +#else + ip->i_uid = cnp->cn_cred->cr_uid; +#endif + ip->i_flag |= IN_ACCESS | IN_CHANGE | IN_UPDATE; + ip->i_mode = mode; + tvp->v_type = IFTOVT(mode); /* Rest init'd in getnewvnode(). */ + ip->i_nlink = 1; + if ((ip->i_mode & ISGID) && !groupmember(ip->i_gid, cnp->cn_cred)) { + if (priv_check_cred(cnp->cn_cred, PRIV_VFS_RETAINSUGID, 0)) + ip->i_mode &= ~ISGID; + } + + if (cnp->cn_flags & ISWHITEOUT) + ip->i_flags |= UF_OPAQUE; + + /* + * Make sure inode goes to disk before directory entry.
+ */ + error = ext2_update(tvp, 1); + if (error) + goto bad; + error = ext2_direnter(ip, dvp, cnp); + if (error) + goto bad; + + *vpp = tvp; + return (0); + +bad: + /* + * Write error occurred trying to update the inode + * or the directory so must deallocate the inode. + */ + ip->i_nlink = 0; + ip->i_flag |= IN_CHANGE; + vput(tvp); + return (error); +} diff --git a/sys/fs/ext2fs/ext2fs.h b/sys/fs/ext2fs/ext2fs.h new file mode 100755 index 0000000..2efc57f --- /dev/null +++ b/sys/fs/ext2fs/ext2fs.h @@ -0,0 +1,329 @@ +/*- + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + * + * $FreeBSD$ + */ +/*- + * Copyright (c) 2009 Aditya Sarawgi + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * + */ + +#ifndef _FS_EXT2FS_EXT2_FS_H +#define _FS_EXT2FS_EXT2_FS_H + +#include <sys/types.h> + +/* + * Special inode numbers + */ +#define EXT2_BAD_INO 1 /* Bad blocks inode */ +#define EXT2_ROOT_INO 2 /* Root inode */ +#define EXT2_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT2_UNDEL_DIR_INO 6 /* Undelete directory inode */ + +/* First non-reserved inode for old ext2 filesystems */ +#define E2FS_REV0_FIRST_INO 11 + +/* + * The second extended file system magic number + */ +#define E2FS_MAGIC 0xEF53 + +#if defined(_KERNEL) +/* + * FreeBSD passes the pointer to the in-core struct with relevant + * fields to EXT2_SB macro when accessing superblock fields. + */ +#define EXT2_SB(sb) (sb) +#else +/* Assume that user mode programs are passing in an ext2fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT2_SB(sb) (sb) +#endif /* both branches expand EXT2_SB(sb) to (sb) unchanged */ + +/* + * Maximal count of links to a file + */ +#define EXT2_LINK_MAX 32000 + +/* + * Constants relative to the data blocks + * + * EXT2_NDIR_BLOCKS is 12; EXT2_IND_BLOCK, EXT2_DIND_BLOCK and + * EXT2_TIND_BLOCK evaluate to 12, 13 and 14, and EXT2_N_BLOCKS to 15, + * so EXT2_MAXSYMLINKLEN is 15 * sizeof(uint32_t) = 60 bytes. + */ +#define EXT2_NDIR_BLOCKS 12 +#define EXT2_IND_BLOCK EXT2_NDIR_BLOCKS +#define EXT2_DIND_BLOCK (EXT2_IND_BLOCK + 1) +#define EXT2_TIND_BLOCK (EXT2_DIND_BLOCK + 1) +#define EXT2_N_BLOCKS (EXT2_TIND_BLOCK + 1) +#define EXT2_MAXSYMLINKLEN (EXT2_N_BLOCKS * sizeof (uint32_t)) + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. 
MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + */ +#define MAXMNTLEN 512 + +/* + * Super block for an ext2fs file system. + * + * The member order and widths mirror the on-disk ext2 superblock, so + * members must not be reordered or resized. The two one-byte + * preallocation fields keep e2fs_reserved_ngdb at its on-disk byte + * offset (206); the structure is 1024 bytes in total. + */ +struct ext2fs { + u_int32_t e2fs_icount; /* Inode count */ + u_int32_t e2fs_bcount; /* blocks count */ + u_int32_t e2fs_rbcount; /* reserved blocks count */ + u_int32_t e2fs_fbcount; /* free blocks count */ + u_int32_t e2fs_ficount; /* free inodes count */ + u_int32_t e2fs_first_dblock; /* first data block */ + u_int32_t e2fs_log_bsize; /* block size = 1024*(2^e2fs_log_bsize) */ + u_int32_t e2fs_log_fsize; /* fragment size */ + u_int32_t e2fs_bpg; /* blocks per group */ + u_int32_t e2fs_fpg; /* frags per group */ + u_int32_t e2fs_ipg; /* inodes per group */ + u_int32_t e2fs_mtime; /* mount time */ + u_int32_t e2fs_wtime; /* write time */ + u_int16_t e2fs_mnt_count; /* mount count */ + u_int16_t e2fs_max_mnt_count; /* max mount count */ + u_int16_t e2fs_magic; /* magic number */ + u_int16_t e2fs_state; /* file system state */ + u_int16_t e2fs_beh; /* behavior on errors */ + u_int16_t e2fs_minrev; /* minor revision level */ + u_int32_t e2fs_lastfsck; /* time of last fsck */ + u_int32_t e2fs_fsckintv; /* max time between fscks */ + u_int32_t e2fs_creator; /* creator OS */ + u_int32_t e2fs_rev; /* revision level */ + u_int16_t e2fs_ruid; /* default uid for reserved blocks */ + u_int16_t e2fs_rgid; /* default gid for reserved blocks */ + /* EXT2_DYNAMIC_REV superblocks */ + u_int32_t e2fs_first_ino; /* first non-reserved inode */ + u_int16_t e2fs_inode_size; /* size of inode structure */ + u_int16_t e2fs_block_group_nr; /* block grp number of this sblk*/ + u_int32_t e2fs_features_compat; /* compatible feature set */ + u_int32_t e2fs_features_incompat; /* incompatible feature set */ + u_int32_t e2fs_features_rocompat; /* RO-compatible feature set */ + u_int8_t e2fs_uuid[16]; /* 128-bit uuid for volume */ + char e2fs_vname[16]; /* volume name */ + char e2fs_fsmnt[64]; /* name mounted on */ + u_int32_t 
e2fs_algo; /* For compression */ + u_int8_t e2fs_prealloc; /* # of blocks to preallocate for files */ + u_int8_t e2fs_dir_prealloc; /* # of blocks to preallocate for dirs */ + u_int16_t e2fs_reserved_ngdb; /* # of reserved gd blocks for resize */ + u_int32_t reserved2[204]; +}; + + +/* Assume that user mode programs are passing in an ext2fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. */ +#define EXT2_SB(sb) (sb) + +/* + * In-Memory Superblock + */ + +struct m_ext2fs { + struct ext2fs * e2fs; + char e2fs_fsmnt[MAXMNTLEN];/* name mounted on */ + char e2fs_ronly; /* mounted read-only flag */ + char e2fs_fmod; /* super block modified flag */ + uint32_t e2fs_bsize; /* Block size */ + uint32_t e2fs_bshift; /* calc of logical block no */ + int32_t e2fs_bmask; /* calc of block offset */ + int32_t e2fs_bpg; /* Number of blocks per group */ + int64_t e2fs_qbmask; /* = s_blocksize -1 */ + uint32_t e2fs_fsbtodb; /* Shift to get disk block */ + uint32_t e2fs_ipg; /* Number of inodes per group */ + uint32_t e2fs_ipb; /* Number of inodes per block */ + uint32_t e2fs_itpg; /* Number of inode table per group */ + uint32_t e2fs_fsize; /* Size of fragments per block */ + uint32_t e2fs_fpb; /* Number of fragments per block */ + uint32_t e2fs_fpg; /* Number of fragments per group */ + uint32_t e2fs_dbpg; /* Number of descriptor blocks per group */ + uint32_t e2fs_descpb; /* Number of group descriptors per block */ + uint32_t e2fs_gdbcount; /* Number of group descriptors */ + uint32_t e2fs_gcount; /* Number of groups */ + uint32_t e2fs_first_inode;/* First inode on fs */ + int32_t e2fs_isize; /* Size of inode */ + uint32_t e2fs_mount_opt; + uint32_t e2fs_blocksize_bits; + uint32_t e2fs_total_dir; /* Total number of directories */ + uint8_t *e2fs_contigdirs; + char e2fs_wasvalid; /* valid at mount time */ + off_t e2fs_maxfilesize; + struct ext2_gd *e2fs_gd; /* Group Descriptors */ +}; + +/* + * The second extended file system version + */ +#define E2FS_DATE "95/08/09" +#define E2FS_VERSION "0.5b" + +/* + * Revision levels + */ +#define E2FS_REV0 0 
/* The good old (original) format */ +#define E2FS_REV1 1 /* V2 format w/ dynamic inode sizes */ + +#define E2FS_CURRENT_REV E2FS_REV0 +#define E2FS_MAX_SUPP_REV E2FS_REV1 + +#define E2FS_REV0_INODE_SIZE 128 + +/* + * compatible/incompatible features + */ +#define EXT2F_COMPAT_PREALLOC 0x0001 +#define EXT2F_COMPAT_RESIZE 0x0010 + +#define EXT2F_ROCOMPAT_SPARSESUPER 0x0001 +#define EXT2F_ROCOMPAT_LARGEFILE 0x0002 +#define EXT2F_ROCOMPAT_BTREE_DIR 0x0004 + +#define EXT2F_INCOMPAT_COMP 0x0001 +#define EXT2F_INCOMPAT_FTYPE 0x0002 + +/* + * Features supported in this implementation + * + * We support the following REV1 features: + * - EXT2F_ROCOMPAT_SPARSESUPER + * - EXT2F_ROCOMPAT_LARGEFILE + * - EXT2F_INCOMPAT_FTYPE + */ +#define EXT2F_COMPAT_SUPP 0x0000 +#define EXT2F_ROCOMPAT_SUPP (EXT2F_ROCOMPAT_SPARSESUPER \ + | EXT2F_ROCOMPAT_LARGEFILE) +#define EXT2F_INCOMPAT_SUPP EXT2F_INCOMPAT_FTYPE + +/* + * Feature set definitions + * + * Each macro tests 'mask' against one feature word of the on-disk + * superblock reached through the e2fs pointer of 'sb'; the mask is + * converted with htole32() before the comparison. + */ +#define EXT2_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->e2fs->e2fs_features_compat & htole32(mask) ) +#define EXT2_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->e2fs->e2fs_features_rocompat & htole32(mask) ) +#define EXT2_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT2_SB(sb)->e2fs->e2fs_features_incompat & htole32(mask) ) + +/* + * Definitions of behavior on errors + */ +#define E2FS_BEH_CONTINUE 1 /* continue operation */ +#define E2FS_BEH_READONLY 2 /* remount fs read only */ +#define E2FS_BEH_PANIC 3 /* cause panic */ +#define E2FS_BEH_DEFAULT E2FS_BEH_CONTINUE + +/* + * OS identification + */ +#define E2FS_OS_LINUX 0 +#define E2FS_OS_HURD 1 +#define E2FS_OS_MASIX 2 +#define E2FS_OS_FREEBSD 3 +#define E2FS_OS_LITES 4 + +/* + * File clean flags + */ +#define E2FS_ISCLEAN 0x0001 /* Unmounted cleanly */ +#define E2FS_ERRORS 0x0002 /* Errors detected */ + +/* ext2 file system block group descriptor */ + +struct ext2_gd { + u_int32_t ext2bgd_b_bitmap; /* blocks bitmap block */ + u_int32_t ext2bgd_i_bitmap; /* inodes bitmap block */ + 
u_int32_t ext2bgd_i_tables; /* inodes table block */ + u_int16_t ext2bgd_nbfree; /* number of free blocks */ + u_int16_t ext2bgd_nifree; /* number of free inodes */ + u_int16_t ext2bgd_ndirs; /* number of directories */ + u_int16_t reserved; + u_int32_t reserved2[3]; +}; + +/* EXT2FS metadata is stored in little-endian byte order. These macros + * copy 'size' bytes from 'old' to 'new' with memcpy(); they perform no + * byte swapping. They expand to a bare expression (no trailing + * semicolon) so that "e2fs_cgload(a, b, n);" is exactly one statement + * and stays valid inside an unbraced if/else. + */ + +#define e2fs_cgload(old, new, size) memcpy((new), (old), (size)) +#define e2fs_cgsave(old, new, size) memcpy((new), (old), (size)) +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT2_MIN_BLOCK_SIZE 1024 +#define EXT2_MAX_BLOCK_SIZE 4096 +#define EXT2_MIN_BLOCK_LOG_SIZE 10 +#if defined(_KERNEL) +# define EXT2_BLOCK_SIZE(s) ((s)->e2fs_bsize) +#else +# define EXT2_BLOCK_SIZE(s) (EXT2_MIN_BLOCK_SIZE << (s)->e2fs_log_bsize) +#endif +#define EXT2_ADDR_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (uint32_t)) +#if defined(_KERNEL) +# define EXT2_BLOCK_SIZE_BITS(s) ((s)->e2fs_blocksize_bits) +#else +# define EXT2_BLOCK_SIZE_BITS(s) ((s)->e2fs_log_bsize + 10) +#endif +#if defined(_KERNEL) +/* + * NOTE(review): struct m_ext2fs in this header declares no + * s_addr_per_block_bits member, so EXT2_ADDR_PER_BLOCK_BITS cannot + * compile if it is ever expanded -- confirm it is unused. + */ +#define EXT2_ADDR_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_addr_per_block_bits) +#define EXT2_INODE_SIZE(s) (EXT2_SB(s)->e2fs_isize) +#define EXT2_FIRST_INO(s) (EXT2_SB(s)->e2fs_first_inode) +#else +/* + * Userland: 's' is a struct ext2fs. Revision 0 file systems use the + * fixed E2FS_REV0_INODE_SIZE / E2FS_REV0_FIRST_INO values; later + * revisions read them from the superblock. + */ +#define EXT2_INODE_SIZE(s) (((s)->e2fs_rev == E2FS_REV0) ? \ + E2FS_REV0_INODE_SIZE : (s)->e2fs_inode_size) +#define EXT2_FIRST_INO(s) (((s)->e2fs_rev == E2FS_REV0) ? 
\ + E2FS_REV0_FIRST_INO : (s)->e2fs_first_ino) +#endif + +/* + * Macro-instructions used to manage fragments + */ +#define EXT2_MIN_FRAG_SIZE 1024 +#define EXT2_MAX_FRAG_SIZE 4096 +#define EXT2_MIN_FRAG_LOG_SIZE 10 +#if defined(_KERNEL) +# define EXT2_FRAG_SIZE(s) (EXT2_SB(s)->e2fs_fsize) +# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_SB(s)->e2fs_fpb) +#else +# define EXT2_FRAG_SIZE(s) (EXT2_MIN_FRAG_SIZE << (s)->e2fs_log_fsize) +# define EXT2_FRAGS_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / EXT2_FRAG_SIZE(s)) +#endif + +/* + * Macro-instructions used to manage group descriptors + * + * NOTE(review): struct m_ext2fs in this header declares no + * s_desc_per_block_bits member, so EXT2_DESC_PER_BLOCK_BITS cannot + * compile if it is ever expanded -- confirm it is unused. + */ +#if defined(_KERNEL) +# define EXT2_BLOCKS_PER_GROUP(s) (EXT2_SB(s)->e2fs_bpg) +# define EXT2_DESC_PER_BLOCK(s) (EXT2_SB(s)->e2fs_descpb) +# define EXT2_DESC_PER_BLOCK_BITS(s) (EXT2_SB(s)->s_desc_per_block_bits) +#else +# define EXT2_BLOCKS_PER_GROUP(s) ((s)->e2fs_bpg) +# define EXT2_DESC_PER_BLOCK(s) (EXT2_BLOCK_SIZE(s) / sizeof (struct ext2_gd)) + +#endif + +#endif /* _FS_EXT2FS_EXT2_FS_H */ diff --git a/sys/fs/ext2fs/fs.h b/sys/fs/ext2fs/fs.h new file mode 100644 index 0000000..54c6b44 --- /dev/null +++ b/sys/fs/ext2fs/fs.h @@ -0,0 +1,152 @@ +/*- + * modified for EXT2FS support in Lites 1.1 + * + * Aug 1995, Godmar Back (gback@cs.utah.edu) + * University of Utah, Department of Computer Science + */ +/*- + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.7 (Berkeley) 4/19/94 + * $FreeBSD$ + */ + +/* + * Each disk drive contains some number of file systems. + * A file system consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A file system is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * The first boot and super blocks are given in absolute disk addresses. + * The byte-offset forms are preferred, as they don't imply a sector size. + */ +#define SBSIZE 1024 /* presumably the super-block size in bytes -- TODO confirm */ +#define SBLOCK 2 /* presumably the super-block's absolute disk address in device blocks -- TODO confirm */ + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. 
+ */ +#define MAXMNTLEN 512 + +/* + * Grigoriy Orlov <gluk@ptci.ru> has done some extensive work to fine + * tune the layout preferences for directories within a filesystem. + * His algorithm can be tuned by adjusting the following parameters + * which tell the system the average file size and the average number + * of files per directory. These defaults are well selected for typical + * filesystems, but may need to be tuned for odd cases like filesystems + * being used for squid caches or news spools. + * AFPDIR is the expected number of files per directory. AVGDIRSIZE is + * obtained by multiplying AFPDIR and AVFILESIZ which is assumed to be + * 16384 (64 * 16384 = 1048576). + */ + +#define AFPDIR 64 +#define AVGDIRSIZE 1048576 + +/* + * Macros for access to superblock array structures + * + * Every macro argument is parenthesized in the expansions below so that + * arbitrary expressions (e.g. "a + b" or "&sb") can be passed safely. + */ + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << ((fs)->e2fs_fsbtodb)) +#define dbtofsb(fs, b) ((b) >> ((fs)->e2fs_fsbtodb)) + +/* get group containing inode (inode numbers start at 1) */ +#define ino_to_cg(fs, x) (((x) - 1) / ((fs)->e2fs_ipg)) + +/* + * get block containing inode from its number x: the group's inode table + * start plus the inode's index within the group divided by inodes per block. + * NOTE(review): this reads e2fs_ipg through (fs)->e2fs while ino_to_cg + * uses the (fs)->e2fs_ipg copy -- presumably equal, confirm. + */ +#define ino_to_fsba(fs, x) \ + ((fs)->e2fs_gd[ino_to_cg((fs), (x))].ext2bgd_i_tables + \ + ((((x) - 1) % (fs)->e2fs->e2fs_ipg) / (fs)->e2fs_ipb)) + +/* get offset for inode in block */ +#define ino_to_fsbo(fs, x) (((x) - 1) % ((fs)->e2fs_ipb)) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) (((d) - (fs)->e2fs->e2fs_first_dblock) / \ + EXT2_BLOCKS_PER_GROUP(fs)) +#define dtogd(fs, d) (((d) - (fs)->e2fs->e2fs_first_dblock) % \ + EXT2_BLOCKS_PER_GROUP(fs)) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
+ */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->e2fs_qbmask) + +#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << ((fs)->e2fs_bshift)) + +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> ((fs)->e2fs_bshift)) + +/* no fragments -> logical block number equal # of frags */ +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> ((fs)->e2fs_bshift)) + +/* both arguments are parenthesized before being handed to roundup() */ +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + roundup((size), (fs)->e2fs_fsize) + /* was (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) */ + +/* + * Determining the size of a file block in the file system. + * easy w/o fragments: the ip and lbn arguments are ignored. + */ +#define blksize(fs, ip, lbn) ((fs)->e2fs_fsize) + +/* + * INOPB is the number of inodes in a secondary storage block. + */ +#define INOPB(fs) ((fs)->e2fs_ipb) + +/* + * NINDIR is the number of indirects in a file system block. + */ +#define NINDIR(fs) (EXT2_ADDR_PER_BLOCK(fs)) + +extern int inside[], around[]; +extern u_char *fragtbl[]; + + diff --git a/sys/fs/ext2fs/inode.h b/sys/fs/ext2fs/inode.h new file mode 100644 index 0000000..f352cdc1 --- /dev/null +++ b/sys/fs/ext2fs/inode.h @@ -0,0 +1,170 @@ +/*- + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)inode.h 8.9 (Berkeley) 5/14/95 + * $FreeBSD$ + */ + +#ifndef _FS_EXT2FS_INODE_H_ +#define _FS_EXT2FS_INODE_H_ + +#include <sys/lock.h> +#include <sys/queue.h> + +#define ROOTINO ((ino_t)2) /* inode number 2, already cast to ino_t */ + +#define NDADDR 12 /* Direct addresses in inode. */ +#define NIADDR 3 /* Indirect addresses in inode. */ + +/* + * This must agree with the definition in <ufs/ufs/dir.h>. + * Here doff_t is a preprocessor macro for int32_t, not a typedef. + */ +#define doff_t int32_t + +/* + * The inode is used to describe each active (or recently active) file in the + * EXT2FS filesystem. It is composed of two types of information. The first + * part is the information that is needed only while the file is active (such + * as the identity of the file and linkage to speed its lookup). 
The second + * part is the permanent meta-data associated with the file which is read in + * from the permanent dinode from long term storage when the file becomes + * active, and is put back when the file is no longer being used. + */ +struct inode { + struct vnode *i_vnode;/* Vnode associated with this inode. */ + struct vnode *i_devvp;/* Vnode for block I/O. */ + struct ext2mount *i_ump; /* NOTE(review): presumably the per-mount ext2 state -- confirm */ + u_int32_t i_flag; /* In-core IN_* flags, see below; not the same field as i_flags */ + ino_t i_number; /* The identity of the inode. */ + + struct m_ext2fs *i_e2fs; /* EXT2FS */ + u_quad_t i_modrev; /* Revision level for NFS lease. */ + /* + * Side effects; used during directory lookup. + */ + int32_t i_count; /* Size of free slot in directory. */ + doff_t i_endoff; /* End of useful stuff in directory. */ + doff_t i_diroff; /* Offset in dir, where we found last entry. */ + doff_t i_offset; /* Offset of free space in directory. */ + + /* NOTE(review): the five fields below look like block allocation/preallocation state -- confirm against the allocator */ + u_int32_t i_block_group; + u_int32_t i_next_alloc_block; + u_int32_t i_next_alloc_goal; + u_int32_t i_prealloc_block; + u_int32_t i_prealloc_count; + + /* Fields from struct dinode in UFS. */ + u_int16_t i_mode; /* IFMT, permissions; see below. */ + int16_t i_nlink; /* File link count. */ + u_int64_t i_size; /* File byte count. */ + int32_t i_atime; /* Last access time. */ + int32_t i_atimensec; /* Last access time; presumably the nanoseconds part -- confirm. */ + int32_t i_mtime; /* Last modified time. */ + int32_t i_mtimensec; /* Last modified time; presumably the nanoseconds part -- confirm. */ + int32_t i_ctime; /* Last inode change time. */ + int32_t i_ctimensec; /* Last inode change time; presumably the nanoseconds part -- confirm. */ + int32_t i_db[NDADDR]; /* Direct disk blocks. */ + int32_t i_ib[NIADDR]; /* Indirect disk blocks; declared directly after i_db. */ + u_int32_t i_flags; /* Status flags (chflags). */ + int32_t i_blocks; /* Blocks actually held. */ + int32_t i_gen; /* Generation number. */ + u_int32_t i_uid; /* File owner. */ + u_int32_t i_gid; /* File group. */ +}; + +/* + * The di_db fields may be overlaid with other information for + * file types that do not have associated disk storage. 
Block + * and character devices overlay the first data block with their + * dev_t value. Short symbolic links place their path in the + * di_db area. + */ +#define i_shortlink i_db /* short symlink text overlays the i_db array */ +#define i_rdev i_db[0] /* device number overlays the first i_db slot */ +#define MAXSYMLINKLEN ((NDADDR + NIADDR) * sizeof(int32_t)) /* (12 + 3) * 4 = 60 bytes with this header's NDADDR/NIADDR */ + +/* File permissions. */ +#define IEXEC 0000100 /* Executable. */ +#define IWRITE 0000200 /* Writeable. */ +#define IREAD 0000400 /* Readable. */ +#define ISVTX 0001000 /* Sticky bit. */ +#define ISGID 0002000 /* Set-gid. */ +#define ISUID 0004000 /* Set-uid. */ + +/* File types. */ +#define IFMT 0170000 /* Mask of file type. */ +#define IFIFO 0010000 /* Named pipe (fifo). */ +#define IFCHR 0020000 /* Character device. */ +#define IFDIR 0040000 /* Directory file. */ +#define IFBLK 0060000 /* Block device. */ +#define IFREG 0100000 /* Regular file. */ +#define IFLNK 0120000 /* Symbolic link. */ +#define IFSOCK 0140000 /* UNIX domain socket. */ +#define IFWHT 0160000 /* Whiteout. */ + +/* These flags are kept in i_flag. */ +#define IN_ACCESS 0x0001 /* Access time update request. */ +#define IN_CHANGE 0x0002 /* Inode change time update request. */ +#define IN_UPDATE 0x0004 /* Modification time update request. */ +#define IN_MODIFIED 0x0008 /* Inode has been modified. */ +#define IN_RENAME 0x0010 /* Inode is being renamed. */ +#define IN_HASHED 0x0020 /* Inode is on hash list */ +#define IN_LAZYMOD 0x0040 /* Modified, but don't write yet. */ +#define IN_SPACECOUNTED 0x0080 /* Blocks to be freed in free count. */ +#define IN_LAZYACCESS 0x0100 /* Process IN_ACCESS after the + suspension finished */ +#ifdef _KERNEL +/* + * Structure used to pass around logical block paths generated by + * ext2_getlbns and used by truncate and bmap code. + */ +struct indir { + int32_t in_lbn; /* Logical block number. */ + int in_off; /* Offset in buffer. */ + int in_exists; /* Flag if the block exists. */ +}; + +/* Convert between inode pointers and vnode pointers. 
*/ +#define VTOI(vp) ((struct inode *)(vp)->v_data) /* vnode -> inode: casts the vnode's v_data pointer */ +#define ITOV(ip) ((ip)->i_vnode) /* inode -> vnode: reads the inode's i_vnode back-pointer */ + +/* This overlays the fid structure (see mount.h). */ +struct ufid { + u_int16_t ufid_len; /* Length of structure. */ + u_int16_t ufid_pad; /* Force 32-bit alignment. */ + ino_t ufid_ino; /* File number (ino). */ + int32_t ufid_gen; /* Generation number. */ +}; +#endif /* _KERNEL */ + +#endif /* !_FS_EXT2FS_INODE_H_ */ |