diff options
author | rgrimes <rgrimes@FreeBSD.org> | 1994-05-24 10:09:53 +0000 |
---|---|---|
committer | rgrimes <rgrimes@FreeBSD.org> | 1994-05-24 10:09:53 +0000 |
commit | 8fb65ce818b3e3c6f165b583b910af24000768a5 (patch) | |
tree | ba751e4f2166aefec707c9d7401c7ff432506642 /sys/ufs/ffs | |
parent | a6ce65d368e623088a4c1a29865889f431b15420 (diff) | |
download | FreeBSD-src-8fb65ce818b3e3c6f165b583b910af24000768a5.zip FreeBSD-src-8fb65ce818b3e3c6f165b583b910af24000768a5.tar.gz |
BSD 4.4 Lite Kernel Sources
Diffstat (limited to 'sys/ufs/ffs')
-rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 1474 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_balloc.c | 282 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_extern.h | 101 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_inode.c | 488 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_subr.c | 238 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_tables.c | 136 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vfsops.c | 843 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vnops.c | 288 | ||||
-rw-r--r-- | sys/ufs/ffs/fs.h | 489 |
9 files changed, 4339 insertions, 0 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c new file mode 100644 index 0000000..cdd2e4b --- /dev/null +++ b/sys/ufs/ffs/ffs_alloc.c @@ -0,0 +1,1474 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_alloc.c 8.8 (Berkeley) 2/21/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/syslog.h> + +#include <vm/vm.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +extern u_long nextgennumber; + +static daddr_t ffs_alloccg __P((struct inode *, int, daddr_t, int)); +static daddr_t ffs_alloccgblk __P((struct fs *, struct cg *, daddr_t)); +static daddr_t ffs_clusteralloc __P((struct inode *, int, daddr_t, int)); +static ino_t ffs_dirpref __P((struct fs *)); +static daddr_t ffs_fragextend __P((struct inode *, int, long, int, int)); +static void ffs_fserr __P((struct fs *, u_int, char *)); +static u_long ffs_hashalloc + __P((struct inode *, int, long, int, u_long (*)())); +static ino_t ffs_nodealloccg __P((struct inode *, int, daddr_t, int)); +static daddr_t ffs_mapsearch __P((struct fs *, struct cg *, daddr_t, int)); + +/* + * Allocate a block in the file system. + * + * The size of the requested block is given, which must be some + * multiple of fs_fsize and <= fs_bsize. + * A preference may be optionally specified. If a preference is given + * the following hierarchy is used to allocate a block: + * 1) allocate the requested block. + * 2) allocate a rotationally optimal block in the same cylinder. 
+ *   3) allocate a block in the same cylinder group.
+ *   4) quadratically rehash into other cylinder groups, until an
+ *      available block is located.
+ * If no block preference is given the following hierarchy is used
+ * to allocate a block:
+ *   1) allocate a block in the cylinder group that contains the
+ *      inode for the file.
+ *   2) quadratically rehash into other cylinder groups, until an
+ *      available block is located.
+ *
+ * On success 0 is returned, the new block number is stored in *bnp and
+ * the inode's block count and IN_CHANGE | IN_UPDATE flags are updated.
+ * On failure *bnp is left 0 and the return value is ENOSPC, or (with
+ * QUOTA) the error reported by chkdq().
+ */
+ffs_alloc(ip, lbn, bpref, size, cred, bnp)
+	register struct inode *ip;
+	daddr_t lbn, bpref;
+	int size;
+	struct ucred *cred;
+	daddr_t *bnp;
+{
+	register struct fs *fs;
+	daddr_t bno;
+	int cg, error;
+
+	*bnp = 0;
+	fs = ip->i_fs;
+#ifdef DIAGNOSTIC
+	if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) {
+		printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt);
+		panic("ffs_alloc: bad size");
+	}
+	if (cred == NOCRED)
+		panic("ffs_alloc: missing credential\n");
+#endif /* DIAGNOSTIC */
+	if (size == fs->fs_bsize && fs->fs_cstotal.cs_nbfree == 0)
+		goto nospace;
+	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
+		goto nospace;	/* only uid 0 may allocate past fs_minfree */
+#ifdef QUOTA
+	if (error = chkdq(ip, (long)btodb(size), cred, 0))
+		return (error);
+#endif
+	if (bpref >= fs->fs_size)
+		bpref = 0;	/* out-of-range preference: treat as none */
+	if (bpref == 0)
+		cg = ino_to_cg(fs, ip->i_number);
+	else
+		cg = dtog(fs, bpref);
+	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size,
+	    (u_long (*)())ffs_alloccg);
+	if (bno > 0) {
+		ip->i_blocks += btodb(size);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		*bnp = bno;
+		return (0);
+	}
+#ifdef QUOTA
+	/*
+	 * Restore user's disk quota because allocation failed.
+	 */
+	(void) chkdq(ip, (long)-btodb(size), cred, FORCE);
+#endif
+nospace:
+	ffs_fserr(fs, cred->cr_uid, "file system full");
+	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Reallocate a fragment to a bigger size
+ *
+ * The number and size of the old block is given, and a preference
+ * and new size is also specified. The allocator attempts to extend
+ * the original block. Failing that, the regular block allocator is
+ * invoked to get an appropriate block.
+ *
+ * On success 0 is returned and *bpp is the buffer read for lbprev,
+ * resized to nsize with bytes osize..nsize zeroed. On failure *bpp is
+ * left 0 and the return value is the bread() error, the chkdq() error
+ * (QUOTA only) or ENOSPC.
+ */
+ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp)
+	register struct inode *ip;
+	daddr_t lbprev;
+	daddr_t bpref;
+	int osize, nsize;
+	struct ucred *cred;
+	struct buf **bpp;
+{
+	register struct fs *fs;
+	struct buf *bp;
+	int cg, request, error;
+	daddr_t bprev, bno;
+
+	*bpp = 0;
+	fs = ip->i_fs;
+#ifdef DIAGNOSTIC
+	if ((u_int)osize > fs->fs_bsize || fragoff(fs, osize) != 0 ||
+	    (u_int)nsize > fs->fs_bsize || fragoff(fs, nsize) != 0) {
+		printf(
+		    "dev = 0x%x, bsize = %d, osize = %d, nsize = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, osize, nsize, fs->fs_fsmnt);
+		panic("ffs_realloccg: bad size");
+	}
+	if (cred == NOCRED)
+		panic("ffs_realloccg: missing credential\n");
+#endif /* DIAGNOSTIC */
+	if (cred->cr_uid != 0 && freespace(fs, fs->fs_minfree) <= 0)
+		goto nospace;
+	if ((bprev = ip->i_db[lbprev]) == 0) {
+		printf("dev = 0x%x, bsize = %d, bprev = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_bsize, bprev, fs->fs_fsmnt);
+		panic("ffs_realloccg: bad bprev");
+	}
+	/*
+	 * Allocate the extra space in the buffer.
+	 */
+	if (error = bread(ITOV(ip), lbprev, osize, NOCRED, &bp)) {
+		brelse(bp);
+		return (error);
+	}
+#ifdef QUOTA
+	if (error = chkdq(ip, (long)btodb(nsize - osize), cred, 0)) {
+		brelse(bp);
+		return (error);
+	}
+#endif
+	/*
+	 * Check for extension in the existing location.
+	 */
+	cg = dtog(fs, bprev);	/* also the starting group for ffs_hashalloc below */
+	if (bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize)) {
+		if (bp->b_blkno != fsbtodb(fs, bno))
+			panic("bad blockno");
+		ip->i_blocks += btodb(nsize - osize);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		allocbuf(bp, nsize);
+		bp->b_flags |= B_DONE;
+		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
+		*bpp = bp;
+		return (0);
+	}
+	/*
+	 * Allocate a new disk location.
+	 */
+	if (bpref >= fs->fs_size)
+		bpref = 0;
+	switch ((int)fs->fs_optim) {
+	case FS_OPTSPACE:
+		/*
+		 * Allocate an exact sized fragment. Although this makes
+		 * best use of space, we will waste time relocating it if
+		 * the file continues to grow. If the fragmentation is
+		 * less than half of the minimum free reserve, we choose
+		 * to begin optimizing for time.
+		 */
+		request = nsize;
+		if (fs->fs_minfree < 5 ||
+		    fs->fs_cstotal.cs_nffree >
+		    fs->fs_dsize * fs->fs_minfree / (2 * 100))
+			break;
+		log(LOG_NOTICE, "%s: optimization changed from SPACE to TIME\n",
+			fs->fs_fsmnt);
+		fs->fs_optim = FS_OPTTIME;
+		break;
+	case FS_OPTTIME:
+		/*
+		 * At this point we have discovered a file that is trying to
+		 * grow a small fragment to a larger fragment. To save time,
+		 * we allocate a full sized block, then free the unused portion.
+		 * If the file continues to grow, the `ffs_fragextend' call
+		 * above will be able to grow it in place without further
+		 * copying. If aberrant programs cause disk fragmentation to
+		 * grow within 2% of the free reserve, we choose to begin
+		 * optimizing for space.
+		 */
+		request = fs->fs_bsize;
+		if (fs->fs_cstotal.cs_nffree <
+		    fs->fs_dsize * (fs->fs_minfree - 2) / 100)
+			break;
+		log(LOG_NOTICE, "%s: optimization changed from TIME to SPACE\n",
+			fs->fs_fsmnt);
+		fs->fs_optim = FS_OPTSPACE;
+		break;
+	default:
+		printf("dev = 0x%x, optim = %d, fs = %s\n",
+		    ip->i_dev, fs->fs_optim, fs->fs_fsmnt);
+		panic("ffs_realloccg: bad optim");
+		/* NOTREACHED */
+	}
+	bno = (daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request,
+	    (u_long (*)())ffs_alloccg);
+	if (bno > 0) {
+		bp->b_blkno = fsbtodb(fs, bno);
+		(void) vnode_pager_uncache(ITOV(ip));
+		ffs_blkfree(ip, bprev, (long)osize);
+		if (nsize < request)	/* give back the unused tail of the request */
+			ffs_blkfree(ip, bno + numfrags(fs, nsize),
+			    (long)(request - nsize));
+		ip->i_blocks += btodb(nsize - osize);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		allocbuf(bp, nsize);
+		bp->b_flags |= B_DONE;
+		bzero((char *)bp->b_data + osize, (u_int)nsize - osize);
+		*bpp = bp;
+		return (0);
+	}
+#ifdef QUOTA
+	/*
+	 * Restore user's disk quota because allocation failed.
+	 */
+	(void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE);
+#endif
+	brelse(bp);
+nospace:
+	/*
+	 * no space available
+	 */
+	ffs_fserr(fs, cred->cr_uid, "file system full");
+	uprintf("\n%s: write failed, file system is full\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Reallocate a sequence of blocks into a contiguous sequence of blocks.
+ *
+ * The vnode and an array of buffer pointers for a range of sequential
+ * logical blocks to be made contiguous is given. The allocator attempts
+ * to find a range of sequential blocks starting as close as possible to
+ * an fs_rotdelay offset from the end of the allocation for the logical
+ * block immediately preceding the current range. If successful, the
+ * physical block numbers in the buffer pointers and in the inode are
+ * changed to reflect the new allocation. If unsuccessful, the allocation
+ * is left unchanged. The success in doing the reallocation is returned.
+ * Note that the error return is not reflected back to the user. Rather
+ * the previous block allocation will be used.
+ *
+ * Returns 0 when the cluster has been moved to the new location and
+ * ENOSPC in every other case (clustering summaries disabled, the range
+ * straddles cylinder groups, a block map could not be read, or no
+ * suitable cluster was found).
+ */
+#include <sys/sysctl.h>
+int doasyncfree = 1;
+struct ctldebug debug14 = { "doasyncfree", &doasyncfree };
+int
+ffs_reallocblks(ap)
+	struct vop_reallocblks_args /* {
+		struct vnode *a_vp;
+		struct cluster_save *a_buflist;
+	} */ *ap;
+{
+	struct fs *fs;
+	struct inode *ip;
+	struct vnode *vp;
+	struct buf *sbp, *ebp;
+	daddr_t *bap, *sbap, *ebap;
+	struct cluster_save *buflist;
+	daddr_t start_lbn, end_lbn, soff, newblk, blkno;
+	struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp;
+	int i, len, start_lvl, end_lvl, pref, ssize;
+
+	vp = ap->a_vp;
+	ip = VTOI(vp);
+	fs = ip->i_fs;
+	if (fs->fs_contigsumsize <= 0)
+		return (ENOSPC);
+	buflist = ap->a_buflist;
+	len = buflist->bs_nchildren;
+	start_lbn = buflist->bs_children[0]->b_lblkno;
+	end_lbn = start_lbn + len - 1;
+#ifdef DIAGNOSTIC
+	for (i = 1; i < len; i++)
+		if (buflist->bs_children[i]->b_lblkno != start_lbn + i)
+			panic("ffs_reallocblks: non-cluster");
+#endif
+	/*
+	 * If the latest allocation is in a new cylinder group, assume that
+	 * the filesystem has decided to move and do not force it back to
+	 * the previous cylinder group.
+	 */
+	if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) !=
+	    dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno)))
+		return (ENOSPC);
+	if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) ||
+	    ufs_getlbns(vp, end_lbn, end_ap, &end_lvl))
+		return (ENOSPC);
+	/*
+	 * Get the starting offset and block map for the first block.
+	 */
+	if (start_lvl == 0) {
+		sbap = &ip->i_db[0];
+		soff = start_lbn;
+	} else {
+		idp = &start_ap[start_lvl - 1];
+		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) {
+			brelse(sbp);
+			return (ENOSPC);
+		}
+		sbap = (daddr_t *)sbp->b_data;
+		soff = idp->in_off;
+	}
+	/*
+	 * Find the preferred location for the cluster.
+	 */
+	pref = ffs_blkpref(ip, start_lbn, soff, sbap);
+	/*
+	 * If the block range spans two block maps, get the second map.
+	 * ssize is the number of blocks described by the first map; it
+	 * equals len when a single map covers the whole range.
+	 */
+	if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) {
+		ssize = len;
+	} else {
+#ifdef DIAGNOSTIC
+		/*
+		 * start_lvl is 0 when the range begins in the inode's
+		 * direct blocks; start_ap[-1] must not be examined then.
+		 */
+		if (start_lvl > 0 &&
+		    start_ap[start_lvl - 1].in_lbn == idp->in_lbn)
+			panic("ffs_reallocblk: start == end");
+#endif
+		ssize = len - (idp->in_off + 1);
+		if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp))
+			goto fail;
+		ebap = (daddr_t *)ebp->b_data;
+	}
+	/*
+	 * Search the block map looking for an allocation of the desired size.
+	 */
+	if ((newblk = (daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref,
+	    len, (u_long (*)())ffs_clusteralloc)) == 0)
+		goto fail;
+	/*
+	 * We have found a new contiguous block.
+	 *
+	 * First we have to replace the old block pointers with the new
+	 * block pointers in the inode and indirect blocks associated
+	 * with the file.
+	 */
+	blkno = newblk;
+	for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) {
+		if (i == ssize)
+			bap = ebap;	/* continue in the second map */
+#ifdef DIAGNOSTIC
+		if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap))
+			panic("ffs_reallocblks: alloc mismatch");
+#endif
+		*bap++ = blkno;
+	}
+	/*
+	 * Next we must write out the modified inode and indirect blocks.
+	 * For strict correctness, the writes should be synchronous since
+	 * the old block values may have been written to disk. In practise
+	 * they are almost never written, but if we are concerned about
+	 * strict correctness, the `doasyncfree' flag should be set to zero.
+	 *
+	 * The test on `doasyncfree' should be changed to test a flag
+	 * that shows whether the associated buffers and inodes have
+	 * been written. The flag should be set when the cluster is
+	 * started and cleared whenever the buffer or inode is flushed.
+	 * We can then check below to see if it is set, and do the
+	 * synchronous write only when it has been cleared.
+	 */
+	if (sbap != &ip->i_db[0]) {
+		if (doasyncfree)
+			bdwrite(sbp);
+		else
+			bwrite(sbp);
+	} else {
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		if (!doasyncfree)
+			VOP_UPDATE(vp, &time, &time, MNT_WAIT);
+	}
+	if (ssize < len) {
+		if (doasyncfree)
+			bdwrite(ebp);
+		else
+			bwrite(ebp);
+	}
+	/*
+	 * Last, free the old blocks and assign the new blocks to the buffers.
+	 */
+	for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) {
+		ffs_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno),
+		    fs->fs_bsize);
+		buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno);
+	}
+	return (0);
+
+fail:
+	/* Release whichever block-map buffers were obtained above. */
+	if (ssize < len)
+		brelse(ebp);
+	if (sbap != &ip->i_db[0])
+		brelse(sbp);
+	return (ENOSPC);
+}
+
+/*
+ * Allocate an inode in the file system.
+ *
+ * If allocating a directory, use ffs_dirpref to select the inode.
+ * If allocating in a directory, the following hierarchy is followed:
+ *   1) allocate the preferred inode.
+ *   2) allocate an inode in the same cylinder group.
+ *   3) quadratically rehash into other cylinder groups, until an
+ *      available inode is located.
+ * If no inode preference is given the following hierarchy is used
+ * to allocate an inode:
+ *   1) allocate an inode in cylinder group 0.
+ *   2) quadratically rehash into other cylinder groups, until an
+ *      available inode is located.
+ *
+ * On success 0 is returned and *a_vpp is the vnode of the new inode,
+ * with i_flags cleared and a fresh generation number. ENOSPC is
+ * returned when no inode could be allocated. If VFS_VGET fails the
+ * inode is released again with VOP_VFREE and that error is returned.
+ */
+ffs_valloc(ap)
+	struct vop_valloc_args /* {
+		struct vnode *a_pvp;
+		int a_mode;
+		struct ucred *a_cred;
+		struct vnode **a_vpp;
+	} */ *ap;
+{
+	register struct vnode *pvp = ap->a_pvp;
+	register struct inode *pip;
+	register struct fs *fs;
+	register struct inode *ip;
+	mode_t mode = ap->a_mode;
+	ino_t ino, ipref;
+	int cg, error;
+
+	*ap->a_vpp = NULL;
+	pip = VTOI(pvp);
+	fs = pip->i_fs;
+	if (fs->fs_cstotal.cs_nifree == 0)
+		goto noinodes;
+
+	/* Directories are spread out; other files stay near the parent. */
+	if ((mode & IFMT) == IFDIR)
+		ipref = ffs_dirpref(fs);
+	else
+		ipref = pip->i_number;
+	if (ipref >= fs->fs_ncg * fs->fs_ipg)
+		ipref = 0;
+	cg = ino_to_cg(fs, ipref);
+	/* Cast the allocator the same way the block-allocating callers do. */
+	ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode,
+	    (u_long (*)())ffs_nodealloccg);
+	if (ino == 0)
+		goto noinodes;
+	error = VFS_VGET(pvp->v_mount, ino, ap->a_vpp);
+	if (error) {
+		VOP_VFREE(pvp, ino, mode);
+		return (error);
+	}
+	ip = VTOI(*ap->a_vpp);
+	if (ip->i_mode) {
+		printf("mode = 0%o, inum = %d, fs = %s\n",
+		    ip->i_mode, ip->i_number, fs->fs_fsmnt);
+		panic("ffs_valloc: dup alloc");
+	}
+	if (ip->i_blocks) {				/* XXX */
+		printf("free inode %s/%d had %d blocks\n",
+		    fs->fs_fsmnt, ino, ip->i_blocks);
+		ip->i_blocks = 0;
+	}
+	ip->i_flags = 0;
+	/*
+	 * Set up a new generation number for this inode.
+	 * The counter is bumped, and pulled up to the current time in
+	 * seconds whenever it has fallen behind it.
+	 */
+	if (++nextgennumber < (u_long)time.tv_sec)
+		nextgennumber = time.tv_sec;
+	ip->i_gen = nextgennumber;
+	return (0);
+noinodes:
+	ffs_fserr(fs, ap->a_cred->cr_uid, "out of inodes");
+	uprintf("\n%s: create/symlink failed, no inodes free\n", fs->fs_fsmnt);
+	return (ENOSPC);
+}
+
+/*
+ * Find a cylinder to place a directory.
+ *
+ * The policy implemented by this algorithm is to select from
+ * among those cylinder groups with above the average number of
+ * free inodes, the one with the smallest number of directories.
+ *
+ * The value returned is fs_ipg times the chosen group number. Group 0
+ * is the result when no group passes both tests.
+ */
+static ino_t
+ffs_dirpref(fs)
+	register struct fs *fs;
+{
+	int cg, best_cg, fewest_dirs, mean_ifree;
+
+	best_cg = 0;
+	fewest_dirs = fs->fs_ipg;
+	mean_ifree = fs->fs_cstotal.cs_nifree / fs->fs_ncg;
+	for (cg = 0; cg < fs->fs_ncg; cg++) {
+		/* Skip groups with fewer free inodes than the mean. */
+		if (fs->fs_cs(fs, cg).cs_nifree < mean_ifree)
+			continue;
+		/* Only a strictly smaller directory count displaces the best. */
+		if (fs->fs_cs(fs, cg).cs_ndir >= fewest_dirs)
+			continue;
+		best_cg = cg;
+		fewest_dirs = fs->fs_cs(fs, cg).cs_ndir;
+	}
+	return ((ino_t)(fs->fs_ipg * best_cg));
+}
+
+/*
+ * Select the desired position for the next block in a file. The file is
+ * logically divided into sections. The first section is composed of the
+ * direct blocks. Each additional section contains fs_maxbpg blocks.
+ *
+ * If no blocks have been allocated in the first section, the policy is to
+ * request a block in the same cylinder group as the inode that describes
+ * the file. If no blocks have been allocated in any other section, the
+ * policy is to place the section in a cylinder group with a greater than
+ * average number of free blocks. An appropriate cylinder group is found
+ * by using a rotor that sweeps the cylinder groups. When a new group of
+ * blocks is needed, the sweep begins in the cylinder group following the
+ * cylinder group from which the previous allocation was made. The sweep
+ * continues until a cylinder group with greater than the average number
+ * of free blocks is found. If the allocation is for the first block in an
+ * indirect block, the information on the previous allocation is unavailable;
+ * here a best guess is made based upon the logical block number being
+ * allocated.
+ *
+ * If a section is already partially allocated, the policy is to
+ * contiguously allocate fs_maxcontig blocks. The end of one of these
+ * contiguous blocks and the beginning of the next is physically separated
+ * so that the disk head will be in transit between them for at least
+ * fs_rotdelay milliseconds. This is to allow time for the processor to
+ * schedule another I/O transfer.
+ *
+ * Returns the preferred block address, or 0 when a new section is being
+ * started and no cylinder group holds at least the average number of
+ * free blocks.
+ */
+daddr_t
+ffs_blkpref(ip, lbn, indx, bap)
+	struct inode *ip;
+	daddr_t lbn;
+	int indx;
+	daddr_t *bap;
+{
+	register struct fs *fs;
+	register int cg;
+	int avgbfree, startcg;
+	daddr_t nextblk;
+
+	fs = ip->i_fs;
+	/*
+	 * bap[indx - 1] is only examined when indx is not a multiple of
+	 * fs_maxbpg, so it is never read for indx == 0.
+	 */
+	if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
+		if (lbn < NDADDR) {
+			cg = ino_to_cg(fs, ip->i_number);
+			return (fs->fs_fpg * cg + fs->fs_frag);
+		}
+		/*
+		 * Find a cylinder group with at least the average number
+		 * of unused data blocks.
+		 */
+		if (indx == 0 || bap[indx - 1] == 0)
+			startcg =
+			    ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+		else
+			startcg = dtog(fs, bap[indx - 1]) + 1;
+		startcg %= fs->fs_ncg;
+		avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg;
+		/* Sweep from startcg to the last group, then wrap around. */
+		for (cg = startcg; cg < fs->fs_ncg; cg++)
+			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+				fs->fs_cgrotor = cg;
+				return (fs->fs_fpg * cg + fs->fs_frag);
+			}
+		for (cg = 0; cg <= startcg; cg++)
+			if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
+				fs->fs_cgrotor = cg;
+				return (fs->fs_fpg * cg + fs->fs_frag);
+			}
+		/* daddr_t is an integer type: report "no preference" as 0. */
+		return (0);
+	}
+	/*
+	 * One or more previous blocks have been laid out. If less
+	 * than fs_maxcontig previous blocks are contiguous, the
+	 * next block is requested contiguously, otherwise it is
+	 * requested rotationally delayed by fs_rotdelay milliseconds.
+	 */
+	nextblk = bap[indx - 1] + fs->fs_frag;
+	if (indx < fs->fs_maxcontig || bap[indx - fs->fs_maxcontig] +
+	    blkstofrags(fs, fs->fs_maxcontig) != nextblk)
+		return (nextblk);
+	if (fs->fs_rotdelay != 0) {
+		/*
+		 * Here we convert ms of delay to frags as:
+		 * (frags) = (ms) * (rev/sec) * (sect/rev) /
+		 *	((sect/frag) * (ms/sec))
+		 * then round up to the next block.
+		 */
+		nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect /
+		    (NSPF(fs) * 1000), fs->fs_frag);
+	}
+	return (nextblk);
+}
+
+/*
+ * Implement the cylinder overflow algorithm.
+ *
+ * The policy implemented by this algorithm is:
+ *   1) allocate the block in its requested cylinder group.
+ *   2) quadratically rehash on the cylinder group number.
+ *   3) brute force search for a free block.
+ *
+ * The allocator is called as (*allocator)(ip, cg, pref, size). Its first
+ * non-zero result is returned; 0 is returned when every attempt fails.
+ * The preference is passed through only for the requested group; every
+ * other group is tried with a preference of 0.
+ */
+/*VARARGS5*/
+static u_long
+ffs_hashalloc(ip, cg, pref, size, allocator)
+	struct inode *ip;
+	int cg;
+	long pref;
+	int size;	/* size for data blocks, mode for inodes */
+	u_long (*allocator)();
+{
+	register struct fs *fs;
+	u_long result;		/* same type as the allocator's return */
+	int i, icg = cg;
+
+	fs = ip->i_fs;
+	/*
+	 * 1: preferred cylinder group
+	 */
+	result = (*allocator)(ip, cg, pref, size);
+	if (result)
+		return (result);
+	/*
+	 * 2: quadratic rehash
+	 * The allocator is called without a prototype, so the "no
+	 * preference" argument is passed as a long, like pref above.
+	 */
+	for (i = 1; i < fs->fs_ncg; i *= 2) {
+		cg += i;
+		if (cg >= fs->fs_ncg)
+			cg -= fs->fs_ncg;
+		result = (*allocator)(ip, cg, (long)0, size);
+		if (result)
+			return (result);
+	}
+	/*
+	 * 3: brute force search
+	 * Note that we start at i == 2, since 0 was checked initially,
+	 * and 1 is always checked in the quadratic rehash.
+	 */
+	cg = (icg + 2) % fs->fs_ncg;
+	for (i = 2; i < fs->fs_ncg; i++) {
+		result = (*allocator)(ip, cg, (long)0, size);
+		if (result)
+			return (result);
+		cg++;
+		if (cg == fs->fs_ncg)
+			cg = 0;
+	}
+	return (0);
+}
+
+/*
+ * Determine whether a fragment can be extended.
+ *
+ * Check to see if the necessary fragments are available, and
+ * if they are, allocate them.
+ *
+ * Returns bprev when the fragment was extended in place from osize to
+ * nsize bytes, and 0 (written as NULL below) when it cannot be: too few
+ * free fragments in the group, the extension would cross a block
+ * boundary, the cylinder group could not be read or has a bad magic
+ * number, or one of the needed fragments is already in use.
+ */
+static daddr_t
+ffs_fragextend(ip, cg, bprev, osize, nsize)
+	struct inode *ip;
+	int cg;
+	long bprev;
+	int osize, nsize;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	long bno;
+	int frags, bbase;
+	int i, error;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nffree < numfrags(fs, nsize - osize))
+		return (NULL);
+	frags = numfrags(fs, nsize);
+	bbase = fragnum(fs, bprev);
+	if (bbase > fragnum(fs, (bprev + frags - 1))) {
+		/* cannot extend across a block boundary */
+		return (NULL);
+	}
+	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+		(int)fs->fs_cgsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp)) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp->cg_time = time.tv_sec;
+	bno = dtogd(fs, bprev);
+	for (i = numfrags(fs, osize); i < frags; i++)
+		if (isclr(cg_blksfree(cgp), bno + i)) {
+			brelse(bp);	/* a needed fragment is taken */
+			return (NULL);
+		}
+	/*
+	 * the current fragment can be extended
+	 * deduct the count on fragment being extended into
+	 * increase the count on the remaining fragment (if any)
+	 * allocate the extended piece
+	 *
+	 * The loop below leaves i at the end of the free run that
+	 * follows the old fragment, so i - numfrags(osize) is that
+	 * run's length before the extension and i - frags after it.
+	 */
+	for (i = frags; i < fs->fs_frag - bbase; i++)
+		if (isclr(cg_blksfree(cgp), bno + i))
+			break;
+	cgp->cg_frsum[i - numfrags(fs, osize)]--;
+	if (i != frags)
+		cgp->cg_frsum[i - frags]++;
+	for (i = numfrags(fs, osize); i < frags; i++) {
+		clrbit(cg_blksfree(cgp), bno + i);
+		cgp->cg_cs.cs_nffree--;
+		fs->fs_cstotal.cs_nffree--;
+		fs->fs_cs(fs, cg).cs_nffree--;
+	}
+	fs->fs_fmod = 1;
+	bdwrite(bp);
+	return (bprev);
+}
+
+/*
+ * Determine whether a block can be allocated.
+ *
+ * Check to see if a block of the appropriate size is available,
+ * and if it is, allocate it.
+ *
+ * Returns the address of the allocated block or fragment run, or 0
+ * (written as NULL below) when cylinder group cg cannot satisfy the
+ * request or its header cannot be read or fails the magic check.
+ */
+static daddr_t
+ffs_alloccg(ip, cg, bpref, size)
+	struct inode *ip;
+	int cg;
+	daddr_t bpref;
+	int size;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	register int i;
+	int error, bno, frags, allocsiz;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nbfree == 0 && size == fs->fs_bsize)
+		return (NULL);
+	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+		(int)fs->fs_cgsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp) ||
+	    (cgp->cg_cs.cs_nbfree == 0 && size == fs->fs_bsize)) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp->cg_time = time.tv_sec;
+	if (size == fs->fs_bsize) {
+		bno = ffs_alloccgblk(fs, cgp, bpref);
+		bdwrite(bp);
+		return (bno);
+	}
+	/*
+	 * check to see if any fragments are already available
+	 * allocsiz is the size which will be allocated, hacking
+	 * it down to a smaller size if necessary
+	 */
+	frags = numfrags(fs, size);
+	for (allocsiz = frags; allocsiz < fs->fs_frag; allocsiz++)
+		if (cgp->cg_frsum[allocsiz] != 0)
+			break;
+	if (allocsiz == fs->fs_frag) {
+		/*
+		 * no fragments were available, so a block will be
+		 * allocated, and hacked up
+		 */
+		if (cgp->cg_cs.cs_nbfree == 0) {
+			brelse(bp);
+			return (NULL);
+		}
+		bno = ffs_alloccgblk(fs, cgp, bpref);
+		bpref = dtogd(fs, bno);
+		for (i = frags; i < fs->fs_frag; i++)
+			setbit(cg_blksfree(cgp), bpref + i);
+		i = fs->fs_frag - frags;	/* fragments left over from the block */
+		cgp->cg_cs.cs_nffree += i;
+		fs->fs_cstotal.cs_nffree += i;
+		fs->fs_cs(fs, cg).cs_nffree += i;
+		fs->fs_fmod = 1;
+		cgp->cg_frsum[i]++;
+		bdwrite(bp);
+		return (bno);
+	}
+	bno = ffs_mapsearch(fs, cgp, bpref, allocsiz);
+	if (bno < 0) {
+		brelse(bp);
+		return (NULL);
+	}
+	for (i = 0; i < frags; i++)
+		clrbit(cg_blksfree(cgp), bno + i);
+	cgp->cg_cs.cs_nffree -= frags;
+	fs->fs_cstotal.cs_nffree -= frags;
+	fs->fs_cs(fs, cg).cs_nffree -= frags;
+	fs->fs_fmod = 1;
+	cgp->cg_frsum[allocsiz]--;
+	if (frags != allocsiz)
+		cgp->cg_frsum[allocsiz - frags]++;	/* remainder of the run stays free */
+	bdwrite(bp);
+	return (cg * fs->fs_fpg + bno);
+}
+
+/*
+ * Allocate a block in a cylinder group.
+ *
+ * This algorithm implements the following policy:
+ *   1) allocate the requested block.
+ *   2) allocate a rotationally optimal block in the same cylinder.
+ *   3) allocate the next available block on the block rotor for the
+ *      specified cylinder group.
+ * Note that this routine only allocates fs_bsize blocks; these
+ * blocks may be fragmented by the routine that allocates them.
+ *
+ * The free map and the block counts in both cgp and fs are updated
+ * here; writing the cylinder group buffer is left to the caller.
+ * Returns the address of the block, or 0 (written as NULL below) when
+ * ffs_mapsearch finds none.
+ */
+static daddr_t
+ffs_alloccgblk(fs, cgp, bpref)
+	register struct fs *fs;
+	register struct cg *cgp;
+	daddr_t bpref;
+{
+	daddr_t bno, blkno;
+	int cylno, pos, delta;
+	short *cylbp;
+	register int i;
+
+	if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
+		bpref = cgp->cg_rotor;
+		goto norot;
+	}
+	bpref = blknum(fs, bpref);
+	bpref = dtogd(fs, bpref);
+	/*
+	 * if the requested block is available, use it
+	 */
+	if (ffs_isblock(fs, cg_blksfree(cgp), fragstoblks(fs, bpref))) {
+		bno = bpref;
+		goto gotit;
+	}
+	/*
+	 * check for a block available on the same cylinder
+	 */
+	cylno = cbtocylno(fs, bpref);
+	if (cg_blktot(cgp)[cylno] == 0)
+		goto norot;
+	if (fs->fs_cpc == 0) {
+		/*
+		 * Block layout information is not available.
+		 * Leaving bpref unchanged means we take the
+		 * next available free block following the one
+		 * we just allocated. Hopefully this will at
+		 * least hit a track cache on drives of unknown
+		 * geometry (e.g. SCSI).
+		 */
+		goto norot;
+	}
+	/*
+	 * check the summary information to see if a block is
+	 * available in the requested cylinder starting at the
+	 * requested rotational position and proceeding around.
+	 */
+	cylbp = cg_blks(fs, cgp, cylno);
+	pos = cbtorpos(fs, bpref);
+	for (i = pos; i < fs->fs_nrpos; i++)
+		if (cylbp[i] > 0)
+			break;
+	if (i == fs->fs_nrpos)
+		for (i = 0; i < pos; i++)
+			if (cylbp[i] > 0)
+				break;
+	if (cylbp[i] > 0) {
+		/*
+		 * found a rotational position, now find the actual
+		 * block. A panic if none is actually there.
+		 */
+		pos = cylno % fs->fs_cpc;
+		bno = (cylno - pos) * fs->fs_spc / NSPB(fs);
+		if (fs_postbl(fs, pos)[i] == -1) {
+			printf("pos = %d, i = %d, fs = %s\n",
+			    pos, i, fs->fs_fsmnt);
+			panic("ffs_alloccgblk: cyl groups corrupted");
+		}
+		for (i = fs_postbl(fs, pos)[i];; ) {
+			if (ffs_isblock(fs, cg_blksfree(cgp), bno + i)) {
+				bno = blkstofrags(fs, (bno + i));
+				goto gotit;
+			}
+			delta = fs_rotbl(fs)[i];
+			if (delta <= 0 ||
+			    delta + i > fragstoblks(fs, fs->fs_fpg))
+				break;
+			i += delta;
+		}
+		printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt);
+		panic("ffs_alloccgblk: can't find blk in cyl");
+	}
+norot:
+	/*
+	 * no blocks in the requested cylinder, so take next
+	 * available one in this cylinder group.
+	 */
+	bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
+	if (bno < 0)
+		return (NULL);
+	cgp->cg_rotor = bno;
+gotit:
+	blkno = fragstoblks(fs, bno);
+	ffs_clrblock(fs, cg_blksfree(cgp), (long)blkno);
+	ffs_clusteracct(fs, cgp, blkno, -1);
+	cgp->cg_cs.cs_nbfree--;
+	fs->fs_cstotal.cs_nbfree--;
+	fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--;
+	cylno = cbtocylno(fs, bno);
+	cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--;
+	cg_blktot(cgp)[cylno]--;
+	fs->fs_fmod = 1;
+	return (cgp->cg_cgx * fs->fs_fpg + bno);
+}
+
+/*
+ * Determine whether a cluster can be allocated.
+ *
+ * We do not currently check for optimal rotational layout if there
+ * are multiple choices in the same cylinder group. Instead we just
+ * take the first one that we find following bpref.
+ *
+ * Returns the address of the first block of a run of len free blocks
+ * in cylinder group cg, all of which have been allocated, or 0 when
+ * the group cannot supply such a run at or after bpref.
+ */
+static daddr_t
+ffs_clusteralloc(ip, cg, bpref, len)
+	struct inode *ip;
+	int cg;
+	daddr_t bpref;
+	int len;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	int i, run, bno, bit, map;
+	u_char *mapp;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nbfree < len)
+		return (0);
+	if (bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize,
+	    NOCRED, &bp))
+		goto fail;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto fail;
+	/*
+	 * Check to see if a cluster of the needed size (or bigger) is
+	 * available in this cylinder group.
+	 */
+	for (i = len; i <= fs->fs_contigsumsize; i++)
+		if (cg_clustersum(cgp)[i] > 0)
+			break;
+	if (i > fs->fs_contigsumsize)
+		goto fail;
+	/*
+	 * Search the cluster map to find a big enough cluster.
+	 * We take the first one that we find, even if it is larger
+	 * than we need as we prefer to get one close to the previous
+	 * block allocation. We do not search before the current
+	 * preference point as we do not want to allocate a block
+	 * that is allocated before the previous one (as we will
+	 * then have to wait for another pass of the elevator
+	 * algorithm before it will be read). We prefer to fail and
+	 * be recalled to try an allocation in the next cylinder group.
+	 */
+	if (dtog(fs, bpref) != cg)
+		bpref = 0;
+	else
+		bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
+	mapp = &cg_clustersfree(cgp)[bpref / NBBY];
+	map = *mapp++;
+	bit = 1 << (bpref % NBBY);
+	for (run = 0, i = bpref; i < cgp->cg_nclusterblks; i++) {
+		if ((map & bit) == 0) {
+			run = 0;
+		} else {
+			run++;
+			if (run == len)
+				break;
+		}
+		/* Step to the next bit, fetching a new map byte as needed. */
+		if ((i & (NBBY - 1)) != (NBBY - 1)) {
+			bit <<= 1;
+		} else {
+			map = *mapp++;
+			bit = 1;
+		}
+	}
+	if (i == cgp->cg_nclusterblks)
+		goto fail;
+	/*
+	 * Allocate the cluster that we have found.
+	 */
+	bno = cg * fs->fs_fpg + blkstofrags(fs, i - run + 1);
+	len = blkstofrags(fs, len);
+	for (i = 0; i < len; i += fs->fs_frag)
+		if (ffs_alloccgblk(fs, cgp, bno + i) != bno + i)
+			panic("ffs_clusteralloc: lost block");
+	/*
+	 * ffs_alloccgblk has changed the free map and counts held in
+	 * this buffer, so it must be queued for writing rather than
+	 * merely released, as the other allocators in this file do.
+	 */
+	bdwrite(bp);
+	return (bno);
+
+fail:
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Determine whether an inode can be allocated.
+ *
+ * Check to see if an inode is available, and if it is,
+ * allocate it using the following policy:
+ *   1) allocate the requested inode.
+ *   2) allocate the next available inode after the requested
+ *      inode in the specified cylinder group.
+ */
+static ino_t
+ffs_nodealloccg(ip, cg, ipref, mode)
+	struct inode *ip;
+	int cg;
+	daddr_t ipref;
+	int mode;
+{
+	register struct fs *fs;
+	register struct cg *cgp;
+	struct buf *bp;
+	int error, start, len, loc, map, i;
+
+	fs = ip->i_fs;
+	if (fs->fs_cs(fs, cg).cs_nifree == 0)
+		return (NULL);
+	error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)),
+		(int)fs->fs_cgsize, NOCRED, &bp);
+	if (error) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) {
+		brelse(bp);
+		return (NULL);
+	}
+	cgp->cg_time = time.tv_sec;
+	if (ipref) {
+		ipref %= fs->fs_ipg;
+		if (isclr(cg_inosused(cgp), ipref))
+			goto gotit;
+	}
+	start = cgp->cg_irotor / NBBY;
+	len = howmany(fs->fs_ipg - cgp->cg_irotor, NBBY);
+	loc = skpc(0xff, len, &cg_inosused(cgp)[start]);
+	if (loc == 0) {
+		len = start + 1;
+		start = 0;
+		loc = skpc(0xff, len, &cg_inosused(cgp)[0]);
+		if (loc == 0) {
+			printf("cg = %d, irotor = %d, fs = %s\n",
+			    cg, cgp->cg_irotor, fs->fs_fsmnt);
+			panic("ffs_nodealloccg: map corrupted");
+			/* NOTREACHED */
+		}
+	}
+	i = start + len - loc;
+	map = cg_inosused(cgp)[i];
+	ipref = i * NBBY;
+	for (i = 1; i < (1 << NBBY); i <<= 1, ipref++) {
+		if ((map & i) == 0) {
+			cgp->cg_irotor = ipref;
+			goto gotit;
+		}
+	}
+	printf("fs = %s\n", fs->fs_fsmnt);
+	panic("ffs_nodealloccg: block not in map");
+	/* NOTREACHED */
+gotit:
+ setbit(cg_inosused(cgp), ipref); + cgp->cg_cs.cs_nifree--; + fs->fs_cstotal.cs_nifree--; + fs->fs_cs(fs, cg).cs_nifree--; + fs->fs_fmod = 1; + if ((mode & IFMT) == IFDIR) { + cgp->cg_cs.cs_ndir++; + fs->fs_cstotal.cs_ndir++; + fs->fs_cs(fs, cg).cs_ndir++; + } + bdwrite(bp); + return (cg * fs->fs_ipg + ipref); +} + +/* + * Free a block or fragment. + * + * The specified block or fragment is placed back in the + * free map. If a fragment is deallocated, a possible + * block reassembly is checked. + */ +ffs_blkfree(ip, bno, size) + register struct inode *ip; + daddr_t bno; + long size; +{ + register struct fs *fs; + register struct cg *cgp; + struct buf *bp; + daddr_t blkno; + int i, error, cg, blk, frags, bbase; + + fs = ip->i_fs; + if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0) { + printf("dev = 0x%x, bsize = %d, size = %d, fs = %s\n", + ip->i_dev, fs->fs_bsize, size, fs->fs_fsmnt); + panic("blkfree: bad size"); + } + cg = dtog(fs, bno); + if ((u_int)bno >= fs->fs_size) { + printf("bad block %d, ino %d\n", bno, ip->i_number); + ffs_fserr(fs, ip->i_uid, "bad block"); + return; + } + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + if (error) { + brelse(bp); + return; + } + cgp = (struct cg *)bp->b_data; + if (!cg_chkmagic(cgp)) { + brelse(bp); + return; + } + cgp->cg_time = time.tv_sec; + bno = dtogd(fs, bno); + if (size == fs->fs_bsize) { + blkno = fragstoblks(fs, bno); + if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + printf("dev = 0x%x, block = %d, fs = %s\n", + ip->i_dev, bno, fs->fs_fsmnt); + panic("blkfree: freeing free block"); + } + ffs_setblock(fs, cg_blksfree(cgp), blkno); + ffs_clusteracct(fs, cgp, blkno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + i = cbtocylno(fs, bno); + cg_blks(fs, cgp, i)[cbtorpos(fs, bno)]++; + cg_blktot(cgp)[i]++; + } else { + bbase = bno - fragnum(fs, bno); + /* + * decrement the counts associated with the old frags + */ + 
blk = blkmap(fs, cg_blksfree(cgp), bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, -1); + /* + * deallocate the fragment + */ + frags = numfrags(fs, size); + for (i = 0; i < frags; i++) { + if (isset(cg_blksfree(cgp), bno + i)) { + printf("dev = 0x%x, block = %d, fs = %s\n", + ip->i_dev, bno + i, fs->fs_fsmnt); + panic("blkfree: freeing free frag"); + } + setbit(cg_blksfree(cgp), bno + i); + } + cgp->cg_cs.cs_nffree += i; + fs->fs_cstotal.cs_nffree += i; + fs->fs_cs(fs, cg).cs_nffree += i; + /* + * add back in counts associated with the new frags + */ + blk = blkmap(fs, cg_blksfree(cgp), bbase); + ffs_fragacct(fs, blk, cgp->cg_frsum, 1); + /* + * if a complete block has been reassembled, account for it + */ + blkno = fragstoblks(fs, bbase); + if (ffs_isblock(fs, cg_blksfree(cgp), blkno)) { + cgp->cg_cs.cs_nffree -= fs->fs_frag; + fs->fs_cstotal.cs_nffree -= fs->fs_frag; + fs->fs_cs(fs, cg).cs_nffree -= fs->fs_frag; + ffs_clusteracct(fs, cgp, blkno, 1); + cgp->cg_cs.cs_nbfree++; + fs->fs_cstotal.cs_nbfree++; + fs->fs_cs(fs, cg).cs_nbfree++; + i = cbtocylno(fs, bbase); + cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; + cg_blktot(cgp)[i]++; + } + } + fs->fs_fmod = 1; + bdwrite(bp); +} + +/* + * Free an inode. + * + * The specified inode is placed back in the free map. 
+ */
+int
+ffs_vfree(ap)
+	struct vop_vfree_args /* {
+		struct vnode *a_pvp;
+		ino_t a_ino;
+		int a_mode;
+	} */ *ap;
+{
+	struct inode *pip = VTOI(ap->a_pvp);
+	struct fs *fs = pip->i_fs;
+	struct cg *cgp;
+	struct buf *bp;
+	ino_t inum = ap->a_ino;
+	int cgno;
+
+	/* The inode number must lie inside the file system. */
+	if ((u_int)inum >= fs->fs_ipg * fs->fs_ncg)
+		panic("ifree: range: dev = 0x%x, ino = %d, fs = %s\n",
+		    pip->i_dev, inum, fs->fs_fsmnt);
+
+	/*
+	 * Bring in the owning cylinder group.  A read error or a bad
+	 * magic number abandons the free but still reports success.
+	 */
+	cgno = ino_to_cg(fs, inum);
+	if (bread(pip->i_devvp, fsbtodb(fs, cgtod(fs, cgno)),
+	    (int)fs->fs_cgsize, NOCRED, &bp) != 0)
+		goto release;
+	cgp = (struct cg *)bp->b_data;
+	if (!cg_chkmagic(cgp))
+		goto release;
+
+	cgp->cg_time = time.tv_sec;
+	inum %= fs->fs_ipg;		/* index within the group */
+	if (isclr(cg_inosused(cgp), inum)) {
+		printf("dev = 0x%x, ino = %d, fs = %s\n",
+		    pip->i_dev, inum, fs->fs_fsmnt);
+		if (fs->fs_ronly == 0)
+			panic("ifree: freeing free inode");
+	}
+	clrbit(cg_inosused(cgp), inum);
+	/* Pull the rotor back so the next search sees this inode. */
+	if (inum < cgp->cg_irotor)
+		cgp->cg_irotor = inum;
+
+	/* Free-inode counts: group, file system total, in-core summary. */
+	cgp->cg_cs.cs_nifree++;
+	fs->fs_cstotal.cs_nifree++;
+	fs->fs_cs(fs, cgno).cs_nifree++;
+	if ((ap->a_mode & IFMT) == IFDIR) {
+		cgp->cg_cs.cs_ndir--;
+		fs->fs_cstotal.cs_ndir--;
+		fs->fs_cs(fs, cgno).cs_ndir--;
+	}
+	fs->fs_fmod = 1;
+	bdwrite(bp);
+	return (0);
+
+release:
+	brelse(bp);
+	return (0);
+}
+
+/*
+ * Find a block of the specified size in the specified cylinder group.
+ *
+ * It is a panic if a request is made to find a block if none are
+ * available.
+ *
+ * The search starts at the map byte holding bpref (or at the group's
+ * fragment rotor when bpref is 0) and wraps around to the start of the
+ * map if nothing is found before the end.  The return value is the
+ * position in the group's free map of the first fragment of a free run
+ * of allocsiz fragments; cg_frotor is left at the first bit of the map
+ * byte in which the run was found.
+ */
+static daddr_t
+ffs_mapsearch(fs, cgp, bpref, allocsiz)
+	register struct fs *fs;
+	register struct cg *cgp;
+	daddr_t bpref;
+	int allocsiz;
+{
+	daddr_t bno;
+	int start, len, loc, i;
+	int blk, field, subfield, pos;
+
+	/*
+	 * find the fragment by searching through the free block
+	 * map for an appropriate bit pattern
+	 */
+	if (bpref)
+		start = dtogd(fs, bpref) / NBBY;
+	else
+		start = cgp->cg_frotor / NBBY;
+	len = howmany(fs->fs_fpg, NBBY) - start;
+	loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[start],
+		(u_char *)fragtbl[fs->fs_frag],
+		(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
+	if (loc == 0) {		/* nothing from start to end: wrap around */
+		len = start + 1;
+		start = 0;
+		loc = scanc((u_int)len, (u_char *)&cg_blksfree(cgp)[0],
+			(u_char *)fragtbl[fs->fs_frag],
+			(u_char)(1 << (allocsiz - 1 + (fs->fs_frag % NBBY))));
+		if (loc == 0) {
+			printf("start = %d, len = %d, fs = %s\n",
+			    start, len, fs->fs_fsmnt);
+			/* NOTE(review): message names ffs_alloccg, not this function. */
+			panic("ffs_alloccg: map corrupted");
+			/* NOTREACHED */
+		}
+	}
+	bno = (start + len - loc) * NBBY;	/* first bit of the matching byte */
+	cgp->cg_frotor = bno;
+	/*
+	 * found the byte in the map
+	 * sift through the bits to find the selected frag
+	 */
+	for (i = bno + NBBY; bno < i; bno += fs->fs_frag) {
+		blk = blkmap(fs, cg_blksfree(cgp), bno);
+		blk <<= 1;
+		field = around[allocsiz];
+		subfield = inside[allocsiz];
+		/* Slide the around/inside patterns across the block's bits. */
+		for (pos = 0; pos <= fs->fs_frag - allocsiz; pos++) {
+			if ((blk & field) == subfield)
+				return (bno + pos);
+			field <<= 1;
+			subfield <<= 1;
+		}
+	}
+	printf("bno = %d, fs = %s\n", bno, fs->fs_fsmnt);
+	panic("ffs_alloccg: block not in map");
+	return (-1);
+}
+
+/*
+ * Update the cluster map because of an allocation or free.
+ *
+ * Cnt == 1 means free; cnt == -1 means allocating.
+ *
+ * The bit for blkno is set or cleared in the cluster free map, the
+ * runs of free blocks immediately after and before blkno are measured
+ * (each capped at fs_contigsumsize), and the cluster summary counts
+ * are adjusted for the run that was joined or split.  Nothing is done
+ * when fs_contigsumsize is not positive.
+ */
+ffs_clusteracct(fs, cgp, blkno, cnt)
+	struct fs *fs;
+	struct cg *cgp;
+	daddr_t blkno;
+	int cnt;
+{
+	long *sump;
+	u_char *freemapp, *mapp;
+	int i, start, end, forw, back, map, bit;
+
+	if (fs->fs_contigsumsize <= 0)
+		return;
+	freemapp = cg_clustersfree(cgp);
+	sump = cg_clustersum(cgp);
+	/*
+	 * Set (block freed) or clear (block allocated) the bit for
+	 * the actual block.
+	 */
+	if (cnt > 0)
+		setbit(freemapp, blkno);
+	else
+		clrbit(freemapp, blkno);
+	/*
+	 * Find the size of the cluster going forward.
+	 */
+	start = blkno + 1;
+	end = start + fs->fs_contigsumsize;
+	if (end >= cgp->cg_nclusterblks)
+		end = cgp->cg_nclusterblks;
+	mapp = &freemapp[start / NBBY];
+	map = *mapp++;
+	bit = 1 << (start % NBBY);
+	for (i = start; i < end; i++) {
+		if ((map & bit) == 0)
+			break;
+		if ((i & (NBBY - 1)) != (NBBY - 1)) {
+			bit <<= 1;
+		} else {
+			map = *mapp++;	/* step to the next map byte */
+			bit = 1;
+		}
+	}
+	forw = i - start;
+	/*
+	 * Find the size of the cluster going backward.
+	 *
+	 * NOTE(review): when blkno is 0, start is -1, so the map index
+	 * and the shift count below are computed from a negative value.
+	 * The loop does not execute in that case (end is also -1), so
+	 * neither result is used - confirm this is benign on the target.
+	 */
+	start = blkno - 1;
+	end = start - fs->fs_contigsumsize;
+	if (end < 0)
+		end = -1;
+	mapp = &freemapp[start / NBBY];
+	map = *mapp--;
+	bit = 1 << (start % NBBY);
+	for (i = start; i > end; i--) {
+		if ((map & bit) == 0)
+			break;
+		if ((i & (NBBY - 1)) != 0) {
+			bit >>= 1;
+		} else {
+			map = *mapp--;	/* step to the previous map byte */
+			bit = 1 << (NBBY - 1);
+		}
+	}
+	back = start - i;
+	/*
+	 * Account for old cluster and the possibly new forward and
+	 * back clusters.
+	 */
+	i = back + forw + 1;
+	if (i > fs->fs_contigsumsize)
+		i = fs->fs_contigsumsize;
+	sump[i] += cnt;
+	if (back > 0)
+		sump[back] -= cnt;
+	if (forw > 0)
+		sump[forw] -= cnt;
+}
+
+/*
+ * Fserr prints the name of a file system with an error diagnostic.
+ *
+ * The message is logged at LOG_ERR and has the form:
+ *	uid <uid> on <fs_fsmnt>: <error message>
+ * where cp supplies the error message text.
+ */
+static void
+ffs_fserr(fs, uid, cp)
+	struct fs *fs;
+	u_int uid;
+	char *cp;
+{
+
+	log(LOG_ERR, "uid %d on %s: %s\n", uid, fs->fs_fsmnt, cp);
+}
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
new file mode 100644
index 0000000..752feec
--- /dev/null
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ *	@(#)ffs_balloc.c	8.4 (Berkeley) 9/23/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/buf.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/vnode.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+/*
+ * Balloc defines the structure of file system storage
+ * by allocating the physical blocks on a device given
+ * the inode and the logical block number in a file.
+ *
+ * On success 0 is returned and *bpp holds a buffer for logical block
+ * bn; on failure an error number is returned (EFBIG for a negative
+ * bn) and *bpp is NULL.  B_SYNC in flags makes the writes of updated
+ * blocks synchronous; B_CLRBUF asks for a newly allocated data buffer
+ * to be cleared, or an existing indirectly-mapped block to be read in.
+ */
+ffs_balloc(ip, bn, size, cred, bpp, flags)
+	register struct inode *ip;
+	register daddr_t bn;
+	int size;
+	struct ucred *cred;
+	struct buf **bpp;
+	int flags;
+{
+	register struct fs *fs;
+	register daddr_t nb;
+	struct buf *bp, *nbp;
+	struct vnode *vp = ITOV(ip);
+	struct indir indirs[NIADDR + 2];
+	daddr_t newb, lbn, *bap, pref;
+	int osize, nsize, num, i, error;
+
+	*bpp = NULL;
+	if (bn < 0)
+		return (EFBIG);
+	fs = ip->i_fs;
+	lbn = bn;
+
+	/*
+	 * If the next write will extend the file into a new block,
+	 * and the file is currently composed of a fragment
+	 * this fragment has to be extended to be a full block.
+	 */
+	nb = lblkno(fs, ip->i_size);
+	if (nb < NDADDR && nb < bn) {
+		osize = blksize(fs, ip, nb);
+		if (osize < fs->fs_bsize && osize > 0) {
+			error = ffs_realloccg(ip, nb,
+				ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]),
+				osize, (int)fs->fs_bsize, cred, &bp);
+			if (error)
+				return (error);
+			ip->i_size = (nb + 1) * fs->fs_bsize;
+			vnode_pager_setsize(vp, (u_long)ip->i_size);
+			ip->i_db[nb] = dbtofsb(fs, bp->b_blkno);
+			ip->i_flag |= IN_CHANGE | IN_UPDATE;
+			if (flags & B_SYNC)
+				bwrite(bp);
+			else
+				bawrite(bp);
+		}
+	}
+	/*
+	 * The first NDADDR blocks are direct blocks
+	 */
+	if (bn < NDADDR) {
+		nb = ip->i_db[bn];
+		/* Block exists and lies wholly inside the file: just read it. */
+		if (nb != 0 && ip->i_size >= (bn + 1) * fs->fs_bsize) {
+			error = bread(vp, bn, fs->fs_bsize, NOCRED, &bp);
+			if (error) {
+				brelse(bp);
+				return (error);
+			}
+			*bpp = bp;
+			return (0);
+		}
+		if (nb != 0) {
+			/*
+			 * Consider need to reallocate a fragment.
+			 */
+			osize = fragroundup(fs, blkoff(fs, ip->i_size));
+			nsize = fragroundup(fs, size);
+			if (nsize <= osize) {
+				error = bread(vp, bn, osize, NOCRED, &bp);
+				if (error) {
+					brelse(bp);
+					return (error);
+				}
+			} else {
+				error = ffs_realloccg(ip, bn,
+					ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]),
+					osize, nsize, cred, &bp);
+				if (error)
+					return (error);
+			}
+		} else {
+			/* No block yet: allocate a fragment run or a full block. */
+			if (ip->i_size < (bn + 1) * fs->fs_bsize)
+				nsize = fragroundup(fs, size);
+			else
+				nsize = fs->fs_bsize;
+			error = ffs_alloc(ip, bn,
+				ffs_blkpref(ip, bn, (int)bn, &ip->i_db[0]),
+				nsize, cred, &newb);
+			if (error)
+				return (error);
+			bp = getblk(vp, bn, nsize, 0, 0);
+			bp->b_blkno = fsbtodb(fs, newb);
+			if (flags & B_CLRBUF)
+				clrbuf(bp);
+		}
+		ip->i_db[bn] = dbtofsb(fs, bp->b_blkno);
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+		*bpp = bp;
+		return (0);
+	}
+	/*
+	 * Determine the number of levels of indirection.
+	 */
+	pref = 0;
+	if (error = ufs_getlbns(vp, bn, indirs, &num))
+		return(error);
+#ifdef DIAGNOSTIC
+	if (num < 1)
+		panic ("ffs_balloc: ufs_bmaparray returned indirect block\n");
+#endif
+	/*
+	 * Fetch the first indirect block allocating if necessary.
+	 */
+	--num;
+	nb = ip->i_ib[indirs[0].in_off];
+	if (nb == 0) {
+		pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
+		if (error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
+		    cred, &newb))
+			return (error);
+		nb = newb;
+		bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0);
+		bp->b_blkno = fsbtodb(fs, newb);
+		clrbuf(bp);
+		/*
+		 * Write synchronously so that indirect blocks
+		 * never point at garbage.
+		 */
+		if (error = bwrite(bp)) {
+			ffs_blkfree(ip, nb, fs->fs_bsize);
+			return (error);
+		}
+		ip->i_ib[indirs[0].in_off] = newb;
+		ip->i_flag |= IN_CHANGE | IN_UPDATE;
+	}
+	/*
+	 * Fetch through the indirect blocks, allocating as necessary.
+	 */
+	for (i = 1;;) {
+		error = bread(vp,
+		    indirs[i].in_lbn, (int)fs->fs_bsize, NOCRED, &bp);
+		if (error) {
+			brelse(bp);
+			return (error);
+		}
+		bap = (daddr_t *)bp->b_data;
+		nb = bap[indirs[i].in_off];
+		if (i == num)	/* bp is the last-level indirect block */
+			break;
+		i += 1;
+		if (nb != 0) {
+			brelse(bp);
+			continue;
+		}
+		if (pref == 0)
+			pref = ffs_blkpref(ip, lbn, 0, (daddr_t *)0);
+		if (error =
+		    ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
+			brelse(bp);
+			return (error);
+		}
+		nb = newb;
+		nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0);
+		nbp->b_blkno = fsbtodb(fs, nb);
+		clrbuf(nbp);
+		/*
+		 * Write synchronously so that indirect blocks
+		 * never point at garbage.
+		 */
+		if (error = bwrite(nbp)) {
+			ffs_blkfree(ip, nb, fs->fs_bsize);
+			brelse(bp);
+			return (error);
+		}
+		/* i was already advanced, so i - 1 is the parent's slot. */
+		bap[indirs[i - 1].in_off] = nb;
+		/*
+		 * If required, write synchronously, otherwise use
+		 * delayed write.
+		 */
+		if (flags & B_SYNC) {
+			bwrite(bp);
+		} else {
+			bdwrite(bp);
+		}
+	}
+	/*
+	 * Get the data block, allocating if necessary.
+	 */
+	if (nb == 0) {
+		pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]);
+		if (error = ffs_alloc(ip,
+		    lbn, pref, (int)fs->fs_bsize, cred, &newb)) {
+			brelse(bp);
+			return (error);
+		}
+		nb = newb;
+		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
+		nbp->b_blkno = fsbtodb(fs, nb);
+		if (flags & B_CLRBUF)
+			clrbuf(nbp);
+		bap[indirs[i].in_off] = nb;
+		/*
+		 * If required, write synchronously, otherwise use
+		 * delayed write.
+		 */
+		if (flags & B_SYNC) {
+			bwrite(bp);
+		} else {
+			bdwrite(bp);
+		}
+		*bpp = nbp;
+		return (0);
+	}
+	brelse(bp);	/* data block already exists; indirect block unchanged */
+	if (flags & B_CLRBUF) {
+		error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+		if (error) {
+			brelse(nbp);
+			return (error);
+		}
+	} else {
+		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0);
+		nbp->b_blkno = fsbtodb(fs, nb);
+	}
+	*bpp = nbp;
+	return (0);
+}
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
new file mode 100644
index 0000000..ab467a2
--- /dev/null
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -0,0 +1,101 @@
+/*-
+ * Copyright (c) 1991, 1993, 1994
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *	This product includes software developed by the University of
+ *	California, Berkeley and its contributors.
+ * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_extern.h 8.3 (Berkeley) 4/16/94 + */ + +struct buf; +struct fid; +struct fs; +struct inode; +struct mount; +struct nameidata; +struct proc; +struct statfs; +struct timeval; +struct ucred; +struct uio; +struct vnode; +struct mbuf; + +__BEGIN_DECLS +int ffs_alloc __P((struct inode *, + daddr_t, daddr_t, int, struct ucred *, daddr_t *)); +int ffs_balloc __P((struct inode *, + daddr_t, int, struct ucred *, struct buf **, int)); +int ffs_blkatoff __P((struct vop_blkatoff_args *)); +int ffs_blkfree __P((struct inode *, daddr_t, long)); +daddr_t ffs_blkpref __P((struct inode *, daddr_t, int, daddr_t *)); +int ffs_bmap __P((struct vop_bmap_args *)); +void ffs_clrblock __P((struct fs *, u_char *, daddr_t)); +int ffs_fhtovp __P((struct mount *, struct fid *, struct mbuf *, + struct vnode **, int *, struct ucred **)); +void ffs_fragacct __P((struct fs *, int, long [], int)); +int ffs_fsync __P((struct vop_fsync_args *)); +int ffs_init __P((void)); +int ffs_isblock __P((struct fs *, u_char *, daddr_t)); 
+int ffs_mount __P((struct mount *, + char *, caddr_t, struct nameidata *, struct proc *)); +int ffs_mountfs __P((struct vnode *, struct mount *, struct proc *)); +int ffs_mountroot __P((void)); +int ffs_read __P((struct vop_read_args *)); +int ffs_reallocblks __P((struct vop_reallocblks_args *)); +int ffs_realloccg __P((struct inode *, + daddr_t, daddr_t, int, int, struct ucred *, struct buf **)); +int ffs_reclaim __P((struct vop_reclaim_args *)); +void ffs_setblock __P((struct fs *, u_char *, daddr_t)); +int ffs_statfs __P((struct mount *, struct statfs *, struct proc *)); +int ffs_sync __P((struct mount *, int, struct ucred *, struct proc *)); +int ffs_truncate __P((struct vop_truncate_args *)); +int ffs_unmount __P((struct mount *, int, struct proc *)); +int ffs_update __P((struct vop_update_args *)); +int ffs_valloc __P((struct vop_valloc_args *)); +int ffs_vfree __P((struct vop_vfree_args *)); +int ffs_vget __P((struct mount *, ino_t, struct vnode **)); +int ffs_vptofh __P((struct vnode *, struct fid *)); +int ffs_write __P((struct vop_write_args *)); + +int bwrite(); /* FFS needs a bwrite routine. XXX */ + +#ifdef DIAGNOSTIC +void ffs_checkoverlap __P((struct buf *, struct inode *)); +#endif +__END_DECLS + +extern int (**ffs_vnodeop_p)(); +extern int (**ffs_specop_p)(); +#ifdef FIFO +extern int (**ffs_fifoop_p)(); +#define FFS_FIFOOPS ffs_fifoop_p +#else +#define FFS_FIFOOPS NULL +#endif diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c new file mode 100644 index 0000000..b45aee5 --- /dev/null +++ b/sys/ufs/ffs/ffs_inode.c @@ -0,0 +1,488 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ *
+ *	@(#)ffs_inode.c	8.5 (Berkeley) 12/30/93
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/mount.h>
+#include <sys/proc.h>
+#include <sys/file.h>
+#include <sys/buf.h>
+#include <sys/vnode.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/trace.h>
+#include <sys/resourcevar.h>
+
+#include <vm/vm.h>
+
+#include <ufs/ufs/quota.h>
+#include <ufs/ufs/inode.h>
+#include <ufs/ufs/ufsmount.h>
+#include <ufs/ufs/ufs_extern.h>
+
+#include <ufs/ffs/fs.h>
+#include <ufs/ffs/ffs_extern.h>
+
+static int ffs_indirtrunc __P((struct inode *, daddr_t, daddr_t, daddr_t, int,
+	    long *));
+/* FFS initialization: hands off to ufs_init() and returns its result. */
+int
+ffs_init()
+{
+	return (ufs_init());
+}
+
+/*
+ * Update the access, modified, and inode change times as specified by the
+ * IN_ACCESS, IN_UPDATE, and IN_CHANGE flags respectively. The IN_MODIFIED
+ * flag is used to specify that the inode needs to be updated but that the
+ * times have already been set. The access and modified times are taken from
+ * the second and third parameters; the inode change time is always taken
+ * from the current time. If waitfor is set, then wait for the disk write of
+ * the inode to complete.
+ */ +int +ffs_update(ap) + struct vop_update_args /* { + struct vnode *a_vp; + struct timeval *a_access; + struct timeval *a_modify; + int a_waitfor; + } */ *ap; +{ + register struct fs *fs; + struct buf *bp; + struct inode *ip; + int error; + + ip = VTOI(ap->a_vp); + if (ap->a_vp->v_mount->mnt_flag & MNT_RDONLY) { + ip->i_flag &= + ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + return (0); + } + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0) + return (0); + if (ip->i_flag & IN_ACCESS) + ip->i_atime.ts_sec = ap->a_access->tv_sec; + if (ip->i_flag & IN_UPDATE) { + ip->i_mtime.ts_sec = ap->a_modify->tv_sec; + ip->i_modrev++; + } + if (ip->i_flag & IN_CHANGE) + ip->i_ctime.ts_sec = time.tv_sec; + ip->i_flag &= ~(IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE); + fs = ip->i_fs; + /* + * Ensure that uid and gid are correct. This is a temporary + * fix until fsck has been changed to do the update. + */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + ip->i_din.di_ouid = ip->i_uid; /* XXX */ + ip->i_din.di_ogid = ip->i_gid; /* XXX */ + } /* XXX */ + if (error = bread(ip->i_devvp, + fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + *((struct dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)) = ip->i_din; + if (ap->a_waitfor) + return (bwrite(bp)); + else { + bdwrite(bp); + return (0); + } +} + +#define SINGLE 0 /* index of single indirect block */ +#define DOUBLE 1 /* index of double indirect block */ +#define TRIPLE 2 /* index of triple indirect block */ +/* + * Truncate the inode oip to at most length size, freeing the + * disk blocks. 
+ */ +ffs_truncate(ap) + struct vop_truncate_args /* { + struct vnode *a_vp; + off_t a_length; + int a_flags; + struct ucred *a_cred; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *ovp = ap->a_vp; + register daddr_t lastblock; + register struct inode *oip; + daddr_t bn, lbn, lastiblock[NIADDR], indir_lbn[NIADDR]; + daddr_t oldblks[NDADDR + NIADDR], newblks[NDADDR + NIADDR]; + off_t length = ap->a_length; + register struct fs *fs; + struct buf *bp; + int offset, size, level; + long count, nblocks, vflags, blocksreleased = 0; + struct timeval tv; + register int i; + int aflags, error, allerror; + off_t osize; + + oip = VTOI(ovp); + tv = time; + if (ovp->v_type == VLNK && + oip->i_size < ovp->v_mount->mnt_maxsymlinklen) { +#ifdef DIAGNOSTIC + if (length != 0) + panic("ffs_truncate: partial truncate of symlink"); +#endif + bzero((char *)&oip->i_shortlink, (u_int)oip->i_size); + oip->i_size = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + if (oip->i_size == length) { + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 0)); + } +#ifdef QUOTA + if (error = getinoquota(oip)) + return (error); +#endif + vnode_pager_setsize(ovp, (u_long)length); + fs = oip->i_fs; + osize = oip->i_size; + /* + * Lengthen the size of the file. We must ensure that the + * last byte of the file is allocated. Since the smallest + * value of oszie is 0, length will be at least 1. + */ + if (osize < length) { + offset = blkoff(fs, length - 1); + lbn = lblkno(fs, length - 1); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ffs_balloc(oip, lbn, offset + 1, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; + (void) vnode_pager_uncache(ovp); + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + oip->i_flag |= IN_CHANGE | IN_UPDATE; + return (VOP_UPDATE(ovp, &tv, &tv, 1)); + } + /* + * Shorten the size of the file. 
If the file is not being + * truncated to a block boundry, the contents of the + * partial block following the end of the file must be + * zero'ed in case it ever become accessable again because + * of subsequent file growth. + */ + offset = blkoff(fs, length); + if (offset == 0) { + oip->i_size = length; + } else { + lbn = lblkno(fs, length); + aflags = B_CLRBUF; + if (ap->a_flags & IO_SYNC) + aflags |= B_SYNC; + if (error = ffs_balloc(oip, lbn, offset, ap->a_cred, &bp, + aflags)) + return (error); + oip->i_size = length; + size = blksize(fs, oip, lbn); + (void) vnode_pager_uncache(ovp); + bzero((char *)bp->b_data + offset, (u_int)(size - offset)); + allocbuf(bp, size); + if (aflags & IO_SYNC) + bwrite(bp); + else + bawrite(bp); + } + /* + * Calculate index into inode's block list of + * last direct and indirect blocks (if any) + * which we want to keep. Lastblock is -1 when + * the file is truncated to 0. + */ + lastblock = lblkno(fs, length + fs->fs_bsize - 1) - 1; + lastiblock[SINGLE] = lastblock - NDADDR; + lastiblock[DOUBLE] = lastiblock[SINGLE] - NINDIR(fs); + lastiblock[TRIPLE] = lastiblock[DOUBLE] - NINDIR(fs) * NINDIR(fs); + nblocks = btodb(fs->fs_bsize); + /* + * Update file and block pointers on disk before we start freeing + * blocks. If we crash before free'ing blocks below, the blocks + * will be returned to the free list. lastiblock values are also + * normalized to -1 for calls to ffs_indirtrunc below. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)oldblks, sizeof oldblks); + for (level = TRIPLE; level >= SINGLE; level--) + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + lastiblock[level] = -1; + } + for (i = NDADDR - 1; i > lastblock; i--) + oip->i_db[i] = 0; + oip->i_flag |= IN_CHANGE | IN_UPDATE; + if (error = VOP_UPDATE(ovp, &tv, &tv, MNT_WAIT)) + allerror = error; + /* + * Having written the new inode to disk, save its new configuration + * and put back the old block pointers long enough to process them. 
+ * Note that we save the new block configuration so we can check it + * when we are done. + */ + bcopy((caddr_t)&oip->i_db[0], (caddr_t)newblks, sizeof newblks); + bcopy((caddr_t)oldblks, (caddr_t)&oip->i_db[0], sizeof oldblks); + oip->i_size = osize; + vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA; + allerror = vinvalbuf(ovp, vflags, ap->a_cred, ap->a_p, 0, 0); + + /* + * Indirect blocks first. + */ + indir_lbn[SINGLE] = -NDADDR; + indir_lbn[DOUBLE] = indir_lbn[SINGLE] - NINDIR(fs) - 1; + indir_lbn[TRIPLE] = indir_lbn[DOUBLE] - NINDIR(fs) * NINDIR(fs) - 1; + for (level = TRIPLE; level >= SINGLE; level--) { + bn = oip->i_ib[level]; + if (bn != 0) { + error = ffs_indirtrunc(oip, indir_lbn[level], + fsbtodb(fs, bn), lastiblock[level], level, &count); + if (error) + allerror = error; + blocksreleased += count; + if (lastiblock[level] < 0) { + oip->i_ib[level] = 0; + ffs_blkfree(oip, bn, fs->fs_bsize); + blocksreleased += nblocks; + } + } + if (lastiblock[level] >= 0) + goto done; + } + + /* + * All whole direct blocks or frags. + */ + for (i = NDADDR - 1; i > lastblock; i--) { + register long bsize; + + bn = oip->i_db[i]; + if (bn == 0) + continue; + oip->i_db[i] = 0; + bsize = blksize(fs, oip, i); + ffs_blkfree(oip, bn, bsize); + blocksreleased += btodb(bsize); + } + if (lastblock < 0) + goto done; + + /* + * Finally, look for a change in size of the + * last direct block; release any frags. + */ + bn = oip->i_db[lastblock]; + if (bn != 0) { + long oldspace, newspace; + + /* + * Calculate amount of space we're giving + * back as old block size minus new block size. + */ + oldspace = blksize(fs, oip, lastblock); + oip->i_size = length; + newspace = blksize(fs, oip, lastblock); + if (newspace == 0) + panic("itrunc: newspace"); + if (oldspace - newspace > 0) { + /* + * Block number of space to be free'd is + * the old block # plus the number of frags + * required for the storage we're keeping. 
+ */ + bn += numfrags(fs, newspace); + ffs_blkfree(oip, bn, oldspace - newspace); + blocksreleased += btodb(oldspace - newspace); + } + } +done: +#ifdef DIAGNOSTIC + for (level = SINGLE; level <= TRIPLE; level++) + if (newblks[NDADDR + level] != oip->i_ib[level]) + panic("itrunc1"); + for (i = 0; i < NDADDR; i++) + if (newblks[i] != oip->i_db[i]) + panic("itrunc2"); + if (length == 0 && + (ovp->v_dirtyblkhd.lh_first || ovp->v_cleanblkhd.lh_first)) + panic("itrunc3"); +#endif /* DIAGNOSTIC */ + /* + * Put back the real size. + */ + oip->i_size = length; + oip->i_blocks -= blocksreleased; + if (oip->i_blocks < 0) /* sanity */ + oip->i_blocks = 0; + oip->i_flag |= IN_CHANGE; +#ifdef QUOTA + (void) chkdq(oip, -blocksreleased, NOCRED, 0); +#endif + return (allerror); +} + +/* + * Release blocks associated with the inode ip and stored in the indirect + * block bn. Blocks are free'd in LIFO order up to (but not including) + * lastbn. If level is greater than SINGLE, the block is an indirect block + * and recursive calls to indirtrunc must be used to cleanse other indirect + * blocks. + * + * NB: triple indirect blocks are untested. + */ +static int +ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp) + register struct inode *ip; + daddr_t lbn, lastbn; + daddr_t dbn; + int level; + long *countp; +{ + register int i; + struct buf *bp; + register struct fs *fs = ip->i_fs; + register daddr_t *bap; + struct vnode *vp; + daddr_t *copy, nb, nlbn, last; + long blkcount, factor; + int nblocks, blocksreleased = 0; + int error = 0, allerror = 0; + + /* + * Calculate index in current block of last + * block to be kept. -1 indicates the entire + * block so we need not calculate the index. + */ + factor = 1; + for (i = SINGLE; i < level; i++) + factor *= NINDIR(fs); + last = lastbn; + if (lastbn > 0) + last /= factor; + nblocks = btodb(fs->fs_bsize); + /* + * Get buffer of block pointers, zero those entries corresponding + * to blocks to be free'd, and update on disk copy first. 
Since + * double(triple) indirect before single(double) indirect, calls + * to bmap on these blocks will fail. However, we already have + * the on disk address, so we have to set the b_blkno field + * explicitly instead of letting bread do everything for us. + */ + vp = ITOV(ip); + bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0); + if (bp->b_flags & (B_DONE | B_DELWRI)) { + /* Braces must be here in case trace evaluates to nothing. */ + trace(TR_BREADHIT, pack(vp, fs->fs_bsize), lbn); + } else { + trace(TR_BREADMISS, pack(vp, fs->fs_bsize), lbn); + curproc->p_stats->p_ru.ru_inblock++; /* pay for read */ + bp->b_flags |= B_READ; + if (bp->b_bcount > bp->b_bufsize) + panic("ffs_indirtrunc: bad buffer size"); + bp->b_blkno = dbn; + VOP_STRATEGY(bp); + error = biowait(bp); + } + if (error) { + brelse(bp); + *countp = 0; + return (error); + } + + bap = (daddr_t *)bp->b_data; + MALLOC(copy, daddr_t *, fs->fs_bsize, M_TEMP, M_WAITOK); + bcopy((caddr_t)bap, (caddr_t)copy, (u_int)fs->fs_bsize); + bzero((caddr_t)&bap[last + 1], + (u_int)(NINDIR(fs) - (last + 1)) * sizeof (daddr_t)); + if (last == -1) + bp->b_flags |= B_INVAL; + error = bwrite(bp); + if (error) + allerror = error; + bap = copy; + + /* + * Recursively free totally unused blocks. + */ + for (i = NINDIR(fs) - 1, nlbn = lbn + 1 - i * factor; i > last; + i--, nlbn += factor) { + nb = bap[i]; + if (nb == 0) + continue; + if (level > SINGLE) { + if (error = ffs_indirtrunc(ip, nlbn, + fsbtodb(fs, nb), (daddr_t)-1, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + ffs_blkfree(ip, nb, fs->fs_bsize); + blocksreleased += nblocks; + } + + /* + * Recursively free last partial block. 
+ */ + if (level > SINGLE && lastbn >= 0) { + last = lastbn % factor; + nb = bap[i]; + if (nb != 0) { + if (error = ffs_indirtrunc(ip, nlbn, fsbtodb(fs, nb), + last, level - 1, &blkcount)) + allerror = error; + blocksreleased += blkcount; + } + } + FREE(copy, M_TEMP); + *countp = blocksreleased; + return (allerror); +} diff --git a/sys/ufs/ffs/ffs_subr.c b/sys/ufs/ffs/ffs_subr.c new file mode 100644 index 0000000..c251b16 --- /dev/null +++ b/sys/ufs/ffs/ffs_subr.c @@ -0,0 +1,238 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_subr.c 8.2 (Berkeley) 9/21/93 + */ + +#include <sys/param.h> +#include <ufs/ffs/fs.h> + +#ifdef KERNEL +#include <sys/systm.h> +#include <sys/vnode.h> +#include <ufs/ffs/ffs_extern.h> +#include <sys/buf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> + +/* + * Return buffer with the contents of block "offset" from the beginning of + * directory "ip". If "res" is non-zero, fill it in with a pointer to the + * remaining space in the directory. + */ +int +ffs_blkatoff(ap) + struct vop_blkatoff_args /* { + struct vnode *a_vp; + off_t a_offset; + char **a_res; + struct buf **a_bpp; + } */ *ap; +{ + struct inode *ip; + register struct fs *fs; + struct buf *bp; + daddr_t lbn; + int bsize, error; + + ip = VTOI(ap->a_vp); + fs = ip->i_fs; + lbn = lblkno(fs, ap->a_offset); + bsize = blksize(fs, ip, lbn); + + *ap->a_bpp = NULL; + if (error = bread(ap->a_vp, lbn, bsize, NOCRED, &bp)) { + brelse(bp); + return (error); + } + if (ap->a_res) + *ap->a_res = (char *)bp->b_data + blkoff(fs, ap->a_offset); + *ap->a_bpp = bp; + return (0); +} +#endif + +/* + * Update the frsum fields to reflect addition or deletion + * of some frags. 
+ */ +void +ffs_fragacct(fs, fragmap, fraglist, cnt) + struct fs *fs; + int fragmap; + long fraglist[]; + int cnt; +{ + int inblk; + register int field, subfield; + register int siz, pos; + + inblk = (int)(fragtbl[fs->fs_frag][fragmap]) << 1; + fragmap <<= 1; + for (siz = 1; siz < fs->fs_frag; siz++) { + if ((inblk & (1 << (siz + (fs->fs_frag % NBBY)))) == 0) + continue; + field = around[siz]; + subfield = inside[siz]; + for (pos = siz; pos <= fs->fs_frag; pos++) { + if ((fragmap & field) == subfield) { + fraglist[siz] += cnt; + pos += siz; + field <<= siz; + subfield <<= siz; + } + field <<= 1; + subfield <<= 1; + } + } +} + +#if defined(KERNEL) && defined(DIAGNOSTIC) +void +ffs_checkoverlap(bp, ip) + struct buf *bp; + struct inode *ip; +{ + register struct buf *ebp, *ep; + register daddr_t start, last; + struct vnode *vp; + + ebp = &buf[nbuf]; + start = bp->b_blkno; + last = start + btodb(bp->b_bcount) - 1; + for (ep = buf; ep < ebp; ep++) { + if (ep == bp || (ep->b_flags & B_INVAL) || + ep->b_vp == NULLVP) + continue; + if (VOP_BMAP(ep->b_vp, (daddr_t)0, &vp, (daddr_t)0, NULL)) + continue; + if (vp != ip->i_devvp) + continue; + /* look for overlap */ + if (ep->b_bcount == 0 || ep->b_blkno > last || + ep->b_blkno + btodb(ep->b_bcount) <= start) + continue; + vprint("Disk overlap", vp); + (void)printf("\tstart %d, end %d overlap start %d, end %d\n", + start, last, ep->b_blkno, + ep->b_blkno + btodb(ep->b_bcount) - 1); + panic("Disk buffer overlap"); + } +} +#endif /* DIAGNOSTIC */ + +/* + * block operations + * + * check if a block is available + */ +int +ffs_isblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + unsigned char mask; + + switch ((int)fs->fs_frag) { + case 8: + return (cp[h] == 0xff); + case 4: + mask = 0x0f << ((h & 0x1) << 2); + return ((cp[h >> 1] & mask) == mask); + case 2: + mask = 0x03 << ((h & 0x3) << 1); + return ((cp[h >> 2] & mask) == mask); + case 1: + mask = 0x01 << (h & 0x7); + return ((cp[h >> 3] & mask) == mask); + 
default: + panic("ffs_isblock"); + } +} + +/* + * take a block out of the map + */ +void +ffs_clrblock(fs, cp, h) + struct fs *fs; + u_char *cp; + daddr_t h; +{ + + switch ((int)fs->fs_frag) { + case 8: + cp[h] = 0; + return; + case 4: + cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); + return; + case 2: + cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); + return; + case 1: + cp[h >> 3] &= ~(0x01 << (h & 0x7)); + return; + default: + panic("ffs_clrblock"); + } +} + +/* + * put a block into the map + */ +void +ffs_setblock(fs, cp, h) + struct fs *fs; + unsigned char *cp; + daddr_t h; +{ + + switch ((int)fs->fs_frag) { + + case 8: + cp[h] = 0xff; + return; + case 4: + cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); + return; + case 2: + cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); + return; + case 1: + cp[h >> 3] |= (0x01 << (h & 0x7)); + return; + default: + panic("ffs_setblock"); + } +} diff --git a/sys/ufs/ffs/ffs_tables.c b/sys/ufs/ffs/ffs_tables.c new file mode 100644 index 0000000..8cf46b0 --- /dev/null +++ b/sys/ufs/ffs/ffs_tables.c @@ -0,0 +1,136 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_tables.c 8.1 (Berkeley) 6/11/93 + */ + +#include <sys/param.h> + +/* + * Bit patterns for identifying fragments in the block map + * used as ((map & around) == inside) + */ +int around[9] = { + 0x3, 0x7, 0xf, 0x1f, 0x3f, 0x7f, 0xff, 0x1ff, 0x3ff +}; +int inside[9] = { + 0x0, 0x2, 0x6, 0xe, 0x1e, 0x3e, 0x7e, 0xfe, 0x1fe +}; + +/* + * Given a block map bit pattern, the frag tables tell whether a + * particular size fragment is available. + * + * used as: + * if ((1 << (size - 1)) & fragtbl[fs->fs_frag][map] { + * at least one fragment of the indicated size is available + * } + * + * These tables are used by the scanc instruction on the VAX to + * quickly find an appropriate fragment. 
+ */ +u_char fragtbl124[256] = { + 0x00, 0x16, 0x16, 0x2a, 0x16, 0x16, 0x26, 0x4e, + 0x16, 0x16, 0x16, 0x3e, 0x2a, 0x3e, 0x4e, 0x8a, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x26, 0x36, 0x36, 0x2e, 0x36, 0x36, 0x26, 0x6e, + 0x36, 0x36, 0x36, 0x3e, 0x2e, 0x3e, 0x6e, 0xae, + 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, + 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x16, 0x16, 0x16, 0x3e, 0x16, 0x16, 0x36, 0x5e, + 0x16, 0x16, 0x16, 0x3e, 0x3e, 0x3e, 0x5e, 0x9e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, + 0x2a, 0x3e, 0x3e, 0x2a, 0x3e, 0x3e, 0x2e, 0x6e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x2a, 0x3e, 0x6e, 0xaa, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, + 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x3e, 0x7e, 0xbe, + 0x4e, 0x5e, 0x5e, 0x6e, 0x5e, 0x5e, 0x6e, 0x4e, + 0x5e, 0x5e, 0x5e, 0x7e, 0x6e, 0x7e, 0x4e, 0xce, + 0x8a, 0x9e, 0x9e, 0xaa, 0x9e, 0x9e, 0xae, 0xce, + 0x9e, 0x9e, 0x9e, 0xbe, 0xaa, 0xbe, 0xce, 0x8a, +}; + +u_char fragtbl8[256] = { + 0x00, 0x01, 0x01, 0x02, 0x01, 0x01, 0x02, 0x04, + 0x01, 0x01, 0x01, 0x03, 0x02, 0x03, 0x04, 0x08, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x02, 0x03, 0x03, 0x02, 0x04, 0x05, 0x08, 0x10, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x02, 0x03, 0x03, 0x02, 0x03, 
0x03, 0x02, 0x06, + 0x04, 0x05, 0x05, 0x06, 0x08, 0x09, 0x10, 0x20, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, + 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, + 0x08, 0x09, 0x09, 0x0a, 0x10, 0x11, 0x20, 0x40, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x03, 0x03, 0x03, 0x03, 0x05, 0x05, 0x09, 0x11, + 0x01, 0x01, 0x01, 0x03, 0x01, 0x01, 0x03, 0x05, + 0x01, 0x01, 0x01, 0x03, 0x03, 0x03, 0x05, 0x09, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, + 0x05, 0x05, 0x05, 0x07, 0x09, 0x09, 0x11, 0x21, + 0x02, 0x03, 0x03, 0x02, 0x03, 0x03, 0x02, 0x06, + 0x03, 0x03, 0x03, 0x03, 0x02, 0x03, 0x06, 0x0a, + 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x07, + 0x02, 0x03, 0x03, 0x02, 0x06, 0x07, 0x0a, 0x12, + 0x04, 0x05, 0x05, 0x06, 0x05, 0x05, 0x06, 0x04, + 0x05, 0x05, 0x05, 0x07, 0x06, 0x07, 0x04, 0x0c, + 0x08, 0x09, 0x09, 0x0a, 0x09, 0x09, 0x0a, 0x0c, + 0x10, 0x11, 0x11, 0x12, 0x20, 0x21, 0x40, 0x80, +}; + +/* + * The actual fragtbl array. + */ +u_char *fragtbl[MAXFRAG + 1] = { + 0, fragtbl124, fragtbl124, 0, fragtbl124, 0, 0, 0, fragtbl8, +}; diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c new file mode 100644 index 0000000..505dd5d --- /dev/null +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -0,0 +1,843 @@ +/* + * Copyright (c) 1989, 1991, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * @(#)ffs_vfsops.c 8.8 (Berkeley) 4/18/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/namei.h> +#include <sys/proc.h> +#include <sys/kernel.h> +#include <sys/vnode.h> +#include <sys/socket.h> +#include <sys/mount.h> +#include <sys/buf.h> +#include <sys/mbuf.h> +#include <sys/file.h> +#include <sys/disklabel.h> +#include <sys/ioctl.h> +#include <sys/errno.h> +#include <sys/malloc.h> + +#include <miscfs/specfs/specdev.h> + +#include <ufs/ufs/quota.h> +#include <ufs/ufs/ufsmount.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +int ffs_sbupdate __P((struct ufsmount *, int)); + +struct vfsops ufs_vfsops = { + ffs_mount, + ufs_start, + ffs_unmount, + ufs_root, + ufs_quotactl, + ffs_statfs, + ffs_sync, + ffs_vget, + ffs_fhtovp, + ffs_vptofh, + ffs_init, +}; + +extern u_long nextgennumber; + +/* + * Called by main() when ufs is going to be mounted as root. + * + * Name is updated by mount(8) after booting. + */ +#define ROOTNAME "root_device" + +ffs_mountroot() +{ + extern struct vnode *rootvp; + register struct fs *fs; + register struct mount *mp; + struct proc *p = curproc; /* XXX */ + struct ufsmount *ump; + u_int size; + int error; + + /* + * Get vnodes for swapdev and rootdev. 
+ */ + if (bdevvp(swapdev, &swapdev_vp) || bdevvp(rootdev, &rootvp)) + panic("ffs_mountroot: can't setup bdevvp's"); + + mp = malloc((u_long)sizeof(struct mount), M_MOUNT, M_WAITOK); + bzero((char *)mp, (u_long)sizeof(struct mount)); + mp->mnt_op = &ufs_vfsops; + mp->mnt_flag = MNT_RDONLY; + if (error = ffs_mountfs(rootvp, mp, p)) { + free(mp, M_MOUNT); + return (error); + } + if (error = vfs_lock(mp)) { + (void)ffs_unmount(mp, 0, p); + free(mp, M_MOUNT); + return (error); + } + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mp->mnt_flag |= MNT_ROOTFS; + mp->mnt_vnodecovered = NULLVP; + ump = VFSTOUFS(mp); + fs = ump->um_fs; + bzero(fs->fs_fsmnt, sizeof(fs->fs_fsmnt)); + fs->fs_fsmnt[0] = '/'; + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copystr(ROOTNAME, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + vfs_unlock(mp); + inittodr(fs->fs_time); + return (0); +} + +/* + * VFS Operations. + * + * mount system call + */ +int +ffs_mount(mp, path, data, ndp, p) + register struct mount *mp; + char *path; + caddr_t data; + struct nameidata *ndp; + struct proc *p; +{ + struct vnode *devvp; + struct ufs_args args; + struct ufsmount *ump; + register struct fs *fs; + u_int size; + int error, flags; + + if (error = copyin(data, (caddr_t)&args, sizeof (struct ufs_args))) + return (error); + /* + * If updating, check whether changing from read-only to + * read/write; if there is no device name, that's all we do. 
+ */ + if (mp->mnt_flag & MNT_UPDATE) { + ump = VFSTOUFS(mp); + fs = ump->um_fs; + error = 0; + if (fs->fs_ronly == 0 && (mp->mnt_flag & MNT_RDONLY)) { + flags = WRITECLOSE; + if (mp->mnt_flag & MNT_FORCE) + flags |= FORCECLOSE; + if (vfs_busy(mp)) + return (EBUSY); + error = ffs_flushfiles(mp, flags, p); + vfs_unbusy(mp); + } + if (!error && (mp->mnt_flag & MNT_RELOAD)) + error = ffs_reload(mp, ndp->ni_cnd.cn_cred, p); + if (error) + return (error); + if (fs->fs_ronly && (mp->mnt_flag & MNT_WANTRDWR)) + fs->fs_ronly = 0; + if (args.fspec == 0) { + /* + * Process export requests. + */ + return (vfs_export(mp, &ump->um_export, &args.export)); + } + } + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible block device. + */ + NDINIT(ndp, LOOKUP, FOLLOW, UIO_USERSPACE, args.fspec, p); + if (error = namei(ndp)) + return (error); + devvp = ndp->ni_vp; + + if (devvp->v_type != VBLK) { + vrele(devvp); + return (ENOTBLK); + } + if (major(devvp->v_rdev) >= nblkdev) { + vrele(devvp); + return (ENXIO); + } + if ((mp->mnt_flag & MNT_UPDATE) == 0) + error = ffs_mountfs(devvp, mp, p); + else { + if (devvp != ump->um_devvp) + error = EINVAL; /* needs translation */ + else + vrele(devvp); + } + if (error) { + vrele(devvp); + return (error); + } + ump = VFSTOUFS(mp); + fs = ump->um_fs; + (void) copyinstr(path, fs->fs_fsmnt, sizeof(fs->fs_fsmnt) - 1, &size); + bzero(fs->fs_fsmnt + size, sizeof(fs->fs_fsmnt) - size); + bcopy((caddr_t)fs->fs_fsmnt, (caddr_t)mp->mnt_stat.f_mntonname, + MNAMELEN); + (void) copyinstr(args.fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, + &size); + bzero(mp->mnt_stat.f_mntfromname + size, MNAMELEN - size); + (void)ffs_statfs(mp, &mp->mnt_stat, p); + return (0); +} + +/* + * Reload all incore data for a filesystem (used after running fsck on + * the root filesystem and finding things to fix). The filesystem must + * be mounted read-only. 
+ * + * Things to do to update the mount: + * 1) invalidate all cached meta-data. + * 2) re-read superblock from disk. + * 3) re-read summary information from disk. + * 4) invalidate all inactive vnodes. + * 5) invalidate all cached file data. + * 6) re-read inode data for all active vnodes. + */ +ffs_reload(mountp, cred, p) + register struct mount *mountp; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp, *nvp, *devvp; + struct inode *ip; + struct csum *space; + struct buf *bp; + struct fs *fs; + int i, blks, size, error; + + if ((mountp->mnt_flag & MNT_RDONLY) == 0) + return (EINVAL); + /* + * Step 1: invalidate all cached meta-data. + */ + devvp = VFSTOUFS(mountp)->um_devvp; + if (vinvalbuf(devvp, 0, cred, p, 0, 0)) + panic("ffs_reload: dirty1"); + /* + * Step 2: re-read superblock from disk. + */ + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + return (error); + fs = (struct fs *)bp->b_data; + if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || + fs->fs_bsize < sizeof(struct fs)) { + brelse(bp); + return (EIO); /* XXX needs translation */ + } + fs = VFSTOUFS(mountp)->um_fs; + bcopy(&fs->fs_csp[0], &((struct fs *)bp->b_data)->fs_csp[0], + sizeof(fs->fs_csp)); + bcopy(bp->b_data, fs, (u_int)fs->fs_sbsize); + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; + brelse(bp); + ffs_oldfscompat(fs); + /* + * Step 3: re-read summary information from disk. + */ + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = fs->fs_csp[0]; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + if (error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + NOCRED, &bp)) + return (error); + bcopy(bp->b_data, fs->fs_csp[fragstoblks(fs, i)], (u_int)size); + brelse(bp); + } +loop: + for (vp = mountp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { + nvp = vp->v_mntvnodes.le_next; + /* + * Step 4: invalidate all inactive vnodes. 
+ */ + if (vp->v_usecount == 0) { + vgone(vp); + continue; + } + /* + * Step 5: invalidate all cached file data. + */ + if (vget(vp, 1)) + goto loop; + if (vinvalbuf(vp, 0, cred, p, 0, 0)) + panic("ffs_reload: dirty2"); + /* + * Step 6: re-read inode data for all active vnodes. + */ + ip = VTOI(vp); + if (error = + bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)), + (int)fs->fs_bsize, NOCRED, &bp)) { + vput(vp); + return (error); + } + ip->i_din = *((struct dinode *)bp->b_data + + ino_to_fsbo(fs, ip->i_number)); + brelse(bp); + vput(vp); + if (vp->v_mount != mountp) + goto loop; + } + return (0); +} + +/* + * Common code for mount and mountroot + */ +int +ffs_mountfs(devvp, mp, p) + register struct vnode *devvp; + struct mount *mp; + struct proc *p; +{ + register struct ufsmount *ump; + struct buf *bp; + register struct fs *fs; + dev_t dev = devvp->v_rdev; + struct partinfo dpart; + caddr_t base, space; + int havepart = 0, blks; + int error, i, size; + int ronly; + extern struct vnode *rootvp; + + /* + * Disallow multiple mounts of the same device. + * Disallow mounting of a device that is currently in use + * (except for root, which might share swap device for miniroot). + * Flush out any old buffers remaining from a previous use. + */ + if (error = vfs_mountedon(devvp)) + return (error); + if (vcount(devvp) > 1 && devvp != rootvp) + return (EBUSY); + if (error = vinvalbuf(devvp, V_SAVE, p->p_ucred, p, 0, 0)) + return (error); + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; + if (error = VOP_OPEN(devvp, ronly ? 
FREAD : FREAD|FWRITE, FSCRED, p)) + return (error); + if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, NOCRED, p) != 0) + size = DEV_BSIZE; + else { + havepart = 1; + size = dpart.disklab->d_secsize; + } + + bp = NULL; + ump = NULL; + if (error = bread(devvp, SBLOCK, SBSIZE, NOCRED, &bp)) + goto out; + fs = (struct fs *)bp->b_data; + if (fs->fs_magic != FS_MAGIC || fs->fs_bsize > MAXBSIZE || + fs->fs_bsize < sizeof(struct fs)) { + error = EINVAL; /* XXX needs translation */ + goto out; + } + ump = malloc(sizeof *ump, M_UFSMNT, M_WAITOK); + bzero((caddr_t)ump, sizeof *ump); + ump->um_fs = malloc((u_long)fs->fs_sbsize, M_UFSMNT, + M_WAITOK); + bcopy(bp->b_data, ump->um_fs, (u_int)fs->fs_sbsize); + if (fs->fs_sbsize < SBSIZE) + bp->b_flags |= B_INVAL; + brelse(bp); + bp = NULL; + fs = ump->um_fs; + fs->fs_ronly = ronly; + if (ronly == 0) + fs->fs_fmod = 1; + blks = howmany(fs->fs_cssize, fs->fs_fsize); + base = space = malloc((u_long)fs->fs_cssize, M_UFSMNT, + M_WAITOK); + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + error = bread(devvp, fsbtodb(fs, fs->fs_csaddr + i), size, + NOCRED, &bp); + if (error) { + free(base, M_UFSMNT); + goto out; + } + bcopy(bp->b_data, space, (u_int)size); + fs->fs_csp[fragstoblks(fs, i)] = (struct csum *)space; + space += size; + brelse(bp); + bp = NULL; + } + mp->mnt_data = (qaddr_t)ump; + mp->mnt_stat.f_fsid.val[0] = (long)dev; + mp->mnt_stat.f_fsid.val[1] = MOUNT_UFS; + mp->mnt_maxsymlinklen = fs->fs_maxsymlinklen; + mp->mnt_flag |= MNT_LOCAL; + ump->um_mountp = mp; + ump->um_dev = dev; + ump->um_devvp = devvp; + ump->um_nindir = fs->fs_nindir; + ump->um_bptrtodb = fs->fs_fsbtodb; + ump->um_seqinc = fs->fs_frag; + for (i = 0; i < MAXQUOTAS; i++) + ump->um_quotas[i] = NULLVP; + devvp->v_specflags |= SI_MOUNTEDON; + ffs_oldfscompat(fs); + return (0); +out: + if (bp) + brelse(bp); + (void)VOP_CLOSE(devvp, ronly ? 
FREAD : FREAD|FWRITE, NOCRED, p); + if (ump) { + free(ump->um_fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + } + return (error); +} + +/* + * Sanity checks for old file systems. + * + * XXX - goes away some day. + */ +ffs_oldfscompat(fs) + struct fs *fs; +{ + int i; + + fs->fs_npsect = max(fs->fs_npsect, fs->fs_nsect); /* XXX */ + fs->fs_interleave = max(fs->fs_interleave, 1); /* XXX */ + if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ + fs->fs_nrpos = 8; /* XXX */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + quad_t sizepb = fs->fs_bsize; /* XXX */ + /* XXX */ + fs->fs_maxfilesize = fs->fs_bsize * NDADDR - 1; /* XXX */ + for (i = 0; i < NIADDR; i++) { /* XXX */ + sizepb *= NINDIR(fs); /* XXX */ + fs->fs_maxfilesize += sizepb; /* XXX */ + } /* XXX */ + fs->fs_qbmask = ~fs->fs_bmask; /* XXX */ + fs->fs_qfmask = ~fs->fs_fmask; /* XXX */ + } /* XXX */ + return (0); +} + +/* + * unmount system call + */ +int +ffs_unmount(mp, mntflags, p) + struct mount *mp; + int mntflags; + struct proc *p; +{ + register struct ufsmount *ump; + register struct fs *fs; + int error, flags, ronly; + + flags = 0; + if (mntflags & MNT_FORCE) { + if (mp->mnt_flag & MNT_ROOTFS) + return (EINVAL); + flags |= FORCECLOSE; + } + if (error = ffs_flushfiles(mp, flags, p)) + return (error); + ump = VFSTOUFS(mp); + fs = ump->um_fs; + ronly = !fs->fs_ronly; + ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + error = VOP_CLOSE(ump->um_devvp, ronly ? FREAD : FREAD|FWRITE, + NOCRED, p); + vrele(ump->um_devvp); + free(fs->fs_csp[0], M_UFSMNT); + free(fs, M_UFSMNT); + free(ump, M_UFSMNT); + mp->mnt_data = (qaddr_t)0; + mp->mnt_flag &= ~MNT_LOCAL; + return (error); +} + +/* + * Flush out all the files in a filesystem. 
+ */ +ffs_flushfiles(mp, flags, p) + register struct mount *mp; + int flags; + struct proc *p; +{ + extern int doforce; + register struct ufsmount *ump; + int i, error; + + if (!doforce) + flags &= ~FORCECLOSE; + ump = VFSTOUFS(mp); +#ifdef QUOTA + if (mp->mnt_flag & MNT_QUOTA) { + if (error = vflush(mp, NULLVP, SKIPSYSTEM|flags)) + return (error); + for (i = 0; i < MAXQUOTAS; i++) { + if (ump->um_quotas[i] == NULLVP) + continue; + quotaoff(p, mp, i); + } + /* + * Here we fall through to vflush again to ensure + * that we have gotten rid of all the system vnodes. + */ + } +#endif + error = vflush(mp, NULLVP, flags); + return (error); +} + +/* + * Get file system statistics. + */ +int +ffs_statfs(mp, sbp, p) + struct mount *mp; + register struct statfs *sbp; + struct proc *p; +{ + register struct ufsmount *ump; + register struct fs *fs; + + ump = VFSTOUFS(mp); + fs = ump->um_fs; + if (fs->fs_magic != FS_MAGIC) + panic("ffs_statfs"); + sbp->f_type = MOUNT_UFS; + sbp->f_bsize = fs->fs_fsize; + sbp->f_iosize = fs->fs_bsize; + sbp->f_blocks = fs->fs_dsize; + sbp->f_bfree = fs->fs_cstotal.cs_nbfree * fs->fs_frag + + fs->fs_cstotal.cs_nffree; + sbp->f_bavail = (fs->fs_dsize * (100 - fs->fs_minfree) / 100) - + (fs->fs_dsize - sbp->f_bfree); + sbp->f_files = fs->fs_ncg * fs->fs_ipg - ROOTINO; + sbp->f_ffree = fs->fs_cstotal.cs_nifree; + if (sbp != &mp->mnt_stat) { + bcopy((caddr_t)mp->mnt_stat.f_mntonname, + (caddr_t)&sbp->f_mntonname[0], MNAMELEN); + bcopy((caddr_t)mp->mnt_stat.f_mntfromname, + (caddr_t)&sbp->f_mntfromname[0], MNAMELEN); + } + return (0); +} + +/* + * Go through the disk queues to initiate sandbagged IO; + * go through the inodes to write those that have been modified; + * initiate the writing of the super block if it has been modified. + * + * Note: we are always called with the filesystem marked `MPBUSY'. 
+ */ +int +ffs_sync(mp, waitfor, cred, p) + struct mount *mp; + int waitfor; + struct ucred *cred; + struct proc *p; +{ + register struct vnode *vp; + register struct inode *ip; + register struct ufsmount *ump = VFSTOUFS(mp); + register struct fs *fs; + int error, allerror = 0; + + fs = ump->um_fs; + /* + * Write back modified superblock. + * Consistency check that the superblock + * is still in the buffer cache. + */ + if (fs->fs_fmod != 0) { + if (fs->fs_ronly != 0) { /* XXX */ + printf("fs = %s\n", fs->fs_fsmnt); + panic("update: rofs mod"); + } + fs->fs_fmod = 0; + fs->fs_time = time.tv_sec; + allerror = ffs_sbupdate(ump, waitfor); + } + /* + * Write back each (modified) inode. + */ +loop: + for (vp = mp->mnt_vnodelist.lh_first; + vp != NULL; + vp = vp->v_mntvnodes.le_next) { + /* + * If the vnode that we are about to sync is no longer + * associated with this mount point, start over. + */ + if (vp->v_mount != mp) + goto loop; + if (VOP_ISLOCKED(vp)) + continue; + ip = VTOI(vp); + if ((ip->i_flag & + (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 && + vp->v_dirtyblkhd.lh_first == NULL) + continue; + if (vget(vp, 1)) + goto loop; + if (error = VOP_FSYNC(vp, cred, waitfor, p)) + allerror = error; + vput(vp); + } + /* + * Force stale file system control information to be flushed. + */ + if (error = VOP_FSYNC(ump->um_devvp, cred, waitfor, p)) + allerror = error; +#ifdef QUOTA + qsync(mp); +#endif + return (allerror); +} + +/* + * Look up a FFS dinode number to find its incore vnode, otherwise read it + * in from disk. If it is in core, wait for the lock bit to clear, then + * return the inode locked. Detection and handling of mount points must be + * done by the calling routine. 
+ */ +int +ffs_vget(mp, ino, vpp) + struct mount *mp; + ino_t ino; + struct vnode **vpp; +{ + register struct fs *fs; + register struct inode *ip; + struct ufsmount *ump; + struct buf *bp; + struct vnode *vp; + dev_t dev; + int i, type, error; + + ump = VFSTOUFS(mp); + dev = ump->um_dev; + if ((*vpp = ufs_ihashget(dev, ino)) != NULL) + return (0); + + /* Allocate a new vnode/inode. */ + if (error = getnewvnode(VT_UFS, mp, ffs_vnodeop_p, &vp)) { + *vpp = NULL; + return (error); + } + type = ump->um_devvp->v_tag == VT_MFS ? M_MFSNODE : M_FFSNODE; /* XXX */ + MALLOC(ip, struct inode *, sizeof(struct inode), type, M_WAITOK); + bzero((caddr_t)ip, sizeof(struct inode)); + vp->v_data = ip; + ip->i_vnode = vp; + ip->i_fs = fs = ump->um_fs; + ip->i_dev = dev; + ip->i_number = ino; +#ifdef QUOTA + for (i = 0; i < MAXQUOTAS; i++) + ip->i_dquot[i] = NODQUOT; +#endif + /* + * Put it onto its hash chain and lock it so that other requests for + * this inode will block if they arrive while we are sleeping waiting + * for old data structures to be purged or for the contents of the + * disk portion of this inode to be read. + */ + ufs_ihashins(ip); + + /* Read in the disk contents for the inode, copy into the inode. */ + if (error = bread(ump->um_devvp, fsbtodb(fs, ino_to_fsba(fs, ino)), + (int)fs->fs_bsize, NOCRED, &bp)) { + /* + * The inode does not contain anything useful, so it would + * be misleading to leave it on its hash chain. With mode + * still zero, it will be unlinked and returned to the free + * list by vput(). + */ + vput(vp); + brelse(bp); + *vpp = NULL; + return (error); + } + ip->i_din = *((struct dinode *)bp->b_data + ino_to_fsbo(fs, ino)); + brelse(bp); + + /* + * Initialize the vnode from the inode, check for aliases. + * Note that the underlying vnode may have changed. + */ + if (error = ufs_vinit(mp, ffs_specop_p, FFS_FIFOOPS, &vp)) { + vput(vp); + *vpp = NULL; + return (error); + } + /* + * Finish inode initialization now that aliasing has been resolved. 
+ */ + ip->i_devvp = ump->um_devvp; + VREF(ip->i_devvp); + /* + * Set up a generation number for this inode if it does not + * already have one. This should only happen on old filesystems. + */ + if (ip->i_gen == 0) { + if (++nextgennumber < (u_long)time.tv_sec) + nextgennumber = time.tv_sec; + ip->i_gen = nextgennumber; + if ((vp->v_mount->mnt_flag & MNT_RDONLY) == 0) + ip->i_flag |= IN_MODIFIED; + } + /* + * Ensure that uid and gid are correct. This is a temporary + * fix until fsck has been changed to do the update. + */ + if (fs->fs_inodefmt < FS_44INODEFMT) { /* XXX */ + ip->i_uid = ip->i_din.di_ouid; /* XXX */ + ip->i_gid = ip->i_din.di_ogid; /* XXX */ + } /* XXX */ + + *vpp = vp; + return (0); +} + +/* + * File handle to vnode + * + * Have to be really careful about stale file handles: + * - check that the inode number is valid + * - call ffs_vget() to get the locked inode + * - check for an unallocated inode (i_mode == 0) + * - check that the given client host has export rights and return + * those rights via. exflagsp and credanonp + */ +int +ffs_fhtovp(mp, fhp, nam, vpp, exflagsp, credanonp) + register struct mount *mp; + struct fid *fhp; + struct mbuf *nam; + struct vnode **vpp; + int *exflagsp; + struct ucred **credanonp; +{ + register struct ufid *ufhp; + struct fs *fs; + + ufhp = (struct ufid *)fhp; + fs = VFSTOUFS(mp)->um_fs; + if (ufhp->ufid_ino < ROOTINO || + ufhp->ufid_ino >= fs->fs_ncg * fs->fs_ipg) + return (ESTALE); + return (ufs_check_export(mp, ufhp, nam, vpp, exflagsp, credanonp)); +} + +/* + * Vnode pointer to File handle + */ +/* ARGSUSED */ +ffs_vptofh(vp, fhp) + struct vnode *vp; + struct fid *fhp; +{ + register struct inode *ip; + register struct ufid *ufhp; + + ip = VTOI(vp); + ufhp = (struct ufid *)fhp; + ufhp->ufid_len = sizeof(struct ufid); + ufhp->ufid_ino = ip->i_number; + ufhp->ufid_gen = ip->i_gen; + return (0); +} + +/* + * Write a superblock and associated information back to disk. 
+ */ +int +ffs_sbupdate(mp, waitfor) + struct ufsmount *mp; + int waitfor; +{ + register struct fs *fs = mp->um_fs; + register struct buf *bp; + int blks; + caddr_t space; + int i, size, error = 0; + + bp = getblk(mp->um_devvp, SBLOCK, (int)fs->fs_sbsize, 0, 0); + bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize); + /* Restore compatibility to old file systems. XXX */ + if (fs->fs_postblformat == FS_42POSTBLFMT) /* XXX */ + ((struct fs *)bp->b_data)->fs_nrpos = -1; /* XXX */ + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + blks = howmany(fs->fs_cssize, fs->fs_fsize); + space = (caddr_t)fs->fs_csp[0]; + for (i = 0; i < blks; i += fs->fs_frag) { + size = fs->fs_bsize; + if (i + fs->fs_frag > blks) + size = (blks - i) * fs->fs_fsize; + bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i), + size, 0, 0); + bcopy(space, bp->b_data, (u_int)size); + space += size; + if (waitfor == MNT_WAIT) + error = bwrite(bp); + else + bawrite(bp); + } + return (error); +} diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c new file mode 100644 index 0000000..59814f2 --- /dev/null +++ b/sys/ufs/ffs/ffs_vnops.c @@ -0,0 +1,288 @@ +/* + * Copyright (c) 1982, 1986, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)ffs_vnops.c 8.7 (Berkeley) 2/3/94 + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/resourcevar.h> +#include <sys/kernel.h> +#include <sys/file.h> +#include <sys/stat.h> +#include <sys/buf.h> +#include <sys/proc.h> +#include <sys/conf.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/malloc.h> + +#include <vm/vm.h> + +#include <miscfs/specfs/specdev.h> +#include <miscfs/fifofs/fifo.h> + +#include <ufs/ufs/lockf.h> +#include <ufs/ufs/quota.h> +#include <ufs/ufs/inode.h> +#include <ufs/ufs/dir.h> +#include <ufs/ufs/ufs_extern.h> + +#include <ufs/ffs/fs.h> +#include <ufs/ffs/ffs_extern.h> + +/* Global vfs data structures for ufs. 
*/ +int (**ffs_vnodeop_p)(); +struct vnodeopv_entry_desc ffs_vnodeop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, ufs_lookup }, /* lookup */ + { &vop_create_desc, ufs_create }, /* create */ + { &vop_mknod_desc, ufs_mknod }, /* mknod */ + { &vop_open_desc, ufs_open }, /* open */ + { &vop_close_desc, ufs_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ffs_read }, /* read */ + { &vop_write_desc, ffs_write }, /* write */ + { &vop_ioctl_desc, ufs_ioctl }, /* ioctl */ + { &vop_select_desc, ufs_select }, /* select */ + { &vop_mmap_desc, ufs_mmap }, /* mmap */ + { &vop_fsync_desc, ffs_fsync }, /* fsync */ + { &vop_seek_desc, ufs_seek }, /* seek */ + { &vop_remove_desc, ufs_remove }, /* remove */ + { &vop_link_desc, ufs_link }, /* link */ + { &vop_rename_desc, ufs_rename }, /* rename */ + { &vop_mkdir_desc, ufs_mkdir }, /* mkdir */ + { &vop_rmdir_desc, ufs_rmdir }, /* rmdir */ + { &vop_symlink_desc, ufs_symlink }, /* symlink */ + { &vop_readdir_desc, ufs_readdir }, /* readdir */ + { &vop_readlink_desc, ufs_readlink }, /* readlink */ + { &vop_abortop_desc, ufs_abortop }, /* abortop */ + { &vop_inactive_desc, ufs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, ufs_bmap }, /* bmap */ + { &vop_strategy_desc, ufs_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, ufs_pathconf }, /* pathconf */ + { &vop_advlock_desc, ufs_advlock }, /* advlock */ + { &vop_blkatoff_desc, ffs_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, ffs_valloc }, /* valloc */ + { &vop_reallocblks_desc, ffs_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ffs_vfree }, /* vfree */ + { 
&vop_truncate_desc, ffs_truncate }, /* truncate */ + { &vop_update_desc, ffs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ffs_vnodeop_opv_desc = + { &ffs_vnodeop_p, ffs_vnodeop_entries }; + +int (**ffs_specop_p)(); +struct vnodeopv_entry_desc ffs_specop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, spec_lookup }, /* lookup */ + { &vop_create_desc, spec_create }, /* create */ + { &vop_mknod_desc, spec_mknod }, /* mknod */ + { &vop_open_desc, spec_open }, /* open */ + { &vop_close_desc, ufsspec_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsspec_read }, /* read */ + { &vop_write_desc, ufsspec_write }, /* write */ + { &vop_ioctl_desc, spec_ioctl }, /* ioctl */ + { &vop_select_desc, spec_select }, /* select */ + { &vop_mmap_desc, spec_mmap }, /* mmap */ + { &vop_fsync_desc, ffs_fsync }, /* fsync */ + { &vop_seek_desc, spec_seek }, /* seek */ + { &vop_remove_desc, spec_remove }, /* remove */ + { &vop_link_desc, spec_link }, /* link */ + { &vop_rename_desc, spec_rename }, /* rename */ + { &vop_mkdir_desc, spec_mkdir }, /* mkdir */ + { &vop_rmdir_desc, spec_rmdir }, /* rmdir */ + { &vop_symlink_desc, spec_symlink }, /* symlink */ + { &vop_readdir_desc, spec_readdir }, /* readdir */ + { &vop_readlink_desc, spec_readlink }, /* readlink */ + { &vop_abortop_desc, spec_abortop }, /* abortop */ + { &vop_inactive_desc, ufs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + { &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, spec_bmap }, /* bmap */ + { &vop_strategy_desc, spec_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, 
spec_pathconf }, /* pathconf */ + { &vop_advlock_desc, spec_advlock }, /* advlock */ + { &vop_blkatoff_desc, spec_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, spec_valloc }, /* valloc */ + { &vop_reallocblks_desc, spec_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ffs_vfree }, /* vfree */ + { &vop_truncate_desc, spec_truncate }, /* truncate */ + { &vop_update_desc, ffs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ffs_specop_opv_desc = + { &ffs_specop_p, ffs_specop_entries }; + +#ifdef FIFO +int (**ffs_fifoop_p)(); +struct vnodeopv_entry_desc ffs_fifoop_entries[] = { + { &vop_default_desc, vn_default_error }, + { &vop_lookup_desc, fifo_lookup }, /* lookup */ + { &vop_create_desc, fifo_create }, /* create */ + { &vop_mknod_desc, fifo_mknod }, /* mknod */ + { &vop_open_desc, fifo_open }, /* open */ + { &vop_close_desc, ufsfifo_close }, /* close */ + { &vop_access_desc, ufs_access }, /* access */ + { &vop_getattr_desc, ufs_getattr }, /* getattr */ + { &vop_setattr_desc, ufs_setattr }, /* setattr */ + { &vop_read_desc, ufsfifo_read }, /* read */ + { &vop_write_desc, ufsfifo_write }, /* write */ + { &vop_ioctl_desc, fifo_ioctl }, /* ioctl */ + { &vop_select_desc, fifo_select }, /* select */ + { &vop_mmap_desc, fifo_mmap }, /* mmap */ + { &vop_fsync_desc, ffs_fsync }, /* fsync */ + { &vop_seek_desc, fifo_seek }, /* seek */ + { &vop_remove_desc, fifo_remove }, /* remove */ + { &vop_link_desc, fifo_link }, /* link */ + { &vop_rename_desc, fifo_rename }, /* rename */ + { &vop_mkdir_desc, fifo_mkdir }, /* mkdir */ + { &vop_rmdir_desc, fifo_rmdir }, /* rmdir */ + { &vop_symlink_desc, fifo_symlink }, /* symlink */ + { &vop_readdir_desc, fifo_readdir }, /* readdir */ + { &vop_readlink_desc, fifo_readlink }, /* readlink */ + { &vop_abortop_desc, fifo_abortop }, /* abortop */ + { &vop_inactive_desc, ufs_inactive }, /* inactive */ + { &vop_reclaim_desc, ufs_reclaim }, /* reclaim */ + 
{ &vop_lock_desc, ufs_lock }, /* lock */ + { &vop_unlock_desc, ufs_unlock }, /* unlock */ + { &vop_bmap_desc, fifo_bmap }, /* bmap */ + { &vop_strategy_desc, fifo_strategy }, /* strategy */ + { &vop_print_desc, ufs_print }, /* print */ + { &vop_islocked_desc, ufs_islocked }, /* islocked */ + { &vop_pathconf_desc, fifo_pathconf }, /* pathconf */ + { &vop_advlock_desc, fifo_advlock }, /* advlock */ + { &vop_blkatoff_desc, fifo_blkatoff }, /* blkatoff */ + { &vop_valloc_desc, fifo_valloc }, /* valloc */ + { &vop_reallocblks_desc, fifo_reallocblks }, /* reallocblks */ + { &vop_vfree_desc, ffs_vfree }, /* vfree */ + { &vop_truncate_desc, fifo_truncate }, /* truncate */ + { &vop_update_desc, ffs_update }, /* update */ + { &vop_bwrite_desc, vn_bwrite }, + { (struct vnodeop_desc*)NULL, (int(*)())NULL } +}; +struct vnodeopv_desc ffs_fifoop_opv_desc = + { &ffs_fifoop_p, ffs_fifoop_entries }; +#endif /* FIFO */ + +#ifdef DEBUG +/* + * Enabling cluster read/write operations. + */ +#include <sys/sysctl.h> +int doclusterread = 1; +struct ctldebug debug11 = { "doclusterread", &doclusterread }; +int doclusterwrite = 1; +struct ctldebug debug12 = { "doclusterwrite", &doclusterwrite }; +#else +/* XXX for ufs_readwrite */ +#define doclusterread 1 +#define doclusterwrite 1 +#endif + +#include <ufs/ufs/ufs_readwrite.c> + +/* + * Synch an open file. + */ +/* ARGSUSED */ +int +ffs_fsync(ap) + struct vop_fsync_args /* { + struct vnode *a_vp; + struct ucred *a_cred; + int a_waitfor; + struct proc *a_p; + } */ *ap; +{ + register struct vnode *vp = ap->a_vp; + register struct buf *bp; + struct timeval tv; + struct buf *nbp; + int s; + + /* + * Flush all dirty buffers associated with a vnode. 
+ */ +loop: + s = splbio(); + for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { + nbp = bp->b_vnbufs.le_next; + if ((bp->b_flags & B_BUSY)) + continue; + if ((bp->b_flags & B_DELWRI) == 0) + panic("ffs_fsync: not dirty"); + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + /* + * Wait for I/O associated with indirect blocks to complete, + * since there is no way to quickly wait for them below. + */ + if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) + (void) bawrite(bp); + else + (void) bwrite(bp); + goto loop; + } + if (ap->a_waitfor == MNT_WAIT) { + while (vp->v_numoutput) { + vp->v_flag |= VBWAIT; + sleep((caddr_t)&vp->v_numoutput, PRIBIO + 1); + } +#ifdef DIAGNOSTIC + if (vp->v_dirtyblkhd.lh_first) { + vprint("ffs_fsync: dirty", vp); + goto loop; + } +#endif + } + splx(s); + tv = time; + return (VOP_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); +} diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h new file mode 100644 index 0000000..bef052f --- /dev/null +++ b/sys/ufs/ffs/fs.h @@ -0,0 +1,489 @@ +/* + * Copyright (c) 1982, 1986, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. 
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)fs.h 8.7 (Berkeley) 4/19/94 + */ + +/* + * Each disk drive contains some number of file systems. + * A file system consists of a number of cylinder groups. + * Each cylinder group has inodes and data. + * + * A file system is described by its super-block, which in turn + * describes the cylinder groups. The super-block is critical + * data and is replicated in each cylinder group to protect against + * catastrophic loss. This is done at `newfs' time and the critical + * super-block data does not change, so the copies need not be + * referenced further unless disaster strikes. + * + * For file system fs, the offsets of the various blocks of interest + * are given in the super block as: + * [fs->fs_sblkno] Super-block + * [fs->fs_cblkno] Cylinder group block + * [fs->fs_iblkno] Inode blocks + * [fs->fs_dblkno] Data blocks + * The beginning of cylinder group cg in fs, is given by + * the ``cgbase(fs, cg)'' macro. + * + * The first boot and super blocks are given in absolute disk addresses. 
+ * The byte-offset forms are preferred, as they don't imply a sector size. + */ +#define BBSIZE 8192 +#define SBSIZE 8192 +#define BBOFF ((off_t)(0)) +#define SBOFF ((off_t)(BBOFF + BBSIZE)) +#define BBLOCK ((daddr_t)(0)) +#define SBLOCK ((daddr_t)(BBLOCK + BBSIZE / DEV_BSIZE)) + +/* + * Addresses stored in inodes are capable of addressing fragments + * of `blocks'. File system blocks of at most size MAXBSIZE can + * be optionally broken into 2, 4, or 8 pieces, each of which is + * addressable; these pieces may be DEV_BSIZE, or some multiple of + * a DEV_BSIZE unit. + * + * Large files consist of exclusively large data blocks. To avoid + * undue wasted disk space, the last data block of a small file may be + * allocated as only as many fragments of a large block as are + * necessary. The file system format retains only a single pointer + * to such a fragment, which is a piece of a single large block that + * has been divided. The size of such a fragment is determinable from + * information in the inode, using the ``blksize(fs, ip, lbn)'' macro. + * + * The file system records space availability at the fragment level; + * to determine block availability, aligned fragments are examined. + */ + +/* + * MINBSIZE is the smallest allowable block size. + * In order to ensure that it is possible to create files of size + * 2^32 with only two levels of indirection, MINBSIZE is set to 4096. + * MINBSIZE must be big enough to hold a cylinder group block, + * thus changes to (struct cg) must keep its size within MINBSIZE. + * Note that super blocks are always of size SBSIZE, + * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE. + */ +#define MINBSIZE 4096 + +/* + * The path name on which the file system is mounted is maintained + * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in + * the super block for this name. + * The limit on the amount of summary information per file system + * is defined by MAXCSBUFS. 
It is currently parameterized for a + * maximum of two million cylinders. + */ +#define MAXMNTLEN 512 +#define MAXCSBUFS 32 + +/* + * A summary of contiguous blocks of various sizes is maintained + * in each cylinder group. Normally this is set by the initial + * value of fs_maxcontig. To conserve space, a maximum summary size + * is set by FS_MAXCONTIG. + */ +#define FS_MAXCONTIG 16 + +/* + * MINFREE gives the minimum acceptable percentage of file system + * blocks which may be free. If the freelist drops below this level + * only the superuser may continue to allocate blocks. This may + * be set to 0 if no reserve of free blocks is deemed necessary, + * however throughput drops by fifty percent if the file system + * is run at between 95% and 100% full; thus the minimum default + * value of fs_minfree is 5%. However, to get good clustering + * performance, 10% is a better choice. Note that the MINFREE + * default defined below is 5, not 10. With 10% free space, + * fragmentation is not a problem, so we choose to optimize for time. + */ +#define MINFREE 5 +#define DEFAULTOPT FS_OPTTIME + +/* + * Per cylinder group information; summarized in blocks allocated + * from first cylinder group data blocks. These blocks have to be + * read in from fs_csaddr (size fs_cssize) in addition to the + * super block. + * + * N.B. sizeof(struct csum) must be a power of two in order for + * the ``fs_cs'' macro to work (see below). + */ +struct csum { + long cs_ndir; /* number of directories */ + long cs_nbfree; /* number of free blocks */ + long cs_nifree; /* number of free inodes */ + long cs_nffree; /* number of free frags */ +}; + +/* + * Super block for a file system. 
+ */ +struct fs { + struct fs *fs_link; /* linked list of file systems */ + struct fs *fs_rlink; /* used for incore super blocks */ + daddr_t fs_sblkno; /* addr of super-block in filesys */ + daddr_t fs_cblkno; /* offset of cyl-block in filesys */ + daddr_t fs_iblkno; /* offset of inode-blocks in filesys */ + daddr_t fs_dblkno; /* offset of first data after cg */ + long fs_cgoffset; /* cylinder group offset in cylinder */ + long fs_cgmask; /* used to calc mod fs_ntrak */ + time_t fs_time; /* last time written */ + long fs_size; /* number of blocks in fs */ + long fs_dsize; /* number of data blocks in fs */ + long fs_ncg; /* number of cylinder groups */ + long fs_bsize; /* size of basic blocks in fs */ + long fs_fsize; /* size of frag blocks in fs */ + long fs_frag; /* number of frags in a block in fs */ +/* these are configuration parameters */ + long fs_minfree; /* minimum percentage of free blocks */ + long fs_rotdelay; /* num of ms for optimal next block */ + long fs_rps; /* disk revolutions per second */ +/* these fields can be computed from the others */ + long fs_bmask; /* ``blkoff'' calc of blk offsets */ + long fs_fmask; /* ``fragoff'' calc of frag offsets */ + long fs_bshift; /* ``lblkno'' calc of logical blkno */ + long fs_fshift; /* ``numfrags'' calc number of frags */ +/* these are configuration parameters */ + long fs_maxcontig; /* max number of contiguous blks */ + long fs_maxbpg; /* max number of blks per cyl group */ +/* these fields can be computed from the others */ + long fs_fragshift; /* block to frag shift */ + long fs_fsbtodb; /* fsbtodb and dbtofsb shift constant */ + long fs_sbsize; /* actual size of super block */ + long fs_csmask; /* csum block offset */ + long fs_csshift; /* csum block number */ + long fs_nindir; /* value of NINDIR */ + long fs_inopb; /* value of INOPB */ + long fs_nspf; /* value of NSPF */ +/* yet another configuration parameter */ + long fs_optim; /* optimization preference, see below */ +/* these fields are derived 
from the hardware */ + long fs_npsect; /* # sectors/track including spares */ + long fs_interleave; /* hardware sector interleave */ + long fs_trackskew; /* sector 0 skew, per track */ + long fs_headswitch; /* head switch time, usec */ + long fs_trkseek; /* track-to-track seek, usec */ +/* sizes determined by number of cylinder groups and their sizes */ + daddr_t fs_csaddr; /* blk addr of cyl grp summary area */ + long fs_cssize; /* size of cyl grp summary area */ + long fs_cgsize; /* cylinder group size */ +/* these fields are derived from the hardware */ + long fs_ntrak; /* tracks per cylinder */ + long fs_nsect; /* sectors per track */ + long fs_spc; /* sectors per cylinder */ +/* this comes from the disk driver partitioning */ + long fs_ncyl; /* cylinders in file system */ +/* these fields can be computed from the others */ + long fs_cpg; /* cylinders per group */ + long fs_ipg; /* inodes per group */ + long fs_fpg; /* blocks per group * fs_frag */ +/* this data must be re-computed after crashes */ + struct csum fs_cstotal; /* cylinder summary information */ +/* these fields are cleared at mount time */ + char fs_fmod; /* super block modified flag */ + char fs_clean; /* file system is clean flag */ + char fs_ronly; /* mounted read-only flag */ + char fs_flags; /* currently unused flag */ + char fs_fsmnt[MAXMNTLEN]; /* name mounted on */ +/* these fields retain the current block allocation info */ + long fs_cgrotor; /* last cg searched */ + struct csum *fs_csp[MAXCSBUFS];/* list of fs_cs info buffers */ + long fs_cpc; /* cyl per cycle in postbl */ + short fs_opostbl[16][8]; /* old rotation block list head */ + long fs_sparecon[50]; /* reserved for future constants */ + long fs_contigsumsize; /* size of cluster summary array */ + long fs_maxsymlinklen; /* max length of an internal symlink */ + long fs_inodefmt; /* format of on-disk inodes */ + u_quad_t fs_maxfilesize; /* maximum representable file size */ + quad_t fs_qbmask; /* ~fs_bmask - for use with quad size 
*/ + quad_t fs_qfmask; /* ~fs_fmask - for use with quad size */ + long fs_state; /* validate fs_clean field */ + long fs_postblformat; /* format of positional layout tables */ + long fs_nrpos; /* number of rotational positions */ + long fs_postbloff; /* (short) rotation block list head */ + long fs_rotbloff; /* (u_char) blocks for each rotation */ + long fs_magic; /* magic number */ + u_char fs_space[1]; /* list of blocks for each rotation */ +/* actually longer */ +}; +/* + * Filesystem identification + */ +#define FS_MAGIC 0x011954 /* the fast filesystem magic number */ +#define FS_OKAY 0x7c269d38 /* superblock checksum */ +#define FS_42INODEFMT -1 /* 4.2BSD inode format */ +#define FS_44INODEFMT 2 /* 4.4BSD inode format */ +/* + * Preference for optimization. + */ +#define FS_OPTTIME 0 /* minimize allocation time */ +#define FS_OPTSPACE 1 /* minimize disk fragmentation */ + +/* + * Rotational layout table format types + */ +#define FS_42POSTBLFMT -1 /* 4.2BSD rotational table format */ +#define FS_DYNAMICPOSTBLFMT 1 /* dynamic rotational table format */ +/* + * Macros for access to superblock array structures + */ +#define fs_postbl(fs, cylno) \ + (((fs)->fs_postblformat == FS_42POSTBLFMT) \ + ? ((fs)->fs_opostbl[cylno]) \ + : ((short *)((char *)(fs) + (fs)->fs_postbloff) + (cylno) * (fs)->fs_nrpos)) +#define fs_rotbl(fs) \ + (((fs)->fs_postblformat == FS_42POSTBLFMT) \ + ? ((fs)->fs_space) \ + : ((u_char *)((char *)(fs) + (fs)->fs_rotbloff))) + +/* + * The size of a cylinder group is calculated by CGSIZE. The maximum size + * is limited by the fact that cylinder groups are at most one block. + * Its size is derived from the size of the maps maintained in the + * cylinder group and the (struct cg) size. 
+ */ +#define CGSIZE(fs) \ + /* base cg */ (sizeof(struct cg) + sizeof(long) + \ + /* blktot size */ (fs)->fs_cpg * sizeof(long) + \ + /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof(short) + \ + /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ + /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY) +\ + /* if present */ ((fs)->fs_contigsumsize <= 0 ? 0 : \ + /* cluster sum */ (fs)->fs_contigsumsize * sizeof(long) + \ + /* cluster map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPB(fs), NBBY))) + +/* + * Convert cylinder group to base address of its global summary info. + * + * N.B. This macro assumes that sizeof(struct csum) is a power of two. + */ +#define fs_cs(fs, indx) \ + fs_csp[(indx) >> (fs)->fs_csshift][(indx) & ~(fs)->fs_csmask] + +/* + * Cylinder group block for a file system. + */ +#define CG_MAGIC 0x090255 +struct cg { + struct cg *cg_link; /* linked list of cyl groups */ + long cg_magic; /* magic number */ + time_t cg_time; /* time last written */ + long cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + long cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + long cg_rotor; /* position of last used block */ + long cg_frotor; /* position of last used frag */ + long cg_irotor; /* position of last used inode */ + long cg_frsum[MAXFRAG]; /* counts of available frags */ + long cg_btotoff; /* (long) block totals per cylinder */ + long cg_boff; /* (short) free block positions */ + long cg_iusedoff; /* (char) used inode map */ + long cg_freeoff; /* (u_char) free block map */ + long cg_nextfreeoff; /* (u_char) next available space */ + long cg_clustersumoff; /* (long) counts of avail clusters */ + long cg_clusteroff; /* (char) free cluster map */ + long cg_nclusterblks; /* number of clusters this cg */ + long cg_sparecon[13]; /* reserved for future use */ + u_char cg_space[1]; /* space for 
cylinder group maps */ +/* actually longer */ +}; +/* + * Macros for access to cylinder group array structures + */ +#define cg_blktot(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_btot) \ + : ((long *)((char *)(cgp) + (cgp)->cg_btotoff))) +#define cg_blks(fs, cgp, cylno) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_b[cylno]) \ + : ((short *)((char *)(cgp) + (cgp)->cg_boff) + (cylno) * (fs)->fs_nrpos)) +#define cg_inosused(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_iused) \ + : ((char *)((char *)(cgp) + (cgp)->cg_iusedoff))) +#define cg_blksfree(cgp) \ + (((cgp)->cg_magic != CG_MAGIC) \ + ? (((struct ocg *)(cgp))->cg_free) \ + : ((u_char *)((char *)(cgp) + (cgp)->cg_freeoff))) +#define cg_chkmagic(cgp) \ + ((cgp)->cg_magic == CG_MAGIC || ((struct ocg *)(cgp))->cg_magic == CG_MAGIC) +#define cg_clustersfree(cgp) \ + ((u_char *)((char *)(cgp) + (cgp)->cg_clusteroff)) +#define cg_clustersum(cgp) \ + ((long *)((char *)(cgp) + (cgp)->cg_clustersumoff)) + +/* + * The following structure is defined + * for compatibility with old file systems. 
+ */ +struct ocg { + struct ocg *cg_link; /* linked list of cyl groups */ + struct ocg *cg_rlink; /* used for incore cyl groups */ + time_t cg_time; /* time last written */ + long cg_cgx; /* we are the cgx'th cylinder group */ + short cg_ncyl; /* number of cyl's this cg */ + short cg_niblk; /* number of inode blocks this cg */ + long cg_ndblk; /* number of data blocks this cg */ + struct csum cg_cs; /* cylinder summary information */ + long cg_rotor; /* position of last used block */ + long cg_frotor; /* position of last used frag */ + long cg_irotor; /* position of last used inode */ + long cg_frsum[8]; /* counts of available frags */ + long cg_btot[32]; /* block totals per cylinder */ + short cg_b[32][8]; /* positions of free blocks */ + char cg_iused[256]; /* used inode map */ + long cg_magic; /* magic number */ + u_char cg_free[1]; /* free block map */ +/* actually longer */ +}; + +/* + * Turn file system block numbers into disk block addresses. + * This maps file system blocks to device size blocks. + */ +#define fsbtodb(fs, b) ((b) << (fs)->fs_fsbtodb) +#define dbtofsb(fs, b) ((b) >> (fs)->fs_fsbtodb) + +/* + * Cylinder group macros to locate things in cylinder groups. + * They calc file system addresses of cylinder group data structures. + */ +#define cgbase(fs, c) ((daddr_t)((fs)->fs_fpg * (c))) +#define cgdmin(fs, c) (cgstart(fs, c) + (fs)->fs_dblkno) /* 1st data */ +#define cgimin(fs, c) (cgstart(fs, c) + (fs)->fs_iblkno) /* inode blk */ +#define cgsblock(fs, c) (cgstart(fs, c) + (fs)->fs_sblkno) /* super blk */ +#define cgtod(fs, c) (cgstart(fs, c) + (fs)->fs_cblkno) /* cg block */ +#define cgstart(fs, c) \ + (cgbase(fs, c) + (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask))) + +/* + * Macros for handling inode numbers: + * inode number to file system block offset. + * inode number to cylinder group number. + * inode number to file system block address. 
+ */ +#define ino_to_cg(fs, x) ((x) / (fs)->fs_ipg) +#define ino_to_fsba(fs, x) \ + ((daddr_t)(cgimin(fs, ino_to_cg(fs, x)) + \ + (blkstofrags((fs), (((x) % (fs)->fs_ipg) / INOPB(fs)))))) +#define ino_to_fsbo(fs, x) ((x) % INOPB(fs)) + +/* + * Give cylinder group number for a file system block. + * Give cylinder group block number for a file system block. + */ +#define dtog(fs, d) ((d) / (fs)->fs_fpg) +#define dtogd(fs, d) ((d) % (fs)->fs_fpg) + +/* + * Extract the bits for a block from a map. + * Compute the cylinder and rotational position of a cyl block addr. + */ +#define blkmap(fs, map, loc) \ + (((map)[(loc) / NBBY] >> ((loc) % NBBY)) & (0xff >> (NBBY - (fs)->fs_frag))) +#define cbtocylno(fs, bno) \ + ((bno) * NSPF(fs) / (fs)->fs_spc) +#define cbtorpos(fs, bno) \ + (((bno) * NSPF(fs) % (fs)->fs_spc / (fs)->fs_nsect * (fs)->fs_trackskew + \ + (bno) * NSPF(fs) % (fs)->fs_spc % (fs)->fs_nsect * (fs)->fs_interleave) % \ + (fs)->fs_nsect * (fs)->fs_nrpos / (fs)->fs_npsect) + +/* + * The following macros optimize certain frequently calculated + * quantities by using shifts and masks in place of divisions + * modulos and multiplications. 
+ */ +#define blkoff(fs, loc) /* calculates (loc % fs->fs_bsize) */ \ + ((loc) & (fs)->fs_qbmask) +#define fragoff(fs, loc) /* calculates (loc % fs->fs_fsize) */ \ + ((loc) & (fs)->fs_qfmask) +#define lblktosize(fs, blk) /* calculates (blk * fs->fs_bsize) */ \ + ((blk) << (fs)->fs_bshift) +#define lblkno(fs, loc) /* calculates (loc / fs->fs_bsize) */ \ + ((loc) >> (fs)->fs_bshift) +#define numfrags(fs, loc) /* calculates (loc / fs->fs_fsize) */ \ + ((loc) >> (fs)->fs_fshift) +#define blkroundup(fs, size) /* calculates roundup(size, fs->fs_bsize) */ \ + (((size) + (fs)->fs_qbmask) & (fs)->fs_bmask) +#define fragroundup(fs, size) /* calculates roundup(size, fs->fs_fsize) */ \ + (((size) + (fs)->fs_qfmask) & (fs)->fs_fmask) +#define fragstoblks(fs, frags) /* calculates (frags / fs->fs_frag) */ \ + ((frags) >> (fs)->fs_fragshift) +#define blkstofrags(fs, blks) /* calculates (blks * fs->fs_frag) */ \ + ((blks) << (fs)->fs_fragshift) +#define fragnum(fs, fsb) /* calculates (fsb % fs->fs_frag) */ \ + ((fsb) & ((fs)->fs_frag - 1)) +#define blknum(fs, fsb) /* calculates rounddown(fsb, fs->fs_frag) */ \ + ((fsb) &~ ((fs)->fs_frag - 1)) + +/* + * Determine the number of available frags given a + * percentage to hold in reserve + */ +#define freespace(fs, percentreserved) \ + (blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \ + (fs)->fs_cstotal.cs_nffree - ((fs)->fs_dsize * (percentreserved) / 100)) + +/* + * Determining the size of a file block in the file system. + */ +#define blksize(fs, ip, lbn) \ + (((lbn) >= NDADDR || (ip)->i_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (ip)->i_size)))) +#define dblksize(fs, dip, lbn) \ + (((lbn) >= NDADDR || (dip)->di_size >= ((lbn) + 1) << (fs)->fs_bshift) \ + ? (fs)->fs_bsize \ + : (fragroundup(fs, blkoff(fs, (dip)->di_size)))) + +/* + * Number of disk sectors per block; assumes DEV_BSIZE byte sector size. 
+ */ +#define NSPB(fs) ((fs)->fs_nspf << (fs)->fs_fragshift) +#define NSPF(fs) ((fs)->fs_nspf) + +/* + * INOPB is the number of inodes in a secondary storage block. + */ +#define INOPB(fs) ((fs)->fs_inopb) +#define INOPF(fs) ((fs)->fs_inopb >> (fs)->fs_fragshift) + +/* + * NINDIR is the number of indirects in a file system block. + */ +#define NINDIR(fs) ((fs)->fs_nindir) + +extern int inside[], around[]; +extern u_char *fragtbl[]; |