Diffstat (limited to 'sys/ufs/ffs/ffs_alloc.c')
-rw-r--r--   sys/ufs/ffs/ffs_alloc.c   677
1 file changed, 449 insertions, 228 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 2dbb487..dbd757e 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1,4 +1,16 @@ /* + * Copyright (c) 2002 Networks Associates Technology, Inc. + * All rights reserved. + * + * This software was developed for the FreeBSD Project by Marshall + * Kirk McKusick and Network Associates Laboratories, the Security + * Research Division of Network Associates, Inc. under DARPA/SPAWAR + * contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA CHATS + * research program + * + * Copyright (c) 1982, 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. * Copyright (c) 1982, 1986, 1989, 1993 * The Regents of the University of California. All rights reserved. * @@ -46,6 +58,7 @@ #include <sys/vnode.h> #include <sys/mount.h> #include <sys/kernel.h> +#include <sys/stdint.h> #include <sys/sysctl.h> #include <sys/syslog.h> @@ -58,23 +71,25 @@ #include <ufs/ffs/fs.h> #include <ufs/ffs/ffs_extern.h> -typedef ufs_daddr_t allocfcn_t(struct inode *ip, int cg, ufs_daddr_t bpref, +typedef ufs2_daddr_t allocfcn_t(struct inode *ip, int cg, ufs2_daddr_t bpref, int size); -static ufs_daddr_t ffs_alloccg(struct inode *, int, ufs_daddr_t, int); -static ufs_daddr_t - ffs_alloccgblk(struct inode *, struct buf *, ufs_daddr_t); +static ufs2_daddr_t ffs_alloccg(struct inode *, int, ufs2_daddr_t, int); +static ufs2_daddr_t + ffs_alloccgblk(struct inode *, struct buf *, ufs2_daddr_t); #ifdef DIAGNOSTIC -static int ffs_checkblk(struct inode *, ufs_daddr_t, long); +static int ffs_checkblk(struct inode *, ufs2_daddr_t, long); #endif -static ufs_daddr_t ffs_clusteralloc(struct inode *, int, ufs_daddr_t, int); +static ufs2_daddr_t ffs_clusteralloc(struct inode *, int, ufs2_daddr_t, int); static ino_t ffs_dirpref(struct inode *); -static ufs_daddr_t ffs_fragextend(struct inode *, int, long, int, int); +static ufs2_daddr_t ffs_fragextend(struct inode *, int, ufs2_daddr_t, int, int); static void ffs_fserr(struct fs *, ino_t, char *); -static u_long ffs_hashalloc - (struct inode *, int, long, int, allocfcn_t *); -static ino_t ffs_nodealloccg(struct inode *, int, ufs_daddr_t, int); -static ufs_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs_daddr_t, int); +static ufs2_daddr_t ffs_hashalloc + (struct inode *, int, ufs2_daddr_t, int, allocfcn_t *); +static ino_t ffs_nodealloccg(struct inode *, int, ufs2_daddr_t, int); +static ufs1_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs2_daddr_t, int); +static int ffs_reallocblks_ufs1(struct vop_reallocblks_args *); +static int ffs_reallocblks_ufs2(struct vop_reallocblks_args *); /* * Allocate a block in the filesystem. 
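Most of the retyping in this hunk widens on-disk block addresses from the 32-bit ufs_daddr_t to the 64-bit ufs2_daddr_t (ufs1_daddr_t is kept for values that stay within one cylinder group), which is also why later hunks cast their diagnostic printf() arguments through intmax_t. A minimal stand-alone sketch of that printing idiom; the typedefs below are illustrative stand-ins, not the kernel headers:

#include <inttypes.h>
#include <stdio.h>

typedef int32_t ufs1_daddr_t;   /* illustrative: UFS1 32-bit block address */
typedef int64_t ufs2_daddr_t;   /* illustrative: UFS2 64-bit block address */

int
main(void)
{
    ufs2_daddr_t bno = (ufs2_daddr_t)1 << 40;   /* does not fit in 32 bits */

    /*
     * Casting to intmax_t and printing with %jd keeps one format string
     * correct on both 32-bit and 64-bit platforms.
     */
    printf("bno = %jd\n", (intmax_t)bno);
    return (0);
}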
@@ -98,13 +113,13 @@ static ufs_daddr_t ffs_mapsearch(struct fs *, struct cg *, ufs_daddr_t, int); int ffs_alloc(ip, lbn, bpref, size, cred, bnp) struct inode *ip; - ufs_daddr_t lbn, bpref; + ufs2_daddr_t lbn, bpref; int size; struct ucred *cred; - ufs_daddr_t *bnp; + ufs2_daddr_t *bnp; { struct fs *fs; - ufs_daddr_t bno; + ufs2_daddr_t bno; int cg, reclaimed; #ifdef QUOTA int error; @@ -130,7 +145,7 @@ retry: freespace(fs, fs->fs_minfree) - numfrags(fs, size) < 0) goto nospace; #ifdef QUOTA - error = chkdq(ip, (long)btodb(size), cred, 0); + error = chkdq(ip, btodb(size), cred, 0); if (error) return (error); #endif @@ -140,10 +155,9 @@ retry: cg = ino_to_cg(fs, ip->i_number); else cg = dtog(fs, bpref); - bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, size, - ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, size, ffs_alloccg); if (bno > 0) { - ip->i_blocks += btodb(size); + DIP(ip, i_blocks) += btodb(size); ip->i_flag |= IN_CHANGE | IN_UPDATE; *bnp = bno; return (0); @@ -152,7 +166,7 @@ retry: /* * Restore user's disk quota because allocation failed. */ - (void) chkdq(ip, (long)-btodb(size), cred, FORCE); + (void) chkdq(ip, -btodb(size), cred, FORCE); #endif nospace: if (fs->fs_pendingblocks > 0 && reclaimed == 0) { @@ -176,8 +190,8 @@ nospace: int ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) struct inode *ip; - ufs_daddr_t lbprev; - ufs_daddr_t bpref; + ufs2_daddr_t lbprev; + ufs2_daddr_t bpref; int osize, nsize; struct ucred *cred; struct buf **bpp; @@ -186,7 +200,7 @@ ffs_realloccg(ip, lbprev, bpref, osize, nsize, cred, bpp) struct fs *fs; struct buf *bp; int cg, request, error, reclaimed; - ufs_daddr_t bprev, bno; + ufs2_daddr_t bprev, bno; *bpp = 0; vp = ITOV(ip); @@ -210,9 +224,9 @@ retry: if (suser_cred(cred, PRISON_ROOT) && freespace(fs, fs->fs_minfree) - numfrags(fs, nsize - osize) < 0) goto nospace; - if ((bprev = ip->i_db[lbprev]) == 0) { - printf("dev = %s, bsize = %ld, bprev = %ld, fs = %s\n", - devtoname(ip->i_dev), (long)fs->fs_bsize, (long)bprev, + if ((bprev = DIP(ip, i_db[lbprev])) == 0) { + printf("dev = %s, bsize = %ld, bprev = %lld, fs = %s\n", + devtoname(ip->i_dev), (long)fs->fs_bsize, (intmax_t)bprev, fs->fs_fsmnt); panic("ffs_realloccg: bad bprev"); } @@ -225,14 +239,14 @@ retry: return (error); } - if( bp->b_blkno == bp->b_lblkno) { - if( lbprev >= NDADDR) + if (bp->b_blkno == bp->b_lblkno) { + if (lbprev >= NDADDR) panic("ffs_realloccg: lbprev out of range"); bp->b_blkno = fsbtodb(fs, bprev); } #ifdef QUOTA - error = chkdq(ip, (long)btodb(nsize - osize), cred, 0); + error = chkdq(ip, btodb(nsize - osize), cred, 0); if (error) { brelse(bp); return (error); @@ -242,11 +256,11 @@ retry: * Check for extension in the existing location. 
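The ip->i_blocks updates above become DIP(ip, i_blocks) so that one code path can modify whichever of the UFS1 or UFS2 on-disk inode structures backs the in-core inode. The sketch below is a portable approximation of that idea, not the actual macro from the UFS headers; the kernel macro of this era also assigns through a conditional expression, which this stand-alone version replaces with an explicit setter:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel's UFS1/UFS2 on-disk inodes. */
#define UFS1    1
#define UFS2    2

struct ufs1_dinode { int32_t di_blocks; };
struct ufs2_dinode { int64_t di_blocks; };

struct inode {
    int fstype;                         /* stands in for i_ump->um_fstype */
    struct ufs1_dinode *i_din1;
    struct ufs2_dinode *i_din2;
};

/* Read accessor in the spirit of DIP(): pick the field from whichever
 * dinode the inode is backed by. */
static int64_t
dip_blocks(const struct inode *ip)
{
    return (ip->fstype == UFS1 ?
        ip->i_din1->di_blocks : ip->i_din2->di_blocks);
}

/* Explicit setter standing in for the "DIP(ip, i_blocks) += n" idiom. */
static void
dip_add_blocks(struct inode *ip, int64_t n)
{
    if (ip->fstype == UFS1)
        ip->i_din1->di_blocks += (int32_t)n;
    else
        ip->i_din2->di_blocks += n;
}

int
main(void)
{
    struct ufs2_dinode din2 = { 0 };
    struct inode ip = { UFS2, NULL, &din2 };

    dip_add_blocks(&ip, 8);             /* DIP(ip, i_blocks) += btodb(size) */
    printf("di_blocks = %" PRId64 "\n", dip_blocks(&ip));
    return (0);
}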
*/ cg = dtog(fs, bprev); - bno = ffs_fragextend(ip, cg, (long)bprev, osize, nsize); + bno = ffs_fragextend(ip, cg, bprev, osize, nsize); if (bno) { if (bp->b_blkno != fsbtodb(fs, bno)) panic("ffs_realloccg: bad blockno"); - ip->i_blocks += btodb(nsize - osize); + DIP(ip, i_blocks) += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; @@ -302,8 +316,7 @@ retry: panic("ffs_realloccg: bad optim"); /* NOTREACHED */ } - bno = (ufs_daddr_t)ffs_hashalloc(ip, cg, (long)bpref, request, - ffs_alloccg); + bno = ffs_hashalloc(ip, cg, bpref, request, ffs_alloccg); if (bno > 0) { bp->b_blkno = fsbtodb(fs, bno); if (!DOINGSOFTDEP(vp)) @@ -312,7 +325,7 @@ retry: if (nsize < request) ffs_blkfree(fs, ip->i_devvp, bno + numfrags(fs, nsize), (long)(request - nsize), ip->i_number); - ip->i_blocks += btodb(nsize - osize); + DIP(ip, i_blocks) += btodb(nsize - osize); ip->i_flag |= IN_CHANGE | IN_UPDATE; allocbuf(bp, nsize); bp->b_flags |= B_DONE; @@ -324,7 +337,7 @@ retry: /* * Restore user's disk quota because allocation failed. */ - (void) chkdq(ip, (long)-btodb(nsize - osize), cred, FORCE); + (void) chkdq(ip, -btodb(nsize - osize), cred, FORCE); #endif brelse(bp); nospace: @@ -346,14 +359,14 @@ nospace: * * The vnode and an array of buffer pointers for a range of sequential * logical blocks to be made contiguous is given. The allocator attempts - * to find a range of sequential blocks starting as close as possible to - * an fs_rotdelay offset from the end of the allocation for the logical - * block immediately preceding the current range. If successful, the - * physical block numbers in the buffer pointers and in the inode are - * changed to reflect the new allocation. If unsuccessful, the allocation - * is left unchanged. The success in doing the reallocation is returned. - * Note that the error return is not reflected back to the user. Rather - * the previous block allocation will be used. + * to find a range of sequential blocks starting as close as possible + * from the end of the allocation for the logical block immediately + * preceding the current range. If successful, the physical block numbers + * in the buffer pointers and in the inode are changed to reflect the new + * allocation. If unsuccessful, the allocation is left unchanged. The + * success in doing the reallocation is returned. Note that the error + * return is not reflected back to the user. Rather the previous block + * allocation will be used. 
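ffs_realloccg() works in fragment units: the new size is rounded up to whole fragments, a larger request (up to a full block) may be made when the filesystem optimizes for time, and any excess fragments are freed straight back with ffs_blkfree(), as in the hunk above. A toy calculation of those quantities; the 8192/1024 sizes and the simplified macros below stand in for fs_bsize/fs_fsize and the real fs.h macros:

#include <stdio.h>

#define FSIZE   1024                    /* fragment size (illustrative) */
#define BSIZE   8192                    /* block size (illustrative) */

#define numfrags(loc)           ((loc) / FSIZE)
#define fragroundup(size)       (((size) + FSIZE - 1) / FSIZE * FSIZE)

int
main(void)
{
    int osize = 2048;                   /* currently allocated bytes */
    int nsize = fragroundup(3000);      /* desired size, rounded to fragments */
    int request = BSIZE;                /* a full block may be requested when
                                           optimizing for time */

    printf("grow %d -> %d bytes: allocate %d, then free back %d fragment(s)\n",
        osize, nsize, request, numfrags(request - nsize));
    return (0);
}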
*/ SYSCTL_NODE(_vfs, OID_AUTO, ffs, CTLFLAG_RW, 0, "FFS filesystem"); @@ -375,18 +388,33 @@ ffs_reallocblks(ap) struct cluster_save *a_buflist; } */ *ap; { + + if (doreallocblks == 0) + return (ENOSPC); + if (VTOI(ap->a_vp)->i_ump->um_fstype == UFS1) + return (ffs_reallocblks_ufs1(ap)); + return (ffs_reallocblks_ufs2(ap)); +} + +static int +ffs_reallocblks_ufs1(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ struct fs *fs; struct inode *ip; struct vnode *vp; struct buf *sbp, *ebp; - ufs_daddr_t *bap, *sbap, *ebap = 0; + ufs1_daddr_t *bap, *sbap, *ebap = 0; struct cluster_save *buflist; - ufs_daddr_t start_lbn, end_lbn, soff, newblk, blkno; + ufs_lbn_t start_lbn, end_lbn; + ufs1_daddr_t soff, newblk, blkno; + ufs2_daddr_t pref; struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; - int i, len, start_lvl, end_lvl, pref, ssize; + int i, len, start_lvl, end_lvl, ssize; - if (doreallocblks == 0) - return (ENOSPC); vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_fs; @@ -425,7 +453,7 @@ ffs_reallocblks(ap) * Get the starting offset and block map for the first block. */ if (start_lvl == 0) { - sbap = &ip->i_db[0]; + sbap = &ip->i_din1->di_db[0]; soff = start_lbn; } else { idp = &start_ap[start_lvl - 1]; @@ -433,13 +461,13 @@ ffs_reallocblks(ap) brelse(sbp); return (ENOSPC); } - sbap = (ufs_daddr_t *)sbp->b_data; + sbap = (ufs1_daddr_t *)sbp->b_data; soff = idp->in_off; } /* * Find the preferred location for the cluster. */ - pref = ffs_blkpref(ip, start_lbn, soff, sbap); + pref = ffs_blkpref_ufs1(ip, start_lbn, soff, sbap); /* * If the block range spans two block maps, get the second map. */ @@ -453,12 +481,12 @@ ffs_reallocblks(ap) ssize = len - (idp->in_off + 1); if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) goto fail; - ebap = (ufs_daddr_t *)ebp->b_data; + ebap = (ufs1_daddr_t *)ebp->b_data; } /* * Search the block map looking for an allocation of the desired size. */ - if ((newblk = (ufs_daddr_t)ffs_hashalloc(ip, dtog(fs, pref), (long)pref, + if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, len, ffs_clusteralloc)) == 0) goto fail; /* @@ -470,8 +498,8 @@ ffs_reallocblks(ap) */ #ifdef DEBUG if (prtrealloc) - printf("realloc: ino %d, lbns %d-%d\n\told:", ip->i_number, - start_lbn, end_lbn); + printf("realloc: ino %d, lbns %lld-%lld\n\told:", ip->i_number, + (intmax_t)start_lbn, (intmax_t)end_lbn); #endif blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { @@ -491,7 +519,7 @@ ffs_reallocblks(ap) printf(" %d,", *bap); #endif if (DOINGSOFTDEP(vp)) { - if (sbap == &ip->i_db[0] && i < ssize) + if (sbap == &ip->i_din1->di_db[0] && i < ssize) softdep_setup_allocdirect(ip, start_lbn + i, blkno, *bap, fs->fs_bsize, fs->fs_bsize, buflist->bs_children[i]); @@ -516,7 +544,7 @@ ffs_reallocblks(ap) * We can then check below to see if it is set, and do the * synchronous write only when it has been cleared. 
*/ - if (sbap != &ip->i_db[0]) { + if (sbap != &ip->i_din1->di_db[0]) { if (doasyncfree) bdwrite(sbp); else @@ -566,7 +594,209 @@ ffs_reallocblks(ap) fail: if (ssize < len) brelse(ebp); - if (sbap != &ip->i_db[0]) + if (sbap != &ip->i_din1->di_db[0]) + brelse(sbp); + return (ENOSPC); +} + +static int +ffs_reallocblks_ufs2(ap) + struct vop_reallocblks_args /* { + struct vnode *a_vp; + struct cluster_save *a_buflist; + } */ *ap; +{ + struct fs *fs; + struct inode *ip; + struct vnode *vp; + struct buf *sbp, *ebp; + ufs2_daddr_t *bap, *sbap, *ebap = 0; + struct cluster_save *buflist; + ufs_lbn_t start_lbn, end_lbn; + ufs2_daddr_t soff, newblk, blkno, pref; + struct indir start_ap[NIADDR + 1], end_ap[NIADDR + 1], *idp; + int i, len, start_lvl, end_lvl, ssize; + + vp = ap->a_vp; + ip = VTOI(vp); + fs = ip->i_fs; + if (fs->fs_contigsumsize <= 0) + return (ENOSPC); + buflist = ap->a_buflist; + len = buflist->bs_nchildren; + start_lbn = buflist->bs_children[0]->b_lblkno; + end_lbn = start_lbn + len - 1; +#ifdef DIAGNOSTIC + for (i = 0; i < len; i++) + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 1"); + for (i = 1; i < len; i++) + if (buflist->bs_children[i]->b_lblkno != start_lbn + i) + panic("ffs_reallocblks: non-logical cluster"); + blkno = buflist->bs_children[0]->b_blkno; + ssize = fsbtodb(fs, fs->fs_frag); + for (i = 1; i < len - 1; i++) + if (buflist->bs_children[i]->b_blkno != blkno + (i * ssize)) + panic("ffs_reallocblks: non-physical cluster %d", i); +#endif + /* + * If the latest allocation is in a new cylinder group, assume that + * the filesystem has decided to move and do not force it back to + * the previous cylinder group. + */ + if (dtog(fs, dbtofsb(fs, buflist->bs_children[0]->b_blkno)) != + dtog(fs, dbtofsb(fs, buflist->bs_children[len - 1]->b_blkno))) + return (ENOSPC); + if (ufs_getlbns(vp, start_lbn, start_ap, &start_lvl) || + ufs_getlbns(vp, end_lbn, end_ap, &end_lvl)) + return (ENOSPC); + /* + * Get the starting offset and block map for the first block. + */ + if (start_lvl == 0) { + sbap = &ip->i_din2->di_db[0]; + soff = start_lbn; + } else { + idp = &start_ap[start_lvl - 1]; + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &sbp)) { + brelse(sbp); + return (ENOSPC); + } + sbap = (ufs2_daddr_t *)sbp->b_data; + soff = idp->in_off; + } + /* + * Find the preferred location for the cluster. + */ + pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); + /* + * If the block range spans two block maps, get the second map. + */ + if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { + ssize = len; + } else { +#ifdef DIAGNOSTIC + if (start_ap[start_lvl-1].in_lbn == idp->in_lbn) + panic("ffs_reallocblk: start == end"); +#endif + ssize = len - (idp->in_off + 1); + if (bread(vp, idp->in_lbn, (int)fs->fs_bsize, NOCRED, &ebp)) + goto fail; + ebap = (ufs2_daddr_t *)ebp->b_data; + } + /* + * Search the block map looking for an allocation of the desired size. + */ + if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, + len, ffs_clusteralloc)) == 0) + goto fail; + /* + * We have found a new contiguous block. + * + * First we have to replace the old block pointers with the new + * block pointers in the inode and indirect blocks associated + * with the file. 
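Both reallocblks variants convert between filesystem fragment numbers and 512-byte disk block numbers with fsbtodb()/dbtofsb() before comparing against a buffer's b_blkno. A stand-alone sketch of that conversion; the sizes and shift value below are illustrative, whereas the kernel takes the shift from the superblock:

#include <inttypes.h>
#include <stdio.h>

#define DEV_BSIZE       512             /* disk block size */
#define FSIZE           2048            /* fragment size (illustrative) */
#define FSBTODB_SHIFT   2               /* log2(FSIZE / DEV_BSIZE) */

#define fsbtodb(b)      ((int64_t)(b) << FSBTODB_SHIFT)
#define dbtofsb(b)      ((b) >> FSBTODB_SHIFT)

int
main(void)
{
    int64_t frag = 1000;

    printf("frag %jd -> disk block %jd -> frag %jd\n",
        (intmax_t)frag, (intmax_t)fsbtodb(frag),
        (intmax_t)dbtofsb(fsbtodb(frag)));
    return (0);
}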
+ */ +#ifdef DEBUG + if (prtrealloc) + printf("realloc: ino %d, lbns %lld-%lld\n\told:", ip->i_number, + (intmax_t)start_lbn, (intmax_t)end_lbn); +#endif + blkno = newblk; + for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->fs_frag) { + if (i == ssize) { + bap = ebap; + soff = -i; + } +#ifdef DIAGNOSTIC + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 2"); + if (dbtofsb(fs, buflist->bs_children[i]->b_blkno) != *bap) + panic("ffs_reallocblks: alloc mismatch"); +#endif +#ifdef DEBUG + if (prtrealloc) + printf(" %lld,", (intmax_t)*bap); +#endif + if (DOINGSOFTDEP(vp)) { + if (sbap == &ip->i_din2->di_db[0] && i < ssize) + softdep_setup_allocdirect(ip, start_lbn + i, + blkno, *bap, fs->fs_bsize, fs->fs_bsize, + buflist->bs_children[i]); + else + softdep_setup_allocindir_page(ip, start_lbn + i, + i < ssize ? sbp : ebp, soff + i, blkno, + *bap, buflist->bs_children[i]); + } + *bap++ = blkno; + } + /* + * Next we must write out the modified inode and indirect blocks. + * For strict correctness, the writes should be synchronous since + * the old block values may have been written to disk. In practise + * they are almost never written, but if we are concerned about + * strict correctness, the `doasyncfree' flag should be set to zero. + * + * The test on `doasyncfree' should be changed to test a flag + * that shows whether the associated buffers and inodes have + * been written. The flag should be set when the cluster is + * started and cleared whenever the buffer or inode is flushed. + * We can then check below to see if it is set, and do the + * synchronous write only when it has been cleared. + */ + if (sbap != &ip->i_din2->di_db[0]) { + if (doasyncfree) + bdwrite(sbp); + else + bwrite(sbp); + } else { + ip->i_flag |= IN_CHANGE | IN_UPDATE; + if (!doasyncfree) + UFS_UPDATE(vp, 1); + } + if (ssize < len) { + if (doasyncfree) + bdwrite(ebp); + else + bwrite(ebp); + } + /* + * Last, free the old blocks and assign the new blocks to the buffers. 
+ */ +#ifdef DEBUG + if (prtrealloc) + printf("\n\tnew:"); +#endif + for (blkno = newblk, i = 0; i < len; i++, blkno += fs->fs_frag) { + if (!DOINGSOFTDEP(vp)) + ffs_blkfree(fs, ip->i_devvp, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), + fs->fs_bsize, ip->i_number); + buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); +#ifdef DIAGNOSTIC + if (!ffs_checkblk(ip, + dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->fs_bsize)) + panic("ffs_reallocblks: unallocated block 3"); +#endif +#ifdef DEBUG + if (prtrealloc) + printf(" %d,", blkno); +#endif + } +#ifdef DEBUG + if (prtrealloc) { + prtrealloc--; + printf("\n"); + } +#endif + return (0); + +fail: + if (ssize < len) + brelse(ebp); + if (sbap != &ip->i_din2->di_db[0]) brelse(sbp); return (ENOSPC); } @@ -596,6 +826,7 @@ ffs_valloc(pvp, mode, cred, vpp) struct inode *pip; struct fs *fs; struct inode *ip; + struct timespec ts; ino_t ino, ipref; int cg, error; @@ -623,7 +854,7 @@ ffs_valloc(pvp, mode, cred, vpp) if (fs->fs_contigdirs[cg] > 0) fs->fs_contigdirs[cg]--; } - ino = (ino_t)ffs_hashalloc(pip, cg, (long)ipref, mode, + ino = (ino_t)ffs_hashalloc(pip, cg, ipref, mode, (allocfcn_t *)ffs_nodealloccg); if (ino == 0) goto noinodes; @@ -638,17 +869,24 @@ ffs_valloc(pvp, mode, cred, vpp) ip->i_mode, (u_long)ip->i_number, fs->fs_fsmnt); panic("ffs_valloc: dup alloc"); } - if (ip->i_blocks && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ + if (DIP(ip, i_blocks) && (fs->fs_flags & FS_UNCLEAN) == 0) { /* XXX */ printf("free inode %s/%lu had %ld blocks\n", - fs->fs_fsmnt, (u_long)ino, (long)ip->i_blocks); - ip->i_blocks = 0; + fs->fs_fsmnt, (u_long)ino, (long)DIP(ip, i_blocks)); + DIP(ip, i_blocks) = 0; } ip->i_flags = 0; + DIP(ip, i_flags) = 0; /* * Set up a new generation number for this inode. */ if (ip->i_gen == 0 || ++ip->i_gen == 0) ip->i_gen = random() / 2 + 1; + DIP(ip, i_gen) = ip->i_gen; + if (fs->fs_magic == FS_UFS2_MAGIC) { + vfs_timestamp(&ts); + ip->i_din2->di_createtime = ts.tv_sec; + ip->i_din2->di_creatensec = ts.tv_nsec; + } return (0); noinodes: ffs_fserr(fs, pip->i_number, "out of inodes"); @@ -785,23 +1023,20 @@ ffs_dirpref(pip) * allocated. * * If a section is already partially allocated, the policy is to - * contiguously allocate fs_maxcontig blocks. The end of one of these - * contiguous blocks and the beginning of the next is physically separated - * so that the disk head will be in transit between them for at least - * fs_rotdelay milliseconds. This is to allow time for the processor to - * schedule another I/O transfer. + * contiguously allocate fs_maxcontig blocks. The end of one of these + * contiguous blocks and the beginning of the next is laid out + * contiguously if possible. */ -ufs_daddr_t -ffs_blkpref(ip, lbn, indx, bap) +ufs2_daddr_t +ffs_blkpref_ufs1(ip, lbn, indx, bap) struct inode *ip; - ufs_daddr_t lbn; + ufs_lbn_t lbn; int indx; - ufs_daddr_t *bap; + ufs1_daddr_t *bap; { struct fs *fs; int cg; int avgbfree, startcg; - ufs_daddr_t nextblk; fs = ip->i_fs; if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { @@ -833,25 +1068,58 @@ ffs_blkpref(ip, lbn, indx, bap) return (0); } /* - * One or more previous blocks have been laid out. If less - * than fs_maxcontig previous blocks are contiguous, the - * next block is requested contiguously, otherwise it is - * requested rotationally delayed by fs_rotdelay milliseconds. + * We just always try to lay things out contiguously. 
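When ffs_blkpref_ufs1() (and its UFS2 twin in the next hunk) starts a new run of blocks, it looks for a cylinder group holding at least the filesystem-wide average number of free blocks, scanning forward from a starting hint and wrapping around once. A simplified stand-alone version of that scan; the kernel reads the per-group counts through fs_cs() and also records the winner in fs_cgrotor:

#include <stdio.h>

static int
pick_cg(const int *nbfree, int ncg, int startcg, long total_free)
{
    int avg = total_free / ncg;
    int cg;

    for (cg = startcg; cg < ncg; cg++)
        if (nbfree[cg] >= avg)
            return (cg);
    for (cg = 0; cg <= startcg; cg++)
        if (nbfree[cg] >= avg)
            return (cg);
    return (-1);                        /* caller falls back to "no preference" */
}

int
main(void)
{
    int nbfree[] = { 10, 3, 50, 40 };   /* free blocks per cylinder group */

    printf("preferred cg = %d\n", pick_cg(nbfree, 4, 1, 103));
    return (0);
}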
*/ - nextblk = bap[indx - 1] + fs->fs_frag; - if (fs->fs_rotdelay == 0 || indx < fs->fs_maxcontig || - bap[indx - fs->fs_maxcontig] + - blkstofrags(fs, fs->fs_maxcontig) != nextblk) - return (nextblk); + return (bap[indx - 1] + fs->fs_frag); +} + +/* + * Same as above, but for UFS2 + */ +ufs2_daddr_t +ffs_blkpref_ufs2(ip, lbn, indx, bap) + struct inode *ip; + ufs_lbn_t lbn; + int indx; + ufs2_daddr_t *bap; +{ + struct fs *fs; + int cg; + int avgbfree, startcg; + + fs = ip->i_fs; + if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { + if (lbn < NDADDR + NINDIR(fs)) { + cg = ino_to_cg(fs, ip->i_number); + return (fs->fs_fpg * cg + fs->fs_frag); + } + /* + * Find a cylinder with greater than average number of + * unused data blocks. + */ + if (indx == 0 || bap[indx - 1] == 0) + startcg = + ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; + else + startcg = dtog(fs, bap[indx - 1]) + 1; + startcg %= fs->fs_ncg; + avgbfree = fs->fs_cstotal.cs_nbfree / fs->fs_ncg; + for (cg = startcg; cg < fs->fs_ncg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + for (cg = 0; cg <= startcg; cg++) + if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { + fs->fs_cgrotor = cg; + return (fs->fs_fpg * cg + fs->fs_frag); + } + return (0); + } /* - * Here we convert ms of delay to frags as: - * (frags) = (ms) * (rev/sec) * (sect/rev) / - * ((sect/frag) * (ms/sec)) - * then round up to the next block. + * We just always try to lay things out contiguously. */ - nextblk += roundup(fs->fs_rotdelay * fs->fs_rps * fs->fs_nsect / - (NSPF(fs) * 1000), fs->fs_frag); - return (nextblk); + return (bap[indx - 1] + fs->fs_frag); } /* @@ -863,16 +1131,16 @@ ffs_blkpref(ip, lbn, indx, bap) * 3) brute force search for a free block. */ /*VARARGS5*/ -static u_long +static ufs2_daddr_t ffs_hashalloc(ip, cg, pref, size, allocator) struct inode *ip; int cg; - long pref; + ufs2_daddr_t pref; int size; /* size for data blocks, mode for inodes */ allocfcn_t *allocator; { struct fs *fs; - long result; /* XXX why not same type as we return? */ + ufs2_daddr_t result; int i, icg = cg; #ifdef DIAGNOSTIC @@ -920,11 +1188,11 @@ ffs_hashalloc(ip, cg, pref, size, allocator) * Check to see if the necessary fragments are available, and * if they are, allocate them. */ -static ufs_daddr_t +static ufs2_daddr_t ffs_fragextend(ip, cg, bprev, osize, nsize) struct inode *ip; int cg; - long bprev; + ufs2_daddr_t bprev; int osize, nsize; { struct fs *fs; @@ -956,7 +1224,7 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) return (0); } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_time = time_second; + cgp->cg_old_time = cgp->cg_time = time_second; bno = dtogd(fs, bprev); blksfree = cg_blksfree(cgp); for (i = numfrags(fs, osize); i < frags; i++) @@ -997,19 +1265,19 @@ ffs_fragextend(ip, cg, bprev, osize, nsize) * Check to see if a block of the appropriate size is available, * and if it is, allocate it. 
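ffs_fragextend() can grow an allocation in place only if every fragment between the old and the new size is still marked free in the cylinder group's blksfree map, which is what the loop over isclr() above checks. A stand-alone sketch of that test over a toy bitmap, with local stand-ins for the isset()/isclr() macros:

#include <stdio.h>

/* Minimal local stand-ins for the kernel's isset()/isclr() bitmap macros. */
#define isset(a, i)     ((a)[(i) / 8] & (1 << ((i) % 8)))
#define isclr(a, i)     (!isset(a, i))

/* Can an allocation of ofrags fragments at bno grow to nfrags in place? */
static int
can_extend(const unsigned char *blksfree, int bno, int ofrags, int nfrags)
{
    int i;

    for (i = ofrags; i < nfrags; i++)
        if (isclr(blksfree, bno + i))   /* following fragment already in use */
            return (0);
    return (1);
}

int
main(void)
{
    unsigned char blksfree[] = { 0xf0 };        /* frags 4-7 free, 0-3 used */

    printf("grow 2 frags at 2 to 4 frags: %d; grow 1 frag at 0 to 3 frags: %d\n",
        can_extend(blksfree, 2, 2, 4), can_extend(blksfree, 0, 1, 3));
    return (0);
}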
*/ -static ufs_daddr_t +static ufs2_daddr_t ffs_alloccg(ip, cg, bpref, size) struct inode *ip; int cg; - ufs_daddr_t bpref; + ufs2_daddr_t bpref; int size; { struct fs *fs; struct cg *cgp; struct buf *bp; - int i; - ufs_daddr_t bno, blkno; - int allocsiz, error, frags; + ufs1_daddr_t bno; + ufs2_daddr_t blkno; + int i, allocsiz, error, frags; u_int8_t *blksfree; fs = ip->i_fs; @@ -1028,13 +1296,13 @@ ffs_alloccg(ip, cg, bpref, size) return (0); } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_time = time_second; + cgp->cg_old_time = cgp->cg_time = time_second; if (size == fs->fs_bsize) { - bno = ffs_alloccgblk(ip, bp, bpref); + blkno = ffs_alloccgblk(ip, bp, bpref); if (fs->fs_active != 0) atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); bdwrite(bp); - return (bno); + return (blkno); } /* * check to see if any fragments are already available @@ -1055,10 +1323,10 @@ ffs_alloccg(ip, cg, bpref, size) brelse(bp); return (0); } - bno = ffs_alloccgblk(ip, bp, bpref); - bpref = dtogd(fs, bno); + blkno = ffs_alloccgblk(ip, bp, bpref); + bno = dtogd(fs, blkno); for (i = frags; i < fs->fs_frag; i++) - setbit(blksfree, bpref + i); + setbit(blksfree, bno + i); i = fs->fs_frag - frags; cgp->cg_cs.cs_nffree += i; fs->fs_cstotal.cs_nffree += i; @@ -1068,7 +1336,7 @@ ffs_alloccg(ip, cg, bpref, size) if (fs->fs_active != 0) atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); bdwrite(bp); - return (bno); + return (blkno); } bno = ffs_mapsearch(fs, cgp, bpref, allocsiz); if (bno < 0) { @@ -1090,7 +1358,7 @@ ffs_alloccg(ip, cg, bpref, size) if (fs->fs_active != 0) atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); bdwrite(bp); - return ((u_long)blkno); + return (blkno); } /* @@ -1104,18 +1372,16 @@ ffs_alloccg(ip, cg, bpref, size) * Note that this routine only allocates fs_bsize blocks; these * blocks may be fragmented by the routine that allocates them. */ -static ufs_daddr_t +static ufs2_daddr_t ffs_alloccgblk(ip, bp, bpref) struct inode *ip; struct buf *bp; - ufs_daddr_t bpref; + ufs2_daddr_t bpref; { struct fs *fs; struct cg *cgp; - ufs_daddr_t bno, blkno; - int cylno, pos, delta; - short *cylbp; - int i; + ufs1_daddr_t bno; + ufs2_daddr_t blkno; u_int8_t *blksfree; fs = ip->i_fs; @@ -1123,78 +1389,17 @@ ffs_alloccgblk(ip, bp, bpref) blksfree = cg_blksfree(cgp); if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { bpref = cgp->cg_rotor; - goto norot; - } - bpref = blknum(fs, bpref); - bpref = dtogd(fs, bpref); - /* - * if the requested block is available, use it - */ - if (ffs_isblock(fs, blksfree, fragstoblks(fs, bpref))) { - bno = bpref; - goto gotit; - } - if (fs->fs_nrpos <= 1 || fs->fs_cpc == 0) { - /* - * Block layout information is not available. - * Leaving bpref unchanged means we take the - * next available free block following the one - * we just allocated. Hopefully this will at - * least hit a track cache on drives of unknown - * geometry (e.g. SCSI). - */ - goto norot; - } - /* - * check for a block available on the same cylinder - */ - cylno = cbtocylno(fs, bpref); - if (cg_blktot(cgp)[cylno] == 0) - goto norot; - /* - * check the summary information to see if a block is - * available in the requested cylinder starting at the - * requested rotational position and proceeding around. 
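Both the rotational-layout code being removed here and the simplified ffs_alloccgblk() that replaces it rely on ffs_isblock() to ask whether a whole block's worth of fragments is free. For the common case of eight fragments per block that test is a single byte compare; the sketch below is an approximation of the real helper, which also handles 1, 2 and 4 fragments per block:

#include <stdio.h>

static int
isblock8(const unsigned char *freemap, long blkno)
{
    /* With 8 fragments per block, one byte of the map covers one block. */
    return (freemap[blkno] == 0xff);
}

int
main(void)
{
    unsigned char freemap[] = { 0xff, 0x3f, 0xff };

    printf("blk0 free=%d blk1 free=%d blk2 free=%d\n",
        isblock8(freemap, 0), isblock8(freemap, 1), isblock8(freemap, 2));
    return (0);
}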
- */ - cylbp = cg_blks(fs, cgp, cylno); - pos = cbtorpos(fs, bpref); - for (i = pos; i < fs->fs_nrpos; i++) - if (cylbp[i] > 0) - break; - if (i == fs->fs_nrpos) - for (i = 0; i < pos; i++) - if (cylbp[i] > 0) - break; - if (cylbp[i] > 0) { + } else { + bpref = blknum(fs, bpref); + bno = dtogd(fs, bpref); /* - * found a rotational position, now find the actual - * block. A panic if none is actually there. + * if the requested block is available, use it */ - pos = cylno % fs->fs_cpc; - bno = (cylno - pos) * fs->fs_spc / NSPB(fs); - if (fs_postbl(fs, pos)[i] == -1) { - printf("pos = %d, i = %d, fs = %s\n", - pos, i, fs->fs_fsmnt); - panic("ffs_alloccgblk: cyl groups corrupted"); - } - for (i = fs_postbl(fs, pos)[i];; ) { - if (ffs_isblock(fs, blksfree, bno + i)) { - bno = blkstofrags(fs, (bno + i)); - goto gotit; - } - delta = fs_rotbl(fs)[i]; - if (delta <= 0 || - delta + i > fragstoblks(fs, fs->fs_fpg)) - break; - i += delta; - } - printf("pos = %d, i = %d, fs = %s\n", pos, i, fs->fs_fsmnt); - panic("ffs_alloccgblk: can't find blk in cyl"); + if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) + goto gotit; } -norot: /* - * no blocks in the requested cylinder, so take next - * available one in this cylinder group. + * Take the next available block in this cylinder group. */ bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); if (bno < 0) @@ -1207,9 +1412,6 @@ gotit: cgp->cg_cs.cs_nbfree--; fs->fs_cstotal.cs_nbfree--; fs->fs_cs(fs, cgp->cg_cgx).cs_nbfree--; - cylno = cbtocylno(fs, bno); - cg_blks(fs, cgp, cylno)[cbtorpos(fs, bno)]--; - cg_blktot(cgp)[cylno]--; fs->fs_fmod = 1; blkno = cgp->cg_cgx * fs->fs_fpg + bno; if (DOINGSOFTDEP(ITOV(ip))) @@ -1224,17 +1426,18 @@ gotit: * are multiple choices in the same cylinder group. Instead we just * take the first one that we find following bpref. */ -static ufs_daddr_t +static ufs2_daddr_t ffs_clusteralloc(ip, cg, bpref, len) struct inode *ip; int cg; - ufs_daddr_t bpref; + ufs2_daddr_t bpref; int len; { struct fs *fs; struct cg *cgp; struct buf *bp; - int i, got, run, bno, bit, map; + int i, run, bit, map, got; + ufs2_daddr_t bno; u_char *mapp; int32_t *lp; u_int8_t *blksfree; @@ -1319,7 +1522,7 @@ ffs_clusteralloc(ip, cg, bpref, len) panic("ffs_clusteralloc: allocated out of group"); len = blkstofrags(fs, len); for (i = 0; i < len; i += fs->fs_frag) - if ((got = ffs_alloccgblk(ip, bp, bno + i)) != bno + i) + if (ffs_alloccgblk(ip, bp, bno + i) != bno + i) panic("ffs_clusteralloc: lost block"); if (fs->fs_active != 0) atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); @@ -1344,13 +1547,14 @@ static ino_t ffs_nodealloccg(ip, cg, ipref, mode) struct inode *ip; int cg; - ufs_daddr_t ipref; + ufs2_daddr_t ipref; int mode; { struct fs *fs; struct cg *cgp; - struct buf *bp; + struct buf *bp, *ibp; u_int8_t *inosused; + struct ufs2_dinode *dp2; int error, start, len, loc, map, i; fs = ip->i_fs; @@ -1368,7 +1572,7 @@ ffs_nodealloccg(ip, cg, ipref, mode) return (0); } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_time = time_second; + cgp->cg_old_time = cgp->cg_time = time_second; inosused = cg_inosused(cgp); if (ipref) { ipref %= fs->fs_ipg; @@ -1414,6 +1618,24 @@ gotit: fs->fs_cstotal.cs_ndir++; fs->fs_cs(fs, cg).cs_ndir++; } + /* + * Check to see if we need to initialize more inodes. 
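ffs_clusteralloc(), whose signature is retyped above, needs a run of len consecutive free blocks inside one cylinder group; once found, the loop shown above claims each constituent block with ffs_alloccgblk() and checks that it got the expected address. The scan itself can be sketched over a plain boolean array; the kernel walks the packed bitmap a byte at a time and consults the per-group cluster summary first to skip hopeless groups:

#include <stdio.h>

static int
find_run(const char *blkfree, int nblks, int len)
{
    int i, run = 0;

    for (i = 0; i < nblks; i++) {
        run = blkfree[i] ? run + 1 : 0;
        if (run == len)
            return (i - len + 1);       /* first block of the run */
    }
    return (-1);
}

int
main(void)
{
    char blkfree[] = { 1, 0, 1, 1, 1, 0, 1 };   /* 1 = block is free */

    printf("run of 3 starts at block %d\n", find_run(blkfree, 7, 3));
    return (0);
}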
+ */ + if (fs->fs_magic == FS_UFS2_MAGIC && + ipref + INOPB(fs) > cgp->cg_initediblk && + cgp->cg_initediblk < cgp->cg_niblk) { + ibp = getblk(ip->i_devvp, fsbtodb(fs, + ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), + (int)fs->fs_bsize, 0, 0); + bzero(ibp->b_data, (int)fs->fs_bsize); + dp2 = (struct ufs2_dinode *)(ibp->b_data); + for (i = 0; i < INOPB(fs); i++) { + dp2->di_gen = random() / 2 + 1; + dp2++; + } + bawrite(ibp); + cgp->cg_initediblk += INOPB(fs); + } if (fs->fs_active != 0) atomic_clear_int(&ACTIVECGNUM(fs, cg), ACTIVECGOFF(cg)); bdwrite(bp); @@ -1431,13 +1653,14 @@ void ffs_blkfree(fs, devvp, bno, size, inum) struct fs *fs; struct vnode *devvp; - ufs_daddr_t bno; + ufs2_daddr_t bno; long size; ino_t inum; { struct cg *cgp; struct buf *bp; - ufs_daddr_t fragno, cgbno; + ufs1_daddr_t fragno, cgbno; + ufs2_daddr_t cgblkno; int i, error, cg, blk, frags, bbase; u_int8_t *blksfree; dev_t dev; @@ -1446,11 +1669,11 @@ ffs_blkfree(fs, devvp, bno, size, inum) if (devvp->v_type != VCHR) { /* devvp is a snapshot */ dev = VTOI(devvp)->i_devvp->v_rdev; - cgbno = fragstoblks(fs, cgtod(fs, cg)); + cgblkno = fragstoblks(fs, cgtod(fs, cg)); } else { /* devvp is a normal disk device */ dev = devvp->v_rdev; - cgbno = fsbtodb(fs, cgtod(fs, cg)); + cgblkno = fsbtodb(fs, cgtod(fs, cg)); if ((devvp->v_flag & VCOPYONWRITE) && ffs_snapblkfree(fs, devvp, bno, size, inum)) return; @@ -1462,18 +1685,19 @@ ffs_blkfree(fs, devvp, bno, size, inum) panic("ffs_blkfree: deallocation on suspended filesystem"); if ((u_int)size > fs->fs_bsize || fragoff(fs, size) != 0 || fragnum(fs, bno) + numfrags(fs, size) > fs->fs_frag) { - printf("dev=%s, bno = %ld, bsize = %ld, size = %ld, fs = %s\n", - devtoname(dev), (long)bno, (long)fs->fs_bsize, + printf("dev=%s, bno = %lld, bsize = %ld, size = %ld, fs = %s\n", + devtoname(dev), (intmax_t)bno, (long)fs->fs_bsize, size, fs->fs_fsmnt); panic("ffs_blkfree: bad size"); } #endif if ((u_int)bno >= fs->fs_size) { - printf("bad block %ld, ino %lu\n", (long)bno, (u_long)inum); + printf("bad block %lld, ino %lu\n", (intmax_t)bno, + (u_long)inum); ffs_fserr(fs, inum, "bad block"); return; } - if ((error = bread(devvp, cgbno, (int)fs->fs_cgsize, NOCRED, &bp))) { + if ((error = bread(devvp, cgblkno, (int)fs->fs_cgsize, NOCRED, &bp))) { brelse(bp); return; } @@ -1483,7 +1707,7 @@ ffs_blkfree(fs, devvp, bno, size, inum) return; } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_time = time_second; + cgp->cg_old_time = cgp->cg_time = time_second; cgbno = dtogd(fs, bno); blksfree = cg_blksfree(cgp); if (size == fs->fs_bsize) { @@ -1494,8 +1718,8 @@ ffs_blkfree(fs, devvp, bno, size, inum) brelse(bp); return; } - printf("dev = %s, block = %ld, fs = %s\n", - devtoname(dev), (long)bno, fs->fs_fsmnt); + printf("dev = %s, block = %lld, fs = %s\n", + devtoname(dev), (intmax_t)bno, fs->fs_fsmnt); panic("ffs_blkfree: freeing free block"); } ffs_setblock(fs, blksfree, fragno); @@ -1503,9 +1727,6 @@ ffs_blkfree(fs, devvp, bno, size, inum) cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; - i = cbtocylno(fs, cgbno); - cg_blks(fs, cgp, i)[cbtorpos(fs, cgbno)]++; - cg_blktot(cgp)[i]++; } else { bbase = cgbno - fragnum(fs, cgbno); /* @@ -1519,8 +1740,8 @@ ffs_blkfree(fs, devvp, bno, size, inum) frags = numfrags(fs, size); for (i = 0; i < frags; i++) { if (isset(blksfree, cgbno + i)) { - printf("dev = %s, block = %ld, fs = %s\n", - devtoname(dev), (long)(bno + i), + printf("dev = %s, block = %lld, fs = %s\n", + devtoname(dev), (intmax_t)(bno + i), fs->fs_fsmnt); 
panic("ffs_blkfree: freeing free frag"); } @@ -1546,9 +1767,6 @@ ffs_blkfree(fs, devvp, bno, size, inum) cgp->cg_cs.cs_nbfree++; fs->fs_cstotal.cs_nbfree++; fs->fs_cs(fs, cg).cs_nbfree++; - i = cbtocylno(fs, bbase); - cg_blks(fs, cgp, i)[cbtorpos(fs, bbase)]++; - cg_blktot(cgp)[i]++; } } fs->fs_fmod = 1; @@ -1565,12 +1783,13 @@ ffs_blkfree(fs, devvp, bno, size, inum) static int ffs_checkblk(ip, bno, size) struct inode *ip; - ufs_daddr_t bno; + ufs2_daddr_t bno; long size; { struct fs *fs; struct cg *cgp; struct buf *bp; + ufs1_daddr_t cgbno; int i, error, frags, free; u_int8_t *blksfree; @@ -1581,7 +1800,7 @@ ffs_checkblk(ip, bno, size) panic("ffs_checkblk: bad size"); } if ((u_int)bno >= fs->fs_size) - panic("ffs_checkblk: bad block %d", bno); + panic("ffs_checkblk: bad block %lld", (intmax_t)bno); error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, dtog(fs, bno))), (int)fs->fs_cgsize, NOCRED, &bp); if (error) @@ -1591,13 +1810,13 @@ ffs_checkblk(ip, bno, size) panic("ffs_checkblk: cg magic mismatch"); bp->b_xflags |= BX_BKGRDWRITE; blksfree = cg_blksfree(cgp); - bno = dtogd(fs, bno); + cgbno = dtogd(fs, bno); if (size == fs->fs_bsize) { - free = ffs_isblock(fs, blksfree, fragstoblks(fs, bno)); + free = ffs_isblock(fs, blksfree, fragstoblks(fs, cgbno)); } else { frags = numfrags(fs, size); for (free = 0, i = 0; i < frags; i++) - if (isset(blksfree, bno + i)) + if (isset(blksfree, cgbno + i)) free++; if (free != 0 && free != frags) panic("ffs_checkblk: partially free fragment"); @@ -1636,7 +1855,8 @@ ffs_freefile(fs, devvp, ino, mode) { struct cg *cgp; struct buf *bp; - int error, cgbno, cg; + ufs2_daddr_t cgbno; + int error, cg; u_int8_t *inosused; dev_t dev; @@ -1663,7 +1883,7 @@ ffs_freefile(fs, devvp, ino, mode) return (0); } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_time = time_second; + cgp->cg_old_time = cgp->cg_time = time_second; inosused = cg_inosused(cgp); ino %= fs->fs_ipg; if (isclr(inosused, ino)) { @@ -1696,14 +1916,14 @@ ffs_freefile(fs, devvp, ino, mode) * It is a panic if a request is made to find a block if none are * available. 
*/ -static ufs_daddr_t +static ufs1_daddr_t ffs_mapsearch(fs, cgp, bpref, allocsiz) struct fs *fs; struct cg *cgp; - ufs_daddr_t bpref; + ufs2_daddr_t bpref; int allocsiz; { - ufs_daddr_t bno; + ufs1_daddr_t bno; int start, len, loc, i; int blk, field, subfield, pos; u_int8_t *blksfree; @@ -1766,7 +1986,7 @@ void ffs_clusteracct(fs, cgp, blkno, cnt) struct fs *fs; struct cg *cgp; - ufs_daddr_t blkno; + ufs1_daddr_t blkno; int cnt; { int32_t *sump; @@ -1925,7 +2145,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) struct inode *ip; struct mount *mp; struct fs *fs; - ufs_daddr_t blkno; + ufs2_daddr_t blkno; long blkcnt, blksize; struct file *fp; int filetype, error; @@ -1979,6 +2199,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) break; ip = VTOI(vp); ip->i_nlink += cmd.size; + DIP(ip, i_nlink) = ip->i_nlink; ip->i_effnlink += cmd.size; ip->i_flag |= IN_CHANGE; if (DOINGSOFTDEP(vp)) @@ -1997,7 +2218,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) if ((error = VFS_VGET(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp))) break; ip = VTOI(vp); - ip->i_blocks += cmd.size; + DIP(ip, i_blocks) += cmd.size; ip->i_flag |= IN_CHANGE; vput(vp); break; @@ -2035,17 +2256,17 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS) #ifdef DEBUG if (fsckcmds) { if (cmd.size == 1) - printf("%s: free block %d\n", + printf("%s: free block %lld\n", mp->mnt_stat.f_mntonname, - (ufs_daddr_t)cmd.value); + (intmax_t)cmd.value); else - printf("%s: free blocks %d-%ld\n", + printf("%s: free blocks %lld-%lld\n", mp->mnt_stat.f_mntonname, - (ufs_daddr_t)cmd.value, - (ufs_daddr_t)cmd.value + cmd.size - 1); + (intmax_t)cmd.value, + (intmax_t)cmd.value + cmd.size - 1); } #endif /* DEBUG */ - blkno = (ufs_daddr_t)cmd.value; + blkno = cmd.value; blkcnt = cmd.size; blksize = fs->fs_frag - (blkno % fs->fs_frag); while (blkcnt > 0) { |