diff options
author | pfg <pfg@FreeBSD.org> | 2011-12-15 20:31:18 +0000 |
---|---|---|
committer | pfg <pfg@FreeBSD.org> | 2011-12-15 20:31:18 +0000 |
commit | 37ef732e3706f181d980062a33f308832cd40d73 (patch) | |
tree | d9583affe60423447807fff7c05f2b9f2a1c852b | |
parent | 1551a9d47765ddb3229399b4bb3fbacf4faaf66b (diff) | |
download | FreeBSD-src-37ef732e3706f181d980062a33f308832cd40d73.zip FreeBSD-src-37ef732e3706f181d980062a33f308832cd40d73.tar.gz |
Bring in reallocblk to ext2fs.
The feature has been standard for a while in UFS as a means to reduce
fragmentation, therefore maintaining consistent performance with
filesystem aging. This is also very similar to what ext4 calls
"delayed allocation".
In his 2010 GSoC, Zheng Liu ported and benchmarked the missing
FANCY_REALLOC code to find more consistent performance improvements than
with the preallocation approach.
PR: 159233
Author: Zheng Liu <gnehzuil AT SPAMFREE gmail DOT com>
Sponsored by: Google Inc.
Approved by: jhb (mentor)
MFC after: 2 weeks
-rw-r--r-- | sys/fs/ext2fs/ext2_alloc.c | 188 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_extern.h | 3 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_subr.c | 104 | ||||
-rw-r--r-- | sys/fs/ext2fs/ext2_vfsops.c | 57 | ||||
-rwxr-xr-x | sys/fs/ext2fs/ext2fs.h | 21 |
5 files changed, 335 insertions, 38 deletions
diff --git a/sys/fs/ext2fs/ext2_alloc.c b/sys/fs/ext2fs/ext2_alloc.c index 2f19b87..3d07af5 100644 --- a/sys/fs/ext2fs/ext2_alloc.c +++ b/sys/fs/ext2fs/ext2_alloc.c @@ -42,6 +42,7 @@ #include <sys/vnode.h> #include <sys/stat.h> #include <sys/mount.h> +#include <sys/sysctl.h> #include <sys/syslog.h> #include <sys/buf.h> @@ -52,6 +53,7 @@ #include <fs/ext2fs/ext2_extern.h> static daddr_t ext2_alloccg(struct inode *, int, daddr_t, int); +static daddr_t ext2_clusteralloc(struct inode *, int, daddr_t, int); static u_long ext2_dirpref(struct inode *); static void ext2_fserr(struct m_ext2fs *, uid_t, char *); static u_long ext2_hashalloc(struct inode *, int, long, int, @@ -59,9 +61,6 @@ static u_long ext2_hashalloc(struct inode *, int, long, int, int)); static daddr_t ext2_nodealloccg(struct inode *, int, daddr_t, int); static daddr_t ext2_mapsearch(struct m_ext2fs *, char *, daddr_t); -#ifdef FANCY_REALLOC -static int ext2_reallocblks(struct vop_reallocblks_args *); -#endif /* * Allocate a block in the file system. @@ -113,20 +112,20 @@ ext2_alloc(ip, lbn, bpref, size, cred, bnp) if (bpref >= fs->e2fs->e2fs_bcount) bpref = 0; if (bpref == 0) - cg = ino_to_cg(fs, ip->i_number); - else - cg = dtog(fs, bpref); - bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, - ext2_alloccg); - if (bno > 0) { + cg = ino_to_cg(fs, ip->i_number); + else + cg = dtog(fs, bpref); + bno = (daddr_t)ext2_hashalloc(ip, cg, bpref, fs->e2fs_bsize, + ext2_alloccg); + if (bno > 0) { /* set next_alloc fields as done in block_getblk */ ip->i_next_alloc_block = lbn; ip->i_next_alloc_goal = bno; - ip->i_blocks += btodb(fs->e2fs_bsize); - ip->i_flag |= IN_CHANGE | IN_UPDATE; - *bnp = bno; - return (0); + ip->i_blocks += btodb(fs->e2fs_bsize); + ip->i_flag |= IN_CHANGE | IN_UPDATE; + *bnp = bno; + return (0); } nospace: EXT2_UNLOCK(ump); @@ -150,7 +149,6 @@ nospace: * the previous block allocation will be used. */ -#ifdef FANCY_REALLOC static SYSCTL_NODE(_vfs, OID_AUTO, ext2fs, CTLFLAG_RW, 0, "EXT2FS filesystem"); static int doasyncfree = 1; @@ -159,7 +157,6 @@ SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doasyncfree, CTLFLAG_RW, &doasyncfree, 0, static int doreallocblks = 1; SYSCTL_INT(_vfs_ext2fs, OID_AUTO, doreallocblks, CTLFLAG_RW, &doreallocblks, 0, ""); -#endif int ext2_reallocblks(ap) @@ -168,11 +165,6 @@ ext2_reallocblks(ap) struct cluster_save *a_buflist; } */ *ap; { -#ifndef FANCY_REALLOC -/* printf("ext2_reallocblks not implemented\n"); */ -return ENOSPC; -#else - struct m_ext2fs *fs; struct inode *ip; struct vnode *vp; @@ -184,14 +176,17 @@ return ENOSPC; int32_t start_lbn, end_lbn, soff, newblk, blkno; int i, len, start_lvl, end_lvl, pref, ssize; + if (doreallocblks == 0) + return (ENOSPC); + vp = ap->a_vp; ip = VTOI(vp); fs = ip->i_e2fs; ump = ip->i_ump; -#ifdef UNKLAR - if (fs->fs_contigsumsize <= 0) + + if (fs->e2fs_contigsumsize <= 0) return (ENOSPC); -#endif + buflist = ap->a_buflist; len = buflist->bs_nchildren; start_lbn = buflist->bs_children[0]->b_lblkno; @@ -228,11 +223,6 @@ return ENOSPC; soff = idp->in_off; } /* - * Find the preferred location for the cluster. - */ - EXT2_LOCK(ump); - pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); - /* * If the block range spans two block maps, get the second map. */ if (end_lvl == 0 || (idp = &end_ap[end_lvl - 1])->in_off + 1 >= len) { @@ -243,13 +233,16 @@ return ENOSPC; panic("ext2_reallocblk: start == end"); #endif ssize = len - (idp->in_off + 1); - if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)){ - EXT2_UNLOCK(ump); + if (bread(vp, idp->in_lbn, (int)fs->e2fs_bsize, NOCRED, &ebp)) goto fail; - } ebap = (int32_t *)ebp->b_data; } /* + * Find the preferred location for the cluster. + */ + EXT2_LOCK(ump); + pref = ext2_blkpref(ip, start_lbn, soff, sbap, 0); + /* * Search the block map looking for an allocation of the desired size. */ if ((newblk = (int32_t)ext2_hashalloc(ip, dtog(fs, pref), pref, @@ -264,15 +257,23 @@ return ENOSPC; * block pointers in the inode and indirect blocks associated * with the file. */ +#ifdef DEBUG + printf("realloc: ino %d, lbns %jd-%jd\n\told:", ip->i_number, + (intmax_t)start_lbn, (intmax_t)end_lbn); +#endif /* DEBUG */ blkno = newblk; for (bap = &sbap[soff], i = 0; i < len; i++, blkno += fs->e2fs_fpb) { - if (i == ssize) + if (i == ssize) { bap = ebap; soff = -i; + } #ifdef DIAGNOSTIC if (buflist->bs_children[i]->b_blkno != fsbtodb(fs, *bap)) panic("ext2_reallocblks: alloc mismatch"); #endif +#ifdef DEBUG + printf(" %d,", *bap); +#endif /* DEBUG */ *bap++ = blkno; } /* @@ -308,11 +309,20 @@ return ENOSPC; /* * Last, free the old blocks and assign the new blocks to the buffers. */ +#ifdef DEBUG + printf("\n\tnew:"); +#endif /* DEBUG */ for (blkno = newblk, i = 0; i < len; i++, blkno += fs->e2fs_fpb) { ext2_blkfree(ip, dbtofsb(fs, buflist->bs_children[i]->b_blkno), fs->e2fs_bsize); buflist->bs_children[i]->b_blkno = fsbtodb(fs, blkno); +#ifdef DEBUG + printf(" %d,", blkno); +#endif /* DEBUG */ } +#ifdef DEBUG + printf("\n"); +#endif /* DEBUG */ return (0); fail: @@ -321,8 +331,6 @@ fail: if (sbap != &ip->i_db[0]) brelse(sbp); return (ENOSPC); - -#endif /* FANCY_REALLOC */ } /* @@ -747,6 +755,7 @@ gotit: #endif setbit(bbp, bno); EXT2_LOCK(ump); + ext2_clusteracct(fs, bbp, cg, bno, -1); fs->e2fs->e2fs_fbcount--; fs->e2fs_gd[cg].ext2bgd_nbfree--; fs->e2fs_fmod = 1; @@ -756,6 +765,116 @@ gotit: } /* + * Determine whether a cluster can be allocated. + */ +static daddr_t +ext2_clusteralloc(struct inode *ip, int cg, daddr_t bpref, int len) +{ + struct m_ext2fs *fs; + struct ext2mount *ump; + struct buf *bp; + char *bbp; + int bit, error, got, i, loc, run; + int32_t *lp; + daddr_t bno; + + fs = ip->i_e2fs; + ump = ip->i_ump; + + if (fs->e2fs_maxcluster[cg] < len) + return (0); + + EXT2_UNLOCK(ump); + error = bread(ip->i_devvp, + fsbtodb(fs, fs->e2fs_gd[cg].ext2bgd_b_bitmap), + (int)fs->e2fs_bsize, NOCRED, &bp); + if (error) + goto fail_lock; + + bbp = (char *)bp->b_data; + bp->b_xflags |= BX_BKGRDWRITE; + + EXT2_LOCK(ump); + /* + * Check to see if a cluster of the needed size (or bigger) is + * available in this cylinder group. + */ + lp = &fs->e2fs_clustersum[cg].cs_sum[len]; + for (i = len; i <= fs->e2fs_contigsumsize; i++) + if (*lp++ > 0) + break; + if (i > fs->e2fs_contigsumsize) { + /* + * Update the cluster summary information to reflect + * the true maximum-sized cluster so that future cluster + * allocation requests can avoid reading the bitmap only + * to find no cluster. + */ + lp = &fs->e2fs_clustersum[cg].cs_sum[len - 1]; + for (i = len - 1; i > 0; i--) + if (*lp-- > 0) + break; + fs->e2fs_maxcluster[cg] = i; + goto fail; + } + EXT2_UNLOCK(ump); + + /* Search the bitmap to find a big enough cluster like in FFS. */ + if (dtog(fs, bpref) != cg) + bpref = 0; + if (bpref != 0) + bpref = dtogd(fs, bpref); + loc = bpref / NBBY; + bit = 1 << (bpref % NBBY); + for (run = 0, got = bpref; got < fs->e2fs->e2fs_fpg; got++) { + if ((bbp[loc] & bit) != 0) + run = 0; + else { + run++; + if (run == len) + break; + } + if ((got & (NBBY - 1)) != (NBBY - 1)) + bit <<= 1; + else { + loc++; + bit = 1; + } + } + + if (got >= fs->e2fs->e2fs_fpg) + goto fail_lock; + + /* Allocate the cluster that we found. */ + for (i = 1; i < len; i++) + if (!isclr(bbp, got - run + i)) + panic("ext2_clusteralloc: map mismatch"); + + bno = got - run + 1; + if (bno >= fs->e2fs->e2fs_fpg) + panic("ext2_clusteralloc: allocated out of group"); + + EXT2_LOCK(ump); + for (i = 0; i < len; i += fs->e2fs_fpb) { + setbit(bbp, bno + i); + ext2_clusteracct(fs, bbp, cg, bno + i, -1); + fs->e2fs->e2fs_fbcount--; + fs->e2fs_gd[cg].ext2bgd_nbfree--; + } + fs->e2fs_fmod = 1; + EXT2_UNLOCK(ump); + + bdwrite(bp); + return (cg * fs->e2fs->e2fs_fpg + fs->e2fs->e2fs_first_dblock + bno); + +fail_lock: + EXT2_LOCK(ump); +fail: + brelse(bp); + return (0); +} + +/* * Determine whether an inode can be allocated. * * Check to see if an inode is available, and if it is, @@ -877,6 +996,7 @@ ext2_blkfree(ip, bno, size) } clrbit(bbp, bno); EXT2_LOCK(ump); + ext2_clusteracct(fs, bbp, cg, bno, 1); fs->e2fs->e2fs_fbcount++; fs->e2fs_gd[cg].ext2bgd_nbfree++; fs->e2fs_fmod = 1; diff --git a/sys/fs/ext2fs/ext2_extern.h b/sys/fs/ext2fs/ext2_extern.h index 821809f..d3ab058 100644 --- a/sys/fs/ext2fs/ext2_extern.h +++ b/sys/fs/ext2fs/ext2_extern.h @@ -55,12 +55,13 @@ void ext2_blkfree(struct inode *, int32_t, long); int32_t ext2_blkpref(struct inode *, int32_t, int, int32_t *, int32_t); int ext2_bmap(struct vop_bmap_args *); int ext2_bmaparray(struct vnode *, int32_t, int32_t *, int *, int *); +void ext2_clusteracct(struct m_ext2fs *, char *, int, daddr_t, int); void ext2_dirbad(struct inode *ip, doff_t offset, char *how); void ext2_ei2i(struct ext2fs_dinode *, struct inode *); int ext2_getlbns(struct vnode *, int32_t, struct indir *, int *); void ext2_i2ei(struct inode *, struct ext2fs_dinode *); +int ext2_reallocblks(struct vop_reallocblks_args *); void ext2_itimes(struct vnode *vp); -int ext2_reallocblks(struct vop_reallocblks_args *); int ext2_reclaim(struct vop_reclaim_args *); void ext2_setblock(struct m_ext2fs *, u_char *, int32_t); int ext2_truncate(struct vnode *, off_t, int, struct ucred *, struct thread *); diff --git a/sys/fs/ext2fs/ext2_subr.c b/sys/fs/ext2fs/ext2_subr.c index 70ade83..b8d59bc 100644 --- a/sys/fs/ext2fs/ext2_subr.c +++ b/sys/fs/ext2fs/ext2_subr.c @@ -120,3 +120,107 @@ ext2_checkoverlap(bp, ip) } } #endif /* KDB */ + +/* + * Update the cluster map because of an allocation of free like ffs. + * + * Cnt == 1 means free; cnt == -1 means allocating. + */ +void +ext2_clusteracct(struct m_ext2fs *fs, char *bbp, int cg, daddr_t bno, int cnt) +{ + int32_t *sump = fs->e2fs_clustersum[cg].cs_sum; + int32_t *lp; + int back, bit, end, forw, i, loc, start; + + /* Initialize the cluster summary array. */ + if (fs->e2fs_clustersum[cg].cs_init == 0) { + int run = 0; + bit = 1; + loc = 0; + + for (i = 0; i < fs->e2fs->e2fs_fpg; i++) { + if ((bbp[loc] & bit) == 0) + run++; + else if (run != 0) { + if (run > fs->e2fs_contigsumsize) + run = fs->e2fs_contigsumsize; + sump[run]++; + run = 0; + } + if ((i & (NBBY - 1)) != (NBBY - 1)) + bit <<= 1; + else { + loc++; + bit = 1; + } + } + if (run != 0) { + if (run > fs->e2fs_contigsumsize) + run = fs->e2fs_contigsumsize; + sump[run]++; + } + fs->e2fs_clustersum[cg].cs_init = 1; + } + + if (fs->e2fs_contigsumsize <= 0) + return; + + /* Find the size of the cluster going forward. */ + start = bno + 1; + end = start + fs->e2fs_contigsumsize; + if (end > fs->e2fs->e2fs_fpg) + end = fs->e2fs->e2fs_fpg; + loc = start / NBBY; + bit = 1 << (start % NBBY); + for (i = start; i < end; i++) { + if ((bbp[loc] & bit) != 0) + break; + if ((i & (NBBY - 1)) != (NBBY - 1)) + bit <<= 1; + else { + loc++; + bit = 1; + } + } + forw = i - start; + + /* Find the size of the cluster going backward. */ + start = bno - 1; + end = start - fs->e2fs_contigsumsize; + if (end < 0) + end = -1; + loc = start / NBBY; + bit = 1 << (start % NBBY); + for (i = start; i > end; i--) { + if ((bbp[loc] & bit) != 0) + break; + if ((i & (NBBY - 1)) != 0) + bit >>= 1; + else { + loc--; + bit = 1 << (NBBY - 1); + } + } + back = start - i; + + /* + * Account for old cluster and the possibly new forward and + * back clusters. + */ + i = back + forw + 1; + if (i > fs->e2fs_contigsumsize) + i = fs->e2fs_contigsumsize; + sump[i] += cnt; + if (back > 0) + sump[back] -= cnt; + if (forw > 0) + sump[forw] -= cnt; + + /* Update cluster summary information. */ + lp = &sump[fs->e2fs_contigsumsize]; + for (i = fs->e2fs_contigsumsize; i > 0; i--) + if (*lp-- > 0) + break; + fs->e2fs_maxcluster[cg] = i; +} diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c index 1b47e91..9395f98 100644 --- a/sys/fs/ext2fs/ext2_vfsops.c +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -405,7 +405,7 @@ compute_sb_data(struct vnode *devvp, struct ext2fs *es, * Things to do to update the mount: * 1) invalidate all cached meta-data. * 2) re-read superblock from disk. - * 3) re-read summary information from disk. + * 3) invalidate all cluster summary information. * 4) invalidate all inactive vnodes. * 5) invalidate all cached file data. * 6) re-read inode data for all active vnodes. @@ -419,7 +419,9 @@ ext2_reload(struct mount *mp, struct thread *td) struct buf *bp; struct ext2fs *es; struct m_ext2fs *fs; - int error; + struct csum *sump; + int error, i; + int32_t *lp; if ((mp->mnt_flag & MNT_RDONLY) == 0) return (EINVAL); @@ -456,6 +458,19 @@ ext2_reload(struct mount *mp, struct thread *td) #endif brelse(bp); + /* + * Step 3: invalidate all cluster summary information. + */ + if (fs->e2fs_contigsumsize > 0) { + lp = fs->e2fs_maxcluster; + sump = fs->e2fs_clustersum; + for (i = 0; i < fs->e2fs_gcount; i++, sump++) { + *lp++ = fs->e2fs_contigsumsize; + sump->cs_init = 0; + bzero(sump->cs_sum, fs->e2fs_contigsumsize + 1); + } + } + loop: MNT_ILOCK(mp); MNT_VNODE_FOREACH(vp, mp, mvp) { @@ -511,8 +526,11 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp) struct cdev *dev = devvp->v_rdev; struct g_consumer *cp; struct bufobj *bo; + struct csum *sump; int error; int ronly; + int i, size; + int32_t *lp; ronly = vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0); /* XXX: use VOP_ACESS to check FS perms */ @@ -582,6 +600,33 @@ ext2_mountfs(struct vnode *devvp, struct mount *mp) if ((error = compute_sb_data(devvp, ump->um_e2fs->e2fs, ump->um_e2fs))) goto out; + /* + * Calculate the maximum contiguous blocks and size of cluster summary + * array. In FFS this is done by newfs; however the superblock in + * ext2fs doesn't have these variables so we just can calculate + * them here. + */ + ump->um_e2fs->e2fs_maxcontig = MAX(1, MAXPHYS / ump->um_e2fs->e2fs_bsize); + if (ump->um_e2fs->e2fs_maxcontig > 0) + ump->um_e2fs->e2fs_contigsumsize = + MIN(ump->um_e2fs->e2fs_maxcontig, EXT2_MAXCONTIG); + else + ump->um_e2fs->e2fs_contigsumsize = 0; + if (ump->um_e2fs->e2fs_contigsumsize > 0) { + size = ump->um_e2fs->e2fs_gcount * sizeof(int32_t); + ump->um_e2fs->e2fs_maxcluster = malloc(size, M_EXT2MNT, M_WAITOK); + size = ump->um_e2fs->e2fs_gcount * sizeof(struct csum); + ump->um_e2fs->e2fs_clustersum = malloc(size, M_EXT2MNT, M_WAITOK); + lp = ump->um_e2fs->e2fs_maxcluster; + sump = ump->um_e2fs->e2fs_clustersum; + for (i = 0; i < ump->um_e2fs->e2fs_gcount; i++, sump++) { + *lp++ = ump->um_e2fs->e2fs_contigsumsize; + sump->cs_init = 0; + sump->cs_sum = malloc((ump->um_e2fs->e2fs_contigsumsize + 1) * + sizeof(int32_t), M_EXT2MNT, M_WAITOK | M_ZERO); + } + } + brelse(bp); bp = NULL; fs = ump->um_e2fs; @@ -656,7 +701,8 @@ ext2_unmount(struct mount *mp, int mntflags) { struct ext2mount *ump; struct m_ext2fs *fs; - int error, flags, ronly; + struct csum *sump; + int error, flags, i, ronly; flags = 0; if (mntflags & MNT_FORCE) { @@ -681,6 +727,11 @@ ext2_unmount(struct mount *mp, int mntflags) g_topology_unlock(); PICKUP_GIANT(); vrele(ump->um_devvp); + sump = fs->e2fs_clustersum; + for (i = 0; i < fs->e2fs_gcount; i++, sump++) + free(sump->cs_sum, M_EXT2MNT); + free(fs->e2fs_clustersum, M_EXT2MNT); + free(fs->e2fs_maxcluster, M_EXT2MNT); free(fs->e2fs_gd, M_EXT2MNT); free(fs->e2fs_contigdirs, M_EXT2MNT); free(fs->e2fs, M_EXT2MNT); diff --git a/sys/fs/ext2fs/ext2fs.h b/sys/fs/ext2fs/ext2fs.h index baac2de..7dc93dc 100755 --- a/sys/fs/ext2fs/ext2fs.h +++ b/sys/fs/ext2fs/ext2fs.h @@ -45,6 +45,16 @@ #define EXT2_LINK_MAX 32000 /* + * A summary of contiguous blocks of various sizes is maintained + * in each cylinder group. Normally this is set by the initial + * value of fs_maxcontig. + * + * XXX:FS_MAXCONTIG is set to 16 to conserve space. Here we set + * EXT2_MAXCONTIG to 32 for better performance. + */ +#define EXT2_MAXCONTIG 32 + +/* * Constants relative to the data blocks */ #define EXT2_NDIR_BLOCKS 12 @@ -151,6 +161,10 @@ struct m_ext2fs { char e2fs_wasvalid; /* valid at mount time */ off_t e2fs_maxfilesize; struct ext2_gd *e2fs_gd; /* Group Descriptors */ + int32_t e2fs_maxcontig; /* max number of contiguous blks */ + int32_t e2fs_contigsumsize; /* size of cluster summary array */ + int32_t *e2fs_maxcluster; /* max cluster in each cyl group */ + struct csum *e2fs_clustersum; /* cluster summary in each cyl group */ }; /* @@ -253,6 +267,13 @@ struct ext2_gd { uint32_t reserved2[3]; }; +/* cluster summary information */ + +struct csum { + int8_t cs_init; /* cluster summary has been initialized */ + int32_t *cs_sum; /* cluster summary array */ +}; + /* EXT2FS metadatas are stored in little-endian byte order. These macros * helps reading these metadatas */ |