summaryrefslogtreecommitdiffstats
path: root/sys/ufs/ffs/ffs_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/ufs/ffs/ffs_alloc.c')
-rw-r--r--sys/ufs/ffs/ffs_alloc.c228
1 files changed, 163 insertions, 65 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index ab21b89..d7db636 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -817,15 +817,6 @@ ffs_reallocblks_ufs2(ap)
UFS_LOCK(ump);
pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap);
/*
- * Skip a block for the first indirect block. Indirect blocks are
- * usually initially laid out in a good position between the data
- * blocks, but block reallocation would usually destroy locality by
- * moving them out of the way to make room for data blocks if we
- * didn't compensate here.
- */
- if (start_lbn == NDADDR)
- pref += fs->fs_frag;
- /*
* Search the block map looking for an allocation of the desired size.
*/
if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref,
@@ -1090,7 +1081,7 @@ ffs_dirpref(pip)
struct inode *pip;
{
struct fs *fs;
- u_int cg, prefcg, dirsize, cgsize;
+ int cg, prefcg, dirsize, cgsize;
u_int avgifree, avgbfree, avgndir, curdirsize;
u_int minifree, minbfree, maxndir;
u_int mincg, minndir;
@@ -1158,6 +1149,22 @@ ffs_dirpref(pip)
* Limit number of dirs in one cg and reserve space for
* regular files, but only if we have no deficit in
* inodes or space.
+ *
+ * We are trying to find a suitable cylinder group nearby
+ * our preferred cylinder group to place a new directory.
+ * We scan from our preferred cylinder group forward looking
+ * for a cylinder group that meets our criterion. If we get
+ * to the final cylinder group and do not find anything,
+ * we start scanning backwards from our preferred cylinder
+ * group. The ideal would be to alternate looking forward
+ * and backward, but that is just too complex to code for
+ * the gain it would get. The most likely place where the
+ * backward scan would take effect is when we start near
+ * the end of the filesystem and do not find anything from
+ * where we are to the end. In that case, scanning backward
+ * will likely find us a suitable cylinder group much closer
+ * to our desired location than if we were to start scanning
+ * forward from the beginning of the filesystem.
*/
prefcg = ino_to_cg(fs, pip->i_number);
for (cg = prefcg; cg < fs->fs_ncg; cg++)
@@ -1167,7 +1174,7 @@ ffs_dirpref(pip)
if (fs->fs_contigdirs[cg] < maxcontigdirs)
return ((ino_t)(fs->fs_ipg * cg));
}
- for (cg = 0; cg < prefcg; cg++)
+ for (cg = prefcg - 1; cg >= 0; cg--)
if (fs->fs_cs(fs, cg).cs_ndir < maxndir &&
fs->fs_cs(fs, cg).cs_nifree >= minifree &&
fs->fs_cs(fs, cg).cs_nbfree >= minbfree) {
@@ -1180,7 +1187,7 @@ ffs_dirpref(pip)
for (cg = prefcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
return ((ino_t)(fs->fs_ipg * cg));
- for (cg = 0; cg < prefcg; cg++)
+ for (cg = prefcg - 1; cg >= 0; cg--)
if (fs->fs_cs(fs, cg).cs_nifree >= avgifree)
break;
return ((ino_t)(fs->fs_ipg * cg));
@@ -1193,9 +1200,15 @@ ffs_dirpref(pip)
*
* If no blocks have been allocated in the first section, the policy is to
* request a block in the same cylinder group as the inode that describes
- * the file. If no blocks have been allocated in any other section, the
- * policy is to place the section in a cylinder group with a greater than
- * average number of free blocks. An appropriate cylinder group is found
+ * the file. The first indirect is allocated immediately following the last
+ * direct block and the data blocks for the first indirect immediately
+ * follow it.
+ *
+ * If no blocks have been allocated in any other section, the indirect
+ * block(s) are allocated in the same cylinder group as its inode in an
+ * area reserved immediately following the inode blocks. The policy for
+ * the data blocks is to place them in a cylinder group with a greater than
+ * average number of free blocks. An appropriate cylinder group is found
* by using a rotor that sweeps the cylinder groups. When a new group of
* blocks is needed, the sweep begins in the cylinder group following the
* cylinder group from which the previous allocation was made. The sweep
@@ -1218,39 +1231,78 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap)
ufs1_daddr_t *bap;
{
struct fs *fs;
- u_int cg;
+ u_int cg, inocg;
u_int avgbfree, startcg;
ufs2_daddr_t pref;
+ KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap"));
mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
fs = ip->i_fs;
/*
- * If we are allocating the first indirect block, try to place it
- * immediately following the last direct block.
- *
+ * Allocation of indirect blocks is indicated by passing negative
+ * values in indx: -1 for single indirect, -2 for double indirect,
+ * -3 for triple indirect. As noted below, we attempt to allocate
+ * the first indirect inline with the file data. For all later
+ * indirect blocks, the data is often allocated in other cylinder
+ * groups. However to speed random file access and to speed up
+ * fsck, the filesystem reserves the first fs_metaspace blocks
+ * (typically half of fs_minfree) of the data area of each cylinder
+ * group to hold these later indirect blocks.
+ */
+ inocg = ino_to_cg(fs, ip->i_number);
+ if (indx < 0) {
+ /*
+ * Our preference for indirect blocks is the zone at the
+ * beginning of the inode's cylinder group data area that
+ * we try to reserve for indirect blocks.
+ */
+ pref = cgmeta(fs, inocg);
+ /*
+ * If we are allocating the first indirect block, try to
+ * place it immediately following the last direct block.
+ */
+ if (indx == -1 && lbn < NDADDR + NINDIR(fs) &&
+ ip->i_din1->di_db[NDADDR - 1] != 0)
+ pref = ip->i_din1->di_db[NDADDR - 1] + fs->fs_frag;
+ return (pref);
+ }
+ /*
* If we are allocating the first data block in the first indirect
- * block, try to place it immediately following the indirect block.
+ * block and the indirect has been allocated in the data block area,
+ * try to place it immediately following the indirect block.
*/
if (lbn == NDADDR) {
- pref = ip->i_din1->di_db[NDADDR - 1];
- if (bap == NULL && pref != 0)
- return (pref + fs->fs_frag);
pref = ip->i_din1->di_ib[0];
- if (pref != 0)
+ if (pref != 0 && pref >= cgdata(fs, inocg) &&
+ pref < cgbase(fs, inocg + 1))
return (pref + fs->fs_frag);
}
+ /*
+ * If we are at the beginning of a file, or we have already allocated
+ * the maximum number of blocks per cylinder group, or we do not
+	 * have a block allocated immediately preceding us, then we need
+ * to decide where to start allocating new blocks.
+ */
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
- if (lbn < NDADDR + NINDIR(fs)) {
- cg = ino_to_cg(fs, ip->i_number);
- return (cgbase(fs, cg) + fs->fs_frag);
- }
+ /*
+ * If we are allocating a directory data block, we want
+ * to place it in the metadata area.
+ */
+ if ((ip->i_mode & IFMT) == IFDIR)
+ return (cgmeta(fs, inocg));
+ /*
+ * Until we fill all the direct and all the first indirect's
+ * blocks, we try to allocate in the data area of the inode's
+ * cylinder group.
+ */
+ if (lbn < NDADDR + NINDIR(fs))
+ return (cgdata(fs, inocg));
/*
* Find a cylinder with greater than average number of
* unused data blocks.
*/
if (indx == 0 || bap[indx - 1] == 0)
- startcg =
- ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+ startcg = inocg + lbn / fs->fs_maxbpg;
else
startcg = dtog(fs, bap[indx - 1]) + 1;
startcg %= fs->fs_ncg;
@@ -1258,17 +1310,17 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap)
for (cg = startcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
for (cg = 0; cg <= startcg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
return (0);
}
/*
- * We just always try to lay things out contiguously.
+ * Otherwise, we just always try to lay things out contiguously.
*/
return (bap[indx - 1] + fs->fs_frag);
}
@@ -1284,39 +1336,78 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap)
ufs2_daddr_t *bap;
{
struct fs *fs;
- u_int cg;
+ u_int cg, inocg;
u_int avgbfree, startcg;
ufs2_daddr_t pref;
+ KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap"));
mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED);
fs = ip->i_fs;
/*
- * If we are allocating the first indirect block, try to place it
- * immediately following the last direct block.
- *
+ * Allocation of indirect blocks is indicated by passing negative
+ * values in indx: -1 for single indirect, -2 for double indirect,
+ * -3 for triple indirect. As noted below, we attempt to allocate
+ * the first indirect inline with the file data. For all later
+ * indirect blocks, the data is often allocated in other cylinder
+ * groups. However to speed random file access and to speed up
+ * fsck, the filesystem reserves the first fs_metaspace blocks
+ * (typically half of fs_minfree) of the data area of each cylinder
+ * group to hold these later indirect blocks.
+ */
+ inocg = ino_to_cg(fs, ip->i_number);
+ if (indx < 0) {
+ /*
+ * Our preference for indirect blocks is the zone at the
+ * beginning of the inode's cylinder group data area that
+ * we try to reserve for indirect blocks.
+ */
+ pref = cgmeta(fs, inocg);
+ /*
+ * If we are allocating the first indirect block, try to
+ * place it immediately following the last direct block.
+ */
+ if (indx == -1 && lbn < NDADDR + NINDIR(fs) &&
+ ip->i_din2->di_db[NDADDR - 1] != 0)
+ pref = ip->i_din2->di_db[NDADDR - 1] + fs->fs_frag;
+ return (pref);
+ }
+ /*
* If we are allocating the first data block in the first indirect
- * block, try to place it immediately following the indirect block.
+ * block and the indirect has been allocated in the data block area,
+ * try to place it immediately following the indirect block.
*/
if (lbn == NDADDR) {
- pref = ip->i_din1->di_db[NDADDR - 1];
- if (bap == NULL && pref != 0)
- return (pref + fs->fs_frag);
- pref = ip->i_din1->di_ib[0];
- if (pref != 0)
+ pref = ip->i_din2->di_ib[0];
+ if (pref != 0 && pref >= cgdata(fs, inocg) &&
+ pref < cgbase(fs, inocg + 1))
return (pref + fs->fs_frag);
}
+ /*
+ * If we are at the beginning of a file, or we have already allocated
+ * the maximum number of blocks per cylinder group, or we do not
+	 * have a block allocated immediately preceding us, then we need
+ * to decide where to start allocating new blocks.
+ */
if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) {
- if (lbn < NDADDR + NINDIR(fs)) {
- cg = ino_to_cg(fs, ip->i_number);
- return (cgbase(fs, cg) + fs->fs_frag);
- }
+ /*
+ * If we are allocating a directory data block, we want
+ * to place it in the metadata area.
+ */
+ if ((ip->i_mode & IFMT) == IFDIR)
+ return (cgmeta(fs, inocg));
+ /*
+ * Until we fill all the direct and all the first indirect's
+ * blocks, we try to allocate in the data area of the inode's
+ * cylinder group.
+ */
+ if (lbn < NDADDR + NINDIR(fs))
+ return (cgdata(fs, inocg));
/*
* Find a cylinder with greater than average number of
* unused data blocks.
*/
if (indx == 0 || bap[indx - 1] == 0)
- startcg =
- ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg;
+ startcg = inocg + lbn / fs->fs_maxbpg;
else
startcg = dtog(fs, bap[indx - 1]) + 1;
startcg %= fs->fs_ncg;
@@ -1324,17 +1415,17 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap)
for (cg = startcg; cg < fs->fs_ncg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
for (cg = 0; cg <= startcg; cg++)
if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) {
fs->fs_cgrotor = cg;
- return (cgbase(fs, cg) + fs->fs_frag);
+ return (cgdata(fs, cg));
}
return (0);
}
/*
- * We just always try to lay things out contiguously.
+ * Otherwise, we just always try to lay things out contiguously.
*/
return (bap[indx - 1] + fs->fs_frag);
}
@@ -1611,31 +1702,37 @@ ffs_alloccgblk(ip, bp, bpref, size)
ufs1_daddr_t bno;
ufs2_daddr_t blkno;
u_int8_t *blksfree;
- int i;
+ int i, cgbpref;
fs = ip->i_fs;
ump = ip->i_ump;
mtx_assert(UFS_MTX(ump), MA_OWNED);
cgp = (struct cg *)bp->b_data;
blksfree = cg_blksfree(cgp);
- if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) {
+ if (bpref == 0) {
bpref = cgp->cg_rotor;
- } else {
- bpref = blknum(fs, bpref);
- bno = dtogd(fs, bpref);
- /*
- * if the requested block is available, use it
- */
- if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
- goto gotit;
+ } else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) {
+ /* map bpref to correct zone in this cg */
+ if (bpref < cgdata(fs, cgbpref))
+ bpref = cgmeta(fs, cgp->cg_cgx);
+ else
+ bpref = cgdata(fs, cgp->cg_cgx);
}
/*
+ * if the requested block is available, use it
+ */
+ bno = dtogd(fs, blknum(fs, bpref));
+ if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno)))
+ goto gotit;
+ /*
* Take the next available block in this cylinder group.
*/
bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag);
if (bno < 0)
return (0);
- cgp->cg_rotor = bno;
+ /* Update cg_rotor only if allocated from the data zone */
+ if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx)))
+ cgp->cg_rotor = bno;
gotit:
blkno = fragstoblks(fs, bno);
ffs_clrblock(fs, blksfree, (long)blkno);
@@ -1742,9 +1839,10 @@ ffs_clusteralloc(ip, cg, bpref, len, unused)
* be recalled to try an allocation in the next cylinder group.
*/
if (dtog(fs, bpref) != cg)
- bpref = 0;
+ bpref = cgdata(fs, cg);
else
- bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref)));
+ bpref = blknum(fs, bpref);
+ bpref = fragstoblks(fs, dtogd(fs, bpref));
mapp = &cg_clustersfree(cgp)[bpref / NBBY];
map = *mapp++;
bit = 1 << (bpref % NBBY);
OpenPOWER on IntegriCloud