diff options
Diffstat (limited to 'sys/ufs/ffs/ffs_alloc.c')
-rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 228 |
1 files changed, 163 insertions, 65 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index ab21b89..d7db636 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -817,15 +817,6 @@ ffs_reallocblks_ufs2(ap) UFS_LOCK(ump); pref = ffs_blkpref_ufs2(ip, start_lbn, soff, sbap); /* - * Skip a block for the first indirect block. Indirect blocks are - * usually initially laid out in a good position between the data - * blocks, but block reallocation would usually destroy locality by - * moving them out of the way to make room for data blocks if we - * didn't compensate here. - */ - if (start_lbn == NDADDR) - pref += fs->fs_frag; - /* * Search the block map looking for an allocation of the desired size. */ if ((newblk = ffs_hashalloc(ip, dtog(fs, pref), pref, @@ -1090,7 +1081,7 @@ ffs_dirpref(pip) struct inode *pip; { struct fs *fs; - u_int cg, prefcg, dirsize, cgsize; + int cg, prefcg, dirsize, cgsize; u_int avgifree, avgbfree, avgndir, curdirsize; u_int minifree, minbfree, maxndir; u_int mincg, minndir; @@ -1158,6 +1149,22 @@ ffs_dirpref(pip) * Limit number of dirs in one cg and reserve space for * regular files, but only if we have no deficit in * inodes or space. + * + * We are trying to find a suitable cylinder group nearby + * our preferred cylinder group to place a new directory. + * We scan from our preferred cylinder group forward looking + * for a cylinder group that meets our criterion. If we get + * to the final cylinder group and do not find anything, + * we start scanning backwards from our preferred cylinder + * group. The ideal would be to alternate looking forward + * and backward, but that is just too complex to code for + * the gain it would get. The most likely place where the + * backward scan would take effect is when we start near + * the end of the filesystem and do not find anything from + * where we are to the end. In that case, scanning backward + * will likely find us a suitable cylinder group much closer + * to our desired location than if we were to start scanning + * forward from the beginning of the filesystem. */ prefcg = ino_to_cg(fs, pip->i_number); for (cg = prefcg; cg < fs->fs_ncg; cg++) @@ -1167,7 +1174,7 @@ ffs_dirpref(pip) if (fs->fs_contigdirs[cg] < maxcontigdirs) return ((ino_t)(fs->fs_ipg * cg)); } - for (cg = 0; cg < prefcg; cg++) + for (cg = prefcg - 1; cg >= 0; cg--) if (fs->fs_cs(fs, cg).cs_ndir < maxndir && fs->fs_cs(fs, cg).cs_nifree >= minifree && fs->fs_cs(fs, cg).cs_nbfree >= minbfree) { @@ -1180,7 +1187,7 @@ ffs_dirpref(pip) for (cg = prefcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) return ((ino_t)(fs->fs_ipg * cg)); - for (cg = 0; cg < prefcg; cg++) + for (cg = prefcg - 1; cg >= 0; cg--) if (fs->fs_cs(fs, cg).cs_nifree >= avgifree) break; return ((ino_t)(fs->fs_ipg * cg)); @@ -1193,9 +1200,15 @@ ffs_dirpref(pip) * * If no blocks have been allocated in the first section, the policy is to * request a block in the same cylinder group as the inode that describes - * the file. If no blocks have been allocated in any other section, the - * policy is to place the section in a cylinder group with a greater than - * average number of free blocks. An appropriate cylinder group is found + * the file. The first indirect is allocated immediately following the last + * direct block and the data blocks for the first indirect immediately + * follow it. + * + * If no blocks have been allocated in any other section, the indirect + * block(s) are allocated in the same cylinder group as its inode in an + * area reserved immediately following the inode blocks. The policy for + * the data blocks is to place them in a cylinder group with a greater than + * average number of free blocks. An appropriate cylinder group is found * by using a rotor that sweeps the cylinder groups. When a new group of * blocks is needed, the sweep begins in the cylinder group following the * cylinder group from which the previous allocation was made. The sweep @@ -1218,39 +1231,78 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap) ufs1_daddr_t *bap; { struct fs *fs; - u_int cg; + u_int cg, inocg; u_int avgbfree, startcg; ufs2_daddr_t pref; + KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap")); mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); fs = ip->i_fs; /* - * If we are allocating the first indirect block, try to place it - * immediately following the last direct block. - * + * Allocation of indirect blocks is indicated by passing negative + * values in indx: -1 for single indirect, -2 for double indirect, + * -3 for triple indirect. As noted below, we attempt to allocate + * the first indirect inline with the file data. For all later + * indirect blocks, the data is often allocated in other cylinder + * groups. However to speed random file access and to speed up + * fsck, the filesystem reserves the first fs_metaspace blocks + * (typically half of fs_minfree) of the data area of each cylinder + * group to hold these later indirect blocks. + */ + inocg = ino_to_cg(fs, ip->i_number); + if (indx < 0) { + /* + * Our preference for indirect blocks is the zone at the + * beginning of the inode's cylinder group data area that + * we try to reserve for indirect blocks. + */ + pref = cgmeta(fs, inocg); + /* + * If we are allocating the first indirect block, try to + * place it immediately following the last direct block. + */ + if (indx == -1 && lbn < NDADDR + NINDIR(fs) && + ip->i_din1->di_db[NDADDR - 1] != 0) + pref = ip->i_din1->di_db[NDADDR - 1] + fs->fs_frag; + return (pref); + } + /* * If we are allocating the first data block in the first indirect - * block, try to place it immediately following the indirect block. + * block and the indirect has been allocated in the data block area, + * try to place it immediately following the indirect block. */ if (lbn == NDADDR) { - pref = ip->i_din1->di_db[NDADDR - 1]; - if (bap == NULL && pref != 0) - return (pref + fs->fs_frag); pref = ip->i_din1->di_ib[0]; - if (pref != 0) + if (pref != 0 && pref >= cgdata(fs, inocg) && + pref < cgbase(fs, inocg + 1)) return (pref + fs->fs_frag); } + /* + * If we are at the beginning of a file, or we have already allocated + * the maximum number of blocks per cylinder group, or we do not + * have a block allocated immediately preceeding us, then we need + * to decide where to start allocating new blocks. + */ if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { - if (lbn < NDADDR + NINDIR(fs)) { - cg = ino_to_cg(fs, ip->i_number); - return (cgbase(fs, cg) + fs->fs_frag); - } + /* + * If we are allocating a directory data block, we want + * to place it in the metadata area. + */ + if ((ip->i_mode & IFMT) == IFDIR) + return (cgmeta(fs, inocg)); + /* + * Until we fill all the direct and all the first indirect's + * blocks, we try to allocate in the data area of the inode's + * cylinder group. + */ + if (lbn < NDADDR + NINDIR(fs)) + return (cgdata(fs, inocg)); /* * Find a cylinder with greater than average number of * unused data blocks. */ if (indx == 0 || bap[indx - 1] == 0) - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; + startcg = inocg + lbn / fs->fs_maxbpg; else startcg = dtog(fs, bap[indx - 1]) + 1; startcg %= fs->fs_ncg; @@ -1258,17 +1310,17 @@ ffs_blkpref_ufs1(ip, lbn, indx, bap) for (cg = startcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } for (cg = 0; cg <= startcg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } return (0); } /* - * We just always try to lay things out contiguously. + * Otherwise, we just always try to lay things out contiguously. */ return (bap[indx - 1] + fs->fs_frag); } @@ -1284,39 +1336,78 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) ufs2_daddr_t *bap; { struct fs *fs; - u_int cg; + u_int cg, inocg; u_int avgbfree, startcg; ufs2_daddr_t pref; + KASSERT(indx <= 0 || bap != NULL, ("need non-NULL bap")); mtx_assert(UFS_MTX(ip->i_ump), MA_OWNED); fs = ip->i_fs; /* - * If we are allocating the first indirect block, try to place it - * immediately following the last direct block. - * + * Allocation of indirect blocks is indicated by passing negative + * values in indx: -1 for single indirect, -2 for double indirect, + * -3 for triple indirect. As noted below, we attempt to allocate + * the first indirect inline with the file data. For all later + * indirect blocks, the data is often allocated in other cylinder + * groups. However to speed random file access and to speed up + * fsck, the filesystem reserves the first fs_metaspace blocks + * (typically half of fs_minfree) of the data area of each cylinder + * group to hold these later indirect blocks. + */ + inocg = ino_to_cg(fs, ip->i_number); + if (indx < 0) { + /* + * Our preference for indirect blocks is the zone at the + * beginning of the inode's cylinder group data area that + * we try to reserve for indirect blocks. + */ + pref = cgmeta(fs, inocg); + /* + * If we are allocating the first indirect block, try to + * place it immediately following the last direct block. + */ + if (indx == -1 && lbn < NDADDR + NINDIR(fs) && + ip->i_din2->di_db[NDADDR - 1] != 0) + pref = ip->i_din2->di_db[NDADDR - 1] + fs->fs_frag; + return (pref); + } + /* * If we are allocating the first data block in the first indirect - * block, try to place it immediately following the indirect block. + * block and the indirect has been allocated in the data block area, + * try to place it immediately following the indirect block. */ if (lbn == NDADDR) { - pref = ip->i_din1->di_db[NDADDR - 1]; - if (bap == NULL && pref != 0) - return (pref + fs->fs_frag); - pref = ip->i_din1->di_ib[0]; - if (pref != 0) + pref = ip->i_din2->di_ib[0]; + if (pref != 0 && pref >= cgdata(fs, inocg) && + pref < cgbase(fs, inocg + 1)) return (pref + fs->fs_frag); } + /* + * If we are at the beginning of a file, or we have already allocated + * the maximum number of blocks per cylinder group, or we do not + * have a block allocated immediately preceeding us, then we need + * to decide where to start allocating new blocks. + */ if (indx % fs->fs_maxbpg == 0 || bap[indx - 1] == 0) { - if (lbn < NDADDR + NINDIR(fs)) { - cg = ino_to_cg(fs, ip->i_number); - return (cgbase(fs, cg) + fs->fs_frag); - } + /* + * If we are allocating a directory data block, we want + * to place it in the metadata area. + */ + if ((ip->i_mode & IFMT) == IFDIR) + return (cgmeta(fs, inocg)); + /* + * Until we fill all the direct and all the first indirect's + * blocks, we try to allocate in the data area of the inode's + * cylinder group. + */ + if (lbn < NDADDR + NINDIR(fs)) + return (cgdata(fs, inocg)); /* * Find a cylinder with greater than average number of * unused data blocks. */ if (indx == 0 || bap[indx - 1] == 0) - startcg = - ino_to_cg(fs, ip->i_number) + lbn / fs->fs_maxbpg; + startcg = inocg + lbn / fs->fs_maxbpg; else startcg = dtog(fs, bap[indx - 1]) + 1; startcg %= fs->fs_ncg; @@ -1324,17 +1415,17 @@ ffs_blkpref_ufs2(ip, lbn, indx, bap) for (cg = startcg; cg < fs->fs_ncg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } for (cg = 0; cg <= startcg; cg++) if (fs->fs_cs(fs, cg).cs_nbfree >= avgbfree) { fs->fs_cgrotor = cg; - return (cgbase(fs, cg) + fs->fs_frag); + return (cgdata(fs, cg)); } return (0); } /* - * We just always try to lay things out contiguously. + * Otherwise, we just always try to lay things out contiguously. */ return (bap[indx - 1] + fs->fs_frag); } @@ -1611,31 +1702,37 @@ ffs_alloccgblk(ip, bp, bpref, size) ufs1_daddr_t bno; ufs2_daddr_t blkno; u_int8_t *blksfree; - int i; + int i, cgbpref; fs = ip->i_fs; ump = ip->i_ump; mtx_assert(UFS_MTX(ump), MA_OWNED); cgp = (struct cg *)bp->b_data; blksfree = cg_blksfree(cgp); - if (bpref == 0 || dtog(fs, bpref) != cgp->cg_cgx) { + if (bpref == 0) { bpref = cgp->cg_rotor; - } else { - bpref = blknum(fs, bpref); - bno = dtogd(fs, bpref); - /* - * if the requested block is available, use it - */ - if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) - goto gotit; + } else if ((cgbpref = dtog(fs, bpref)) != cgp->cg_cgx) { + /* map bpref to correct zone in this cg */ + if (bpref < cgdata(fs, cgbpref)) + bpref = cgmeta(fs, cgp->cg_cgx); + else + bpref = cgdata(fs, cgp->cg_cgx); } /* + * if the requested block is available, use it + */ + bno = dtogd(fs, blknum(fs, bpref)); + if (ffs_isblock(fs, blksfree, fragstoblks(fs, bno))) + goto gotit; + /* * Take the next available block in this cylinder group. */ bno = ffs_mapsearch(fs, cgp, bpref, (int)fs->fs_frag); if (bno < 0) return (0); - cgp->cg_rotor = bno; + /* Update cg_rotor only if allocated from the data zone */ + if (bno >= dtogd(fs, cgdata(fs, cgp->cg_cgx))) + cgp->cg_rotor = bno; gotit: blkno = fragstoblks(fs, bno); ffs_clrblock(fs, blksfree, (long)blkno); @@ -1742,9 +1839,10 @@ ffs_clusteralloc(ip, cg, bpref, len, unused) * be recalled to try an allocation in the next cylinder group. */ if (dtog(fs, bpref) != cg) - bpref = 0; + bpref = cgdata(fs, cg); else - bpref = fragstoblks(fs, dtogd(fs, blknum(fs, bpref))); + bpref = blknum(fs, bpref); + bpref = fragstoblks(fs, dtogd(fs, bpref)); mapp = &cg_clustersfree(cgp)[bpref / NBBY]; map = *mapp++; bit = 1 << (bpref % NBBY); |