diff options
Diffstat (limited to 'sys/ufs/ffs/ffs_alloc.c')
-rw-r--r-- | sys/ufs/ffs/ffs_alloc.c | 73 |
1 files changed, 67 insertions, 6 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index abe4073..789a7cf 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -1790,6 +1790,17 @@ fail: return (0); } +static inline struct buf * +getinobuf(struct inode *ip, u_int cg, u_int32_t cginoblk, int gbflags) +{ + struct fs *fs; + + fs = ip->i_fs; + return (getblk(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, + cg * fs->fs_ipg + cginoblk)), (int)fs->fs_bsize, 0, 0, + gbflags)); +} + /* * Determine whether an inode can be allocated. * @@ -1814,9 +1825,11 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused) u_int8_t *inosused, *loc; struct ufs2_dinode *dp2; int error, start, len, i; + u_int32_t old_initediblk; fs = ip->i_fs; ump = ip->i_ump; +check_nifree: if (fs->fs_cs(fs, cg).cs_nifree == 0) return (0); UFS_UNLOCK(ump); @@ -1828,13 +1841,13 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused) return (0); } cgp = (struct cg *)bp->b_data; +restart: if (!cg_chkmagic(cgp) || cgp->cg_cs.cs_nifree == 0) { brelse(bp); UFS_LOCK(ump); return (0); } bp->b_xflags |= BX_BKGRDWRITE; - cgp->cg_old_time = cgp->cg_time = time_second; inosused = cg_inosused(cgp); if (ipref) { ipref %= fs->fs_ipg; @@ -1856,7 +1869,6 @@ ffs_nodealloccg(ip, cg, ipref, mode, unused) } } ipref = (loc - inosused) * NBBY + ffs(~*loc) - 1; - cgp->cg_irotor = ipref; gotit: /* * Check to see if we need to initialize more inodes. @@ -1864,9 +1876,37 @@ gotit: if (fs->fs_magic == FS_UFS2_MAGIC && ipref + INOPB(fs) > cgp->cg_initediblk && cgp->cg_initediblk < cgp->cg_niblk) { - ibp = getblk(ip->i_devvp, fsbtodb(fs, - ino_to_fsba(fs, cg * fs->fs_ipg + cgp->cg_initediblk)), - (int)fs->fs_bsize, 0, 0, 0); + old_initediblk = cgp->cg_initediblk; + + /* + * Free the cylinder group lock before writing the + * initialized inode block. Entering the + * babarrierwrite() with the cylinder group lock + * causes lock order violation between the lock and + * snaplk. + * + * Another thread can decide to initialize the same + * inode block, but whichever thread first gets the + * cylinder group lock after writing the newly + * allocated inode block will update it and the other + * will realize that it has lost and leave the + * cylinder group unchanged. + */ + ibp = getinobuf(ip, cg, old_initediblk, GB_LOCK_NOWAIT); + brelse(bp); + if (ibp == NULL) { + /* + * The inode block buffer is already owned by + * another thread, which must initialize it. + * Wait on the buffer to allow another thread + * to finish the updates, with dropped cg + * buffer lock, then retry. + */ + ibp = getinobuf(ip, cg, old_initediblk, 0); + brelse(ibp); + UFS_LOCK(ump); + goto check_nifree; + } bzero(ibp->b_data, (int)fs->fs_bsize); dp2 = (struct ufs2_dinode *)(ibp->b_data); for (i = 0; i < INOPB(fs); i++) { @@ -1883,8 +1923,29 @@ gotit: * loading of newly created filesystems. */ babarrierwrite(ibp); - cgp->cg_initediblk += INOPB(fs); + + /* + * After the inode block is written, try to update the + * cg initediblk pointer. If another thread beat us + * to it, then leave it unchanged as the other thread + * has already set it correctly. + */ + error = bread(ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), + (int)fs->fs_cgsize, NOCRED, &bp); + UFS_LOCK(ump); + ACTIVECLEAR(fs, cg); + UFS_UNLOCK(ump); + if (error != 0) { + brelse(bp); + return (error); + } + cgp = (struct cg *)bp->b_data; + if (cgp->cg_initediblk == old_initediblk) + cgp->cg_initediblk += INOPB(fs); + goto restart; } + cgp->cg_old_time = cgp->cg_time = time_second; + cgp->cg_irotor = ipref; UFS_LOCK(ump); ACTIVECLEAR(fs, cg); setbit(inosused, ipref); |