Diffstat (limited to 'sys/ufs')
-rw-r--r--  sys/ufs/ffs/ffs_alloc.c    | 193
-rw-r--r--  sys/ufs/ffs/ffs_balloc.c   |  28
-rw-r--r--  sys/ufs/ffs/ffs_extern.h   |   9
-rw-r--r--  sys/ufs/ffs/ffs_inode.c    |   4
-rw-r--r--  sys/ufs/ffs/ffs_snapshot.c |   3
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c  | 311
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c   | 160
-rw-r--r--  sys/ufs/ffs/fs.h           |   4
-rw-r--r--  sys/ufs/ffs/softdep.h      |   3
-rw-r--r--  sys/ufs/ufs/ufs_inode.c    |   9
-rw-r--r--  sys/ufs/ufs/ufsmount.h     |   1
11 files changed, 571 insertions, 154 deletions
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 7d7866c..82506fb 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -1829,7 +1829,7 @@ gotit:
 	}
 	UFS_UNLOCK(ump);
 	if (DOINGSOFTDEP(ITOV(ip)))
-		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref);
+		softdep_setup_inomapdep(bp, ip, cg * fs->fs_ipg + ipref, mode);
 	bdwrite(bp);
 	if (ibp != NULL)
 		bawrite(ibp);
@@ -2038,7 +2038,11 @@ ffs_blkfree(ump, fs, devvp, bno, size, inum, vtype, dephd)
 	    ffs_snapblkfree(fs, devvp, bno, size, inum, vtype, dephd)) {
 		return;
 	}
-	if (!ump->um_candelete) {
+	/*
+	 * Nothing to delay if TRIM is disabled, or the operation is
+	 * performed on the snapshot.
+	 */
+	if (!ump->um_candelete || devvp->v_type == VREG) {
 		ffs_blkfree_cg(ump, fs, devvp, bno, size, inum, dephd);
 		return;
 	}
@@ -2377,6 +2381,18 @@
  * in the current directory is oldvalue then change it to newvalue.
  * unlink(nameptr, oldvalue) - Verify that the inode number associated
  * with nameptr in the current directory is oldvalue then unlink it.
+ *
+ * The following functions may only be used on a quiescent filesystem
+ * by the soft updates journal. They are not safe to be run on an active
+ * filesystem.
+ *
+ * setinode(inode, dip) - the specified disk inode is replaced with the
+ *	contents pointed to by dip.
+ * setbufoutput(fd, flags) - output associated with the specified file
+ *	descriptor (which must reference the character device supporting
+ *	the filesystem) switches from using physio to running through the
+ *	buffer cache when flags is set to 1. The descriptor reverts to
+ *	physio for output when flags is set to zero.
  */

static int sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS);
@@ -2423,11 +2439,21 @@ static SYSCTL_NODE(_vfs_ffs, FFS_SET_DOTDOT, setdotdot, CTLFLAG_WR,
 static SYSCTL_NODE(_vfs_ffs, FFS_UNLINK, unlink, CTLFLAG_WR,
 	sysctl_ffs_fsck, "Unlink a Duplicate Name");

+static SYSCTL_NODE(_vfs_ffs, FFS_SET_INODE, setinode, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Update an On-Disk Inode");
+
+static SYSCTL_NODE(_vfs_ffs, FFS_SET_BUFOUTPUT, setbufoutput, CTLFLAG_WR,
+	sysctl_ffs_fsck, "Set Buffered Writing for Descriptor");
+
+#define	DEBUG	1
 #ifdef DEBUG
-static int fsckcmds = 0;
+static int fsckcmds = 1;
 SYSCTL_INT(_debug, OID_AUTO, fsckcmds, CTLFLAG_RW, &fsckcmds, 0, "");
 #endif /* DEBUG */

+static int buffered_write(struct file *, struct uio *, struct ucred *,
+	    int, struct thread *);
+
 static int
 sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 {
@@ -2441,8 +2467,10 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 	ufs2_daddr_t blkno;
 	long blkcnt, blksize;
 	struct filedesc *fdp;
-	struct file *fp;
+	struct file *fp, *vfp;
 	int vfslocked, filetype, error;
+	static struct fileops *origops, bufferedops;
+	static int outcnt = 0;

 	if (req->newlen > sizeof cmd)
 		return (EBADRPC);
@@ -2450,7 +2478,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		return (error);
 	if (cmd.version != FFS_CMD_VERSION)
 		return (ERPCMISMATCH);
-	if ((error = getvnode(curproc->p_fd, cmd.handle, &fp)) != 0)
+	if ((error = getvnode(td->td_proc->p_fd, cmd.handle, &fp)) != 0)
 		return (error);
 	vp = fp->f_data;
 	if (vp->v_type != VREG && vp->v_type != VDIR) {
@@ -2463,12 +2491,13 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		fdrop(fp, td);
 		return (EINVAL);
 	}
-	if (mp->mnt_flag & MNT_RDONLY) {
+	ump = VFSTOUFS(mp);
+	if ((mp->mnt_flag & MNT_RDONLY) &&
+	    ump->um_fsckpid != td->td_proc->p_pid) {
 		vn_finished_write(mp);
 		fdrop(fp, td);
 		return (EROFS);
 	}
-	ump = VFSTOUFS(mp);
 	fs = ump->um_fs;

 	filetype = IFREG;
@@ -2489,7 +2518,7 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 	case FFS_ADJ_REFCNT:
 #ifdef DEBUG
 		if (fsckcmds) {
-			printf("%s: adjust inode %jd count by %jd\n",
+			printf("%s: adjust inode %jd link count by %jd\n",
 			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value,
 			    (intmax_t)cmd.size);
 		}
@@ -2500,7 +2529,8 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		ip->i_nlink += cmd.size;
 		DIP_SET(ip, i_nlink, ip->i_nlink);
 		ip->i_effnlink += cmd.size;
-		ip->i_flag |= IN_CHANGE;
+		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
+		error = ffs_update(vp, 1);
 		if (DOINGSOFTDEP(vp))
 			softdep_change_linkcnt(ip);
 		vput(vp);
@@ -2518,7 +2548,8 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 			break;
 		ip = VTOI(vp);
 		DIP_SET(ip, i_blocks, DIP(ip, i_blocks) + cmd.size);
-		ip->i_flag |= IN_CHANGE;
+		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
+		error = ffs_update(vp, 1);
 		vput(vp);
 		break;

@@ -2718,6 +2749,78 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 		    UIO_USERSPACE, (ino_t)cmd.size);
 		break;

+	case FFS_SET_INODE:
+		if (ump->um_fsckpid != td->td_proc->p_pid) {
+			error = EPERM;
+			break;
+		}
+#ifdef DEBUG
+		if (fsckcmds && outcnt++ < 100) {
+			printf("%s: update inode %jd\n",
+			    mp->mnt_stat.f_mntonname, (intmax_t)cmd.value);
+		}
+#endif /* DEBUG */
+		if ((error = ffs_vget(mp, (ino_t)cmd.value, LK_EXCLUSIVE, &vp)))
+			break;
+		vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+		AUDIT_ARG_VNODE1(vp);
+		ip = VTOI(vp);
+		if (ip->i_ump->um_fstype == UFS1)
+			error = copyin((void *)(intptr_t)cmd.size, ip->i_din1,
+			    sizeof(struct ufs1_dinode));
+		else
+			error = copyin((void *)(intptr_t)cmd.size, ip->i_din2,
+			    sizeof(struct ufs2_dinode));
+		if (error) {
+			vput(vp);
+			VFS_UNLOCK_GIANT(vfslocked);
+			break;
+		}
+		ip->i_flag |= IN_CHANGE | IN_MODIFIED;
+		error = ffs_update(vp, 1);
+		vput(vp);
+		VFS_UNLOCK_GIANT(vfslocked);
+		break;
+
+	case FFS_SET_BUFOUTPUT:
+		if (ump->um_fsckpid != td->td_proc->p_pid) {
+			error = EPERM;
+			break;
+		}
+		if (VTOI(vp)->i_ump != ump) {
+			error = EINVAL;
+			break;
+		}
+#ifdef DEBUG
+		if (fsckcmds) {
+			printf("%s: %s buffered output for descriptor %jd\n",
+			    mp->mnt_stat.f_mntonname,
+			    cmd.size == 1 ? "enable" : "disable",
+			    (intmax_t)cmd.value);
+		}
+#endif /* DEBUG */
+		if ((error = getvnode(td->td_proc->p_fd, cmd.value, &vfp)) != 0)
+			break;
+		if (vfp->f_vnode->v_type != VCHR) {
+			fdrop(vfp, td);
+			error = EINVAL;
+			break;
+		}
+		if (origops == NULL) {
+			origops = vfp->f_ops;
+			bcopy((void *)origops, (void *)&bufferedops,
+			    sizeof(bufferedops));
+			bufferedops.fo_write = buffered_write;
+		}
+		if (cmd.size == 1)
+			atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops,
+			    (uintptr_t)&bufferedops);
+		else
+			atomic_store_rel_ptr((volatile uintptr_t *)&vfp->f_ops,
+			    (uintptr_t)origops);
+		fdrop(vfp, td);
+		break;
+
 	default:
 #ifdef DEBUG
 		if (fsckcmds) {
@@ -2733,3 +2836,73 @@ sysctl_ffs_fsck(SYSCTL_HANDLER_ARGS)
 	vn_finished_write(mp);
 	return (error);
 }
+
+/*
+ * Function to switch a descriptor to use the buffer cache to stage
+ * its I/O. This is needed so that writes to the filesystem device
+ * will give snapshots a chance to copy modified blocks for which it
+ * needs to retain copies.
+ */
+static int
+buffered_write(fp, uio, active_cred, flags, td)
+	struct file *fp;
+	struct uio *uio;
+	struct ucred *active_cred;
+	int flags;
+	struct thread *td;
+{
+	struct vnode *devvp;
+	struct inode *ip;
+	struct buf *bp;
+	struct fs *fs;
+	int error, vfslocked;
+	daddr_t lbn;
+	static int outcnt = 0;
+
+	/*
+	 * The devvp is associated with the /dev filesystem. To discover
+	 * the filesystem with which the device is associated, we depend
+	 * on the application setting the current directory to a location
+	 * within the filesystem being written. Yes, this is an ugly hack.
+	 */
+	devvp = fp->f_vnode;
+	ip = VTOI(td->td_proc->p_fd->fd_cdir);
+	if (ip->i_devvp != devvp)
+		return (EINVAL);
+	fs = ip->i_fs;
+	vfslocked = VFS_LOCK_GIANT(ip->i_vnode->v_mount);
+	vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
+	if ((flags & FOF_OFFSET) == 0)
+		uio->uio_offset = fp->f_offset;
+#ifdef DEBUG
+	if (fsckcmds && outcnt++ < 100) {
+		printf("%s: buffered write for block %jd\n",
+		    fs->fs_fsmnt, (intmax_t)btodb(uio->uio_offset));
+	}
+#endif /* DEBUG */
+	/*
+	 * All I/O must be contained within a filesystem block, start on
+	 * a fragment boundary, and be a multiple of fragments in length.
+	 */
+	if (uio->uio_resid > fs->fs_bsize - (uio->uio_offset % fs->fs_bsize) ||
+	    fragoff(fs, uio->uio_offset) != 0 ||
+	    fragoff(fs, uio->uio_resid) != 0) {
+		error = EINVAL;
+		goto out;
+	}
+	lbn = numfrags(fs, uio->uio_offset);
+	bp = getblk(devvp, lbn, uio->uio_resid, 0, 0, 0);
+	bp->b_flags |= B_RELBUF;
+	if ((error = uiomove((char *)bp->b_data, uio->uio_resid, uio)) != 0) {
+		brelse(bp);
+		goto out;
+	}
+	error = bwrite(bp);
+	if ((flags & FOF_OFFSET) == 0)
+		fp->f_offset = uio->uio_offset;
+	fp->f_nextoff = uio->uio_offset;
+out:
+	VOP_UNLOCK(devvp, 0);
+	VFS_UNLOCK_GIANT(vfslocked);
+	return (error);
+}
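Usage note: the new setinode and setbufoutput commands follow the same userland handshake as the existing vfs.ffs fsck sysctls: fill in a struct fsck_cmd and push it through sysctlbyname() as the new value. A minimal sketch under those assumptions follows; the helper names are hypothetical, error handling is elided, and, per the kernel checks above, both commands fail with EPERM unless the calling process registered itself via the "fsckpid" mount option.

/*
 * Hypothetical userland helpers driving the new vfs.ffs sysctls.
 * struct fsck_cmd and FFS_CMD_VERSION come from <ufs/ffs/fs.h>.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <ufs/ffs/fs.h>
#include <stdint.h>

static int
fsck_setinode(int fsfd, ino_t inum, const void *dip)
{
	struct fsck_cmd cmd = { 0 };

	cmd.version = FFS_CMD_VERSION;
	cmd.handle = fsfd;		/* open descriptor in the filesystem */
	cmd.value = inum;		/* inode number to replace */
	cmd.size = (intptr_t)dip;	/* user address of new dinode contents */
	return (sysctlbyname("vfs.ffs.setinode", NULL, NULL, &cmd,
	    sizeof(cmd)));
}

static int
fsck_setbufoutput(int fsfd, int devfd, int enable)
{
	struct fsck_cmd cmd = { 0 };

	cmd.version = FFS_CMD_VERSION;
	cmd.handle = fsfd;
	cmd.value = devfd;		/* descriptor on the character device */
	cmd.size = enable;		/* 1: buffer cache, 0: physio */
	return (sysctlbyname("vfs.ffs.setbufoutput", NULL, NULL, &cmd,
	    sizeof(cmd)));
}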
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index 63a4eba..f6b078f 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -234,9 +234,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
 	if (num < 1)
 		panic ("ffs_balloc_ufs1: ufs_getlbns returned indirect block");
 #endif
-	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
-	    TDP_INBDFLUSH);
-	curthread->td_pflags |= TDP_INBDFLUSH;
+	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
 	/*
 	 * Fetch the first indirect block allocating if necessary.
 	 */
@@ -250,7 +248,7 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
 		pref = ffs_blkpref_ufs1(ip, lbn, 0, (ufs1_daddr_t *)0);
 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
 		    flags, cred, &newb)) != 0) {
-			curthread->td_pflags &= saved_inbdflush;
+			curthread_pflags_restore(saved_inbdflush);
 			return (error);
 		}
 		nb = newb;
@@ -356,7 +354,7 @@ retry:
 	 * If asked only for the indirect block, then return it.
 	 */
 	if (flags & BA_METAONLY) {
-		curthread->td_pflags &= saved_inbdflush;
+		curthread_pflags_restore(saved_inbdflush);
 		*bpp = bp;
 		return (0);
 	}
@@ -406,7 +404,7 @@ retry:
 			bp->b_flags |= B_CLUSTEROK;
 			bdwrite(bp);
 		}
-		curthread->td_pflags &= saved_inbdflush;
+		curthread_pflags_restore(saved_inbdflush);
 		*bpp = nbp;
 		return (0);
 	}
@@ -428,11 +426,11 @@ retry:
 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 	}
-	curthread->td_pflags &= saved_inbdflush;
+	curthread_pflags_restore(saved_inbdflush);
 	*bpp = nbp;
 	return (0);
 fail:
-	curthread->td_pflags &= saved_inbdflush;
+	curthread_pflags_restore(saved_inbdflush);
 	/*
 	 * If we have failed to allocate any blocks, simply return the error.
 	 * This is the usual case and avoids the need to fsync the file.
@@ -774,9 +772,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
 	if (num < 1)
 		panic ("ffs_balloc_ufs2: ufs_getlbns returned indirect block");
 #endif
-	saved_inbdflush = ~TDP_INBDFLUSH | (curthread->td_pflags &
-	    TDP_INBDFLUSH);
-	curthread->td_pflags |= TDP_INBDFLUSH;
+	saved_inbdflush = curthread_pflags_set(TDP_INBDFLUSH);
 	/*
 	 * Fetch the first indirect block allocating if necessary.
 	 */
@@ -790,7 +786,7 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
 		pref = ffs_blkpref_ufs2(ip, lbn, 0, (ufs2_daddr_t *)0);
 		if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize,
 		    flags, cred, &newb)) != 0) {
-			curthread->td_pflags &= saved_inbdflush;
+			curthread_pflags_restore(saved_inbdflush);
 			return (error);
 		}
 		nb = newb;
@@ -896,7 +892,7 @@ retry:
 	 * If asked only for the indirect block, then return it.
 	 */
 	if (flags & BA_METAONLY) {
-		curthread->td_pflags &= saved_inbdflush;
+		curthread_pflags_restore(saved_inbdflush);
 		*bpp = bp;
 		return (0);
 	}
@@ -946,7 +942,7 @@ retry:
 			bp->b_flags |= B_CLUSTEROK;
 			bdwrite(bp);
 		}
-		curthread->td_pflags &= saved_inbdflush;
+		curthread_pflags_restore(saved_inbdflush);
 		*bpp = nbp;
 		return (0);
 	}
@@ -974,11 +970,11 @@ retry:
 		nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0, 0);
 		nbp->b_blkno = fsbtodb(fs, nb);
 	}
-	curthread->td_pflags &= saved_inbdflush;
+	curthread_pflags_restore(saved_inbdflush);
 	*bpp = nbp;
 	return (0);
fail:
-	curthread->td_pflags &= saved_inbdflush;
+	curthread_pflags_restore(saved_inbdflush);
 	/*
 	 * If we have failed to allocate any blocks, simply return the error.
	 * This is the usual case and avoids the need to fsync the file.
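Usage note: the open-coded save/set/restore of TDP_INBDFLUSH is replaced throughout by the curthread_pflags_set()/curthread_pflags_restore() pair. As a minimal sketch of what those helpers do, mirroring the sequence they replace (FreeBSD defines them as inlines in <sys/proc.h>):

static __inline int
curthread_pflags_set(int flags)
{
	struct thread *td;
	int save;

	td = curthread;
	/* Remember which of the requested flags were already set. */
	save = ~flags | (td->td_pflags & flags);
	td->td_pflags |= flags;
	return (save);
}

static __inline void
curthread_pflags_restore(int save)
{

	/* Clear only the flags that were not set on entry. */
	curthread->td_pflags &= save;
}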
diff --git a/sys/ufs/ffs/ffs_extern.h b/sys/ufs/ffs/ffs_extern.h
index fb1b1fb..70bcf1d 100644
--- a/sys/ufs/ffs/ffs_extern.h
+++ b/sys/ufs/ffs/ffs_extern.h
@@ -33,7 +33,10 @@
 #ifndef _UFS_FFS_EXTERN_H
 #define _UFS_FFS_EXTERN_H

-enum vtype;
+#ifndef _KERNEL
+#error "No user-serving parts inside"
+#else
+
 struct buf;
 struct cg;
 struct fid;
@@ -127,7 +130,7 @@ void	softdep_freefile(struct vnode *, ino_t, int);
 int	softdep_request_cleanup(struct fs *, struct vnode *,
 	    struct ucred *, int);
 void	softdep_setup_freeblocks(struct inode *, off_t, int);
-void	softdep_setup_inomapdep(struct buf *, struct inode *, ino_t);
+void	softdep_setup_inomapdep(struct buf *, struct inode *, ino_t, int);
 void	softdep_setup_blkmapdep(struct buf *, struct mount *, ufs2_daddr_t,
 	    int, int);
 void	softdep_setup_allocdirect(struct inode *, ufs_lbn_t, ufs2_daddr_t,
@@ -168,7 +171,6 @@ void	softdep_freework(struct workhead *);

 int	ffs_rdonly(struct inode *);

-#ifdef _KERNEL
 TAILQ_HEAD(snaphead, inode);

 struct snapdata {
@@ -178,6 +180,7 @@ struct snapdata {
 	daddr_t *sn_blklist;
 	struct lock sn_lock;
 };
+
 #endif /* _KERNEL */

 #endif /* !_UFS_FFS_EXTERN_H */
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index a7b43e2..0034029 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -90,7 +90,7 @@ ffs_update(vp, waitfor)
 		return (0);
 	ip->i_flag &= ~(IN_LAZYACCESS | IN_LAZYMOD | IN_MODIFIED);
 	fs = ip->i_fs;
-	if (fs->fs_ronly)
+	if (fs->fs_ronly && ip->i_ump->um_fsckpid == 0)
 		return (0);
 	error = bread(ip->i_devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
 	    (int)fs->fs_bsize, NOCRED, &bp);
@@ -128,7 +128,7 @@ ffs_pages_remove(struct vnode *vp, vm_pindex_t start, vm_pindex_t end)
 	if ((object = vp->v_object) == NULL)
 		return;
 	VM_OBJECT_LOCK(object);
-	vm_object_page_remove(object, start, end, FALSE);
+	vm_object_page_remove(object, start, end, 0);
 	VM_OBJECT_UNLOCK(object);
 }

diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c
index 8d236bd..c8dd4c6 100644
--- a/sys/ufs/ffs/ffs_snapshot.c
+++ b/sys/ufs/ffs/ffs_snapshot.c
@@ -1918,7 +1918,8 @@ retry:
 	if (savedcbp) {
 		vp = savedcbp->b_vp;
 		bawrite(savedcbp);
-		if ((vtype == VDIR || dopersistence) && ip->i_effnlink > 0)
+		if ((vtype == VDIR || dopersistence) &&
+		    VTOI(vp)->i_effnlink > 0)
 			(void) ffs_syncvnode(vp, MNT_WAIT);
 	}
 	/*
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 3734a5d..fccb296 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -142,10 +142,11 @@ softdep_setup_sbupdate(ump, fs, bp)
 }

 void
-softdep_setup_inomapdep(bp, ip, newinum)
+softdep_setup_inomapdep(bp, ip, newinum, mode)
 	struct buf *bp;
 	struct inode *ip;
 	ino_t newinum;
+	int mode;
 {

 	panic("softdep_setup_inomapdep called");
@@ -789,6 +790,8 @@ static	void diradd_inode_written(struct diradd *, struct inodedep *);
 static	int handle_written_indirdep(struct indirdep *, struct buf *,
 	    struct buf**);
 static	int handle_written_inodeblock(struct inodedep *, struct buf *);
+static	int jnewblk_rollforward(struct jnewblk *, struct fs *, struct cg *,
+	    uint8_t *);
 static	int handle_written_bmsafemap(struct bmsafemap *, struct buf *);
 static	void handle_written_jaddref(struct jaddref *);
 static	void handle_written_jremref(struct jremref *);
@@ -820,6 +823,8 @@ static	void handle_allocindir_partdone(struct allocindir *);
 static	void initiate_write_filepage(struct pagedep *, struct buf *);
 static	void initiate_write_indirdep(struct indirdep*, struct buf *);
 static	void handle_written_mkdir(struct mkdir *, int);
+static	int jnewblk_rollback(struct jnewblk *, struct fs *, struct cg *,
+	    uint8_t *);
 static	void initiate_write_bmsafemap(struct bmsafemap *, struct buf *);
 static	void initiate_write_inodeblock_ufs1(struct inodedep *, struct buf *);
 static	void initiate_write_inodeblock_ufs2(struct inodedep *, struct buf *);
@@ -875,6 +880,7 @@ static	inline void setup_freeext(struct freeblks *, struct inode *, int, int);
 static	inline void setup_freeindir(struct freeblks *, struct inode *, int,
 	    ufs_lbn_t, int);
 static	inline struct freeblks *newfreeblks(struct mount *, struct inode *);
+static	void freeblks_free(struct ufsmount *, struct freeblks *, int);
 static	void indir_trunc(struct freework *, ufs2_daddr_t, ufs_lbn_t);
 ufs2_daddr_t blkcount(struct fs *, ufs2_daddr_t, off_t);
 static	int trunc_check_buf(struct buf *, int *, ufs_lbn_t, int, int);
@@ -935,6 +941,7 @@ static	void wake_worklist(struct worklist *);
 static	void wait_worklist(struct worklist *, char *);
 static	void remove_from_worklist(struct worklist *);
 static	void softdep_flush(void);
+static	void softdep_flushjournal(struct mount *);
 static	int softdep_speedup(void);
 static	void worklist_speedup(void);
 static	int journal_mount(struct mount *, struct fs *, struct ucred *);
@@ -3046,6 +3053,25 @@ jfsync_write(jfsync, jseg, data)
 	rec->jt_extsize = jfsync->jfs_extsize;
 }

+static void
+softdep_flushjournal(mp)
+	struct mount *mp;
+{
+	struct jblocks *jblocks;
+	struct ufsmount *ump;
+
+	if ((mp->mnt_kern_flag & MNTK_SUJ) == 0)
+		return;
+	ump = VFSTOUFS(mp);
+	jblocks = ump->softdep_jblocks;
+	ACQUIRE_LOCK(&lk);
+	while (ump->softdep_on_journal) {
+		jblocks->jb_needseg = 1;
+		softdep_process_journal(mp, NULL, MNT_WAIT);
+	}
+	FREE_LOCK(&lk);
+}
+
 /*
  * Flush some journal records to disk.
  */
@@ -4310,7 +4336,6 @@ softdep_setup_create(dp, ip)
 		    inoreflst);
 		KASSERT(jaddref != NULL && jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_create: No addref structure present."));
-		jaddref->ja_mode = ip->i_mode;
 	}
 	softdep_prelink(dvp, NULL);
 	FREE_LOCK(&lk);
@@ -4417,7 +4442,6 @@ softdep_setup_mkdir(dp, ip)
 		KASSERT(jaddref->ja_parent == dp->i_number,
 		    ("softdep_setup_mkdir: bad parent %d",
 		    jaddref->ja_parent));
-		jaddref->ja_mode = ip->i_mode;
 		TAILQ_INSERT_BEFORE(&jaddref->ja_ref, &dotaddref->ja_ref,
 		    if_deps);
 	}
@@ -4637,10 +4661,11 @@ softdep_revert_rmdir(dp, ip)
  * Called just after updating the cylinder group block to allocate an inode.
  */
 void
-softdep_setup_inomapdep(bp, ip, newinum)
+softdep_setup_inomapdep(bp, ip, newinum, mode)
 	struct buf *bp;		/* buffer for cylgroup block with inode map */
 	struct inode *ip;	/* inode related to allocation */
 	ino_t newinum;		/* new inode number being allocated */
+	int mode;
 {
 	struct inodedep *inodedep;
 	struct bmsafemap *bmsafemap;
@@ -4657,7 +4682,7 @@ softdep_setup_inomapdep(bp, ip, newinum)
 	 * can be dependent on it.
 	 */
 	if (mp->mnt_kern_flag & MNTK_SUJ) {
-		jaddref = newjaddref(ip, newinum, 0, 0, 0);
+		jaddref = newjaddref(ip, newinum, 0, 0, mode);
 		jaddref->ja_state |= NEWBLOCK;
 	}

@@ -5014,14 +5039,12 @@ jnewblk_merge(new, old, wkhd)
 	if (jnewblk->jn_blkno != njnewblk->jn_blkno)
 		panic("jnewblk_merge: Merging disparate blocks.");
 	/*
-	 * The record may be rolled back in the cg update bits
-	 * appropriately. NEWBLOCK here alerts the cg rollback code
-	 * that the frag bits have changed.
+	 * The record may be rolled back in the cg.
 	 */
 	if (jnewblk->jn_state & UNDONE) {
-		njnewblk->jn_state |= UNDONE | NEWBLOCK;
-		njnewblk->jn_state &= ~ATTACHED;
 		jnewblk->jn_state &= ~UNDONE;
+		njnewblk->jn_state |= UNDONE;
+		njnewblk->jn_state &= ~ATTACHED;
 	}
 	/*
 	 * We modify the newer addref and free the older so that if neither
@@ -5176,7 +5199,7 @@ newfreefrag(ip, blkno, size, lbn)
 	freefrag->ff_blkno = blkno;
 	freefrag->ff_fragsize = size;

-	if (fs->fs_flags & FS_SUJ) {
+	if ((ip->i_ump->um_mountp->mnt_kern_flag & MNTK_SUJ) != 0) {
 		freefrag->ff_jdep = (struct worklist *)
 		    newjfreefrag(freefrag, ip, blkno, size, lbn);
 	} else {
@@ -5729,7 +5752,6 @@ newfreeblks(mp, ip)
 	freeblks->fb_modrev = DIP(ip, i_modrev);
 	freeblks->fb_devvp = ip->i_devvp;
 	freeblks->fb_chkcnt = 0;
-	freeblks->fb_freecnt = 0;
 	freeblks->fb_len = 0;

 	return (freeblks);
@@ -6177,7 +6199,7 @@ softdep_journal_freeblocks(ip, cred, length, flags)
 	quotaref(vp, freeblks->fb_quota);
 	(void) chkdq(ip, -datablocks, NOCRED, 0);
 #endif
-	freeblks->fb_chkcnt = datablocks;
+	freeblks->fb_chkcnt = -datablocks;
 	UFS_LOCK(ip->i_ump);
 	fs->fs_pendingblocks += datablocks;
 	UFS_UNLOCK(ip->i_ump);
@@ -6407,7 +6429,7 @@ softdep_setup_freeblocks(ip, length, flags)
 	quotaref(vp, freeblks->fb_quota);
 	(void) chkdq(ip, -datablocks, NOCRED, 0);
 #endif
-	freeblks->fb_chkcnt = datablocks;
+	freeblks->fb_chkcnt = -datablocks;
 	UFS_LOCK(ip->i_ump);
 	fs->fs_pendingblocks += datablocks;
 	UFS_UNLOCK(ip->i_ump);
@@ -7262,8 +7284,8 @@ freework_freeblock(freework)
 		freeblks->fb_cgwait++;
 		WORKLIST_INSERT(&wkhd, &freework->fw_list);
 	}
-	freeblks->fb_freecnt += btodb(bsize);
 	FREE_LOCK(&lk);
+	freeblks_free(ump, freeblks, btodb(bsize));
 	ffs_blkfree(ump, fs, freeblks->fb_devvp, freework->fw_blkno, bsize,
 	    freeblks->fb_inum, freeblks->fb_vtype, &wkhd);
 	ACQUIRE_LOCK(&lk);
@@ -7437,6 +7459,33 @@ handle_workitem_freeblocks(freeblks, flags)
 }

 /*
+ * Handle completion of block free via truncate. This allows fs_pending
+ * to track the actual free block count more closely than if we only updated
+ * it at the end. We must be careful to handle cases where the block count
+ * on free was incorrect.
+ */
+static void
+freeblks_free(ump, freeblks, blocks)
+	struct ufsmount *ump;
+	struct freeblks *freeblks;
+	int blocks;
+{
+	struct fs *fs;
+	ufs2_daddr_t remain;
+
+	UFS_LOCK(ump);
+	remain = -freeblks->fb_chkcnt;
+	freeblks->fb_chkcnt += blocks;
+	if (remain > 0) {
+		if (remain < blocks)
+			blocks = remain;
+		fs = ump->um_fs;
+		fs->fs_pendingblocks -= blocks;
+	}
+	UFS_UNLOCK(ump);
+}
+
+/*
  * Once all of the freework workitems are complete we can retire the
 * freeblocks dependency and any journal work awaiting completion. This
 * can not be called until all other dependencies are stable on disk.
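Usage note: with fb_freecnt gone, fb_chkcnt does double duty. It starts at -datablocks and counts up toward zero as freeblks_free() releases fs_pendingblocks credit, clamped so an unexpected extra free cannot drive the pending count negative. A standalone toy (not kernel code) illustrating the sign convention under assumed example values:

#include <stdio.h>

static long fb_chkcnt = -8;		/* truncate expects 8 blocks */
static long fs_pendingblocks = 8;

static void
freeblks_free_demo(long blocks)
{
	long remain = -fb_chkcnt;	/* credit still outstanding */

	fb_chkcnt += blocks;
	if (remain > 0)
		fs_pendingblocks -= (remain < blocks) ? remain : blocks;
}

int
main(void)
{
	freeblks_free_demo(6);	/* chkcnt -2, pending 2 */
	freeblks_free_demo(4);	/* 2 blocks more than expected */
	/* Prints "chkcnt 2 pending 0"; the positive remainder is the
	 * "spare" that handle_complete_freeblocks() reconciles below. */
	printf("chkcnt %ld pending %ld\n", fb_chkcnt, fs_pendingblocks);
	return (0);
}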
@@ -7456,7 +7505,7 @@ handle_complete_freeblocks(freeblks, flags)
 	ump = VFSTOUFS(freeblks->fb_list.wk_mp);
 	fs = ump->um_fs;
 	flags = LK_EXCLUSIVE | flags;
-	spare = freeblks->fb_freecnt - freeblks->fb_chkcnt;
+	spare = freeblks->fb_chkcnt;

 	/*
 	 * If we did not release the expected number of blocks we may have
@@ -7479,9 +7528,9 @@ handle_complete_freeblocks(freeblks, flags)
 		}
 		vput(vp);
 	}
-	if (freeblks->fb_chkcnt) {
+	if (spare < 0) {
 		UFS_LOCK(ump);
-		fs->fs_pendingblocks -= freeblks->fb_chkcnt;
+		fs->fs_pendingblocks += spare;
 		UFS_UNLOCK(ump);
 	}
 #ifdef QUOTA
@@ -7537,7 +7586,7 @@ indir_trunc(freework, dbn, lbn)
 	ufs2_daddr_t nb, nnb, *bap2 = 0;
 	ufs_lbn_t lbnadd, nlbn;
 	int i, nblocks, ufs1fmt;
-	int fs_pendingblocks;
+	int freedblocks;
 	int goingaway;
 	int freedeps;
 	int needj;
@@ -7679,16 +7728,18 @@ indir_trunc(freework, dbn, lbn)
 		bp->b_flags |= B_INVAL | B_NOCACHE;
 		brelse(bp);
 	}
-	fs_pendingblocks = 0;
+	freedblocks = 0;
 	if (level == 0)
-		fs_pendingblocks = (nblocks * cnt);
+		freedblocks = (nblocks * cnt);
+	if (needj == 0)
+		freedblocks += nblocks;
+	freeblks_free(ump, freeblks, freedblocks);
 	/*
 	 * If we are journaling set up the ref counts and offset so this
 	 * indirect can be completed when its children are free.
 	 */
 	if (needj) {
 		ACQUIRE_LOCK(&lk);
-		freeblks->fb_freecnt += fs_pendingblocks;
 		freework->fw_off = i;
 		freework->fw_ref += freedeps;
 		freework->fw_ref -= NINDIR(fs) + 1;
@@ -7702,12 +7753,10 @@ indir_trunc(freework, dbn, lbn)
 	/*
 	 * If we're not journaling we can free the indirect now.
 	 */
-	fs_pendingblocks += nblocks;
 	dbn = dbtofsb(fs, dbn);
 	ffs_blkfree(ump, fs, freeblks->fb_devvp, dbn, fs->fs_bsize,
 	    freeblks->fb_inum, freeblks->fb_vtype, NULL);
 	/* Non SUJ softdep does single-threaded truncations. */
-	freeblks->fb_freecnt += fs_pendingblocks;
 	if (freework->fw_blkno == dbn) {
 		freework->fw_state |= ALLCOMPLETE;
 		ACQUIRE_LOCK(&lk);
@@ -8879,7 +8928,7 @@ softdep_setup_sbupdate(ump, fs, bp)
 	struct sbdep *sbdep;
 	struct worklist *wk;

-	if ((fs->fs_flags & FS_SUJ) == 0)
+	if ((ump->um_mountp->mnt_kern_flag & MNTK_SUJ) == 0)
 		return;
 	LIST_FOREACH(wk, &bp->b_dep, wk_list)
 		if (wk->wk_type == D_SBDEP)
@@ -10233,6 +10282,70 @@ softdep_setup_blkfree(mp, bp, blkno, frags, wkhd)
 	FREE_LOCK(&lk);
 }

+/*
+ * Revert a block allocation when the journal record that describes it
+ * is not yet written.
+ */
+int
+jnewblk_rollback(jnewblk, fs, cgp, blksfree)
+	struct jnewblk *jnewblk;
+	struct fs *fs;
+	struct cg *cgp;
+	uint8_t *blksfree;
+{
+	ufs1_daddr_t fragno;
+	long cgbno, bbase;
+	int frags, blk;
+	int i;
+
+	frags = 0;
+	cgbno = dtogd(fs, jnewblk->jn_blkno);
+	/*
+	 * We have to test which frags need to be rolled back. We may
+	 * be operating on a stale copy when doing background writes.
+	 */
+	for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++)
+		if (isclr(blksfree, cgbno + i))
+			frags++;
+	if (frags == 0)
+		return (0);
+	/*
+	 * This is mostly ffs_blkfree() sans some validation and
+	 * superblock updates.
+	 */
+	if (frags == fs->fs_frag) {
+		fragno = fragstoblks(fs, cgbno);
+		ffs_setblock(fs, blksfree, fragno);
+		ffs_clusteracct(fs, cgp, fragno, 1);
+		cgp->cg_cs.cs_nbfree++;
+	} else {
+		cgbno += jnewblk->jn_oldfrags;
+		bbase = cgbno - fragnum(fs, cgbno);
+		/* Decrement the old frags. */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+		/* Deallocate the fragment */
+		for (i = 0; i < frags; i++)
+			setbit(blksfree, cgbno + i);
+		cgp->cg_cs.cs_nffree += frags;
+		/* Add back in counts associated with the new frags */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+		/* If a complete block has been reassembled, account for it. */
+		fragno = fragstoblks(fs, bbase);
+		if (ffs_isblock(fs, blksfree, fragno)) {
+			cgp->cg_cs.cs_nffree -= fs->fs_frag;
+			ffs_clusteracct(fs, cgp, fragno, 1);
+			cgp->cg_cs.cs_nbfree++;
+		}
+	}
+	stat_jnewblk++;
+	jnewblk->jn_state &= ~ATTACHED;
+	jnewblk->jn_state |= UNDONE;
+
+	return (frags);
+}
+
 static void
 initiate_write_bmsafemap(bmsafemap, bp)
 	struct bmsafemap *bmsafemap;
@@ -10244,10 +10357,7 @@ initiate_write_bmsafemap(bmsafemap, bp)
 	uint8_t *blksfree;
 	struct cg *cgp;
 	struct fs *fs;
-	int cleared;
 	ino_t ino;
-	long bno;
-	int i;

 	if (bmsafemap->sm_state & IOSTARTED)
 		panic("initiate_write_bmsafemap: Already started\n");
@@ -10286,25 +10396,9 @@ initiate_write_bmsafemap(bmsafemap, bp)
 		fs = VFSTOUFS(bmsafemap->sm_list.wk_mp)->um_fs;
 		blksfree = cg_blksfree(cgp);
 		LIST_FOREACH(jnewblk, &bmsafemap->sm_jnewblkhd, jn_deps) {
-			bno = dtogd(fs, jnewblk->jn_blkno);
-			cleared = 0;
-			for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags;
-			    i++) {
-				if (isclr(blksfree, bno + i)) {
-					cleared = 1;
-					setbit(blksfree, bno + i);
-				}
-			}
-			/*
-			 * We may not clear the block if it's a background
-			 * copy. In that case there is no reason to detach
-			 * it.
-			 */
-			if (cleared) {
-				stat_jnewblk++;
-				jnewblk->jn_state &= ~ATTACHED;
-				jnewblk->jn_state |= UNDONE;
-			} else if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
+			if (jnewblk_rollback(jnewblk, fs, cgp, blksfree))
+				continue;
+			if ((bp->b_xflags & BX_BKGRDMARKER) == 0)
 				panic("initiate_write_bmsafemap: block %jd "
 				    "marked free", jnewblk->jn_blkno);
 		}
@@ -10578,6 +10672,9 @@ handle_jwork(wkhd)
 		case D_FREEDEP:
 			free_freedep(WK_FREEDEP(wk));
 			continue;
+		case D_FREEFRAG:
+			rele_jseg(WK_JSEG(WK_FREEFRAG(wk)->ff_jdep));
+			WORKITEM_FREE(wk, D_FREEFRAG);
 		case D_FREEWORK:
 			handle_written_freework(WK_FREEWORK(wk));
 			continue;
@@ -11050,6 +11147,58 @@ bmsafemap_rollbacks(bmsafemap)
 }

 /*
+ * Re-apply an allocation when a cg write is complete.
+ */
+static int
+jnewblk_rollforward(jnewblk, fs, cgp, blksfree)
+	struct jnewblk *jnewblk;
+	struct fs *fs;
+	struct cg *cgp;
+	uint8_t *blksfree;
+{
+	ufs1_daddr_t fragno;
+	ufs2_daddr_t blkno;
+	long cgbno, bbase;
+	int frags, blk;
+	int i;
+
+	frags = 0;
+	cgbno = dtogd(fs, jnewblk->jn_blkno);
+	for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags; i++) {
+		if (isclr(blksfree, cgbno + i))
+			panic("jnewblk_rollforward: re-allocated fragment");
+		frags++;
+	}
+	if (frags == fs->fs_frag) {
+		blkno = fragstoblks(fs, cgbno);
+		ffs_clrblock(fs, blksfree, (long)blkno);
+		ffs_clusteracct(fs, cgp, blkno, -1);
+		cgp->cg_cs.cs_nbfree--;
+	} else {
+		bbase = cgbno - fragnum(fs, cgbno);
+		cgbno += jnewblk->jn_oldfrags;
+		/* If a complete block had been reassembled, account for it. */
+		fragno = fragstoblks(fs, bbase);
+		if (ffs_isblock(fs, blksfree, fragno)) {
+			cgp->cg_cs.cs_nffree += fs->fs_frag;
+			ffs_clusteracct(fs, cgp, fragno, -1);
+			cgp->cg_cs.cs_nbfree--;
+		}
+		/* Decrement the old frags. */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, -1);
+		/* Allocate the fragment */
+		for (i = 0; i < frags; i++)
+			clrbit(blksfree, cgbno + i);
+		cgp->cg_cs.cs_nffree -= frags;
+		/* Add back in counts associated with the new frags */
+		blk = blkmap(fs, blksfree, bbase);
+		ffs_fragacct(fs, blk, cgp->cg_frsum, 1);
+	}
+	return (frags);
+}
+
+/*
  * Complete a write to a bmsafemap structure. Roll forward any bitmap
 * changes if it's not a background write. Set all written dependencies
 * to DEPCOMPLETE and free the structure if possible.
@@ -11069,9 +11218,7 @@ handle_written_bmsafemap(bmsafemap, bp)
 	struct cg *cgp;
 	struct fs *fs;
 	ino_t ino;
-	long bno;
 	int chgs;
-	int i;

 	if ((bmsafemap->sm_state & IOSTARTED) == 0)
 		panic("initiate_write_bmsafemap: Not started\n");
@@ -11121,18 +11268,9 @@ handle_written_bmsafemap(bmsafemap, bp)
 		    jntmp) {
 			if ((jnewblk->jn_state & UNDONE) == 0)
 				continue;
-			bno = dtogd(fs, jnewblk->jn_blkno);
-			for (i = jnewblk->jn_oldfrags; i < jnewblk->jn_frags;
-			    i++) {
-				if (bp->b_xflags & BX_BKGRDMARKER)
-					break;
-				if ((jnewblk->jn_state & NEWBLOCK) == 0 &&
-				    isclr(blksfree, bno + i))
-					panic("handle_written_bmsafemap: "
-					    "re-allocated fragment");
-				clrbit(blksfree, bno + i);
+			if ((bp->b_xflags & BX_BKGRDMARKER) == 0 &&
+			    jnewblk_rollforward(jnewblk, fs, cgp, blksfree))
 				chgs = 1;
-			}
 			jnewblk->jn_state &= ~(UNDONE | NEWBLOCK);
 			jnewblk->jn_state |= ATTACHED;
 			free_jnewblk(jnewblk);
@@ -11826,6 +11964,11 @@ softdep_sync_metadata(struct vnode *vp)
 	 * truncations are started, and inode references are journaled.
 	 */
 	ACQUIRE_LOCK(&lk);
+	/*
+	 * Write all journal records to prevent rollbacks on devvp.
+	 */
+	if (vp->v_type == VCHR)
+		softdep_flushjournal(vp->v_mount);
 	error = flush_inodedep_deps(vp, vp->v_mount, VTOI(vp)->i_number);
 	/*
 	 * Ensure that all truncates are written so we won't find deps on
@@ -11965,6 +12108,8 @@ top:
 			continue;

 		case D_FREEWORK:
+		case D_FREEDEP:
+		case D_JSEGDEP:
 			continue;

 		default:
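Usage note: jnewblk_rollback() and jnewblk_rollforward() above are exact inverses on the cylinder-group free-fragment bitmap over frags [jn_oldfrags, jn_frags). A userspace toy showing just the bitmap half, using the same setbit/clrbit/isclr macros the kernel uses (from <sys/param.h>); cluster and summary accounting are omitted:

#include <sys/param.h>
#include <assert.h>
#include <string.h>

int
main(void)
{
	u_char blksfree[8];
	int jn_oldfrags = 2, jn_frags = 6, i;

	memset(blksfree, 0xff, sizeof(blksfree));	/* all frags free */
	for (i = jn_oldfrags; i < jn_frags; i++)	/* new allocation */
		clrbit(blksfree, i);

	/* Rollback: journal record not yet written, re-free the frags. */
	for (i = jn_oldfrags; i < jn_frags; i++)
		if (isclr(blksfree, i))
			setbit(blksfree, i);

	/* Rollforward: cg write complete, re-apply the allocation. */
	for (i = jn_oldfrags; i < jn_frags; i++) {
		assert(!isclr(blksfree, i));	/* must still be free */
		clrbit(blksfree, i);
	}
	return (0);
}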
+ * copy-on-write, then it is not safe to process any + * worklist items as we will recurse into the copyonwrite + * routine. This will result in an incoherent snapshot. */ - if (!(curthread->td_pflags & TDP_COWINPROGRESS)) { - UFS_UNLOCK(ump); - error = ffs_update(vp, 1); + if (curthread->td_pflags & TDP_COWINPROGRESS) + return (0); + UFS_UNLOCK(ump); + error = ffs_update(vp, 1); + if (error != 0) { UFS_LOCK(ump); - if (error != 0) - return (0); + return (0); } /* * If we are in need of resources, consider pausing for * tickdelay to give ourselves some breathing room. */ - UFS_UNLOCK(ump); ACQUIRE_LOCK(&lk); + process_removes(vp); + process_truncates(vp); request_cleanup(UFSTOVFS(ump), resource); FREE_LOCK(&lk); - UFS_LOCK(ump); /* * Now clean up at least as many resources as we will need. * @@ -12451,29 +12599,23 @@ softdep_request_cleanup(fs, vp, cred, resource) roundup((fs->fs_dsize * fs->fs_minfree / 100) - fs->fs_cstotal.cs_nffree, fs->fs_frag)); } else { + UFS_LOCK(ump); printf("softdep_request_cleanup: Unknown resource type %d\n", resource); return (0); } starttime = time_second; retry: - while ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && - fs->fs_cstotal.cs_nbfree <= needed) || - (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && - fs->fs_cstotal.cs_nifree <= needed)) { - UFS_UNLOCK(ump); + if ((resource == FLUSH_BLOCKS_WAIT && ump->softdep_on_worklist > 0 && + fs->fs_cstotal.cs_nbfree <= needed) || + (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && + fs->fs_cstotal.cs_nifree <= needed)) { ACQUIRE_LOCK(&lk); - process_removes(vp); - process_truncates(vp); if (ump->softdep_on_worklist > 0 && - process_worklist_item(UFSTOVFS(ump), 1, LK_NOWAIT) != 0) { + process_worklist_item(UFSTOVFS(ump), + ump->softdep_on_worklist, LK_NOWAIT) != 0) stat_worklist_push += 1; - FREE_LOCK(&lk); - UFS_LOCK(ump); - continue; - } FREE_LOCK(&lk); - UFS_LOCK(ump); } /* * If we still need resources and there are no more worklist @@ -12487,7 +12629,6 @@ retry: fs->fs_cstotal.cs_nbfree <= needed) || (resource == FLUSH_INODES_WAIT && fs->fs_pendinginodes > 0 && fs->fs_cstotal.cs_nifree <= needed)) { - UFS_UNLOCK(ump); MNT_ILOCK(mp); MNT_VNODE_FOREACH(lvp, mp, mvp) { VI_LOCK(lvp); @@ -12516,7 +12657,6 @@ retry: VOP_FSYNC(lvp, MNT_NOWAIT, curthread); VOP_UNLOCK(lvp, 0); } - UFS_LOCK(ump); if (ump->softdep_on_worklist > 0) { stat_cleanup_retries += 1; goto retry; @@ -12525,6 +12665,7 @@ retry: } if (time_second - starttime > stat_cleanup_high_delay) stat_cleanup_high_delay = time_second - starttime; + UFS_LOCK(ump); return (1); } diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index b0f2d7e..35852bf 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -132,8 +132,8 @@ static struct buf_ops ffs_ops = { */ static const char *ffs_opts[] = { "acls", "async", "noatime", "noclusterr", "noclusterw", "noexec", "export", "force", "from", "groupquota", - "multilabel", "nfsv4acls", "snapshot", "nosuid", "suiddir", "nosymfollow", - "sync", "union", "userquota", NULL }; + "multilabel", "nfsv4acls", "fsckpid", "snapshot", "nosuid", "suiddir", + "nosymfollow", "sync", "union", "userquota", NULL }; static int ffs_mount(struct mount *mp) @@ -142,6 +142,7 @@ ffs_mount(struct mount *mp) struct thread *td; struct ufsmount *ump = 0; struct fs *fs; + pid_t fsckpid = 0; int error, flags; u_int mntorflags; accmode_t accmode; @@ -184,6 +185,29 @@ ffs_mount(struct mount *mp) vfs_deleteopt(mp->mnt_opt, "snapshot"); } + if (vfs_getopt(mp->mnt_optnew, 
"fsckpid", NULL, NULL) == 0 && + vfs_scanopt(mp->mnt_optnew, "fsckpid", "%d", &fsckpid) == 1) { + /* + * Once we have set the restricted PID, do not + * persist "fsckpid" in the options list. + */ + vfs_deleteopt(mp->mnt_optnew, "fsckpid"); + vfs_deleteopt(mp->mnt_opt, "fsckpid"); + if (mp->mnt_flag & MNT_UPDATE) { + if (VFSTOUFS(mp)->um_fs->fs_ronly == 0 && + vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { + printf("Checker enable: Must be read-only\n"); + return (EINVAL); + } + } else if (vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0) == 0) { + printf("Checker enable: Must be read-only\n"); + return (EINVAL); + } + /* Set to -1 if we are done */ + if (fsckpid == 0) + fsckpid = -1; + } + if (vfs_getopt(mp->mnt_optnew, "nfsv4acls", NULL, NULL) == 0) { if (mntorflags & MNT_ACLS) { printf("WARNING: \"acls\" and \"nfsv4acls\" " @@ -204,6 +228,20 @@ ffs_mount(struct mount *mp) ump = VFSTOUFS(mp); fs = ump->um_fs; devvp = ump->um_devvp; + if (fsckpid == -1 && ump->um_fsckpid > 0) { + if ((error = ffs_flushfiles(mp, WRITECLOSE, td)) != 0 || + (error = ffs_sbupdate(ump, MNT_WAIT, 0)) != 0) + return (error); + DROP_GIANT(); + g_topology_lock(); + /* + * Return to normal read-only mode. + */ + error = g_access(ump->um_cp, 0, -1, 0); + g_topology_unlock(); + PICKUP_GIANT(); + ump->um_fsckpid = 0; + } if (fs->fs_ronly == 0 && vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* @@ -273,7 +311,10 @@ ffs_mount(struct mount *mp) softdep_unmount(mp); DROP_GIANT(); g_topology_lock(); - g_access(ump->um_cp, 0, -1, 0); + /* + * Drop our write and exclusive access. + */ + g_access(ump->um_cp, 0, -1, -1); g_topology_unlock(); PICKUP_GIANT(); fs->fs_ronly = 1; @@ -292,6 +333,13 @@ ffs_mount(struct mount *mp) if (fs->fs_ronly && !vfs_flagopt(mp->mnt_optnew, "ro", NULL, 0)) { /* + * If we are running a checker, do not allow upgrade. + */ + if (ump->um_fsckpid > 0) { + printf("Active checker, cannot rw upgrade\n"); + return (EINVAL); + } + /* * If upgrade to read-write by non-root, then verify * that user has necessary permissions on the device. */ @@ -327,13 +375,9 @@ ffs_mount(struct mount *mp) DROP_GIANT(); g_topology_lock(); /* - * If we're the root device, we may not have an E count - * yet, get it now. + * Request exclusive write access. */ - if (ump->um_cp->ace == 0) - error = g_access(ump->um_cp, 0, 1, 1); - else - error = g_access(ump->um_cp, 0, 1, 0); + error = g_access(ump->um_cp, 0, 1, 1); g_topology_unlock(); PICKUP_GIANT(); if (error) @@ -389,6 +433,39 @@ ffs_mount(struct mount *mp) mp->mnt_flag |= MNT_NFS4ACLS; MNT_IUNLOCK(mp); } + /* + * If this is a request from fsck to clean up the filesystem, + * then allow the specified pid to proceed. + */ + if (fsckpid > 0) { + if (ump->um_fsckpid != 0) { + printf("Active checker already running on %s\n", + fs->fs_fsmnt); + return (EINVAL); + } + KASSERT((mp->mnt_flag & MNT_SOFTDEP) == 0, + ("soft updates enabled on read-only file system")); + DROP_GIANT(); + g_topology_lock(); + /* + * Request write access. + */ + error = g_access(ump->um_cp, 0, 1, 0); + g_topology_unlock(); + PICKUP_GIANT(); + if (error) { + printf("Checker activation failed on %s\n", + fs->fs_fsmnt); + return (error); + } + ump->um_fsckpid = fsckpid; + if (fs->fs_snapinum[0] != 0) + ffs_snapshot_mount(mp); + fs->fs_mtime = time_second; + fs->fs_fmod = 1; + fs->fs_clean = 0; + (void) ffs_sbupdate(ump, MNT_WAIT, 0); + } /* * If this is a snapshot request, take the snapshot. 
@@ -452,6 +529,31 @@ ffs_mount(struct mount *mp)
 			vrele(devvp);
 			return (error);
 		}
+		if (fsckpid > 0) {
+			KASSERT((mp->mnt_flag & MNT_SOFTDEP) == 0,
+			    ("soft updates enabled on read-only file system"));
+			ump = VFSTOUFS(mp);
+			fs = ump->um_fs;
+			DROP_GIANT();
+			g_topology_lock();
+			/*
+			 * Request write access.
+			 */
+			error = g_access(ump->um_cp, 0, 1, 0);
+			g_topology_unlock();
+			PICKUP_GIANT();
+			if (error) {
+				printf("Checker activation failed on %s\n",
+				    fs->fs_fsmnt);
+			} else {
+				ump->um_fsckpid = fsckpid;
+				if (fs->fs_snapinum[0] != 0)
+					ffs_snapshot_mount(mp);
+				fs->fs_mtime = time_second;
+				fs->fs_clean = 0;
+				(void) ffs_sbupdate(ump, MNT_WAIT, 0);
+			}
+		}
 	}
 	vfs_mountedfrom(mp, fspec);
 	return (0);
@@ -665,13 +767,6 @@ ffs_mountfs(devvp, mp, td)
 	DROP_GIANT();
 	g_topology_lock();
 	error = g_vfs_open(devvp, &cp, "ffs", ronly ? 0 : 1);
-
-	/*
-	 * If we are a root mount, drop the E flag so fsck can do its magic.
-	 * We will pick it up again when we remount R/W.
-	 */
-	if (error == 0 && ronly && (mp->mnt_flag & MNT_ROOTFS))
-		error = g_access(cp, 0, 0, -1);
 	g_topology_unlock();
 	PICKUP_GIANT();
 	VOP_UNLOCK(devvp, 0);
@@ -932,7 +1027,7 @@ ffs_mountfs(devvp, mp, td)
 	strlcpy(fs->fs_fsmnt, mp->mnt_stat.f_mntonname, MAXMNTLEN);
 	mp->mnt_stat.f_iosize = fs->fs_bsize;

-	if( mp->mnt_flag & MNT_ROOTFS) {
+	if (mp->mnt_flag & MNT_ROOTFS) {
 		/*
 		 * Root mount; update timestamp in mount structure.
 		 * this will be used by the common root mount code
@@ -1169,7 +1264,7 @@ ffs_unmount(mp, mntflags)
 	}
 	UFS_UNLOCK(ump);
 	softdep_unmount(mp);
-	if (fs->fs_ronly == 0) {
+	if (fs->fs_ronly == 0 || ump->um_fsckpid > 0) {
 		fs->fs_clean = fs->fs_flags & (FS_UNCLEAN|FS_NEEDSFSCK) ? 0 : 1;
 		error = ffs_sbupdate(ump, MNT_WAIT, 0);
 		if (error && error != ENXIO) {
@@ -1183,6 +1278,13 @@ ffs_unmount(mp, mntflags)
 	}
 	DROP_GIANT();
 	g_topology_lock();
+	if (ump->um_fsckpid > 0) {
+		/*
+		 * Return to normal read-only mode.
+		 */
+		error = g_access(ump->um_cp, 0, -1, 0);
+		ump->um_fsckpid = 0;
+	}
 	g_vfs_close(ump->um_cp);
 	g_topology_unlock();
 	PICKUP_GIANT();
@@ -1331,7 +1433,7 @@ ffs_sync(mp, waitfor)
 	td = curthread;
 	fs = ump->um_fs;

-	if (fs->fs_fmod != 0 && fs->fs_ronly != 0) {		/* XXX */
+	if (fs->fs_fmod != 0 && fs->fs_ronly != 0 && ump->um_fsckpid == 0) {
 		printf("fs = %s\n", fs->fs_fsmnt);
 		panic("ffs_sync: rofs mod");
 	}
@@ -1689,12 +1791,12 @@ ffs_uninit(vfsp)
  * Write a superblock and associated information back to disk.
  */
 int
-ffs_sbupdate(mp, waitfor, suspended)
-	struct ufsmount *mp;
+ffs_sbupdate(ump, waitfor, suspended)
+	struct ufsmount *ump;
 	int waitfor;
 	int suspended;
 {
-	struct fs *fs = mp->um_fs;
+	struct fs *fs = ump->um_fs;
 	struct buf *sbbp;
 	struct buf *bp;
 	int blks;
@@ -1702,14 +1804,14 @@ ffs_sbupdate(mp, waitfor, suspended)
 	int i, size, error, allerror = 0;

 	if (fs->fs_ronly == 1 &&
-	    (mp->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
-	    (MNT_RDONLY | MNT_UPDATE))
+	    (ump->um_mountp->mnt_flag & (MNT_RDONLY | MNT_UPDATE)) !=
+	    (MNT_RDONLY | MNT_UPDATE) && ump->um_fsckpid == 0)
 		panic("ffs_sbupdate: write read-only filesystem");
 	/*
 	 * We use the superblock's buf to serialize calls to ffs_sbupdate().
 	 */
-	sbbp = getblk(mp->um_devvp, btodb(fs->fs_sblockloc), (int)fs->fs_sbsize,
-	    0, 0, 0);
+	sbbp = getblk(ump->um_devvp, btodb(fs->fs_sblockloc),
+	    (int)fs->fs_sbsize, 0, 0, 0);
 	/*
 	 * First write back the summary information.
 	 */
@@ -1719,7 +1821,7 @@ ffs_sbupdate(mp, waitfor, suspended)
 		size = fs->fs_bsize;
 		if (i + fs->fs_frag > blks)
 			size = (blks - i) * fs->fs_fsize;
-		bp = getblk(mp->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
+		bp = getblk(ump->um_devvp, fsbtodb(fs, fs->fs_csaddr + i),
 		    size, 0, 0, 0);
 		bcopy(space, bp->b_data, (u_int)size);
 		space = (char *)space + size;
@@ -1755,9 +1857,9 @@ ffs_sbupdate(mp, waitfor, suspended)
 	fs->fs_fmod = 0;
 	fs->fs_time = time_second;
 	if (fs->fs_flags & FS_DOSOFTDEP)
-		softdep_setup_sbupdate(mp, (struct fs *)bp->b_data, bp);
+		softdep_setup_sbupdate(ump, (struct fs *)bp->b_data, bp);
 	bcopy((caddr_t)fs, bp->b_data, (u_int)fs->fs_sbsize);
-	ffs_oldfscompat_write((struct fs *)bp->b_data, mp);
+	ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
 	if (suspended)
 		bp->b_flags |= B_VALIDSUSPWRT;
 	if (waitfor != MNT_WAIT)
diff --git a/sys/ufs/ffs/fs.h b/sys/ufs/ffs/fs.h
index a0b8e5b..b1e2174 100644
--- a/sys/ufs/ffs/fs.h
+++ b/sys/ufs/ffs/fs.h
@@ -214,7 +214,9 @@
 #define	FFS_SET_CWD		12	/* set current directory */
 #define	FFS_SET_DOTDOT		13	/* set inode number for ".." */
 #define	FFS_UNLINK		14	/* remove a name in the filesystem */
-#define	FFS_MAXID		15	/* number of valid ffs ids */
+#define	FFS_SET_INODE		15	/* update an on-disk inode */
+#define	FFS_SET_BUFOUTPUT	16	/* set buffered writing on descriptor */
+#define	FFS_MAXID		16	/* number of valid ffs ids */

 /*
  * Command structure passed in to the filesystem to adjust filesystem values.
diff --git a/sys/ufs/ffs/softdep.h b/sys/ufs/ffs/softdep.h
index 80c7315..b251ba8 100644
--- a/sys/ufs/ffs/softdep.h
+++ b/sys/ufs/ffs/softdep.h
@@ -536,8 +536,7 @@ struct freeblks {
 #endif
 	uint64_t fb_modrev;		/* Inode revision at start of trunc. */
 	off_t	fb_len;			/* Length we're truncating to. */
-	ufs2_daddr_t fb_chkcnt;		/* Expected blks released. */
-	ufs2_daddr_t fb_freecnt;	/* Actual blocks released. */
+	ufs2_daddr_t fb_chkcnt;		/* Blocks released. */
 	ino_t	fb_inum;		/* inode owner of blocks */
 	enum	vtype fb_vtype;		/* inode owner's file type */
 	uid_t	fb_uid;			/* uid of previous owner of blocks */
diff --git a/sys/ufs/ufs/ufs_inode.c b/sys/ufs/ufs/ufs_inode.c
index 7ddbe97..129c26d 100644
--- a/sys/ufs/ufs/ufs_inode.c
+++ b/sys/ufs/ufs/ufs_inode.c
@@ -120,15 +120,14 @@ ufs_inactive(ap)
 	isize = ip->i_size;
 	if (ip->i_ump->um_fstype == UFS2)
 		isize += ip->i_din2->di_extsize;
-	if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip)) {
+	if (ip->i_effnlink <= 0 && isize && !UFS_RDONLY(ip))
+		error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
+		    NOCRED, td);
+	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
 #ifdef QUOTA
 		if (!getinoquota(ip))
 			(void)chkiq(ip, -1, NOCRED, FORCE);
 #endif
-		error = UFS_TRUNCATE(vp, (off_t)0, IO_EXT | IO_NORMAL,
-		    NOCRED, td);
-	}
-	if (ip->i_nlink <= 0 && ip->i_mode && !UFS_RDONLY(ip)) {
 #ifdef UFS_EXTATTR
 		ufs_extattr_vnode_inactive(vp, td);
 #endif
diff --git a/sys/ufs/ufs/ufsmount.h b/sys/ufs/ufs/ufsmount.h
index 7874105..6447dce 100644
--- a/sys/ufs/ufs/ufsmount.h
+++ b/sys/ufs/ufs/ufsmount.h
@@ -77,6 +77,7 @@ struct ufsmount {
 	u_long	um_bptrtodb;		/* indir ptr to disk block */
 	u_long	um_seqinc;		/* inc between seq blocks */
 	struct	mtx um_lock;		/* Protects ufsmount & fs */
+	pid_t	um_fsckpid;		/* PID permitted fsck sysctls */
 	long	um_numindirdeps;	/* outstanding indirdeps */
 	struct	workhead softdep_workitem_pending; /* softdep work queue */
 	struct	worklist *softdep_worklist_tail; /* Tail pointer for above */