From 53224f018aac13056c11af9d1233317b1754149c Mon Sep 17 00:00:00 2001 From: kib Date: Mon, 2 Jul 2012 21:01:03 +0000 Subject: Extend the KPI to lock and unlock f_offset member of struct file. It now fully encapsulates all accesses to f_offset, and extends f_offset locking to other consumers that need it, in particular, to lseek() and variants of getdirentries(). Ensure that on 32bit architectures f_offset, which is 64bit quantity, always read and written under the mtxpool protection. This fixes apparently easy to trigger race when parallel lseek()s or lseek() and read/write could destroy file offset. The already broken ABI emulations, including iBCS and SysV, are not converted (yet). Tested by: pho No objections from: jhb MFC after: 3 weeks --- sys/kern/vfs_syscalls.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'sys/kern/vfs_syscalls.c') diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 27b419c..66ab5a3 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -1981,7 +1981,7 @@ sys_lseek(td, uap) struct file *fp; struct vnode *vp; struct vattr vattr; - off_t offset, size; + off_t foffset, offset, size; int error, noneg; int vfslocked; @@ -1993,18 +1993,19 @@ sys_lseek(td, uap) return (ESPIPE); } vp = fp->f_vnode; + foffset = foffset_lock(fp, 0); vfslocked = VFS_LOCK_GIANT(vp->v_mount); noneg = (vp->v_type != VCHR); offset = uap->offset; switch (uap->whence) { case L_INCR: if (noneg && - (fp->f_offset < 0 || - (offset > 0 && fp->f_offset > OFF_MAX - offset))) { + (foffset < 0 || + (offset > 0 && foffset > OFF_MAX - offset))) { error = EOVERFLOW; break; } - offset += fp->f_offset; + offset += foffset; break; case L_XTND: vn_lock(vp, LK_SHARED | LK_RETRY); @@ -2044,12 +2045,12 @@ sys_lseek(td, uap) error = EINVAL; if (error != 0) goto drop; - fp->f_offset = offset; VFS_KNOTE_UNLOCKED(vp, 0); - *(off_t *)(td->td_retval) = fp->f_offset; + *(off_t *)(td->td_retval) = offset; drop: fdrop(fp, td); VFS_UNLOCK_GIANT(vfslocked); + foffset_unlock(fp, offset, error != 0 ? FOF_NOUPDATE : 0); return (error); } @@ -3982,6 +3983,7 @@ kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, caddr_t dirbuf; int error, eofflag, readcnt, vfslocked; long loff; + off_t foffset; /* XXX arbitrary sanity limit on `count'. */ if (uap->count > 64 * 1024) @@ -3994,10 +3996,12 @@ kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, return (EBADF); } vp = fp->f_vnode; + foffset = foffset_lock(fp, 0); unionread: vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vp->v_type != VDIR) { VFS_UNLOCK_GIANT(vfslocked); + foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (EINVAL); } @@ -4010,12 +4014,13 @@ unionread: auio.uio_td = td; auio.uio_resid = uap->count; vn_lock(vp, LK_SHARED | LK_RETRY); - loff = auio.uio_offset = fp->f_offset; + loff = auio.uio_offset = foffset; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error) { VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); + foffset_unlock(fp, foffset, FOF_NOUPDATE); fdrop(fp, td); return (error); } @@ -4024,7 +4029,7 @@ unionread: if (vp->v_mount->mnt_maxsymlinklen <= 0) { error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); - fp->f_offset = auio.uio_offset; + foffset = auio.uio_offset; } else # endif { @@ -4036,7 +4041,7 @@ unionread: kiov.iov_base = dirbuf; error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag, NULL, NULL); - fp->f_offset = kuio.uio_offset; + foffset = kuio.uio_offset; if (error == 0) { readcnt = uap->count - kuio.uio_resid; edp = (struct dirent *)&dirbuf[readcnt]; @@ -4074,6 +4079,7 @@ unionread: if (error) { VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); + foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (error); } @@ -4085,13 +4091,14 @@ unionread: VREF(vp); fp->f_vnode = vp; fp->f_data = vp; - fp->f_offset = 0; + foffset = 0; vput(tvp); VFS_UNLOCK_GIANT(vfslocked); goto unionread; } VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); + foffset_unlock(fp, foffset, 0); fdrop(fp, td); td->td_retval[0] = uap->count - auio.uio_resid; if (error == 0) @@ -4144,6 +4151,7 @@ kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, int vfslocked; long loff; int error, eofflag; + off_t foffset; AUDIT_ARG_FD(fd); if (count > IOSIZE_MAX) @@ -4157,6 +4165,7 @@ kern_getdirentries(struct thread *td, int fd, char *buf, u_int count, return (EBADF); } vp = fp->f_vnode; + foffset = foffset_lock(fp, 0); unionread: vfslocked = VFS_LOCK_GIANT(vp->v_mount); if (vp->v_type != VDIR) { @@ -4173,14 +4182,14 @@ unionread: auio.uio_td = td; vn_lock(vp, LK_SHARED | LK_RETRY); AUDIT_ARG_VNODE1(vp); - loff = auio.uio_offset = fp->f_offset; + loff = auio.uio_offset = foffset; #ifdef MAC error = mac_vnode_check_readdir(td->td_ucred, vp); if (error == 0) #endif error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); - fp->f_offset = auio.uio_offset; + foffset = auio.uio_offset; if (error) { VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); @@ -4194,7 +4203,7 @@ unionread: VREF(vp); fp->f_vnode = vp; fp->f_data = vp; - fp->f_offset = 0; + foffset = 0; vput(tvp); VFS_UNLOCK_GIANT(vfslocked); goto unionread; @@ -4206,6 +4215,7 @@ unionread: *residp = auio.uio_resid; td->td_retval[0] = count - auio.uio_resid; fail: + foffset_unlock(fp, foffset, 0); fdrop(fp, td); return (error); } -- cgit v1.1