diff options
Diffstat (limited to 'sys')
40 files changed, 867 insertions, 430 deletions
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index ea285e9..e9e493f 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.i386,v 1.181 1997/12/09 12:04:49 jamil Exp $ +# $Id: files.i386,v 1.182 1997/12/12 14:08:45 peter Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -277,6 +277,7 @@ libkern/strncmp.c standard libkern/strncpy.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard +libkern/cmpdi2.c standard gnu/i386/fpemul/div_small.s optional gpl_math_emulate gnu/i386/fpemul/errors.c optional gpl_math_emulate gnu/i386/fpemul/fpu_arith.c optional gpl_math_emulate diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c index 184cee9..7033e1c 100644 --- a/sys/fs/procfs/procfs_map.c +++ b/sys/fs/procfs/procfs_map.c @@ -36,7 +36,7 @@ * * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94 * - * $Id: procfs_map.c,v 1.12 1997/08/02 14:32:12 bde Exp $ + * $Id: procfs_map.c,v 1.13 1997/11/14 22:57:46 tegge Exp $ */ #include <sys/param.h> @@ -101,7 +101,7 @@ procfs_domap(curp, p, pfs, uio) continue; obj = entry->object.vm_object; - if (obj && (obj->ref_count == 1)) + if (obj && (obj->shadow_count == 1)) privateresident = obj->resident_page_count; else privateresident = 0; diff --git a/sys/fs/procfs/procfs_vnops.c b/sys/fs/procfs/procfs_vnops.c index b8bd8e9..00cca89 100644 --- a/sys/fs/procfs/procfs_vnops.c +++ b/sys/fs/procfs/procfs_vnops.c @@ -36,7 +36,7 @@ * * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 * - * $Id: procfs_vnops.c,v 1.50 1997/12/27 02:56:25 bde Exp $ + * $Id: procfs_vnops.c,v 1.51 1998/01/06 01:37:12 sef Exp $ */ /* @@ -186,7 +186,7 @@ procfs_close(ap) * vnode. While one would expect v_usecount to be 1 at * that point, it seems that (according to John Dyson) * the VM system will bump up the usecount. So: if the - * usecount is 2, and VVMIO is set, then this is really + * usecount is 2, and VOBJBUF is set, then this is really * the last close. Otherwise, if the usecount is < 2 * then it is definitely the last close. * If this is the last close, then it checks to see if @@ -197,10 +197,7 @@ procfs_close(ap) * told to stop on an event, but then the requesting process * has gone away or forgotten about it. */ - if (((ap->a_vp->v_usecount == 2 - && ap->a_vp->v_object - && (ap->a_vp->v_flag & VVMIO)) || - (ap->a_vp->v_usecount < 2)) + if ((ap->a_vp->v_usecount < 2) && (p = pfind(pfs->pfs_pid)) && !(p->p_pfsflags & PF_LINGER)) { p->p_stops = 0; diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c index 6da09a6..3bad030 100644 --- a/sys/fs/specfs/spec_vnops.c +++ b/sys/fs/specfs/spec_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.51 1997/10/27 13:33:42 bde Exp $ + * $Id: spec_vnops.c,v 1.52 1997/12/29 00:23:16 dyson Exp $ */ #include <sys/param.h> @@ -232,6 +232,7 @@ spec_open(ap) (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); + /* * Do not allow opens of block devices that are * currently mounted. @@ -392,10 +393,14 @@ spec_write(ap) brelse(bp); return (error); } + if (vp->v_flag & VOBJBUF) + bp->b_flags |= B_CLUSTEROK; error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) { - /* bawrite(bp); */ - cluster_write(bp, 0); + if ((vp->v_flag & VOBJBUF) && (on == 0)) + vfs_bio_awrite(bp); + else + bawrite(bp); } else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); @@ -499,10 +504,15 @@ loop: continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); - bawrite(bp); + if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { + vfs_bio_awrite(bp); + splx(s); + } else { + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + bawrite(bp); + } goto loop; } if (ap->a_waitfor == MNT_WAIT) { @@ -631,6 +641,7 @@ spec_close(ap) error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); if (error) return (error); + /* * We do not want to really close the device if it * is still in use unless we are trying to close it diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386 index ea285e9..e9e493f 100644 --- a/sys/i386/conf/files.i386 +++ b/sys/i386/conf/files.i386 @@ -1,7 +1,7 @@ # This file tells config what files go into building a kernel, # files marked standard are always included. # -# $Id: files.i386,v 1.181 1997/12/09 12:04:49 jamil Exp $ +# $Id: files.i386,v 1.182 1997/12/12 14:08:45 peter Exp $ # # The long compile-with and dependency lines are required because of # limitations in config: backslash-newline doesn't work in strings, and @@ -277,6 +277,7 @@ libkern/strncmp.c standard libkern/strncpy.c standard libkern/udivdi3.c standard libkern/umoddi3.c standard +libkern/cmpdi2.c standard gnu/i386/fpemul/div_small.s optional gpl_math_emulate gnu/i386/fpemul/errors.c optional gpl_math_emulate gnu/i386/fpemul/fpu_arith.c optional gpl_math_emulate diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c index 7bc26e1..48cf2e3 100644 --- a/sys/kern/imgact_aout.c +++ b/sys/kern/imgact_aout.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: imgact_aout.c,v 1.35 1997/08/26 00:02:23 bde Exp $ + * $Id: imgact_aout.c,v 1.36 1997/09/02 20:05:33 bde Exp $ */ #include <sys/param.h> @@ -47,7 +47,7 @@ static int exec_aout_imgact __P((struct image_params *imgp)); -static int +int exec_aout_imgact(imgp) struct image_params *imgp; { @@ -177,7 +177,8 @@ exec_aout_imgact(imgp) * instruction of the same name. */ vmaddr = virtual_offset + a_out->a_text + a_out->a_data; - error = vm_map_find(&vmspace->vm_map, NULL, 0, &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); + error = vm_map_find(&vmspace->vm_map, NULL, 0, + &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); if (error) return (error); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 5667868..eb40b91 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -23,7 +23,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id: kern_exec.c,v 1.71 1997/12/20 03:05:45 sef Exp $ + * $Id: kern_exec.c,v 1.72 1997/12/27 02:56:21 bde Exp $ */ #include <sys/param.h> @@ -185,7 +185,8 @@ interpret: * header into it. */ imgp->image_header = malloc(PAGE_SIZE, M_TEMP, M_WAITOK); - error = vn_rdwr(UIO_READ, imgp->vp, (void *)imgp->image_header, PAGE_SIZE, 0, + error = vn_rdwr(UIO_READ, imgp->vp, + (void *)imgp->image_header, PAGE_SIZE, 0, UIO_SYSSPACE, IO_NODELOCKED, p->p_ucred, &resid, p); /* * Clear out any remaining junk. @@ -316,7 +317,7 @@ interpret: p->p_ucred->cr_gid = attr.va_gid; setsugid(p); } else { - if (p->p_ucred->cr_uid == p->p_cred->p_ruid && + if (p->p_ucred->cr_uid == p->p_cred->p_ruid && p->p_ucred->cr_gid == p->p_cred->p_rgid) p->p_flag &= ~P_SUGID; } diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index cae8521..169711d 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 - * $Id: kern_subr.c,v 1.13 1997/10/10 18:14:23 phk Exp $ + * $Id: kern_subr.c,v 1.14 1997/12/19 09:03:23 dyson Exp $ */ #include <sys/param.h> @@ -147,7 +147,7 @@ uiomoveco(cp, n, uio, obj) ((((int) cp) & PAGE_MASK) == 0)) { error = vm_uiomove(&curproc->p_vmspace->vm_map, obj, uio->uio_offset, cnt, - (vm_offset_t) iov->iov_base); + (vm_offset_t) iov->iov_base, NULL); } else { error = copyout(cp, iov->iov_base, cnt); } @@ -177,6 +177,69 @@ uiomoveco(cp, n, uio, obj) return (0); } +int +uioread(n, uio, obj, nread) + int n; + struct uio *uio; + struct vm_object *obj; + int *nread; +{ + int npagesmoved; + struct iovec *iov; + u_int cnt, tcnt; + int error; + + *nread = 0; + error = 0; + + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + if ((uio->uio_segflg == UIO_USERSPACE) && + ((((int) iov->iov_base) & PAGE_MASK) == 0) && + ((uio->uio_offset & PAGE_MASK) == 0) ) { + + if (cnt < PAGE_SIZE) + break; + + cnt &= ~PAGE_MASK; + + error = vm_uiomove(&curproc->p_vmspace->vm_map, obj, + uio->uio_offset, cnt, + (vm_offset_t) iov->iov_base, &npagesmoved); + + if (npagesmoved == 0) + break; + + tcnt = npagesmoved * PAGE_SIZE; + if (tcnt != cnt) { + cnt = tcnt; + } + + if (error) + break; + + iov->iov_base += cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + *nread += cnt; + n -= cnt; + } else { + break; + } + } + return error; +} + /* * Give next character to user as result of read. */ diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c index f5c966f..98cf7aa 100644 --- a/sys/kern/vfs_bio.c +++ b/sys/kern/vfs_bio.c @@ -11,7 +11,7 @@ * 2. Absolutely no warranty of function or purpose is made by the author * John S. Dyson. * - * $Id: vfs_bio.c,v 1.139 1997/12/07 04:06:41 dyson Exp $ + * $Id: vfs_bio.c,v 1.140 1997/12/22 11:54:00 dyson Exp $ */ /* @@ -204,8 +204,8 @@ bufinit() * Remove the probability of deadlock conditions by limiting the * number of dirty buffers. */ - hidirtybuffers = nbuf / 6 + 20; - lodirtybuffers = nbuf / 12 + 10; + hidirtybuffers = nbuf / 8 + 20; + lodirtybuffers = nbuf / 16 + 10; numdirtybuffers = 0; lofreebuffers = nbuf / 18 + 5; hifreebuffers = 2 * lofreebuffers; @@ -396,7 +396,7 @@ bwrite(struct buf * bp) return (0); } -void +inline void vfs_bio_need_satisfy(void) { ++numfreebuffers; if (!needsbuffer) @@ -850,6 +850,8 @@ vfs_bio_awrite(struct buf * bp) int ncl; struct buf *bpa; int nwritten; + int size; + int maxcl; s = splbio(); /* @@ -858,8 +860,6 @@ vfs_bio_awrite(struct buf * bp) if ((vp->v_type == VREG) && (vp->v_mount != 0) && /* Only on nodes that have the size info */ (bp->b_flags & (B_CLUSTEROK | B_INVAL)) == B_CLUSTEROK) { - int size; - int maxcl; size = vp->v_mount->mnt_stat.f_iosize; maxcl = MAXPHYS / size; @@ -885,7 +885,33 @@ vfs_bio_awrite(struct buf * bp) splx(s); return nwritten; } + } else if ((vp->v_flag & VOBJBUF) && (vp->v_type == VBLK) && + ((size = bp->b_bufsize) >= PAGE_SIZE)) { + maxcl = MAXPHYS / size; + for (i = 1; i < maxcl; i++) { + if ((bpa = gbincore(vp, lblkno + i)) && + ((bpa->b_flags & (B_BUSY | B_DELWRI | B_CLUSTEROK | B_INVAL)) == + (B_DELWRI | B_CLUSTEROK)) && + (bpa->b_bufsize == size)) { + if (bpa->b_blkno != + bp->b_blkno + ((i * size) >> DEV_BSHIFT)) + break; + } else { + break; + } + } + ncl = i; + /* + * this is a possible cluster write + */ + if (ncl != 1) { + nwritten = cluster_wbuild(vp, size, lblkno, ncl); + printf("Block cluster: (%d, %d)\n", lblkno, nwritten); + splx(s); + return nwritten; + } } + bremfree(bp); splx(s); /* @@ -902,7 +928,8 @@ vfs_bio_awrite(struct buf * bp) * Find a buffer header which is available for use. */ static struct buf * -getnewbuf(struct vnode *vp, int slpflag, int slptimeo, int size, int maxsize) +getnewbuf(struct vnode *vp, daddr_t blkno, + int slpflag, int slptimeo, int size, int maxsize) { struct buf *bp, *bp1; int nbyteswritten = 0; @@ -981,6 +1008,33 @@ trytofreespace: /* if we are a delayed write, convert to an async write */ if ((bp->b_flags & (B_DELWRI | B_INVAL)) == B_DELWRI) { + /* + * If our delayed write is likely to be used soon, then + * recycle back onto the LRU queue. + */ + if (vp && (bp->b_vp == vp) && (bp->b_qindex == QUEUE_LRU) && + (bp->b_lblkno >= blkno) && (maxsize > 0)) { + + if (bp->b_usecount > 0) { + if (bp->b_lblkno < blkno + (MAXPHYS / maxsize)) { + + TAILQ_REMOVE(&bufqueues[QUEUE_LRU], bp, b_freelist); + + if (TAILQ_FIRST(&bufqueues[QUEUE_LRU]) != NULL) { + TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); + bp->b_usecount--; + goto start; + } + TAILQ_INSERT_TAIL(&bufqueues[QUEUE_LRU], bp, b_freelist); + } + } + } + + /* + * Certain layered filesystems can recursively re-enter the vfs_bio + * code, due to delayed writes. This helps keep the system from + * deadlocking. + */ if (writerecursion > 0) { bp = TAILQ_FIRST(&bufqueues[QUEUE_AGE]); while (bp) { @@ -1201,7 +1255,7 @@ inmem(struct vnode * vp, daddr_t blkno) return 1; if (vp->v_mount == NULL) return 0; - if ((vp->v_object == NULL) || (vp->v_flag & VVMIO) == 0) + if ((vp->v_object == NULL) || (vp->v_flag & VOBJBUF) == 0) return 0; obj = vp->v_object; @@ -1351,7 +1405,8 @@ loop: } else { vm_object_t obj; - if ((bp = getnewbuf(vp, slpflag, slptimeo, size, maxsize)) == 0) { + if ((bp = getnewbuf(vp, blkno, + slpflag, slptimeo, size, maxsize)) == 0) { if (slpflag || slptimeo) { splx(s); return NULL; @@ -1381,7 +1436,7 @@ loop: bh = BUFHASH(vp, blkno); LIST_INSERT_HEAD(bh, bp, b_hash); - if ((obj = vp->v_object) && (vp->v_flag & VVMIO)) { + if ((obj = vp->v_object) && (vp->v_flag & VOBJBUF)) { bp->b_flags |= (B_VMIO | B_CACHE); #if defined(VFS_BIO_DEBUG) if (vp->v_type != VREG && vp->v_type != VBLK) @@ -1414,7 +1469,7 @@ geteblk(int size) int s; s = splbio(); - while ((bp = getnewbuf(0, 0, 0, size, MAXBSIZE)) == 0); + while ((bp = getnewbuf(0, (daddr_t) 0, 0, 0, size, MAXBSIZE)) == 0); splx(s); allocbuf(bp, size); bp->b_flags |= B_INVAL; @@ -1696,7 +1751,10 @@ biowait(register struct buf * bp) #if defined(NO_SCHEDULE_MODS) tsleep(bp, PRIBIO, "biowait", 0); #else - tsleep(bp, curproc->p_usrpri, "biowait", 0); + if (bp->b_flags & B_READ) + tsleep(bp, PRIBIO, "biord", 0); + else + tsleep(bp, curproc->p_usrpri, "biowr", 0); #endif splx(s); if (bp->b_flags & B_EINTR) { @@ -1770,7 +1828,7 @@ biodone(register struct buf * bp) panic("biodone: missing VM object"); } - if ((vp->v_flag & VVMIO) == 0) { + if ((vp->v_flag & VOBJBUF) == 0) { panic("biodone: vnode is not setup for merged cache"); } #endif diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c index 1fff926..3d82c5e 100644 --- a/sys/kern/vfs_cluster.c +++ b/sys/kern/vfs_cluster.c @@ -33,7 +33,7 @@ * SUCH DAMAGE. * * @(#)vfs_cluster.c 8.7 (Berkeley) 2/13/94 - * $Id: vfs_cluster.c,v 1.48 1997/08/02 14:31:43 bde Exp $ + * $Id: vfs_cluster.c,v 1.49 1997/11/07 08:53:05 phk Exp $ */ #include <sys/param.h> @@ -492,8 +492,13 @@ cluster_write(bp, filesize) int async; vp = bp->b_vp; - async = vp->v_mount->mnt_flag & MNT_ASYNC; - lblocksize = vp->v_mount->mnt_stat.f_iosize; + if (vp->v_type == VREG) { + async = vp->v_mount->mnt_flag & MNT_ASYNC; + lblocksize = vp->v_mount->mnt_stat.f_iosize; + } else { + async = 0; + lblocksize = bp->b_bufsize; + } lbn = bp->b_lblkno; /* Initialize vnode to beginning of file. */ @@ -565,7 +570,8 @@ cluster_write(bp, filesize) * cluster as large as possible, otherwise find size of * existing cluster. */ - if (((u_quad_t) (lbn + 1) * lblocksize) != filesize && + if ((vp->v_type == VREG) && + ((u_quad_t) (lbn + 1) * lblocksize) != filesize && (bp->b_blkno == bp->b_lblkno) && (VOP_BMAP(vp, lbn, NULL, &bp->b_blkno, &maxclen, NULL) || bp->b_blkno == -1)) { @@ -621,7 +627,7 @@ cluster_wbuild(vp, size, start_lbn, len) int dbsize = btodb(size); while (len > 0) { s = splbio(); - if ( ((tbp = gbincore(vp, start_lbn)) == NULL) || + if (((tbp = gbincore(vp, start_lbn)) == NULL) || ((tbp->b_flags & (B_INVAL|B_BUSY|B_DELWRI)) != B_DELWRI)) { ++start_lbn; --len; @@ -672,7 +678,8 @@ cluster_wbuild(vp, size, start_lbn, len) bp->b_blkno = tbp->b_blkno; bp->b_lblkno = tbp->b_lblkno; (vm_offset_t) bp->b_data |= ((vm_offset_t) tbp->b_data) & PAGE_MASK; - bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER | (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT)); + bp->b_flags |= B_CALL | B_BUSY | B_CLUSTER | + (tbp->b_flags & (B_VMIO|B_NEEDCOMMIT)); bp->b_iodone = cluster_callback; pbgetvp(vp, bp); diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index a681825..e2be193 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -189,7 +189,13 @@ vop_stdlock(ap) struct proc *a_p; } */ *ap; { - struct lock *l = (struct lock*)ap->a_vp->v_data; + struct lock *l; + + if ((l = (struct lock *)ap->a_vp->v_data) == NULL) { + if (ap->a_flags & LK_INTERLOCK) + simple_unlock(&ap->a_vp->v_interlock); + return 0; + } return (lockmgr(l, ap->a_flags, &ap->a_vp->v_interlock, ap->a_p)); } @@ -202,7 +208,13 @@ vop_stdunlock(ap) struct proc *a_p; } */ *ap; { - struct lock *l = (struct lock*)ap->a_vp->v_data; + struct lock *l; + + if ((l = (struct lock *)ap->a_vp->v_data) == NULL) { + if (ap->a_flags & LK_INTERLOCK) + simple_unlock(&ap->a_vp->v_interlock); + return 0; + } return (lockmgr(l, ap->a_flags | LK_RELEASE, &ap->a_vp->v_interlock, ap->a_p)); @@ -214,7 +226,10 @@ vop_stdislocked(ap) struct vnode *a_vp; } */ *ap; { - struct lock *l = (struct lock*)ap->a_vp->v_data; + struct lock *l; + + if ((l = (struct lock *)ap->a_vp->v_data) == NULL) + return 0; return (lockstatus(l)); } diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 9a371b8..7120a81 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.118 1997/12/29 01:03:41 dyson Exp $ + * $Id: vfs_subr.c,v 1.119 1997/12/29 16:54:03 dyson Exp $ */ /* @@ -83,7 +83,6 @@ static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); -static void vputrele __P((struct vnode *vp, int put)); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, @@ -108,7 +107,7 @@ SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "") static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); -int vfs_ioopt = 0; +int vfs_ioopt = 1; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); struct mntlist mountlist; /* mounted filesystem list */ @@ -352,7 +351,9 @@ getnewvnode(tag, mp, vops, vpp) struct vnode **vpp; { struct proc *p = curproc; /* XXX */ - struct vnode *vp; + struct vnode *vp, *tvp; + vm_object_t object; + TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist @@ -362,6 +363,7 @@ getnewvnode(tag, mp, vops, vpp) */ simple_lock(&vnode_free_list_slock); + TAILQ_INIT(&vnode_tmp_list); if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; @@ -377,9 +379,11 @@ getnewvnode(tag, mp, vops, vpp) if (vp->v_usecount) panic("free vnode isn't"); - if (vp->v_object && vp->v_object->resident_page_count) { + object = vp->v_object; + if (object && (object->resident_page_count || object->ref_count)) { /* Don't recycle if it's caching some pages */ - simple_unlock(&vp->v_interlock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ @@ -391,6 +395,12 @@ getnewvnode(tag, mp, vops, vpp) } } + TAILQ_FOREACH(tvp, &vnode_tmp_list, v_freelist) { + TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); + simple_unlock(&tvp->v_interlock); + } + if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); @@ -429,6 +439,7 @@ getnewvnode(tag, mp, vops, vpp) vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); + simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); @@ -553,7 +564,16 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { - (void) VOP_BWRITE(bp); + if (bp->b_vp == vp) { + if (bp->b_flags & B_CLUSTEROK) { + vfs_bio_awrite(bp); + } else { + bp->b_flags |= B_ASYNC; + VOP_BWRITE(bp); + } + } else { + (void) VOP_BWRITE(bp); + } break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); @@ -571,11 +591,18 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) /* * Destroy the copy in the VM cache, too. */ + simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { - vm_object_page_remove(object, 0, object->size, - (flags & V_SAVE) ? TRUE : FALSE); + if (flags & V_SAVEMETA) + vm_object_page_remove(object, 0, object->size, + (flags & V_SAVE) ? TRUE : FALSE); + else + vm_object_page_remove(object, 0, 0, + (flags & V_SAVE) ? TRUE : FALSE); } + simple_unlock(&vp->v_interlock); + if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); @@ -863,13 +890,11 @@ vget(vp, flags, p) /* * Create the VM object, if needed */ - if (((vp->v_type == VREG) || (vp->v_type == VBLK)) && - ((vp->v_object == NULL) || - (vp->v_object->flags & OBJ_VFS_REF) == 0 || + if ((flags & LK_NOOBJ) == 0 && + (vp->v_type == VREG) && + ((vp->v_object == NULL) || (vp->v_object->flags & OBJ_DEAD))) { - simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); - simple_lock(&vp->v_interlock); } if (flags & LK_TYPE_MASK) { if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) @@ -909,7 +934,10 @@ vrele(vp) vp->v_usecount--; simple_unlock(&vp->v_interlock); - } else if (vp->v_usecount == 1) { + return; + } + + if (vp->v_usecount == 1) { vp->v_usecount--; @@ -927,6 +955,7 @@ vrele(vp) } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); + simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } @@ -942,17 +971,20 @@ vput(vp) if (vp == NULL) panic("vput: null vp"); #endif + simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); + return; + + } - } else if (vp->v_usecount == 1) { + if (vp->v_usecount == 1) { vp->v_usecount--; - if (VSHOULDFREE(vp)) vfree(vp); /* @@ -1110,8 +1142,7 @@ vclean(vp, flags, p) int flags; struct proc *p; { - int active, irefed; - vm_object_t object; + int active; /* * Check to see if the vnode is in use. If so we have to reference it @@ -1120,6 +1151,10 @@ vclean(vp, flags, p) */ if ((active = vp->v_usecount)) vp->v_usecount++; + + if (vp->v_object) { + vp->v_object->flags |= OBJ_DEAD; + } /* * Prevent the vnode from being recycled or brought into use while we * clean it out. @@ -1136,19 +1171,14 @@ vclean(vp, flags, p) */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); - object = vp->v_object; - /* * Clean out any buffers associated with the vnode. */ - if (flags & DOCLOSE) + if (vp->v_object) + vm_object_terminate(vp->v_object); + else vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); - if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { - vp->v_object->flags &= ~OBJ_VFS_REF; - vm_object_deallocate(object); - } - /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the @@ -1257,6 +1287,10 @@ vop_revoke(ap) */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; + if (vp->v_flag & VXWANT) { + vp->v_flag &= ~VXWANT; + wakeup(vp); + } } vgonel(vp, p); return (0); @@ -1321,10 +1355,6 @@ vgonel(vp, p) return; } - if (vp->v_object) { - vp->v_object->flags |= OBJ_VNODE_GONE; - } - /* * Clean out the filesystem specific data. */ @@ -1392,6 +1422,7 @@ vgonel(vp, p) } vp->v_type = VBAD; + simple_unlock(&vp->v_interlock); } /* @@ -1488,6 +1519,8 @@ vprint(label, vp) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); + if (vp->v_flag & VOBJBUF) + strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { @@ -1999,21 +2032,41 @@ vfs_export_lookup(mp, nep, nam) void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; + int anyio, tries; + + tries = 5; loop: + anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - if (vp->v_mount != mp) - goto loop; nvp = vp->v_mntvnodes.le_next; - if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT)) + + if (vp->v_mount != mp) { + goto loop; + } + + if ((vp->v_flag & VXLOCK) || + (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { continue; + } + + simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); - vm_object_page_clean(vp->v_object, 0, 0, TRUE); - VOP_UNLOCK(vp, 0, curproc); + if (!vget(vp, + LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { + if (vp->v_object) { + vm_object_page_clean(vp->v_object, 0, 0, TRUE); + anyio = 1; + } + vput(vp); + } + } else { + simple_unlock(&vp->v_interlock); } } + if (anyio && (--tries > 0)) + goto loop; } /* @@ -2021,6 +2074,8 @@ loop: * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. + * + * If !waslocked, must be called with interlock. */ int vfs_object_create(vp, p, cred, waslocked) @@ -2033,44 +2088,49 @@ vfs_object_create(vp, p, cred, waslocked) vm_object_t object; int error = 0; - if ((vp->v_type != VREG) && (vp->v_type != VBLK)) + if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { return 0; + } + + if (!waslocked) + vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; - (void) vnode_pager_alloc(vp, + object = vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - vp->v_object->flags |= OBJ_VFS_REF; - } else { + } else if (major(vp->v_rdev) < nblkdev) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ - (void) vnode_pager_alloc(vp, INT_MAX, 0, 0); - vp->v_object->flags |= OBJ_VFS_REF; + object = vnode_pager_alloc(vp, INT_MAX, 0, 0); } + object->ref_count--; + vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { - if (waslocked) - VOP_UNLOCK(vp, 0, p); + VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); - if (waslocked) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } - if ((object->flags & OBJ_VFS_REF) == 0) { - vm_object_reference(object); - object->flags |= OBJ_VFS_REF; - } } - if (vp->v_object) - vp->v_flag |= VVMIO; + + if (vp->v_object) { + vp->v_flag |= VOBJBUF; + } retn: + if (!waslocked) { + simple_lock(&vp->v_interlock); + VOP_UNLOCK(vp, LK_INTERLOCK, p); + } + return error; } diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 69751c4..b7be81e 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.87 1997/12/27 02:56:23 bde Exp $ + * $Id: vfs_syscalls.c,v 1.88 1997/12/29 00:22:50 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ @@ -428,8 +428,8 @@ dounmount(mp, flags, p) if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); + vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; - vfs_msync(mp, MNT_NOWAIT); cache_purgevfs(mp); /* remove cache entries for this file sys */ if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || @@ -919,6 +919,8 @@ open(p, uap) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } + if ((vp->v_type == VREG) && (vp->v_object == NULL)) + vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); @@ -1102,14 +1104,14 @@ link(p, uap) struct nameidata nd; int error; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { @@ -1161,7 +1163,7 @@ symlink(p, uap) path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { @@ -1266,7 +1268,7 @@ unlink(p, uap) if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) @@ -1395,7 +1397,7 @@ access(p, uap) t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; @@ -1444,7 +1446,7 @@ ostat(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1481,7 +1483,7 @@ olstat(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1549,7 +1551,7 @@ stat(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1584,7 +1586,7 @@ lstat(p, uap) struct stat sb; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1620,7 +1622,7 @@ pathconf(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1655,7 +1657,7 @@ readlink(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -2196,7 +2198,7 @@ rename(p, uap) if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; - NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, + NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; @@ -2235,8 +2237,9 @@ rename(p, uap) out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); - if (fromnd.ni_dvp != tdvp) + if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 8ceedd4..31754fd 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 - * $Id: vfs_lookup.c,v 1.21 1997/12/27 02:56:22 bde Exp $ + * $Id: vfs_lookup.c,v 1.22 1997/12/29 00:22:38 dyson Exp $ */ #include "opt_ktrace.h" @@ -165,6 +165,13 @@ namei(ndp) zfree(namei_zone, cnp->cn_pnbuf); else cnp->cn_flags |= HASBUF; + + if (ndp->ni_vp && ndp->ni_vp->v_type == VREG && + (cnp->cn_nameiop != DELETE) && + ((cnp->cn_flags & (NOOBJ|LOCKLEAF)) == LOCKLEAF)) + vfs_object_create(ndp->ni_vp, + ndp->ni_cnd.cn_proc, ndp->ni_cnd.cn_cred, 1); + return (0); } if ((cnp->cn_flags & LOCKPARENT) && ndp->ni_pathlen == 1) @@ -538,8 +545,6 @@ nextname: if (!wantparent) vrele(ndp->ni_dvp); - vfs_object_create(dp, ndp->ni_cnd.cn_proc, ndp->ni_cnd.cn_cred, 1); - if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, p); return (0); @@ -687,7 +692,9 @@ relookup(dvp, vpp, cnp) if (!wantparent) vrele(dvp); - vfs_object_create(dp, cnp->cn_proc, cnp->cn_cred, 1); + if (dp->v_type == VREG && + ((cnp->cn_flags & (NOOBJ|LOCKLEAF)) == LOCKLEAF)) + vfs_object_create(dp, cnp->cn_proc, cnp->cn_cred, 1); if ((cnp->cn_flags & LOCKLEAF) == 0) VOP_UNLOCK(dp, 0, p); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 9a371b8..7120a81 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_subr.c 8.31 (Berkeley) 5/26/95 - * $Id: vfs_subr.c,v 1.118 1997/12/29 01:03:41 dyson Exp $ + * $Id: vfs_subr.c,v 1.119 1997/12/29 16:54:03 dyson Exp $ */ /* @@ -83,7 +83,6 @@ static void vfree __P((struct vnode *)); static void vgonel __P((struct vnode *vp, struct proc *p)); static unsigned long numvnodes; SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, ""); -static void vputrele __P((struct vnode *vp, int put)); enum vtype iftovt_tab[16] = { VNON, VFIFO, VCHR, VNON, VDIR, VNON, VBLK, VNON, @@ -108,7 +107,7 @@ SYSCTL_INT(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "") static u_long freevnodes = 0; SYSCTL_INT(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, ""); -int vfs_ioopt = 0; +int vfs_ioopt = 1; SYSCTL_INT(_vfs, OID_AUTO, ioopt, CTLFLAG_RW, &vfs_ioopt, 0, ""); struct mntlist mountlist; /* mounted filesystem list */ @@ -352,7 +351,9 @@ getnewvnode(tag, mp, vops, vpp) struct vnode **vpp; { struct proc *p = curproc; /* XXX */ - struct vnode *vp; + struct vnode *vp, *tvp; + vm_object_t object; + TAILQ_HEAD(freelst, vnode) vnode_tmp_list; /* * We take the least recently used vnode from the freelist @@ -362,6 +363,7 @@ getnewvnode(tag, mp, vops, vpp) */ simple_lock(&vnode_free_list_slock); + TAILQ_INIT(&vnode_tmp_list); if (wantfreevnodes && freevnodes < wantfreevnodes) { vp = NULL; @@ -377,9 +379,11 @@ getnewvnode(tag, mp, vops, vpp) if (vp->v_usecount) panic("free vnode isn't"); - if (vp->v_object && vp->v_object->resident_page_count) { + object = vp->v_object; + if (object && (object->resident_page_count || object->ref_count)) { /* Don't recycle if it's caching some pages */ - simple_unlock(&vp->v_interlock); + TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_tmp_list, vp, v_freelist); continue; } else if (LIST_FIRST(&vp->v_cache_src)) { /* Don't recycle if active in the namecache */ @@ -391,6 +395,12 @@ getnewvnode(tag, mp, vops, vpp) } } + TAILQ_FOREACH(tvp, &vnode_tmp_list, v_freelist) { + TAILQ_REMOVE(&vnode_tmp_list, tvp, v_freelist); + TAILQ_INSERT_TAIL(&vnode_free_list, tvp, v_freelist); + simple_unlock(&tvp->v_interlock); + } + if (vp) { vp->v_flag |= VDOOMED; TAILQ_REMOVE(&vnode_free_list, vp, v_freelist); @@ -429,6 +439,7 @@ getnewvnode(tag, mp, vops, vpp) vp = (struct vnode *) malloc((u_long) sizeof *vp, M_VNODE, M_WAITOK); bzero((char *) vp, sizeof *vp); + simple_lock_init(&vp->v_interlock); vp->v_dd = vp; cache_purge(vp); LIST_INIT(&vp->v_cache_src); @@ -553,7 +564,16 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) * check for it. */ if ((bp->b_flags & B_DELWRI) && (flags & V_SAVE)) { - (void) VOP_BWRITE(bp); + if (bp->b_vp == vp) { + if (bp->b_flags & B_CLUSTEROK) { + vfs_bio_awrite(bp); + } else { + bp->b_flags |= B_ASYNC; + VOP_BWRITE(bp); + } + } else { + (void) VOP_BWRITE(bp); + } break; } bp->b_flags |= (B_INVAL|B_NOCACHE|B_RELBUF); @@ -571,11 +591,18 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo) /* * Destroy the copy in the VM cache, too. */ + simple_lock(&vp->v_interlock); object = vp->v_object; if (object != NULL) { - vm_object_page_remove(object, 0, object->size, - (flags & V_SAVE) ? TRUE : FALSE); + if (flags & V_SAVEMETA) + vm_object_page_remove(object, 0, object->size, + (flags & V_SAVE) ? TRUE : FALSE); + else + vm_object_page_remove(object, 0, 0, + (flags & V_SAVE) ? TRUE : FALSE); } + simple_unlock(&vp->v_interlock); + if (!(flags & V_SAVEMETA) && (vp->v_dirtyblkhd.lh_first || vp->v_cleanblkhd.lh_first)) panic("vinvalbuf: flush failed"); @@ -863,13 +890,11 @@ vget(vp, flags, p) /* * Create the VM object, if needed */ - if (((vp->v_type == VREG) || (vp->v_type == VBLK)) && - ((vp->v_object == NULL) || - (vp->v_object->flags & OBJ_VFS_REF) == 0 || + if ((flags & LK_NOOBJ) == 0 && + (vp->v_type == VREG) && + ((vp->v_object == NULL) || (vp->v_object->flags & OBJ_DEAD))) { - simple_unlock(&vp->v_interlock); vfs_object_create(vp, curproc, curproc->p_ucred, 0); - simple_lock(&vp->v_interlock); } if (flags & LK_TYPE_MASK) { if (error = vn_lock(vp, flags | LK_INTERLOCK, p)) @@ -909,7 +934,10 @@ vrele(vp) vp->v_usecount--; simple_unlock(&vp->v_interlock); - } else if (vp->v_usecount == 1) { + return; + } + + if (vp->v_usecount == 1) { vp->v_usecount--; @@ -927,6 +955,7 @@ vrele(vp) } else { #ifdef DIAGNOSTIC vprint("vrele: negative ref count", vp); + simple_unlock(&vp->v_interlock); #endif panic("vrele: negative ref cnt"); } @@ -942,17 +971,20 @@ vput(vp) if (vp == NULL) panic("vput: null vp"); #endif + simple_lock(&vp->v_interlock); if (vp->v_usecount > 1) { vp->v_usecount--; VOP_UNLOCK(vp, LK_INTERLOCK, p); + return; + + } - } else if (vp->v_usecount == 1) { + if (vp->v_usecount == 1) { vp->v_usecount--; - if (VSHOULDFREE(vp)) vfree(vp); /* @@ -1110,8 +1142,7 @@ vclean(vp, flags, p) int flags; struct proc *p; { - int active, irefed; - vm_object_t object; + int active; /* * Check to see if the vnode is in use. If so we have to reference it @@ -1120,6 +1151,10 @@ vclean(vp, flags, p) */ if ((active = vp->v_usecount)) vp->v_usecount++; + + if (vp->v_object) { + vp->v_object->flags |= OBJ_DEAD; + } /* * Prevent the vnode from being recycled or brought into use while we * clean it out. @@ -1136,19 +1171,14 @@ vclean(vp, flags, p) */ VOP_LOCK(vp, LK_DRAIN | LK_INTERLOCK, p); - object = vp->v_object; - /* * Clean out any buffers associated with the vnode. */ - if (flags & DOCLOSE) + if (vp->v_object) + vm_object_terminate(vp->v_object); + else vinvalbuf(vp, V_SAVE, NOCRED, p, 0, 0); - if (vp->v_object && (vp->v_object->flags & OBJ_VFS_REF)) { - vp->v_object->flags &= ~OBJ_VFS_REF; - vm_object_deallocate(object); - } - /* * If purging an active vnode, it must be closed and * deactivated before being reclaimed. Note that the @@ -1257,6 +1287,10 @@ vop_revoke(ap) */ simple_lock(&vp->v_interlock); vp->v_flag &= ~VXLOCK; + if (vp->v_flag & VXWANT) { + vp->v_flag &= ~VXWANT; + wakeup(vp); + } } vgonel(vp, p); return (0); @@ -1321,10 +1355,6 @@ vgonel(vp, p) return; } - if (vp->v_object) { - vp->v_object->flags |= OBJ_VNODE_GONE; - } - /* * Clean out the filesystem specific data. */ @@ -1392,6 +1422,7 @@ vgonel(vp, p) } vp->v_type = VBAD; + simple_unlock(&vp->v_interlock); } /* @@ -1488,6 +1519,8 @@ vprint(label, vp) strcat(buf, "|VDOOMED"); if (vp->v_flag & VFREE) strcat(buf, "|VFREE"); + if (vp->v_flag & VOBJBUF) + strcat(buf, "|VOBJBUF"); if (buf[0] != '\0') printf(" flags (%s)", &buf[1]); if (vp->v_data == NULL) { @@ -1999,21 +2032,41 @@ vfs_export_lookup(mp, nep, nam) void vfs_msync(struct mount *mp, int flags) { struct vnode *vp, *nvp; + int anyio, tries; + + tries = 5; loop: + anyio = 0; for (vp = mp->mnt_vnodelist.lh_first; vp != NULL; vp = nvp) { - if (vp->v_mount != mp) - goto loop; nvp = vp->v_mntvnodes.le_next; - if (VOP_ISLOCKED(vp) && (flags != MNT_WAIT)) + + if (vp->v_mount != mp) { + goto loop; + } + + if ((vp->v_flag & VXLOCK) || + (VOP_ISLOCKED(vp) && (flags != MNT_WAIT))) { continue; + } + + simple_lock(&vp->v_interlock); if (vp->v_object && (vp->v_object->flags & OBJ_MIGHTBEDIRTY)) { - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, curproc); - vm_object_page_clean(vp->v_object, 0, 0, TRUE); - VOP_UNLOCK(vp, 0, curproc); + if (!vget(vp, + LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) { + if (vp->v_object) { + vm_object_page_clean(vp->v_object, 0, 0, TRUE); + anyio = 1; + } + vput(vp); + } + } else { + simple_unlock(&vp->v_interlock); } } + if (anyio && (--tries > 0)) + goto loop; } /* @@ -2021,6 +2074,8 @@ loop: * is done for all VREG files in the system. Some filesystems might * afford the additional metadata buffering capability of the * VMIO code by making the device node be VMIO mode also. + * + * If !waslocked, must be called with interlock. */ int vfs_object_create(vp, p, cred, waslocked) @@ -2033,44 +2088,49 @@ vfs_object_create(vp, p, cred, waslocked) vm_object_t object; int error = 0; - if ((vp->v_type != VREG) && (vp->v_type != VBLK)) + if ((vp->v_type != VREG) && (vp->v_type != VBLK)) { return 0; + } + + if (!waslocked) + vn_lock(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_RETRY, p); retry: if ((object = vp->v_object) == NULL) { if (vp->v_type == VREG) { if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0) goto retn; - (void) vnode_pager_alloc(vp, + object = vnode_pager_alloc(vp, OFF_TO_IDX(round_page(vat.va_size)), 0, 0); - vp->v_object->flags |= OBJ_VFS_REF; - } else { + } else if (major(vp->v_rdev) < nblkdev) { /* * This simply allocates the biggest object possible * for a VBLK vnode. This should be fixed, but doesn't * cause any problems (yet). */ - (void) vnode_pager_alloc(vp, INT_MAX, 0, 0); - vp->v_object->flags |= OBJ_VFS_REF; + object = vnode_pager_alloc(vp, INT_MAX, 0, 0); } + object->ref_count--; + vp->v_usecount--; } else { if (object->flags & OBJ_DEAD) { - if (waslocked) - VOP_UNLOCK(vp, 0, p); + VOP_UNLOCK(vp, 0, p); tsleep(object, PVM, "vodead", 0); - if (waslocked) - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); goto retry; } - if ((object->flags & OBJ_VFS_REF) == 0) { - vm_object_reference(object); - object->flags |= OBJ_VFS_REF; - } } - if (vp->v_object) - vp->v_flag |= VVMIO; + + if (vp->v_object) { + vp->v_flag |= VOBJBUF; + } retn: + if (!waslocked) { + simple_lock(&vp->v_interlock); + VOP_UNLOCK(vp, LK_INTERLOCK, p); + } + return error; } diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 69751c4..b7be81e 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 - * $Id: vfs_syscalls.c,v 1.87 1997/12/27 02:56:23 bde Exp $ + * $Id: vfs_syscalls.c,v 1.88 1997/12/29 00:22:50 dyson Exp $ */ /* For 4.3 integer FS ID compatibility */ @@ -428,8 +428,8 @@ dounmount(mp, flags, p) if (mp->mnt_flag & MNT_EXPUBLIC) vfs_setpublicfs(NULL, NULL, NULL); + vfs_msync(mp, MNT_WAIT); mp->mnt_flag &=~ MNT_ASYNC; - vfs_msync(mp, MNT_NOWAIT); cache_purgevfs(mp); /* remove cache entries for this file sys */ if (((mp->mnt_flag & MNT_RDONLY) || (error = VFS_SYNC(mp, MNT_WAIT, p->p_ucred, p)) == 0) || @@ -919,6 +919,8 @@ open(p, uap) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; } + if ((vp->v_type == VREG) && (vp->v_object == NULL)) + vfs_object_create(vp, p, p->p_ucred, TRUE); VOP_UNLOCK(vp, 0, p); p->p_retval[0] = indx; return (0); @@ -1102,14 +1104,14 @@ link(p, uap) struct nameidata nd; int error; - NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); + NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); vp = nd.ni_vp; if (vp->v_type == VDIR) error = EPERM; /* POSIX */ else { - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); error = namei(&nd); if (!error) { if (nd.ni_vp != NULL) { @@ -1161,7 +1163,7 @@ symlink(p, uap) path = zalloc(namei_zone); if (error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) goto out; - NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, link), p); + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), p); if (error = namei(&nd)) goto out; if (nd.ni_vp) { @@ -1266,7 +1268,7 @@ unlink(p, uap) if (!error) { VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE); - error = VOP_REMOVE(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); + error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); } else { VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd); if (nd.ni_dvp == vp) @@ -1395,7 +1397,7 @@ access(p, uap) t_gid = cred->cr_groups[0]; cred->cr_uid = p->p_cred->p_ruid; cred->cr_groups[0] = p->p_cred->p_rgid; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) goto out1; @@ -1444,7 +1446,7 @@ ostat(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1481,7 +1483,7 @@ olstat(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1549,7 +1551,7 @@ stat(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1584,7 +1586,7 @@ lstat(p, uap) struct stat sb; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1620,7 +1622,7 @@ pathconf(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -1655,7 +1657,7 @@ readlink(p, uap) int error; struct nameidata nd; - NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF, UIO_USERSPACE, + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, SCARG(uap, path), p); if (error = namei(&nd)) return (error); @@ -2196,7 +2198,7 @@ rename(p, uap) if (error = namei(&fromnd)) return (error); fvp = fromnd.ni_vp; - NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART, + NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ, UIO_USERSPACE, SCARG(uap, to), p); if (fromnd.ni_vp->v_type == VDIR) tond.ni_cnd.cn_flags |= WILLBEDIR; @@ -2235,8 +2237,9 @@ rename(p, uap) out: if (!error) { VOP_LEASE(tdvp, p, p->p_ucred, LEASE_WRITE); - if (fromnd.ni_dvp != tdvp) + if (fromnd.ni_dvp != tdvp) { VOP_LEASE(fromnd.ni_dvp, p, p->p_ucred, LEASE_WRITE); + } if (tvp) { VOP_LEASE(tvp, p, p->p_ucred, LEASE_WRITE); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index e0c9b4a..c4b8d66 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)vfs_vnops.c 8.2 (Berkeley) 1/21/94 - * $Id: vfs_vnops.c,v 1.44 1997/12/29 00:22:55 dyson Exp $ + * $Id: vfs_vnops.c,v 1.45 1997/12/29 01:03:43 dyson Exp $ */ #include <sys/param.h> @@ -511,7 +511,7 @@ vn_lock(vp, flags, p) if (vp->v_flag & VXLOCK) { vp->v_flag |= VXWANT; simple_unlock(&vp->v_interlock); - if (tsleep((caddr_t)vp, PINOD, "vn_lock", 60*hz)) { + if (tsleep((caddr_t)vp, PINOD, "vn_lock", 120*hz)) { vprint("vn_lock: timeout:", vp); } error = ENOENT; diff --git a/sys/libkern/cmpdi2.c b/sys/libkern/cmpdi2.c index 238bd4e..d9c486b 100644 --- a/sys/libkern/cmpdi2.c +++ b/sys/libkern/cmpdi2.c @@ -34,10 +34,10 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * - * $Id$ + * $Id: cmpdi2.c,v 1.4 1997/02/22 09:39:52 peter Exp $ */ -#include "quad.h" +#include <libkern/quad.h> /* * Return 0, 1, or 2 as a <, =, > b respectively. diff --git a/sys/miscfs/procfs/procfs_map.c b/sys/miscfs/procfs/procfs_map.c index 184cee9..7033e1c 100644 --- a/sys/miscfs/procfs/procfs_map.c +++ b/sys/miscfs/procfs/procfs_map.c @@ -36,7 +36,7 @@ * * @(#)procfs_status.c 8.3 (Berkeley) 2/17/94 * - * $Id: procfs_map.c,v 1.12 1997/08/02 14:32:12 bde Exp $ + * $Id: procfs_map.c,v 1.13 1997/11/14 22:57:46 tegge Exp $ */ #include <sys/param.h> @@ -101,7 +101,7 @@ procfs_domap(curp, p, pfs, uio) continue; obj = entry->object.vm_object; - if (obj && (obj->ref_count == 1)) + if (obj && (obj->shadow_count == 1)) privateresident = obj->resident_page_count; else privateresident = 0; diff --git a/sys/miscfs/procfs/procfs_vnops.c b/sys/miscfs/procfs/procfs_vnops.c index b8bd8e9..00cca89 100644 --- a/sys/miscfs/procfs/procfs_vnops.c +++ b/sys/miscfs/procfs/procfs_vnops.c @@ -36,7 +36,7 @@ * * @(#)procfs_vnops.c 8.18 (Berkeley) 5/21/95 * - * $Id: procfs_vnops.c,v 1.50 1997/12/27 02:56:25 bde Exp $ + * $Id: procfs_vnops.c,v 1.51 1998/01/06 01:37:12 sef Exp $ */ /* @@ -186,7 +186,7 @@ procfs_close(ap) * vnode. While one would expect v_usecount to be 1 at * that point, it seems that (according to John Dyson) * the VM system will bump up the usecount. So: if the - * usecount is 2, and VVMIO is set, then this is really + * usecount is 2, and VOBJBUF is set, then this is really * the last close. Otherwise, if the usecount is < 2 * then it is definitely the last close. * If this is the last close, then it checks to see if @@ -197,10 +197,7 @@ procfs_close(ap) * told to stop on an event, but then the requesting process * has gone away or forgotten about it. */ - if (((ap->a_vp->v_usecount == 2 - && ap->a_vp->v_object - && (ap->a_vp->v_flag & VVMIO)) || - (ap->a_vp->v_usecount < 2)) + if ((ap->a_vp->v_usecount < 2) && (p = pfind(pfs->pfs_pid)) && !(p->p_pfsflags & PF_LINGER)) { p->p_stops = 0; diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c index 6da09a6..3bad030 100644 --- a/sys/miscfs/specfs/spec_vnops.c +++ b/sys/miscfs/specfs/spec_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)spec_vnops.c 8.14 (Berkeley) 5/21/95 - * $Id: spec_vnops.c,v 1.51 1997/10/27 13:33:42 bde Exp $ + * $Id: spec_vnops.c,v 1.52 1997/12/29 00:23:16 dyson Exp $ */ #include <sys/param.h> @@ -232,6 +232,7 @@ spec_open(ap) (ap->a_mode & FWRITE) && (bdevsw[maj]->d_flags & D_TYPEMASK) == D_DISK) return (EPERM); + /* * Do not allow opens of block devices that are * currently mounted. @@ -392,10 +393,14 @@ spec_write(ap) brelse(bp); return (error); } + if (vp->v_flag & VOBJBUF) + bp->b_flags |= B_CLUSTEROK; error = uiomove((char *)bp->b_data + on, n, uio); if (n + on == bsize) { - /* bawrite(bp); */ - cluster_write(bp, 0); + if ((vp->v_flag & VOBJBUF) && (on == 0)) + vfs_bio_awrite(bp); + else + bawrite(bp); } else bdwrite(bp); } while (error == 0 && uio->uio_resid > 0 && n != 0); @@ -499,10 +504,15 @@ loop: continue; if ((bp->b_flags & B_DELWRI) == 0) panic("spec_fsync: not dirty"); - bremfree(bp); - bp->b_flags |= B_BUSY; - splx(s); - bawrite(bp); + if ((vp->v_flag & VOBJBUF) && (bp->b_flags & B_CLUSTEROK)) { + vfs_bio_awrite(bp); + splx(s); + } else { + bremfree(bp); + bp->b_flags |= B_BUSY; + splx(s); + bawrite(bp); + } goto loop; } if (ap->a_waitfor == MNT_WAIT) { @@ -631,6 +641,7 @@ spec_close(ap) error = vinvalbuf(vp, V_SAVE, ap->a_cred, ap->a_p, 0, 0); if (error) return (error); + /* * We do not want to really close the device if it * is still in use unless we are trying to close it diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c index 0356f42..4857d39 100644 --- a/sys/nfs/nfs_bio.c +++ b/sys/nfs/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $ + * $Id: nfs_bio.c,v 1.45 1997/12/08 00:59:08 dyson Exp $ */ @@ -84,8 +84,8 @@ nfs_getpages(ap) int error; vm_page_t m; - if (!(ap->a_vp->v_flag & VVMIO)) { - printf("nfs_getpages: called with non-VMIO vnode??\n"); + if ((ap->a_vp->v_object) == NULL) { + printf("nfs_getpages: called with non-merged cache vnode??\n"); return EOPNOTSUPP; } diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c index 0356f42..4857d39 100644 --- a/sys/nfsclient/nfs_bio.c +++ b/sys/nfsclient/nfs_bio.c @@ -34,7 +34,7 @@ * SUCH DAMAGE. * * @(#)nfs_bio.c 8.9 (Berkeley) 3/30/95 - * $Id: nfs_bio.c,v 1.44 1997/09/10 19:52:25 phk Exp $ + * $Id: nfs_bio.c,v 1.45 1997/12/08 00:59:08 dyson Exp $ */ @@ -84,8 +84,8 @@ nfs_getpages(ap) int error; vm_page_t m; - if (!(ap->a_vp->v_flag & VVMIO)) { - printf("nfs_getpages: called with non-VMIO vnode??\n"); + if ((ap->a_vp->v_object) == NULL) { + printf("nfs_getpages: called with non-merged cache vnode??\n"); return EOPNOTSUPP; } diff --git a/sys/sys/lock.h b/sys/sys/lock.h index af98a7a..18a9edd 100644 --- a/sys/sys/lock.h +++ b/sys/sys/lock.h @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * @(#)lock.h 8.12 (Berkeley) 5/19/95 - * $Id: lock.h,v 1.9 1997/08/30 07:59:47 fsmp Exp $ + * $Id: lock.h,v 1.10 1997/09/21 04:24:02 dyson Exp $ */ #ifndef _LOCK_H_ @@ -133,6 +133,7 @@ struct lock { #define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after getting lk_interlock */ #define LK_RETRY 0x00020000 /* vn_lock: retry until locked */ +#define LK_NOOBJ 0x00040000 /* vget: don't create object */ /* * Internal state flags corresponding to lk_sharecount, and lk_waitcount diff --git a/sys/sys/lockmgr.h b/sys/sys/lockmgr.h index af98a7a..18a9edd 100644 --- a/sys/sys/lockmgr.h +++ b/sys/sys/lockmgr.h @@ -35,7 +35,7 @@ * SUCH DAMAGE. * * @(#)lock.h 8.12 (Berkeley) 5/19/95 - * $Id: lock.h,v 1.9 1997/08/30 07:59:47 fsmp Exp $ + * $Id: lock.h,v 1.10 1997/09/21 04:24:02 dyson Exp $ */ #ifndef _LOCK_H_ @@ -133,6 +133,7 @@ struct lock { #define LK_INTERLOCK 0x00010000 /* unlock passed simple lock after getting lk_interlock */ #define LK_RETRY 0x00020000 /* vn_lock: retry until locked */ +#define LK_NOOBJ 0x00040000 /* vget: don't create object */ /* * Internal state flags corresponding to lk_sharecount, and lk_waitcount diff --git a/sys/sys/namei.h b/sys/sys/namei.h index 09acc2a..5be36f3 100644 --- a/sys/sys/namei.h +++ b/sys/sys/namei.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)namei.h 8.5 (Berkeley) 1/9/95 - * $Id: namei.h,v 1.16 1997/09/07 05:27:18 bde Exp $ + * $Id: namei.h,v 1.17 1997/09/07 17:08:32 bde Exp $ */ #ifndef _SYS_NAMEI_H_ @@ -109,6 +109,7 @@ struct nameidata { #define WANTPARENT 0x0010 /* want parent vnode returned unlocked */ #define NOCACHE 0x0020 /* name must not be left in cache */ #define FOLLOW 0x0040 /* follow symbolic links */ +#define NOOBJ 0x0080 /* don't create object */ #define NOFOLLOW 0x0000 /* do not follow symbolic links (pseudo) */ #define MODMASK 0x00fc /* mask of operational modifiers */ /* diff --git a/sys/sys/uio.h b/sys/sys/uio.h index ca61e66..5373f88 100644 --- a/sys/sys/uio.h +++ b/sys/sys/uio.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)uio.h 8.5 (Berkeley) 2/22/94 - * $Id: uio.h,v 1.7 1997/12/19 09:03:37 dyson Exp $ + * $Id: uio.h,v 1.8 1997/12/19 10:03:31 bde Exp $ */ #ifndef _SYS_UIO_H_ @@ -78,6 +78,7 @@ struct vm_object; int uiomove __P((caddr_t, int, struct uio *)); int uiomoveco __P((caddr_t, int, struct uio *, struct vm_object *)); +int uioread __P((int, struct uio *, struct vm_object *, int *)); #else /* !KERNEL */ diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 9eb0809..fa70aae 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)vnode.h 8.7 (Berkeley) 2/4/94 - * $Id: vnode.h,v 1.61 1997/12/29 01:03:55 dyson Exp $ + * $Id: vnode.h,v 1.62 1997/12/29 16:53:53 dyson Exp $ */ #ifndef _SYS_VNODE_H_ @@ -146,14 +146,13 @@ struct vnode { #define VBWAIT 0x00400 /* waiting for output to complete */ #define VALIASED 0x00800 /* vnode has an alias */ #define VDIROP 0x01000 /* LFS: vnode is involved in a directory op */ -#define VVMIO 0x02000 /* VMIO flag */ +#define VOBJBUF 0x02000 /* Allocate buffers in VM object */ #define VNINACT 0x04000 /* LFS: skip ufs_inactive() in lfs_vunref */ #define VAGE 0x08000 /* Insert vnode at head of free list */ #define VOLOCK 0x10000 /* vnode is locked waiting for an object */ #define VOWANT 0x20000 /* a process is waiting for VOLOCK */ #define VDOOMED 0x40000 /* This vnode is being recycled */ #define VFREE 0x80000 /* This vnode is on the freelist */ -#define VOBJREF 0x100000 /* This vnode is referenced by it's object */ /* * Vnode attributes. A field value of VNOVAL represents a field whose value diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index d66d48f..c4edbd3 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 - * $Id: ffs_balloc.c,v 1.15 1997/08/02 14:33:18 bde Exp $ + * $Id: ffs_balloc.c,v 1.16 1997/12/05 19:55:49 bde Exp $ */ #include <sys/param.h> @@ -237,6 +237,8 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) if (flags & B_SYNC) { bwrite(bp); } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } } @@ -265,6 +267,8 @@ ffs_balloc(ip, lbn, size, cred, bpp, flags) if (flags & B_SYNC) { bwrite(bp); } else { + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; bdwrite(bp); } *bpp = nbp; diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c index 92bb2a7..77f72d3 100644 --- a/sys/ufs/ffs/ffs_inode.c +++ b/sys/ufs/ffs/ffs_inode.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_inode.c 8.13 (Berkeley) 4/21/95 - * $Id: ffs_inode.c,v 1.28 1997/10/16 10:49:28 phk Exp $ + * $Id: ffs_inode.c,v 1.29 1997/10/16 20:32:34 phk Exp $ */ #include "opt_quota.h" @@ -134,7 +134,8 @@ ffs_update(vp, access, modify, waitfor) if (waitfor && (vp->v_mount->mnt_flag & MNT_ASYNC) == 0) return (bwrite(bp)); else { - bp->b_flags |= B_CLUSTEROK; + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; bdwrite(bp); return (0); } @@ -214,6 +215,8 @@ ffs_truncate(vp, length, flags, cred, p) return (error); oip->i_size = length; vnode_pager_setsize(ovp, length); + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else if (ovp->v_mount->mnt_flag & MNT_ASYNC) @@ -245,6 +248,8 @@ ffs_truncate(vp, length, flags, cred, p) size = blksize(fs, oip, lbn); bzero((char *)bp->b_data + offset, (u_int)(size - offset)); allocbuf(bp, size); + if (bp->b_bufsize == fs->fs_bsize) + bp->b_flags |= B_CLUSTEROK; if (aflags & B_SYNC) bwrite(bp); else if (ovp->v_mount->mnt_flag & MNT_ASYNC) diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index 16f77c6..e78bceb 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_vfsops.c 8.31 (Berkeley) 5/20/95 - * $Id: ffs_vfsops.c,v 1.62 1997/11/12 05:42:25 julian Exp $ + * $Id: ffs_vfsops.c,v 1.63 1997/12/29 00:24:28 dyson Exp $ */ #include "opt_quota.h" @@ -63,6 +63,7 @@ #include <vm/vm_prot.h> #include <vm/vm_page.h> #include <vm/vm_extern.h> +#include <vm/vm_object.h> static MALLOC_DEFINE(M_FFSNODE, "FFS node", "FFS vnode private part"); @@ -364,6 +365,7 @@ ffs_reload(mp, cred, p) struct buf *bp; struct fs *fs, *newfs; struct partinfo dpart; + dev_t dev; int i, blks, size, error; int32_t *lp; @@ -375,6 +377,18 @@ ffs_reload(mp, cred, p) devvp = VFSTOUFS(mp)->um_devvp; if (vinvalbuf(devvp, 0, cred, p, 0, 0)) panic("ffs_reload: dirty1"); + + dev = devvp->v_rdev; + /* + * Only VMIO the backing device if the backing device is a real + * block device. This excludes the original MFS implementation. + * Note that it is optional that the backing device be VMIOed. This + * increases the opportunity for metadata caching. + */ + if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) { + vfs_object_create(devvp, p, p->p_ucred, 0); + } + /* * Step 2: re-read superblock from disk. */ @@ -509,17 +523,31 @@ ffs_mountfs(devvp, mp, p, malloctype) if (error) return (error); ncount = vcount(devvp); +/* if (devvp->v_object) ncount -= 1; +*/ if (ncount > 1 && devvp != rootvp) return (EBUSY); if (error = vinvalbuf(devvp, V_SAVE, cred, p, 0, 0)) return (error); + /* + * Only VMIO the backing device if the backing device is a real + * block device. This excludes the original MFS implementation. + * Note that it is optional that the backing device be VMIOed. This + * increases the opportunity for metadata caching. + */ + if ((devvp->v_type == VBLK) && (major(dev) < nblkdev)) { + vfs_object_create(devvp, p, p->p_ucred, 0); + } + + ronly = (mp->mnt_flag & MNT_RDONLY) != 0; error = VOP_OPEN(devvp, ronly ? FREAD : FREAD|FWRITE, FSCRED, p); if (error) return (error); + if (VOP_IOCTL(devvp, DIOCGPART, (caddr_t)&dpart, FREAD, cred, p) != 0) size = DEV_BSIZE; else @@ -641,15 +669,6 @@ ffs_mountfs(devvp, mp, p, malloctype) fs->fs_clean = 0; (void) ffs_sbupdate(ump, MNT_WAIT); } - /* - * Only VMIO the backing device if the backing device is a real - * block device. This excludes the original MFS implementation. - * Note that it is optional that the backing device be VMIOed. This - * increases the opportunity for metadata caching. - */ - if ((devvp->v_type == VBLK) && (major(devvp->v_rdev) < nblkdev)) { - vfs_object_create(devvp, p, p->p_ucred, 0); - } return (0); out: if (bp) @@ -727,6 +746,10 @@ ffs_unmount(mp, mntflags, p) } ump->um_devvp->v_specflags &= ~SI_MOUNTEDON; + vinvalbuf(ump->um_devvp, V_SAVE, NOCRED, p, 0, 0); + if (ump->um_devvp->v_object) + vm_object_terminate(ump->um_devvp->v_object); + error = VOP_CLOSE(ump->um_devvp, fs->fs_ronly ? FREAD : FREAD|FWRITE, NOCRED, p); diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index 341b811..d3b68b7 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ffs_vnops.c 8.15 (Berkeley) 5/14/95 - * $Id: ffs_vnops.c,v 1.36 1997/10/16 20:32:35 phk Exp $ + * $Id: ffs_vnops.c,v 1.37 1997/10/27 13:33:45 bde Exp $ */ #include <sys/param.h> @@ -124,6 +124,16 @@ ffs_fsync(ap) struct buf *nbp; int pass; int s; + daddr_t lbn; + + + if (vp->v_type == VBLK) { + lbn = INT_MAX; + } else { + struct inode *ip; + ip = VTOI(vp); + lbn = lblkno(ip->i_fs, (ip->i_size + ip->i_fs->fs_bsize - 1)); + } pass = 0; /* @@ -133,24 +143,40 @@ loop: s = splbio(); for (bp = vp->v_dirtyblkhd.lh_first; bp; bp = nbp) { nbp = bp->b_vnbufs.le_next; - if ((bp->b_flags & B_BUSY) || (pass == 0 && (bp->b_blkno < 0))) + if ((bp->b_flags & B_BUSY) || (pass == 0 && (bp->b_lblkno < 0))) continue; if ((bp->b_flags & B_DELWRI) == 0) panic("ffs_fsync: not dirty"); - if (bp->b_vp != vp || ap->a_waitfor != MNT_NOWAIT) { + if (((bp->b_vp != vp) || (ap->a_waitfor != MNT_NOWAIT)) || + ((vp->v_type != VREG) && (vp->v_type != VBLK))) { bremfree(bp); bp->b_flags |= B_BUSY; splx(s); + /* * Wait for I/O associated with indirect blocks to complete, * since there is no way to quickly wait for them below. */ - if (bp->b_vp == vp || ap->a_waitfor == MNT_NOWAIT) - (void) bawrite(bp); - else + if ((bp->b_vp == vp) && (ap->a_waitfor == MNT_NOWAIT)) { + if (bp->b_flags & B_CLUSTEROK) { + bdwrite(bp); + (void) vfs_bio_awrite(bp); + } else { + (void) bawrite(bp); + } + } else { (void) bwrite(bp); + } + + } else if ((vp->v_type == VREG) && (bp->b_lblkno >= lbn)) { + + bremfree(bp); + bp->b_flags |= B_BUSY | B_INVAL | B_NOCACHE; + brelse(bp); + splx(s); + } else { vfs_bio_awrite(bp); splx(s); @@ -182,4 +208,3 @@ loop: gettime(&tv); return (UFS_UPDATE(ap->a_vp, &tv, &tv, ap->a_waitfor == MNT_WAIT)); } - diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c index 1484082..3ae1a4a 100644 --- a/sys/ufs/ufs/ufs_readwrite.c +++ b/sys/ufs/ufs/ufs_readwrite.c @@ -31,7 +31,7 @@ * SUCH DAMAGE. * * @(#)ufs_readwrite.c 8.11 (Berkeley) 5/8/95 - * $Id: ufs_readwrite.c,v 1.37 1997/12/21 10:41:19 dyson Exp $ + * $Id: ufs_readwrite.c,v 1.38 1997/12/29 01:03:50 dyson Exp $ */ #ifdef LFS_READWRITE @@ -108,8 +108,34 @@ READ(ap) return (EFBIG); for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) { + if ((bytesinfile = ip->i_size - uio->uio_offset) <= 0) break; + +#if 1 + if ((vfs_ioopt > 1) && vp->v_object) { + int nread, toread; + vm_object_reference(vp->v_object); + toread = uio->uio_resid; + if (toread > bytesinfile) + toread = bytesinfile; + if (toread >= PAGE_SIZE) { + error = uioread(toread, uio, vp->v_object, &nread); + if ((uio->uio_resid == 0) || (error != 0)) { + if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) + ip->i_flag |= IN_ACCESS; + vm_object_vndeallocate(vp->v_object); + return error; + } + if (nread > 0) { + vm_object_vndeallocate(vp->v_object); + continue; + } + } + vm_object_vndeallocate(vp->v_object); + } +#endif + lbn = lblkno(fs, uio->uio_offset); nextlbn = lbn + 1; size = BLKSIZE(fs, ip, lbn); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index a14512d..f45d377 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -66,7 +66,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_fault.c,v 1.71 1997/09/01 03:17:15 bde Exp $ + * $Id: vm_fault.c,v 1.72 1997/12/19 09:03:10 dyson Exp $ */ /* @@ -222,6 +222,15 @@ RetryFault:; } } + /* + * Make a reference to this object to prevent its disposal while we + * are messing with it. Once we have the reference, the map is free + * to be diddled. Since objects reference their shadows (and copies), + * they will stay around as well. + */ + vm_object_reference(first_object); + first_object->paging_in_progress++; + vp = vnode_pager_lock(first_object); if ((fault_type & VM_PROT_WRITE) && (first_object->type == OBJT_VNODE)) { @@ -236,16 +245,6 @@ RetryFault:; first_m = NULL; /* - * Make a reference to this object to prevent its disposal while we - * are messing with it. Once we have the reference, the map is free - * to be diddled. Since objects reference their shadows (and copies), - * they will stay around as well. - */ - - first_object->ref_count++; - first_object->paging_in_progress++; - - /* * INVARIANTS (through entire routine): * * 1) At all times, we must either have the object lock or a busy diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 11604ce..17b0e75 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.c,v 1.102 1997/12/29 00:24:43 dyson Exp $ + * $Id: vm_map.c,v 1.103 1997/12/29 01:03:34 dyson Exp $ */ /* @@ -558,6 +558,8 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset, if ((object == NULL) && (prev_entry != &map->header) && (( prev_entry->eflags & (MAP_ENTRY_IS_A_MAP | MAP_ENTRY_IS_SUB_MAP)) == 0) && + ((prev_entry->object.vm_object == NULL) || + (prev_entry->object.vm_object->type == OBJT_DEFAULT)) && (prev_entry->end == start) && (prev_entry->wired_count == 0)) { @@ -757,7 +759,8 @@ vm_map_simplify_entry(map, entry) prevsize = prev->end - prev->start; if ( (prev->end == entry->start) && (prev->object.vm_object == entry->object.vm_object) && - (!prev->object.vm_object || (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) && + (!prev->object.vm_object || + (prev->object.vm_object->behavior == entry->object.vm_object->behavior)) && (!prev->object.vm_object || (prev->offset + prevsize == entry->offset)) && (prev->eflags == entry->eflags) && @@ -783,7 +786,8 @@ vm_map_simplify_entry(map, entry) esize = entry->end - entry->start; if ((entry->end == next->start) && (next->object.vm_object == entry->object.vm_object) && - (!next->object.vm_object || (next->object.vm_object->behavior == entry->object.vm_object->behavior)) && + (!next->object.vm_object || + (next->object.vm_object->behavior == entry->object.vm_object->behavior)) && (!entry->object.vm_object || (entry->offset + esize == next->offset)) && (next->eflags == entry->eflags) && @@ -2012,7 +2016,7 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) (src_entry->object.vm_object->type == OBJT_DEFAULT || src_entry->object.vm_object->type == OBJT_SWAP)) vm_object_collapse(src_entry->object.vm_object); - ++src_entry->object.vm_object->ref_count; + vm_object_reference(src_entry->object.vm_object); src_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); dst_entry->eflags |= (MAP_ENTRY_COW|MAP_ENTRY_NEEDS_COPY); dst_entry->object.vm_object = @@ -2099,7 +2103,7 @@ vmspace_fork(vm1) new_entry = vm_map_entry_create(new_map); *new_entry = *old_entry; new_entry->wired_count = 0; - ++object->ref_count; + vm_object_reference(object); /* * Insert the entry into the new map -- we know we're @@ -2458,12 +2462,13 @@ vm_map_lookup_done(map, entry) * operations. */ int -vm_uiomove(mapa, srcobject, cp, cnt, uaddra) +vm_uiomove(mapa, srcobject, cp, cnt, uaddra, npages) vm_map_t mapa; vm_object_t srcobject; off_t cp; int cnt; vm_offset_t uaddra; + int *npages; { vm_map_t map; vm_object_t first_object, object; @@ -2475,6 +2480,9 @@ vm_uiomove(mapa, srcobject, cp, cnt, uaddra) vm_pindex_t first_pindex, osize, oindex; off_t ooffset; + if (npages) + *npages = 0; + while (cnt > 0) { map = mapa; uaddr = uaddra; @@ -2485,11 +2493,6 @@ vm_uiomove(mapa, srcobject, cp, cnt, uaddra) return EFAULT; } -#if 0 - printf("foff: 0x%x, uaddr: 0x%x\norig entry: (0x%x, 0x%x), ", - (int) cp, uaddr, first_entry->start, first_entry->end); -#endif - vm_map_clip_start(map, first_entry, uaddr); tcnt = cnt; @@ -2500,11 +2503,27 @@ vm_uiomove(mapa, srcobject, cp, cnt, uaddra) start = first_entry->start; end = first_entry->end; -#if 0 - printf("new entry: (0x%x, 0x%x)\n", start, end); -#endif osize = atop(tcnt); + + if (npages) { + vm_pindex_t src_index, idx; + src_index = OFF_TO_IDX(cp); + for (idx = 0; idx < osize; idx++) { + vm_page_t m; + if ((m = vm_page_lookup(srcobject, src_index + idx)) == NULL) { + vm_map_lookup_done(map, first_entry); + return 0; + } + if ((m->flags & PG_BUSY) || m->busy || + m->hold_count || m->wire_count || + ((m->valid & VM_PAGE_BITS_ALL) != VM_PAGE_BITS_ALL)) { + vm_map_lookup_done(map, first_entry); + return 0; + } + } + } + oindex = OFF_TO_IDX(first_entry->offset); /* @@ -2538,7 +2557,7 @@ vm_uiomove(mapa, srcobject, cp, cnt, uaddra) object = srcobject; object->flags |= OBJ_OPT; - object->ref_count++; + vm_object_reference(object); ooffset = cp; vm_object_shadow(&object, &ooffset, osize); @@ -2577,6 +2596,8 @@ vm_uiomove(mapa, srcobject, cp, cnt, uaddra) cnt -= tcnt; uaddra += tcnt; cp += tcnt; + if (npages) + *npages += osize; } return 0; } @@ -2616,14 +2637,12 @@ vm_freeze_copyopts(object, froma, toa) int s; vm_object_t robject, robjectn; vm_pindex_t idx, from, to; + return; - if (vfs_ioopt == 0 || (object == NULL) || ((object->flags & OBJ_OPT) == 0)) + if ((vfs_ioopt == 0) || (object == NULL) || + ((object->flags & OBJ_OPT) == 0)) return; -#if 0 - printf("sc: %d, rc: %d\n", object->shadow_count, object->ref_count); -#endif - if (object->shadow_count > object->ref_count) panic("vm_freeze_copyopts: sc > rc"); @@ -2643,7 +2662,7 @@ vm_freeze_copyopts(object, froma, toa) if ((bo_pindex + robject->size) < froma) continue; - robject->ref_count++; + vm_object_reference(robject); while (robject->paging_in_progress) { robject->flags |= OBJ_PIPWNT; tsleep(robject, PVM, "objfrz", 0); @@ -2714,9 +2733,6 @@ retryout: vm_object_pip_wakeup(robject); if (((from - bo_pindex) == 0) && ((to - bo_pindex) == robject->size)) { -#if 0 - printf("removing obj: %d, %d\n", object->shadow_count, object->ref_count); -#endif object->shadow_count--; TAILQ_REMOVE(&object->shadow_head, robject, shadow_list); @@ -2729,9 +2745,8 @@ retryout: vm_object_deallocate(object); vm_object_deallocate(robject); return; - } else { - object->ref_count--; } + vm_object_deallocate(object); } vm_object_deallocate(robject); } @@ -2750,16 +2765,18 @@ retryout: */ DB_SHOW_COMMAND(map, vm_map_print) { + static int nlines; /* XXX convert args. */ register vm_map_t map = (vm_map_t)addr; boolean_t full = have_addr; register vm_map_entry_t entry; - db_iprintf("%s map 0x%x: pmap=0x%x,ref=%d,nentries=%d,version=%d\n", + db_iprintf("%s map 0x%x: pmap=0x%x, ref=%d, nentries=%d, version=%d\n", (map->is_main_map ? "Task" : "Share"), (int) map, (int) (map->pmap), map->ref_count, map->nentries, map->timestamp); + nlines++; if (!full && db_indent) return; @@ -2767,23 +2784,34 @@ DB_SHOW_COMMAND(map, vm_map_print) db_indent += 2; for (entry = map->header.next; entry != &map->header; entry = entry->next) { - db_iprintf("map entry 0x%x: start=0x%x, end=0x%x, ", +#if 0 + if (nlines > 18) { + db_printf("--More--"); + cngetc(); + db_printf("\r"); + nlines = 0; + } +#endif + + db_iprintf("map entry 0x%x: start=0x%x, end=0x%x\n", (int) entry, (int) entry->start, (int) entry->end); + nlines++; if (map->is_main_map) { static char *inheritance_name[4] = {"share", "copy", "none", "donate_copy"}; - db_printf("prot=%x/%x/%s, ", + db_iprintf(" prot=%x/%x/%s", entry->protection, entry->max_protection, inheritance_name[entry->inheritance]); if (entry->wired_count != 0) - db_printf("wired, "); + db_printf(", wired"); } if (entry->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) { - db_printf("share=0x%x, offset=0x%x\n", + db_printf(", share=0x%x, offset=0x%x\n", (int) entry->object.share_map, (int) entry->offset); + nlines++; if ((entry->prev == &map->header) || ((entry->prev->eflags & MAP_ENTRY_IS_A_MAP) == 0) || (entry->prev->object.share_map != @@ -2794,13 +2822,14 @@ DB_SHOW_COMMAND(map, vm_map_print) db_indent -= 2; } } else { - db_printf("object=0x%x, offset=0x%x", + db_printf(", object=0x%x, offset=0x%x", (int) entry->object.vm_object, (int) entry->offset); if (entry->eflags & MAP_ENTRY_COW) db_printf(", copy (%s)", (entry->eflags & MAP_ENTRY_NEEDS_COPY) ? "needed" : "done"); db_printf("\n"); + nlines++; if ((entry->prev == &map->header) || (entry->prev->eflags & MAP_ENTRY_IS_A_MAP) || @@ -2809,10 +2838,31 @@ DB_SHOW_COMMAND(map, vm_map_print) db_indent += 2; vm_object_print((int)entry->object.vm_object, full, 0, (char *)0); + nlines += 4; db_indent -= 2; } } } db_indent -= 2; + if (db_indent == 0) + nlines = 0; } + + +DB_SHOW_COMMAND(procvm, procvm) +{ + struct proc *p; + + if (have_addr) { + p = (struct proc *) addr; + } else { + p = curproc; + } + + printf("p = 0x%x, vmspace = 0x%x, map = 0x%x, pmap = 0x%x\n", + p, p->p_vmspace, &p->p_vmspace->vm_map, &p->p_vmspace->vm_pmap); + + vm_map_print ((int) &p->p_vmspace->vm_map, 1, 0, NULL); +} + #endif /* DDB */ diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 666205b..d70a2b1 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_map.h,v 1.28 1997/08/18 02:06:24 dyson Exp $ + * $Id: vm_map.h,v 1.29 1997/12/19 09:03:12 dyson Exp $ */ /* @@ -336,7 +336,7 @@ int vm_map_submap __P((vm_map_t, vm_offset_t, vm_offset_t, vm_map_t)); void vm_map_madvise __P((vm_map_t, pmap_t, vm_offset_t, vm_offset_t, int)); void vm_map_simplify_entry __P((vm_map_t, vm_map_entry_t)); void vm_init2 __P((void)); -int vm_uiomove __P((vm_map_t, vm_object_t, off_t, int, vm_offset_t)); +int vm_uiomove __P((vm_map_t, vm_object_t, off_t, int, vm_offset_t, int *)); void vm_freeze_copyopts __P((vm_object_t, vm_pindex_t, vm_pindex_t)); #endif diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 221d7fd..a279525 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.c,v 1.102 1997/12/19 09:03:14 dyson Exp $ + * $Id: vm_object.c,v 1.103 1997/12/29 00:24:49 dyson Exp $ */ /* @@ -94,7 +94,6 @@ static void vm_object_qcollapse __P((vm_object_t object)); #ifdef not_used static void vm_object_deactivate_pages __P((vm_object_t)); #endif -static void vm_object_terminate __P((vm_object_t)); /* * Virtual memory objects maintain the actual data @@ -236,18 +235,36 @@ vm_object_reference(object) { if (object == NULL) return; + +#if defined(DIAGNOSTIC) + if (object->flags & OBJ_DEAD) + panic("vm_object_reference: attempting to reference dead obj"); +#endif + + object->ref_count++; + if (object->type == OBJT_VNODE) + vget((struct vnode *) object->handle, LK_NOOBJ, curproc); +} + +inline void +vm_object_vndeallocate(object) + vm_object_t object; +{ + struct vnode *vp = (struct vnode *) object->handle; +#if defined(DIAGNOSTIC) + if (object->type != OBJT_VNODE) + panic("vm_object_vndeallocate: not a vnode object"); + if (vp == NULL) + panic("vm_object_vndeallocate: missing vp"); if (object->ref_count == 0) { - panic("vm_object_reference: attempting to reference deallocated obj"); + vprint("vm_object_vndeallocate", vp); + panic("vm_object_vndeallocate: bad object reference count"); } - object->ref_count++; - if ((object->type == OBJT_VNODE) && (object->flags & OBJ_VFS_REF)) { - struct vnode *vp; - vp = (struct vnode *)object->handle; - simple_lock(&vp->v_interlock); - if (vp->v_flag & VOBJREF) - vp->v_flag |= VOBJREF; - ++vp->v_usecount; - simple_unlock(&vp->v_interlock); +#endif + + object->ref_count--; + if (object->type == OBJT_VNODE) { + vrele(vp); } } @@ -266,11 +283,16 @@ void vm_object_deallocate(object) vm_object_t object; { + int s; vm_object_t temp; - struct vnode *vp; while (object != NULL) { + if (object->type == OBJT_VNODE) { + vm_object_vndeallocate(object); + return; + } + if (object->ref_count == 0) { panic("vm_object_deallocate: object deallocated too many times"); } else if (object->ref_count > 2) { @@ -282,94 +304,68 @@ vm_object_deallocate(object) * Here on ref_count of one or two, which are special cases for * objects. */ - vp = NULL; - if (object->type == OBJT_VNODE) { - vp = (struct vnode *)object->handle; - if (vp->v_flag & VOBJREF) { - if (object->ref_count < 2) { - panic("vm_object_deallocate: " - "not enough references for OBJT_VNODE: %d", - object->ref_count); - } else { + if ((object->ref_count == 2) && (object->shadow_count == 1)) { - /* - * Freeze optimized copies. - */ - vm_freeze_copyopts(object, 0, object->size); - - /* - * Loose our reference to the vnode. - */ - vp->v_flag &= ~VOBJREF; - vrele(vp); - } - } - } - - /* - * Lose the reference - */ - if (object->ref_count == 2) { object->ref_count--; if ((object->handle == NULL) && (object->type == OBJT_DEFAULT || object->type == OBJT_SWAP)) { vm_object_t robject; + robject = TAILQ_FIRST(&object->shadow_head); - if ((robject != NULL) && - (robject->handle == NULL) && +#if defined(DIAGNOSTIC) + if (robject == NULL) + panic("vm_object_deallocate: ref_count: %d," + " shadow_count: %d", + object->ref_count, object->shadow_count); +#endif + if ((robject->handle == NULL) && (robject->type == OBJT_DEFAULT || robject->type == OBJT_SWAP)) { - int s; - robject->ref_count += 2; - object->ref_count += 2; - - do { - s = splvm(); - while (robject->paging_in_progress) { - robject->flags |= OBJ_PIPWNT; - tsleep(robject, PVM, "objde1", 0); - } - - while (object->paging_in_progress) { - object->flags |= OBJ_PIPWNT; - tsleep(object, PVM, "objde2", 0); - } - splx(s); - } while( object->paging_in_progress || robject->paging_in_progress); + robject->ref_count++; + + retry: + s = splvm(); + if (robject->paging_in_progress) { + robject->flags |= OBJ_PIPWNT; + tsleep(robject, PVM, "objde1", 0); + goto retry; + } + + if (object->paging_in_progress) { + object->flags |= OBJ_PIPWNT; + tsleep(object, PVM, "objde2", 0); + goto retry; + } + splx(s); - object->ref_count -= 2; - robject->ref_count -= 2; - if( robject->ref_count == 0) { - robject->ref_count += 1; + if( robject->ref_count == 1) { + robject->ref_count--; object = robject; - continue; + goto doterm; } - vm_object_collapse(robject); - return; + + object = robject; + vm_object_collapse(object); + continue; } } - /* - * If there are still references, then we are done. - */ - return; - } - /* - * Make sure no one uses us. - */ - object->flags |= OBJ_DEAD; + return; - if (vp) - vp->v_flag &= ~VTEXT; + } else { + object->ref_count--; + if (object->ref_count != 0) + return; + } - object->ref_count--; +doterm: temp = object->backing_object; if (temp) { TAILQ_REMOVE(&temp->shadow_head, object, shadow_list); - --temp->shadow_count; + temp->shadow_count--; } vm_object_terminate(object); /* unlocks and deallocates object */ @@ -383,15 +379,17 @@ vm_object_deallocate(object) * * The object must be locked. */ -static void +void vm_object_terminate(object) register vm_object_t object; { register vm_page_t p; int s; - if (object->flags & OBJ_VFS_REF) - panic("vm_object_deallocate: freeing VFS_REF'ed object"); + /* + * Make sure no one uses us. + */ + object->flags |= OBJ_DEAD; /* * wait for the pageout daemon to be done with the object @@ -403,29 +401,44 @@ vm_object_terminate(object) } splx(s); +#if defined(DIAGNOSTIC) if (object->paging_in_progress != 0) panic("vm_object_deallocate: pageout in progress"); +#endif /* * Clean and free the pages, as appropriate. All references to the * object are gone, so we don't need to lock it. */ if (object->type == OBJT_VNODE) { - struct vnode *vp = object->handle; + struct vnode *vp; + + /* + * Freeze optimized copies. + */ + vm_freeze_copyopts(object, 0, object->size); + + /* + * Clean pages and flush buffers. + */ vm_object_page_clean(object, 0, 0, TRUE); + + vp = (struct vnode *) object->handle; vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0); - } - /* - * Now free the pages. For internal objects, this also removes them - * from paging queues. - */ - while ((p = TAILQ_FIRST(&object->memq)) != NULL) { - if (p->busy || (p->flags & PG_BUSY)) - printf("vm_object_terminate: freeing busy page\n"); - PAGE_WAKEUP(p); - vm_page_free(p); - cnt.v_pfree++; + } else { + + /* + * Now free the pages. For internal objects, this also removes them + * from paging queues. + */ + while ((p = TAILQ_FIRST(&object->memq)) != NULL) { + if (p->busy || (p->flags & PG_BUSY)) + printf("vm_object_terminate: freeing busy page\n"); + PAGE_WAKEUP(p); + vm_page_free(p); + cnt.v_pfree++; + } } /* @@ -1122,6 +1135,7 @@ vm_object_collapse(object) object_collapses++; } else { + vm_object_t new_backing_object; /* * If all of the pages in the backing object are * shadowed by the parent object, the parent object no @@ -1173,25 +1187,26 @@ vm_object_collapse(object) * it, since its reference count is at least 2. */ - TAILQ_REMOVE(&object->backing_object->shadow_head, + TAILQ_REMOVE(&backing_object->shadow_head, object, shadow_list); - --object->backing_object->shadow_count; - vm_object_reference(object->backing_object = backing_object->backing_object); - if (object->backing_object) { - TAILQ_INSERT_TAIL(&object->backing_object->shadow_head, + --backing_object->shadow_count; + + new_backing_object = backing_object->backing_object; + if (object->backing_object = new_backing_object) { + vm_object_reference(new_backing_object); + TAILQ_INSERT_TAIL(&new_backing_object->shadow_head, object, shadow_list); - ++object->backing_object->shadow_count; + ++new_backing_object->shadow_count; + object->backing_object_offset += + backing_object->backing_object_offset; } - object->backing_object_offset += backing_object->backing_object_offset; /* * Drop the reference count on backing_object. Since * its ref_count was at least 2, it will not vanish; * so we don't need to call vm_object_deallocate. */ - if (backing_object->ref_count == 1) - printf("should have called obj deallocate\n"); - backing_object->ref_count--; + vm_object_deallocate(backing_object); object_bypasses++; @@ -1220,18 +1235,20 @@ vm_object_page_remove(object, start, end, clean_only) { register vm_page_t p, next; unsigned int size; - int s; + int s, all; if (object == NULL) return; + all = ((end == 0) && (start == 0)); + object->paging_in_progress++; again: size = end - start; - if (size > 4 || size >= object->size / 4) { + if (all || size > 4 || size >= object->size / 4) { for (p = TAILQ_FIRST(&object->memq); p != NULL; p = next) { next = TAILQ_NEXT(p, listq); - if ((start <= p->pindex) && (p->pindex < end)) { + if (all || ((start <= p->pindex) && (p->pindex < end))) { if (p->wire_count != 0) { vm_page_protect(p, VM_PROT_NONE); p->valid = 0; @@ -1516,12 +1533,17 @@ DB_SHOW_COMMAND(object, vm_object_print_static) if (object == NULL) return; - db_iprintf("Object 0x%x: size=0x%x, res=%d, ref=%d, ", - (int) object, (int) object->size, - object->resident_page_count, object->ref_count); - db_printf("offset=0x%x, backing_object=(0x%x)+0x%x\n", + db_iprintf("Object 0x%x: type=%d, size=0x%x, res=%d, ref=%d, flags=0x%x\n", + (int) object, (int) object->type, (int) object->size, + object->resident_page_count, + object->ref_count, + object->flags); + db_iprintf(" sref=%d, offset=0x%x, backing_object(%d)=(0x%x)+0x%x\n", + object->shadow_count, (int) object->paging_offset, - (int) object->backing_object, (int) object->backing_object_offset); + (((int)object->backing_object)?object->backing_object->ref_count:0), + (int) object->backing_object, + (int) object->backing_object_offset); if (!full) return; diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index a13a5bf..ac86c6c 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -61,7 +61,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_object.h,v 1.39 1997/12/19 09:03:16 dyson Exp $ + * $Id: vm_object.h,v 1.40 1997/12/29 00:24:55 dyson Exp $ */ /* @@ -122,16 +122,13 @@ struct vm_object { /* * Flags */ -#define OBJ_CANPERSIST 0x0001 /* allow to persist */ #define OBJ_ACTIVE 0x0004 /* active objects */ #define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ -#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ +#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ #define OBJ_WRITEABLE 0x0080 /* object has been made writable */ -#define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */ +#define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty */ #define OBJ_CLEANING 0x0200 -#define OBJ_VFS_REF 0x0400 /* object is refed by vfs layer */ -#define OBJ_VNODE_GONE 0x0800 /* vnode is gone */ -#define OBJ_OPT 0x1000 /* I/O optimization */ +#define OBJ_OPT 0x1000 /* I/O optimization */ #define OBJ_NORMAL 0x0 /* default behavior */ #define OBJ_SEQUENTIAL 0x1 /* expect sequential accesses */ @@ -170,6 +167,8 @@ boolean_t vm_object_coalesce __P((vm_object_t, vm_pindex_t, vm_size_t, vm_size_t void vm_object_collapse __P((vm_object_t)); void vm_object_copy __P((vm_object_t, vm_pindex_t, vm_object_t *, vm_pindex_t *, boolean_t *)); void vm_object_deallocate __P((vm_object_t)); +void vm_object_terminate __P((vm_object_t)); +void vm_object_vndeallocate __P((vm_object_t)); void vm_object_init __P((void)); void vm_object_page_clean __P((vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t)); void vm_object_page_remove __P((vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t)); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 99ee5a4..eda7f30 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -65,7 +65,7 @@ * any improvements or extensions that they make and grant Carnegie the * rights to redistribute these changes. * - * $Id: vm_pageout.c,v 1.104 1997/12/24 15:05:25 dyson Exp $ + * $Id: vm_pageout.c,v 1.105 1997/12/29 00:25:03 dyson Exp $ */ /* @@ -695,11 +695,7 @@ rescan0: */ if ((m->flags & PG_REFERENCED) != 0) { m->flags &= ~PG_REFERENCED; -#if 0 - pmap_clear_reference(VM_PAGE_TO_PHYS(m)); -#else actcount = pmap_ts_referenced(VM_PAGE_TO_PHYS(m)); -#endif vm_page_activate(m); m->act_count += (actcount + ACT_ADVANCE + 1); continue; diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 360188a..f3ed776 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -38,7 +38,7 @@ * SUCH DAMAGE. * * from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91 - * $Id: vnode_pager.c,v 1.77 1997/12/19 09:03:17 dyson Exp $ + * $Id: vnode_pager.c,v 1.78 1997/12/29 00:25:11 dyson Exp $ */ /* @@ -140,26 +140,18 @@ vnode_pager_alloc(void *handle, vm_size_t size, vm_prot_t prot, * And an object of the appropriate size */ object = vm_object_allocate(OBJT_VNODE, size); - if (vp->v_type == VREG) - object->flags = OBJ_CANPERSIST; - else - object->flags = 0; + object->flags = 0; object->un_pager.vnp.vnp_size = (vm_ooffset_t) size * PAGE_SIZE; object->handle = handle; vp->v_object = object; + vp->v_usecount++; } else { - /* - * vm_object_reference() will remove the object from the cache if - * found and gain a reference to the object. - */ - vm_object_reference(object); + object->ref_count++; + vp->v_usecount++; } - if (vp->v_type == VREG) - vp->v_flag |= VVMIO; - vp->v_flag &= ~VOLOCK; if (vp->v_flag & VOWANT) { vp->v_flag &= ~VOWANT; @@ -186,10 +178,11 @@ vnode_pager_dealloc(object) splx(s); } + object->flags |= OBJ_DEAD; object->handle = NULL; - + object->type = OBJT_DEFAULT; vp->v_object = NULL; - vp->v_flag &= ~(VTEXT | VVMIO); + vp->v_flag &= ~(VTEXT|VOBJBUF); } static boolean_t @@ -541,8 +534,7 @@ vnode_pager_getpages(object, m, count, reqpage) { int rtval; struct vnode *vp; - if (object->flags & OBJ_VNODE_GONE) - return VM_PAGER_ERROR; + vp = object->handle; rtval = VOP_GETPAGES(vp, m, count*PAGE_SIZE, reqpage, 0); if (rtval == EOPNOTSUPP) @@ -643,7 +635,7 @@ vnode_pager_leaf_getpages(object, m, count, reqpage) IDX_TO_OFF(m[i]->pindex), &runpg); if (firstaddr == -1) { if (i == reqpage && foff < object->un_pager.vnp.vnp_size) { - panic("vnode_pager_putpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d", + panic("vnode_pager_getpages: unexpected missing page: firstaddr: %d, foff: %ld, vnp_size: %d", firstaddr, foff, object->un_pager.vnp.vnp_size); } vnode_pager_freepage(m[i]); @@ -792,9 +784,6 @@ vnode_pager_putpages(object, m, count, sync, rtvals) int rtval; struct vnode *vp; - if (object->flags & OBJ_VNODE_GONE) - return VM_PAGER_ERROR; - vp = object->handle; rtval = VOP_PUTPAGES(vp, m, count*PAGE_SIZE, sync, rtvals, 0); if (rtval == EOPNOTSUPP) |