Diffstat (limited to 'sys')
-rw-r--r--   sys/kern/vfs_bio.c        |  12
-rw-r--r--   sys/nfsclient/nfs.h       |   2
-rw-r--r--   sys/nfsclient/nfs_bio.c   |  59
-rw-r--r--   sys/nfsclient/nfs_vnops.c |  23
-rw-r--r--   sys/vm/vm_page.c          |  14
-rw-r--r--   sys/vm/vnode_pager.c      |  29

6 files changed, 128 insertions(+), 11 deletions(-)
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index f250367..29646d9 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2271,9 +2271,21 @@ loop:
 		 * to softupdates re-dirtying the buffer.  In the latter
 		 * case, B_CACHE is set after the first write completes,
 		 * preventing further loops.
+		 * NOTE!  b*write() sets B_CACHE.  If we cleared B_CACHE
+		 * above while extending the buffer, we cannot allow the
+		 * buffer to remain with B_CACHE set after the write
+		 * completes or it will represent a corrupt state.  To
+		 * deal with this we set B_NOCACHE to scrap the buffer
+		 * after the write.
+		 *
+		 * We might be able to do something fancy, like setting
+		 * B_CACHE in bwrite() except if B_DELWRI is already set,
+		 * so the below call doesn't set B_CACHE, but that gets real
+		 * confusing.  This is much easier.
 		 */
 		if ((bp->b_flags & (B_CACHE|B_DELWRI)) == B_DELWRI) {
+			bp->b_flags |= B_NOCACHE;
 			BUF_WRITE(bp);
 			goto loop;
 		}
diff --git a/sys/nfsclient/nfs.h b/sys/nfsclient/nfs.h
index 5811883..42db54c 100644
--- a/sys/nfsclient/nfs.h
+++ b/sys/nfsclient/nfs.h
@@ -288,6 +288,8 @@
 void	nfs_clearcommit(struct mount *);
 int	nfs_writebp(struct buf *, int, struct thread *);
 int	nfs_fsinfo(struct nfsmount *, struct vnode *, struct ucred *,
	    struct thread *);
+int	nfs_meta_setsize(struct vnode *, struct ucred *,
+	    struct thread *, u_quad_t);
 
 #endif /* _KERNEL */
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index 754d561..2f3a44b 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -206,8 +206,14 @@ nfs_getpages(struct vop_getpages_args *ap)
 			vm_page_set_validclean(m, 0, size - toff);
 			/* handled by vm_fault now */
 			/* vm_page_zero_invalid(m, TRUE); */
+		} else {
+			/*
+			 * Read operation was short.  If no error occurred
+			 * we may have hit a zero-fill section.  We simply
+			 * leave valid set to 0.
+			 */
+			;
 		}
-
 		if (i != ap->a_reqpage) {
 			/*
 			 * Whether or not to leave the page activated is up in
@@ -831,9 +837,7 @@ again:
 			else
 				bcount = np->n_size - (off_t)lbn * biosize;
 		}
-
 		bp = nfs_getcacheblk(vp, lbn, bcount, td);
-
 		if (uio->uio_offset + n > np->n_size) {
 			np->n_size = uio->uio_offset + n;
 			np->n_flag |= NMODIFIED;
@@ -1299,11 +1303,13 @@ nfs_doio(struct buf *bp, struct ucred *cr, struct thread *td)
 	io.iov_len = uiop->uio_resid = bp->b_bcount;
 	io.iov_base = bp->b_data;
 	uiop->uio_rw = UIO_READ;
+
 	switch (vp->v_type) {
 	case VREG:
 		uiop->uio_offset = ((off_t)bp->b_blkno) * DEV_BSIZE;
 		nfsstats.read_bios++;
 		error = nfs_readrpc(vp, uiop, cr);
+
 		if (!error) {
 			if (uiop->uio_resid) {
 				/*
@@ -1315,7 +1321,7 @@ nfs_doio(struct buf *bp, struct ucred *cr, struct thread *td)
 				 * writes, but that is not possible any longer.
 				 */
 				int nread = bp->b_bcount - uiop->uio_resid;
-				int left = bp->b_bcount - nread;
+				int left = uiop->uio_resid;
 
 				if (left > 0)
 					bzero((char *)bp->b_data + nread, left);
@@ -1485,3 +1491,48 @@ nfs_doio(struct buf *bp, struct ucred *cr, struct thread *td)
 	bufdone(bp);
 	return (error);
 }
+
+/*
+ * Used to aid in handling ftruncate() operations on the NFS client side.
+ * Truncation creates a number of special problems for NFS.  We have to
+ * throw away VM pages and buffer cache buffers that are beyond EOF, and
+ * we have to properly handle VM pages or (potentially dirty) buffers
+ * that straddle the truncation point.
+ */
+int
+nfs_meta_setsize(struct vnode *vp, struct ucred *cred, struct thread *td, u_quad_t nsize)
+{
+	struct nfsnode *np = VTONFS(vp);
+	u_quad_t tsize = np->n_size;
+	int biosize = vp->v_mount->mnt_stat.f_iosize;
+	int error = 0;
+
+	np->n_size = nsize;
+
+	if (np->n_size < tsize) {
+		struct buf *bp;
+		daddr_t lbn;
+		int bufsize;
+
+		/*
+		 * vtruncbuf() doesn't get the buffer overlapping the
+		 * truncation point.  We may have a B_DELWRI and/or B_CACHE
+		 * buffer that now needs to be truncated.
+		 */
+		error = vtruncbuf(vp, cred, td, nsize, biosize);
+		lbn = nsize / biosize;
+		bufsize = nsize & (biosize - 1);
+		bp = nfs_getcacheblk(vp, lbn, bufsize, td);
+		if (bp->b_dirtyoff > bp->b_bcount)
+			bp->b_dirtyoff = bp->b_bcount;
+		if (bp->b_dirtyend > bp->b_bcount)
+			bp->b_dirtyend = bp->b_bcount;
+		bp->b_flags |= B_RELBUF;	/* don't leave garbage around */
+		brelse(bp);
+	} else {
+		vnode_pager_setsize(vp, nsize);
+	}
+	return (error);
+}
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index ae81f0f..be9f533b 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -643,7 +643,18 @@ nfs_setattr(struct vop_setattr_args *ap)
 			 */
 			if (vp->v_mount->mnt_flag & MNT_RDONLY)
 				return (EROFS);
-			vnode_pager_setsize(vp, vap->va_size);
+
+			/*
+			 * We run vnode_pager_setsize() early (why?);
+			 * we must set np->n_size now to avoid vinvalbuf
+			 * V_SAVE races that might setsize a lower
+			 * value.
+			 */
+
+			tsize = np->n_size;
+			error = nfs_meta_setsize(vp, ap->a_cred,
+			    ap->a_td, vap->va_size);
+
 			if (np->n_flag & NMODIFIED) {
 				if (vap->va_size == 0)
 					error = nfs_vinvalbuf(vp, 0,
@@ -656,8 +667,7 @@ nfs_setattr(struct vop_setattr_args *ap)
 					return (error);
 				}
 			}
-			tsize = np->n_size;
-			np->n_size = np->n_vattr.va_size = vap->va_size;
+			np->n_vattr.va_size = vap->va_size;
 		};
 	} else if ((vap->va_mtime.tv_sec != VNOVAL || vap->va_atime.tv_sec != VNOVAL) &&
 	    (np->n_flag & NMODIFIED) &&
@@ -1049,10 +1059,12 @@ nfs_readrpc(struct vnode *vp, struct uio *uiop, struct ucred *cred)
 		m_freem(mrep);
 		tsiz -= retlen;
 		if (v3) {
-			if (eof || retlen == 0)
+			if (eof || retlen == 0) {
 				tsiz = 0;
-		} else if (retlen < len)
+			}
+		} else if (retlen < len) {
 			tsiz = 0;
+		}
 	}
 nfsmout:
 	return (error);
@@ -3114,3 +3126,4 @@ nfsfifo_close(struct vop_close_args *ap)
 	}
 	return (VOCALL(fifo_vnodeop_p, VOFFSET(vop_close), ap));
 }
+
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 0eb06fc..abc4194 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1630,10 +1630,24 @@ vm_page_set_validclean(vm_page_t m, int base, int size)
 	 * use this opportunity to clear the PG_NOSYNC flag.  If a process
 	 * takes a write fault on a MAP_NOSYNC memory area the flag will
 	 * be set again.
+	 *
+	 * We set valid bits inclusive of any overlap, but we can only
+	 * clear dirty bits for DEV_BSIZE chunks that are fully within
+	 * the range.
 	 */
 	pagebits = vm_page_bits(base, size);
 	m->valid |= pagebits;
+#if 0	/* NOT YET */
+	if ((frag = base & (DEV_BSIZE - 1)) != 0) {
+		frag = DEV_BSIZE - frag;
+		base += frag;
+		size -= frag;
+		if (size < 0)
+			size = 0;
+	}
+	pagebits = vm_page_bits(base, size & (DEV_BSIZE - 1));
+#endif
 	m->dirty &= ~pagebits;
 	if (base == 0 && size == PAGE_SIZE) {
 		pmap_clear_modify(m);
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 9e6363b..b0348c6 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -298,14 +298,18 @@ vnode_pager_setsize(vp, nsize)
 	}
 	/*
 	 * this gets rid of garbage at the end of a page that is now
-	 * only partially backed by the vnode...
+	 * only partially backed by the vnode.
+	 *
+	 * XXX for some reason (I don't know yet), if we take a
+	 * completely invalid page and mark it partially valid
+	 * it can screw up NFS reads, so we don't allow the case.
 	 */
 	if (nsize & PAGE_MASK) {
 		vm_offset_t kva;
 		vm_page_t m;
 
 		m = vm_page_lookup(object, OFF_TO_IDX(nsize));
-		if (m) {
+		if (m && m->valid) {
 			int base = (int)nsize & PAGE_MASK;
 			int size = PAGE_SIZE - base;
 
@@ -318,6 +322,20 @@ vnode_pager_setsize(vp, nsize)
 			vm_pager_unmap_page(kva);
 
 			/*
+			 * XXX work around SMP data integrity race
+			 * by unmapping the page from user processes.
+			 * The garbage we just cleared may be mapped
+			 * to a user process running on another cpu
+			 * and this code is not running through normal
+			 * I/O channels which handle SMP issues for
+			 * us, so unmap page to synchronize all cpus.
+			 *
+			 * XXX should vm_pager_unmap_page() have
+			 * dealt with this?
+			 */
+			vm_page_protect(m, VM_PROT_NONE);
+
+			/*
 			 * Clear out partial-page dirty bits.  This
 			 * has the side effect of setting the valid
 			 * bits, but that is ok.  There are a bunch
@@ -325,6 +343,10 @@ vnode_pager_setsize(vp, nsize)
 			 * m->dirty == VM_PAGE_BITS_ALL.  The file EOF
 			 * case is one of them.  If the page is still
 			 * partially dirty, make it fully dirty.
+			 *
+			 * note that we do not clear out the valid
+			 * bits.  This would prevent bogus_page
+			 * replacement from working properly.
 			 */
 			vm_page_set_validclean(m, base, size);
 			if (m->dirty != 0)
@@ -965,6 +987,9 @@ vnode_pager_generic_putpages(vp, m, bytecount, flags, rtvals)
 	 * may not properly clear the dirty bits for the entire page (which
 	 * could be VM_PAGE_BITS_ALL due to the page having been mmap()d).
 	 * With the page locked we are free to fix-up the dirty bits here.
+	 *
+	 * We do not under any circumstances truncate the valid bits, as
+	 * this will screw up bogus page replacement.
 	 */
 	if (maxsize + poffset > object->un_pager.vnp.vnp_size) {
 		if (object->un_pager.vnp.vnp_size > poffset) {
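The straddling-buffer case that nfs_meta_setsize() guards against can be driven entirely from userland: dirty some buffers on an NFS mount, then ftruncate() the file to a length that is not a multiple of the mount's block size (f_iosize), so the new EOF lands inside a still-dirty buffer. Below is a minimal sketch of that scenario; the mount point path and the 8 KB block size are assumptions for illustration, not part of this change.

/*
 * Userland sketch of the straddling-truncation case handled by
 * nfs_meta_setsize().  Assumes an NFS mount at /mnt/nfs with an
 * 8 KB f_iosize; both are hypothetical placeholders.
 */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "/mnt/nfs/trunc-test";	/* hypothetical path */
	char buf[16384];				/* two 8 KB buffers */
	int fd;

	if ((fd = open(path, O_RDWR | O_CREAT | O_TRUNC, 0644)) < 0) {
		perror("open");
		return (1);
	}
	/* Dirty two full block-size buffers in the client's cache. */
	memset(buf, 'A', sizeof(buf));
	if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf)) {
		perror("write");
		return (1);
	}
	/*
	 * 10000 is not 8 KB-aligned, so the second, still-dirty buffer
	 * straddles the new EOF.  vtruncbuf() only discards buffers
	 * wholly past EOF; nfs_meta_setsize() then clips b_dirtyoff and
	 * b_dirtyend on the straddling buffer and releases it with
	 * B_RELBUF so no stale bytes past EOF survive in the cache.
	 */
	if (ftruncate(fd, 10000) < 0) {
		perror("ftruncate");
		return (1);
	}
	return (close(fd));
}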