author     alc <alc@FreeBSD.org>    1999-05-02 23:57:16 +0000
committer  alc <alc@FreeBSD.org>    1999-05-02 23:57:16 +0000
commit     5cb08a2652f36ddab7172faf6b766038472c1647 (patch)
tree       c47eaa3332628f6c725ca32dda81aa44d24e2ac2 /sys/vm
parent     c75d7e89c3e63bc9b8e9863a5cc985649edf5f9a (diff)
The VFS/BIO subsystem contained a number of hacks in order to optimize
piecemeal, middle-of-file writes for NFS. These hacks have caused no end
of trouble, especially when combined with mmap(). I've removed them.
Instead, NFS will issue a read-before-write to fully instantiate the
struct buf containing the write. NFS does, however, optimize piecemeal
appends to files. For most common file operations, you will not notice
the difference. The sole remaining fragment in the VFS/BIO system is
b_dirtyoff/end, which NFS uses to avoid cache coherency issues with
read-merge-write style operations. NFS also optimizes the
write-covers-entire-buffer case by avoiding the read-before-write. There
is quite a bit of room for further optimization in these areas.

The VM system marks pages fully-valid (AKA vm_page_t->valid =
VM_PAGE_BITS_ALL) in several places, most notably in vm_fault. This is
not correct operation. The vm_pager_get_pages() code is now responsible
for marking VM pages all-valid. A number of VM helper routines have been
added to aid in zeroing-out the invalid portions of a VM page prior to
the page being marked all-valid. This operation is necessary to properly
support mmap(). The zeroing occurs most often when dealing with file-EOF
situations.

Several bugs have been fixed in the NFS subsystem, including bugs in the
handling of file and directory EOF situations, and buf->b_flags
consistency issues relating to clearing B_ERROR & B_INVAL and handling
B_DONE.

getblk() and allocbuf() have been rewritten. B_CACHE operation is now
formally defined in comments and more straightforward in implementation.
B_CACHE for VMIO buffers is based on the validity of the backing store.
B_CACHE for non-VMIO buffers is based simply on whether the buffer is
B_INVAL or not (B_CACHE set if B_INVAL clear, and vice versa). biodone()
is now responsible for setting B_CACHE when a successful read completes.
B_CACHE is also set when a bdwrite() is initiated and when a bwrite() is
initiated. VFS VOP_BWRITE routines (there are only two - nfs_bwrite()
and bwrite()) are now expected to set B_CACHE. This means that bowrite()
and bawrite() also set B_CACHE indirectly.

There are a number of places in the code which were previously using
buf->b_bufsize (which is DEV_BSIZE aligned) when they should have been
using buf->b_bcount. These have been fixed. getblk() now clears B_DONE
on return because the rest of the system is so bad about dealing with
B_DONE.

Major fixes to NFS/TCP have been made. A server-side bug could cause
requests to be lost by the server due to nfs_realign() overwriting other
RPCs in the same TCP mbuf chain. The server's kernel must be recompiled
to get the benefit of the fixes.

Submitted by: Matthew Dillon <dillon@apollo.backplane.com>
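
The B_CACHE rule stated in the log is compact enough to sketch. The
following is a minimal illustration of the stated policy, not the
committed getblk()/allocbuf() code; buf_pages_all_valid() is a
hypothetical helper standing in for the backing-store validity check.

/*
 * Minimal sketch of the B_CACHE rule described in the log above.
 * Assumes kernel headers (<sys/param.h>, <sys/buf.h>) are in scope.
 * NOT the committed code; buf_pages_all_valid() is a hypothetical
 * helper that checks the validity of bp->b_pages[] across the first
 * b_bcount bytes of the buffer.
 */
static int
buf_should_be_cached(struct buf *bp)
{
	if ((bp->b_flags & B_VMIO) == 0) {
		/* non-VMIO: B_CACHE is simply the inverse of B_INVAL */
		return ((bp->b_flags & B_INVAL) == 0);
	}
	/* VMIO: B_CACHE tracks the validity of the backing store */
	return (buf_pages_all_valid(bp));
}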
Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/vm_fault.c    | 30
-rw-r--r--  sys/vm/vm_page.c     | 51
-rw-r--r--  sys/vm/vm_page.h     |  8
-rw-r--r--  sys/vm/vm_pager.c    |  5
-rw-r--r--  sys/vm/vm_pager.h    | 18
-rw-r--r--  sys/vm/vnode_pager.c |  5

6 files changed, 95 insertions, 22 deletions
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 047f10f..882d52e 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -66,7 +66,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_fault.c,v 1.100 1999/02/17 09:08:29 dillon Exp $
+ * $Id: vm_fault.c,v 1.101 1999/02/25 06:00:52 alc Exp $
*/
/*
@@ -409,6 +409,12 @@ readrest:
firstpindex = fs.first_pindex -
2*(VM_FAULT_READ_BEHIND + VM_FAULT_READ_AHEAD + 1);
+ /*
+ * note: partially valid pages cannot be
+ * included in the lookahead - NFS piecemeal
+ * writes will barf on it badly.
+ */
+
for(tmppindex = fs.first_pindex - 1;
tmppindex >= firstpindex;
--tmppindex) {
@@ -552,12 +558,16 @@ readrest:
}
fs.first_m = NULL;
+ /*
+ * Zero the page if necessary and mark it valid.
+ */
if ((fs.m->flags & PG_ZERO) == 0) {
vm_page_zero_fill(fs.m);
- }
- else
+ } else {
cnt.v_ozfod++;
+ }
cnt.v_zfod++;
+ fs.m->valid = VM_PAGE_BITS_ALL;
break; /* break to PAGE HAS BEEN FOUND */
} else {
if (fs.object != fs.first_object) {
@@ -788,14 +798,24 @@ readrest:
#endif
unlock_things(&fs);
- fs.m->valid = VM_PAGE_BITS_ALL;
- vm_page_flag_clear(fs.m, PG_ZERO);
+
+ /*
+ * Sanity check: page must be completely valid or it is not fit to
+ * map into user space. vm_pager_get_pages() ensures this.
+ */
+
+ if (fs.m->valid != VM_PAGE_BITS_ALL) {
+ vm_page_zero_invalid(fs.m, TRUE);
+ printf("Warning: page %p partially invalid on fault\n", fs.m);
+ }
pmap_enter(fs.map->pmap, vaddr, VM_PAGE_TO_PHYS(fs.m), prot, wired);
+
if (((fault_flags & VM_FAULT_WIRE_MASK) == 0) && (wired == 0)) {
pmap_prefault(fs.map->pmap, vaddr, fs.entry);
}
+ vm_page_flag_clear(fs.m, PG_ZERO);
vm_page_flag_set(fs.m, PG_MAPPED|PG_REFERENCED);
if (fault_flags & VM_FAULT_HOLD)
vm_page_hold(fs.m);
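
The sanity check added above relies on a helper that zeroes whatever
portions of the page are still invalid. The shape below is an
assumption inferred from this diff, not the committed routine, and it
presumes a pmap_zero_page_area()-style primitive for zeroing a
sub-range of a physical page.

/*
 * Sketch only: zero every DEV_BSIZE chunk whose valid bit is clear,
 * then optionally mark the whole page valid.  Assumes kernel headers
 * are in scope; pmap_zero_page_area() is assumed to zero
 * [off, off + len) within the page's physical memory.
 */
static void
page_zero_invalid_sketch(vm_page_t m, boolean_t setvalid)
{
	int i;

	for (i = 0; i < PAGE_SIZE / DEV_BSIZE; i++) {
		if ((m->valid & (1 << i)) == 0)
			pmap_zero_page_area(VM_PAGE_TO_PHYS(m),
			    i * DEV_BSIZE, DEV_BSIZE);
	}
	if (setvalid)
		m->valid = VM_PAGE_BITS_ALL;
}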
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index e07ea63..0d85a94 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -34,7 +34,7 @@
* SUCH DAMAGE.
*
* from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
- * $Id: vm_page.c,v 1.128 1999/03/19 05:21:03 alc Exp $
+ * $Id: vm_page.c,v 1.129 1999/04/05 19:38:29 julian Exp $
*/
/*
@@ -1460,14 +1460,16 @@ vm_page_bits(int base, int size)
}
/*
- * set a page valid and clean. May not block.
+ * vm_page_set_validclean:
*
- * In order to maintain consistancy due to the DEV_BSIZE granularity
- * of the valid bits, we have to zero non-DEV_BSIZE aligned portions of
- * the page at the beginning and end of the valid range when the
- * associated valid bits are not already set.
+ * Sets portions of a page valid and clean. The arguments are expected
+ * to be DEV_BSIZE aligned but if they aren't the bitmap is inclusive
+ * of any partial chunks touched by the range. The invalid portion of
+ * such chunks will be zero'd.
*
- * (base + size) must be less then or equal to PAGE_SIZE.
+ * This routine may not block.
+ *
+ * (base + size) must be less than or equal to PAGE_SIZE.
*/
void
vm_page_set_validclean(m, base, size)
@@ -1529,8 +1531,35 @@ vm_page_set_validclean(m, base, size)
pmap_clear_modify(VM_PAGE_TO_PHYS(m));
}
+#if 0
+
+void
+vm_page_set_dirty(m, base, size)
+ vm_page_t m;
+ int base;
+ int size;
+{
+ m->dirty |= vm_page_bits(base, size);
+}
+
+#endif
+
+void
+vm_page_clear_dirty(m, base, size)
+ vm_page_t m;
+ int base;
+ int size;
+{
+ m->dirty &= ~vm_page_bits(base, size);
+}
+
/*
- * set a page (partially) invalid. May not block.
+ * vm_page_set_invalid:
+ *
+ * Invalidates DEV_BSIZE'd chunks within a page. Both the
+ * valid and dirty bits for the affected areas are cleared.
+ *
+ * May not block.
*/
void
vm_page_set_invalid(m, base, size)
@@ -1540,9 +1569,9 @@ vm_page_set_invalid(m, base, size)
{
int bits;
- m->valid &= ~(bits = vm_page_bits(base, size));
- if (m->valid == 0)
- m->dirty &= ~bits;
+ bits = vm_page_bits(base, size);
+ m->valid &= ~bits;
+ m->dirty &= ~bits;
m->object->generation++;
}
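
The comments rewritten above hinge on a DEV_BSIZE-granular bitmap in
which any partially touched chunk is included. A hedged sketch of that
mask calculation follows; it is illustrative and not the kernel's
vm_page_bits() itself.

/*
 * Sketch: mask of DEV_BSIZE'd chunks touched by [base, base + size)
 * within a page.  Partial chunks at either end are included, matching
 * the comment on vm_page_set_validclean() above.  Assumes DEV_BSIZE
 * and PAGE_SIZE from <sys/param.h>.
 */
static int
page_bits_sketch(int base, int size)
{
	int first, last;

	if (size == 0)
		return (0);
	first = base / DEV_BSIZE;
	last = (base + size + DEV_BSIZE - 1) / DEV_BSIZE;
	return (((1 << last) - 1) & ~((1 << first) - 1));
}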
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 8072f66..abff794 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_page.h,v 1.58 1999/03/15 05:09:48 julian Exp $
+ * $Id: vm_page.h,v 1.59 1999/04/05 19:38:29 julian Exp $
*/
/*
@@ -101,6 +101,10 @@
* Fields in this structure are locked either by the lock on the
* object that the page belongs to (O) or by the lock on the page
* queues (P).
+ *
+ * The 'valid' and 'dirty' fields are distinct. A page may have dirty
+ * bits set without having associated valid bits set. This is used by
+ * NFS to implement piecemeal writes.
*/
TAILQ_HEAD(pglist, vm_page);
@@ -404,6 +408,8 @@ void vm_page_wire __P((vm_page_t));
void vm_page_unqueue __P((vm_page_t));
void vm_page_unqueue_nowakeup __P((vm_page_t));
void vm_page_set_validclean __P((vm_page_t, int, int));
+void vm_page_set_dirty __P((vm_page_t, int, int));
+void vm_page_clear_dirty __P((vm_page_t, int, int));
void vm_page_set_invalid __P((vm_page_t, int, int));
static __inline boolean_t vm_page_zero_fill __P((vm_page_t));
int vm_page_is_valid __P((vm_page_t, int, int));
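
The valid/dirty distinction documented in the hunk above can be made
concrete with a small illustration. The helper name is invented for
the example; the point is that a piecemeal NFS write may set dirty
bits for a chunk while leaving its valid bits clear.

/*
 * Illustration only: dirty a sub-range of a page without touching its
 * valid bits, as an NFS piecemeal write would.  The chunk becomes
 * valid later, e.g. via a read-before-write or a write covering the
 * entire chunk.
 */
static void
piecemeal_dirty_sketch(vm_page_t m, int base, int size)
{
	m->dirty |= vm_page_bits(base, size);
	/* m->valid is deliberately left alone */
}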
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index 36a905e..dbacceb 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -61,7 +61,7 @@
* any improvements or extensions that they make and grant Carnegie the
* rights to redistribute these changes.
*
- * $Id: vm_pager.c,v 1.44 1999/03/14 09:20:00 julian Exp $
+ * $Id: vm_pager.c,v 1.45 1999/04/11 02:16:27 eivind Exp $
*/
/*
@@ -523,6 +523,9 @@ vm_pager_chain_iodone(struct buf *nbp)
* Obtain a physical buffer and chain it to its parent buffer. When
* I/O completes, the parent buffer will be B_SIGNAL'd. Errors are
* automatically propagated to the parent.
+ *
+ * Since these are brand new buffers, we do not have to clear B_INVAL
+ * and B_ERROR because they are already clear.
*/
struct buf *
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index 82b6574..aff14ab 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)vm_pager.h 8.4 (Berkeley) 1/12/94
- * $Id: vm_pager.h,v 1.20 1999/01/24 02:32:15 dillon Exp $
+ * $Id: vm_pager.h,v 1.21 1999/03/14 09:20:00 julian Exp $
*/
/*
@@ -110,6 +110,14 @@ void flushchainbuf(struct buf *nbp);
void waitchainbuf(struct buf *bp, int count, int done);
void autochaindone(struct buf *bp);
+/*
+ * vm_pager_get_pages:
+ *
+ * Retrieve pages from the VM system in order to map them into an object
+ * ( or into VM space somewhere ). If the pagein was successful, we
+ * must fully validate it.
+ */
+
static __inline int
vm_pager_get_pages(
vm_object_t object,
@@ -117,7 +125,13 @@ vm_pager_get_pages(
int count,
int reqpage
) {
- return ((*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage));
+ int r;
+
+ r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage);
+ if (r == VM_PAGER_OK && m[reqpage]->valid != VM_PAGE_BITS_ALL) {
+ vm_page_zero_invalid(m[reqpage], TRUE);
+ }
+ return(r);
}
static __inline void
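
A hedged usage sketch of the contract the wrapper above establishes:
on VM_PAGER_OK the requested page comes back fully valid, with any
invalid sub-ranges already zeroed. The caller below is illustrative
only, not code from this commit.

/*
 * Illustrative caller: after a successful vm_pager_get_pages(), the
 * requested page may be assumed fully valid because the wrapper has
 * already zeroed any invalid portions.
 */
static vm_page_t
getpages_sketch(vm_object_t object, vm_page_t *marray, int count,
    int reqpage)
{
	if (vm_pager_get_pages(object, marray, count, reqpage) !=
	    VM_PAGER_OK)
		return (NULL);
	if (marray[reqpage]->valid != VM_PAGE_BITS_ALL)
		panic("getpages_sketch: page not fully valid");
	return (marray[reqpage]);
}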
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 628bec7..83f379a 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -38,7 +38,7 @@
* SUCH DAMAGE.
*
* from: @(#)vnode_pager.c 7.5 (Berkeley) 4/20/91
- * $Id: vnode_pager.c,v 1.106 1999/04/05 19:38:29 julian Exp $
+ * $Id: vnode_pager.c,v 1.107 1999/04/10 20:52:11 dt Exp $
*/
/*
@@ -789,7 +789,8 @@ vnode_pager_generic_getpages(vp, m, bytecount, reqpage)
* read.
*/
vm_page_set_validclean(mt, 0, size - tfoff);
- vm_page_zero_invalid(mt, FALSE);
+ /* handled by vm_fault now */
+ /* vm_page_zero_invalid(mt, FALSE); */
}
vm_page_flag_clear(mt, PG_ZERO);