summaryrefslogtreecommitdiffstats
path: root/sys/sys/bio.h
diff options
context:
space:
mode:
authoralc <alc@FreeBSD.org>1999-05-02 23:57:16 +0000
committeralc <alc@FreeBSD.org>1999-05-02 23:57:16 +0000
commit5cb08a2652f36ddab7172faf6b766038472c1647 (patch)
treec47eaa3332628f6c725ca32dda81aa44d24e2ac2 /sys/sys/bio.h
parentc75d7e89c3e63bc9b8e9863a5cc985649edf5f9a (diff)
downloadFreeBSD-src-5cb08a2652f36ddab7172faf6b766038472c1647.zip
FreeBSD-src-5cb08a2652f36ddab7172faf6b766038472c1647.tar.gz
The VFS/BIO subsystem contained a number of hacks in order to optimize
piecemeal, middle-of-file writes for NFS. These hacks have caused no end of trouble, especially when combined with mmap(). I've removed them. Instead, NFS will issue a read-before-write to fully instantiate the struct buf containing the write. NFS does, however, optimize piecemeal appends to files. For most common file operations, you will not notice the difference. The sole remaining fragment in the VFS/BIO system is b_dirtyoff/end, which NFS uses to avoid cache coherency issues with read-merge-write style operations. NFS also optimizes the write-covers-entire-buffer case by avoiding the read-before-write. There is quite a bit of room for further optimization in these areas. The VM system marks pages fully-valid (AKA vm_page_t->valid = VM_PAGE_BITS_ALL) in several places, most notably in vm_fault. This is not correct operation. The vm_pager_get_pages() code is now responsible for marking VM pages all-valid. A number of VM helper routines have been added to aid in zeroing-out the invalid portions of a VM page prior to the page being marked all-valid. This operation is necessary to properly support mmap(). The zeroing occurs most often when dealing with file-EOF situations. Several bugs have been fixed in the NFS subsystem, including bits handling file and directory EOF situations and buf->b_flags consistency issues relating to clearing B_ERROR & B_INVAL, and handling B_DONE. getblk() and allocbuf() have been rewritten. B_CACHE operation is now formally defined in comments and more straightforward in implementation. B_CACHE for VMIO buffers is based on the validity of the backing store. B_CACHE for non-VMIO buffers is based simply on whether the buffer is B_INVAL or not (B_CACHE set if B_INVAL clear, and vice versa). biodone() is now responsible for setting B_CACHE when a successful read completes. B_CACHE is also set when a bdwrite() is initiated and when a bwrite() is initiated. 
VFS VOP_BWRITE routines (there are only two - nfs_bwrite() and bwrite()) are now expected to set B_CACHE. This means that bowrite() and bawrite() also set B_CACHE indirectly. There are a number of places in the code which were previously using buf->b_bufsize (which is DEV_BSIZE aligned) when they should have been using buf->b_bcount. These have been fixed. getblk() now clears B_DONE on return because the rest of the system is so bad about dealing with B_DONE. Major fixes to NFS/TCP have been made. A server-side bug could cause requests to be lost by the server due to nfs_realign() overwriting other rpc's in the same TCP mbuf chain. The server's kernel must be recompiled to get the benefit of the fixes. Submitted by: Matthew Dillon <dillon@apollo.backplane.com>
Diffstat (limited to 'sys/sys/bio.h')
-rw-r--r--sys/sys/bio.h40
1 files changed, 36 insertions, 4 deletions
diff --git a/sys/sys/bio.h b/sys/sys/bio.h
index d2ce212..2e88ca7 100644
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@@ -36,7 +36,7 @@
* SUCH DAMAGE.
*
* @(#)buf.h 8.9 (Berkeley) 3/30/95
- * $Id: buf.h,v 1.64 1999/03/02 04:04:28 mckusick Exp $
+ * $Id: buf.h,v 1.65 1999/03/12 02:24:55 julian Exp $
*/
#ifndef _SYS_BUF_H_
@@ -78,6 +78,19 @@ struct iodone_chain {
/*
* The buffer header describes an I/O operation in the kernel.
+ *
+ * NOTES:
+ * b_bufsize, b_bcount. b_bufsize is the allocation size of the
+ * buffer, either DEV_BSIZE or PAGE_SIZE aligned. b_bcount is the
+ * originally requested buffer size and can serve as a bounds check
+ * against EOF. For most, but not all uses, b_bcount == b_bufsize.
+ *
+ * b_dirtyoff, b_dirtyend. Buffers support piecemeal, unaligned
+ * ranges of dirty data that need to be written to backing store.
+ * The range is typically clipped at b_bcount ( not b_bufsize ).
+ *
+ * b_resid. Number of bytes remaining in I/O. After an I/O operation
+ * completes, b_resid is usually 0 indicating 100% success.
*/
struct buf {
LIST_ENTRY(buf) b_hash; /* Hash chain. */
@@ -109,8 +122,10 @@ struct buf {
int b_dirtyend; /* Offset of end of dirty region. */
struct ucred *b_rcred; /* Read credentials reference. */
struct ucred *b_wcred; /* Write credentials reference. */
+#if 0
int b_validoff; /* Offset in buffer of valid region. */
int b_validend; /* Offset of end of valid region. */
+#endif
daddr_t b_pblkno; /* physical block number */
void *b_saveaddr; /* Original b_addr for physio. */
caddr_t b_savekva; /* saved kva for transfer while bouncing */
@@ -151,9 +166,24 @@ struct buf {
* Buffer vp reassignments are illegal in this case.
*
* B_CACHE This may only be set if the buffer is entirely valid.
- * The situation where B_DELWRI is set and B_CACHE gets
- * cleared MUST be committed to disk so B_DELWRI can
- * also be cleared.
+ * The situation where B_DELWRI is set and B_CACHE is
+ * clear MUST be committed to disk by getblk() so
+ * B_DELWRI can also be cleared. See the comments for
+ * getblk() in kern/vfs_bio.c. If B_CACHE is clear,
+ * the caller is expected to clear B_ERROR|B_INVAL,
+ * set B_READ, and initiate an I/O.
+ *
+ * The 'entire buffer' is defined to be the range from
+ * 0 through b_bcount.
+ *
+ * B_MALLOC Request that the buffer be allocated from the malloc
+ * pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned.
+ *
+ * B_VMIO Indicates that the buffer is tied into a VM object.
+ * The buffer's data is always PAGE_SIZE aligned even
+ * if b_bufsize and b_bcount are not. ( b_bufsize is
+ * always at least DEV_BSIZE aligned, though ).
+ *
*/
#define B_AGE 0x00000001 /* Move to age queue when I/O done. */
@@ -356,6 +386,7 @@ void cluster_write __P((struct buf *, u_quad_t));
int physio __P((void (*)(struct buf *), struct buf *, dev_t,
int, u_int (*)(struct buf *), struct uio *));
u_int minphys __P((struct buf *));
+void vfs_bio_set_validclean __P((struct buf *, int base, int size));
void vfs_bio_clrbuf __P((struct buf *));
void vfs_busy_pages __P((struct buf *, int clear_modify));
void vfs_unbusy_pages __P((struct buf *));
@@ -371,6 +402,7 @@ int allocbuf __P((struct buf *bp, int size));
void reassignbuf __P((struct buf *, struct vnode *));
void pbreassignbuf __P((struct buf *, struct vnode *));
struct buf *trypbuf __P((int *));
+
#endif /* KERNEL */
#endif /* !_SYS_BUF_H_ */
OpenPOWER on IntegriCloud