diff options
author | alc <alc@FreeBSD.org> | 1999-05-02 23:57:16 +0000 |
---|---|---|
committer | alc <alc@FreeBSD.org> | 1999-05-02 23:57:16 +0000 |
commit | 5cb08a2652f36ddab7172faf6b766038472c1647 (patch) | |
tree | c47eaa3332628f6c725ca32dda81aa44d24e2ac2 /sys/sys/bio.h | |
parent | c75d7e89c3e63bc9b8e9863a5cc985649edf5f9a (diff) | |
download | FreeBSD-src-5cb08a2652f36ddab7172faf6b766038472c1647.zip FreeBSD-src-5cb08a2652f36ddab7172faf6b766038472c1647.tar.gz |
The VFS/BIO subsystem contained a number of hacks in order to optimize
piecemeal, middle-of-file writes for NFS. These hacks have caused no
end of trouble, especially when combined with mmap(). I've removed
them. Instead, NFS will issue a read-before-write to fully
instantiate the struct buf containing the write. NFS does, however,
optimize piecemeal appends to files. For most common file operations,
you will not notice the difference. The sole remaining fragment in
the VFS/BIO system is b_dirtyoff/end, which NFS uses to avoid cache
coherency issues with read-merge-write style operations. NFS also
optimizes the write-covers-entire-buffer case by avoiding the
read-before-write. There is quite a bit of room for further
optimization in these areas.
The VM system marks pages fully-valid (AKA vm_page_t->valid =
VM_PAGE_BITS_ALL) in several places, most notably in vm_fault. This
is not correct operation. The vm_pager_get_pages() code is now
responsible for marking VM pages all-valid. A number of VM helper
routines have been added to aid in zeroing-out the invalid portions of
a VM page prior to the page being marked all-valid. This operation is
necessary to properly support mmap(). The zeroing occurs most often
when dealing with file-EOF situations. Several bugs have been fixed
in the NFS subsystem, including bits handling file and directory EOF
situations and buf->b_flags consistency issues relating to clearing
B_ERROR & B_INVAL, and handling B_DONE.
getblk() and allocbuf() have been rewritten. B_CACHE operation is now
formally defined in comments and more straightforward in
implementation. B_CACHE for VMIO buffers is based on the validity of
the backing store. B_CACHE for non-VMIO buffers is based simply on
whether the buffer is B_INVAL or not (B_CACHE set if B_INVAL clear,
and vice versa). biodone() is now responsible for setting B_CACHE
when a successful read completes. B_CACHE is also set when a bdwrite()
is initiated and when a bwrite() is initiated. VFS VOP_BWRITE
routines (there are only two - nfs_bwrite() and bwrite()) are now
expected to set B_CACHE. This means that bowrite() and bawrite() also
set B_CACHE indirectly.
There are a number of places in the code which were previously using
buf->b_bufsize (which is DEV_BSIZE aligned) when they should have
been using buf->b_bcount. These have been fixed. getblk() now clears
B_DONE on return because the rest of the system is so bad about
dealing with B_DONE.
Major fixes to NFS/TCP have been made. A server-side bug could cause
requests to be lost by the server due to nfs_realign() overwriting
other rpc's in the same TCP mbuf chain. The server's kernel must be
recompiled to get the benefit of the fixes.
Submitted by: Matthew Dillon <dillon@apollo.backplane.com>
Diffstat (limited to 'sys/sys/bio.h')
-rw-r--r-- | sys/sys/bio.h | 40 |
1 files changed, 36 insertions, 4 deletions
diff --git a/sys/sys/bio.h b/sys/sys/bio.h index d2ce212..2e88ca7 100644 --- a/sys/sys/bio.h +++ b/sys/sys/bio.h @@ -36,7 +36,7 @@ * SUCH DAMAGE. * * @(#)buf.h 8.9 (Berkeley) 3/30/95 - * $Id: buf.h,v 1.64 1999/03/02 04:04:28 mckusick Exp $ + * $Id: buf.h,v 1.65 1999/03/12 02:24:55 julian Exp $ */ #ifndef _SYS_BUF_H_ @@ -78,6 +78,19 @@ struct iodone_chain { /* * The buffer header describes an I/O operation in the kernel. + * + * NOTES: + * b_bufsize, b_bcount. b_bufsize is the allocation size of the + * buffer, either DEV_BSIZE or PAGE_SIZE aligned. b_bcount is the + * originally requested buffer size and can serve as a bounds check + * against EOF. For most, but not all uses, b_bcount == b_bufsize. + * + * b_dirtyoff, b_dirtyend. Buffers support piecemeal, unaligned + * ranges of dirty data that need to be written to backing store. + * The range is typically clipped at b_bcount ( not b_bufsize ). + * + * b_resid. Number of bytes remaining in I/O. After an I/O operation + * completes, b_resid is usually 0 indicating 100% success. */ struct buf { LIST_ENTRY(buf) b_hash; /* Hash chain. */ @@ -109,8 +122,10 @@ struct buf { int b_dirtyend; /* Offset of end of dirty region. */ struct ucred *b_rcred; /* Read credentials reference. */ struct ucred *b_wcred; /* Write credentials reference. */ +#if 0 int b_validoff; /* Offset in buffer of valid region. */ int b_validend; /* Offset of end of valid region. */ +#endif daddr_t b_pblkno; /* physical block number */ void *b_saveaddr; /* Original b_addr for physio. */ caddr_t b_savekva; /* saved kva for transfer while bouncing */ @@ -151,9 +166,24 @@ struct buf { * Buffer vp reassignments are illegal in this case. * * B_CACHE This may only be set if the buffer is entirely valid. - * The situation where B_DELWRI is set and B_CACHE gets - * cleared MUST be committed to disk so B_DELWRI can - * also be cleared. 
+ * The situation where B_DELWRI is set and B_CACHE is + * clear MUST be committed to disk by getblk() so + * B_DELWRI can also be cleared. See the comments for + * getblk() in kern/vfs_bio.c. If B_CACHE is clear, + * the caller is expected to clear B_ERROR|B_INVAL, + * set B_READ, and initiate an I/O. + * + * The 'entire buffer' is defined to be the range from + * 0 through b_bcount. + * + * B_MALLOC Request that the buffer be allocated from the malloc + * pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned. + * + * B_VMIO Indicates that the buffer is tied into an VM object. + * The buffer's data is always PAGE_SIZE aligned even + * if b_bufsize and b_bcount are not. ( b_bufsize is + * always at least DEV_BSIZE aligned, though ). + * */ #define B_AGE 0x00000001 /* Move to age queue when I/O done. */ @@ -356,6 +386,7 @@ void cluster_write __P((struct buf *, u_quad_t)); int physio __P((void (*)(struct buf *), struct buf *, dev_t, int, u_int (*)(struct buf *), struct uio *)); u_int minphys __P((struct buf *)); +void vfs_bio_set_validclean __P((struct buf *, int base, int size)); void vfs_bio_clrbuf __P((struct buf *)); void vfs_busy_pages __P((struct buf *, int clear_modify)); void vfs_unbusy_pages __P((struct buf *)); @@ -371,6 +402,7 @@ int allocbuf __P((struct buf *bp, int size)); void reassignbuf __P((struct buf *, struct vnode *)); void pbreassignbuf __P((struct buf *, struct vnode *)); struct buf *trypbuf __P((int *)); + #endif /* KERNEL */ #endif /* !_SYS_BUF_H_ */ |