summary refs log tree commit diff stats
path: root/sys/kern
diff options
context:
space:
mode:
author dillon <dillon@FreeBSD.org> 2001-05-24 07:22:27 +0000
committer dillon <dillon@FreeBSD.org> 2001-05-24 07:22:27 +0000
commit a179ee09ab9ca2d9d1d09dc4752c53a13609f5e9 (patch)
tree faca8401754525a67aa26f144230806cf238e370 /sys/kern
parent a26134411c10ba2364d3d85686667b8a87f0015f (diff)
download FreeBSD-src-a179ee09ab9ca2d9d1d09dc4752c53a13609f5e9.zip
FreeBSD-src-a179ee09ab9ca2d9d1d09dc4752c53a13609f5e9.tar.gz
This patch implements O_DIRECT about 80% of the way. It takes a patchset
Tor created a while ago, removes the raw I/O piece (which has cache coherency problems), and adds a buffer cache / VM freeing piece. Essentially, this patch causes O_DIRECT I/O to not be left in the cache, but does not prevent it from going through the cache, hence the 80%. For the last 20% we need a method by which the I/O can be issued directly to a buffer supplied by the user process, bypassing the buffer cache entirely while still maintaining cache coherency. I also have the code working under -stable, but the changes made to sys/file.h may not be MFCable, so an MFC is not on the table yet.
Submitted by: tegge, dillon
Diffstat (limited to 'sys/kern')
-rw-r--r-- sys/kern/vfs_bio.c 9
-rw-r--r-- sys/kern/vfs_cluster.c 9
-rw-r--r-- sys/kern/vfs_vnops.c 4
3 files changed, 20 insertions, 2 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index b06625b..246fc4c 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -1249,7 +1249,7 @@ brelse(struct buf * bp)
/* unlock */
BUF_UNLOCK(bp);
- bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF);
+ bp->b_flags &= ~(B_ASYNC | B_NOCACHE | B_AGE | B_RELBUF | B_DIRECT);
bp->b_ioflags &= ~BIO_ORDERED;
if ((bp->b_flags & B_DELWRI) == 0 && (bp->b_xflags & BX_VNDIRTY))
panic("brelse: not dirty");
@@ -1264,6 +1264,8 @@ brelse(struct buf * bp)
* biodone() to requeue an async I/O on completion. It is also used when
* known good buffers need to be requeued but we think we may need the data
* again soon.
+ *
+ * XXX we should be able to leave the B_RELBUF hint set on completion.
*/
void
bqrelse(struct buf * bp)
@@ -1355,12 +1357,15 @@ vfs_vmio_release(bp)
vm_page_flag_clear(m, PG_ZERO);
/*
* Might as well free the page if we can and it has
- * no valid data.
+ * no valid data. We also free the page if the
+ * buffer was used for direct I/O
*/
if ((bp->b_flags & B_ASYNC) == 0 && !m->valid && m->hold_count == 0) {
vm_page_busy(m);
vm_page_protect(m, VM_PROT_NONE);
vm_page_free(m);
+ } else if (bp->b_flags & B_DIRECT) {
+ vm_page_try_to_free(m);
} else if (vm_page_count_severe()) {
vm_page_try_to_cache(m);
}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 0eb47bd..c9c09cb 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -505,6 +505,15 @@ cluster_callback(bp)
tbp->b_dirtyoff = tbp->b_dirtyend = 0;
tbp->b_flags &= ~B_INVAL;
tbp->b_ioflags &= ~BIO_ERROR;
+ /*
+ * XXX the bdwrite()/bqrelse() issued during
+ * cluster building clears B_RELBUF (see bqrelse()
+ * comment). If direct I/O was specified, we have
+ * to restore it here to allow the buffer and VM
+ * to be freed.
+ */
+ if (tbp->b_flags & B_DIRECT)
+ tbp->b_flags |= B_RELBUF;
}
bufdone(tbp);
}
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index fd13579..de7a7ce 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -352,6 +352,8 @@ vn_read(fp, uio, cred, flags, p)
ioflag = 0;
if (fp->f_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
+ if (fp->f_flag & O_DIRECT)
+ ioflag |= IO_DIRECT;
VOP_LEASE(vp, p, cred, LEASE_READ);
vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
if ((flags & FOF_OFFSET) == 0)
@@ -393,6 +395,8 @@ vn_write(fp, uio, cred, flags, p)
ioflag |= IO_APPEND;
if (fp->f_flag & FNONBLOCK)
ioflag |= IO_NDELAY;
+ if (fp->f_flag & O_DIRECT)
+ ioflag |= IO_DIRECT;
if ((fp->f_flag & O_FSYNC) ||
(vp->v_mount && (vp->v_mount->mnt_flag & MNT_SYNCHRONOUS)))
ioflag |= IO_SYNC;
OpenPOWER on IntegriCloud