summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>2002-10-18 22:52:41 +0000
committerdillon <dillon@FreeBSD.org>2002-10-18 22:52:41 +0000
commitd155b8f135db19093c4bb4a6668074b484da2bb1 (patch)
tree731ae9c899653584bb216cb7e808cabc8ae7b32c /sys
parent181311508173b9c70205bd14450f104550fe7bac (diff)
downloadFreeBSD-src-d155b8f135db19093c4bb4a6668074b484da2bb1.zip
FreeBSD-src-d155b8f135db19093c4bb4a6668074b484da2bb1.tar.gz
Fix a file-rewrite performance case for UFS[2]. When rewriting portions
of a file in chunks that are less than the filesystem block size, if the data is not already cached the system will perform a read-before-write. The problem is that it does this on a block-by-block basis, breaking up the I/Os and making clustering impossible for the writes. Programs such as INN using cyclic file buffers suffer greatly. This problem is only going to get worse as we use larger and larger filesystem block sizes. The solution is to extend the sequential heuristic so UFS[2] can perform a far larger read and readahead when dealing with this case. (note: maximum disk write bandwidth is 27MB/sec thru filesystem) (note: filesystem blocksize in test is 8K (1K frag)) dd if=/dev/zero of=test.dat bs=1k count=2m conv=notrunc Before: (note half of these are reads) tty da0 da1 acd0 cpu tin tout KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s us ni sy in id 0 76 14.21 598 8.30 0.00 0 0.00 0.00 0 0.00 0 0 7 1 92 0 76 14.09 813 11.19 0.00 0 0.00 0.00 0 0.00 0 0 9 5 86 0 76 14.28 821 11.45 0.00 0 0.00 0.00 0 0.00 0 0 8 1 91 After: (note half of these are reads) tty da0 da1 acd0 cpu tin tout KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s us ni sy in id 0 76 63.62 434 26.99 0.00 0 0.00 0.00 0 0.00 0 0 18 1 80 0 76 63.58 424 26.30 0.00 0 0.00 0.00 0 0.00 0 0 17 2 82 0 76 63.82 438 27.32 0.00 0 0.00 0.00 0 0.00 1 0 19 2 79 Reviewed by: mckusick Approved by: re X-MFC after: immediately (was heavily tested in -stable for 4 months)
Diffstat (limited to 'sys')
-rw-r--r--sys/ufs/ffs/ffs_balloc.c24
-rw-r--r--sys/ufs/ffs/ffs_vnops.c7
-rw-r--r--sys/ufs/ufs/ufs_extern.h12
3 files changed, 36 insertions, 7 deletions
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c
index d9e8a08..4256f97 100644
--- a/sys/ufs/ffs/ffs_balloc.c
+++ b/sys/ufs/ffs/ffs_balloc.c
@@ -341,7 +341,14 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size,
}
brelse(bp);
if (flags & BA_CLRBUF) {
- error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
+ if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ error = cluster_read(vp, ip->i_size, lbn,
+ (int)fs->fs_bsize, NOCRED,
+ MAXBSIZE, seqcount, &nbp);
+ } else {
+ error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ }
if (error) {
brelse(nbp);
goto fail;
@@ -788,8 +795,21 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size,
return (0);
}
brelse(bp);
+ /*
+ * If requested clear invalid portions of the buffer. If we
+ * have to do a read-before-write (typical if BA_CLRBUF is set),
+ * try to do some read-ahead in the sequential case to reduce
+ * the number of I/O transactions.
+ */
if (flags & BA_CLRBUF) {
- error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT;
+ if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) {
+ error = cluster_read(vp, ip->i_size, lbn,
+ (int)fs->fs_bsize, NOCRED,
+ MAXBSIZE, seqcount, &nbp);
+ } else {
+ error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp);
+ }
if (error) {
brelse(nbp);
goto fail;
diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c
index add1318..626b156 100644
--- a/sys/ufs/ffs/ffs_vnops.c
+++ b/sys/ufs/ffs/ffs_vnops.c
@@ -749,9 +749,12 @@ ffs_write(ap)
resid = uio->uio_resid;
osize = ip->i_size;
- flags = 0;
+ if (seqcount > BA_SEQMAX)
+ flags = BA_SEQMAX << BA_SEQSHIFT;
+ else
+ flags = seqcount << BA_SEQSHIFT;
if ((ioflag & IO_SYNC) && !DOINGASYNC(vp))
- flags = IO_SYNC;
+ flags |= IO_SYNC;
#ifdef ENABLE_VFS_IOOPT
if (object && (object->flags & OBJ_OPT)) {
diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h
index 47fc4ae..f7e5df9 100644
--- a/sys/ufs/ufs/ufs_extern.h
+++ b/sys/ufs/ufs/ufs_extern.h
@@ -110,10 +110,16 @@ void softdep_releasefile(struct inode *);
int softdep_slowdown(struct vnode *);
/*
- * Flags to low-level allocation routines.
- * The low 16-bits are reserved for IO_ flags from vnode.h.
+ * Flags to low-level allocation routines. The low 16-bits are reserved
+ * for IO_ flags from vnode.h.
+ *
+ * Note: The general vfs code typically limits the sequential heuristic
+ * count to 127. See sequential_heuristic() in kern/vfs_vnops.c
*/
-#define BA_CLRBUF 0x00010000 /* Request alloced buffer be cleared. */
+#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */
#define BA_METAONLY 0x00020000 /* Return indirect block buffer. */
+#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */
+#define BA_SEQSHIFT 24
+#define BA_SEQMAX 0x7F
#endif /* !_UFS_UFS_EXTERN_H_ */
OpenPOWER on IntegriCloud