diff options
author | dillon <dillon@FreeBSD.org> | 2002-10-18 22:52:41 +0000 |
---|---|---|
committer | dillon <dillon@FreeBSD.org> | 2002-10-18 22:52:41 +0000 |
commit | d155b8f135db19093c4bb4a6668074b484da2bb1 (patch) | |
tree | 731ae9c899653584bb216cb7e808cabc8ae7b32c /sys/ufs | |
parent | 181311508173b9c70205bd14450f104550fe7bac (diff) | |
download | FreeBSD-src-d155b8f135db19093c4bb4a6668074b484da2bb1.zip FreeBSD-src-d155b8f135db19093c4bb4a6668074b484da2bb1.tar.gz |
Fix a file-rewrite performance case for UFS[2]. When rewriting portions
of a file in chunks that are less than the filesystem block size, if the
data is not already cached, the system will perform a read-before-write.
The problem is that it does this on a block-by-block basis, breaking up the
I/Os and making clustering impossible for the writes. Programs such
as INN using cyclic file buffers suffer greatly. This problem is only going
to get worse as we use larger and larger filesystem block sizes.
The solution is to extend the sequential heuristic so UFS[2] can perform
a far larger read and readahead when dealing with this case.
(note: maximum disk write bandwidth is 27MB/sec thru filesystem)
(note: filesystem blocksize in test is 8K (1K frag))
dd if=/dev/zero of=test.dat bs=1k count=2m conv=notrunc
Before: (note half of these are reads)
tty da0 da1 acd0 cpu
tin tout KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s us ni sy in id
0 76 14.21 598 8.30 0.00 0 0.00 0.00 0 0.00 0 0 7 1 92
0 76 14.09 813 11.19 0.00 0 0.00 0.00 0 0.00 0 0 9 5 86
0 76 14.28 821 11.45 0.00 0 0.00 0.00 0 0.00 0 0 8 1 91
After: (note half of these are reads)
tty da0 da1 acd0 cpu
tin tout KB/t tps MB/s KB/t tps MB/s KB/t tps MB/s us ni sy in id
0 76 63.62 434 26.99 0.00 0 0.00 0.00 0 0.00 0 0 18 1 80
0 76 63.58 424 26.30 0.00 0 0.00 0.00 0 0.00 0 0 17 2 82
0 76 63.82 438 27.32 0.00 0 0.00 0.00 0 0.00 1 0 19 2 79
Reviewed by: mckusick
Approved by: re
X-MFC after: immediately (was heavily tested in -stable for 4 months)
Diffstat (limited to 'sys/ufs')
-rw-r--r-- | sys/ufs/ffs/ffs_balloc.c | 24 | ||||
-rw-r--r-- | sys/ufs/ffs/ffs_vnops.c | 7 | ||||
-rw-r--r-- | sys/ufs/ufs/ufs_extern.h | 12 |
3 files changed, 36 insertions, 7 deletions
diff --git a/sys/ufs/ffs/ffs_balloc.c b/sys/ufs/ffs/ffs_balloc.c index d9e8a08..4256f97 100644 --- a/sys/ufs/ffs/ffs_balloc.c +++ b/sys/ufs/ffs/ffs_balloc.c @@ -341,7 +341,14 @@ ffs_balloc_ufs1(struct vnode *vp, off_t startoffset, int size, } brelse(bp); if (flags & BA_CLRBUF) { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; + if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { + error = cluster_read(vp, ip->i_size, lbn, + (int)fs->fs_bsize, NOCRED, + MAXBSIZE, seqcount, &nbp); + } else { + error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + } if (error) { brelse(nbp); goto fail; @@ -788,8 +795,21 @@ ffs_balloc_ufs2(struct vnode *vp, off_t startoffset, int size, return (0); } brelse(bp); + /* + * If requested clear invalid portions of the buffer. If we + * have to do a read-before-write (typical if BA_CLRBUF is set), + * try to do some read-ahead in the sequential case to reduce + * the number of I/O transactions. 
+ */ if (flags & BA_CLRBUF) { - error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + int seqcount = (flags & BA_SEQMASK) >> BA_SEQSHIFT; + if (seqcount && (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { + error = cluster_read(vp, ip->i_size, lbn, + (int)fs->fs_bsize, NOCRED, + MAXBSIZE, seqcount, &nbp); + } else { + error = bread(vp, lbn, (int)fs->fs_bsize, NOCRED, &nbp); + } if (error) { brelse(nbp); goto fail; diff --git a/sys/ufs/ffs/ffs_vnops.c b/sys/ufs/ffs/ffs_vnops.c index add1318..626b156 100644 --- a/sys/ufs/ffs/ffs_vnops.c +++ b/sys/ufs/ffs/ffs_vnops.c @@ -749,9 +749,12 @@ ffs_write(ap) resid = uio->uio_resid; osize = ip->i_size; - flags = 0; + if (seqcount > BA_SEQMAX) + flags = BA_SEQMAX << BA_SEQSHIFT; + else + flags = seqcount << BA_SEQSHIFT; if ((ioflag & IO_SYNC) && !DOINGASYNC(vp)) - flags = IO_SYNC; + flags |= IO_SYNC; #ifdef ENABLE_VFS_IOOPT if (object && (object->flags & OBJ_OPT)) { diff --git a/sys/ufs/ufs/ufs_extern.h b/sys/ufs/ufs/ufs_extern.h index 47fc4ae..f7e5df9 100644 --- a/sys/ufs/ufs/ufs_extern.h +++ b/sys/ufs/ufs/ufs_extern.h @@ -110,10 +110,16 @@ void softdep_releasefile(struct inode *); int softdep_slowdown(struct vnode *); /* - * Flags to low-level allocation routines. - * The low 16-bits are reserved for IO_ flags from vnode.h. + * Flags to low-level allocation routines. The low 16-bits are reserved + * for IO_ flags from vnode.h. + * + * Note: The general vfs code typically limits the sequential heuristic + * count to 127. See sequential_heuristic() in kern/vfs_vnops.c */ -#define BA_CLRBUF 0x00010000 /* Request alloced buffer be cleared. */ +#define BA_CLRBUF 0x00010000 /* Clear invalid areas of buffer. */ #define BA_METAONLY 0x00020000 /* Return indirect block buffer. */ +#define BA_SEQMASK 0x7F000000 /* Bits holding seq heuristic. */ +#define BA_SEQSHIFT 24 +#define BA_SEQMAX 0x7F #endif /* !_UFS_UFS_EXTERN_H_ */ |