diff options
author | mm <mm@FreeBSD.org> | 2010-05-15 07:07:38 +0000 |
---|---|---|
committer | mm <mm@FreeBSD.org> | 2010-05-15 07:07:38 +0000 |
commit | 018b8bae0947a733c31d9f7ccdbc15db17559e8b (patch) | |
tree | 95a98c40e554eb141ade648d883f5353aaa2244f | |
parent | 02073848fcd5fdf7354e9cbadb34bf5c1c67c896 (diff) | |
download | FreeBSD-src-018b8bae0947a733c31d9f7ccdbc15db17559e8b.zip FreeBSD-src-018b8bae0947a733c31d9f7ccdbc15db17559e8b.tar.gz |
MFC r207481, r207956:
MFC r207481 [1]:
Add sysctl and loader tunable vfs.zfs.txg.write_limit_override.
This tunable improves fine-tuning of ZFS write throttling.
MFC r207956 [2]:
Fix possible hang when replaying large truncations.
OpenSolaris onnv revision: 7904:6a124a4ca9c5
PR: kern/146108 [1]
Suggested by: Nikolay Denev <ndenev at gmail.com> [1]
Obtained from: OpenSolaris (Bug ID 6761624) [2]
Approved by: pjd, delphij (mentor)
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c | 6 | ||||
-rw-r--r-- | sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c | 24 |
2 files changed, 30 insertions, 0 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c index 143b4b8..8650fa1 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c @@ -38,6 +38,7 @@ static void txg_quiesce_thread(void *arg); int zfs_txg_timeout = 30; /* max seconds worth of delta per txg */ extern int zfs_txg_synctime; +extern uint64_t zfs_write_limit_override; SYSCTL_DECL(_vfs_zfs); SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0, @@ -48,6 +49,11 @@ SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, timeout, CTLFLAG_RDTUN, &zfs_txg_timeout, 0, TUNABLE_INT("vfs.zfs.txg.synctime", &zfs_txg_synctime); SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, synctime, CTLFLAG_RDTUN, &zfs_txg_synctime, 0, "Target seconds to sync a txg"); +TUNABLE_QUAD("vfs.zfs.txg.write_limit_override", &zfs_write_limit_override); +SYSCTL_QUAD(_vfs_zfs_txg, OID_AUTO, write_limit_override, CTLFLAG_RW, + &zfs_write_limit_override, 0, + "Override maximum size of a txg to this size in bytes, " + "value of 0 means don't override"); /* * Prepare the txg subsystem. diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c index 030e56c..1e79091 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c @@ -1567,6 +1567,29 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) } /* + * Replay of large truncates can end up needing additional txs + * and a different txg. If they are nested within the replay tx + * as below then a hang is possible. So we do the truncate here + * and redo the truncate later (a no-op) and update the sequence + * number whilst in the replay tx. Fortunately, it's safe to repeat + * a truncate if we crash and the truncate commits. A create over + * an existing file will also come in as a TX_TRUNCATE record. + * + * Note, remove of large files and renames over large files is + * handled by putting the deleted object on a stable list + * and if necessary force deleting the object outside of the replay + * transaction using the zr_replay_cleaner. + */ + if (txtype == TX_TRUNCATE) { + *zr->zr_txgp = TXG_NOWAIT; + error = zr->zr_replay[TX_TRUNCATE](zr->zr_arg, zr->zr_lrbuf, + zr->zr_byteswap); + if (error) + goto bad; + zr->zr_byteswap = 0; /* only byteswap once */ + } + + /* * We must now do two things atomically: replay this log record, * and update the log header to reflect the fact that we did so. * We use the DMU's ability to assign into a specific txg to do this. @@ -1636,6 +1659,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg) dprintf("pass %d, retrying\n", pass); } +bad: ASSERT(error && error != ERESTART); name = kmem_alloc(MAXNAMELEN, KM_SLEEP); dmu_objset_name(zr->zr_os, name); |