summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: mm <mm@FreeBSD.org> 2010-05-15 07:07:38 +0000
committer: mm <mm@FreeBSD.org> 2010-05-15 07:07:38 +0000
commit: 018b8bae0947a733c31d9f7ccdbc15db17559e8b (patch)
tree: 95a98c40e554eb141ade648d883f5353aaa2244f
parent: 02073848fcd5fdf7354e9cbadb34bf5c1c67c896 (diff)
download: FreeBSD-src-018b8bae0947a733c31d9f7ccdbc15db17559e8b.zip
          FreeBSD-src-018b8bae0947a733c31d9f7ccdbc15db17559e8b.tar.gz
MFC r207481, r207956:
MFC r207481 [1]:
Add sysctl and loader tunable vfs.zfs.txg.write_limit_override.
This tunable improves fine-tuning of ZFS write throttling.

MFC r207956 [2]:
Fix possible hang when replaying large truncations.
OpenSolaris onnv revision: 7904:6a124a4ca9c5

PR: kern/146108 [1]
Suggested by: Nikolay Denev <ndenev at gmail.com> [1]
Obtained from: OpenSolaris (Bug ID 6761624) [2]
Approved by: pjd, delphij (mentor)
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c   6
-rw-r--r--  sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c  24
2 files changed, 30 insertions, 0 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
index 143b4b8..8650fa1 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/txg.c
@@ -38,6 +38,7 @@ static void txg_quiesce_thread(void *arg);
int zfs_txg_timeout = 30; /* max seconds worth of delta per txg */
extern int zfs_txg_synctime;
+extern uint64_t zfs_write_limit_override;
SYSCTL_DECL(_vfs_zfs);
SYSCTL_NODE(_vfs_zfs, OID_AUTO, txg, CTLFLAG_RW, 0,
@@ -48,6 +49,11 @@ SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, timeout, CTLFLAG_RDTUN, &zfs_txg_timeout, 0,
TUNABLE_INT("vfs.zfs.txg.synctime", &zfs_txg_synctime);
SYSCTL_INT(_vfs_zfs_txg, OID_AUTO, synctime, CTLFLAG_RDTUN, &zfs_txg_synctime,
0, "Target seconds to sync a txg");
+TUNABLE_QUAD("vfs.zfs.txg.write_limit_override", &zfs_write_limit_override);
+SYSCTL_QUAD(_vfs_zfs_txg, OID_AUTO, write_limit_override, CTLFLAG_RW,
+ &zfs_write_limit_override, 0,
+ "Override maximum size of a txg to this size in bytes, "
+ "value of 0 means don't override");
/*
* Prepare the txg subsystem.
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
index 030e56c..1e79091 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zil.c
@@ -1567,6 +1567,29 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
}
/*
+ * Replay of large truncates can end up needing additional txs
+ * and a different txg. If they are nested within the replay tx
+ * as below then a hang is possible. So we do the truncate here
+ * and redo the truncate later (a no-op) and update the sequence
+ * number whilst in the replay tx. Fortunately, it's safe to repeat
+ * a truncate if we crash and the truncate commits. A create over
+ * an existing file will also come in as a TX_TRUNCATE record.
+ *
+ * Note, remove of large files and renames over large files is
+ * handled by putting the deleted object on a stable list
+ * and if necessary force deleting the object outside of the replay
+ * transaction using the zr_replay_cleaner.
+ */
+ if (txtype == TX_TRUNCATE) {
+ *zr->zr_txgp = TXG_NOWAIT;
+ error = zr->zr_replay[TX_TRUNCATE](zr->zr_arg, zr->zr_lrbuf,
+ zr->zr_byteswap);
+ if (error)
+ goto bad;
+ zr->zr_byteswap = 0; /* only byteswap once */
+ }
+
+ /*
* We must now do two things atomically: replay this log record,
* and update the log header to reflect the fact that we did so.
* We use the DMU's ability to assign into a specific txg to do this.
@@ -1636,6 +1659,7 @@ zil_replay_log_record(zilog_t *zilog, lr_t *lr, void *zra, uint64_t claim_txg)
dprintf("pass %d, retrying\n", pass);
}
+bad:
ASSERT(error && error != ERESTART);
name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
dmu_objset_name(zr->zr_os, name);
OpenPOWER on IntegriCloud