summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2017-03-17 07:56:20 +0000
committermav <mav@FreeBSD.org>2017-03-17 07:56:20 +0000
commit38d7b659cae939cb4f963c833a02011cbff0c673 (patch)
treeb38dea45dae3c6b046140a7f3fb614899982ecec /sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
parent4e0e9017929d742b8b45a542ca73a0158e08a172 (diff)
downloadFreeBSD-src-38d7b659cae939cb4f963c833a02011cbff0c673.zip
FreeBSD-src-38d7b659cae939cb4f963c833a02011cbff0c673.tar.gz
MFC r308782:
After some ZIL changes 6 years ago zil_slog_limit got partially broken due to zl_itx_list_sz not updated when async itx'es upgraded to sync. Actually because of other changes about that time zl_itx_list_sz is not really required to implement the functionality, so this patch removes some unneeded broken code and variables. Original idea of zil_slog_limit was to reduce chance of SLOG abuse by single heavy logger, that increased latency for other (more latency critical) loggers, by pushing heavy log out into the main pool instead of SLOG. Beside huge latency increase for heavy writers, this implementation caused double write of all data, since the log records were explicitly prepared for SLOG. Since we now have I/O scheduler, I've found it can be much more efficient to reduce priority of heavy logger SLOG writes from ZIO_PRIORITY_SYNC_WRITE to ZIO_PRIORITY_ASYNC_WRITE, while still leave them on SLOG. Existing ZIL implementation had problem with space efficiency when it has to write large chunks of data into log blocks of limited size. In some cases efficiency stopped to almost as low as 50%. In case of ZIL stored on spinning rust, that also reduced log write speed in half, since head had to uselessly fly over allocated but not written areas. This change improves the situation by offloading problematic operations from z*_log_write() to zil_lwb_commit(), which knows real situation of log blocks allocation and can split large requests into pieces much more efficiently. Also as side effect it removes one of two data copy operations done by ZIL code WR_COPIED case. While there, untangle and unify code of z*_log_write() functions. Also zfs_log_write() alike to zvol_log_write() can now handle writes crossing block boundary, that may also improve efficiency if ZPL is made to do that. Sponsored by: iXsystems, Inc.
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c37
1 files changed, 15 insertions, 22 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
index 6933ccd..048bd9d 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zfs_log.c
@@ -459,20 +459,17 @@ void
zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
znode_t *zp, offset_t off, ssize_t resid, int ioflag)
{
+ uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
- boolean_t slogging;
uintptr_t fsync_cnt;
- ssize_t immediate_write_sz;
if (zil_replaying(zilog, tx) || zp->z_unlinked)
return;
- immediate_write_sz = (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
- ? 0 : zfs_immediate_write_sz;
-
- slogging = spa_has_slogs(zilog->zl_spa) &&
- (zilog->zl_logbias == ZFS_LOGBIAS_LATENCY);
- if (resid > immediate_write_sz && !slogging && resid <= zp->z_blksz)
+ if (zilog->zl_logbias == ZFS_LOGBIAS_THROUGHPUT)
+ write_state = WR_INDIRECT;
+ else if (!spa_has_slogs(zilog->zl_spa) &&
+ resid >= zfs_immediate_write_sz)
write_state = WR_INDIRECT;
else if (ioflag & (FSYNC | FDSYNC))
write_state = WR_COPIED;
@@ -486,30 +483,26 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
while (resid) {
itx_t *itx;
lr_write_t *lr;
- ssize_t len;
+ itx_wr_state_t wr_state = write_state;
+ ssize_t len = resid;
- /*
- * If the write would overflow the largest block then split it.
- */
- if (write_state != WR_INDIRECT && resid > ZIL_MAX_LOG_DATA)
- len = SPA_OLD_MAXBLOCKSIZE >> 1;
- else
- len = resid;
+ if (wr_state == WR_COPIED && resid > ZIL_MAX_COPIED_DATA)
+ wr_state = WR_NEED_COPY;
+ else if (wr_state == WR_INDIRECT)
+ len = MIN(blocksize - P2PHASE(off, blocksize), resid);
itx = zil_itx_create(txtype, sizeof (*lr) +
- (write_state == WR_COPIED ? len : 0));
+ (wr_state == WR_COPIED ? len : 0));
lr = (lr_write_t *)&itx->itx_lr;
- if (write_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
+ if (wr_state == WR_COPIED && dmu_read(zp->z_zfsvfs->z_os,
zp->z_id, off, len, lr + 1, DMU_READ_NO_PREFETCH) != 0) {
zil_itx_destroy(itx);
itx = zil_itx_create(txtype, sizeof (*lr));
lr = (lr_write_t *)&itx->itx_lr;
- write_state = WR_NEED_COPY;
+ wr_state = WR_NEED_COPY;
}
- itx->itx_wr_state = write_state;
- if (write_state == WR_NEED_COPY)
- itx->itx_sod += len;
+ itx->itx_wr_state = wr_state;
lr->lr_foid = zp->z_id;
lr->lr_offset = off;
lr->lr_length = len;
OpenPOWER on IntegriCloud