summaryrefslogtreecommitdiffstats
path: root/sys/cam
diff options
context:
space:
mode:
authormav <mav@FreeBSD.org>2015-08-19 17:41:49 +0000
committermav <mav@FreeBSD.org>2015-08-19 17:41:49 +0000
commit374a9ddfc6c70f65a571a496c971b2dc17a4cc37 (patch)
treed650366a02a4ec964febf28c736529f6b863d815 /sys/cam
parent4e7de07e7eb40f6e8b0bca752fd51eca9425ccc6 (diff)
downloadFreeBSD-src-374a9ddfc6c70f65a571a496c971b2dc17a4cc37.zip
FreeBSD-src-374a9ddfc6c70f65a571a496c971b2dc17a4cc37.tar.gz
MFC r286320: Issue all reads of single XCOPY segment simultaneously.
During vMotion and Clone operations, VMware by default runs multiple sequential 4MB XCOPY requests at the same time. If CTL issues reads sequentially in 1MB chunks for each XCOPY command, reads from different commands are not detected as sequential by the serseq option code and are allowed to execute simultaneously. Such a read pattern confused the ZFS prefetcher, causing suboptimal disk access. Issuing all reads at the same time makes the serseq code work properly, serializing reads both within each XCOPY command and between them. My tests with a ZFS pool of 14 disks in RAID10 show that prefetcher efficiency improved from 37% to 99.7%, copying speed improved by 10-60%, and average read latency was halved on the HDD layer and reduced by five times on the zvol layer.
Diffstat (limited to 'sys/cam')
-rw-r--r--sys/cam/ctl/ctl_tpc.c9
1 files changed, 4 insertions, 5 deletions
diff --git a/sys/cam/ctl/ctl_tpc.c b/sys/cam/ctl/ctl_tpc.c
index 490cddd..662ee3d 100644
--- a/sys/cam/ctl/ctl_tpc.c
+++ b/sys/cam/ctl/ctl_tpc.c
@@ -817,7 +817,7 @@ tpc_process_b2b(struct tpc_list *list)
struct scsi_ec_segment_b2b *seg;
struct scsi_ec_cscd_dtsp *sdstp, *ddstp;
struct tpc_io *tior, *tiow;
- struct runl run, *prun;
+ struct runl run;
uint64_t sl, dl;
off_t srclba, dstlba, numbytes, donebytes, roundbytes;
int numlba;
@@ -889,8 +889,7 @@ tpc_process_b2b(struct tpc_list *list)
list->segsectors = numbytes / dstblock;
donebytes = 0;
TAILQ_INIT(&run);
- prun = &run;
- list->tbdio = 1;
+ list->tbdio = 0;
while (donebytes < numbytes) {
roundbytes = numbytes - donebytes;
if (roundbytes > TPC_MAX_IO_SIZE) {
@@ -942,8 +941,8 @@ tpc_process_b2b(struct tpc_list *list)
tiow->io->io_hdr.ctl_private[CTL_PRIV_FRONTEND].ptr = tiow;
TAILQ_INSERT_TAIL(&tior->run, tiow, rlinks);
- TAILQ_INSERT_TAIL(prun, tior, rlinks);
- prun = &tior->run;
+ TAILQ_INSERT_TAIL(&run, tior, rlinks);
+ list->tbdio++;
donebytes += roundbytes;
srclba += roundbytes / srcblock;
dstlba += roundbytes / dstblock;
OpenPOWER on IntegriCloud