From d40593dd9033d39a5e4cc32915c5eb28f3e858b6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 7 Jul 2014 16:38:58 +0200 Subject: block/backup: Fix hang for unaligned image size When doing a block backup of an image with an unaligned size (with respect to the BACKUP_CLUSTER_SIZE), qemu would check the allocation status of sectors after the end of the image. bdrv_is_allocated() returns a result that is valid for 0 sectors in this case, so the backup job ran into an endless loop. Stop looping when we see a result that is valid for 0 sectors; at that point we have reached EOF. The test case looks somewhat unrelated at first sight because I originally tried to reproduce a different suspected bug that turned out not to exist. It is still a good test case, though, and it accidentally found this one. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/backup.c | 2 +- tests/qemu-iotests/028 | 27 ++++- tests/qemu-iotests/028.out | 269 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 296 insertions(+), 2 deletions(-) diff --git a/block/backup.c b/block/backup.c index 7978ae2..d0b0225 100644 --- a/block/backup.c +++ b/block/backup.c @@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque) BACKUP_SECTORS_PER_CLUSTER - i, &n); i += n; - if (alloced == 1) { + if (alloced == 1 || n == 0) { break; } } diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028 index a99e4fa..d5718c5 100755 --- a/tests/qemu-iotests/028 +++ b/tests/qemu-iotests/028 @@ -33,7 +33,8 @@ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + rm -f "${TEST_IMG}.copy" + _cleanup_test_img } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -41,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.rc . ./common.filter . ./common.pattern +. ./common.qemu # Any format supporting backing files except vmdk and qcow which do not support # smaller backing files. @@ -99,6 +101,29 @@ _check_test_img # Rebase it on top of its base image $QEMU_IMG rebase -b "$TEST_IMG.base" "$TEST_IMG" +echo +echo block-backup +echo + +qemu_comm_method="monitor" +_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk +h=$QEMU_HANDLE +QEMU_COMM_TIMEOUT=1 + +_send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)" +qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs" +_send_qemu_cmd $h 'quit' "" + +# Base image sectors +TEST_IMG="${TEST_IMG}.copy" io readv $(( offset )) 512 1024 32 + +# Image sectors +TEST_IMG="${TEST_IMG}.copy" io readv $(( offset + 512 )) 512 1024 64 + +# Zero sectors beyond end of base image +TEST_IMG="${TEST_IMG}.copy" io_zero readv $(( offset + 32 * 1024 )) 512 1024 32 + + _check_test_img # success, all done diff --git a/tests/qemu-iotests/028.out b/tests/qemu-iotests/028.out index 8affb7f..38099e4 100644 --- a/tests/qemu-iotests/028.out +++ b/tests/qemu-iotests/028.out @@ -465,5 +465,274 @@ read 512/512 bytes at offset 3221257728 read 512/512 bytes at offset 3221258752 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image.
+ +block-backup + +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) ddrdridrivdrivedrive_drive_bdrive_badrive_bacdrive_backdrive_backudrive_backupdrive_backup drive_backup ddrive_backup didrive_backup disdrive_backup diskdrive_backup disk drive_backup disk /drive_backup disk /hdrive_backup disk /hodrive_backup disk /homdrive_backup disk /homedrive_backup disk /home/drive_backup disk /home/kdrive_backup disk /home/kwdrive_backup disk /home/kwodrive_backup disk /home/kwoldrive_backup disk /home/kwolfdrive_backup disk /home/kwolf/drive_backup disk /home/kwolf/sdrive_backup disk /home/kwolf/sodrive_backup disk /home/kwolf/soudrive_backup disk /home/kwolf/sourdrive_backup disk /home/kwolf/sourcdrive_backup disk /home/kwolf/sourcedrive_backup disk /home/kwolf/source/drive_backup disk /home/kwolf/source/qdrive_backup disk /home/kwolf/source/qedrive_backup disk /home/kwolf/source/qemdrive_backup disk /home/kwolf/source/qemudrive_backup disk /home/kwolf/source/qemu/drive_backup disk /home/kwolf/source/qemu/tdrive_backup disk /home/kwolf/source/qemu/tedrive_backup disk /home/kwolf/source/qemu/tesdrive_backup disk /home/kwolf/source/qemu/testdrive_backup disk /home/kwolf/source/qemu/testsdrive_backup disk /home/kwolf/source/qemu/tests/drive_backup disk /home/kwolf/source/qemu/tests/qdrive_backup disk /home/kwolf/source/qemu/tests/qedrive_backup disk /home/kwolf/source/qemu/tests/qemdrive_backup disk /home/kwolf/source/qemu/tests/qemudrive_backup disk /home/kwolf/source/qemu/tests/qemu-drive_backup disk /home/kwolf/source/qemu/tests/qemu-idrive_backup disk /home/kwolf/source/qemu/tests/qemu-iodrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotedrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotesdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotestdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotestsdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/drive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/sdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scrdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scradrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scratdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scratcdrive_backup disk TEST_DIRdrive_backup disk TEST_DIR/drive_backup disk TEST_DIR/tdrive_backup disk TEST_DIR/t.drive_backup disk TEST_DIR/t.qdrive_backup disk TEST_DIR/t.qcdrive_backup disk TEST_DIR/t.qcodrive_backup disk TEST_DIR/t.qcowdrive_backup disk TEST_DIR/t.qcow2drive_backup disk TEST_DIR/t.qcow2.drive_backup disk TEST_DIR/t.qcow2.cdrive_backup disk TEST_DIR/t.qcow2.codrive_backup disk TEST_DIR/t.qcow2.copdrive_backup disk TEST_DIR/t.qcow2.copy +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=4294968832 backing_file='TEST_DIR/t.qcow2.base' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +(qemu) iininfinfoinfo info binfo blinfo bloinfo blocinfo blockinfo block-info block-jinfo block-joinfo block-jobinfo block-jobs +Type backup, device disk: Completed 0 of 4294968832 bytes, speed limit 0 bytes/s +iininfinfoinfo info binfo blinfo bloinfo blocinfo blockinfo block-info block-jinfo block-joinfo block-jobinfo block-jobs +No active jobs +=== IO: pattern 195 +read 512/512 bytes at offset 3221194240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221195264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX 
ops/sec) +read 512/512 bytes at offset 3221196288 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221197312 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221198336 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221199360 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221200384 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221201408 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221202432 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221203456 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221204480 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221205504 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221206528 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221207552 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221208576 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221209600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221210624 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221211648 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221212672 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221213696 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221214720 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221215744 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221216768 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221217792 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221218816 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221219840 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221220864 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221221888 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221222912 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221223936 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221224960 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221225984 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +=== IO: pattern 196 +read 512/512 bytes at offset 3221194752 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221195776 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221196800 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221197824 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221198848 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and 
XXX ops/sec) +read 512/512 bytes at offset 3221199872 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221200896 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221201920 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221202944 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221203968 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221204992 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221206016 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221207040 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221208064 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221209088 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221210112 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221211136 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221212160 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221213184 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221214208 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221215232 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221216256 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221217280 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221218304 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221219328 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221220352 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221221376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221222400 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221223424 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221224448 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221225472 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221226496 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221227520 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221228544 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221229568 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221230592 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221231616 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221232640 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221233664 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221234688 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 
512/512 bytes at offset 3221235712 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221236736 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221237760 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221238784 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221239808 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221240832 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221241856 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221242880 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221243904 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221244928 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221245952 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221246976 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221248000 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221249024 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221250048 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221251072 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221252096 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221253120 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221254144 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221255168 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221256192 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221257216 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221258240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221259264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +=== IO: pattern 0 +read 512/512 bytes at offset 3221227008 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221228032 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221229056 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221230080 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221231104 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221232128 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221233152 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221234176 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221235200 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221236224 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221237248 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 
512/512 bytes at offset 3221238272 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221239296 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221240320 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221241344 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221242368 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221243392 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221244416 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221245440 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221246464 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221247488 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221248512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221249536 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221250560 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221251584 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221252608 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221253632 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221254656 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221255680 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221256704 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221257728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221258752 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. *** done -- cgit v1.1 From 01fb2705bd19a6e9c1207446793064dbd141df5f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 7 Jul 2014 17:00:37 +0200 Subject: block: Fix bdrv_is_allocated() return value bdrv_is_allocated() should return either 0 or 1 in successful cases. We're lucky that currently, the callers that rely on this (e.g. because they check for ret == 1) don't seem to break badly. They just might skip some optimisation or in the case of qemu-io 'map' print separate lines where a single line would suffice. In theory, a wrong allocation status could lead to image corruption with certain operations, so let's fix this quickly. 
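To see why the raw masked value breaks callers, note that BDRV_BLOCK_ALLOCATED is not bit 0, so "ret & BDRV_BLOCK_ALLOCATED" yields the flag value itself rather than 1, and "!!" normalizes it. A minimal, self-contained sketch (the flag value 0x10 is only an assumption for illustration, not the real QEMU definition):

#include <stdio.h>

/* Stand-in for QEMU's BDRV_BLOCK_ALLOCATED; the exact value is an
 * assumption here -- the point is only that it is not bit 0. */
#define BLOCK_ALLOCATED 0x10

static int is_allocated_buggy(int status)
{
    return status & BLOCK_ALLOCATED;     /* returns 0x10, not 1 */
}

static int is_allocated_fixed(int status)
{
    return !!(status & BLOCK_ALLOCATED); /* normalized to 0 or 1 */
}

int main(void)
{
    int status = BLOCK_ALLOCATED;
    printf("buggy: %d (so 'ret == 1' checks fail)\n", is_allocated_buggy(status));
    printf("fixed: %d\n", is_allocated_fixed(status));
    return 0;
}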
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 8800a6b..c9629a4 100644 --- a/block.c +++ b/block.c @@ -4040,7 +4040,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } - return (ret & BDRV_BLOCK_ALLOCATED); + return !!(ret & BDRV_BLOCK_ALLOCATED); } /* -- cgit v1.1 From b47ec2c4562117728be36ec2edfbdf9ef2868c6e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:01 +0200 Subject: block: prefer aio_poll to qemu_aio_wait Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 2 +- blockjob.c | 2 +- qemu-io-cmds.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index c9629a4..510430d 100644 --- a/block.c +++ b/block.c @@ -471,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, co = qemu_coroutine_create(bdrv_create_co_entry); qemu_coroutine_enter(co, &cco); while (cco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(qemu_get_aio_context(), true); } } diff --git a/blockjob.c b/blockjob.c index 67a64ea..ca0b4e2 100644 --- a/blockjob.c +++ b/blockjob.c @@ -187,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job) job->opaque = &data; block_job_cancel(job); while (data.ret == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(bs), true); } return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; } diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 60c1ceb..c503fc6 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count, co = qemu_coroutine_create(co_write_zeroes_entry); qemu_coroutine_enter(co, &data); while (!data.done) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(bs), true); } if (data.ret < 0) { return data.ret; @@ -2027,7 +2027,7 @@ static const cmdinfo_t resume_cmd = { static int wait_break_f(BlockDriverState *bs, int argc, char **argv) { while (!bdrv_debug_is_suspended(bs, argv[1])) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(bs), true); } return 0; -- cgit v1.1 From 87f68d318222563822b5c6b28192215fc4b4e441 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:02 +0200 Subject: block: drop aio functions that operate on the main AioContext The main AioContext should be accessed explicitly via qemu_get_aio_context(). Most of the time, using it is not the right thing to do. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- aio-posix.c | 4 ++-- aio-win32.c | 6 +++--- include/block/aio.h | 17 ++--------------- include/block/blockjob.h | 4 ++-- include/block/coroutine.h | 2 +- main-loop.c | 21 --------------------- tests/test-thread-pool.c | 4 ++-- 7 files changed, 12 insertions(+), 46 deletions(-) diff --git a/aio-posix.c b/aio-posix.c index f921d4f..44c4df3 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -125,7 +125,7 @@ static bool aio_dispatch(AioContext *ctx) bool progress = false; /* - * We have to walk very carefully in case qemu_aio_set_fd_handler is + * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. */ node = QLIST_FIRST(&ctx->aio_handlers); @@ -183,7 +183,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* * If there are callbacks left that have been queued, we need to call them. * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for qemu_aio_wait loops). 
+ * does not need a complete flush (as is the case for aio_poll loops). */ if (aio_bh_poll(ctx)) { blocking = false; diff --git a/aio-win32.c b/aio-win32.c index 23f4e5b..c12f61e 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -102,7 +102,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* * If there are callbacks left that have been queued, we need to call then. * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for qemu_aio_wait loops). + * does not need a complete flush (as is the case for aio_poll loops). */ if (aio_bh_poll(ctx)) { blocking = false; @@ -115,7 +115,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* * Then dispatch any pending callbacks from the GSource. * - * We have to walk very carefully in case qemu_aio_set_fd_handler is + * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. */ node = QLIST_FIRST(&ctx->aio_handlers); @@ -177,7 +177,7 @@ bool aio_poll(AioContext *ctx, bool blocking) blocking = false; /* we have to walk very carefully in case - * qemu_aio_set_fd_handler is called while we're walking */ + * aio_set_fd_handler is called while we're walking */ node = QLIST_FIRST(&ctx->aio_handlers); while (node) { AioHandler *tmp; diff --git a/include/block/aio.h b/include/block/aio.h index a92511b..d81250c 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -220,7 +220,7 @@ bool aio_poll(AioContext *ctx, bool blocking); #ifdef CONFIG_POSIX /* Register a file descriptor and associated callbacks. Behaves very similarly * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will - * be invoked when using qemu_aio_wait(). + * be invoked when using aio_poll(). * * Code that invokes AIO completion functions should rely on this function * instead of qemu_set_fd_handler[2]. @@ -234,7 +234,7 @@ void aio_set_fd_handler(AioContext *ctx, /* Register an event notifier and associated callbacks. Behaves very similarly * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks - * will be invoked when using qemu_aio_wait(). + * will be invoked when using aio_poll(). * * Code that invokes AIO completion functions should rely on this function * instead of event_notifier_set_handler. @@ -251,19 +251,6 @@ GSource *aio_get_g_source(AioContext *ctx); /* Return the ThreadPool bound to this AioContext */ struct ThreadPool *aio_get_thread_pool(AioContext *ctx); -/* Functions to operate on the main QEMU AioContext. */ - -bool qemu_aio_wait(void); -void qemu_aio_set_event_notifier(EventNotifier *notifier, - EventNotifierHandler *io_read); - -#ifdef CONFIG_POSIX -void qemu_aio_set_fd_handler(int fd, - IOHandler *io_read, - IOHandler *io_write, - void *opaque); -#endif - /** * aio_timer_new: * @ctx: the aio context diff --git a/include/block/blockjob.h b/include/block/blockjob.h index f3cf63f..60aa835 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -74,7 +74,7 @@ struct BlockJob { * Set to true if the job should cancel itself. The flag must * always be tested just before toggling the busy flag from false * to true. After a job has been cancelled, it should only yield - * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. + * if #aio_poll will ("sooner or later") reenter the coroutine. 
*/ bool cancelled; @@ -87,7 +87,7 @@ struct BlockJob { /** * Set to false by the job while it is in a quiescent state, where * no I/O is pending and the job has yielded on any condition - * that is not detected by #qemu_aio_wait, such as a timer. + * that is not detected by #aio_poll, such as a timer. */ bool busy; diff --git a/include/block/coroutine.h b/include/block/coroutine.h index a1797ae..b408f96 100644 --- a/include/block/coroutine.h +++ b/include/block/coroutine.h @@ -212,7 +212,7 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns); * Yield the coroutine for a given duration * * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be - * resumed when using qemu_aio_wait(). + * resumed when using aio_poll(). */ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, int64_t ns); diff --git a/main-loop.c b/main-loop.c index 8a85493..3cc79f8 100644 --- a/main-loop.c +++ b/main-loop.c @@ -498,24 +498,3 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) { return aio_bh_new(qemu_aio_context, cb, opaque); } - -bool qemu_aio_wait(void) -{ - return aio_poll(qemu_aio_context, true); -} - -#ifdef CONFIG_POSIX -void qemu_aio_set_fd_handler(int fd, - IOHandler *io_read, - IOHandler *io_write, - void *opaque) -{ - aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, opaque); -} -#endif - -void qemu_aio_set_event_notifier(EventNotifier *notifier, - EventNotifierHandler *io_read) -{ - aio_set_event_notifier(qemu_aio_context, notifier, io_read); -} diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c index aa156bc..f40b7fc 100644 --- a/tests/test-thread-pool.c +++ b/tests/test-thread-pool.c @@ -83,7 +83,7 @@ static void co_test_cb(void *opaque) data->ret = 0; active--; - /* The test continues in test_submit_co, after qemu_aio_wait_all... */ + /* The test continues in test_submit_co, after aio_poll... */ } static void test_submit_co(void) @@ -98,7 +98,7 @@ static void test_submit_co(void) g_assert_cmpint(active, ==, 1); g_assert_cmpint(data.ret, ==, -EINPROGRESS); - /* qemu_aio_wait_all will execute the rest of the coroutine. */ + /* aio_poll will execute the rest of the coroutine. */ while (data.ret == -EINPROGRESS) { aio_poll(ctx, true); -- cgit v1.1 From ef508f427b348c7f0ef2bfe7c080fe5fcaee9f6b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:03 +0200 Subject: test-aio: fix GSource-based timer test The current test depends too much on the implementation of the AioContext GSource. Just iterate on the main loop until the callback has been invoked the right number of times. 
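The same idiom can be reproduced with plain GLib; a minimal sketch, assuming only glib-2.0 (tick and n are made-up names): loop on the observable condition instead of asserting what any single iteration of the GSource does.

#include <glib.h>

static gboolean tick(gpointer opaque)
{
    int *n = opaque;
    (*n)++;
    return *n < 2 ? G_SOURCE_CONTINUE : G_SOURCE_REMOVE; /* fire twice */
}

int main(void)
{
    int n = 0;
    g_timeout_add(10 /* ms */, tick, &n);

    /* Iterate the default main context until the callback has run the
     * expected number of times, rather than asserting how many
     * iterations that takes. */
    while (n < 2) {
        g_main_context_iteration(NULL, TRUE);
    }
    g_assert_cmpint(n, ==, 2);
    return 0;
}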
Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/test-aio.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test-aio.c b/tests/test-aio.c index e5f8b55..264dab9 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -806,17 +806,16 @@ static void test_source_timer_schedule(void) g_usleep(1 * G_USEC_PER_SEC); g_assert_cmpint(data.n, ==, 0); - g_assert(g_main_context_iteration(NULL, false)); + g_assert(g_main_context_iteration(NULL, true)); g_assert_cmpint(data.n, ==, 1); + expiry += data.ns; - /* The comment above was not kidding when it said this wakes up itself */ - do { - g_assert(g_main_context_iteration(NULL, true)); - } while (qemu_clock_get_ns(data.clock_type) <= expiry); - g_usleep(1 * G_USEC_PER_SEC); - g_main_context_iteration(NULL, false); + while (data.n < 2) { + g_main_context_iteration(NULL, true); + } g_assert_cmpint(data.n, ==, 2); + g_assert(qemu_clock_get_ns(data.clock_type) > expiry); aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL); close(pipefd[0]); -- cgit v1.1 From 0ceb849bd336a5f9b6e1ed56d45cf5773d251ad8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:04 +0200 Subject: AioContext: speed up aio_notify In many cases, the call to event_notifier_set in aio_notify is unnecessary. In particular, if we are executing aio_dispatch, or if aio_poll is not blocking, we know that we will soon get to the next loop iteration (if necessary); the thread that hosts the AioContext's event loop does not need any nudging. The patch includes a Promela formal model that shows that this really works and does not need any further complication such as generation counts. It needs a memory barrier though. The generation counts are not needed because any change to ctx->dispatching after the memory barrier is okay for aio_notify. If it changes from zero to one, it is the right thing to skip event_notifier_set. If it changes from one to zero, the event_notifier_set is unnecessary but harmless. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- aio-posix.c | 34 +++++++++++++++- async.c | 19 ++++++++- docs/aio_notify.promela | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ include/block/aio.h | 9 +++++ 4 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 docs/aio_notify.promela diff --git a/aio-posix.c b/aio-posix.c index 44c4df3..2eada2e 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -175,11 +175,38 @@ static bool aio_dispatch(AioContext *ctx) bool aio_poll(AioContext *ctx, bool blocking) { AioHandler *node; + bool was_dispatching; int ret; bool progress; + was_dispatching = ctx->dispatching; progress = false; + /* aio_notify can avoid the expensive event_notifier_set if + * everything (file descriptors, bottom halves, timers) will + * be re-evaluated before the next blocking poll(). This happens + * in two cases: + * + * 1) when aio_poll is called with blocking == false + * + * 2) when we are called after poll(). If we are called before + * poll(), bottom halves will not be re-evaluated and we need + * aio_notify() if blocking == true. + * + * The first aio_dispatch() only does something when AioContext is + * running as a GSource, and in that case aio_poll is used only + * with blocking == false, so this optimization is already quite + * effective. However, the code is ugly and should be restructured + * to have a single aio_dispatch() call. To do this, we need to + * reorganize aio_poll into a prepare/poll/dispatch model like + * glib's. 
+ * + * If we're in a nested event loop, ctx->dispatching might be true. + * In that case we can restore it just before returning, but we + * have to clear it now. + */ + aio_set_dispatching(ctx, !blocking); + /* * If there are callbacks left that have been queued, we need to call them. * Do not call select in this case, because it is possible that the caller @@ -190,12 +217,14 @@ bool aio_poll(AioContext *ctx, bool blocking) progress = true; } + /* Re-evaluate condition (1) above. */ + aio_set_dispatching(ctx, !blocking); if (aio_dispatch(ctx)) { progress = true; } if (progress && !blocking) { - return true; + goto out; } ctx->walking_handlers++; @@ -234,9 +263,12 @@ bool aio_poll(AioContext *ctx, bool blocking) } /* Run dispatch even if there were no readable fds to run timers */ + aio_set_dispatching(ctx, true); if (aio_dispatch(ctx)) { progress = true; } +out: + aio_set_dispatching(ctx, was_dispatching); return progress; } diff --git a/async.c b/async.c index 5b6fe6b..34af0b2 100644 --- a/async.c +++ b/async.c @@ -26,6 +26,7 @@ #include "block/aio.h" #include "block/thread-pool.h" #include "qemu/main-loop.h" +#include "qemu/atomic.h" /***********************************************************/ /* bottom halves (can be seen as timers which expire ASAP) */ @@ -247,9 +248,25 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx) return ctx->thread_pool; } +void aio_set_dispatching(AioContext *ctx, bool dispatching) +{ + ctx->dispatching = dispatching; + if (!dispatching) { + /* Write ctx->dispatching before reading e.g. bh->scheduled. + * Optimization: this is only needed when we're entering the "unsafe" + * phase where other threads must call event_notifier_set. + */ + smp_mb(); + } +} + void aio_notify(AioContext *ctx) { - event_notifier_set(&ctx->notifier); + /* Write e.g. bh->scheduled before reading ctx->dispatching. */ + smp_mb(); + if (!ctx->dispatching) { + event_notifier_set(&ctx->notifier); + } } static void aio_timerlist_notify(void *opaque) diff --git a/docs/aio_notify.promela b/docs/aio_notify.promela new file mode 100644 index 0000000..ad3f6f0 --- /dev/null +++ b/docs/aio_notify.promela @@ -0,0 +1,104 @@ +/* + * This model describes the interaction between aio_set_dispatching() + * and aio_notify(). + * + * Author: Paolo Bonzini + * + * This file is in the public domain. If you really want a license, + * the WTFPL will do. + * + * To simulate it: + * spin -p docs/aio_notify.promela + * + * To verify it: + * spin -a docs/aio_notify.promela + * gcc -O2 pan.c + * ./a.out -a + */ + +#define MAX 4 +#define LAST (1 << (MAX - 1)) +#define FINAL ((LAST << 1) - 1) + +bool dispatching; +bool event; + +int req, done; + +active proctype waiter() +{ + int fetch, blocking; + + do + :: done != FINAL -> { + // Computing "blocking" is separate from execution of the + // "bottom half" + blocking = (req == 0); + + // This is our "bottom half" + atomic { fetch = req; req = 0; } + done = done | fetch; + + // Wait for a nudge from the other side + do + :: event == 1 -> { event = 0; break; } + :: !blocking -> break; + od; + + dispatching = 1; + + // If you are simulating this model, you may want to add + // something like this here: + // + // int foo; foo++; foo++; foo++; + // + // This only wastes some time and makes it more likely + // that the notifier process hits the "fast path". 
+ + dispatching = 0; + } + :: else -> break; + od +} + +active proctype notifier() +{ + int next = 1; + int sets = 0; + + do + :: next <= LAST -> { + // generate a request + req = req | next; + next = next << 1; + + // aio_notify + if + :: dispatching == 0 -> sets++; event = 1; + :: else -> skip; + fi; + + // Test both synchronous and asynchronous delivery + if + :: 1 -> do + :: req == 0 -> break; + od; + :: 1 -> skip; + fi; + } + :: else -> break; + od; + printf("Skipped %d event_notifier_set\n", MAX - sets); +} + +#define p (done == FINAL) + +never { + do + :: 1 // after an arbitrarily long prefix + :: p -> break // p becomes true + od; + do + :: !p -> accept: break // it then must remains true forever after + od +} diff --git a/include/block/aio.h b/include/block/aio.h index d81250c..433e7ff 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -60,8 +60,14 @@ struct AioContext { */ int walking_handlers; + /* Used to avoid unnecessary event_notifier_set calls in aio_notify. + * Writes protected by lock or BQL, reads are lockless. + */ + bool dispatching; + /* lock to protect between bh's adders and deleter */ QemuMutex bh_lock; + /* Anchor of the list of Bottom Halves belonging to the context */ struct QEMUBH *first_bh; @@ -83,6 +89,9 @@ struct AioContext { QEMUTimerListGroup tlg; }; +/* Used internally to synchronize aio_poll against qemu_bh_schedule. */ +void aio_set_dispatching(AioContext *ctx, bool dispatching); + /** * aio_context_new: Allocate a new AioContext. * -- cgit v1.1 From 33f461e0c5d8efa21ef7e746be561fc57a1df106 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 3 Jul 2014 13:21:24 +0200 Subject: block: Make qiov match the request size until EOF If a read request goes across EOF, the block driver sees a shortened request that stops at EOF (the rest is memsetted in block.c), however the original qiov was used for this request. This patch makes the qiov size match the request size, avoiding a potential buffer overflow in raw-posix. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 510430d..0143268 100644 --- a/block.c +++ b/block.c @@ -3054,8 +3054,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), align >> BDRV_SECTOR_BITS); if (max_nb_sectors > 0) { - ret = drv->bdrv_co_readv(bs, sector_num, - MIN(nb_sectors, max_nb_sectors), qiov); + QEMUIOVector local_qiov; + size_t local_sectors; + + max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS); + local_sectors = MIN(max_nb_sectors, nb_sectors); + + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, 0, + local_sectors * BDRV_SECTOR_SIZE); + + ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, + &local_qiov); + + qemu_iovec_destroy(&local_qiov); } else { ret = 0; } -- cgit v1.1 From 44deba5a52576508f27edadf953e435141e2a76a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 3 Jul 2014 14:43:32 +0200 Subject: qcow2: Make qiov match request size until backing file EOF If a qcow2 image has a shorter backing file and a read request to unallocated clusters goes across EOF of the backing file, the backing file sees a shortened request and the rest is filled with zeros. However, the original too long qiov was used with the shortened request. This patch makes the qiov size match the request size, avoiding a potential buffer overflow in raw-posix. 
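The mechanism is the same in this and the following qiov fixes: build a copy of the I/O vector whose total length equals the shortened request. A hedged sketch with plain POSIX iovecs (shorten_iov is a made-up helper, standing in for the qemu_iovec_init/qemu_iovec_concat pair used in the patches):

#include <assert.h>
#include <sys/uio.h>

/* Build in 'dst' a prefix of 'src' totalling at most 'limit' bytes,
 * truncating the last entry; returns the number of entries used. */
static int shorten_iov(struct iovec *dst, const struct iovec *src,
                       int src_cnt, size_t limit)
{
    int i;
    for (i = 0; i < src_cnt && limit > 0; i++) {
        size_t len = src[i].iov_len < limit ? src[i].iov_len : limit;
        dst[i].iov_base = src[i].iov_base;
        dst[i].iov_len = len;
        limit -= len;
    }
    return i;
}

int main(void)
{
    char a[512], b[512];
    struct iovec src[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
    struct iovec dst[2];

    /* A 1024-byte read that straddles backing-file EOF after 768 bytes
     * must be submitted with a 768-byte vector, not the original one. */
    int cnt = shorten_iov(dst, src, 2, 768);
    assert(cnt == 2);
    assert(dst[0].iov_len == 512 && dst[1].iov_len == 256);
    return 0;
}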
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block/qcow2.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 67e55c9..b0faa69 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1020,11 +1020,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, sector_num, cur_nr_sectors); if (n1 > 0) { + QEMUIOVector local_qiov; + + qemu_iovec_init(&local_qiov, hd_qiov.niov); + qemu_iovec_concat(&local_qiov, &hd_qiov, 0, + n1 * BDRV_SECTOR_SIZE); + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); qemu_co_mutex_unlock(&s->lock); ret = bdrv_co_readv(bs->backing_hd, sector_num, - n1, &hd_qiov); + n1, &local_qiov); qemu_co_mutex_lock(&s->lock); + + qemu_iovec_destroy(&local_qiov); + if (ret < 0) { goto fail; } -- cgit v1.1 From f06ee3d4aa547df8d7d2317b2b6db7a88c1f3744 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 4 Jul 2014 17:11:28 +0200 Subject: qed: Make qiov match request size until backing file EOF If a QED image has a shorter backing file and a read request to unallocated clusters goes across EOF of the backing file, the backing file sees a shortened request and the rest is filled with zeros. However, the original too long qiov was used with the shortened request. This patch makes the qiov size match the request size, avoiding a potential buffer overflow in raw-posix. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/qed.c | 38 ++++++++++++++++++++++++++++++-------- block/qed.h | 1 + 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/block/qed.c b/block/qed.c index b69374b..cd4872b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -761,17 +761,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb) /** * Read from the backing file or zero-fill if no backing file * - * @s: QED state - * @pos: Byte position in device - * @qiov: Destination I/O vector - * @cb: Completion function - * @opaque: User data for completion function + * @s: QED state + * @pos: Byte position in device + * @qiov: Destination I/O vector + * @backing_qiov: Possibly shortened copy of qiov, to be allocated here + * @cb: Completion function + * @opaque: User data for completion function * * This function reads qiov->size bytes starting at pos from the backing file. * If there is no backing file then zeroes are read. 
*/ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov, + QEMUIOVector **backing_qiov, BlockDriverCompletionFunc *cb, void *opaque) { uint64_t backing_length = 0; @@ -804,15 +806,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* If the read straddles the end of the backing file, shorten it */ size = MIN((uint64_t)backing_length - pos, qiov->size); + assert(*backing_qiov == NULL); + *backing_qiov = g_new(QEMUIOVector, 1); + qemu_iovec_init(*backing_qiov, qiov->niov); + qemu_iovec_concat(*backing_qiov, qiov, 0, size); + BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, - qiov, size / BDRV_SECTOR_SIZE, cb, opaque); + *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque); } typedef struct { GenericCB gencb; BDRVQEDState *s; QEMUIOVector qiov; + QEMUIOVector *backing_qiov; struct iovec iov; uint64_t offset; } CopyFromBackingFileCB; @@ -829,6 +837,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret) CopyFromBackingFileCB *copy_cb = opaque; BDRVQEDState *s = copy_cb->s; + if (copy_cb->backing_qiov) { + qemu_iovec_destroy(copy_cb->backing_qiov); + g_free(copy_cb->backing_qiov); + copy_cb->backing_qiov = NULL; + } + if (ret) { qed_copy_from_backing_file_cb(copy_cb, ret); return; @@ -866,11 +880,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); copy_cb->s = s; copy_cb->offset = offset; + copy_cb->backing_qiov = NULL; copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); copy_cb->iov.iov_len = len; qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1); - qed_read_backing_file(s, pos, &copy_cb->qiov, + qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov, qed_copy_from_backing_file_write, copy_cb); } @@ -1313,7 +1328,7 @@ static void qed_aio_read_data(void *opaque, int ret, return; } else if (ret != QED_CLUSTER_FOUND) { qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, - qed_aio_next_io, acb); + &acb->backing_qiov, qed_aio_next_io, acb); return; } @@ -1339,6 +1354,12 @@ static void qed_aio_next_io(void *opaque, int ret) trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); + if (acb->backing_qiov) { + qemu_iovec_destroy(acb->backing_qiov); + g_free(acb->backing_qiov); + acb->backing_qiov = NULL; + } + /* Handle I/O error */ if (ret) { qed_aio_complete(acb, ret); @@ -1378,6 +1399,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, acb->qiov_offset = 0; acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; + acb->backing_qiov = NULL; acb->request.l2_table = NULL; qemu_iovec_init(&acb->cur_qiov, qiov->niov); diff --git a/block/qed.h b/block/qed.h index b024751..2b0e724 100644 --- a/block/qed.h +++ b/block/qed.h @@ -142,6 +142,7 @@ typedef struct QEDAIOCB { /* Current cluster scatter-gather list */ QEMUIOVector cur_qiov; + QEMUIOVector *backing_qiov; uint64_t cur_pos; /* position on block device, in bytes */ uint64_t cur_cluster; /* cluster offset in image file */ unsigned int cur_nclusters; /* number of clusters being accessed */ -- cgit v1.1 From 8eb029c26ecf61da6c2b45c3c23472e8c477ba34 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 1 Jul 2014 16:09:54 +0200 Subject: block: Assert qiov length matches request length At least raw-posix relies on this because it can allocate bounce buffers based on the request length, but then accesses them using all of the qiov entries later.
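A minimal sketch of the invariant being asserted (iov_size and submit are illustrative stand-ins, not QEMU functions): any path that sizes a bounce buffer from the request length but copies through the vector's entries must insist that the two agree.

#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

/* Total byte count of an I/O vector, analogous to qiov->size. */
static size_t iov_size(const struct iovec *iov, int cnt)
{
    size_t total = 0;
    for (int i = 0; i < cnt; i++) {
        total += iov[i].iov_len;
    }
    return total;
}

/* A request that allocates a bounce buffer of 'nbytes' but gathers
 * data entry by entry would overflow if the vector were longer, so
 * the mismatch is rejected up front. */
static void submit(const struct iovec *iov, int cnt, size_t nbytes)
{
    assert(iov_size(iov, cnt) == nbytes);
    /* ... allocate nbytes, gather/scatter through iov ... */
}

int main(void)
{
    char buf[1024];
    struct iovec iov = { buf, sizeof(buf) };
    submit(&iov, 1, sizeof(buf)); /* OK: lengths agree */
    return 0;
}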
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 2 ++ block/raw-posix.c | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 0143268..3e252a2 100644 --- a/block.c +++ b/block.c @@ -3010,6 +3010,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(!qiov || bytes == qiov->size); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -3279,6 +3280,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(!qiov || bytes == qiov->size); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); diff --git a/block/raw-posix.c b/block/raw-posix.c index a857def..2bcc73d 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -790,6 +790,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); p += aiocb->aio_iov[i].iov_len; } + assert(p - buf == aiocb->aio_nbytes); } nbytes = handle_aiocb_rw_linear(aiocb, buf); @@ -804,9 +805,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) copy = aiocb->aio_iov[i].iov_len; } memcpy(aiocb->aio_iov[i].iov_base, p, copy); + assert(count >= copy); p += copy; count -= copy; } + assert(count == 0); } qemu_vfree(buf); @@ -993,12 +996,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; + acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE; + acb->aio_offset = sector_num * BDRV_SECTOR_SIZE; + if (qiov) { acb->aio_iov = qiov->iov; acb->aio_niov = qiov->niov; + assert(qiov->size == acb->aio_nbytes); } - acb->aio_nbytes = nb_sectors * 512; - acb->aio_offset = sector_num * 512; trace_paio_submit_co(sector_num, nb_sectors, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); @@ -1016,12 +1021,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; + acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE; + acb->aio_offset = sector_num * BDRV_SECTOR_SIZE; + if (qiov) { acb->aio_iov = qiov->iov; acb->aio_niov = qiov->niov; + assert(qiov->size == acb->aio_nbytes); } - acb->aio_nbytes = nb_sectors * 512; - acb->aio_offset = sector_num * 512; trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); -- cgit v1.1 From 0a21ea3289c5a3b982386e3eaaa37627c18f5e35 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:46 +0200 Subject: virtio-blk: avoid dataplane VirtIOBlockReq early free VirtIOBlockReq is freed later by virtio_blk_free_request() in hw/block/virtio-blk.c. Remove this extraneous g_slice_free(). 
This patch fixes the following segfault: 0x00005555556373af in virtio_blk_rw_complete (opaque=0x5555565ff5e0, ret=0) at hw/block/virtio-blk.c:99 99 bdrv_acct_done(req->dev->bs, &req->acct); (gdb) print req $1 = (VirtIOBlockReq *) 0x5555565ff5e0 (gdb) print req->dev $2 = (VirtIOBlock *) 0x0 (gdb) bt #0 0x00005555556373af in virtio_blk_rw_complete (opaque=0x5555565ff5e0, ret=0) at hw/block/virtio-blk.c:99 #1 0x0000555555840ebe in bdrv_co_em_bh (opaque=0x5555566152d0) at block.c:4675 #2 0x000055555583de77 in aio_bh_poll (ctx=ctx@entry=0x5555563a8150) at async.c:81 #3 0x000055555584b7a7 in aio_poll (ctx=0x5555563a8150, blocking=blocking@entry=true) at aio-posix.c:188 #4 0x00005555556e520e in iothread_run (opaque=0x5555563a7fd8) at iothread.c:41 #5 0x00007ffff42ba124 in start_thread () from /usr/lib/libpthread.so.0 #6 0x00007ffff16d14bd in clone () from /usr/lib/libc.so.6 Reported-by: Max Reitz Cc: Fam Zheng Signed-off-by: Stefan Hajnoczi Tested-by: Christian Borntraeger Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 4bc0729..bed9f13 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -68,7 +68,6 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) vring_push(&req->dev->dataplane->vring, req->elem, req->qiov.size + sizeof(*req->in)); notify_guest(req->dev->dataplane); - g_slice_free(VirtIOBlockReq, req); } static void handle_notify(EventNotifier *e) -- cgit v1.1 From abd764250fbce6f285513d74f03eb5c526e520f6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:47 +0200 Subject: dataplane: do not free VirtQueueElement in vring_push() VirtQueueElement is allocated in vring_pop() so it seems to make sense that vring_push() should free it. Alas, virtio-blk frees VirtQueueElement itself in virtio_blk_free_request(). This patch solves a double-free assertion in glib's g_slice_free(). Rename vring_free_element() to vring_unmap_element() since it no longer frees the VirtQueueElement. 
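The ownership rule this establishes can be sketched in a few lines of plain C (the types and names are schematic, not the actual vring API): the push side releases only the resources it set up itself, and the element's storage is freed exactly once, by the layer that allocated it.

#include <stdlib.h>

typedef struct Elem {
    int index;
    /* ... mapped guest buffers would live here ... */
} Elem;

/* Undo only what the ring layer itself set up (the mappings);
 * the element's storage is not this layer's to free. */
static void ring_unmap_element(Elem *elem)
{
    (void)elem;
}

static void ring_push(Elem *elem)
{
    ring_unmap_element(elem);
    /* ... publish elem->index to the used ring ... */
}

int main(void)
{
    Elem *elem = malloc(sizeof(*elem)); /* allocated by the device code */
    elem->index = 0;
    ring_push(elem);                    /* no longer frees */
    free(elem);                         /* single, unambiguous owner */
    return 0;
}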
Signed-off-by: Stefan Hajnoczi Tested-by: Christian Borntraeger Signed-off-by: Kevin Wolf --- hw/virtio/dataplane/vring.c | 9 ++++----- include/hw/virtio/dataplane/vring.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 665a1ff..5d17d39 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -272,7 +272,7 @@ static int get_indirect(Vring *vring, VirtQueueElement *elem, return 0; } -void vring_free_element(VirtQueueElement *elem) +static void vring_unmap_element(VirtQueueElement *elem) { int i; @@ -287,8 +287,6 @@ void vring_free_element(VirtQueueElement *elem) for (i = 0; i < elem->in_num; i++) { vring_unmap(elem->in_sg[i].iov_base, true); } - - g_slice_free(VirtQueueElement, elem); } /* This looks in the virtqueue and for the first available buffer, and converts @@ -402,7 +400,8 @@ out: vring->broken = true; } if (elem) { - vring_free_element(elem); + vring_unmap_element(elem); + g_slice_free(VirtQueueElement, elem); } *p_elem = NULL; return ret; @@ -418,7 +417,7 @@ void vring_push(Vring *vring, VirtQueueElement *elem, int len) unsigned int head = elem->index; uint16_t new; - vring_free_element(elem); + vring_unmap_element(elem); /* Don't touch vring if a fatal error occurred */ if (vring->broken) { diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h index 63e7bf4..b23edd2 100644 --- a/include/hw/virtio/dataplane/vring.h +++ b/include/hw/virtio/dataplane/vring.h @@ -55,6 +55,5 @@ bool vring_enable_notification(VirtIODevice *vdev, Vring *vring); bool vring_should_notify(VirtIODevice *vdev, Vring *vring); int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem); void vring_push(Vring *vring, VirtQueueElement *elem, int len); -void vring_free_element(VirtQueueElement *elem); #endif /* VRING_H */ -- cgit v1.1 From 869d66af53d8e04709456c9cae5cca7c560d4b93 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:48 +0200 Subject: virtio-blk: avoid g_slice_new0() for VirtIOBlockReq and VirtQueueElement In commit de6c8042ec55da18702fa51f09072fcaa315edc3 ("virtio-blk: Avoid zeroing every request structure") we avoided the 40 KB memset when allocating VirtIOBlockReq. The memset was reintroduced in commit 671ec3f056559f22a2531a91dce3a258b9b5eb8a ("virtio-blk: Convert VirtIOBlockReq.elem to pointer"). It must be fixed again to avoid a performance regression. Cc: Fam Zheng Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index aec3146..b06a56d 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -31,9 +31,11 @@ static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) { - VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq); + VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; - req->elem = g_slice_new0(VirtQueueElement); + req->qiov.size = 0; + req->next = NULL; + req->elem = g_slice_new(VirtQueueElement); return req; } -- cgit v1.1 From f897bf751fbd95e4015b95d202c706548586813a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:49 +0200 Subject: virtio-blk: embed VirtQueueElement in VirtIOBlockReq The memory allocation between hw/block/virtio-blk.c, hw/block/dataplane/virtio-blk.c, and hw/virtio/dataplane/vring.c is messy. Structs are allocated in different files than they are freed in. 
This is risky and makes memory leaks easier. Embed VirtQueueElement in VirtIOBlockReq to reduce the amount of memory allocation we need to juggle. This also makes vring.c and virtio.c slightly more similar. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 29 +++++++++++------------ hw/block/virtio-blk.c | 46 ++++++++++++++++++------------------- hw/virtio/dataplane/vring.c | 17 +++++--------- include/hw/virtio/dataplane/vring.h | 2 +- include/hw/virtio/virtio-blk.h | 6 ++++- 5 files changed, 48 insertions(+), 52 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index bed9f13..227bb15 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -65,7 +65,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) { stb_p(&req->in->status, status); - vring_push(&req->dev->dataplane->vring, req->elem, + vring_push(&req->dev->dataplane->vring, &req->elem, req->qiov.size + sizeof(*req->in)); notify_guest(req->dev->dataplane); } @@ -74,33 +74,32 @@ static void handle_notify(EventNotifier *e) { VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, host_notifier); - - VirtQueueElement *elem; - VirtIOBlockReq *req; - int ret; - MultiReqBuffer mrb = { - .num_writes = 0, - }; + VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); event_notifier_test_and_clear(&s->host_notifier); bdrv_io_plug(s->blk->conf.bs); for (;;) { + MultiReqBuffer mrb = { + .num_writes = 0, + }; + int ret; + /* Disable guest->host notifies to avoid unnecessary vmexits */ vring_disable_notification(s->vdev, &s->vring); for (;;) { - ret = vring_pop(s->vdev, &s->vring, &elem); + VirtIOBlockReq *req = virtio_blk_alloc_request(vblk); + + ret = vring_pop(s->vdev, &s->vring, &req->elem); if (ret < 0) { - assert(elem == NULL); + virtio_blk_free_request(req); break; /* no more requests */ } - trace_virtio_blk_data_plane_process_request(s, elem->out_num, - elem->in_num, elem->index); + trace_virtio_blk_data_plane_process_request(s, req->elem.out_num, + req->elem.in_num, + req->elem.index); - req = g_slice_new(VirtIOBlockReq); - req->dev = VIRTIO_BLK(s->vdev); - req->elem = elem; virtio_blk_handle_request(req, &mrb); } diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b06a56d..02cd6b0 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -29,20 +29,18 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" -static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) +VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) { VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; req->qiov.size = 0; req->next = NULL; - req->elem = g_slice_new(VirtQueueElement); return req; } -static void virtio_blk_free_request(VirtIOBlockReq *req) +void virtio_blk_free_request(VirtIOBlockReq *req) { if (req) { - g_slice_free(VirtQueueElement, req->elem); g_slice_free(VirtIOBlockReq, req); } } @@ -56,7 +54,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, trace_virtio_blk_req_complete(req, status); stb_p(&req->in->status, status); - virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in)); + virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); virtio_notify(vdev, s->vq); } @@ -121,7 +119,7 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) { VirtIOBlockReq *req = virtio_blk_alloc_request(s); - if (!virtqueue_pop(s->vq, req->elem)) { + if (!virtqueue_pop(s->vq, &req->elem)) { virtio_blk_free_request(req); 
         return NULL;
     }
 
@@ -254,7 +252,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
     int status;
 
-    status = virtio_blk_handle_scsi_req(req->dev, req->elem);
+    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
     virtio_blk_req_complete(req, status);
     virtio_blk_free_request(req);
 }
@@ -351,12 +349,12 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
     uint32_t type;
-    struct iovec *in_iov = req->elem->in_sg;
-    struct iovec *iov = req->elem->out_sg;
-    unsigned in_num = req->elem->in_num;
-    unsigned out_num = req->elem->out_num;
+    struct iovec *in_iov = req->elem.in_sg;
+    struct iovec *iov = req->elem.out_sg;
+    unsigned in_num = req->elem.in_num;
+    unsigned out_num = req->elem.out_num;
 
-    if (req->elem->out_num < 1 || req->elem->in_num < 1) {
+    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
         error_report("virtio-blk missing headers");
         exit(1);
     }
@@ -393,19 +391,19 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
          * NB: per existing s/n string convention the string is
          * terminated by '\0' only when shorter than buffer.
          */
-        strncpy(req->elem->in_sg[0].iov_base,
+        strncpy(req->elem.in_sg[0].iov_base,
                 s->blk.serial ? s->blk.serial : "",
-                MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
+                MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
         virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
         virtio_blk_free_request(req);
     } else if (type & VIRTIO_BLK_T_OUT) {
-        qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1],
-                                 req->elem->out_num - 1);
+        qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
+                                 req->elem.out_num - 1);
         virtio_blk_handle_write(req, mrb);
     } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
         /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
-        qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0],
-                                 req->elem->in_num - 1);
+        qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
+                                 req->elem.in_num - 1);
         virtio_blk_handle_read(req);
     } else {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
@@ -629,7 +627,7 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
 
     while (req) {
         qemu_put_sbyte(f, 1);
-        qemu_put_buffer(f, (unsigned char *)req->elem,
+        qemu_put_buffer(f, (unsigned char *)&req->elem,
                         sizeof(VirtQueueElement));
         req = req->next;
     }
@@ -654,15 +652,15 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
     while (qemu_get_sbyte(f)) {
         VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-        qemu_get_buffer(f, (unsigned char *)req->elem,
+        qemu_get_buffer(f, (unsigned char *)&req->elem,
                         sizeof(VirtQueueElement));
         req->next = s->rq;
         s->rq = req;
 
-        virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr,
-                         req->elem->in_num, 1);
-        virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr,
-                         req->elem->out_num, 0);
+        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
+                         req->elem.in_num, 1);
+        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
+                         req->elem.out_num, 0);
     }
 
     return 0;
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 5d17d39..67cb2b8 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -301,14 +301,16 @@ static void vring_unmap_element(VirtQueueElement *elem)
  * Stolen from linux/drivers/vhost/vhost.c.
  */
 int vring_pop(VirtIODevice *vdev, Vring *vring,
-              VirtQueueElement **p_elem)
+              VirtQueueElement *elem)
 {
     struct vring_desc desc;
     unsigned int i, head, found = 0, num = vring->vr.num;
     uint16_t avail_idx, last_avail_idx;
-    VirtQueueElement *elem = NULL;
     int ret;
 
+    /* Initialize elem so it can be safely unmapped */
+    elem->in_num = elem->out_num = 0;
+
     /* If there was a fatal error then refuse operation */
     if (vring->broken) {
         ret = -EFAULT;
@@ -340,10 +342,8 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
      * the index we've seen. */
     head = vring->vr.avail->ring[last_avail_idx % num];
 
-    elem = g_slice_new(VirtQueueElement);
     elem->index = head;
-    elem->in_num = elem->out_num = 0;
-
+
     /* If their number is silly, that's an error. */
     if (unlikely(head >= num)) {
         error_report("Guest says index %u > %u is available", head, num);
@@ -391,7 +391,6 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
     /* On success, increment avail index. */
     vring->last_avail_idx++;
 
-    *p_elem = elem;
     return head;
 
 out:
@@ -399,11 +398,7 @@ out:
     if (ret == -EFAULT) {
         vring->broken = true;
     }
-    if (elem) {
-        vring_unmap_element(elem);
-        g_slice_free(VirtQueueElement, elem);
-    }
-    *p_elem = NULL;
+    vring_unmap_element(elem);
     return ret;
 }
 
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index b23edd2..af73ee2 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -53,7 +53,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
 void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
-int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem);
+int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem);
 void vring_push(Vring *vring, VirtQueueElement *elem, int len);
 
 #endif /* VRING_H */
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3080a2..afb7b8d 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -144,7 +144,7 @@ typedef struct MultiReqBuffer {
 
 typedef struct VirtIOBlockReq {
     VirtIOBlock *dev;
-    VirtQueueElement *elem;
+    VirtQueueElement elem;
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr out;
     QEMUIOVector qiov;
@@ -152,6 +152,10 @@ typedef struct VirtIOBlockReq {
     BlockAcctCookie acct;
 } VirtIOBlockReq;
 
+VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
+
+void virtio_blk_free_request(VirtIOBlockReq *req);
+
 int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
                                VirtQueueElement *elem);
 
--
cgit v1.1
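
The ownership simplification above reduces to a small sketch: with an
embedded element there is one allocation and one free, both in the same
place, and callers pass &req->elem into the pop function, which now only
fills in caller-supplied storage. The types below are illustrative stubs,
not the QEMU definitions:

    #include <glib.h>

    typedef struct {
        unsigned int index, out_num, in_num;   /* stub for VirtQueueElement */
    } Element;

    typedef struct {
        void *dev;
        Element elem;       /* embedded: lives and dies with the request */
    } Req;

    static int pop(Element *elem)   /* caller supplies the storage */
    {
        elem->in_num = elem->out_num = 0;  /* always safe to unmap later */
        return -1;                         /* "no more requests" in this stub */
    }

    int main(void)
    {
        Req *req = g_slice_new(Req);       /* one allocation ... */
        if (pop(&req->elem) < 0) {
            g_slice_free(Req, req);        /* ... one free, in one place */
        }
        return 0;
    }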
From acfb23ad3dd8d0ab385a10e483776ba7dcf927ad Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 9 Jul 2014 10:49:46 +0200
Subject: AioContext: do not rely on aio_poll(ctx, true) result to end a loop

Currently, whenever aio_poll(ctx, true) has completed all pending work
it returns true *and* the next call to aio_poll(ctx, true) will not
block.

This invariant has its roots in qemu_aio_flush()'s implementation as
"while (qemu_aio_wait()) {}".  However, qemu_aio_flush() does not exist
anymore and bdrv_drain_all() is implemented differently; and this
invariant is complicated to maintain and subtly different from the
return value of GMainLoop's g_main_context_iteration.

All calls to aio_poll(ctx, true) except one are guarded by a while()
loop checking for a request to be incomplete, or a BlockDriverState
to be idle.

The one remaining call (in iothread.c) uses this to delay the
aio_context_release/acquire pair until the AioContext is quiescent,
however:

- we can do the same just by using non-blocking aio_poll, similar to
  how vl.c invokes main_loop_wait

- it is buggy, because it does not ensure that the AioContext is
  released between an aio_notify and the next time the iothread goes
  to sleep.  This leads to hangs when stopping the dataplane thread.

In the end, these semantics are a bad match for the current users of
AioContext.  So modify that one exception in iothread.c, which also
fixes the hangs, as well as the testcase so that it uses the same
idiom as the actual QEMU code.

Reported-by: Christian Borntraeger
Tested-by: Christian Borntraeger
Signed-off-by: Paolo Bonzini
Signed-off-by: Kevin Wolf
---
 include/block/aio.h |  6 +++---
 iothread.c          |  5 ++++-
 tests/test-aio.c    | 25 +++++++++++++------------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index 433e7ff..c23de3c 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -214,9 +214,9 @@ bool aio_pending(AioContext *ctx);
 /* Progress in completing AIO work to occur.  This can issue new pending
  * aio as a result of executing I/O completion or bh callbacks.
  *
- * If there is no pending AIO operation or completion (bottom half),
- * return false.  If there are pending AIO operations of bottom halves,
- * return true.
+ * Return whether any progress was made by executing AIO or bottom half
+ * handlers.  If @blocking == true, this should always be true except
+ * if someone called aio_notify.
  *
  * If there are no pending bottom halves, but there are pending AIO
  * operations, it may not be possible to make any progress without
diff --git a/iothread.c b/iothread.c
index 1fbf9f1..d9403cf 100644
--- a/iothread.c
+++ b/iothread.c
@@ -30,6 +30,7 @@ typedef ObjectClass IOThreadClass;
 static void *iothread_run(void *opaque)
 {
     IOThread *iothread = opaque;
+    bool blocking;
 
     qemu_mutex_lock(&iothread->init_done_lock);
     iothread->thread_id = qemu_get_thread_id();
@@ -38,8 +39,10 @@ static void *iothread_run(void *opaque)
 
     while (!iothread->stopping) {
         aio_context_acquire(iothread->ctx);
-        while (!iothread->stopping && aio_poll(iothread->ctx, true)) {
+        blocking = true;
+        while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
             /* Progress was made, keep going */
+            blocking = false;
         }
         aio_context_release(iothread->ctx);
     }
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 264dab9..4c40a49 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -24,14 +24,6 @@ typedef struct {
     bool auto_set;
 } EventNotifierTestData;
 
-/* Wait until there are no more BHs or AIO requests */
-static void wait_for_aio(void)
-{
-    while (aio_poll(ctx, true)) {
-        /* Do nothing */
-    }
-}
-
 /* Wait until event notifier becomes inactive */
 static void wait_until_inactive(EventNotifierTestData *data)
 {
@@ -204,7 +196,9 @@ static void test_bh_schedule10(void)
     g_assert(aio_poll(ctx, true));
     g_assert_cmpint(data.n, ==, 2);
 
-    wait_for_aio();
+    while (data.n < 10) {
+        aio_poll(ctx, true);
+    }
     g_assert_cmpint(data.n, ==, 10);
 
     g_assert(!aio_poll(ctx, false));
@@ -252,7 +246,9 @@ static void test_bh_delete_from_cb(void)
     qemu_bh_schedule(data1.bh);
     g_assert_cmpint(data1.n, ==, 0);
 
-    wait_for_aio();
+    while (data1.n < data1.max) {
+        aio_poll(ctx, true);
+    }
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert(data1.bh == NULL);
 
@@ -287,7 +283,12 @@ static void test_bh_delete_from_cb_many(void)
     g_assert_cmpint(data4.n, ==, 1);
     g_assert(data1.bh == NULL);
 
-    wait_for_aio();
+    while (data1.n < data1.max ||
+           data2.n < data2.max ||
+           data3.n < data3.max ||
+           data4.n < data4.max) {
+        aio_poll(ctx, true);
+    }
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert_cmpint(data2.n, ==, data2.max);
     g_assert_cmpint(data3.n, ==, data3.max);
@@ -306,7 +307,7 @@ static void test_bh_flush(void)
     qemu_bh_schedule(data.bh);
     g_assert_cmpint(data.n, ==, 0);
 
-    wait_for_aio();
+    g_assert(aio_poll(ctx, true));
     g_assert_cmpint(data.n, ==, 1);
 
     g_assert(!aio_poll(ctx, false));
--
cgit v1.1

From b8864245b140c801e826de4acf6d6516e2c550b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?=
Date: Wed, 9 Jul 2014 22:28:49 +0200
Subject: tests: Fix unterminated string output visitor enum human string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The buffer was being allocated of size string length plus two.
Around the string two quotes were being added, but no terminating NUL.
It was then compared using g_assert_cmpstr(), resulting in fairly
random assertion failures:

 ERROR:tests/test-string-output-visitor.c:213:test_visitor_out_enum: assertion failed (str == str_human): ("\"value1\"" == "\"value1\"\001EEEEEEEEEEEEEE\0171")

There is no g_assert_cmpnstr() counterpart, so use g_strdup_printf()
for safely assembling the string in the first place.

Cc: Hu Tao
Cc: Michael S. Tsirkin
Suggested-by: Eric Blake
Fixes: b4900c0 tests: add human format test for string output visitor
Signed-off-by: Andreas Färber
Reviewed-by: Eric Blake
Signed-off-by: Kevin Wolf
---
 tests/test-string-output-visitor.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c
index e89e43c..101fb27 100644
--- a/tests/test-string-output-visitor.c
+++ b/tests/test-string-output-visitor.c
@@ -196,16 +196,11 @@ static void test_visitor_out_enum(TestOutputVisitorData *data,
 
     for (i = 0; i < ENUM_ONE_MAX; i++) {
         char *str_human;
-        int len;
 
         visit_type_EnumOne(data->ov, &i, "unused", &err);
         g_assert(!err);
 
-        len = strlen(EnumOne_lookup[i]) + 2;
-        str_human = g_malloc0(len);
-        str_human[0] = '"';
-        strncpy(str_human + 1, EnumOne_lookup[i], strlen(EnumOne_lookup[i]));
-        str_human[len - 1] = '"';
+        str_human = g_strdup_printf("\"%s\"", EnumOne_lookup[i]);
 
         str = string_output_get_string(data->sov);
         g_assert(str != NULL);
--
cgit v1.1
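
The off-by-one is easiest to see side by side. A self-contained sketch of
the broken manual assembly and its g_strdup_printf() replacement (the name
string here is an arbitrary example, not tied to the test suite):

    #include <glib.h>
    #include <string.h>

    int main(void)
    {
        const char *name = "value1";

        /* Broken: len has no room for a terminating NUL, so the final
         * quote lands in the last byte and the string stays unterminated. */
        int len = strlen(name) + 2;
        char *bad = g_malloc0(len);
        bad[0] = '"';
        strncpy(bad + 1, name, strlen(name));
        bad[len - 1] = '"';    /* overwrites the byte where NUL had to go */

        /* Fixed: g_strdup_printf() sizes the buffer and NUL-terminates. */
        char *good = g_strdup_printf("\"%s\"", name);

        g_free(bad);
        g_free(good);
        return 0;
    }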
From 80504dcaa11097b99842312014f5954a54fe1826 Mon Sep 17 00:00:00 2001
From: Nikolay Nikolaev
Date: Wed, 9 Jul 2014 18:06:32 +0300
Subject: qtest: fix vhost-user-test compilation with old GLib

A missing G_TIME_SPAN_SECOND definition breaks the RHEL6 compilation,
as GLib versions before 2.26 do not have it.  In that case, just
define it.

Reported-by: Kevin Wolf
Signed-off-by: Nikolay Nikolaev
Tested-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 tests/vhost-user-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 2af2381..406ba70 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -22,6 +22,10 @@
 #include <glib.h>
 
 /* GLIB version compatibility flags */
+#if !GLIB_CHECK_VERSION(2, 26, 0)
+#define G_TIME_SPAN_SECOND              (G_GINT64_CONSTANT(1000000))
+#endif
+
 #if GLIB_CHECK_VERSION(2, 28, 0)
 #define HAVE_MONOTONIC_TIME
 #endif
--
cgit v1.1

From 58f423fbd5f7f435e8dc56dfa565b043d20d1e1b Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Wed, 9 Jul 2014 19:17:30 +0200
Subject: dma-helpers: Fix too long qiov

If the size of the scatter/gather list isn't a multiple of 512, the
number of sectors for the block layer request is rounded down,
resulting in a qiov that doesn't match the request length.  Truncate
the qiov to the new length of the request.

This fixes the IDE qtest case /x86_64/ide/bmdma/short_prdt.

Signed-off-by: Kevin Wolf
Reviewed-by: Eric Blake
---
 dma-helpers.c         |  4 ++++
 include/qemu-common.h |  1 +
 util/iov.c            | 13 +++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/dma-helpers.c b/dma-helpers.c
index 53cbe92..499b52b 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -170,6 +170,10 @@ static void dma_bdrv_cb(void *opaque, int ret)
         return;
     }
 
+    if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
+        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
+    }
+
     dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
                             dbs->iov.size / 512, dma_bdrv_cb, dbs);
     assert(dbs->acb);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index ae76197..6ef8282 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -329,6 +329,7 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
                          int fillc, size_t bytes);
 ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
 void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes);
 
 bool buffer_is_zero(const void *buf, size_t len);
 
diff --git a/util/iov.c b/util/iov.c
index 2b4f46d..24566c8 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -550,3 +550,16 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
 
     return total;
 }
+
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
+{
+    size_t total;
+    unsigned int niov = qiov->niov;
+
+    assert(qiov->size >= bytes);
+    total = iov_discard_back(qiov->iov, &niov, bytes);
+    assert(total == bytes);
+
+    qiov->niov = niov;
+    qiov->size -= bytes;
+}
--
cgit v1.1
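
In other words: when the byte count is rounded down to whole sectors, the
qiov must be shortened by the same remainder, or the two lengths disagree.
A simplified sketch of that invariant, with a stub vector type standing in
for QEMUIOVector (the real helper also drops whole iovec entries, not just
bytes):

    #include <assert.h>
    #include <stddef.h>

    #define SECTOR_SIZE 512

    typedef struct { size_t size; } Vec;   /* stub for QEMUIOVector */

    static void vec_discard_back(Vec *v, size_t bytes)
    {
        assert(v->size >= bytes);
        v->size -= bytes;
    }

    static size_t align_to_sectors(Vec *v)
    {
        size_t tail = v->size % SECTOR_SIZE;  /* bytes past the last sector */
        if (tail) {
            vec_discard_back(v, tail);
        }
        return v->size / SECTOR_SIZE;   /* now matches the request length */
    }

    int main(void)
    {
        Vec v = { .size = 1000 };
        assert(align_to_sectors(&v) == 1 && v.size == 512);
        return 0;
    }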
From d0e14376eefc40b07c8fb42c132c2202c66dcb0b Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:29 +0200
Subject: virtio-blk: Factor common checks out of
 virtio_blk_handle_read/write()

Signed-off-by: Markus Armbruster
Reviewed-by: Fam Zheng
Signed-off-by: Kevin Wolf
---
 hw/block/virtio-blk.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 02cd6b0..075e6e6 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -288,6 +288,18 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
     bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
 }
 
+static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
+                                     uint64_t sector, size_t size)
+{
+    if (sector & dev->sector_mask) {
+        return false;
+    }
+    if (size % dev->conf->logical_block_size) {
+        return false;
+    }
+    return true;
+}
+
 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
     BlockRequest *blkreq;
@@ -299,11 +311,7 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 
     trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
 
-    if (sector & req->dev->sector_mask) {
-        virtio_blk_rw_complete(req, -EIO);
-        return;
-    }
-    if (req->qiov.size % req->dev->conf->logical_block_size) {
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
         virtio_blk_rw_complete(req, -EIO);
         return;
     }
@@ -333,11 +341,7 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 
     trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
 
-    if (sector & req->dev->sector_mask) {
-        virtio_blk_rw_complete(req, -EIO);
-        return;
-    }
-    if (req->qiov.size % req->dev->conf->logical_block_size) {
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
         virtio_blk_rw_complete(req, -EIO);
         return;
     }
--
cgit v1.1
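
The commit is a plain extract-function refactoring: the read and write
paths repeated the same two guard clauses, which now live in a single
predicate. Schematically, with simplified stand-ins for the VirtIOBlock
fields:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical device limits, standing in for VirtIOBlock state. */
    typedef struct {
        uint64_t sector_mask;        /* low bits that must be zero */
        size_t   logical_block_size;
    } Dev;

    /* One predicate replaces the duplicated checks in both I/O paths. */
    static bool sect_range_ok(const Dev *dev, uint64_t sector, size_t size)
    {
        if (sector & dev->sector_mask) {
            return false;            /* misaligned start sector */
        }
        if (size % dev->logical_block_size) {
            return false;            /* partial logical block */
        }
        return true;
    }

Besides removing duplication, the single predicate gives the next two
patches one obvious place to add further validity conditions.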
From 42e38c1fd0199155d32f3464aedce282d3d7f6a1 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:30 +0200
Subject: virtio-blk: Bypass error action and I/O accounting on invalid r/w

When a device model's I/O operation fails, we execute the error
action.  This lets layers above QEMU implement thin provisioning, or
attempt to correct errors before they reach the guest.  But when the
I/O operation fails because it's invalid, reporting the error to the
guest is the only sensible action.

If the guest's read or write asks for an invalid sector range, fail
the request right away, without considering the error action.  No
change with error action BDRV_ACTION_REPORT.

Furthermore, bypass I/O accounting, because we want to track only I/O
that actually reaches the block layer.

The next commit will extend "invalid sector range" to cover attempts
to read/write beyond the end of the medium.

Signed-off-by: Markus Armbruster
Signed-off-by: Kevin Wolf
---
 hw/block/virtio-blk.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 075e6e6..e6e6276 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -307,15 +307,16 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 
     sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
 
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
-
     trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
 
     if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_rw_complete(req, -EIO);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        virtio_blk_free_request(req);
         return;
     }
 
+    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+
     if (mrb->num_writes == 32) {
         virtio_submit_multiwrite(req->dev->bs, mrb);
     }
@@ -337,14 +338,15 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 
     sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
 
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
-
     trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
 
     if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_rw_complete(req, -EIO);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        virtio_blk_free_request(req);
         return;
     }
+
+    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
 
     bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
                    req->qiov.size / BDRV_SECTOR_SIZE,
                    virtio_blk_rw_complete, req);
--
cgit v1.1

From 3c2daac0b98952a858277878cb11294256b39e43 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:31 +0200
Subject: virtio-blk: Treat read/write beyond end as invalid

The block layer fails such reads and writes just fine.  However, they
then get treated like valid operations that fail: the error action
gets executed.  Unwanted; reporting the error to the guest is the
only sensible action.

Reject them before passing them to the block layer.  This bypasses
the error action and I/O accounting.

Signed-off-by: Markus Armbruster
Reviewed-by: Fam Zheng
Signed-off-by: Kevin Wolf
---
 hw/block/virtio-blk.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e6e6276..c241c50 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -291,12 +291,19 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                      uint64_t sector, size_t size)
 {
+    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
+    uint64_t total_sectors;
+
     if (sector & dev->sector_mask) {
         return false;
     }
     if (size % dev->conf->logical_block_size) {
         return false;
     }
+    bdrv_get_geometry(dev->bs, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
     return true;
 }
 
--
cgit v1.1
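
Taken together, the two virtio-blk patches above establish an ordering
that can be summarized in a sketch: validate first, report invalid
requests straight to the guest, and start accounting only for I/O that
will actually reach the block layer. All names below are illustrative
stubs, not the QEMU API:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { bool valid; } Req;   /* stub request */

    static bool sect_range_ok(const Req *r) { return r->valid; }

    static void handle_rw(Req *req)
    {
        if (!sect_range_ok(req)) {
            /* Invalid request: status goes straight to the guest; the
             * error action and I/O accounting are both bypassed. */
            puts("complete with I/O error status; free request");
            return;
        }
        /* Only now does accounting start, so it counts just the I/O
         * that actually reaches the block layer. */
        puts("acct_start(); submit to block layer");
    }

    int main(void)
    {
        Req bad = { .valid = false };
        handle_rw(&bad);    /* rejected before accounting begins */
        return 0;
    }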
From 58ac321135af890b503ebe56d0d00e184779918f Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:32 +0200
Subject: ide: Treat read/write beyond end as invalid

The block layer fails such reads and writes just fine.  However, they
then get treated like valid operations that fail: the error action
gets executed.  Unwanted; reporting the error to the guest is the
only sensible action.

Reject them before passing them to the block layer.  This bypasses
the error action and I/O accounting.

Not quite correct for DMA, because DMA can fail after some success,
and when that happens, the part that succeeded isn't counted.
Tolerable, because I/O accounting is an inconsistent mess anyway.

Signed-off-by: Markus Armbruster
Signed-off-by: Kevin Wolf
---
 hw/ide/core.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 3a38f1e..db191a6 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -499,6 +499,18 @@ static void ide_rw_error(IDEState *s) {
     ide_set_irq(s->bus);
 }
 
+static bool ide_sect_range_ok(IDEState *s,
+                              uint64_t sector, uint64_t nb_sectors)
+{
+    uint64_t total_sectors;
+
+    bdrv_get_geometry(s->bs, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
 static void ide_sector_read_cb(void *opaque, int ret)
 {
     IDEState *s = opaque;
@@ -554,6 +566,11 @@ void ide_sector_read(IDEState *s)
     printf("sector=%" PRId64 "\n", sector_num);
 #endif
 
+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        ide_rw_error(s);
+        return;
+    }
+
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
@@ -671,6 +688,12 @@ void ide_dma_cb(void *opaque, int ret)
            sector_num, n, s->dma_cmd);
 #endif
 
+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        dma_buf_commit(s);
+        ide_dma_error(s);
+        return;
+    }
+
     switch (s->dma_cmd) {
     case IDE_DMA_READ:
         s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
@@ -790,6 +813,11 @@ void ide_sector_write(IDEState *s)
         n = s->req_nb_sectors;
     }
 
+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        ide_rw_error(s);
+        return;
+    }
+
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
--
cgit v1.1
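
A detail worth noting in ide_sect_range_ok() and its virtio-blk
counterpart: the bounds check is written as "sector > total_sectors ||
nb_sectors > total_sectors - sector" rather than the naive
"sector + nb_sectors > total_sectors", because the addition could wrap
around for a huge nb_sectors and accept a bogus request. A standalone
sketch of the same comparison:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Overflow-safe range check: "sector + nb_sectors > total" can wrap
     * around for large nb_sectors and silently accept a bad request. */
    static bool sect_range_ok(uint64_t total, uint64_t sector,
                              uint64_t nb_sectors)
    {
        return sector <= total && nb_sectors <= total - sector;
    }

    int main(void)
    {
        assert(sect_range_ok(100, 0, 100));         /* whole disk is fine */
        assert(!sect_range_ok(100, 100, 1));        /* starts past the end */
        assert(!sect_range_ok(100, 1, UINT64_MAX)); /* wraps a naive check */
        return 0;
    }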