From d40593dd9033d39a5e4cc32915c5eb28f3e858b6 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 7 Jul 2014 16:38:58 +0200 Subject: block/backup: Fix hang for unaligned image size When doing a block backup of an image with an unaligned size (with respect to the BACKUP_CLUSTER_SIZE), qemu would check the allocation status of sectors after the end of the image. bdrv_is_allocated() returns a result that is valid for 0 sectors in this case, so the backup job ran into an endless loop. Stop looping when we see a result that is valid for 0 sectors; at that point we have reached EOF. The test case looks somewhat unrelated at first sight because I originally tried to reproduce a different suspected bug that turned out not to exist. It is still a good test case, though, and it accidentally found this one. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/backup.c | 2 +- tests/qemu-iotests/028 | 27 ++++- tests/qemu-iotests/028.out | 269 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 296 insertions(+), 2 deletions(-) diff --git a/block/backup.c b/block/backup.c index 7978ae2..d0b0225 100644 --- a/block/backup.c +++ b/block/backup.c @@ -307,7 +307,7 @@ static void coroutine_fn backup_run(void *opaque) BACKUP_SECTORS_PER_CLUSTER - i, &n); i += n; - if (alloced == 1) { + if (alloced == 1 || n == 0) { break; } } diff --git a/tests/qemu-iotests/028 b/tests/qemu-iotests/028 index a99e4fa..d5718c5 100755 --- a/tests/qemu-iotests/028 +++ b/tests/qemu-iotests/028 @@ -33,7 +33,8 @@ status=1 # failure is the default! _cleanup() { - _cleanup_test_img + rm -f "${TEST_IMG}.copy" + _cleanup_test_img } trap "_cleanup; exit \$status" 0 1 2 3 15 @@ -41,6 +42,7 @@ trap "_cleanup; exit \$status" 0 1 2 3 15 . ./common.rc . ./common.filter . ./common.pattern +. ./common.qemu # Any format supporting backing files except vmdk and qcow which do not support # smaller backing files. @@ -99,6 +101,29 @@ _check_test_img # Rebase it on top of its base image $QEMU_IMG rebase -b "$TEST_IMG.base" "$TEST_IMG" +echo +echo block-backup +echo + +qemu_comm_method="monitor" +_launch_qemu -drive file="${TEST_IMG}",cache=${CACHEMODE},id=disk +h=$QEMU_HANDLE +QEMU_COMM_TIMEOUT=1 + +_send_qemu_cmd $h "drive_backup disk ${TEST_IMG}.copy" "(qemu)" +qemu_cmd_repeat=20 _send_qemu_cmd $h "info block-jobs" "No active jobs" +_send_qemu_cmd $h 'quit' "" + +# Base image sectors +TEST_IMG="${TEST_IMG}.copy" io readv $(( offset )) 512 1024 32 + +# Image sectors +TEST_IMG="${TEST_IMG}.copy" io readv $(( offset + 512 )) 512 1024 64 + +# Zero sectors beyond end of base image +TEST_IMG="${TEST_IMG}.copy" io_zero readv $(( offset + 32 * 1024 )) 512 1024 32 + + _check_test_img # success, all done diff --git a/tests/qemu-iotests/028.out b/tests/qemu-iotests/028.out index 8affb7f..38099e4 100644 --- a/tests/qemu-iotests/028.out +++ b/tests/qemu-iotests/028.out @@ -465,5 +465,274 @@ read 512/512 bytes at offset 3221257728 read 512/512 bytes at offset 3221258752 512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image.
+ +block-backup + +QEMU X.Y.Z monitor - type 'help' for more information +(qemu) ddrdridrivdrivedrive_drive_bdrive_badrive_bacdrive_backdrive_backudrive_backupdrive_backup drive_backup ddrive_backup didrive_backup disdrive_backup diskdrive_backup disk drive_backup disk /drive_backup disk /hdrive_backup disk /hodrive_backup disk /homdrive_backup disk /homedrive_backup disk /home/drive_backup disk /home/kdrive_backup disk /home/kwdrive_backup disk /home/kwodrive_backup disk /home/kwoldrive_backup disk /home/kwolfdrive_backup disk /home/kwolf/drive_backup disk /home/kwolf/sdrive_backup disk /home/kwolf/sodrive_backup disk /home/kwolf/soudrive_backup disk /home/kwolf/sourdrive_backup disk /home/kwolf/sourcdrive_backup disk /home/kwolf/sourcedrive_backup disk /home/kwolf/source/drive_backup disk /home/kwolf/source/qdrive_backup disk /home/kwolf/source/qedrive_backup disk /home/kwolf/source/qemdrive_backup disk /home/kwolf/source/qemudrive_backup disk /home/kwolf/source/qemu/drive_backup disk /home/kwolf/source/qemu/tdrive_backup disk /home/kwolf/source/qemu/tedrive_backup disk /home/kwolf/source/qemu/tesdrive_backup disk /home/kwolf/source/qemu/testdrive_backup disk /home/kwolf/source/qemu/testsdrive_backup disk /home/kwolf/source/qemu/tests/drive_backup disk /home/kwolf/source/qemu/tests/qdrive_backup disk /home/kwolf/source/qemu/tests/qedrive_backup disk /home/kwolf/source/qemu/tests/qemdrive_backup disk /home/kwolf/source/qemu/tests/qemudrive_backup disk /home/kwolf/source/qemu/tests/qemu-drive_backup disk /home/kwolf/source/qemu/tests/qemu-idrive_backup disk /home/kwolf/source/qemu/tests/qemu-iodrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotedrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotesdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotestdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotestsdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/drive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/sdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scrdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scradrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scratdrive_backup disk /home/kwolf/source/qemu/tests/qemu-iotests/scratcdrive_backup disk TEST_DIRdrive_backup disk TEST_DIR/drive_backup disk TEST_DIR/tdrive_backup disk TEST_DIR/t.drive_backup disk TEST_DIR/t.qdrive_backup disk TEST_DIR/t.qcdrive_backup disk TEST_DIR/t.qcodrive_backup disk TEST_DIR/t.qcowdrive_backup disk TEST_DIR/t.qcow2drive_backup disk TEST_DIR/t.qcow2.drive_backup disk TEST_DIR/t.qcow2.cdrive_backup disk TEST_DIR/t.qcow2.codrive_backup disk TEST_DIR/t.qcow2.copdrive_backup disk TEST_DIR/t.qcow2.copy +Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 size=4294968832 backing_file='TEST_DIR/t.qcow2.base' backing_fmt='qcow2' encryption=off cluster_size=65536 lazy_refcounts=off +(qemu) iininfinfoinfo info binfo blinfo bloinfo blocinfo blockinfo block-info block-jinfo block-joinfo block-jobinfo block-jobs +Type backup, device disk: Completed 0 of 4294968832 bytes, speed limit 0 bytes/s +iininfinfoinfo info binfo blinfo bloinfo blocinfo blockinfo block-info block-jinfo block-joinfo block-jobinfo block-jobs +No active jobs +=== IO: pattern 195 +read 512/512 bytes at offset 3221194240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221195264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX 
ops/sec) +read 512/512 bytes at offset 3221196288 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221197312 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221198336 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221199360 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221200384 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221201408 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221202432 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221203456 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221204480 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221205504 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221206528 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221207552 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221208576 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221209600 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221210624 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221211648 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221212672 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221213696 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221214720 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221215744 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221216768 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221217792 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221218816 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221219840 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221220864 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221221888 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221222912 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221223936 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221224960 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221225984 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +=== IO: pattern 196 +read 512/512 bytes at offset 3221194752 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221195776 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221196800 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221197824 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221198848 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and 
XXX ops/sec) +read 512/512 bytes at offset 3221199872 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221200896 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221201920 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221202944 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221203968 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221204992 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221206016 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221207040 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221208064 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221209088 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221210112 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221211136 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221212160 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221213184 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221214208 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221215232 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221216256 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221217280 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221218304 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221219328 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221220352 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221221376 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221222400 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221223424 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221224448 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221225472 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221226496 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221227520 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221228544 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221229568 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221230592 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221231616 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221232640 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221233664 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221234688 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 
512/512 bytes at offset 3221235712 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221236736 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221237760 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221238784 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221239808 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221240832 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221241856 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221242880 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221243904 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221244928 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221245952 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221246976 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221248000 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221249024 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221250048 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221251072 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221252096 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221253120 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221254144 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221255168 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221256192 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221257216 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221258240 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221259264 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +=== IO: pattern 0 +read 512/512 bytes at offset 3221227008 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221228032 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221229056 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221230080 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221231104 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221232128 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221233152 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221234176 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221235200 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221236224 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221237248 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 
512/512 bytes at offset 3221238272 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221239296 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221240320 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221241344 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221242368 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221243392 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221244416 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221245440 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221246464 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221247488 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221248512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221249536 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221250560 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221251584 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221252608 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221253632 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221254656 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221255680 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221256704 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221257728 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 3221258752 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) No errors were found on the image. *** done -- cgit v1.1 From 01fb2705bd19a6e9c1207446793064dbd141df5f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 7 Jul 2014 17:00:37 +0200 Subject: block: Fix bdrv_is_allocated() return value bdrv_is_allocated() should return either 0 or 1 in successful cases. We're lucky that currently, the callers that rely on this (e.g. because they check for ret == 1) don't seem to break badly. They just might skip some optimisation or in the case of qemu-io 'map' print separate lines where a single line would suffice. In theory, a wrong allocation status could lead to image corruption with certain operations, so let's fix this quickly. 
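To see why the raw masked value breaks callers, note that BDRV_BLOCK_ALLOCATED is not bit 0, so "ret & BDRV_BLOCK_ALLOCATED" yields the flag value itself rather than 1, and "!!" normalizes it. A minimal, self-contained sketch (the flag value 0x10 is only an assumption for illustration, not the real QEMU definition):

#include <stdio.h>

/* Stand-in for QEMU's BDRV_BLOCK_ALLOCATED; the exact value is an
 * assumption here -- the point is only that it is not bit 0. */
#define BLOCK_ALLOCATED 0x10

static int is_allocated_buggy(int status)
{
    return status & BLOCK_ALLOCATED;     /* returns 0x10, not 1 */
}

static int is_allocated_fixed(int status)
{
    return !!(status & BLOCK_ALLOCATED); /* normalized to 0 or 1 */
}

int main(void)
{
    int status = BLOCK_ALLOCATED;
    printf("buggy: %d (so 'ret == 1' checks fail)\n", is_allocated_buggy(status));
    printf("fixed: %d\n", is_allocated_fixed(status));
    return 0;
}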
Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block.c b/block.c index 8800a6b..c9629a4 100644 --- a/block.c +++ b/block.c @@ -4040,7 +4040,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } - return (ret & BDRV_BLOCK_ALLOCATED); + return !!(ret & BDRV_BLOCK_ALLOCATED); } /* -- cgit v1.1 From b47ec2c4562117728be36ec2edfbdf9ef2868c6e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:01 +0200 Subject: block: prefer aio_poll to qemu_aio_wait Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- block.c | 2 +- blockjob.c | 2 +- qemu-io-cmds.c | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index c9629a4..510430d 100644 --- a/block.c +++ b/block.c @@ -471,7 +471,7 @@ int bdrv_create(BlockDriver *drv, const char* filename, co = qemu_coroutine_create(bdrv_create_co_entry); qemu_coroutine_enter(co, &cco); while (cco.ret == NOT_DONE) { - qemu_aio_wait(); + aio_poll(qemu_get_aio_context(), true); } } diff --git a/blockjob.c b/blockjob.c index 67a64ea..ca0b4e2 100644 --- a/blockjob.c +++ b/blockjob.c @@ -187,7 +187,7 @@ int block_job_cancel_sync(BlockJob *job) job->opaque = &data; block_job_cancel(job); while (data.ret == -EINPROGRESS) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(bs), true); } return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; } diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 60c1ceb..c503fc6 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -483,7 +483,7 @@ static int do_co_write_zeroes(BlockDriverState *bs, int64_t offset, int count, co = qemu_coroutine_create(co_write_zeroes_entry); qemu_coroutine_enter(co, &data); while (!data.done) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(bs), true); } if (data.ret < 0) { return data.ret; @@ -2027,7 +2027,7 @@ static const cmdinfo_t resume_cmd = { static int wait_break_f(BlockDriverState *bs, int argc, char **argv) { while (!bdrv_debug_is_suspended(bs, argv[1])) { - qemu_aio_wait(); + aio_poll(bdrv_get_aio_context(bs), true); } return 0; -- cgit v1.1 From 87f68d318222563822b5c6b28192215fc4b4e441 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:02 +0200 Subject: block: drop aio functions that operate on the main AioContext The main AioContext should be accessed explicitly via qemu_get_aio_context(). Most of the time, using it is not the right thing to do. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- aio-posix.c | 4 ++-- aio-win32.c | 6 +++--- include/block/aio.h | 17 ++--------------- include/block/blockjob.h | 4 ++-- include/block/coroutine.h | 2 +- main-loop.c | 21 --------------------- tests/test-thread-pool.c | 4 ++-- 7 files changed, 12 insertions(+), 46 deletions(-) diff --git a/aio-posix.c b/aio-posix.c index f921d4f..44c4df3 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -125,7 +125,7 @@ static bool aio_dispatch(AioContext *ctx) bool progress = false; /* - * We have to walk very carefully in case qemu_aio_set_fd_handler is + * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. */ node = QLIST_FIRST(&ctx->aio_handlers); @@ -183,7 +183,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* * If there are callbacks left that have been queued, we need to call them. * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for qemu_aio_wait loops). 
+ * does not need a complete flush (as is the case for aio_poll loops). */ if (aio_bh_poll(ctx)) { blocking = false; diff --git a/aio-win32.c b/aio-win32.c index 23f4e5b..c12f61e 100644 --- a/aio-win32.c +++ b/aio-win32.c @@ -102,7 +102,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* * If there are callbacks left that have been queued, we need to call then. * Do not call select in this case, because it is possible that the caller - * does not need a complete flush (as is the case for qemu_aio_wait loops). + * does not need a complete flush (as is the case for aio_poll loops). */ if (aio_bh_poll(ctx)) { blocking = false; @@ -115,7 +115,7 @@ bool aio_poll(AioContext *ctx, bool blocking) /* * Then dispatch any pending callbacks from the GSource. * - * We have to walk very carefully in case qemu_aio_set_fd_handler is + * We have to walk very carefully in case aio_set_fd_handler is * called while we're walking. */ node = QLIST_FIRST(&ctx->aio_handlers); @@ -177,7 +177,7 @@ bool aio_poll(AioContext *ctx, bool blocking) blocking = false; /* we have to walk very carefully in case - * qemu_aio_set_fd_handler is called while we're walking */ + * aio_set_fd_handler is called while we're walking */ node = QLIST_FIRST(&ctx->aio_handlers); while (node) { AioHandler *tmp; diff --git a/include/block/aio.h b/include/block/aio.h index a92511b..d81250c 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -220,7 +220,7 @@ bool aio_poll(AioContext *ctx, bool blocking); #ifdef CONFIG_POSIX /* Register a file descriptor and associated callbacks. Behaves very similarly * to qemu_set_fd_handler2. Unlike qemu_set_fd_handler2, these callbacks will - * be invoked when using qemu_aio_wait(). + * be invoked when using aio_poll(). * * Code that invokes AIO completion functions should rely on this function * instead of qemu_set_fd_handler[2]. @@ -234,7 +234,7 @@ void aio_set_fd_handler(AioContext *ctx, /* Register an event notifier and associated callbacks. Behaves very similarly * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks - * will be invoked when using qemu_aio_wait(). + * will be invoked when using aio_poll(). * * Code that invokes AIO completion functions should rely on this function * instead of event_notifier_set_handler. @@ -251,19 +251,6 @@ GSource *aio_get_g_source(AioContext *ctx); /* Return the ThreadPool bound to this AioContext */ struct ThreadPool *aio_get_thread_pool(AioContext *ctx); -/* Functions to operate on the main QEMU AioContext. */ - -bool qemu_aio_wait(void); -void qemu_aio_set_event_notifier(EventNotifier *notifier, - EventNotifierHandler *io_read); - -#ifdef CONFIG_POSIX -void qemu_aio_set_fd_handler(int fd, - IOHandler *io_read, - IOHandler *io_write, - void *opaque); -#endif - /** * aio_timer_new: * @ctx: the aio context diff --git a/include/block/blockjob.h b/include/block/blockjob.h index f3cf63f..60aa835 100644 --- a/include/block/blockjob.h +++ b/include/block/blockjob.h @@ -74,7 +74,7 @@ struct BlockJob { * Set to true if the job should cancel itself. The flag must * always be tested just before toggling the busy flag from false * to true. After a job has been cancelled, it should only yield - * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. + * if #aio_poll will ("sooner or later") reenter the coroutine. 
*/ bool cancelled; @@ -87,7 +87,7 @@ struct BlockJob { /** * Set to false by the job while it is in a quiescent state, where * no I/O is pending and the job has yielded on any condition - * that is not detected by #qemu_aio_wait, such as a timer. + * that is not detected by #aio_poll, such as a timer. */ bool busy; diff --git a/include/block/coroutine.h b/include/block/coroutine.h index a1797ae..b408f96 100644 --- a/include/block/coroutine.h +++ b/include/block/coroutine.h @@ -212,7 +212,7 @@ void coroutine_fn co_sleep_ns(QEMUClockType type, int64_t ns); * Yield the coroutine for a given duration * * Behaves similarly to co_sleep_ns(), but the sleeping coroutine will be - * resumed when using qemu_aio_wait(). + * resumed when using aio_poll(). */ void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, int64_t ns); diff --git a/main-loop.c b/main-loop.c index 8a85493..3cc79f8 100644 --- a/main-loop.c +++ b/main-loop.c @@ -498,24 +498,3 @@ QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque) { return aio_bh_new(qemu_aio_context, cb, opaque); } - -bool qemu_aio_wait(void) -{ - return aio_poll(qemu_aio_context, true); -} - -#ifdef CONFIG_POSIX -void qemu_aio_set_fd_handler(int fd, - IOHandler *io_read, - IOHandler *io_write, - void *opaque) -{ - aio_set_fd_handler(qemu_aio_context, fd, io_read, io_write, opaque); -} -#endif - -void qemu_aio_set_event_notifier(EventNotifier *notifier, - EventNotifierHandler *io_read) -{ - aio_set_event_notifier(qemu_aio_context, notifier, io_read); -} diff --git a/tests/test-thread-pool.c b/tests/test-thread-pool.c index aa156bc..f40b7fc 100644 --- a/tests/test-thread-pool.c +++ b/tests/test-thread-pool.c @@ -83,7 +83,7 @@ static void co_test_cb(void *opaque) data->ret = 0; active--; - /* The test continues in test_submit_co, after qemu_aio_wait_all... */ + /* The test continues in test_submit_co, after aio_poll... */ } static void test_submit_co(void) @@ -98,7 +98,7 @@ static void test_submit_co(void) g_assert_cmpint(active, ==, 1); g_assert_cmpint(data.ret, ==, -EINPROGRESS); - /* qemu_aio_wait_all will execute the rest of the coroutine. */ + /* aio_poll will execute the rest of the coroutine. */ while (data.ret == -EINPROGRESS) { aio_poll(ctx, true); -- cgit v1.1 From ef508f427b348c7f0ef2bfe7c080fe5fcaee9f6b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:03 +0200 Subject: test-aio: fix GSource-based timer test The current test depends too much on the implementation of the AioContext GSource. Just iterate on the main loop until the callback has been invoked the right number of times. 
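The same idiom can be reproduced with plain GLib; a minimal sketch, assuming only glib-2.0 (tick and n are made-up names): loop on the observable condition instead of asserting what any single iteration of the GSource does.

#include <glib.h>

static gboolean tick(gpointer opaque)
{
    int *n = opaque;
    (*n)++;
    return *n < 2 ? G_SOURCE_CONTINUE : G_SOURCE_REMOVE; /* fire twice */
}

int main(void)
{
    int n = 0;
    g_timeout_add(10 /* ms */, tick, &n);

    /* Iterate the default main context until the callback has run the
     * expected number of times, rather than asserting how many
     * iterations that takes. */
    while (n < 2) {
        g_main_context_iteration(NULL, TRUE);
    }
    g_assert_cmpint(n, ==, 2);
    return 0;
}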
Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- tests/test-aio.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test-aio.c b/tests/test-aio.c index e5f8b55..264dab9 100644 --- a/tests/test-aio.c +++ b/tests/test-aio.c @@ -806,17 +806,16 @@ static void test_source_timer_schedule(void) g_usleep(1 * G_USEC_PER_SEC); g_assert_cmpint(data.n, ==, 0); - g_assert(g_main_context_iteration(NULL, false)); + g_assert(g_main_context_iteration(NULL, true)); g_assert_cmpint(data.n, ==, 1); + expiry += data.ns; - /* The comment above was not kidding when it said this wakes up itself */ - do { - g_assert(g_main_context_iteration(NULL, true)); - } while (qemu_clock_get_ns(data.clock_type) <= expiry); - g_usleep(1 * G_USEC_PER_SEC); - g_main_context_iteration(NULL, false); + while (data.n < 2) { + g_main_context_iteration(NULL, true); + } g_assert_cmpint(data.n, ==, 2); + g_assert(qemu_clock_get_ns(data.clock_type) > expiry); aio_set_fd_handler(ctx, pipefd[0], NULL, NULL, NULL); close(pipefd[0]); -- cgit v1.1 From 0ceb849bd336a5f9b6e1ed56d45cf5773d251ad8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 7 Jul 2014 15:18:04 +0200 Subject: AioContext: speed up aio_notify In many cases, the call to event_notifier_set in aio_notify is unnecessary. In particular, if we are executing aio_dispatch, or if aio_poll is not blocking, we know that we will soon get to the next loop iteration (if necessary); the thread that hosts the AioContext's event loop does not need any nudging. The patch includes a Promela formal model that shows that this really works and does not need any further complication such as generation counts. It needs a memory barrier though. The generation counts are not needed because any change to ctx->dispatching after the memory barrier is okay for aio_notify. If it changes from zero to one, it is the right thing to skip event_notifier_set. If it changes from one to zero, the event_notifier_set is unnecessary but harmless. Signed-off-by: Paolo Bonzini Signed-off-by: Kevin Wolf --- aio-posix.c | 34 +++++++++++++++- async.c | 19 ++++++++- docs/aio_notify.promela | 104 ++++++++++++++++++++++++++++++++++++++++++++++++ include/block/aio.h | 9 +++++ 4 files changed, 164 insertions(+), 2 deletions(-) create mode 100644 docs/aio_notify.promela diff --git a/aio-posix.c b/aio-posix.c index 44c4df3..2eada2e 100644 --- a/aio-posix.c +++ b/aio-posix.c @@ -175,11 +175,38 @@ static bool aio_dispatch(AioContext *ctx) bool aio_poll(AioContext *ctx, bool blocking) { AioHandler *node; + bool was_dispatching; int ret; bool progress; + was_dispatching = ctx->dispatching; progress = false; + /* aio_notify can avoid the expensive event_notifier_set if + * everything (file descriptors, bottom halves, timers) will + * be re-evaluated before the next blocking poll(). This happens + * in two cases: + * + * 1) when aio_poll is called with blocking == false + * + * 2) when we are called after poll(). If we are called before + * poll(), bottom halves will not be re-evaluated and we need + * aio_notify() if blocking == true. + * + * The first aio_dispatch() only does something when AioContext is + * running as a GSource, and in that case aio_poll is used only + * with blocking == false, so this optimization is already quite + * effective. However, the code is ugly and should be restructured + * to have a single aio_dispatch() call. To do this, we need to + * reorganize aio_poll into a prepare/poll/dispatch model like + * glib's. 
+ * + * If we're in a nested event loop, ctx->dispatching might be true. + * In that case we can restore it just before returning, but we + * have to clear it now. + */ + aio_set_dispatching(ctx, !blocking); + /* * If there are callbacks left that have been queued, we need to call them. * Do not call select in this case, because it is possible that the caller @@ -190,12 +217,14 @@ bool aio_poll(AioContext *ctx, bool blocking) progress = true; } + /* Re-evaluate condition (1) above. */ + aio_set_dispatching(ctx, !blocking); if (aio_dispatch(ctx)) { progress = true; } if (progress && !blocking) { - return true; + goto out; } ctx->walking_handlers++; @@ -234,9 +263,12 @@ bool aio_poll(AioContext *ctx, bool blocking) } /* Run dispatch even if there were no readable fds to run timers */ + aio_set_dispatching(ctx, true); if (aio_dispatch(ctx)) { progress = true; } +out: + aio_set_dispatching(ctx, was_dispatching); return progress; } diff --git a/async.c b/async.c index 5b6fe6b..34af0b2 100644 --- a/async.c +++ b/async.c @@ -26,6 +26,7 @@ #include "block/aio.h" #include "block/thread-pool.h" #include "qemu/main-loop.h" +#include "qemu/atomic.h" /***********************************************************/ /* bottom halves (can be seen as timers which expire ASAP) */ @@ -247,9 +248,25 @@ ThreadPool *aio_get_thread_pool(AioContext *ctx) return ctx->thread_pool; } +void aio_set_dispatching(AioContext *ctx, bool dispatching) +{ + ctx->dispatching = dispatching; + if (!dispatching) { + /* Write ctx->dispatching before reading e.g. bh->scheduled. + * Optimization: this is only needed when we're entering the "unsafe" + * phase where other threads must call event_notifier_set. + */ + smp_mb(); + } +} + void aio_notify(AioContext *ctx) { - event_notifier_set(&ctx->notifier); + /* Write e.g. bh->scheduled before reading ctx->dispatching. */ + smp_mb(); + if (!ctx->dispatching) { + event_notifier_set(&ctx->notifier); + } } static void aio_timerlist_notify(void *opaque) diff --git a/docs/aio_notify.promela b/docs/aio_notify.promela new file mode 100644 index 0000000..ad3f6f0 --- /dev/null +++ b/docs/aio_notify.promela @@ -0,0 +1,104 @@ +/* + * This model describes the interaction between aio_set_dispatching() + * and aio_notify(). + * + * Author: Paolo Bonzini + * + * This file is in the public domain. If you really want a license, + * the WTFPL will do. + * + * To simulate it: + * spin -p docs/aio_notify.promela + * + * To verify it: + * spin -a docs/aio_notify.promela + * gcc -O2 pan.c + * ./a.out -a + */ + +#define MAX 4 +#define LAST (1 << (MAX - 1)) +#define FINAL ((LAST << 1) - 1) + +bool dispatching; +bool event; + +int req, done; + +active proctype waiter() +{ + int fetch, blocking; + + do + :: done != FINAL -> { + // Computing "blocking" is separate from execution of the + // "bottom half" + blocking = (req == 0); + + // This is our "bottom half" + atomic { fetch = req; req = 0; } + done = done | fetch; + + // Wait for a nudge from the other side + do + :: event == 1 -> { event = 0; break; } + :: !blocking -> break; + od; + + dispatching = 1; + + // If you are simulating this model, you may want to add + // something like this here: + // + // int foo; foo++; foo++; foo++; + // + // This only wastes some time and makes it more likely + // that the notifier process hits the "fast path". 
+ + dispatching = 0; + } + :: else -> break; + od +} + +active proctype notifier() +{ + int next = 1; + int sets = 0; + + do + :: next <= LAST -> { + // generate a request + req = req | next; + next = next << 1; + + // aio_notify + if + :: dispatching == 0 -> sets++; event = 1; + :: else -> skip; + fi; + + // Test both synchronous and asynchronous delivery + if + :: 1 -> do + :: req == 0 -> break; + od; + :: 1 -> skip; + fi; + } + :: else -> break; + od; + printf("Skipped %d event_notifier_set\n", MAX - sets); +} + +#define p (done == FINAL) + +never { + do + :: 1 // after an arbitrarily long prefix + :: p -> break // p becomes true + od; + do + :: !p -> accept: break // it then must remains true forever after + od +} diff --git a/include/block/aio.h b/include/block/aio.h index d81250c..433e7ff 100644 --- a/include/block/aio.h +++ b/include/block/aio.h @@ -60,8 +60,14 @@ struct AioContext { */ int walking_handlers; + /* Used to avoid unnecessary event_notifier_set calls in aio_notify. + * Writes protected by lock or BQL, reads are lockless. + */ + bool dispatching; + /* lock to protect between bh's adders and deleter */ QemuMutex bh_lock; + /* Anchor of the list of Bottom Halves belonging to the context */ struct QEMUBH *first_bh; @@ -83,6 +89,9 @@ struct AioContext { QEMUTimerListGroup tlg; }; +/* Used internally to synchronize aio_poll against qemu_bh_schedule. */ +void aio_set_dispatching(AioContext *ctx, bool dispatching); + /** * aio_context_new: Allocate a new AioContext. * -- cgit v1.1 From 33f461e0c5d8efa21ef7e746be561fc57a1df106 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 3 Jul 2014 13:21:24 +0200 Subject: block: Make qiov match the request size until EOF If a read request goes across EOF, the block driver sees a shortened request that stops at EOF (the rest is memsetted in block.c), however the original qiov was used for this request. This patch makes the qiov size match the request size, avoiding a potential buffer overflow in raw-posix. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/block.c b/block.c index 510430d..0143268 100644 --- a/block.c +++ b/block.c @@ -3054,8 +3054,20 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, max_nb_sectors = ROUND_UP(MAX(0, total_sectors - sector_num), align >> BDRV_SECTOR_BITS); if (max_nb_sectors > 0) { - ret = drv->bdrv_co_readv(bs, sector_num, - MIN(nb_sectors, max_nb_sectors), qiov); + QEMUIOVector local_qiov; + size_t local_sectors; + + max_nb_sectors = MIN(max_nb_sectors, SIZE_MAX / BDRV_SECTOR_BITS); + local_sectors = MIN(max_nb_sectors, nb_sectors); + + qemu_iovec_init(&local_qiov, qiov->niov); + qemu_iovec_concat(&local_qiov, qiov, 0, + local_sectors * BDRV_SECTOR_SIZE); + + ret = drv->bdrv_co_readv(bs, sector_num, local_sectors, + &local_qiov); + + qemu_iovec_destroy(&local_qiov); } else { ret = 0; } -- cgit v1.1 From 44deba5a52576508f27edadf953e435141e2a76a Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 3 Jul 2014 14:43:32 +0200 Subject: qcow2: Make qiov match request size until backing file EOF If a qcow2 image has a shorter backing file and a read request to unallocated clusters goes across EOF of the backing file, the backing file sees a shortened request and the rest is filled with zeros. However, the original too long qiov was used with the shortened request. This patch makes the qiov size match the request size, avoiding a potential buffer overflow in raw-posix. 
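The mechanism is the same in this and the following qiov fixes: build a copy of the I/O vector whose total length equals the shortened request. A hedged sketch with plain POSIX iovecs (shorten_iov is a made-up helper, standing in for the qemu_iovec_init/qemu_iovec_concat pair used in the patches):

#include <assert.h>
#include <sys/uio.h>

/* Build in 'dst' a prefix of 'src' totalling at most 'limit' bytes,
 * truncating the last entry; returns the number of entries used. */
static int shorten_iov(struct iovec *dst, const struct iovec *src,
                       int src_cnt, size_t limit)
{
    int i;
    for (i = 0; i < src_cnt && limit > 0; i++) {
        size_t len = src[i].iov_len < limit ? src[i].iov_len : limit;
        dst[i].iov_base = src[i].iov_base;
        dst[i].iov_len = len;
        limit -= len;
    }
    return i;
}

int main(void)
{
    char a[512], b[512];
    struct iovec src[2] = { { a, sizeof(a) }, { b, sizeof(b) } };
    struct iovec dst[2];

    /* A 1024-byte read that straddles backing-file EOF after 768 bytes
     * must be submitted with a 768-byte vector, not the original one. */
    int cnt = shorten_iov(dst, src, 2, 768);
    assert(cnt == 2);
    assert(dst[0].iov_len == 512 && dst[1].iov_len == 256);
    return 0;
}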
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block/qcow2.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/block/qcow2.c b/block/qcow2.c index 67e55c9..b0faa69 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1020,11 +1020,20 @@ static coroutine_fn int qcow2_co_readv(BlockDriverState *bs, int64_t sector_num, n1 = qcow2_backing_read1(bs->backing_hd, &hd_qiov, sector_num, cur_nr_sectors); if (n1 > 0) { + QEMUIOVector local_qiov; + + qemu_iovec_init(&local_qiov, hd_qiov.niov); + qemu_iovec_concat(&local_qiov, &hd_qiov, 0, + n1 * BDRV_SECTOR_SIZE); + BLKDBG_EVENT(bs->file, BLKDBG_READ_BACKING_AIO); qemu_co_mutex_unlock(&s->lock); ret = bdrv_co_readv(bs->backing_hd, sector_num, - n1, &hd_qiov); + n1, &local_qiov); qemu_co_mutex_lock(&s->lock); + + qemu_iovec_destroy(&local_qiov); + if (ret < 0) { goto fail; } -- cgit v1.1 From f06ee3d4aa547df8d7d2317b2b6db7a88c1f3744 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 4 Jul 2014 17:11:28 +0200 Subject: qed: Make qiov match request size until backing file EOF If a QED image has a shorter backing file and a read request to unallocated clusters goes across EOF of the backing file, the backing file sees a shortened request and the rest is filled with zeros. However, the original too long qiov was used with the shortened request. This patch makes the qiov size match the request size, avoiding a potential buffer overflow in raw-posix. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake --- block/qed.c | 38 ++++++++++++++++++++++++++++++-------- block/qed.h | 1 + 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/block/qed.c b/block/qed.c index b69374b..cd4872b 100644 --- a/block/qed.c +++ b/block/qed.c @@ -761,17 +761,19 @@ static BDRVQEDState *acb_to_s(QEDAIOCB *acb) /** * Read from the backing file or zero-fill if no backing file * - * @s: QED state - * @pos: Byte position in device - * @qiov: Destination I/O vector - * @cb: Completion function - * @opaque: User data for completion function + * @s: QED state + * @pos: Byte position in device + * @qiov: Destination I/O vector + * @backing_qiov: Possibly shortened copy of qiov, to be allocated here + * @cb: Completion function + * @opaque: User data for completion function * * This function reads qiov->size bytes starting at pos from the backing file. * If there is no backing file then zeroes are read. 
*/ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, QEMUIOVector *qiov, + QEMUIOVector **backing_qiov, BlockDriverCompletionFunc *cb, void *opaque) { uint64_t backing_length = 0; @@ -804,15 +806,21 @@ static void qed_read_backing_file(BDRVQEDState *s, uint64_t pos, /* If the read straddles the end of the backing file, shorten it */ size = MIN((uint64_t)backing_length - pos, qiov->size); + assert(*backing_qiov == NULL); + *backing_qiov = g_new(QEMUIOVector, 1); + qemu_iovec_init(*backing_qiov, qiov->niov); + qemu_iovec_concat(*backing_qiov, qiov, 0, size); + BLKDBG_EVENT(s->bs->file, BLKDBG_READ_BACKING_AIO); bdrv_aio_readv(s->bs->backing_hd, pos / BDRV_SECTOR_SIZE, - qiov, size / BDRV_SECTOR_SIZE, cb, opaque); + *backing_qiov, size / BDRV_SECTOR_SIZE, cb, opaque); } typedef struct { GenericCB gencb; BDRVQEDState *s; QEMUIOVector qiov; + QEMUIOVector *backing_qiov; struct iovec iov; uint64_t offset; } CopyFromBackingFileCB; @@ -829,6 +837,12 @@ static void qed_copy_from_backing_file_write(void *opaque, int ret) CopyFromBackingFileCB *copy_cb = opaque; BDRVQEDState *s = copy_cb->s; + if (copy_cb->backing_qiov) { + qemu_iovec_destroy(copy_cb->backing_qiov); + g_free(copy_cb->backing_qiov); + copy_cb->backing_qiov = NULL; + } + if (ret) { qed_copy_from_backing_file_cb(copy_cb, ret); return; @@ -866,11 +880,12 @@ static void qed_copy_from_backing_file(BDRVQEDState *s, uint64_t pos, copy_cb = gencb_alloc(sizeof(*copy_cb), cb, opaque); copy_cb->s = s; copy_cb->offset = offset; + copy_cb->backing_qiov = NULL; copy_cb->iov.iov_base = qemu_blockalign(s->bs, len); copy_cb->iov.iov_len = len; qemu_iovec_init_external(&copy_cb->qiov, &copy_cb->iov, 1); - qed_read_backing_file(s, pos, &copy_cb->qiov, + qed_read_backing_file(s, pos, &copy_cb->qiov, &copy_cb->backing_qiov, qed_copy_from_backing_file_write, copy_cb); } @@ -1313,7 +1328,7 @@ static void qed_aio_read_data(void *opaque, int ret, return; } else if (ret != QED_CLUSTER_FOUND) { qed_read_backing_file(s, acb->cur_pos, &acb->cur_qiov, - qed_aio_next_io, acb); + &acb->backing_qiov, qed_aio_next_io, acb); return; } @@ -1339,6 +1354,12 @@ static void qed_aio_next_io(void *opaque, int ret) trace_qed_aio_next_io(s, acb, ret, acb->cur_pos + acb->cur_qiov.size); + if (acb->backing_qiov) { + qemu_iovec_destroy(acb->backing_qiov); + g_free(acb->backing_qiov); + acb->backing_qiov = NULL; + } + /* Handle I/O error */ if (ret) { qed_aio_complete(acb, ret); @@ -1378,6 +1399,7 @@ static BlockDriverAIOCB *qed_aio_setup(BlockDriverState *bs, acb->qiov_offset = 0; acb->cur_pos = (uint64_t)sector_num * BDRV_SECTOR_SIZE; acb->end_pos = acb->cur_pos + nb_sectors * BDRV_SECTOR_SIZE; + acb->backing_qiov = NULL; acb->request.l2_table = NULL; qemu_iovec_init(&acb->cur_qiov, qiov->niov); diff --git a/block/qed.h b/block/qed.h index b024751..2b0e724 100644 --- a/block/qed.h +++ b/block/qed.h @@ -142,6 +142,7 @@ typedef struct QEDAIOCB { /* Current cluster scatter-gather list */ QEMUIOVector cur_qiov; + QEMUIOVector *backing_qiov; uint64_t cur_pos; /* position on block device, in bytes */ uint64_t cur_cluster; /* cluster offset in image file */ unsigned int cur_nclusters; /* number of clusters being accessed */ -- cgit v1.1 From 8eb029c26ecf61da6c2b45c3c23472e8c477ba34 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 1 Jul 2014 16:09:54 +0200 Subject: block: Assert qiov length matches request length At least raw-posix relies on this because it can allocate bounce buffers based on the request length, but then accesses them using all of the qiov entries later.
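A minimal sketch of the invariant being asserted (iov_size and submit are illustrative stand-ins, not QEMU functions): any path that sizes a bounce buffer from the request length but copies through the vector's entries must insist that the two agree.

#include <assert.h>
#include <stddef.h>
#include <sys/uio.h>

/* Total byte count of an I/O vector, analogous to qiov->size. */
static size_t iov_size(const struct iovec *iov, int cnt)
{
    size_t total = 0;
    for (int i = 0; i < cnt; i++) {
        total += iov[i].iov_len;
    }
    return total;
}

/* A request that allocates a bounce buffer of 'nbytes' but gathers
 * data entry by entry would overflow if the vector were longer, so
 * the mismatch is rejected up front. */
static void submit(const struct iovec *iov, int cnt, size_t nbytes)
{
    assert(iov_size(iov, cnt) == nbytes);
    /* ... allocate nbytes, gather/scatter through iov ... */
}

int main(void)
{
    char buf[1024];
    struct iovec iov = { buf, sizeof(buf) };
    submit(&iov, 1, sizeof(buf)); /* OK: lengths agree */
    return 0;
}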
Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- block.c | 2 ++ block/raw-posix.c | 15 +++++++++++---- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 0143268..3e252a2 100644 --- a/block.c +++ b/block.c @@ -3010,6 +3010,7 @@ static int coroutine_fn bdrv_aligned_preadv(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(!qiov || bytes == qiov->size); /* Handle Copy on Read and associated serialisation */ if (flags & BDRV_REQ_COPY_ON_READ) { @@ -3279,6 +3280,7 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, assert((offset & (BDRV_SECTOR_SIZE - 1)) == 0); assert((bytes & (BDRV_SECTOR_SIZE - 1)) == 0); + assert(!qiov || bytes == qiov->size); waited = wait_serialising_requests(req); assert(!waited || !req->serialising); diff --git a/block/raw-posix.c b/block/raw-posix.c index a857def..2bcc73d 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -790,6 +790,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) memcpy(p, aiocb->aio_iov[i].iov_base, aiocb->aio_iov[i].iov_len); p += aiocb->aio_iov[i].iov_len; } + assert(p - buf == aiocb->aio_nbytes); } nbytes = handle_aiocb_rw_linear(aiocb, buf); @@ -804,9 +805,11 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) copy = aiocb->aio_iov[i].iov_len; } memcpy(aiocb->aio_iov[i].iov_base, p, copy); + assert(count >= copy); p += copy; count -= copy; } + assert(count == 0); } qemu_vfree(buf); @@ -993,12 +996,14 @@ static int paio_submit_co(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; + acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE; + acb->aio_offset = sector_num * BDRV_SECTOR_SIZE; + if (qiov) { acb->aio_iov = qiov->iov; acb->aio_niov = qiov->niov; + assert(qiov->size == acb->aio_nbytes); } - acb->aio_nbytes = nb_sectors * 512; - acb->aio_offset = sector_num * 512; trace_paio_submit_co(sector_num, nb_sectors, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); @@ -1016,12 +1021,14 @@ static BlockDriverAIOCB *paio_submit(BlockDriverState *bs, int fd, acb->aio_type = type; acb->aio_fildes = fd; + acb->aio_nbytes = nb_sectors * BDRV_SECTOR_SIZE; + acb->aio_offset = sector_num * BDRV_SECTOR_SIZE; + if (qiov) { acb->aio_iov = qiov->iov; acb->aio_niov = qiov->niov; + assert(qiov->size == acb->aio_nbytes); } - acb->aio_nbytes = nb_sectors * 512; - acb->aio_offset = sector_num * 512; trace_paio_submit(acb, opaque, sector_num, nb_sectors, type); pool = aio_get_thread_pool(bdrv_get_aio_context(bs)); -- cgit v1.1 From 0a21ea3289c5a3b982386e3eaaa37627c18f5e35 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:46 +0200 Subject: virtio-blk: avoid dataplane VirtIOBlockReq early free VirtIOBlockReq is freed later by virtio_blk_free_request() in hw/block/virtio-blk.c. Remove this extraneous g_slice_free(). 
This patch fixes the following segfault: 0x00005555556373af in virtio_blk_rw_complete (opaque=0x5555565ff5e0, ret=0) at hw/block/virtio-blk.c:99 99 bdrv_acct_done(req->dev->bs, &req->acct); (gdb) print req $1 = (VirtIOBlockReq *) 0x5555565ff5e0 (gdb) print req->dev $2 = (VirtIOBlock *) 0x0 (gdb) bt #0 0x00005555556373af in virtio_blk_rw_complete (opaque=0x5555565ff5e0, ret=0) at hw/block/virtio-blk.c:99 #1 0x0000555555840ebe in bdrv_co_em_bh (opaque=0x5555566152d0) at block.c:4675 #2 0x000055555583de77 in aio_bh_poll (ctx=ctx@entry=0x5555563a8150) at async.c:81 #3 0x000055555584b7a7 in aio_poll (ctx=0x5555563a8150, blocking=blocking@entry=true) at aio-posix.c:188 #4 0x00005555556e520e in iothread_run (opaque=0x5555563a7fd8) at iothread.c:41 #5 0x00007ffff42ba124 in start_thread () from /usr/lib/libpthread.so.0 #6 0x00007ffff16d14bd in clone () from /usr/lib/libc.so.6 Reported-by: Max Reitz Cc: Fam Zheng Signed-off-by: Stefan Hajnoczi Tested-by: Christian Borntraeger Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index 4bc0729..bed9f13 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -68,7 +68,6 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) vring_push(&req->dev->dataplane->vring, req->elem, req->qiov.size + sizeof(*req->in)); notify_guest(req->dev->dataplane); - g_slice_free(VirtIOBlockReq, req); } static void handle_notify(EventNotifier *e) -- cgit v1.1 From abd764250fbce6f285513d74f03eb5c526e520f6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:47 +0200 Subject: dataplane: do not free VirtQueueElement in vring_push() VirtQueueElement is allocated in vring_pop() so it seems to make sense that vring_push() should free it. Alas, virtio-blk frees VirtQueueElement itself in virtio_blk_free_request(). This patch solves a double-free assertion in glib's g_slice_free(). Rename vring_free_element() to vring_unmap_element() since it no longer frees the VirtQueueElement. 
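The ownership rule this establishes can be sketched in a few lines of plain C (the types and names are schematic, not the actual vring API): the push side releases only the resources it set up itself, and the element's storage is freed exactly once, by the layer that allocated it.

#include <stdlib.h>

typedef struct Elem {
    int index;
    /* ... mapped guest buffers would live here ... */
} Elem;

/* Undo only what the ring layer itself set up (the mappings);
 * the element's storage is not this layer's to free. */
static void ring_unmap_element(Elem *elem)
{
    (void)elem;
}

static void ring_push(Elem *elem)
{
    ring_unmap_element(elem);
    /* ... publish elem->index to the used ring ... */
}

int main(void)
{
    Elem *elem = malloc(sizeof(*elem)); /* allocated by the device code */
    elem->index = 0;
    ring_push(elem);                    /* no longer frees */
    free(elem);                         /* single, unambiguous owner */
    return 0;
}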
Signed-off-by: Stefan Hajnoczi Tested-by: Christian Borntraeger Signed-off-by: Kevin Wolf --- hw/virtio/dataplane/vring.c | 9 ++++----- include/hw/virtio/dataplane/vring.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 665a1ff..5d17d39 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -272,7 +272,7 @@ static int get_indirect(Vring *vring, VirtQueueElement *elem, return 0; } -void vring_free_element(VirtQueueElement *elem) +static void vring_unmap_element(VirtQueueElement *elem) { int i; @@ -287,8 +287,6 @@ void vring_free_element(VirtQueueElement *elem) for (i = 0; i < elem->in_num; i++) { vring_unmap(elem->in_sg[i].iov_base, true); } - - g_slice_free(VirtQueueElement, elem); } /* This looks in the virtqueue and for the first available buffer, and converts @@ -402,7 +400,8 @@ out: vring->broken = true; } if (elem) { - vring_free_element(elem); + vring_unmap_element(elem); + g_slice_free(VirtQueueElement, elem); } *p_elem = NULL; return ret; @@ -418,7 +417,7 @@ void vring_push(Vring *vring, VirtQueueElement *elem, int len) unsigned int head = elem->index; uint16_t new; - vring_free_element(elem); + vring_unmap_element(elem); /* Don't touch vring if a fatal error occurred */ if (vring->broken) { diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h index 63e7bf4..b23edd2 100644 --- a/include/hw/virtio/dataplane/vring.h +++ b/include/hw/virtio/dataplane/vring.h @@ -55,6 +55,5 @@ bool vring_enable_notification(VirtIODevice *vdev, Vring *vring); bool vring_should_notify(VirtIODevice *vdev, Vring *vring); int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem); void vring_push(Vring *vring, VirtQueueElement *elem, int len); -void vring_free_element(VirtQueueElement *elem); #endif /* VRING_H */ -- cgit v1.1 From 869d66af53d8e04709456c9cae5cca7c560d4b93 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:48 +0200 Subject: virtio-blk: avoid g_slice_new0() for VirtIOBlockReq and VirtQueueElement In commit de6c8042ec55da18702fa51f09072fcaa315edc3 ("virtio-blk: Avoid zeroing every request structure") we avoided the 40 KB memset when allocating VirtIOBlockReq. The memset was reintroduced in commit 671ec3f056559f22a2531a91dce3a258b9b5eb8a ("virtio-blk: Convert VirtIOBlockReq.elem to pointer"). It must be fixed again to avoid a performance regression. Cc: Fam Zheng Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/block/virtio-blk.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index aec3146..b06a56d 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -31,9 +31,11 @@ static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) { - VirtIOBlockReq *req = g_slice_new0(VirtIOBlockReq); + VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; - req->elem = g_slice_new0(VirtQueueElement); + req->qiov.size = 0; + req->next = NULL; + req->elem = g_slice_new(VirtQueueElement); return req; } -- cgit v1.1 From f897bf751fbd95e4015b95d202c706548586813a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 9 Jul 2014 10:05:49 +0200 Subject: virtio-blk: embed VirtQueueElement in VirtIOBlockReq The memory allocation between hw/block/virtio-blk.c, hw/block/dataplane/virtio-blk.c, and hw/virtio/dataplane/vring.c is messy. Structs are allocated in different files than they are freed in. 
This is risky and makes memory leaks easier. Embed VirtQueueElement in VirtIOBlockReq to reduce the amount of memory allocation we need to juggle. This also makes vring.c and virtio.c slightly more similar. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- hw/block/dataplane/virtio-blk.c | 29 +++++++++++------------ hw/block/virtio-blk.c | 46 ++++++++++++++++++------------------- hw/virtio/dataplane/vring.c | 17 +++++--------- include/hw/virtio/dataplane/vring.h | 2 +- include/hw/virtio/virtio-blk.h | 6 ++++- 5 files changed, 48 insertions(+), 52 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index bed9f13..227bb15 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -65,7 +65,7 @@ static void complete_request_vring(VirtIOBlockReq *req, unsigned char status) { stb_p(&req->in->status, status); - vring_push(&req->dev->dataplane->vring, req->elem, + vring_push(&req->dev->dataplane->vring, &req->elem, req->qiov.size + sizeof(*req->in)); notify_guest(req->dev->dataplane); } @@ -74,33 +74,32 @@ static void handle_notify(EventNotifier *e) { VirtIOBlockDataPlane *s = container_of(e, VirtIOBlockDataPlane, host_notifier); - - VirtQueueElement *elem; - VirtIOBlockReq *req; - int ret; - MultiReqBuffer mrb = { - .num_writes = 0, - }; + VirtIOBlock *vblk = VIRTIO_BLK(s->vdev); event_notifier_test_and_clear(&s->host_notifier); bdrv_io_plug(s->blk->conf.bs); for (;;) { + MultiReqBuffer mrb = { + .num_writes = 0, + }; + int ret; + /* Disable guest->host notifies to avoid unnecessary vmexits */ vring_disable_notification(s->vdev, &s->vring); for (;;) { - ret = vring_pop(s->vdev, &s->vring, &elem); + VirtIOBlockReq *req = virtio_blk_alloc_request(vblk); + + ret = vring_pop(s->vdev, &s->vring, &req->elem); if (ret < 0) { - assert(elem == NULL); + virtio_blk_free_request(req); break; /* no more requests */ } - trace_virtio_blk_data_plane_process_request(s, elem->out_num, - elem->in_num, elem->index); + trace_virtio_blk_data_plane_process_request(s, req->elem.out_num, + req->elem.in_num, + req->elem.index); - req = g_slice_new(VirtIOBlockReq); - req->dev = VIRTIO_BLK(s->vdev); - req->elem = elem; virtio_blk_handle_request(req, &mrb); } diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index b06a56d..02cd6b0 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -29,20 +29,18 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" -static VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) +VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s) { VirtIOBlockReq *req = g_slice_new(VirtIOBlockReq); req->dev = s; req->qiov.size = 0; req->next = NULL; - req->elem = g_slice_new(VirtQueueElement); return req; } -static void virtio_blk_free_request(VirtIOBlockReq *req) +void virtio_blk_free_request(VirtIOBlockReq *req) { if (req) { - g_slice_free(VirtQueueElement, req->elem); g_slice_free(VirtIOBlockReq, req); } } @@ -56,7 +54,7 @@ static void virtio_blk_complete_request(VirtIOBlockReq *req, trace_virtio_blk_req_complete(req, status); stb_p(&req->in->status, status); - virtqueue_push(s->vq, req->elem, req->qiov.size + sizeof(*req->in)); + virtqueue_push(s->vq, &req->elem, req->qiov.size + sizeof(*req->in)); virtio_notify(vdev, s->vq); } @@ -121,7 +119,7 @@ static VirtIOBlockReq *virtio_blk_get_request(VirtIOBlock *s) { VirtIOBlockReq *req = virtio_blk_alloc_request(s); - if (!virtqueue_pop(s->vq, req->elem)) { + if (!virtqueue_pop(s->vq, &req->elem)) { virtio_blk_free_request(req); 
         return NULL;
     }
 
@@ -254,7 +252,7 @@ static void virtio_blk_handle_scsi(VirtIOBlockReq *req)
 {
     int status;
 
-    status = virtio_blk_handle_scsi_req(req->dev, req->elem);
+    status = virtio_blk_handle_scsi_req(req->dev, &req->elem);
     virtio_blk_req_complete(req, status);
     virtio_blk_free_request(req);
 }
@@ -351,12 +349,12 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
     uint32_t type;
-    struct iovec *in_iov = req->elem->in_sg;
-    struct iovec *iov = req->elem->out_sg;
-    unsigned in_num = req->elem->in_num;
-    unsigned out_num = req->elem->out_num;
+    struct iovec *in_iov = req->elem.in_sg;
+    struct iovec *iov = req->elem.out_sg;
+    unsigned in_num = req->elem.in_num;
+    unsigned out_num = req->elem.out_num;
 
-    if (req->elem->out_num < 1 || req->elem->in_num < 1) {
+    if (req->elem.out_num < 1 || req->elem.in_num < 1) {
         error_report("virtio-blk missing headers");
         exit(1);
     }
@@ -393,19 +391,19 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb)
          * NB: per existing s/n string convention the string is
          * terminated by '\0' only when shorter than buffer.
          */
-        strncpy(req->elem->in_sg[0].iov_base,
+        strncpy(req->elem.in_sg[0].iov_base,
                 s->blk.serial ? s->blk.serial : "",
-                MIN(req->elem->in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
+                MIN(req->elem.in_sg[0].iov_len, VIRTIO_BLK_ID_BYTES));
         virtio_blk_req_complete(req, VIRTIO_BLK_S_OK);
         virtio_blk_free_request(req);
     } else if (type & VIRTIO_BLK_T_OUT) {
-        qemu_iovec_init_external(&req->qiov, &req->elem->out_sg[1],
-                                 req->elem->out_num - 1);
+        qemu_iovec_init_external(&req->qiov, &req->elem.out_sg[1],
+                                 req->elem.out_num - 1);
         virtio_blk_handle_write(req, mrb);
     } else if (type == VIRTIO_BLK_T_IN || type == VIRTIO_BLK_T_BARRIER) {
         /* VIRTIO_BLK_T_IN is 0, so we can't just & it. */
-        qemu_iovec_init_external(&req->qiov, &req->elem->in_sg[0],
-                                 req->elem->in_num - 1);
+        qemu_iovec_init_external(&req->qiov, &req->elem.in_sg[0],
+                                 req->elem.in_num - 1);
         virtio_blk_handle_read(req);
     } else {
         virtio_blk_req_complete(req, VIRTIO_BLK_S_UNSUPP);
@@ -629,7 +627,7 @@ static void virtio_blk_save_device(VirtIODevice *vdev, QEMUFile *f)
 
     while (req) {
         qemu_put_sbyte(f, 1);
-        qemu_put_buffer(f, (unsigned char *)req->elem,
+        qemu_put_buffer(f, (unsigned char *)&req->elem,
                         sizeof(VirtQueueElement));
         req = req->next;
     }
@@ -654,15 +652,15 @@ static int virtio_blk_load_device(VirtIODevice *vdev, QEMUFile *f,
     while (qemu_get_sbyte(f)) {
         VirtIOBlockReq *req = virtio_blk_alloc_request(s);
-        qemu_get_buffer(f, (unsigned char *)req->elem,
+        qemu_get_buffer(f, (unsigned char *)&req->elem,
                         sizeof(VirtQueueElement));
         req->next = s->rq;
         s->rq = req;
 
-        virtqueue_map_sg(req->elem->in_sg, req->elem->in_addr,
-                         req->elem->in_num, 1);
-        virtqueue_map_sg(req->elem->out_sg, req->elem->out_addr,
-                         req->elem->out_num, 0);
+        virtqueue_map_sg(req->elem.in_sg, req->elem.in_addr,
+                         req->elem.in_num, 1);
+        virtqueue_map_sg(req->elem.out_sg, req->elem.out_addr,
+                         req->elem.out_num, 0);
     }
 
     return 0;
diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c
index 5d17d39..67cb2b8 100644
--- a/hw/virtio/dataplane/vring.c
+++ b/hw/virtio/dataplane/vring.c
@@ -301,14 +301,16 @@ static void vring_unmap_element(VirtQueueElement *elem)
  * Stolen from linux/drivers/vhost/vhost.c.
  */
 int vring_pop(VirtIODevice *vdev, Vring *vring,
-              VirtQueueElement **p_elem)
+              VirtQueueElement *elem)
 {
     struct vring_desc desc;
     unsigned int i, head, found = 0, num = vring->vr.num;
     uint16_t avail_idx, last_avail_idx;
-    VirtQueueElement *elem = NULL;
     int ret;
 
+    /* Initialize elem so it can be safely unmapped */
+    elem->in_num = elem->out_num = 0;
+
     /* If there was a fatal error then refuse operation */
     if (vring->broken) {
         ret = -EFAULT;
@@ -340,10 +342,8 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
      * the index we've seen. */
     head = vring->vr.avail->ring[last_avail_idx % num];
 
-    elem = g_slice_new(VirtQueueElement);
     elem->index = head;
-    elem->in_num = elem->out_num = 0;
-
+
     /* If their number is silly, that's an error. */
     if (unlikely(head >= num)) {
         error_report("Guest says index %u > %u is available", head, num);
@@ -391,7 +391,6 @@ int vring_pop(VirtIODevice *vdev, Vring *vring,
     /* On success, increment avail index. */
     vring->last_avail_idx++;
 
-    *p_elem = elem;
     return head;
 
 out:
@@ -399,11 +398,7 @@ out:
     if (ret == -EFAULT) {
         vring->broken = true;
     }
-    if (elem) {
-        vring_unmap_element(elem);
-        g_slice_free(VirtQueueElement, elem);
-    }
-    *p_elem = NULL;
+    vring_unmap_element(elem);
     return ret;
 }
 
diff --git a/include/hw/virtio/dataplane/vring.h b/include/hw/virtio/dataplane/vring.h
index b23edd2..af73ee2 100644
--- a/include/hw/virtio/dataplane/vring.h
+++ b/include/hw/virtio/dataplane/vring.h
@@ -53,7 +53,7 @@ void vring_teardown(Vring *vring, VirtIODevice *vdev, int n);
 void vring_disable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_enable_notification(VirtIODevice *vdev, Vring *vring);
 bool vring_should_notify(VirtIODevice *vdev, Vring *vring);
-int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement **elem);
+int vring_pop(VirtIODevice *vdev, Vring *vring, VirtQueueElement *elem);
 void vring_push(Vring *vring, VirtQueueElement *elem, int len);
 
 #endif /* VRING_H */
diff --git a/include/hw/virtio/virtio-blk.h b/include/hw/virtio/virtio-blk.h
index b3080a2..afb7b8d 100644
--- a/include/hw/virtio/virtio-blk.h
+++ b/include/hw/virtio/virtio-blk.h
@@ -144,7 +144,7 @@ typedef struct MultiReqBuffer {
 
 typedef struct VirtIOBlockReq {
     VirtIOBlock *dev;
-    VirtQueueElement *elem;
+    VirtQueueElement elem;
     struct virtio_blk_inhdr *in;
     struct virtio_blk_outhdr out;
     QEMUIOVector qiov;
@@ -152,6 +152,10 @@ typedef struct VirtIOBlockReq {
     BlockAcctCookie acct;
 } VirtIOBlockReq;
 
+VirtIOBlockReq *virtio_blk_alloc_request(VirtIOBlock *s);
+
+void virtio_blk_free_request(VirtIOBlockReq *req);
+
 int virtio_blk_handle_scsi_req(VirtIOBlock *blk,
                                VirtQueueElement *elem);
 
--
cgit v1.1
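
The ownership simplification above reduces to a small sketch: with an
embedded element there is one allocation and one free, both in the same
place, and callers pass &req->elem into the pop function, which now only
fills in caller-supplied storage. The types below are illustrative stubs,
not the QEMU definitions:

    #include <glib.h>

    typedef struct {
        unsigned int index, out_num, in_num;   /* stub for VirtQueueElement */
    } Element;

    typedef struct {
        void *dev;
        Element elem;       /* embedded: lives and dies with the request */
    } Req;

    static int pop(Element *elem)   /* caller supplies the storage */
    {
        elem->in_num = elem->out_num = 0;  /* always safe to unmap later */
        return -1;                         /* "no more requests" in this stub */
    }

    int main(void)
    {
        Req *req = g_slice_new(Req);       /* one allocation ... */
        if (pop(&req->elem) < 0) {
            g_slice_free(Req, req);        /* ... one free, in one place */
        }
        return 0;
    }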
From acfb23ad3dd8d0ab385a10e483776ba7dcf927ad Mon Sep 17 00:00:00 2001
From: Paolo Bonzini
Date: Wed, 9 Jul 2014 10:49:46 +0200
Subject: AioContext: do not rely on aio_poll(ctx, true) result to end a loop

Currently, whenever aio_poll(ctx, true) has completed all pending work
it returns true *and* the next call to aio_poll(ctx, true) will not
block.

This invariant has its roots in qemu_aio_flush()'s implementation as
"while (qemu_aio_wait()) {}".  However, qemu_aio_flush() does not exist
anymore and bdrv_drain_all() is implemented differently; and this
invariant is complicated to maintain and subtly different from the
return value of GMainLoop's g_main_context_iteration.

All calls to aio_poll(ctx, true) except one are guarded by a while()
loop checking for a request to be incomplete, or a BlockDriverState
to be idle.

The one remaining call (in iothread.c) uses this to delay the
aio_context_release/acquire pair until the AioContext is quiescent,
however:

- we can do the same just by using non-blocking aio_poll, similar to
  how vl.c invokes main_loop_wait

- it is buggy, because it does not ensure that the AioContext is
  released between an aio_notify and the next time the iothread goes
  to sleep.  This leads to hangs when stopping the dataplane thread.

In the end, these semantics are a bad match for the current users of
AioContext.  So modify that one exception in iothread.c, which also
fixes the hangs, as well as the testcase so that it uses the same
idiom as the actual QEMU code.

Reported-by: Christian Borntraeger
Tested-by: Christian Borntraeger
Signed-off-by: Paolo Bonzini
Signed-off-by: Kevin Wolf
---
 include/block/aio.h |  6 +++---
 iothread.c          |  5 ++++-
 tests/test-aio.c    | 25 +++++++++++++------------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/include/block/aio.h b/include/block/aio.h
index 433e7ff..c23de3c 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -214,9 +214,9 @@ bool aio_pending(AioContext *ctx);
 /* Progress in completing AIO work to occur.  This can issue new pending
  * aio as a result of executing I/O completion or bh callbacks.
  *
- * If there is no pending AIO operation or completion (bottom half),
- * return false.  If there are pending AIO operations of bottom halves,
- * return true.
+ * Return whether any progress was made by executing AIO or bottom half
+ * handlers.  If @blocking == true, this should always be true except
+ * if someone called aio_notify.
  *
  * If there are no pending bottom halves, but there are pending AIO
  * operations, it may not be possible to make any progress without
diff --git a/iothread.c b/iothread.c
index 1fbf9f1..d9403cf 100644
--- a/iothread.c
+++ b/iothread.c
@@ -30,6 +30,7 @@ typedef ObjectClass IOThreadClass;
 static void *iothread_run(void *opaque)
 {
     IOThread *iothread = opaque;
+    bool blocking;
 
     qemu_mutex_lock(&iothread->init_done_lock);
     iothread->thread_id = qemu_get_thread_id();
@@ -38,8 +39,10 @@ static void *iothread_run(void *opaque)
 
     while (!iothread->stopping) {
         aio_context_acquire(iothread->ctx);
-        while (!iothread->stopping && aio_poll(iothread->ctx, true)) {
+        blocking = true;
+        while (!iothread->stopping && aio_poll(iothread->ctx, blocking)) {
             /* Progress was made, keep going */
+            blocking = false;
         }
         aio_context_release(iothread->ctx);
     }
diff --git a/tests/test-aio.c b/tests/test-aio.c
index 264dab9..4c40a49 100644
--- a/tests/test-aio.c
+++ b/tests/test-aio.c
@@ -24,14 +24,6 @@ typedef struct {
     bool auto_set;
 } EventNotifierTestData;
 
-/* Wait until there are no more BHs or AIO requests */
-static void wait_for_aio(void)
-{
-    while (aio_poll(ctx, true)) {
-        /* Do nothing */
-    }
-}
-
 /* Wait until event notifier becomes inactive */
 static void wait_until_inactive(EventNotifierTestData *data)
 {
@@ -204,7 +196,9 @@ static void test_bh_schedule10(void)
     g_assert(aio_poll(ctx, true));
     g_assert_cmpint(data.n, ==, 2);
 
-    wait_for_aio();
+    while (data.n < 10) {
+        aio_poll(ctx, true);
+    }
     g_assert_cmpint(data.n, ==, 10);
 
     g_assert(!aio_poll(ctx, false));
@@ -252,7 +246,9 @@ static void test_bh_delete_from_cb(void)
     qemu_bh_schedule(data1.bh);
     g_assert_cmpint(data1.n, ==, 0);
 
-    wait_for_aio();
+    while (data1.n < data1.max) {
+        aio_poll(ctx, true);
+    }
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert(data1.bh == NULL);
 
@@ -287,7 +283,12 @@ static void test_bh_delete_from_cb_many(void)
     g_assert_cmpint(data4.n, ==, 1);
     g_assert(data1.bh == NULL);
 
-    wait_for_aio();
+    while (data1.n < data1.max ||
+           data2.n < data2.max ||
+           data3.n < data3.max ||
+           data4.n < data4.max) {
+        aio_poll(ctx, true);
+    }
     g_assert_cmpint(data1.n, ==, data1.max);
     g_assert_cmpint(data2.n, ==, data2.max);
     g_assert_cmpint(data3.n, ==, data3.max);
@@ -306,7 +307,7 @@ static void test_bh_flush(void)
     qemu_bh_schedule(data.bh);
     g_assert_cmpint(data.n, ==, 0);
 
-    wait_for_aio();
+    g_assert(aio_poll(ctx, true));
     g_assert_cmpint(data.n, ==, 1);
 
     g_assert(!aio_poll(ctx, false));
--
cgit v1.1

From b8864245b140c801e826de4acf6d6516e2c550b2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Andreas=20F=C3=A4rber?=
Date: Wed, 9 Jul 2014 22:28:49 +0200
Subject: tests: Fix unterminated string output visitor enum human string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The buffer was being allocated of size string length plus two.
Around the string two quotes were being added, but no terminating NUL.
It was then compared using g_assert_cmpstr(), resulting in fairly
random assertion failures:

 ERROR:tests/test-string-output-visitor.c:213:test_visitor_out_enum: assertion failed (str == str_human): ("\"value1\"" == "\"value1\"\001EEEEEEEEEEEEEE\0171")

There is no g_assert_cmpnstr() counterpart, so use g_strdup_printf()
for safely assembling the string in the first place.

Cc: Hu Tao
Cc: Michael S. Tsirkin
Suggested-by: Eric Blake
Fixes: b4900c0 tests: add human format test for string output visitor
Signed-off-by: Andreas Färber
Reviewed-by: Eric Blake
Signed-off-by: Kevin Wolf
---
 tests/test-string-output-visitor.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/tests/test-string-output-visitor.c b/tests/test-string-output-visitor.c
index e89e43c..101fb27 100644
--- a/tests/test-string-output-visitor.c
+++ b/tests/test-string-output-visitor.c
@@ -196,16 +196,11 @@ static void test_visitor_out_enum(TestOutputVisitorData *data,
 
     for (i = 0; i < ENUM_ONE_MAX; i++) {
         char *str_human;
-        int len;
 
         visit_type_EnumOne(data->ov, &i, "unused", &err);
         g_assert(!err);
 
-        len = strlen(EnumOne_lookup[i]) + 2;
-        str_human = g_malloc0(len);
-        str_human[0] = '"';
-        strncpy(str_human + 1, EnumOne_lookup[i], strlen(EnumOne_lookup[i]));
-        str_human[len - 1] = '"';
+        str_human = g_strdup_printf("\"%s\"", EnumOne_lookup[i]);
 
         str = string_output_get_string(data->sov);
         g_assert(str != NULL);
--
cgit v1.1
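
The off-by-one is easiest to see side by side. A self-contained sketch of
the broken manual assembly and its g_strdup_printf() replacement (the name
string here is an arbitrary example, not tied to the test suite):

    #include <glib.h>
    #include <string.h>

    int main(void)
    {
        const char *name = "value1";

        /* Broken: len has no room for a terminating NUL, so the final
         * quote lands in the last byte and the string stays unterminated. */
        int len = strlen(name) + 2;
        char *bad = g_malloc0(len);
        bad[0] = '"';
        strncpy(bad + 1, name, strlen(name));
        bad[len - 1] = '"';    /* overwrites the byte where NUL had to go */

        /* Fixed: g_strdup_printf() sizes the buffer and NUL-terminates. */
        char *good = g_strdup_printf("\"%s\"", name);

        g_free(bad);
        g_free(good);
        return 0;
    }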
From 80504dcaa11097b99842312014f5954a54fe1826 Mon Sep 17 00:00:00 2001
From: Nikolay Nikolaev
Date: Wed, 9 Jul 2014 18:06:32 +0300
Subject: qtest: fix vhost-user-test compilation with old GLib

A missing G_TIME_SPAN_SECOND definition breaks the RHEL6 compilation,
as GLib versions before 2.26 do not have it.  In that case, just
define it.

Reported-by: Kevin Wolf
Signed-off-by: Nikolay Nikolaev
Tested-by: Kevin Wolf
Signed-off-by: Kevin Wolf
---
 tests/vhost-user-test.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/vhost-user-test.c b/tests/vhost-user-test.c
index 2af2381..406ba70 100644
--- a/tests/vhost-user-test.c
+++ b/tests/vhost-user-test.c
@@ -22,6 +22,10 @@
 #include <glib.h>
 
 /* GLIB version compatibility flags */
+#if !GLIB_CHECK_VERSION(2, 26, 0)
+#define G_TIME_SPAN_SECOND              (G_GINT64_CONSTANT(1000000))
+#endif
+
 #if GLIB_CHECK_VERSION(2, 28, 0)
 #define HAVE_MONOTONIC_TIME
 #endif
--
cgit v1.1

From 58f423fbd5f7f435e8dc56dfa565b043d20d1e1b Mon Sep 17 00:00:00 2001
From: Kevin Wolf
Date: Wed, 9 Jul 2014 19:17:30 +0200
Subject: dma-helpers: Fix too long qiov

If the size of the scatter/gather list isn't a multiple of 512, the
number of sectors for the block layer request is rounded down,
resulting in a qiov that doesn't match the request length.  Truncate
the qiov to the new length of the request.

This fixes the IDE qtest case /x86_64/ide/bmdma/short_prdt.

Signed-off-by: Kevin Wolf
Reviewed-by: Eric Blake
---
 dma-helpers.c         |  4 ++++
 include/qemu-common.h |  1 +
 util/iov.c            | 13 +++++++++++++
 3 files changed, 18 insertions(+)

diff --git a/dma-helpers.c b/dma-helpers.c
index 53cbe92..499b52b 100644
--- a/dma-helpers.c
+++ b/dma-helpers.c
@@ -170,6 +170,10 @@ static void dma_bdrv_cb(void *opaque, int ret)
         return;
     }
 
+    if (dbs->iov.size & ~BDRV_SECTOR_MASK) {
+        qemu_iovec_discard_back(&dbs->iov, dbs->iov.size & ~BDRV_SECTOR_MASK);
+    }
+
     dbs->acb = dbs->io_func(dbs->bs, dbs->sector_num, &dbs->iov,
                             dbs->iov.size / 512, dma_bdrv_cb, dbs);
     assert(dbs->acb);
diff --git a/include/qemu-common.h b/include/qemu-common.h
index ae76197..6ef8282 100644
--- a/include/qemu-common.h
+++ b/include/qemu-common.h
@@ -329,6 +329,7 @@ size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
                          int fillc, size_t bytes);
 ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b);
 void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf);
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes);
 
 bool buffer_is_zero(const void *buf, size_t len);
 
diff --git a/util/iov.c b/util/iov.c
index 2b4f46d..24566c8 100644
--- a/util/iov.c
+++ b/util/iov.c
@@ -550,3 +550,16 @@ size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
 
     return total;
 }
+
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
+{
+    size_t total;
+    unsigned int niov = qiov->niov;
+
+    assert(qiov->size >= bytes);
+    total = iov_discard_back(qiov->iov, &niov, bytes);
+    assert(total == bytes);
+
+    qiov->niov = niov;
+    qiov->size -= bytes;
+}
--
cgit v1.1
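
In other words: when the byte count is rounded down to whole sectors, the
qiov must be shortened by the same remainder, or the two lengths disagree.
A simplified sketch of that invariant, with a stub vector type standing in
for QEMUIOVector (the real helper also drops whole iovec entries, not just
bytes):

    #include <assert.h>
    #include <stddef.h>

    #define SECTOR_SIZE 512

    typedef struct { size_t size; } Vec;   /* stub for QEMUIOVector */

    static void vec_discard_back(Vec *v, size_t bytes)
    {
        assert(v->size >= bytes);
        v->size -= bytes;
    }

    static size_t align_to_sectors(Vec *v)
    {
        size_t tail = v->size % SECTOR_SIZE;  /* bytes past the last sector */
        if (tail) {
            vec_discard_back(v, tail);
        }
        return v->size / SECTOR_SIZE;   /* now matches the request length */
    }

    int main(void)
    {
        Vec v = { .size = 1000 };
        assert(align_to_sectors(&v) == 1 && v.size == 512);
        return 0;
    }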
From d0e14376eefc40b07c8fb42c132c2202c66dcb0b Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:29 +0200
Subject: virtio-blk: Factor common checks out of
 virtio_blk_handle_read/write()

Signed-off-by: Markus Armbruster
Reviewed-by: Fam Zheng
Signed-off-by: Kevin Wolf
---
 hw/block/virtio-blk.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 02cd6b0..075e6e6 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -288,6 +288,18 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
     bdrv_aio_flush(req->dev->bs, virtio_blk_flush_complete, req);
 }
 
+static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
+                                     uint64_t sector, size_t size)
+{
+    if (sector & dev->sector_mask) {
+        return false;
+    }
+    if (size % dev->conf->logical_block_size) {
+        return false;
+    }
+    return true;
+}
+
 static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 {
     BlockRequest *blkreq;
@@ -299,11 +311,7 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 
     trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
 
-    if (sector & req->dev->sector_mask) {
-        virtio_blk_rw_complete(req, -EIO);
-        return;
-    }
-    if (req->qiov.size % req->dev->conf->logical_block_size) {
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
         virtio_blk_rw_complete(req, -EIO);
         return;
     }
@@ -333,11 +341,7 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 
     trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
 
-    if (sector & req->dev->sector_mask) {
-        virtio_blk_rw_complete(req, -EIO);
-        return;
-    }
-    if (req->qiov.size % req->dev->conf->logical_block_size) {
+    if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
         virtio_blk_rw_complete(req, -EIO);
         return;
     }
--
cgit v1.1
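
The commit is a plain extract-function refactoring: the read and write
paths repeated the same two guard clauses, which now live in a single
predicate. Schematically, with simplified stand-ins for the VirtIOBlock
fields:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdint.h>

    /* Hypothetical device limits, standing in for VirtIOBlock state. */
    typedef struct {
        uint64_t sector_mask;        /* low bits that must be zero */
        size_t   logical_block_size;
    } Dev;

    /* One predicate replaces the duplicated checks in both I/O paths. */
    static bool sect_range_ok(const Dev *dev, uint64_t sector, size_t size)
    {
        if (sector & dev->sector_mask) {
            return false;            /* misaligned start sector */
        }
        if (size % dev->logical_block_size) {
            return false;            /* partial logical block */
        }
        return true;
    }

Besides removing duplication, the single predicate gives the next two
patches one obvious place to add further validity conditions.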
From 42e38c1fd0199155d32f3464aedce282d3d7f6a1 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:30 +0200
Subject: virtio-blk: Bypass error action and I/O accounting on invalid r/w

When a device model's I/O operation fails, we execute the error
action.  This lets layers above QEMU implement thin provisioning, or
attempt to correct errors before they reach the guest.  But when the
I/O operation fails because it's invalid, reporting the error to the
guest is the only sensible action.

If the guest's read or write asks for an invalid sector range, fail
the request right away, without considering the error action.  No
change with error action BDRV_ACTION_REPORT.

Furthermore, bypass I/O accounting, because we want to track only I/O
that actually reaches the block layer.

The next commit will extend "invalid sector range" to cover attempts
to read/write beyond the end of the medium.

Signed-off-by: Markus Armbruster
Signed-off-by: Kevin Wolf
---
 hw/block/virtio-blk.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index 075e6e6..e6e6276 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -307,15 +307,16 @@ static void virtio_blk_handle_write(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 
     sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
 
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
-
     trace_virtio_blk_handle_write(req, sector, req->qiov.size / 512);
 
     if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_rw_complete(req, -EIO);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        virtio_blk_free_request(req);
         return;
     }
 
+    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_WRITE);
+
     if (mrb->num_writes == 32) {
         virtio_submit_multiwrite(req->dev->bs, mrb);
     }
@@ -337,14 +338,15 @@ static void virtio_blk_handle_read(VirtIOBlockReq *req)
 
     sector = virtio_ldq_p(VIRTIO_DEVICE(req->dev), &req->out.sector);
 
-    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
-
     trace_virtio_blk_handle_read(req, sector, req->qiov.size / 512);
 
     if (!virtio_blk_sect_range_ok(req->dev, sector, req->qiov.size)) {
-        virtio_blk_rw_complete(req, -EIO);
+        virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR);
+        virtio_blk_free_request(req);
         return;
     }
+
+    bdrv_acct_start(req->dev->bs, &req->acct, req->qiov.size, BDRV_ACCT_READ);
 
     bdrv_aio_readv(req->dev->bs, sector, &req->qiov,
                    req->qiov.size / BDRV_SECTOR_SIZE,
                    virtio_blk_rw_complete, req);
--
cgit v1.1

From 3c2daac0b98952a858277878cb11294256b39e43 Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:31 +0200
Subject: virtio-blk: Treat read/write beyond end as invalid

The block layer fails such reads and writes just fine.  However, they
then get treated like valid operations that fail: the error action
gets executed.  Unwanted; reporting the error to the guest is the
only sensible action.

Reject them before passing them to the block layer.  This bypasses
the error action and I/O accounting.

Signed-off-by: Markus Armbruster
Reviewed-by: Fam Zheng
Signed-off-by: Kevin Wolf
---
 hw/block/virtio-blk.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c
index e6e6276..c241c50 100644
--- a/hw/block/virtio-blk.c
+++ b/hw/block/virtio-blk.c
@@ -291,12 +291,19 @@ static void virtio_blk_handle_flush(VirtIOBlockReq *req, MultiReqBuffer *mrb)
 static bool virtio_blk_sect_range_ok(VirtIOBlock *dev,
                                      uint64_t sector, size_t size)
 {
+    uint64_t nb_sectors = size >> BDRV_SECTOR_BITS;
+    uint64_t total_sectors;
+
     if (sector & dev->sector_mask) {
         return false;
     }
     if (size % dev->conf->logical_block_size) {
         return false;
     }
+    bdrv_get_geometry(dev->bs, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
     return true;
 }
 
--
cgit v1.1
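
Taken together, the two virtio-blk patches above establish an ordering
that can be summarized in a sketch: validate first, report invalid
requests straight to the guest, and start accounting only for I/O that
will actually reach the block layer. All names below are illustrative
stubs, not the QEMU API:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { bool valid; } Req;   /* stub request */

    static bool sect_range_ok(const Req *r) { return r->valid; }

    static void handle_rw(Req *req)
    {
        if (!sect_range_ok(req)) {
            /* Invalid request: status goes straight to the guest; the
             * error action and I/O accounting are both bypassed. */
            puts("complete with I/O error status; free request");
            return;
        }
        /* Only now does accounting start, so it counts just the I/O
         * that actually reaches the block layer. */
        puts("acct_start(); submit to block layer");
    }

    int main(void)
    {
        Req bad = { .valid = false };
        handle_rw(&bad);    /* rejected before accounting begins */
        return 0;
    }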
From 58ac321135af890b503ebe56d0d00e184779918f Mon Sep 17 00:00:00 2001
From: Markus Armbruster
Date: Wed, 9 Jul 2014 19:07:32 +0200
Subject: ide: Treat read/write beyond end as invalid

The block layer fails such reads and writes just fine.  However, they
then get treated like valid operations that fail: the error action
gets executed.  Unwanted; reporting the error to the guest is the
only sensible action.

Reject them before passing them to the block layer.  This bypasses
the error action and I/O accounting.

Not quite correct for DMA, because DMA can fail after some success,
and when that happens, the part that succeeded isn't counted.
Tolerable, because I/O accounting is an inconsistent mess anyway.

Signed-off-by: Markus Armbruster
Signed-off-by: Kevin Wolf
---
 hw/ide/core.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/hw/ide/core.c b/hw/ide/core.c
index 3a38f1e..db191a6 100644
--- a/hw/ide/core.c
+++ b/hw/ide/core.c
@@ -499,6 +499,18 @@ static void ide_rw_error(IDEState *s) {
     ide_set_irq(s->bus);
 }
 
+static bool ide_sect_range_ok(IDEState *s,
+                              uint64_t sector, uint64_t nb_sectors)
+{
+    uint64_t total_sectors;
+
+    bdrv_get_geometry(s->bs, &total_sectors);
+    if (sector > total_sectors || nb_sectors > total_sectors - sector) {
+        return false;
+    }
+    return true;
+}
+
 static void ide_sector_read_cb(void *opaque, int ret)
 {
     IDEState *s = opaque;
@@ -554,6 +566,11 @@ void ide_sector_read(IDEState *s)
     printf("sector=%" PRId64 "\n", sector_num);
 #endif
 
+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        ide_rw_error(s);
+        return;
+    }
+
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
@@ -671,6 +688,12 @@ void ide_dma_cb(void *opaque, int ret)
            sector_num, n, s->dma_cmd);
 #endif
 
+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        dma_buf_commit(s);
+        ide_dma_error(s);
+        return;
+    }
+
     switch (s->dma_cmd) {
     case IDE_DMA_READ:
         s->bus->dma->aiocb = dma_bdrv_read(s->bs, &s->sg, sector_num,
@@ -790,6 +813,11 @@ void ide_sector_write(IDEState *s)
         n = s->req_nb_sectors;
     }
 
+    if (!ide_sect_range_ok(s, sector_num, n)) {
+        ide_rw_error(s);
+        return;
+    }
+
     s->iov.iov_base = s->io_buffer;
     s->iov.iov_len  = n * BDRV_SECTOR_SIZE;
     qemu_iovec_init_external(&s->qiov, &s->iov, 1);
--
cgit v1.1
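
A detail worth noting in ide_sect_range_ok() and its virtio-blk
counterpart: the bounds check is written as "sector > total_sectors ||
nb_sectors > total_sectors - sector" rather than the naive
"sector + nb_sectors > total_sectors", because the addition could wrap
around for a huge nb_sectors and accept a bogus request. A standalone
sketch of the same comparison:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    /* Overflow-safe range check: "sector + nb_sectors > total" can wrap
     * around for large nb_sectors and silently accept a bad request. */
    static bool sect_range_ok(uint64_t total, uint64_t sector,
                              uint64_t nb_sectors)
    {
        return sector <= total && nb_sectors <= total - sector;
    }

    int main(void)
    {
        assert(sect_range_ok(100, 0, 100));         /* whole disk is fine */
        assert(!sect_range_ok(100, 100, 1));        /* starts past the end */
        assert(!sect_range_ok(100, 1, UINT64_MAX)); /* wraps a naive check */
        return 0;
    }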