From f406c03c093f1451ac0ba7fde31eeb78e5e5e417 Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Wed, 10 Jun 2015 14:38:17 +0300 Subject: block: Let bdrv_drain_all() to call aio_poll() for each AioContext After the commit 9b536adc ("block: acquire AioContext in bdrv_drain_all()") the aio_poll() function got called for every BlockDriverState, in assumption that every device may have its own AioContext. If we have thousands of disks attached, there are a lot of BlockDriverStates but only a few AioContexts, leading to tons of unnecessary aio_poll() calls. This patch changes the bdrv_drain_all() function allowing it find shared AioContexts and to call aio_poll() only for unique ones. Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Kevin Wolf Cc: Paolo Bonzini Cc: Stefan Hajnoczi Signed-off-by: Alexander Yarygin Reviewed-by: Stefan Hajnoczi Tested-by: Christian Borntraeger Message-id: 1433936297-7098-4-git-send-email-yarygin@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/block/io.c b/block/io.c index 9cc729b..43f85ab 100644 --- a/block/io.c +++ b/block/io.c @@ -233,17 +233,6 @@ static bool bdrv_requests_pending(BlockDriverState *bs) return false; } -static bool bdrv_drain_one(BlockDriverState *bs) -{ - bool bs_busy; - - bdrv_flush_io_queue(bs); - bdrv_start_throttled_reqs(bs); - bs_busy = bdrv_requests_pending(bs); - bs_busy |= aio_poll(bdrv_get_aio_context(bs), bs_busy); - return bs_busy; -} - /* * Wait for pending requests to complete on a single BlockDriverState subtree * @@ -256,8 +245,13 @@ static bool bdrv_drain_one(BlockDriverState *bs) */ void bdrv_drain(BlockDriverState *bs) { - while (bdrv_drain_one(bs)) { + bool busy = true; + + while (busy) { /* Keep iterating */ + bdrv_flush_io_queue(bs); + busy = bdrv_requests_pending(bs); + busy |= aio_poll(bdrv_get_aio_context(bs), busy); } } @@ -278,6 +272,7 @@ void bdrv_drain_all(void) /* Always run first iteration so any pending completion BHs run */ bool busy = true; BlockDriverState *bs = NULL; + GSList *aio_ctxs = NULL, *ctx; while ((bs = bdrv_next(bs))) { AioContext *aio_context = bdrv_get_aio_context(bs); @@ -287,17 +282,30 @@ void bdrv_drain_all(void) block_job_pause(bs->job); } aio_context_release(aio_context); + + if (!aio_ctxs || !g_slist_find(aio_ctxs, aio_context)) { + aio_ctxs = g_slist_prepend(aio_ctxs, aio_context); + } } while (busy) { busy = false; - bs = NULL; - while ((bs = bdrv_next(bs))) { - AioContext *aio_context = bdrv_get_aio_context(bs); + for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) { + AioContext *aio_context = ctx->data; + bs = NULL; aio_context_acquire(aio_context); - busy |= bdrv_drain_one(bs); + while ((bs = bdrv_next(bs))) { + if (aio_context == bdrv_get_aio_context(bs)) { + bdrv_flush_io_queue(bs); + if (bdrv_requests_pending(bs)) { + busy = true; + aio_poll(aio_context, busy); + } + } + } + busy |= aio_poll(aio_context, false); aio_context_release(aio_context); } } @@ -312,6 +320,7 @@ void bdrv_drain_all(void) } aio_context_release(aio_context); } + g_slist_free(aio_ctxs); } /** @@ -2562,4 +2571,5 @@ void bdrv_flush_io_queue(BlockDriverState *bs) } else if (bs->file) { bdrv_flush_io_queue(bs->file); } + bdrv_start_throttled_reqs(bs); } -- cgit v1.1 From 2f388b93a147258f9dbc83ebe63365edac4aa7a2 Mon Sep 17 00:00:00 2001 From: Alberto Garcia Date: Mon, 15 Jun 2015 18:41:15 +0300 Subject: throttle: Check current timers before updating any_timer_armed[] Calling throttle_group_config() cancels all timers from a particular BlockDriverState, so any_timer_armed[] should be updated accordingly. However, with the current code it may happen that a timer is armed in a different BlockDriverState from the same group, so any_timer_armed[] would be set to false in a situation where there is still a timer armed. The consequence is that we might end up with two timers armed. This should not have any noticeable impact however, since all accesses to the ThrottleGroup are protected by a lock, and the situation would become normal again shortly thereafter as soon as all timers have been fired. The correct way to solve this is to check that we're actually cancelling a timer before updating any_timer_armed[]. Signed-off-by: Alberto Garcia Message-id: 1434382875-3998-1-git-send-email-berto@igalia.com Signed-off-by: Stefan Hajnoczi --- block/throttle-groups.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/block/throttle-groups.c b/block/throttle-groups.c index efc462f..1abc6fc 100644 --- a/block/throttle-groups.c +++ b/block/throttle-groups.c @@ -324,9 +324,14 @@ void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg) ThrottleState *ts = bs->throttle_state; ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts); qemu_mutex_lock(&tg->lock); - throttle_config(ts, tt, cfg); /* throttle_config() cancels the timers */ - tg->any_timer_armed[0] = tg->any_timer_armed[1] = false; + if (timer_pending(tt->timers[0])) { + tg->any_timer_armed[0] = false; + } + if (timer_pending(tt->timers[1])) { + tg->any_timer_armed[1] = false; + } + throttle_config(ts, tt, cfg); qemu_mutex_unlock(&tg->lock); } -- cgit v1.1 From 97b0385a346829cf03efe131a26a4b6a4cd0a21f Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Wed, 17 Jun 2015 13:37:19 +0300 Subject: block-backend: Introduce blk_drain() This patch introduces the blk_drain() function which allows to replace blk_drain_all() when only one BlockDriverState needs to be drained. Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Kevin Wolf Cc: Paolo Bonzini Cc: Stefan Hajnoczi Signed-off-by: Alexander Yarygin Reviewed-by: Paolo Bonzini Acked-by: Kevin Wolf Message-id: 1434537440-28236-2-git-send-email-yarygin@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi --- block/block-backend.c | 5 +++++ include/sysemu/block-backend.h | 1 + 2 files changed, 6 insertions(+) diff --git a/block/block-backend.c b/block/block-backend.c index 93e46f3..aee8a12 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -700,6 +700,11 @@ int blk_flush_all(void) return bdrv_flush_all(); } +void blk_drain(BlockBackend *blk) +{ + bdrv_drain(blk->bs); +} + void blk_drain_all(void) { bdrv_drain_all(); diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index b4a4d5e..8fc960f 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -118,6 +118,7 @@ int blk_co_discard(BlockBackend *blk, int64_t sector_num, int nb_sectors); int blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); int blk_flush_all(void); +void blk_drain(BlockBackend *blk); void blk_drain_all(void); BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read); BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read, -- cgit v1.1 From 6e40b3bfc7e82823cf4df5f0bf668f56db41e53a Mon Sep 17 00:00:00 2001 From: Alexander Yarygin Date: Wed, 17 Jun 2015 13:37:20 +0300 Subject: virtio-blk: Use blk_drain() to drain IO requests Each call of the virtio_blk_reset() function calls blk_drain_all(), which works for all existing BlockDriverStates, while draining only one is needed. This patch replaces blk_drain_all() by blk_drain() in virtio_blk_reset(). virtio_blk_data_plane_stop() should be called after draining because it restores vblk->complete_request. Cc: "Michael S. Tsirkin" Cc: Christian Borntraeger Cc: Cornelia Huck Cc: Kevin Wolf Cc: Paolo Bonzini Cc: Stefan Hajnoczi Signed-off-by: Alexander Yarygin Message-id: 1434537440-28236-3-git-send-email-yarygin@linux.vnet.ibm.com Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index cd539aa..bbb1fc1 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -651,16 +651,21 @@ static void virtio_blk_dma_restart_cb(void *opaque, int running, static void virtio_blk_reset(VirtIODevice *vdev) { VirtIOBlock *s = VIRTIO_BLK(vdev); - - if (s->dataplane) { - virtio_blk_data_plane_stop(s->dataplane); - } + AioContext *ctx; /* * This should cancel pending requests, but can't do nicely until there * are per-device request lists. */ - blk_drain_all(); + ctx = blk_get_aio_context(s->blk); + aio_context_acquire(ctx); + blk_drain(s->blk); + + if (s->dataplane) { + virtio_blk_data_plane_stop(s->dataplane); + } + aio_context_release(ctx); + blk_set_enable_write_cache(s->blk, s->original_wce); } -- cgit v1.1 From c6a8c3283f1d53e360073bdb32f87a97e78e2880 Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Fri, 22 May 2015 09:29:46 +0800 Subject: util/hbitmap: Add an API to reset all set bits in hbitmap The function bdrv_clear_dirty_bitmap() is updated to use faster hbitmap_reset_all() call. Signed-off-by: Wen Congyang Signed-off-by: zhanghailiang Signed-off-by: Gonglei Acked-by: Paolo Bonzini Reviewed-by: Eric Blake Reviewed-by: John Snow Message-id: 555E868A.60506@cn.fujitsu.com Signed-off-by: Stefan Hajnoczi --- block.c | 2 +- include/qemu/hbitmap.h | 8 ++++++++ tests/test-hbitmap.c | 38 ++++++++++++++++++++++++++++++++++++++ util/hbitmap.c | 13 +++++++++++++ 4 files changed, 60 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index e2e33fd..7168575 100644 --- a/block.c +++ b/block.c @@ -3513,7 +3513,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap) { assert(bdrv_dirty_bitmap_enabled(bitmap)); - hbitmap_reset(bitmap->bitmap, 0, bitmap->size); + hbitmap_reset_all(bitmap->bitmap); } void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, diff --git a/include/qemu/hbitmap.h b/include/qemu/hbitmap.h index f0a85f8..bb94a00 100644 --- a/include/qemu/hbitmap.h +++ b/include/qemu/hbitmap.h @@ -132,6 +132,14 @@ void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count); void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count); /** + * hbitmap_reset_all: + * @hb: HBitmap to operate on. + * + * Reset all bits in an HBitmap. + */ +void hbitmap_reset_all(HBitmap *hb); + +/** * hbitmap_get: * @hb: HBitmap to operate on. * @item: Bit to query (0-based). diff --git a/tests/test-hbitmap.c b/tests/test-hbitmap.c index 9f41b5f..161eeb4 100644 --- a/tests/test-hbitmap.c +++ b/tests/test-hbitmap.c @@ -184,6 +184,23 @@ static void hbitmap_test_reset(TestHBitmapData *data, } } +static void hbitmap_test_reset_all(TestHBitmapData *data) +{ + size_t n; + + hbitmap_reset_all(data->hb); + + n = (data->size + BITS_PER_LONG - 1) / BITS_PER_LONG; + if (n == 0) { + n = 1; + } + memset(data->bits, 0, n * sizeof(unsigned long)); + + if (data->granularity == 0) { + hbitmap_test_check(data, 0); + } +} + static void hbitmap_test_check_get(TestHBitmapData *data) { uint64_t count = 0; @@ -364,6 +381,26 @@ static void test_hbitmap_reset(TestHBitmapData *data, hbitmap_test_set(data, L3 / 2, L3); } +static void test_hbitmap_reset_all(TestHBitmapData *data, + const void *unused) +{ + hbitmap_test_init(data, L3 * 2, 0); + hbitmap_test_set(data, L1 - 1, L1 + 2); + hbitmap_test_reset_all(data); + hbitmap_test_set(data, 0, L1 * 3); + hbitmap_test_reset_all(data); + hbitmap_test_set(data, L2, L1); + hbitmap_test_reset_all(data); + hbitmap_test_set(data, L2, L3 - L2 + 1); + hbitmap_test_reset_all(data); + hbitmap_test_set(data, L3 - 1, 3); + hbitmap_test_reset_all(data); + hbitmap_test_set(data, 0, L3 * 2); + hbitmap_test_reset_all(data); + hbitmap_test_set(data, L3 / 2, L3); + hbitmap_test_reset_all(data); +} + static void test_hbitmap_granularity(TestHBitmapData *data, const void *unused) { @@ -627,6 +664,7 @@ int main(int argc, char **argv) hbitmap_test_add("/hbitmap/set/overlap", test_hbitmap_set_overlap); hbitmap_test_add("/hbitmap/reset/empty", test_hbitmap_reset_empty); hbitmap_test_add("/hbitmap/reset/general", test_hbitmap_reset); + hbitmap_test_add("/hbitmap/reset/all", test_hbitmap_reset_all); hbitmap_test_add("/hbitmap/granularity", test_hbitmap_granularity); hbitmap_test_add("/hbitmap/truncate/nop", test_hbitmap_truncate_nop); diff --git a/util/hbitmap.c b/util/hbitmap.c index a10c7ae..50b888f 100644 --- a/util/hbitmap.c +++ b/util/hbitmap.c @@ -356,6 +356,19 @@ void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); } +void hbitmap_reset_all(HBitmap *hb) +{ + unsigned int i; + + /* Same as hbitmap_alloc() except for memset() instead of malloc() */ + for (i = HBITMAP_LEVELS; --i >= 1; ) { + memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long)); + } + + hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1); + hb->count = 0; +} + bool hbitmap_get(const HBitmap *hb, uint64_t item) { /* Compute position and bit in the last layer. */ -- cgit v1.1 From d5941ddae82a35771656d7e35f64f7f8f19c5627 Mon Sep 17 00:00:00 2001 From: Wolfgang Bumiller Date: Fri, 19 Jun 2015 11:35:29 +0200 Subject: vvfat: add a label option Until now the vvfat volume label was hardcoded to be "QEMU VVFAT", now you can pass a file.label=labelname option to the -drive to change it. The FAT structure defines the volume label to be limited to 11 bytes and is filled up spaces when shorter than that. The trailing spaces however aren't exposed to the user by operating systems. [Added missing comment '#' characters in block-core.json to fix build errors. --Stefan] Signed-off-by: Wolfgang Bumiller Message-id: 1434706529-13895-2-git-send-email-w.bumiller@proxmox.com Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- block/vvfat.c | 25 ++++++++++++++++++++++--- qapi/block-core.json | 6 +++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/block/vvfat.c b/block/vvfat.c index c35550c..2068697 100644 --- a/block/vvfat.c +++ b/block/vvfat.c @@ -323,6 +323,7 @@ typedef struct BDRVVVFATState { int fat_type; /* 16 or 32 */ array_t fat,directory,mapping; + char volume_label[11]; unsigned int cluster_size; unsigned int sectors_per_cluster; @@ -860,7 +861,7 @@ static int init_directories(BDRVVVFATState* s, { direntry_t* entry=array_get_next(&(s->directory)); entry->attributes=0x28; /* archive | volume label */ - memcpy(entry->name, "QEMU VVFAT ", sizeof(entry->name)); + memcpy(entry->name, s->volume_label, sizeof(entry->name)); } /* Now build FAT, and write back information into directory */ @@ -969,7 +970,8 @@ static int init_directories(BDRVVVFATState* s, bootsector->u.fat16.signature=0x29; bootsector->u.fat16.id=cpu_to_le32(0xfabe1afd); - memcpy(bootsector->u.fat16.volume_label,"QEMU VVFAT ",11); + memcpy(bootsector->u.fat16.volume_label, s->volume_label, + sizeof(bootsector->u.fat16.volume_label)); memcpy(bootsector->fat_type,(s->fat_type==12?"FAT12 ":s->fat_type==16?"FAT16 ":"FAT32 "),8); bootsector->magic[0]=0x55; bootsector->magic[1]=0xaa; @@ -1009,6 +1011,11 @@ static QemuOptsList runtime_opts = { .help = "Create a floppy rather than a hard disk image", }, { + .name = "label", + .type = QEMU_OPT_STRING, + .help = "Use a volume label other than QEMU VVFAT", + }, + { .name = "rw", .type = QEMU_OPT_BOOL, .help = "Make the image writable", @@ -1070,7 +1077,7 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, BDRVVVFATState *s = bs->opaque; int cyls, heads, secs; bool floppy; - const char *dirname; + const char *dirname, *label; QemuOpts *opts; Error *local_err = NULL; int ret; @@ -1097,6 +1104,18 @@ static int vvfat_open(BlockDriverState *bs, QDict *options, int flags, s->fat_type = qemu_opt_get_number(opts, "fat-type", 0); floppy = qemu_opt_get_bool(opts, "floppy", false); + memset(s->volume_label, ' ', sizeof(s->volume_label)); + label = qemu_opt_get(opts, "label"); + if (label) { + size_t label_length = strlen(label); + if (label_length > 11) { + error_setg(errp, "vvfat label cannot be longer than 11 bytes"); + ret = -EINVAL; + goto fail; + } + memcpy(s->volume_label, label, label_length); + } + if (floppy) { /* 1.44MB or 2.88MB floppy. 2.88MB can be FAT12 (default) or FAT16. */ if (!s->fat_type) { diff --git a/qapi/block-core.json b/qapi/block-core.json index afa9d3d..766d690 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1453,13 +1453,17 @@ # @fat-type: #optional FAT type: 12, 16 or 32 # @floppy: #optional whether to export a floppy image (true) or # partitioned hard disk (false; default) +# @label: #optional set the volume label, limited to 11 bytes. FAT16 and +# FAT32 traditionally have some restrictions on labels, which are +# ignored by most operating systems. Defaults to "QEMU VVFAT". +# (since 2.4) # @rw: #optional whether to allow write operations (default: false) # # Since: 1.7 ## { 'struct': 'BlockdevOptionsVVFAT', 'data': { 'dir': 'str', '*fat-type': 'int', '*floppy': 'bool', - '*rw': 'bool' } } + '*label': 'str', '*rw': 'bool' } } ## # @BlockdevOptionsGenericFormat -- cgit v1.1 From 25940fa7e57ffce9d495b4c2aadc39790535856d Mon Sep 17 00:00:00 2001 From: Lu Lina Date: Fri, 19 Jun 2015 14:27:34 +0800 Subject: nvme: Fix memleak in nvme_dma_read_prp Signed-off-by: Lu Lina Acked-by: Keith Busch Message-id: 1434695254-69808-1-git-send-email-kathy.wangting@huawei.com Signed-off-by: Stefan Hajnoczi --- hw/block/nvme.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 4b6d5e6..c6a6a0e 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -154,6 +154,7 @@ static uint16_t nvme_dma_read_prp(NvmeCtrl *n, uint8_t *ptr, uint32_t len, qemu_sglist_destroy(&qsg); return NVME_INVALID_FIELD | NVME_DNR; } + qemu_sglist_destroy(&qsg); return NVME_SUCCESS; } -- cgit v1.1 From b192af8acc597a6e8068873434e56e0c7de1b7d3 Mon Sep 17 00:00:00 2001 From: Dimitris Aragiorgis Date: Tue, 23 Jun 2015 13:44:56 +0300 Subject: block: Use bdrv_is_sg() everywhere Instead of checking bs->sg use bdrv_is_sg() consistently throughout the code. Signed-off-by: Dimitris Aragiorgis Reviewed-by: Paolo Bonzini Reviewed-by: Stefan Hajnoczi Message-id: 1435056300-14924-2-git-send-email-dimara@arrikto.com Signed-off-by: Stefan Hajnoczi --- block.c | 6 +++--- block/iscsi.c | 2 +- block/raw-posix.c | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/block.c b/block.c index 7168575..81233be 100644 --- a/block.c +++ b/block.c @@ -585,7 +585,7 @@ static int find_image_format(BlockDriverState *bs, const char *filename, int ret = 0; /* Return the raw BlockDriver * to scsi-generic devices or empty drives */ - if (bs->sg || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { + if (bdrv_is_sg(bs) || !bdrv_is_inserted(bs) || bdrv_getlength(bs) == 0) { *pdrv = &bdrv_raw; return ret; } @@ -617,7 +617,7 @@ static int refresh_total_sectors(BlockDriverState *bs, int64_t hint) BlockDriver *drv = bs->drv; /* Do not attempt drv->bdrv_getlength() on scsi-generic devices */ - if (bs->sg) + if (bdrv_is_sg(bs)) return 0; /* query actual device if possible, otherwise just trust the hint */ @@ -948,7 +948,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, assert(bdrv_opt_mem_align(bs) != 0); assert(bdrv_min_mem_align(bs) != 0); - assert((bs->request_alignment != 0) || bs->sg); + assert((bs->request_alignment != 0) || bdrv_is_sg(bs)); qemu_opts_del(opts); return 0; diff --git a/block/iscsi.c b/block/iscsi.c index 5f7b60c..aff8198 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -628,7 +628,7 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; - if (bs->sg) { + if (bdrv_is_sg(bs)) { return 0; } diff --git a/block/raw-posix.c b/block/raw-posix.c index a967464..b2097fc 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -305,9 +305,9 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) char *buf; size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); - /* For /dev/sg devices the alignment is not really used. + /* For SCSI generic devices the alignment is not really used. With buffered I/O, we don't have any restrictions. */ - if (bs->sg || !s->needs_alignment) { + if (bdrv_is_sg(bs) || !s->needs_alignment) { bs->request_alignment = 1; s->buf_align = 1; return; -- cgit v1.1 From 1b6bc94d5d43ff3e39abadae19f2dbcb0954eb93 Mon Sep 17 00:00:00 2001 From: Dimitris Aragiorgis Date: Tue, 23 Jun 2015 13:44:57 +0300 Subject: Fix migration in case of scsi-generic During migration, QEMU uses fsync()/fdatasync() on the open file descriptor for read-write block devices to flush data just before stopping the VM. However, fsync() on a scsi-generic device returns -EINVAL which causes the migration to fail. This patch skips flushing data in case of an SG device, since submitting SCSI commands directly via an SG character device (e.g. /dev/sg0) bypasses the page cache completely, anyway. Note that fsync() not only flushes the page cache but also the disk cache. The scsi-generic device never sends flushes, and for migration it assumes that the same SCSI device is used by the destination host, so it does not issue any SCSI SYNCHRONIZE CACHE (10) command. Finally, remove the bdrv_is_sg() test from iscsi_co_flush() since this is now redundant (we flush the underlying protocol at the end of bdrv_co_flush() which, with this patch, we never reach). Signed-off-by: Dimitris Aragiorgis Reviewed-by: Stefan Hajnoczi Message-id: 1435056300-14924-3-git-send-email-dimara@arrikto.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 3 ++- block/iscsi.c | 4 ---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/block/io.c b/block/io.c index 43f85ab..e295992 100644 --- a/block/io.c +++ b/block/io.c @@ -2265,7 +2265,8 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs) { int ret; - if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) { + if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) || + bdrv_is_sg(bs)) { return 0; } diff --git a/block/iscsi.c b/block/iscsi.c index aff8198..49cee4d 100644 --- a/block/iscsi.c +++ b/block/iscsi.c @@ -628,10 +628,6 @@ static int coroutine_fn iscsi_co_flush(BlockDriverState *bs) IscsiLun *iscsilun = bs->opaque; struct IscsiTask iTask; - if (bdrv_is_sg(bs)) { - return 0; - } - if (!iscsilun->force_next_flush) { return 0; } -- cgit v1.1 From bcb225550dcc0d6fcef8e97012bae572ba78f73a Mon Sep 17 00:00:00 2001 From: Dimitris Aragiorgis Date: Tue, 23 Jun 2015 13:44:58 +0300 Subject: raw-posix: DPRINTF instead of DEBUG_BLOCK_PRINT Building the QEMU tools fails if we #define DEBUG_BLOCK inside block/raw-posix.c. Here instead of adding qemu-log.o in block-obj-y so that DEBUG_BLOCK_PRINT can be used, we substitute the latter with a simple DPRINTF() (that does not cause bit-rot). Signed-off-by: Dimitris Aragiorgis Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 1435056300-14924-4-git-send-email-dimara@arrikto.com Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index b2097fc..23c4577 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -99,12 +99,18 @@ //#define DEBUG_FLOPPY //#define DEBUG_BLOCK -#if defined(DEBUG_BLOCK) -#define DEBUG_BLOCK_PRINT(formatCstr, ...) do { if (qemu_log_enabled()) \ - { qemu_log(formatCstr, ## __VA_ARGS__); qemu_log_flush(); } } while (0) + +#ifdef DEBUG_BLOCK +# define DEBUG_BLOCK_PRINT 1 #else -#define DEBUG_BLOCK_PRINT(formatCstr, ...) +# define DEBUG_BLOCK_PRINT 0 #endif +#define DPRINTF(fmt, ...) \ +do { \ + if (DEBUG_BLOCK_PRINT) { \ + printf(fmt, ## __VA_ARGS__); \ + } \ +} while (0) /* OS X does not have O_DSYNC */ #ifndef O_DSYNC @@ -1020,6 +1026,7 @@ static ssize_t handle_aiocb_rw(RawPosixAIOData *aiocb) static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) { struct xfs_flock64 fl; + int err; memset(&fl, 0, sizeof(fl)); fl.l_whence = SEEK_SET; @@ -1027,8 +1034,9 @@ static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) fl.l_len = bytes; if (xfsctl(NULL, s->fd, XFS_IOC_ZERO_RANGE, &fl) < 0) { - DEBUG_BLOCK_PRINT("cannot write zero range (%s)\n", strerror(errno)); - return -errno; + err = errno; + DPRINTF("cannot write zero range (%s)\n", strerror(errno)); + return -err; } return 0; @@ -1037,6 +1045,7 @@ static int xfs_write_zeroes(BDRVRawState *s, int64_t offset, uint64_t bytes) static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) { struct xfs_flock64 fl; + int err; memset(&fl, 0, sizeof(fl)); fl.l_whence = SEEK_SET; @@ -1044,8 +1053,9 @@ static int xfs_discard(BDRVRawState *s, int64_t offset, uint64_t bytes) fl.l_len = bytes; if (xfsctl(NULL, s->fd, XFS_IOC_UNRESVSP64, &fl) < 0) { - DEBUG_BLOCK_PRINT("cannot punch hole (%s)\n", strerror(errno)); - return -errno; + err = errno; + DPRINTF("cannot punch hole (%s)\n", strerror(errno)); + return -err; } return 0; -- cgit v1.1 From a93a3982a6645463fa822131d38b17284edd5633 Mon Sep 17 00:00:00 2001 From: Dimitris Aragiorgis Date: Tue, 23 Jun 2015 13:44:59 +0300 Subject: raw-posix: Use DPRINTF for DEBUG_FLOPPY Get rid of several #ifdef DEBUG_FLOPPY and substitute them with DPRINTF. Signed-off-by: Dimitris Aragiorgis Reviewed-by: Eric Blake Reviewed-by: Stefan Hajnoczi Message-id: 1435056300-14924-5-git-send-email-dimara@arrikto.com Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 23c4577..57a243f 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -96,8 +96,6 @@ #include #endif -//#define DEBUG_FLOPPY - //#define DEBUG_BLOCK #ifdef DEBUG_BLOCK @@ -2172,16 +2170,12 @@ static int fd_open(BlockDriverState *bs) (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_open_time) >= FD_OPEN_TIMEOUT) { qemu_close(s->fd); s->fd = -1; -#ifdef DEBUG_FLOPPY - printf("Floppy closed\n"); -#endif + DPRINTF("Floppy closed\n"); } if (s->fd < 0) { if (s->fd_got_error && (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - s->fd_error_time) < FD_OPEN_TIMEOUT) { -#ifdef DEBUG_FLOPPY - printf("No floppy (open delayed)\n"); -#endif + DPRINTF("No floppy (open delayed)\n"); return -EIO; } s->fd = qemu_open(bs->filename, s->open_flags & ~O_NONBLOCK); @@ -2190,14 +2184,10 @@ static int fd_open(BlockDriverState *bs) s->fd_got_error = 1; if (last_media_present) s->fd_media_changed = 1; -#ifdef DEBUG_FLOPPY - printf("No floppy\n"); -#endif + DPRINTF("No floppy\n"); return -EIO; } -#ifdef DEBUG_FLOPPY - printf("Floppy opened\n"); -#endif + DPRINTF("Floppy opened\n"); } if (!last_media_present) s->fd_media_changed = 1; @@ -2465,9 +2455,7 @@ static int floppy_media_changed(BlockDriverState *bs) fd_open(bs); ret = s->fd_media_changed; s->fd_media_changed = 0; -#ifdef DEBUG_FLOPPY - printf("Floppy changed=%d\n", ret); -#endif + DPRINTF("Floppy changed=%d\n", ret); return ret; } -- cgit v1.1 From 3307ed7b3fac5ba99eb3b84904b0b7cdc3592a61 Mon Sep 17 00:00:00 2001 From: Dimitris Aragiorgis Date: Tue, 23 Jun 2015 13:45:00 +0300 Subject: raw-posix: Introduce hdev_is_sg() Until now, an SG device was identified only by checking if its path started with "/dev/sg". Then, hdev_open() would set the bs->sg flag accordingly. The patch relies on the actual properties of the device instead of the specified file path. To this end, test for an SG device (e.g. /dev/sg0) by ensuring that all of the following holds: - The specified file name corresponds to a character device - The device supports the SG_GET_VERSION_NUM ioctl - The device supports the SG_GET_SCSI_ID ioctl Signed-off-by: Dimitris Aragiorgis Message-id: 1435056300-14924-6-git-send-email-dimara@arrikto.com Signed-off-by: Stefan Hajnoczi --- block/raw-posix.c | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 57a243f..cbe6574 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -59,6 +59,7 @@ #include #include #include +#include #ifdef __s390__ #include #endif @@ -2086,15 +2087,38 @@ static void hdev_parse_filename(const char *filename, QDict *options, qdict_put_obj(options, "filename", QOBJECT(qstring_from_str(filename))); } +static bool hdev_is_sg(BlockDriverState *bs) +{ + +#if defined(__linux__) + + struct stat st; + struct sg_scsi_id scsiid; + int sg_version; + + if (stat(bs->filename, &st) >= 0 && S_ISCHR(st.st_mode) && + !bdrv_ioctl(bs, SG_GET_VERSION_NUM, &sg_version) && + !bdrv_ioctl(bs, SG_GET_SCSI_ID, &scsiid)) { + DPRINTF("SG device found: type=%d, version=%d\n", + scsiid.scsi_type, sg_version); + return true; + } + +#endif + + return false; +} + static int hdev_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVRawState *s = bs->opaque; Error *local_err = NULL; int ret; - const char *filename = qdict_get_str(options, "filename"); #if defined(__APPLE__) && defined(__MACH__) + const char *filename = qdict_get_str(options, "filename"); + if (strstart(filename, "/dev/cdrom", NULL)) { kern_return_t kernResult; io_iterator_t mediaIterator; @@ -2123,16 +2147,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, #endif s->type = FTYPE_FILE; -#if defined(__linux__) - { - char resolved_path[ MAXPATHLEN ], *temp; - - temp = realpath(filename, resolved_path); - if (temp && strstart(temp, "/dev/sg", NULL)) { - bs->sg = 1; - } - } -#endif ret = raw_open_common(bs, options, flags, 0, &local_err); if (ret < 0) { @@ -2142,6 +2156,9 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, return ret; } + /* Since this does ioctl the device must be already opened */ + bs->sg = hdev_is_sg(bs); + if (flags & BDRV_O_RDWR) { ret = check_hdev_writable(s); if (ret < 0) { -- cgit v1.1 From 6b64640dd25846c4de42aa433db56e0ff975993a Mon Sep 17 00:00:00 2001 From: Wen Congyang Date: Thu, 21 May 2015 09:50:10 +0800 Subject: iov: don't touch iov in iov_send_recv() Signed-off-by: Wen Congyang Message-id: 555D39D2.4000705@cn.fujitsu.com Signed-off-by: Stefan Hajnoczi --- include/qemu/iov.h | 2 +- util/iov.c | 14 +++++++++++++- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/include/qemu/iov.h b/include/qemu/iov.h index 68d25f2..569b2c2 100644 --- a/include/qemu/iov.h +++ b/include/qemu/iov.h @@ -75,7 +75,7 @@ size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, * For iov_send_recv() _whole_ area being sent or received * should be within the iovec, not only beginning of it. */ -ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, +ssize_t iov_send_recv(int sockfd, const struct iovec *iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send); #define iov_recv(sockfd, iov, iov_cnt, offset, bytes) \ iov_send_recv(sockfd, iov, iov_cnt, offset, bytes, false) diff --git a/util/iov.c b/util/iov.c index 2fb18e6..a0d5934 100644 --- a/util/iov.c +++ b/util/iov.c @@ -133,7 +133,7 @@ do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) #endif } -ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, +ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt, size_t offset, size_t bytes, bool do_send) { @@ -141,6 +141,16 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, ssize_t ret; size_t orig_len, tail; unsigned niov; + struct iovec *local_iov, *iov; + + if (bytes <= 0) { + return 0; + } + + local_iov = g_new0(struct iovec, iov_cnt); + iov_copy(local_iov, iov_cnt, _iov, iov_cnt, offset, bytes); + offset = 0; + iov = local_iov; while (bytes > 0) { /* Find the start position, skipping `offset' bytes: @@ -187,6 +197,7 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, if (ret < 0) { assert(errno != EINTR); + g_free(local_iov); if (errno == EAGAIN && total > 0) { return total; } @@ -205,6 +216,7 @@ ssize_t iov_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bytes -= ret; } + g_free(local_iov); return total; } -- cgit v1.1 From a30c4eb2ce7b2c15ab556be3cfe2340c17271ddd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 23 Jun 2015 15:56:09 +0100 Subject: qemu-iotests: fix 051.out after qdev error message change Commit f006cf7fa9a63ba8e4ccf57d46231ce594301727 ("qdev-monitor: Propagate errors through qdev_device_add()") dropped a meaningless error message. This change in output caused qemu-iotests 051 to fail: QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. -QEMU_PROG: -device ide-drive,drive=disk: Device 'ide-drive' could not be initialized Update 051.out so the test passes again. Cc: Markus Armbruster Signed-off-by: Stefan Hajnoczi Reviewed-by: Fam Zheng Message-id: 1435071369-30936-1-git-send-email-stefanha@redhat.com --- tests/qemu-iotests/051.out | 8 -------- 1 file changed, 8 deletions(-) diff --git a/tests/qemu-iotests/051.out b/tests/qemu-iotests/051.out index 652dd63..23c2823 100644 --- a/tests/qemu-iotests/051.out +++ b/tests/qemu-iotests/051.out @@ -52,7 +52,6 @@ QEMU_PROG: -drive file=TEST_DIR/t.qcow2,driver=qcow2,format=qcow2: Cannot specif Testing: -device virtio-scsi-pci -device scsi-hd QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device scsi-hd: drive property not set -QEMU_PROG: -device scsi-hd: Device 'scsi-hd' could not be initialized === Overriding backing file === @@ -128,7 +127,6 @@ QEMU_PROG: Initialization of device ide-hd failed: Device initialization failed. Testing: -drive if=virtio QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -drive if=virtio: Device needs media, but drive is empty -QEMU_PROG: -drive if=virtio: Device 'virtio-blk-pci' could not be initialized Testing: -drive if=scsi QEMU X.Y.Z monitor - type 'help' for more information @@ -146,23 +144,19 @@ Testing: -drive if=none,id=disk -device ide-drive,drive=disk QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device ide-drive,drive=disk: Device needs media, but drive is empty QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. -QEMU_PROG: -device ide-drive,drive=disk: Device 'ide-drive' could not be initialized Testing: -drive if=none,id=disk -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device ide-hd,drive=disk: Device needs media, but drive is empty QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. -QEMU_PROG: -device ide-hd,drive=disk: Device 'ide-hd' could not be initialized Testing: -drive if=none,id=disk -device lsi53c895a -device scsi-disk,drive=disk QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device scsi-disk,drive=disk: Device needs media, but drive is empty -QEMU_PROG: -device scsi-disk,drive=disk: Device 'scsi-disk' could not be initialized Testing: -drive if=none,id=disk -device lsi53c895a -device scsi-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device scsi-hd,drive=disk: Device needs media, but drive is empty -QEMU_PROG: -device scsi-hd,drive=disk: Device 'scsi-hd' could not be initialized === Read-only === @@ -204,13 +198,11 @@ Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-dr QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device ide-drive,drive=disk: Can't use a read-only drive QEMU_PROG: -device ide-drive,drive=disk: Device initialization failed. -QEMU_PROG: -device ide-drive,drive=disk: Device 'ide-drive' could not be initialized Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device ide-hd,drive=disk QEMU X.Y.Z monitor - type 'help' for more information (qemu) QEMU_PROG: -device ide-hd,drive=disk: Can't use a read-only drive QEMU_PROG: -device ide-hd,drive=disk: Device initialization failed. -QEMU_PROG: -device ide-hd,drive=disk: Device 'ide-hd' could not be initialized Testing: -drive file=TEST_DIR/t.qcow2,if=none,id=disk,readonly=on -device lsi53c895a -device scsi-disk,drive=disk QEMU X.Y.Z monitor - type 'help' for more information -- cgit v1.1 From 12048545019cd1d64c8147ea9277648e685fa489 Mon Sep 17 00:00:00 2001 From: Gonglei Date: Wed, 24 Jun 2015 17:29:24 +0800 Subject: virito-blk: drop duplicate check in_num = req->elem.in_num, and req->elem.in_num is checked in line 489, so the check about in_num variable is superflous, let's drop it. Signed-off-by: Gonglei Reviewed-by: Fam Zheng Message-id: 1435138164-11728-1-git-send-email-arei.gonglei@huawei.com Signed-off-by: Stefan Hajnoczi --- hw/block/virtio-blk.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/block/virtio-blk.c b/hw/block/virtio-blk.c index bbb1fc1..6aefda4 100644 --- a/hw/block/virtio-blk.c +++ b/hw/block/virtio-blk.c @@ -499,8 +499,7 @@ void virtio_blk_handle_request(VirtIOBlockReq *req, MultiReqBuffer *mrb) iov_discard_front(&iov, &out_num, sizeof(req->out)); - if (in_num < 1 || - in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { + if (in_iov[in_num - 1].iov_len < sizeof(struct virtio_blk_inhdr)) { error_report("virtio-blk request inhdr too short"); exit(1); } -- cgit v1.1