From 9e80202352dd49bdd9e67b8b906d86f058431505 Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sat, 11 May 2019 15:12:49 -0500 Subject: Initial import of abandoned HQEMU version 2.5.2 --- src/include/block/accounting.h | 85 +++++ src/include/block/aio.h | 445 +++++++++++++++++++++++ src/include/block/block.h | 641 +++++++++++++++++++++++++++++++++ src/include/block/block_int.h | 701 ++++++++++++++++++++++++++++++++++++ src/include/block/blockjob.h | 446 +++++++++++++++++++++++ src/include/block/nbd.h | 106 ++++++ src/include/block/qapi.h | 46 +++ src/include/block/scsi.h | 309 ++++++++++++++++ src/include/block/snapshot.h | 96 +++++ src/include/block/thread-pool.h | 37 ++ src/include/block/throttle-groups.h | 46 +++ src/include/block/write-threshold.h | 64 ++++ 12 files changed, 3022 insertions(+) create mode 100644 src/include/block/accounting.h create mode 100644 src/include/block/aio.h create mode 100644 src/include/block/block.h create mode 100644 src/include/block/block_int.h create mode 100644 src/include/block/blockjob.h create mode 100644 src/include/block/nbd.h create mode 100644 src/include/block/qapi.h create mode 100644 src/include/block/scsi.h create mode 100644 src/include/block/snapshot.h create mode 100644 src/include/block/thread-pool.h create mode 100644 src/include/block/throttle-groups.h create mode 100644 src/include/block/write-threshold.h (limited to 'src/include/block') diff --git a/src/include/block/accounting.h b/src/include/block/accounting.h new file mode 100644 index 0000000..0f46cb4 --- /dev/null +++ b/src/include/block/accounting.h @@ -0,0 +1,85 @@ +/* + * QEMU System Emulator block accounting + * + * Copyright (c) 2011 Christoph Hellwig + * Copyright (c) 2015 Igalia, S.L. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_ACCOUNTING_H +#define BLOCK_ACCOUNTING_H + +#include +#include + +#include "qemu/typedefs.h" +#include "qemu/timed-average.h" + +typedef struct BlockAcctTimedStats BlockAcctTimedStats; + +enum BlockAcctType { + BLOCK_ACCT_READ, + BLOCK_ACCT_WRITE, + BLOCK_ACCT_FLUSH, + BLOCK_MAX_IOTYPE, +}; + +struct BlockAcctTimedStats { + TimedAverage latency[BLOCK_MAX_IOTYPE]; + unsigned interval_length; /* in seconds */ + QSLIST_ENTRY(BlockAcctTimedStats) entries; +}; + +typedef struct BlockAcctStats { + uint64_t nr_bytes[BLOCK_MAX_IOTYPE]; + uint64_t nr_ops[BLOCK_MAX_IOTYPE]; + uint64_t invalid_ops[BLOCK_MAX_IOTYPE]; + uint64_t failed_ops[BLOCK_MAX_IOTYPE]; + uint64_t total_time_ns[BLOCK_MAX_IOTYPE]; + uint64_t merged[BLOCK_MAX_IOTYPE]; + int64_t last_access_time_ns; + QSLIST_HEAD(, BlockAcctTimedStats) intervals; + bool account_invalid; + bool account_failed; +} BlockAcctStats; + +typedef struct BlockAcctCookie { + int64_t bytes; + int64_t start_time_ns; + enum BlockAcctType type; +} BlockAcctCookie; + +void block_acct_init(BlockAcctStats *stats, bool account_invalid, + bool account_failed); +void block_acct_cleanup(BlockAcctStats *stats); +void block_acct_add_interval(BlockAcctStats *stats, unsigned interval_length); +BlockAcctTimedStats *block_acct_interval_next(BlockAcctStats *stats, + BlockAcctTimedStats *s); +void block_acct_start(BlockAcctStats *stats, BlockAcctCookie *cookie, + int64_t bytes, enum BlockAcctType type); +void block_acct_done(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_failed(BlockAcctStats *stats, BlockAcctCookie *cookie); +void block_acct_invalid(BlockAcctStats *stats, enum BlockAcctType type); +void block_acct_merge_done(BlockAcctStats *stats, enum BlockAcctType type, + int num_requests); +int64_t block_acct_idle_time_ns(BlockAcctStats *stats); +double block_acct_queue_depth(BlockAcctTimedStats *stats, + enum BlockAcctType type); + +#endif diff --git a/src/include/block/aio.h b/src/include/block/aio.h new file mode 100644 index 0000000..e086e3b --- /dev/null +++ b/src/include/block/aio.h @@ -0,0 +1,445 @@ +/* + * QEMU aio implementation + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef QEMU_AIO_H +#define QEMU_AIO_H + +#include "qemu/typedefs.h" +#include "qemu-common.h" +#include "qemu/queue.h" +#include "qemu/event_notifier.h" +#include "qemu/thread.h" +#include "qemu/rfifolock.h" +#include "qemu/timer.h" + +typedef struct BlockAIOCB BlockAIOCB; +typedef void BlockCompletionFunc(void *opaque, int ret); + +typedef struct AIOCBInfo { + void (*cancel_async)(BlockAIOCB *acb); + AioContext *(*get_aio_context)(BlockAIOCB *acb); + size_t aiocb_size; +} AIOCBInfo; + +struct BlockAIOCB { + const AIOCBInfo *aiocb_info; + BlockDriverState *bs; + BlockCompletionFunc *cb; + void *opaque; + int refcnt; +}; + +void *qemu_aio_get(const AIOCBInfo *aiocb_info, BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); +void qemu_aio_unref(void *p); +void qemu_aio_ref(void *p); + +typedef struct AioHandler AioHandler; +typedef void QEMUBHFunc(void *opaque); +typedef void IOHandler(void *opaque); + +struct AioContext { + GSource source; + + /* Protects all fields from multi-threaded access */ + RFifoLock lock; + + /* The list of registered AIO handlers */ + QLIST_HEAD(, AioHandler) aio_handlers; + + /* This is a simple lock used to protect the aio_handlers list. + * Specifically, it's used to ensure that no callbacks are removed while + * we're walking and dispatching callbacks. + */ + int walking_handlers; + + /* Used to avoid unnecessary event_notifier_set calls in aio_notify; + * accessed with atomic primitives. If this field is 0, everything + * (file descriptors, bottom halves, timers) will be re-evaluated + * before the next blocking poll(), thus the event_notifier_set call + * can be skipped. If it is non-zero, you may need to wake up a + * concurrent aio_poll or the glib main event loop, making + * event_notifier_set necessary. + * + * Bit 0 is reserved for GSource usage of the AioContext, and is 1 + * between a call to aio_ctx_check and the next call to aio_ctx_dispatch. + * Bits 1-31 simply count the number of active calls to aio_poll + * that are in the prepare or poll phase. + * + * The GSource and aio_poll must use a different mechanism because + * there is no certainty that a call to GSource's prepare callback + * (via g_main_context_prepare) is indeed followed by check and + * dispatch. It's not clear whether this would be a bug, but let's + * play safe and allow it---it will just cause extra calls to + * event_notifier_set until the next call to dispatch. + * + * Instead, the aio_poll calls include both the prepare and the + * dispatch phase, hence a simple counter is enough for them. + */ + uint32_t notify_me; + + /* lock to protect between bh's adders and deleter */ + QemuMutex bh_lock; + + /* Anchor of the list of Bottom Halves belonging to the context */ + struct QEMUBH *first_bh; + + /* A simple lock used to protect the first_bh list, and ensure that + * no callbacks are removed while we're walking and dispatching callbacks. + */ + int walking_bh; + + /* Used by aio_notify. + * + * "notified" is used to avoid expensive event_notifier_test_and_clear + * calls. When it is clear, the EventNotifier is clear, or one thread + * is going to clear "notified" before processing more events. False + * positives are possible, i.e. "notified" could be set even though the + * EventNotifier is clear. + * + * Note that event_notifier_set *cannot* be optimized the same way. For + * more information on the problem that would result, see "#ifdef BUG2" + * in the docs/aio_notify_accept.promela formal model. + */ + bool notified; + EventNotifier notifier; + + /* Scheduling this BH forces the event loop it iterate */ + QEMUBH *notify_dummy_bh; + + /* Thread pool for performing work and receiving completion callbacks */ + struct ThreadPool *thread_pool; + + /* TimerLists for calling timers - one per clock type */ + QEMUTimerListGroup tlg; + + int external_disable_cnt; + + /* epoll(7) state used when built with CONFIG_EPOLL */ + int epollfd; + bool epoll_enabled; + bool epoll_available; +}; + +/** + * aio_context_new: Allocate a new AioContext. + * + * AioContext provide a mini event-loop that can be waited on synchronously. + * They also provide bottom halves, a service to execute a piece of code + * as soon as possible. + */ +AioContext *aio_context_new(Error **errp); + +/** + * aio_context_ref: + * @ctx: The AioContext to operate on. + * + * Add a reference to an AioContext. + */ +void aio_context_ref(AioContext *ctx); + +/** + * aio_context_unref: + * @ctx: The AioContext to operate on. + * + * Drop a reference to an AioContext. + */ +void aio_context_unref(AioContext *ctx); + +/* Take ownership of the AioContext. If the AioContext will be shared between + * threads, and a thread does not want to be interrupted, it will have to + * take ownership around calls to aio_poll(). Otherwise, aio_poll() + * automatically takes care of calling aio_context_acquire and + * aio_context_release. + * + * Access to timers and BHs from a thread that has not acquired AioContext + * is possible. Access to callbacks for now must be done while the AioContext + * is owned by the thread (FIXME). + */ +void aio_context_acquire(AioContext *ctx); + +/* Relinquish ownership of the AioContext. */ +void aio_context_release(AioContext *ctx); + +/** + * aio_bh_new: Allocate a new bottom half structure. + * + * Bottom halves are lightweight callbacks whose invocation is guaranteed + * to be wait-free, thread-safe and signal-safe. The #QEMUBH structure + * is opaque and must be allocated prior to its use. + */ +QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque); + +/** + * aio_notify: Force processing of pending events. + * + * Similar to signaling a condition variable, aio_notify forces + * aio_wait to exit, so that the next call will re-examine pending events. + * The caller of aio_notify will usually call aio_wait again very soon, + * or go through another iteration of the GLib main loop. Hence, aio_notify + * also has the side effect of recalculating the sets of file descriptors + * that the main loop waits for. + * + * Calling aio_notify is rarely necessary, because for example scheduling + * a bottom half calls it already. + */ +void aio_notify(AioContext *ctx); + +/** + * aio_notify_accept: Acknowledge receiving an aio_notify. + * + * aio_notify() uses an EventNotifier in order to wake up a sleeping + * aio_poll() or g_main_context_iteration(). Calls to aio_notify() are + * usually rare, but the AioContext has to clear the EventNotifier on + * every aio_poll() or g_main_context_iteration() in order to avoid + * busy waiting. This event_notifier_test_and_clear() cannot be done + * using the usual aio_context_set_event_notifier(), because it must + * be done before processing all events (file descriptors, bottom halves, + * timers). + * + * aio_notify_accept() is an optimized event_notifier_test_and_clear() + * that is specific to an AioContext's notifier; it is used internally + * to clear the EventNotifier only if aio_notify() had been called. + */ +void aio_notify_accept(AioContext *ctx); + +/** + * aio_bh_call: Executes callback function of the specified BH. + */ +void aio_bh_call(QEMUBH *bh); + +/** + * aio_bh_poll: Poll bottom halves for an AioContext. + * + * These are internal functions used by the QEMU main loop. + * And notice that multiple occurrences of aio_bh_poll cannot + * be called concurrently + */ +int aio_bh_poll(AioContext *ctx); + +/** + * qemu_bh_schedule: Schedule a bottom half. + * + * Scheduling a bottom half interrupts the main loop and causes the + * execution of the callback that was passed to qemu_bh_new. + * + * Bottom halves that are scheduled from a bottom half handler are instantly + * invoked. This can create an infinite loop if a bottom half handler + * schedules itself. + * + * @bh: The bottom half to be scheduled. + */ +void qemu_bh_schedule(QEMUBH *bh); + +/** + * qemu_bh_cancel: Cancel execution of a bottom half. + * + * Canceling execution of a bottom half undoes the effect of calls to + * qemu_bh_schedule without freeing its resources yet. While cancellation + * itself is also wait-free and thread-safe, it can of course race with the + * loop that executes bottom halves unless you are holding the iothread + * mutex. This makes it mostly useless if you are not holding the mutex. + * + * @bh: The bottom half to be canceled. + */ +void qemu_bh_cancel(QEMUBH *bh); + +/** + *qemu_bh_delete: Cancel execution of a bottom half and free its resources. + * + * Deleting a bottom half frees the memory that was allocated for it by + * qemu_bh_new. It also implies canceling the bottom half if it was + * scheduled. + * This func is async. The bottom half will do the delete action at the finial + * end. + * + * @bh: The bottom half to be deleted. + */ +void qemu_bh_delete(QEMUBH *bh); + +/* Return whether there are any pending callbacks from the GSource + * attached to the AioContext, before g_poll is invoked. + * + * This is used internally in the implementation of the GSource. + */ +bool aio_prepare(AioContext *ctx); + +/* Return whether there are any pending callbacks from the GSource + * attached to the AioContext, after g_poll is invoked. + * + * This is used internally in the implementation of the GSource. + */ +bool aio_pending(AioContext *ctx); + +/* Dispatch any pending callbacks from the GSource attached to the AioContext. + * + * This is used internally in the implementation of the GSource. + */ +bool aio_dispatch(AioContext *ctx); + +/* Progress in completing AIO work to occur. This can issue new pending + * aio as a result of executing I/O completion or bh callbacks. + * + * Return whether any progress was made by executing AIO or bottom half + * handlers. If @blocking == true, this should always be true except + * if someone called aio_notify. + * + * If there are no pending bottom halves, but there are pending AIO + * operations, it may not be possible to make any progress without + * blocking. If @blocking is true, this function will wait until one + * or more AIO events have completed, to ensure something has moved + * before returning. + */ +bool aio_poll(AioContext *ctx, bool blocking); + +/* Register a file descriptor and associated callbacks. Behaves very similarly + * to qemu_set_fd_handler. Unlike qemu_set_fd_handler, these callbacks will + * be invoked when using aio_poll(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of qemu_set_fd_handler[2]. + */ +void aio_set_fd_handler(AioContext *ctx, + int fd, + bool is_external, + IOHandler *io_read, + IOHandler *io_write, + void *opaque); + +/* Register an event notifier and associated callbacks. Behaves very similarly + * to event_notifier_set_handler. Unlike event_notifier_set_handler, these callbacks + * will be invoked when using aio_poll(). + * + * Code that invokes AIO completion functions should rely on this function + * instead of event_notifier_set_handler. + */ +void aio_set_event_notifier(AioContext *ctx, + EventNotifier *notifier, + bool is_external, + EventNotifierHandler *io_read); + +/* Return a GSource that lets the main loop poll the file descriptors attached + * to this AioContext. + */ +GSource *aio_get_g_source(AioContext *ctx); + +/* Return the ThreadPool bound to this AioContext */ +struct ThreadPool *aio_get_thread_pool(AioContext *ctx); + +/** + * aio_timer_new: + * @ctx: the aio context + * @type: the clock type + * @scale: the scale + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Allocate a new timer attached to the context @ctx. + * The function is responsible for memory allocation. + * + * The preferred interface is aio_timer_init. Use that + * unless you really need dynamic memory allocation. + * + * Returns: a pointer to the new timer + */ +static inline QEMUTimer *aio_timer_new(AioContext *ctx, QEMUClockType type, + int scale, + QEMUTimerCB *cb, void *opaque) +{ + return timer_new_tl(ctx->tlg.tl[type], scale, cb, opaque); +} + +/** + * aio_timer_init: + * @ctx: the aio context + * @ts: the timer + * @type: the clock type + * @scale: the scale + * @cb: the callback to call on timer expiry + * @opaque: the opaque pointer to pass to the callback + * + * Initialise a new timer attached to the context @ctx. + * The caller is responsible for memory allocation. + */ +static inline void aio_timer_init(AioContext *ctx, + QEMUTimer *ts, QEMUClockType type, + int scale, + QEMUTimerCB *cb, void *opaque) +{ + timer_init_tl(ts, ctx->tlg.tl[type], scale, cb, opaque); +} + +/** + * aio_compute_timeout: + * @ctx: the aio context + * + * Compute the timeout that a blocking aio_poll should use. + */ +int64_t aio_compute_timeout(AioContext *ctx); + +/** + * aio_disable_external: + * @ctx: the aio context + * + * Disable the further processing of external clients. + */ +static inline void aio_disable_external(AioContext *ctx) +{ + atomic_inc(&ctx->external_disable_cnt); +} + +/** + * aio_enable_external: + * @ctx: the aio context + * + * Enable the processing of external clients. + */ +static inline void aio_enable_external(AioContext *ctx) +{ + assert(ctx->external_disable_cnt > 0); + atomic_dec(&ctx->external_disable_cnt); +} + +/** + * aio_external_disabled: + * @ctx: the aio context + * + * Return true if the external clients are disabled. + */ +static inline bool aio_external_disabled(AioContext *ctx) +{ + return atomic_read(&ctx->external_disable_cnt); +} + +/** + * aio_node_check: + * @ctx: the aio context + * @is_external: Whether or not the checked node is an external event source. + * + * Check if the node's is_external flag is okay to be polled by the ctx at this + * moment. True means green light. + */ +static inline bool aio_node_check(AioContext *ctx, bool is_external) +{ + return !is_external || !atomic_read(&ctx->external_disable_cnt); +} + +/** + * aio_context_setup: + * @ctx: the aio context + * + * Initialize the aio context. + */ +void aio_context_setup(AioContext *ctx, Error **errp); + +#endif diff --git a/src/include/block/block.h b/src/include/block/block.h new file mode 100644 index 0000000..d83d420 --- /dev/null +++ b/src/include/block/block.h @@ -0,0 +1,641 @@ +#ifndef BLOCK_H +#define BLOCK_H + +#include "block/aio.h" +#include "qemu-common.h" +#include "qemu/option.h" +#include "qemu/coroutine.h" +#include "block/accounting.h" +#include "qapi/qmp/qobject.h" +#include "qapi-types.h" + +/* block.c */ +typedef struct BlockDriver BlockDriver; +typedef struct BlockJob BlockJob; +typedef struct BdrvChild BdrvChild; +typedef struct BdrvChildRole BdrvChildRole; +typedef struct BlockJobTxn BlockJobTxn; + +typedef struct BlockDriverInfo { + /* in bytes, 0 if irrelevant */ + int cluster_size; + /* offset at which the VM state can be saved (0 if not possible) */ + int64_t vm_state_offset; + bool is_dirty; + /* + * True if unallocated blocks read back as zeroes. This is equivalent + * to the LBPRZ flag in the SCSI logical block provisioning page. + */ + bool unallocated_blocks_are_zero; + /* + * True if the driver can optimize writing zeroes by unmapping + * sectors. This is equivalent to the BLKDISCARDZEROES ioctl in Linux + * with the difference that in qemu a discard is allowed to silently + * fail. Therefore we have to use bdrv_write_zeroes with the + * BDRV_REQ_MAY_UNMAP flag for an optimized zero write with unmapping. + * After this call the driver has to guarantee that the contents read + * back as zero. It is additionally required that the block device is + * opened with BDRV_O_UNMAP flag for this to work. + */ + bool can_write_zeroes_with_unmap; + /* + * True if this block driver only supports compressed writes + */ + bool needs_compressed_writes; +} BlockDriverInfo; + +typedef struct BlockFragInfo { + uint64_t allocated_clusters; + uint64_t total_clusters; + uint64_t fragmented_clusters; + uint64_t compressed_clusters; +} BlockFragInfo; + +typedef enum { + BDRV_REQ_COPY_ON_READ = 0x1, + BDRV_REQ_ZERO_WRITE = 0x2, + /* The BDRV_REQ_MAY_UNMAP flag is used to indicate that the block driver + * is allowed to optimize a write zeroes request by unmapping (discarding) + * blocks if it is guaranteed that the result will read back as + * zeroes. The flag is only passed to the driver if the block device is + * opened with BDRV_O_UNMAP. + */ + BDRV_REQ_MAY_UNMAP = 0x4, + BDRV_REQ_NO_SERIALISING = 0x8, +} BdrvRequestFlags; + +typedef struct BlockSizes { + uint32_t phys; + uint32_t log; +} BlockSizes; + +typedef struct HDGeometry { + uint32_t heads; + uint32_t sectors; + uint32_t cylinders; +} HDGeometry; + +#define BDRV_O_RDWR 0x0002 +#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */ +#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */ +#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */ +#define BDRV_O_CACHE_WB 0x0040 /* use write-back caching */ +#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */ +#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */ +#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */ +#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */ +#define BDRV_O_INCOMING 0x0800 /* consistency hint for incoming migration */ +#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */ +#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */ +#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */ +#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given: + select an appropriate protocol driver, + ignoring the format layer */ + +#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_CACHE_WB | BDRV_O_NO_FLUSH) + + +/* Option names of options parsed by the block layer */ + +#define BDRV_OPT_CACHE_WB "cache.writeback" +#define BDRV_OPT_CACHE_DIRECT "cache.direct" +#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" + + +#define BDRV_SECTOR_BITS 9 +#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) +#define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) + +#define BDRV_REQUEST_MAX_SECTORS MIN(SIZE_MAX >> BDRV_SECTOR_BITS, \ + INT_MAX >> BDRV_SECTOR_BITS) + +/* + * Allocation status flags + * BDRV_BLOCK_DATA: data is read from bs->file or another file + * BDRV_BLOCK_ZERO: sectors read as zero + * BDRV_BLOCK_OFFSET_VALID: sector stored in bs->file as raw data + * BDRV_BLOCK_ALLOCATED: the content of the block is determined by this + * layer (as opposed to the backing file) + * BDRV_BLOCK_RAW: used internally to indicate that the request + * was answered by the raw driver and that one + * should look in bs->file directly. + * + * If BDRV_BLOCK_OFFSET_VALID is set, bits 9-62 represent the offset in + * bs->file where sector data can be read from as raw data. + * + * DATA == 0 && ZERO == 0 means that data is read from backing_hd if present. + * + * DATA ZERO OFFSET_VALID + * t t t sectors read as zero, bs->file is zero at offset + * t f t sectors read as valid from bs->file at offset + * f t t sectors preallocated, read as zero, bs->file not + * necessarily zero at offset + * f f t sectors preallocated but read from backing_hd, + * bs->file contains garbage at offset + * t t f sectors preallocated, read as zero, unknown offset + * t f f sectors read from unknown file or offset + * f t f not allocated or unknown offset, read as zero + * f f f not allocated or unknown offset, read from backing_hd + */ +#define BDRV_BLOCK_DATA 0x01 +#define BDRV_BLOCK_ZERO 0x02 +#define BDRV_BLOCK_OFFSET_VALID 0x04 +#define BDRV_BLOCK_RAW 0x08 +#define BDRV_BLOCK_ALLOCATED 0x10 +#define BDRV_BLOCK_OFFSET_MASK BDRV_SECTOR_MASK + +typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; + +typedef struct BDRVReopenState { + BlockDriverState *bs; + int flags; + QDict *options; + void *opaque; +} BDRVReopenState; + +/* + * Block operation types + */ +typedef enum BlockOpType { + BLOCK_OP_TYPE_BACKUP_SOURCE, + BLOCK_OP_TYPE_BACKUP_TARGET, + BLOCK_OP_TYPE_CHANGE, + BLOCK_OP_TYPE_COMMIT_SOURCE, + BLOCK_OP_TYPE_COMMIT_TARGET, + BLOCK_OP_TYPE_DATAPLANE, + BLOCK_OP_TYPE_DRIVE_DEL, + BLOCK_OP_TYPE_EJECT, + BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT, + BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE, + BLOCK_OP_TYPE_MIRROR, + BLOCK_OP_TYPE_RESIZE, + BLOCK_OP_TYPE_STREAM, + BLOCK_OP_TYPE_REPLACE, + BLOCK_OP_TYPE_MAX, +} BlockOpType; + +void bdrv_info_print(Monitor *mon, const QObject *data); +void bdrv_info(Monitor *mon, QObject **ret_data); +void bdrv_stats_print(Monitor *mon, const QObject *data); +void bdrv_info_stats(Monitor *mon, QObject **ret_data); + +/* disk I/O throttling */ +void bdrv_io_limits_enable(BlockDriverState *bs, const char *group); +void bdrv_io_limits_disable(BlockDriverState *bs); +void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group); + +void bdrv_init(void); +void bdrv_init_with_whitelist(void); +BlockDriver *bdrv_find_protocol(const char *filename, + bool allow_protocol_prefix, + Error **errp); +BlockDriver *bdrv_find_format(const char *format_name); +int bdrv_create(BlockDriver *drv, const char* filename, + QemuOpts *opts, Error **errp); +int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp); +BlockDriverState *bdrv_new_root(void); +BlockDriverState *bdrv_new(void); +void bdrv_device_remove(BlockDriverState *bs); +void bdrv_make_anon(BlockDriverState *bs); +void bdrv_swap(BlockDriverState *bs_new, BlockDriverState *bs_old); +void bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top); +void bdrv_replace_in_backing_chain(BlockDriverState *old, + BlockDriverState *new); + +int bdrv_parse_cache_flags(const char *mode, int *flags); +int bdrv_parse_discard_flags(const char *mode, int *flags); +BdrvChild *bdrv_open_child(const char *filename, + QDict *options, const char *bdref_key, + BlockDriverState* parent, + const BdrvChildRole *child_role, + bool allow_none, Error **errp); +void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd); +int bdrv_open_backing_file(BlockDriverState *bs, QDict *options, Error **errp); +int bdrv_append_temp_snapshot(BlockDriverState *bs, int flags, Error **errp); +int bdrv_open(BlockDriverState **pbs, const char *filename, + const char *reference, QDict *options, int flags, Error **errp); +BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue, + BlockDriverState *bs, + QDict *options, int flags); +int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp); +int bdrv_reopen(BlockDriverState *bs, int bdrv_flags, Error **errp); +int bdrv_reopen_prepare(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); +void bdrv_reopen_commit(BDRVReopenState *reopen_state); +void bdrv_reopen_abort(BDRVReopenState *reopen_state); +void bdrv_close(BlockDriverState *bs); +void bdrv_add_close_notifier(BlockDriverState *bs, Notifier *notify); +int bdrv_read(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); +int bdrv_read_unthrottled(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); +int bdrv_write(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); +int bdrv_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags); +BlockAIOCB *bdrv_aio_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags, + BlockCompletionFunc *cb, void *opaque); +int bdrv_make_zero(BlockDriverState *bs, BdrvRequestFlags flags); +int bdrv_pread(BlockDriverState *bs, int64_t offset, + void *buf, int count); +int bdrv_pwrite(BlockDriverState *bs, int64_t offset, + const void *buf, int count); +int bdrv_pwritev(BlockDriverState *bs, int64_t offset, QEMUIOVector *qiov); +int bdrv_pwrite_sync(BlockDriverState *bs, int64_t offset, + const void *buf, int count); +int coroutine_fn bdrv_co_readv(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_copy_on_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_readv_no_serialising(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); +int coroutine_fn bdrv_co_writev(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, QEMUIOVector *qiov); +/* + * Efficiently zero a region of the disk image. Note that this is a regular + * I/O request like read or write and should have a reasonable size. This + * function is not suitable for zeroing the entire image in a single request + * because it may allocate memory for the entire region. + */ +int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, BdrvRequestFlags flags); +BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs, + const char *backing_file); +int bdrv_get_backing_file_depth(BlockDriverState *bs); +void bdrv_refresh_filename(BlockDriverState *bs); +int bdrv_truncate(BlockDriverState *bs, int64_t offset); +int64_t bdrv_nb_sectors(BlockDriverState *bs); +int64_t bdrv_getlength(BlockDriverState *bs); +int64_t bdrv_get_allocated_file_size(BlockDriverState *bs); +void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr); +void bdrv_refresh_limits(BlockDriverState *bs, Error **errp); +int bdrv_commit(BlockDriverState *bs); +int bdrv_commit_all(void); +int bdrv_change_backing_file(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); +void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base, + const char *backing_file_str); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); + + +typedef struct BdrvCheckResult { + int corruptions; + int leaks; + int check_errors; + int corruptions_fixed; + int leaks_fixed; + int64_t image_end_offset; + BlockFragInfo bfi; +} BdrvCheckResult; + +typedef enum { + BDRV_FIX_LEAKS = 1, + BDRV_FIX_ERRORS = 2, +} BdrvCheckMode; + +int bdrv_check(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix); + +/* The units of offset and total_work_size may be chosen arbitrarily by the + * block driver; total_work_size may change during the course of the amendment + * operation */ +typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset, + int64_t total_work_size); +int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb); + +/* external snapshots */ +bool bdrv_recurse_is_first_non_filter(BlockDriverState *bs, + BlockDriverState *candidate); +bool bdrv_is_first_non_filter(BlockDriverState *candidate); + +/* check if a named node can be replaced when doing drive-mirror */ +BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs, + const char *node_name, Error **errp); + +/* async block I/O */ +typedef void BlockDriverDirtyHandler(BlockDriverState *bs, int64_t sector, + int sector_num); +BlockAIOCB *bdrv_aio_readv(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *bdrv_aio_writev(BlockDriverState *bs, int64_t sector_num, + QEMUIOVector *iov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *bdrv_aio_flush(BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); +BlockAIOCB *bdrv_aio_discard(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); +void bdrv_aio_cancel(BlockAIOCB *acb); +void bdrv_aio_cancel_async(BlockAIOCB *acb); + +typedef struct BlockRequest { + /* Fields to be filled by multiwrite caller */ + union { + struct { + int64_t sector; + int nb_sectors; + int flags; + QEMUIOVector *qiov; + }; + struct { + int req; + void *buf; + }; + }; + BlockCompletionFunc *cb; + void *opaque; + + /* Filled by multiwrite implementation */ + int error; +} BlockRequest; + +int bdrv_aio_multiwrite(BlockDriverState *bs, BlockRequest *reqs, + int num_reqs); + +/* sg packet commands */ +int bdrv_ioctl(BlockDriverState *bs, unsigned long int req, void *buf); +BlockAIOCB *bdrv_aio_ioctl(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + +/* Invalidate any cached metadata used by image formats */ +void bdrv_invalidate_cache(BlockDriverState *bs, Error **errp); +void bdrv_invalidate_cache_all(Error **errp); + +/* Ensure contents are flushed to disk. */ +int bdrv_flush(BlockDriverState *bs); +int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +int bdrv_flush_all(void); +void bdrv_close_all(void); +void bdrv_drain(BlockDriverState *bs); +void bdrv_drain_all(void); + +int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); +int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); +int bdrv_has_zero_init_1(BlockDriverState *bs); +int bdrv_has_zero_init(BlockDriverState *bs); +bool bdrv_unallocated_blocks_are_zero(BlockDriverState *bs); +bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs); +int64_t bdrv_get_block_status(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum); +int64_t bdrv_get_block_status_above(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, + int nb_sectors, int *pnum); +int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, + int *pnum); +int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base, + int64_t sector_num, int nb_sectors, int *pnum); + +int bdrv_is_read_only(BlockDriverState *bs); +int bdrv_is_sg(BlockDriverState *bs); +int bdrv_enable_write_cache(BlockDriverState *bs); +void bdrv_set_enable_write_cache(BlockDriverState *bs, bool wce); +bool bdrv_is_inserted(BlockDriverState *bs); +int bdrv_media_changed(BlockDriverState *bs); +void bdrv_lock_medium(BlockDriverState *bs, bool locked); +void bdrv_eject(BlockDriverState *bs, bool eject_flag); +const char *bdrv_get_format_name(BlockDriverState *bs); +BlockDriverState *bdrv_find_node(const char *node_name); +BlockDeviceInfoList *bdrv_named_nodes_list(Error **errp); +BlockDriverState *bdrv_lookup_bs(const char *device, + const char *node_name, + Error **errp); +bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base); +BlockDriverState *bdrv_next_node(BlockDriverState *bs); +BlockDriverState *bdrv_next(BlockDriverState *bs); +int bdrv_is_encrypted(BlockDriverState *bs); +int bdrv_key_required(BlockDriverState *bs); +int bdrv_set_key(BlockDriverState *bs, const char *key); +void bdrv_add_key(BlockDriverState *bs, const char *key, Error **errp); +int bdrv_query_missing_keys(void); +void bdrv_iterate_format(void (*it)(void *opaque, const char *name), + void *opaque); +const char *bdrv_get_node_name(const BlockDriverState *bs); +const char *bdrv_get_device_name(const BlockDriverState *bs); +const char *bdrv_get_device_or_node_name(const BlockDriverState *bs); +int bdrv_get_flags(BlockDriverState *bs); +int bdrv_write_compressed(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); +int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi); +ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs); +void bdrv_round_to_clusters(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + int64_t *cluster_sector_num, + int *cluster_nb_sectors); + +const char *bdrv_get_encrypted_filename(BlockDriverState *bs); +void bdrv_get_backing_filename(BlockDriverState *bs, + char *filename, int filename_size); +void bdrv_get_full_backing_filename(BlockDriverState *bs, + char *dest, size_t sz, Error **errp); +void bdrv_get_full_backing_filename_from_filename(const char *backed, + const char *backing, + char *dest, size_t sz, + Error **errp); +int bdrv_is_snapshot(BlockDriverState *bs); + +int path_has_protocol(const char *path); +int path_is_absolute(const char *path); +void path_combine(char *dest, int dest_size, + const char *base_path, + const char *filename); + +int bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos); +int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf, + int64_t pos, int size); + +int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + +void bdrv_img_create(const char *filename, const char *fmt, + const char *base_filename, const char *base_fmt, + char *options, uint64_t img_size, int flags, + Error **errp, bool quiet); + +/* Returns the alignment in bytes that is required so that no bounce buffer + * is required throughout the stack */ +size_t bdrv_min_mem_align(BlockDriverState *bs); +/* Returns optimal alignment in bytes for bounce buffer */ +size_t bdrv_opt_mem_align(BlockDriverState *bs); +void *qemu_blockalign(BlockDriverState *bs, size_t size); +void *qemu_blockalign0(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign(BlockDriverState *bs, size_t size); +void *qemu_try_blockalign0(BlockDriverState *bs, size_t size); +bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov); + +struct HBitmapIter; +typedef struct BdrvDirtyBitmap BdrvDirtyBitmap; +BdrvDirtyBitmap *bdrv_create_dirty_bitmap(BlockDriverState *bs, + uint32_t granularity, + const char *name, + Error **errp); +int bdrv_dirty_bitmap_create_successor(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_dirty_bitmap_abdicate(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_reclaim_dirty_bitmap(BlockDriverState *bs, + BdrvDirtyBitmap *bitmap, + Error **errp); +BdrvDirtyBitmap *bdrv_find_dirty_bitmap(BlockDriverState *bs, + const char *name); +void bdrv_dirty_bitmap_make_anon(BdrvDirtyBitmap *bitmap); +void bdrv_release_dirty_bitmap(BlockDriverState *bs, BdrvDirtyBitmap *bitmap); +void bdrv_disable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +void bdrv_enable_dirty_bitmap(BdrvDirtyBitmap *bitmap); +BlockDirtyInfoList *bdrv_query_dirty_bitmaps(BlockDriverState *bs); +uint32_t bdrv_get_default_bitmap_granularity(BlockDriverState *bs); +uint32_t bdrv_dirty_bitmap_granularity(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_enabled(BdrvDirtyBitmap *bitmap); +bool bdrv_dirty_bitmap_frozen(BdrvDirtyBitmap *bitmap); +DirtyBitmapStatus bdrv_dirty_bitmap_status(BdrvDirtyBitmap *bitmap); +int bdrv_get_dirty(BlockDriverState *bs, BdrvDirtyBitmap *bitmap, int64_t sector); +void bdrv_set_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap, + int64_t cur_sector, int nr_sectors); +void bdrv_dirty_iter_init(BdrvDirtyBitmap *bitmap, struct HBitmapIter *hbi); +void bdrv_set_dirty_iter(struct HBitmapIter *hbi, int64_t offset); +int64_t bdrv_get_dirty_count(BdrvDirtyBitmap *bitmap); + +void bdrv_enable_copy_on_read(BlockDriverState *bs); +void bdrv_disable_copy_on_read(BlockDriverState *bs); + +void bdrv_ref(BlockDriverState *bs); +void bdrv_unref(BlockDriverState *bs); +void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child); + +bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp); +void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason); +void bdrv_op_block_all(BlockDriverState *bs, Error *reason); +void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason); +bool bdrv_op_blocker_is_empty(BlockDriverState *bs); + +typedef enum { + BLKDBG_L1_UPDATE, + + BLKDBG_L1_GROW_ALLOC_TABLE, + BLKDBG_L1_GROW_WRITE_TABLE, + BLKDBG_L1_GROW_ACTIVATE_TABLE, + + BLKDBG_L2_LOAD, + BLKDBG_L2_UPDATE, + BLKDBG_L2_UPDATE_COMPRESSED, + BLKDBG_L2_ALLOC_COW_READ, + BLKDBG_L2_ALLOC_WRITE, + + BLKDBG_READ_AIO, + BLKDBG_READ_BACKING_AIO, + BLKDBG_READ_COMPRESSED, + + BLKDBG_WRITE_AIO, + BLKDBG_WRITE_COMPRESSED, + + BLKDBG_VMSTATE_LOAD, + BLKDBG_VMSTATE_SAVE, + + BLKDBG_COW_READ, + BLKDBG_COW_WRITE, + + BLKDBG_REFTABLE_LOAD, + BLKDBG_REFTABLE_GROW, + BLKDBG_REFTABLE_UPDATE, + + BLKDBG_REFBLOCK_LOAD, + BLKDBG_REFBLOCK_UPDATE, + BLKDBG_REFBLOCK_UPDATE_PART, + BLKDBG_REFBLOCK_ALLOC, + BLKDBG_REFBLOCK_ALLOC_HOOKUP, + BLKDBG_REFBLOCK_ALLOC_WRITE, + BLKDBG_REFBLOCK_ALLOC_WRITE_BLOCKS, + BLKDBG_REFBLOCK_ALLOC_WRITE_TABLE, + BLKDBG_REFBLOCK_ALLOC_SWITCH_TABLE, + + BLKDBG_CLUSTER_ALLOC, + BLKDBG_CLUSTER_ALLOC_BYTES, + BLKDBG_CLUSTER_FREE, + + BLKDBG_FLUSH_TO_OS, + BLKDBG_FLUSH_TO_DISK, + + BLKDBG_PWRITEV_RMW_HEAD, + BLKDBG_PWRITEV_RMW_AFTER_HEAD, + BLKDBG_PWRITEV_RMW_TAIL, + BLKDBG_PWRITEV_RMW_AFTER_TAIL, + BLKDBG_PWRITEV, + BLKDBG_PWRITEV_ZERO, + BLKDBG_PWRITEV_DONE, + + BLKDBG_EMPTY_IMAGE_PREPARE, + + BLKDBG_EVENT_MAX, +} BlkDebugEvent; + +#define BLKDBG_EVENT(child, evt) \ + do { \ + if (child) { \ + bdrv_debug_event(child->bs, evt); \ + } \ + } while (0) + +void bdrv_debug_event(BlockDriverState *bs, BlkDebugEvent event); + +int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event, + const char *tag); +int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag); +int bdrv_debug_resume(BlockDriverState *bs, const char *tag); +bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag); + +/** + * bdrv_get_aio_context: + * + * Returns: the currently bound #AioContext + */ +AioContext *bdrv_get_aio_context(BlockDriverState *bs); + +/** + * bdrv_set_aio_context: + * + * Changes the #AioContext used for fd handlers, timers, and BHs by this + * BlockDriverState and all its children. + * + * This function must be called with iothread lock held. + */ +void bdrv_set_aio_context(BlockDriverState *bs, AioContext *new_context); +int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz); +int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo); + +void bdrv_io_plug(BlockDriverState *bs); +void bdrv_io_unplug(BlockDriverState *bs); +void bdrv_flush_io_queue(BlockDriverState *bs); + +/** + * bdrv_drained_begin: + * + * Begin a quiesced section for exclusive access to the BDS, by disabling + * external request sources including NBD server and device model. Note that + * this doesn't block timers or coroutines from submitting more requests, which + * means block_job_pause is still necessary. + * + * This function can be recursive. + */ +void bdrv_drained_begin(BlockDriverState *bs); + +/** + * bdrv_drained_end: + * + * End a quiescent section started by bdrv_drained_begin(). + */ +void bdrv_drained_end(BlockDriverState *bs); + +#endif diff --git a/src/include/block/block_int.h b/src/include/block/block_int.h new file mode 100644 index 0000000..7029d41 --- /dev/null +++ b/src/include/block/block_int.h @@ -0,0 +1,701 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCK_INT_H +#define BLOCK_INT_H + +#include "block/accounting.h" +#include "block/block.h" +#include "block/throttle-groups.h" +#include "qemu/option.h" +#include "qemu/queue.h" +#include "qemu/coroutine.h" +#include "qemu/timer.h" +#include "qapi-types.h" +#include "qemu/hbitmap.h" +#include "block/snapshot.h" +#include "qemu/main-loop.h" +#include "qemu/throttle.h" + +#define BLOCK_FLAG_ENCRYPT 1 +#define BLOCK_FLAG_COMPAT6 4 +#define BLOCK_FLAG_LAZY_REFCOUNTS 8 + +#define BLOCK_OPT_SIZE "size" +#define BLOCK_OPT_ENCRYPT "encryption" +#define BLOCK_OPT_COMPAT6 "compat6" +#define BLOCK_OPT_BACKING_FILE "backing_file" +#define BLOCK_OPT_BACKING_FMT "backing_fmt" +#define BLOCK_OPT_CLUSTER_SIZE "cluster_size" +#define BLOCK_OPT_TABLE_SIZE "table_size" +#define BLOCK_OPT_PREALLOC "preallocation" +#define BLOCK_OPT_SUBFMT "subformat" +#define BLOCK_OPT_COMPAT_LEVEL "compat" +#define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" +#define BLOCK_OPT_ADAPTER_TYPE "adapter_type" +#define BLOCK_OPT_REDUNDANCY "redundancy" +#define BLOCK_OPT_NOCOW "nocow" +#define BLOCK_OPT_OBJECT_SIZE "object_size" +#define BLOCK_OPT_REFCOUNT_BITS "refcount_bits" + +#define BLOCK_PROBE_BUF_SIZE 512 + +enum BdrvTrackedRequestType { + BDRV_TRACKED_READ, + BDRV_TRACKED_WRITE, + BDRV_TRACKED_FLUSH, + BDRV_TRACKED_IOCTL, + BDRV_TRACKED_DISCARD, +}; + +typedef struct BdrvTrackedRequest { + BlockDriverState *bs; + int64_t offset; + unsigned int bytes; + enum BdrvTrackedRequestType type; + + bool serialising; + int64_t overlap_offset; + unsigned int overlap_bytes; + + QLIST_ENTRY(BdrvTrackedRequest) list; + Coroutine *co; /* owner, used for deadlock detection */ + CoQueue wait_queue; /* coroutines blocked on this request */ + + struct BdrvTrackedRequest *waiting_for; +} BdrvTrackedRequest; + +struct BlockDriver { + const char *format_name; + int instance_size; + + /* set to true if the BlockDriver is a block filter */ + bool is_filter; + /* for snapshots block filter like Quorum can implement the + * following recursive callback. + * It's purpose is to recurse on the filter children while calling + * bdrv_recurse_is_first_non_filter on them. + * For a sample implementation look in the future Quorum block filter. + */ + bool (*bdrv_recurse_is_first_non_filter)(BlockDriverState *bs, + BlockDriverState *candidate); + + int (*bdrv_probe)(const uint8_t *buf, int buf_size, const char *filename); + int (*bdrv_probe_device)(const char *filename); + + /* Any driver implementing this callback is expected to be able to handle + * NULL file names in its .bdrv_open() implementation */ + void (*bdrv_parse_filename)(const char *filename, QDict *options, Error **errp); + /* Drivers not implementing bdrv_parse_filename nor bdrv_open should have + * this field set to true, except ones that are defined only by their + * child's bs. + * An example of the last type will be the quorum block driver. + */ + bool bdrv_needs_filename; + + /* Set if a driver can support backing files */ + bool supports_backing; + + /* For handling image reopen for split or non-split files */ + int (*bdrv_reopen_prepare)(BDRVReopenState *reopen_state, + BlockReopenQueue *queue, Error **errp); + void (*bdrv_reopen_commit)(BDRVReopenState *reopen_state); + void (*bdrv_reopen_abort)(BDRVReopenState *reopen_state); + + int (*bdrv_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + int (*bdrv_file_open)(BlockDriverState *bs, QDict *options, int flags, + Error **errp); + int (*bdrv_read)(BlockDriverState *bs, int64_t sector_num, + uint8_t *buf, int nb_sectors); + int (*bdrv_write)(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); + void (*bdrv_close)(BlockDriverState *bs); + int (*bdrv_create)(const char *filename, QemuOpts *opts, Error **errp); + int (*bdrv_set_key)(BlockDriverState *bs, const char *key); + int (*bdrv_make_empty)(BlockDriverState *bs); + + void (*bdrv_refresh_filename)(BlockDriverState *bs); + + /* aio */ + BlockAIOCB *(*bdrv_aio_readv)(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_writev)(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_flush)(BlockDriverState *bs, + BlockCompletionFunc *cb, void *opaque); + BlockAIOCB *(*bdrv_aio_discard)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + BlockCompletionFunc *cb, void *opaque); + + int coroutine_fn (*bdrv_co_readv)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + int coroutine_fn (*bdrv_co_writev)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov); + /* + * Efficiently zero a region of the disk image. Typically an image format + * would use a compact metadata representation to implement this. This + * function pointer may be NULL and .bdrv_co_writev() will be called + * instead. + */ + int coroutine_fn (*bdrv_co_write_zeroes)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, BdrvRequestFlags flags); + int coroutine_fn (*bdrv_co_discard)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors); + int64_t coroutine_fn (*bdrv_co_get_block_status)(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum); + + /* + * Invalidate any cached meta-data. + */ + void (*bdrv_invalidate_cache)(BlockDriverState *bs, Error **errp); + + /* + * Flushes all data that was already written to the OS all the way down to + * the disk (for example raw-posix calls fsync()). + */ + int coroutine_fn (*bdrv_co_flush_to_disk)(BlockDriverState *bs); + + /* + * Flushes all internal caches to the OS. The data may still sit in a + * writeback cache of the host OS, but it will survive a crash of the qemu + * process. + */ + int coroutine_fn (*bdrv_co_flush_to_os)(BlockDriverState *bs); + + const char *protocol_name; + int (*bdrv_truncate)(BlockDriverState *bs, int64_t offset); + + int64_t (*bdrv_getlength)(BlockDriverState *bs); + bool has_variable_length; + int64_t (*bdrv_get_allocated_file_size)(BlockDriverState *bs); + + int (*bdrv_write_compressed)(BlockDriverState *bs, int64_t sector_num, + const uint8_t *buf, int nb_sectors); + + int (*bdrv_snapshot_create)(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); + int (*bdrv_snapshot_goto)(BlockDriverState *bs, + const char *snapshot_id); + int (*bdrv_snapshot_delete)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + int (*bdrv_snapshot_list)(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); + int (*bdrv_snapshot_load_tmp)(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); + int (*bdrv_get_info)(BlockDriverState *bs, BlockDriverInfo *bdi); + ImageInfoSpecific *(*bdrv_get_specific_info)(BlockDriverState *bs); + + int (*bdrv_save_vmstate)(BlockDriverState *bs, QEMUIOVector *qiov, + int64_t pos); + int (*bdrv_load_vmstate)(BlockDriverState *bs, uint8_t *buf, + int64_t pos, int size); + + int (*bdrv_change_backing_file)(BlockDriverState *bs, + const char *backing_file, const char *backing_fmt); + + /* removable device specific */ + bool (*bdrv_is_inserted)(BlockDriverState *bs); + int (*bdrv_media_changed)(BlockDriverState *bs); + void (*bdrv_eject)(BlockDriverState *bs, bool eject_flag); + void (*bdrv_lock_medium)(BlockDriverState *bs, bool locked); + + /* to control generic scsi devices */ + BlockAIOCB *(*bdrv_aio_ioctl)(BlockDriverState *bs, + unsigned long int req, void *buf, + BlockCompletionFunc *cb, void *opaque); + + /* List of options for creating images, terminated by name == NULL */ + QemuOptsList *create_opts; + + /* + * Returns 0 for completed check, -errno for internal errors. + * The check results are stored in result. + */ + int (*bdrv_check)(BlockDriverState* bs, BdrvCheckResult *result, + BdrvCheckMode fix); + + int (*bdrv_amend_options)(BlockDriverState *bs, QemuOpts *opts, + BlockDriverAmendStatusCB *status_cb); + + void (*bdrv_debug_event)(BlockDriverState *bs, BlkDebugEvent event); + + /* TODO Better pass a option string/QDict/QemuOpts to add any rule? */ + int (*bdrv_debug_breakpoint)(BlockDriverState *bs, const char *event, + const char *tag); + int (*bdrv_debug_remove_breakpoint)(BlockDriverState *bs, + const char *tag); + int (*bdrv_debug_resume)(BlockDriverState *bs, const char *tag); + bool (*bdrv_debug_is_suspended)(BlockDriverState *bs, const char *tag); + + void (*bdrv_refresh_limits)(BlockDriverState *bs, Error **errp); + + /* + * Returns 1 if newly created images are guaranteed to contain only + * zeros, 0 otherwise. + */ + int (*bdrv_has_zero_init)(BlockDriverState *bs); + + /* Remove fd handlers, timers, and other event loop callbacks so the event + * loop is no longer in use. Called with no in-flight requests and in + * depth-first traversal order with parents before child nodes. + */ + void (*bdrv_detach_aio_context)(BlockDriverState *bs); + + /* Add fd handlers, timers, and other event loop callbacks so I/O requests + * can be processed again. Called with no in-flight requests and in + * depth-first traversal order with child nodes before parent nodes. + */ + void (*bdrv_attach_aio_context)(BlockDriverState *bs, + AioContext *new_context); + + /* io queue for linux-aio */ + void (*bdrv_io_plug)(BlockDriverState *bs); + void (*bdrv_io_unplug)(BlockDriverState *bs); + void (*bdrv_flush_io_queue)(BlockDriverState *bs); + + /** + * Try to get @bs's logical and physical block size. + * On success, store them in @bsz and return zero. + * On failure, return negative errno. + */ + int (*bdrv_probe_blocksizes)(BlockDriverState *bs, BlockSizes *bsz); + /** + * Try to get @bs's geometry (cyls, heads, sectors) + * On success, store them in @geo and return 0. + * On failure return -errno. + * Only drivers that want to override guest geometry implement this + * callback; see hd_geometry_guess(). + */ + int (*bdrv_probe_geometry)(BlockDriverState *bs, HDGeometry *geo); + + /** + * Drain and stop any internal sources of requests in the driver, and + * remain so until next I/O callback (e.g. bdrv_co_writev) is called. + */ + void (*bdrv_drain)(BlockDriverState *bs); + + QLIST_ENTRY(BlockDriver) list; +}; + +typedef struct BlockLimits { + /* maximum number of sectors that can be discarded at once */ + int max_discard; + + /* optimal alignment for discard requests in sectors */ + int64_t discard_alignment; + + /* maximum number of sectors that can zeroized at once */ + int max_write_zeroes; + + /* optimal alignment for write zeroes requests in sectors */ + int64_t write_zeroes_alignment; + + /* optimal transfer length in sectors */ + int opt_transfer_length; + + /* maximal transfer length in sectors */ + int max_transfer_length; + + /* memory alignment so that no bounce buffer is needed */ + size_t min_mem_alignment; + + /* memory alignment for bounce buffer */ + size_t opt_mem_alignment; +} BlockLimits; + +typedef struct BdrvOpBlocker BdrvOpBlocker; + +typedef struct BdrvAioNotifier { + void (*attached_aio_context)(AioContext *new_context, void *opaque); + void (*detach_aio_context)(void *opaque); + + void *opaque; + + QLIST_ENTRY(BdrvAioNotifier) list; +} BdrvAioNotifier; + +struct BdrvChildRole { + int (*inherit_flags)(int parent_flags); +}; + +extern const BdrvChildRole child_file; +extern const BdrvChildRole child_format; + +struct BdrvChild { + BlockDriverState *bs; + const BdrvChildRole *role; + QLIST_ENTRY(BdrvChild) next; + QLIST_ENTRY(BdrvChild) next_parent; +}; + +/* + * Note: the function bdrv_append() copies and swaps contents of + * BlockDriverStates, so if you add new fields to this struct, please + * inspect bdrv_append() to determine if the new fields need to be + * copied as well. + */ +struct BlockDriverState { + int64_t total_sectors; /* if we are reading a disk image, give its + size in sectors */ + int read_only; /* if true, the media is read only */ + int open_flags; /* flags used to open the file, re-used for re-open */ + int encrypted; /* if true, the media is encrypted */ + int valid_key; /* if true, a valid encryption key has been set */ + int sg; /* if true, the device is a /dev/sg* */ + int copy_on_read; /* if true, copy read backing sectors into image + note this is a reference count */ + bool probed; + + BlockDriver *drv; /* NULL means no media */ + void *opaque; + + BlockBackend *blk; /* owning backend, if any */ + + AioContext *aio_context; /* event loop used for fd handlers, timers, etc */ + /* long-running tasks intended to always use the same AioContext as this + * BDS may register themselves in this list to be notified of changes + * regarding this BDS's context */ + QLIST_HEAD(, BdrvAioNotifier) aio_notifiers; + + char filename[PATH_MAX]; + char backing_file[PATH_MAX]; /* if non zero, the image is a diff of + this file image */ + char backing_format[16]; /* if non-zero and backing_file exists */ + + QDict *full_open_options; + char exact_filename[PATH_MAX]; + + BdrvChild *backing; + BdrvChild *file; + + NotifierList close_notifiers; + + /* Callback before write request is processed */ + NotifierWithReturnList before_write_notifiers; + + /* number of in-flight serialising requests */ + unsigned int serialising_in_flight; + + /* I/O throttling. + * throttle_state tells us if this BDS has I/O limits configured. + * io_limits_enabled tells us if they are currently being + * enforced, but it can be temporarily set to false */ + CoQueue throttled_reqs[2]; + bool io_limits_enabled; + /* The following fields are protected by the ThrottleGroup lock. + * See the ThrottleGroup documentation for details. */ + ThrottleState *throttle_state; + ThrottleTimers throttle_timers; + unsigned pending_reqs[2]; + QLIST_ENTRY(BlockDriverState) round_robin; + + /* Offset after the highest byte written to */ + uint64_t wr_highest_offset; + + /* I/O Limits */ + BlockLimits bl; + + /* Whether produces zeros when read beyond eof */ + bool zero_beyond_eof; + + /* Alignment requirement for offset/length of I/O requests */ + unsigned int request_alignment; + + /* do we need to tell the quest if we have a volatile write cache? */ + int enable_write_cache; + + /* the following member gives a name to every node on the bs graph. */ + char node_name[32]; + /* element of the list of named nodes building the graph */ + QTAILQ_ENTRY(BlockDriverState) node_list; + /* element of the list of "drives" the guest sees */ + QTAILQ_ENTRY(BlockDriverState) device_list; + QLIST_HEAD(, BdrvDirtyBitmap) dirty_bitmaps; + int refcnt; + + QLIST_HEAD(, BdrvTrackedRequest) tracked_requests; + + /* operation blockers */ + QLIST_HEAD(, BdrvOpBlocker) op_blockers[BLOCK_OP_TYPE_MAX]; + + /* long-running background operation */ + BlockJob *job; + + /* The node that this node inherited default options from (and a reopen on + * which can affect this node by changing these defaults). This is always a + * parent node of this node. */ + BlockDriverState *inherits_from; + QLIST_HEAD(, BdrvChild) children; + QLIST_HEAD(, BdrvChild) parents; + + QDict *options; + BlockdevDetectZeroesOptions detect_zeroes; + + /* The error object in use for blocking operations on backing_hd */ + Error *backing_blocker; + + /* threshold limit for writes, in bytes. "High water mark". */ + uint64_t write_threshold_offset; + NotifierWithReturn write_threshold_notifier; + + int quiesce_counter; +}; + +struct BlockBackendRootState { + int open_flags; + bool read_only; + BlockdevDetectZeroesOptions detect_zeroes; + + char *throttle_group; + ThrottleState *throttle_state; +}; + +static inline BlockDriverState *backing_bs(BlockDriverState *bs) +{ + return bs->backing ? bs->backing->bs : NULL; +} + + +/* Essential block drivers which must always be statically linked into qemu, and + * which therefore can be accessed without using bdrv_find_format() */ +extern BlockDriver bdrv_file; +extern BlockDriver bdrv_raw; +extern BlockDriver bdrv_qcow2; + +extern QTAILQ_HEAD(BdrvStates, BlockDriverState) bdrv_states; + +/** + * bdrv_setup_io_funcs: + * + * Prepare a #BlockDriver for I/O request processing by populating + * unimplemented coroutine and AIO interfaces with generic wrapper functions + * that fall back to implemented interfaces. + */ +void bdrv_setup_io_funcs(BlockDriver *bdrv); + +int get_tmp_filename(char *filename, int size); +BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size, + const char *filename); + +void bdrv_set_io_limits(BlockDriverState *bs, + ThrottleConfig *cfg); + + +/** + * bdrv_add_before_write_notifier: + * + * Register a callback that is invoked before write requests are processed but + * after any throttling or waiting for overlapping requests. + */ +void bdrv_add_before_write_notifier(BlockDriverState *bs, + NotifierWithReturn *notifier); + +/** + * bdrv_detach_aio_context: + * + * May be called from .bdrv_detach_aio_context() to detach children from the + * current #AioContext. This is only needed by block drivers that manage their + * own children. Both ->file and ->backing are automatically handled and + * block drivers should not call this function on them explicitly. + */ +void bdrv_detach_aio_context(BlockDriverState *bs); + +/** + * bdrv_attach_aio_context: + * + * May be called from .bdrv_attach_aio_context() to attach children to the new + * #AioContext. This is only needed by block drivers that manage their own + * children. Both ->file and ->backing are automatically handled and block + * drivers should not call this function on them explicitly. + */ +void bdrv_attach_aio_context(BlockDriverState *bs, + AioContext *new_context); + +/** + * bdrv_add_aio_context_notifier: + * + * If a long-running job intends to be always run in the same AioContext as a + * certain BDS, it may use this function to be notified of changes regarding the + * association of the BDS to an AioContext. + * + * attached_aio_context() is called after the target BDS has been attached to a + * new AioContext; detach_aio_context() is called before the target BDS is being + * detached from its old AioContext. + */ +void bdrv_add_aio_context_notifier(BlockDriverState *bs, + void (*attached_aio_context)(AioContext *new_context, void *opaque), + void (*detach_aio_context)(void *opaque), void *opaque); + +/** + * bdrv_remove_aio_context_notifier: + * + * Unsubscribe of change notifications regarding the BDS's AioContext. The + * parameters given here have to be the same as those given to + * bdrv_add_aio_context_notifier(). + */ +void bdrv_remove_aio_context_notifier(BlockDriverState *bs, + void (*aio_context_attached)(AioContext *, + void *), + void (*aio_context_detached)(void *), + void *opaque); + +#ifdef _WIN32 +int is_windows_drive(const char *filename); +#endif + +/** + * stream_start: + * @bs: Block device to operate on. + * @base: Block device that will become the new base, or %NULL to + * flatten the whole backing file chain onto @bs. + * @base_id: The file name that will be written to @bs as the new + * backing file if the job completes. Ignored if @base is %NULL. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a streaming operation on @bs. Clusters that are unallocated + * in @bs, but allocated in any image between @base and @bs (both + * exclusive) will be written to @bs. At the end of a successful + * streaming job, the backing file of @bs will be changed to + * @base_id in the written image and to @base in the live BlockDriverState. + */ +void stream_start(BlockDriverState *bs, BlockDriverState *base, + const char *base_id, int64_t speed, BlockdevOnError on_error, + BlockCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * commit_start: + * @bs: Active block device. + * @top: Top block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @backing_file_str: String to use as the backing file in @top's overlay + * @errp: Error object. + * + */ +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockCompletionFunc *cb, + void *opaque, const char *backing_file_str, Error **errp); +/** + * commit_active_start: + * @bs: Active block device to be committed. + * @base: Block device that will be written into, and become the new top. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + */ +void commit_active_start(BlockDriverState *bs, BlockDriverState *base, + int64_t speed, + BlockdevOnError on_error, + BlockCompletionFunc *cb, + void *opaque, Error **errp); +/* + * mirror_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @replaces: Block graph node name to replace once the mirror is done. Can + * only be used when full mirroring is selected. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @granularity: The chosen granularity for the dirty bitmap. + * @buf_size: The amount of data that can be in flight at one time. + * @mode: Whether to collapse all images in the chain to the target. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @unmap: Whether to unmap target where source sectors only contain zeroes. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Start a mirroring operation on @bs. Clusters that are allocated + * in @bs will be written to @bs until the job is cancelled or + * manually completed. At the end of a successful mirroring job, + * @bs will be switched to read from @target. + */ +void mirror_start(BlockDriverState *bs, BlockDriverState *target, + const char *replaces, + int64_t speed, uint32_t granularity, int64_t buf_size, + MirrorSyncMode mode, BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + bool unmap, + BlockCompletionFunc *cb, + void *opaque, Error **errp); + +/* + * backup_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @sync_mode: What parts of the disk image should be copied to the destination. + * @sync_bitmap: The dirty bitmap if sync_mode is MIRROR_SYNC_MODE_INCREMENTAL. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @txn: Transaction that this job is part of (may be NULL). + * + * Start a backup operation on @bs. Clusters in @bs are written to @target + * until the job is cancelled or manually completed. + */ +void backup_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, MirrorSyncMode sync_mode, + BdrvDirtyBitmap *sync_bitmap, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockCompletionFunc *cb, void *opaque, + BlockJobTxn *txn, Error **errp); + +void blk_set_bs(BlockBackend *blk, BlockDriverState *bs); + +void blk_dev_change_media_cb(BlockBackend *blk, bool load); +bool blk_dev_has_removable_media(BlockBackend *blk); +bool blk_dev_has_tray(BlockBackend *blk); +void blk_dev_eject_request(BlockBackend *blk, bool force); +bool blk_dev_is_tray_open(BlockBackend *blk); +bool blk_dev_is_medium_locked(BlockBackend *blk); +void blk_dev_resize_cb(BlockBackend *blk); + +void bdrv_set_dirty(BlockDriverState *bs, int64_t cur_sector, int nr_sectors); +bool bdrv_requests_pending(BlockDriverState *bs); + +void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out); +void bdrv_undo_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *in); + +#endif /* BLOCK_INT_H */ diff --git a/src/include/block/blockjob.h b/src/include/block/blockjob.h new file mode 100644 index 0000000..d84ccd8 --- /dev/null +++ b/src/include/block/blockjob.h @@ -0,0 +1,446 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCKJOB_H +#define BLOCKJOB_H 1 + +#include "block/block.h" + +/** + * BlockJobDriver: + * + * A class type for block job driver. + */ +typedef struct BlockJobDriver { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + BlockJobType job_type; + + /** Optional callback for job types that support setting a speed limit */ + void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); + + /** Optional callback for job types that need to forward I/O status reset */ + void (*iostatus_reset)(BlockJob *job); + + /** + * Optional callback for job types whose completion must be triggered + * manually. + */ + void (*complete)(BlockJob *job, Error **errp); + + /** + * If the callback is not NULL, it will be invoked when all the jobs + * belonging to the same transaction complete; or upon this job's + * completion if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*commit)(BlockJob *job); + + /** + * If the callback is not NULL, it will be invoked when any job in the + * same transaction fails; or upon this job's failure (due to error or + * cancellation) if it is not in a transaction. Skipped if NULL. + * + * All jobs will complete with a call to either .commit() or .abort() but + * never both. + */ + void (*abort)(BlockJob *job); +} BlockJobDriver; + +/** + * BlockJob: + * + * Long-running operation on a BlockDriverState. + */ +struct BlockJob { + /** The job type, including the job vtable. */ + const BlockJobDriver *driver; + + /** The block device on which the job is operating. */ + BlockDriverState *bs; + + /** + * The ID of the block job. Currently the BlockBackend name of the BDS + * owning the job at the time when the job is started. + * + * TODO Decouple block job IDs from BlockBackend names + */ + char *id; + + /** + * The coroutine that executes the job. If not NULL, it is + * reentered when busy is false and the job is cancelled. + */ + Coroutine *co; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has been cancelled, it should only yield + * if #aio_poll will ("sooner or later") reenter the coroutine. + */ + bool cancelled; + + /** + * Counter for pause request. If non-zero, the block job is either paused, + * or if busy == true will pause itself as soon as possible. + */ + int pause_count; + + /** + * Set to true if the job is paused by user. Can be unpaused with the + * block-job-resume QMP command. + */ + bool user_paused; + + /** + * Set to false by the job while it is in a quiescent state, where + * no I/O is pending and the job has yielded on any condition + * that is not detected by #aio_poll, such as a timer. + */ + bool busy; + + /** + * Set to true when the job is ready to be completed. + */ + bool ready; + + /** Status that is published by the query-block-jobs QMP API */ + BlockDeviceIoStatus iostatus; + + /** Offset that is published by the query-block-jobs QMP API */ + int64_t offset; + + /** Length that is published by the query-block-jobs QMP API */ + int64_t len; + + /** Speed that was set with @block_job_set_speed. */ + int64_t speed; + + /** The completion function that will be called when the job completes. */ + BlockCompletionFunc *cb; + + /** Block other operations when block job is running */ + Error *blocker; + + /** The opaque value that is passed to the completion function. */ + void *opaque; + + /** Reference count of the block job */ + int refcnt; + + /* True if this job has reported completion by calling block_job_completed. + */ + bool completed; + + /* ret code passed to block_job_completed. + */ + int ret; + + /** Non-NULL if this job is part of a transaction */ + BlockJobTxn *txn; + QLIST_ENTRY(BlockJob) txn_list; +}; + +/** + * block_job_create: + * @job_type: The class object for the newly-created job. + * @bs: The block + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const BlockJobDriver *driver, BlockDriverState *bs, + int64_t speed, BlockCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * block_job_sleep_ns: + * @job: The job that calls the function. + * @clock: The clock to sleep on. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * nanoseconds. Canceling the job will interrupt the wait immediately. + */ +void block_job_sleep_ns(BlockJob *job, QEMUClockType type, int64_t ns); + +/** + * block_job_yield: + * @job: The job that calls the function. + * + * Yield the block job coroutine. + */ +void block_job_yield(BlockJob *job); + +/** + * block_job_ref: + * @bs: The block device. + * + * Grab a reference to the block job. Should be paired with block_job_unref. + */ +void block_job_ref(BlockJob *job); + +/** + * block_job_unref: + * @bs: The block device. + * + * Release reference to the block job and release resources if it is the last + * reference. + */ +void block_job_unref(BlockJob *job); + +/** + * block_job_completed: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ +void block_job_completed(BlockJob *job, int ret); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * @errp: Error object. + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); + +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the specified job. + */ +void block_job_cancel(BlockJob *job); + +/** + * block_job_complete: + * @job: The job to be completed. + * @errp: Error object. + * + * Asynchronously complete the specified job. + */ +void block_job_complete(BlockJob *job, Error **errp); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ +bool block_job_is_cancelled(BlockJob *job); + +/** + * block_job_query: + * @job: The job to get information about. + * + * Return information about a job. + */ +BlockJobInfo *block_job_query(BlockJob *job); + +/** + * block_job_pause: + * @job: The job to be paused. + * + * Asynchronously pause the specified job. + */ +void block_job_pause(BlockJob *job); + +/** + * block_job_resume: + * @job: The job to be resumed. + * + * Resume the specified job. Must be paired with a preceding block_job_pause. + */ +void block_job_resume(BlockJob *job); + +/** + * block_job_enter: + * @job: The job to enter. + * + * Continue the specified job by entering the coroutine. + */ +void block_job_enter(BlockJob *job); + +/** + * block_job_event_cancelled: + * @job: The job whose information is requested. + * + * Send a BLOCK_JOB_CANCELLED event for the specified job. + */ +void block_job_event_cancelled(BlockJob *job); + +/** + * block_job_ready: + * @job: The job which is now ready to complete. + * @msg: Error message. Only present on failure. + * + * Send a BLOCK_JOB_COMPLETED event for the specified job. + */ +void block_job_event_completed(BlockJob *job, const char *msg); + +/** + * block_job_ready: + * @job: The job which is now ready to complete. + * + * Send a BLOCK_JOB_READY event for the specified job. + */ +void block_job_event_ready(BlockJob *job); + +/** + * block_job_is_paused: + * @job: The job being queried. + * + * Returns whether the job is currently paused, or will pause + * as soon as it reaches a sleeping point. + */ +bool block_job_is_paused(BlockJob *job); + +/** + * block_job_cancel_sync: + * @job: The job to be canceled. + * + * Synchronously cancel the job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. + */ +int block_job_cancel_sync(BlockJob *job); + +/** + * block_job_complete_sync: + * @job: The job to be completed. + * @errp: Error object which may be set by block_job_complete(); this is not + * necessarily set on every error, the job return value has to be + * checked as well. + * + * Synchronously complete the job. The completion callback is called before the + * function returns, unless it is NULL (which is permissible when using this + * function). + * + * Returns the return value from the job. + */ +int block_job_complete_sync(BlockJob *job, Error **errp); + +/** + * block_job_iostatus_reset: + * @job: The job whose I/O status should be reset. + * + * Reset I/O status on @job and on BlockDriverState objects it uses, + * other than job->bs. + */ +void block_job_iostatus_reset(BlockJob *job); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @bs: The block device on which to set an I/O error. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. + */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error); + +typedef void BlockJobDeferToMainLoopFn(BlockJob *job, void *opaque); + +/** + * block_job_defer_to_main_loop: + * @job: The job + * @fn: The function to run in the main loop + * @opaque: The opaque value that is passed to @fn + * + * Execute a given function in the main loop with the BlockDriverState + * AioContext acquired. Block jobs must call bdrv_unref(), bdrv_close(), and + * anything that uses bdrv_drain_all() in the main loop. + * + * The @job AioContext is held while @fn executes. + */ +void block_job_defer_to_main_loop(BlockJob *job, + BlockJobDeferToMainLoopFn *fn, + void *opaque); + +/** + * block_job_txn_new: + * + * Allocate and return a new block job transaction. Jobs can be added to the + * transaction using block_job_txn_add_job(). + * + * The transaction is automatically freed when the last job completes or is + * cancelled. + * + * All jobs in the transaction either complete successfully or fail/cancel as a + * group. Jobs wait for each other before completing. Cancelling one job + * cancels all jobs in the transaction. + */ +BlockJobTxn *block_job_txn_new(void); + +/** + * block_job_txn_unref: + * + * Release a reference that was previously acquired with block_job_txn_add_job + * or block_job_txn_new. If it's the last reference to the object, it will be + * freed. + */ +void block_job_txn_unref(BlockJobTxn *txn); + +/** + * block_job_txn_add_job: + * @txn: The transaction (may be NULL) + * @job: Job to add to the transaction + * + * Add @job to the transaction. The @job must not already be in a transaction. + * The caller must call either block_job_txn_unref() or block_job_completed() + * to release the reference that is automatically grabbed here. + */ +void block_job_txn_add_job(BlockJobTxn *txn, BlockJob *job); + +#endif diff --git a/src/include/block/nbd.h b/src/include/block/nbd.h new file mode 100644 index 0000000..65f409d --- /dev/null +++ b/src/include/block/nbd.h @@ -0,0 +1,106 @@ +/* + * Copyright (C) 2005 Anthony Liguori + * + * Network Block Device + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef NBD_H +#define NBD_H + +#include + +#include "qemu-common.h" +#include "qemu/option.h" + +struct nbd_request { + uint32_t magic; + uint32_t type; + uint64_t handle; + uint64_t from; + uint32_t len; +} QEMU_PACKED; + +struct nbd_reply { + uint32_t magic; + uint32_t error; + uint64_t handle; +} QEMU_PACKED; + +#define NBD_FLAG_HAS_FLAGS (1 << 0) /* Flags are there */ +#define NBD_FLAG_READ_ONLY (1 << 1) /* Device is read-only */ +#define NBD_FLAG_SEND_FLUSH (1 << 2) /* Send FLUSH */ +#define NBD_FLAG_SEND_FUA (1 << 3) /* Send FUA (Force Unit Access) */ +#define NBD_FLAG_ROTATIONAL (1 << 4) /* Use elevator algorithm - rotational media */ +#define NBD_FLAG_SEND_TRIM (1 << 5) /* Send TRIM (discard) */ + +/* New-style global flags. */ +#define NBD_FLAG_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ + +/* New-style client flags. */ +#define NBD_FLAG_C_FIXED_NEWSTYLE (1 << 0) /* Fixed newstyle protocol. */ + +/* Reply types. */ +#define NBD_REP_ACK (1) /* Data sending finished. */ +#define NBD_REP_SERVER (2) /* Export description. */ +#define NBD_REP_ERR_UNSUP ((UINT32_C(1) << 31) | 1) /* Unknown option. */ +#define NBD_REP_ERR_INVALID ((UINT32_C(1) << 31) | 3) /* Invalid length. */ + +#define NBD_CMD_MASK_COMMAND 0x0000ffff +#define NBD_CMD_FLAG_FUA (1 << 16) + +enum { + NBD_CMD_READ = 0, + NBD_CMD_WRITE = 1, + NBD_CMD_DISC = 2, + NBD_CMD_FLUSH = 3, + NBD_CMD_TRIM = 4 +}; + +#define NBD_DEFAULT_PORT 10809 + +/* Maximum size of a single READ/WRITE data buffer */ +#define NBD_MAX_BUFFER_SIZE (32 * 1024 * 1024) + +ssize_t nbd_wr_sync(int fd, void *buffer, size_t size, bool do_read); +int nbd_receive_negotiate(int csock, const char *name, uint32_t *flags, + off_t *size, Error **errp); +int nbd_init(int fd, int csock, uint32_t flags, off_t size); +ssize_t nbd_send_request(int csock, struct nbd_request *request); +ssize_t nbd_receive_reply(int csock, struct nbd_reply *reply); +int nbd_client(int fd); +int nbd_disconnect(int fd); + +typedef struct NBDExport NBDExport; +typedef struct NBDClient NBDClient; + +NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size, + uint32_t nbdflags, void (*close)(NBDExport *), + Error **errp); +void nbd_export_close(NBDExport *exp); +void nbd_export_get(NBDExport *exp); +void nbd_export_put(NBDExport *exp); + +BlockBackend *nbd_export_get_blockdev(NBDExport *exp); + +NBDExport *nbd_export_find(const char *name); +void nbd_export_set_name(NBDExport *exp, const char *name); +void nbd_export_close_all(void); + +NBDClient *nbd_client_new(NBDExport *exp, int csock, + void (*close)(NBDClient *)); +void nbd_client_get(NBDClient *client); +void nbd_client_put(NBDClient *client); + +#endif diff --git a/src/include/block/qapi.h b/src/include/block/qapi.h new file mode 100644 index 0000000..327549d --- /dev/null +++ b/src/include/block/qapi.h @@ -0,0 +1,46 @@ +/* + * Block layer qmp and info dump related functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef BLOCK_QAPI_H +#define BLOCK_QAPI_H + +#include "qapi-types.h" +#include "block/block.h" +#include "block/snapshot.h" + +BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp); +int bdrv_query_snapshot_info_list(BlockDriverState *bs, + SnapshotInfoList **p_list, + Error **errp); +void bdrv_query_image_info(BlockDriverState *bs, + ImageInfo **p_info, + Error **errp); + +void bdrv_snapshot_dump(fprintf_function func_fprintf, void *f, + QEMUSnapshotInfo *sn); +void bdrv_image_info_specific_dump(fprintf_function func_fprintf, void *f, + ImageInfoSpecific *info_spec); +void bdrv_image_info_dump(fprintf_function func_fprintf, void *f, + ImageInfo *info); +#endif diff --git a/src/include/block/scsi.h b/src/include/block/scsi.h new file mode 100644 index 0000000..a311341 --- /dev/null +++ b/src/include/block/scsi.h @@ -0,0 +1,309 @@ +/* Copyright (C) 1998, 1999 Free Software Foundation, Inc. + This file is part of the GNU C Library. + + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, see . +*/ + +/* + * This header file contains public constants and structures used by + * the scsi code for linux. + */ +#ifndef HW_SCSI_DEFS_H +#define HW_SCSI_DEFS_H 1 + +/* + * SCSI opcodes + */ + +#define TEST_UNIT_READY 0x00 +#define REWIND 0x01 +#define REQUEST_SENSE 0x03 +#define FORMAT_UNIT 0x04 +#define READ_BLOCK_LIMITS 0x05 +#define INITIALIZE_ELEMENT_STATUS 0x07 +#define REASSIGN_BLOCKS 0x07 +#define READ_6 0x08 +#define WRITE_6 0x0a +#define SET_CAPACITY 0x0b +#define READ_REVERSE 0x0f +#define WRITE_FILEMARKS 0x10 +#define SPACE 0x11 +#define INQUIRY 0x12 +#define RECOVER_BUFFERED_DATA 0x14 +#define MODE_SELECT 0x15 +#define RESERVE 0x16 +#define RELEASE 0x17 +#define COPY 0x18 +#define ERASE 0x19 +#define MODE_SENSE 0x1a +#define LOAD_UNLOAD 0x1b +#define START_STOP 0x1b +#define RECEIVE_DIAGNOSTIC 0x1c +#define SEND_DIAGNOSTIC 0x1d +#define ALLOW_MEDIUM_REMOVAL 0x1e +#define READ_CAPACITY_10 0x25 +#define READ_10 0x28 +#define WRITE_10 0x2a +#define SEEK_10 0x2b +#define LOCATE_10 0x2b +#define POSITION_TO_ELEMENT 0x2b +#define WRITE_VERIFY_10 0x2e +#define VERIFY_10 0x2f +#define SEARCH_HIGH 0x30 +#define SEARCH_EQUAL 0x31 +#define SEARCH_LOW 0x32 +#define SET_LIMITS 0x33 +#define PRE_FETCH 0x34 +#define READ_POSITION 0x34 +#define SYNCHRONIZE_CACHE 0x35 +#define LOCK_UNLOCK_CACHE 0x36 +#define INITIALIZE_ELEMENT_STATUS_WITH_RANGE 0x37 +#define READ_DEFECT_DATA 0x37 +#define MEDIUM_SCAN 0x38 +#define COMPARE 0x39 +#define COPY_VERIFY 0x3a +#define WRITE_BUFFER 0x3b +#define READ_BUFFER 0x3c +#define UPDATE_BLOCK 0x3d +#define READ_LONG_10 0x3e +#define WRITE_LONG_10 0x3f +#define CHANGE_DEFINITION 0x40 +#define WRITE_SAME_10 0x41 +#define UNMAP 0x42 +#define READ_TOC 0x43 +#define REPORT_DENSITY_SUPPORT 0x44 +#define GET_CONFIGURATION 0x46 +#define SANITIZE 0x48 +#define GET_EVENT_STATUS_NOTIFICATION 0x4a +#define LOG_SELECT 0x4c +#define LOG_SENSE 0x4d +#define READ_DISC_INFORMATION 0x51 +#define RESERVE_TRACK 0x53 +#define MODE_SELECT_10 0x55 +#define RESERVE_10 0x56 +#define RELEASE_10 0x57 +#define MODE_SENSE_10 0x5a +#define SEND_CUE_SHEET 0x5d +#define PERSISTENT_RESERVE_IN 0x5e +#define PERSISTENT_RESERVE_OUT 0x5f +#define VARLENGTH_CDB 0x7f +#define WRITE_FILEMARKS_16 0x80 +#define READ_REVERSE_16 0x81 +#define ALLOW_OVERWRITE 0x82 +#define EXTENDED_COPY 0x83 +#define ATA_PASSTHROUGH_16 0x85 +#define ACCESS_CONTROL_IN 0x86 +#define ACCESS_CONTROL_OUT 0x87 +#define READ_16 0x88 +#define COMPARE_AND_WRITE 0x89 +#define WRITE_16 0x8a +#define WRITE_VERIFY_16 0x8e +#define VERIFY_16 0x8f +#define PRE_FETCH_16 0x90 +#define SPACE_16 0x91 +#define SYNCHRONIZE_CACHE_16 0x91 +#define LOCATE_16 0x92 +#define WRITE_SAME_16 0x93 +#define ERASE_16 0x93 +#define SERVICE_ACTION_IN_16 0x9e +#define WRITE_LONG_16 0x9f +#define REPORT_LUNS 0xa0 +#define ATA_PASSTHROUGH_12 0xa1 +#define MAINTENANCE_IN 0xa3 +#define MAINTENANCE_OUT 0xa4 +#define MOVE_MEDIUM 0xa5 +#define EXCHANGE_MEDIUM 0xa6 +#define SET_READ_AHEAD 0xa7 +#define READ_12 0xa8 +#define WRITE_12 0xaa +#define SERVICE_ACTION_IN_12 0xab +#define ERASE_12 0xac +#define READ_DVD_STRUCTURE 0xad +#define WRITE_VERIFY_12 0xae +#define VERIFY_12 0xaf +#define SEARCH_HIGH_12 0xb0 +#define SEARCH_EQUAL_12 0xb1 +#define SEARCH_LOW_12 0xb2 +#define READ_ELEMENT_STATUS 0xb8 +#define SEND_VOLUME_TAG 0xb6 +#define READ_DEFECT_DATA_12 0xb7 +#define SET_CD_SPEED 0xbb +#define MECHANISM_STATUS 0xbd +#define READ_CD 0xbe +#define SEND_DVD_STRUCTURE 0xbf + +const char *scsi_command_name(uint8_t cmd); + +/* + * SERVICE ACTION IN subcodes + */ +#define SAI_READ_CAPACITY_16 0x10 + +/* + * READ POSITION service action codes + */ +#define SHORT_FORM_BLOCK_ID 0x00 +#define SHORT_FORM_VENDOR_SPECIFIC 0x01 +#define LONG_FORM 0x06 +#define EXTENDED_FORM 0x08 + +/* + * SAM Status codes + */ + +#define GOOD 0x00 +#define CHECK_CONDITION 0x02 +#define CONDITION_GOOD 0x04 +#define BUSY 0x08 +#define INTERMEDIATE_GOOD 0x10 +#define INTERMEDIATE_C_GOOD 0x14 +#define RESERVATION_CONFLICT 0x18 +#define COMMAND_TERMINATED 0x22 +#define TASK_SET_FULL 0x28 +#define ACA_ACTIVE 0x30 +#define TASK_ABORTED 0x40 + +#define STATUS_MASK 0x3e + +/* + * SENSE KEYS + */ + +#define NO_SENSE 0x00 +#define RECOVERED_ERROR 0x01 +#define NOT_READY 0x02 +#define MEDIUM_ERROR 0x03 +#define HARDWARE_ERROR 0x04 +#define ILLEGAL_REQUEST 0x05 +#define UNIT_ATTENTION 0x06 +#define DATA_PROTECT 0x07 +#define BLANK_CHECK 0x08 +#define COPY_ABORTED 0x0a +#define ABORTED_COMMAND 0x0b +#define VOLUME_OVERFLOW 0x0d +#define MISCOMPARE 0x0e + + +/* + * DEVICE TYPES + */ + +#define TYPE_DISK 0x00 +#define TYPE_TAPE 0x01 +#define TYPE_PRINTER 0x02 +#define TYPE_PROCESSOR 0x03 /* HP scanners use this */ +#define TYPE_WORM 0x04 /* Treated as ROM by our system */ +#define TYPE_ROM 0x05 +#define TYPE_SCANNER 0x06 +#define TYPE_MOD 0x07 /* Magneto-optical disk - + * - treated as TYPE_DISK */ +#define TYPE_MEDIUM_CHANGER 0x08 +#define TYPE_STORAGE_ARRAY 0x0c /* Storage array device */ +#define TYPE_ENCLOSURE 0x0d /* Enclosure Services Device */ +#define TYPE_RBC 0x0e /* Simplified Direct-Access Device */ +#define TYPE_OSD 0x11 /* Object-storage Device */ +#define TYPE_WLUN 0x1e /* Well known LUN */ +#define TYPE_NOT_PRESENT 0x1f +#define TYPE_INACTIVE 0x20 +#define TYPE_NO_LUN 0x7f + +/* Mode page codes for mode sense/set */ +#define MODE_PAGE_R_W_ERROR 0x01 +#define MODE_PAGE_HD_GEOMETRY 0x04 +#define MODE_PAGE_FLEXIBLE_DISK_GEOMETRY 0x05 +#define MODE_PAGE_CACHING 0x08 +#define MODE_PAGE_AUDIO_CTL 0x0e +#define MODE_PAGE_POWER 0x1a +#define MODE_PAGE_FAULT_FAIL 0x1c +#define MODE_PAGE_TO_PROTECT 0x1d +#define MODE_PAGE_CAPABILITIES 0x2a +#define MODE_PAGE_ALLS 0x3f +/* Not in Mt. Fuji, but in ATAPI 2.6 -- deprecated now in favor + * of MODE_PAGE_SENSE_POWER */ +#define MODE_PAGE_CDROM 0x0d + +/* Event notification classes for GET EVENT STATUS NOTIFICATION */ +#define GESN_NO_EVENTS 0 +#define GESN_OPERATIONAL_CHANGE 1 +#define GESN_POWER_MANAGEMENT 2 +#define GESN_EXTERNAL_REQUEST 3 +#define GESN_MEDIA 4 +#define GESN_MULTIPLE_HOSTS 5 +#define GESN_DEVICE_BUSY 6 + +/* Event codes for MEDIA event status notification */ +#define MEC_NO_CHANGE 0 +#define MEC_EJECT_REQUESTED 1 +#define MEC_NEW_MEDIA 2 +#define MEC_MEDIA_REMOVAL 3 /* only for media changers */ +#define MEC_MEDIA_CHANGED 4 /* only for media changers */ +#define MEC_BG_FORMAT_COMPLETED 5 /* MRW or DVD+RW b/g format completed */ +#define MEC_BG_FORMAT_RESTARTED 6 /* MRW or DVD+RW b/g format restarted */ + +#define MS_TRAY_OPEN 1 +#define MS_MEDIA_PRESENT 2 + +/* + * Based on values from but extending CD_MINS + * to the maximum common size allowed by the Orange's Book ATIP + * + * 90 and 99 min CDs are also available but using them as the + * upper limit reduces the effectiveness of the heuristic to + * detect DVDs burned to less than 25% of their maximum capacity + */ + +/* Some generally useful CD-ROM information */ +#define CD_MINS 80 /* max. minutes per CD */ +#define CD_SECS 60 /* seconds per minute */ +#define CD_FRAMES 75 /* frames per second */ +#define CD_FRAMESIZE 2048 /* bytes per frame, "cooked" mode */ +#define CD_MAX_BYTES (CD_MINS * CD_SECS * CD_FRAMES * CD_FRAMESIZE) +#define CD_MAX_SECTORS (CD_MAX_BYTES / 512) + +/* + * The MMC values are not IDE specific and might need to be moved + * to a common header if they are also needed for the SCSI emulation + */ + +/* Profile list from MMC-6 revision 1 table 91 */ +#define MMC_PROFILE_NONE 0x0000 +#define MMC_PROFILE_CD_ROM 0x0008 +#define MMC_PROFILE_CD_R 0x0009 +#define MMC_PROFILE_CD_RW 0x000A +#define MMC_PROFILE_DVD_ROM 0x0010 +#define MMC_PROFILE_DVD_R_SR 0x0011 +#define MMC_PROFILE_DVD_RAM 0x0012 +#define MMC_PROFILE_DVD_RW_RO 0x0013 +#define MMC_PROFILE_DVD_RW_SR 0x0014 +#define MMC_PROFILE_DVD_R_DL_SR 0x0015 +#define MMC_PROFILE_DVD_R_DL_JR 0x0016 +#define MMC_PROFILE_DVD_RW_DL 0x0017 +#define MMC_PROFILE_DVD_DDR 0x0018 +#define MMC_PROFILE_DVD_PLUS_RW 0x001A +#define MMC_PROFILE_DVD_PLUS_R 0x001B +#define MMC_PROFILE_DVD_PLUS_RW_DL 0x002A +#define MMC_PROFILE_DVD_PLUS_R_DL 0x002B +#define MMC_PROFILE_BD_ROM 0x0040 +#define MMC_PROFILE_BD_R_SRM 0x0041 +#define MMC_PROFILE_BD_R_RRM 0x0042 +#define MMC_PROFILE_BD_RE 0x0043 +#define MMC_PROFILE_HDDVD_ROM 0x0050 +#define MMC_PROFILE_HDDVD_R 0x0051 +#define MMC_PROFILE_HDDVD_RAM 0x0052 +#define MMC_PROFILE_HDDVD_RW 0x0053 +#define MMC_PROFILE_HDDVD_R_DL 0x0058 +#define MMC_PROFILE_HDDVD_RW_DL 0x005A +#define MMC_PROFILE_INVALID 0xFFFF + +#endif diff --git a/src/include/block/snapshot.h b/src/include/block/snapshot.h new file mode 100644 index 0000000..c6910da6 --- /dev/null +++ b/src/include/block/snapshot.h @@ -0,0 +1,96 @@ +/* + * Block layer snapshot related functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#ifndef SNAPSHOT_H +#define SNAPSHOT_H + +#include "qemu-common.h" +#include "qapi/error.h" +#include "qemu/option.h" + + +#define SNAPSHOT_OPT_BASE "snapshot." +#define SNAPSHOT_OPT_ID "snapshot.id" +#define SNAPSHOT_OPT_NAME "snapshot.name" + +extern QemuOptsList internal_snapshot_opts; + +typedef struct QEMUSnapshotInfo { + char id_str[128]; /* unique snapshot id */ + /* the following fields are informative. They are not needed for + the consistency of the snapshot */ + char name[256]; /* user chosen name */ + uint64_t vm_state_size; /* VM state info size */ + uint32_t date_sec; /* UTC date of the snapshot */ + uint32_t date_nsec; + uint64_t vm_clock_nsec; /* VM clock relative to boot */ +} QEMUSnapshotInfo; + +int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info, + const char *name); +bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs, + const char *id, + const char *name, + QEMUSnapshotInfo *sn_info, + Error **errp); +int bdrv_can_snapshot(BlockDriverState *bs); +int bdrv_snapshot_create(BlockDriverState *bs, + QEMUSnapshotInfo *sn_info); +int bdrv_snapshot_goto(BlockDriverState *bs, + const char *snapshot_id); +int bdrv_snapshot_delete(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); +int bdrv_snapshot_delete_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp); +int bdrv_snapshot_list(BlockDriverState *bs, + QEMUSnapshotInfo **psn_info); +int bdrv_snapshot_load_tmp(BlockDriverState *bs, + const char *snapshot_id, + const char *name, + Error **errp); +int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs, + const char *id_or_name, + Error **errp); + + +/* Group operations. All block drivers are involved. + * These functions will properly handle dataplane (take aio_context_acquire + * when appropriate for appropriate block drivers */ + +bool bdrv_all_can_snapshot(BlockDriverState **first_bad_bs); +int bdrv_all_delete_snapshot(const char *name, BlockDriverState **first_bsd_bs, + Error **err); +int bdrv_all_goto_snapshot(const char *name, BlockDriverState **first_bsd_bs); +int bdrv_all_find_snapshot(const char *name, BlockDriverState **first_bad_bs); +int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn, + BlockDriverState *vm_state_bs, + uint64_t vm_state_size, + BlockDriverState **first_bad_bs); + +BlockDriverState *bdrv_all_find_vmstate_bs(void); + +#endif diff --git a/src/include/block/thread-pool.h b/src/include/block/thread-pool.h new file mode 100644 index 0000000..42eb5e8 --- /dev/null +++ b/src/include/block/thread-pool.h @@ -0,0 +1,37 @@ +/* + * QEMU block layer thread pool + * + * Copyright IBM, Corp. 2008 + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Anthony Liguori + * Paolo Bonzini + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#ifndef QEMU_THREAD_POOL_H +#define QEMU_THREAD_POOL_H 1 + +#include "block/block.h" + +typedef int ThreadPoolFunc(void *opaque); + +typedef struct ThreadPool ThreadPool; + +ThreadPool *thread_pool_new(struct AioContext *ctx); +void thread_pool_free(ThreadPool *pool); + +BlockAIOCB *thread_pool_submit_aio(ThreadPool *pool, + ThreadPoolFunc *func, void *arg, + BlockCompletionFunc *cb, void *opaque); +int coroutine_fn thread_pool_submit_co(ThreadPool *pool, + ThreadPoolFunc *func, void *arg); +void thread_pool_submit(ThreadPool *pool, ThreadPoolFunc *func, void *arg); + +#endif diff --git a/src/include/block/throttle-groups.h b/src/include/block/throttle-groups.h new file mode 100644 index 0000000..aba28f3 --- /dev/null +++ b/src/include/block/throttle-groups.h @@ -0,0 +1,46 @@ +/* + * QEMU block throttling group infrastructure + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * BenoƮt Canet + * Alberto Garcia + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef THROTTLE_GROUPS_H +#define THROTTLE_GROUPS_H + +#include "qemu/throttle.h" +#include "block/block_int.h" + +const char *throttle_group_get_name(BlockDriverState *bs); + +ThrottleState *throttle_group_incref(const char *name); +void throttle_group_unref(ThrottleState *ts); + +void throttle_group_config(BlockDriverState *bs, ThrottleConfig *cfg); +void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg); + +void throttle_group_register_bs(BlockDriverState *bs, const char *groupname); +void throttle_group_unregister_bs(BlockDriverState *bs); + +void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs, + unsigned int bytes, + bool is_write); + +#endif diff --git a/src/include/block/write-threshold.h b/src/include/block/write-threshold.h new file mode 100644 index 0000000..f1b899c --- /dev/null +++ b/src/include/block/write-threshold.h @@ -0,0 +1,64 @@ +/* + * QEMU System Emulator block write threshold notification + * + * Copyright Red Hat, Inc. 2014 + * + * Authors: + * Francesco Romani + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + */ +#ifndef BLOCK_WRITE_THRESHOLD_H +#define BLOCK_WRITE_THRESHOLD_H + +#include + +#include "qemu/typedefs.h" +#include "qemu-common.h" + +/* + * bdrv_write_threshold_set: + * + * Set the write threshold for block devices, in bytes. + * Notify when a write exceeds the threshold, meaning the device + * is becoming full, so it can be transparently resized. + * To be used with thin-provisioned block devices. + * + * Use threshold_bytes == 0 to disable. + */ +void bdrv_write_threshold_set(BlockDriverState *bs, uint64_t threshold_bytes); + +/* + * bdrv_write_threshold_get + * + * Get the configured write threshold, in bytes. + * Zero means no threshold configured. + */ +uint64_t bdrv_write_threshold_get(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_is_set + * + * Tell if a write threshold is set for a given BDS. + */ +bool bdrv_write_threshold_is_set(const BlockDriverState *bs); + +/* + * bdrv_write_threshold_exceeded + * + * Return the extent of a write request that exceeded the threshold, + * or zero if the request is below the threshold. + * Return zero also if the threshold was not set. + * + * NOTE: here we assume the following holds for each request this code + * deals with: + * + * assert((req->offset + req->bytes) <= UINT64_MAX) + * + * Please not there is *not* an actual C assert(). + */ +uint64_t bdrv_write_threshold_exceeded(const BlockDriverState *bs, + const BdrvTrackedRequest *req); + +#endif -- cgit v1.1