From 946d58be1533bf843b499df12e1d9f97b28245c8 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 25 Sep 2012 15:47:36 +0200 Subject: block-migration: Flush requests in blk_mig_cleanup When cancelling block migration, all in-flight requests of the block migration must be completed before the data can be freed. This was visible as failing assertions and segfaults. Reported-by: Peter Lieven Signed-off-by: Kevin Wolf --- block-migration.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block-migration.c b/block-migration.c index 7def8ab..ed93301 100644 --- a/block-migration.c +++ b/block-migration.c @@ -519,6 +519,8 @@ static void blk_mig_cleanup(void) BlkMigDevState *bmds; BlkMigBlock *blk; + bdrv_drain_all(); + set_dirty_tracking(0); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { -- cgit v1.1 From 870f5681c9dbafc738082b1fd48e0cc013bf43c7 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Tue, 25 Sep 2012 12:29:39 -0400 Subject: block: after creating a live snapshot, make old image read-only Currently, after a live snapshot of a drive, the image that has been 'demoted' to be below the new active layer remains r/w. This patch reopens it read-only. Note that we do not check for error on the reopen(), because we will not abort the snapshots if the reopen fails. This patch depends on the bdrv_reopen() series. Signed-off-by: Jeff Cody Signed-off-by: Kevin Wolf --- blockdev.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/blockdev.c b/blockdev.c index e5d450f..0267fa3 100644 --- a/blockdev.c +++ b/blockdev.c @@ -805,6 +805,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { /* This removes our old bs from the bdrv_states, and adds the new bs */ bdrv_append(states->new_bs, states->old_bs); + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we + * don't want to abort all of them if one of them fails the reopen */ + bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, + NULL); } /* success */ -- cgit v1.1 From 00f78533326c5ba2e62fafada16655aa558a5520 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 Sep 2012 14:40:56 +0530 Subject: aio: Fix qemu_aio_wait() to maintain correct walking_handlers count Fix qemu_aio_wait() to ensure that registered aio handlers don't get deleted when they are still active. This is ensured by maintaning the right count of walking_handlers. Signed-off-by: Paolo Bonzini Signed-off-by: Bharata B Rao Signed-off-by: Kevin Wolf --- aio.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/aio.c b/aio.c index 0a9eb10..99b8b72 100644 --- a/aio.c +++ b/aio.c @@ -119,7 +119,7 @@ bool qemu_aio_wait(void) return true; } - walking_handlers = 1; + walking_handlers++; FD_ZERO(&rdfds); FD_ZERO(&wrfds); @@ -147,7 +147,7 @@ bool qemu_aio_wait(void) } } - walking_handlers = 0; + walking_handlers--; /* No AIO operations? Get us out of here */ if (!busy) { @@ -159,7 +159,7 @@ bool qemu_aio_wait(void) /* if we have any readable fds, dispatch event */ if (ret > 0) { - walking_handlers = 1; + walking_handlers++; /* we have to walk very carefully in case * qemu_aio_set_fd_handler is called while we're walking */ @@ -187,7 +187,7 @@ bool qemu_aio_wait(void) } } - walking_handlers = 0; + walking_handlers--; } return true; -- cgit v1.1 From ca0defb95c69f88a905d64adbe253e7bb8fde14a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 24 Sep 2012 14:42:02 +0530 Subject: qemu: URI parsing library Add a new URI parsing library to QEMU. The code has been borrowed from libxml2 and libvirt. Signed-off-by: Paolo Bonzini Signed-off-by: Bharata B Rao Signed-off-by: Kevin Wolf --- Makefile.objs | 2 +- uri.c | 2249 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ uri.h | 113 +++ 3 files changed, 2363 insertions(+), 1 deletion(-) create mode 100644 uri.c create mode 100644 uri.h diff --git a/Makefile.objs b/Makefile.objs index 4412757..7c1c682 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -42,7 +42,7 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o # block-obj-y is code used by both qemu system emulation and qemu-img block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o -block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o +block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o uri.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o diff --git a/uri.c b/uri.c new file mode 100644 index 0000000..dd922de --- /dev/null +++ b/uri.c @@ -0,0 +1,2249 @@ +/** + * uri.c: set of generic URI related routines + * + * Reference: RFCs 3986, 2732 and 2373 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * daniel@veillard.com + * + ** + * + * Copyright (C) 2007, 2009-2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones + * + */ + +#include +#include +#include + +#include "uri.h" + +static void uri_clean(URI *uri); + +/* + * Old rule from 2396 used in legacy handling code + * alpha = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + * "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + * "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +#ifdef IS_DIGIT +#undef IS_DIGIT +#endif +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ + ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ + ((x) == '(') || ((x) == ')')) + +/* + * unwise = "{" | "}" | "|" | "\" | "^" | "`" + */ + +#define IS_UNWISE(p) \ + (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ + ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ + ((*(p) == ']')) || ((*(p) == '`'))) +/* + * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | + * "[" | "]" + */ + +#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ + ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ + ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ + ((x) == ']')) + +/* + * unreserved = alphanum | mark + */ + +#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) + +/* + * Skip to next pointer char, handle escaped sequences + */ + +#define NEXT(p) ((*p == '%')? p += 3 : p++) + +/* + * Productions from the spec. + * + * authority = server | reg_name + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + * + * path = [ abs_path | opaque_part ] + */ + + +/************************************************************************ + * * + * RFC 3986 parser * + * * + ************************************************************************/ + +#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) +#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ + ((*(p) >= 'A') && (*(p) <= 'Z'))) +#define ISA_HEXDIG(p) \ + (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ + ((*(p) >= 'A') && (*(p) <= 'F'))) + +/* + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + */ +#define ISA_SUB_DELIM(p) \ + (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ + ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ + ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ + ((*(p) == '=')) || ((*(p) == '\''))) + +/* + * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + */ +#define ISA_GEN_DELIM(p) \ + (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ + ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ + ((*(p) == '@'))) + +/* + * reserved = gen-delims / sub-delims + */ +#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) + +/* + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + */ +#define ISA_UNRESERVED(p) \ + ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ + ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) + +/* + * pct-encoded = "%" HEXDIG HEXDIG + */ +#define ISA_PCT_ENCODED(p) \ + ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) + +/* + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + */ +#define ISA_PCHAR(p) \ + (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ + ((*(p) == ':')) || ((*(p) == '@'))) + +/** + * rfc3986_parse_scheme: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI scheme + * + * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_scheme(URI *uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!ISA_ALPHA(cur)) + return(2); + cur++; + while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || + (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; + if (uri != NULL) { + if (uri->scheme != NULL) g_free(uri->scheme); + uri->scheme = g_strndup(*str, cur - *str); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_fragment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * fragment = *( pchar / "/" / "?" ) + * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' + * in the fragment identifier but this is used very broadly for + * xpointer scheme selection, so we are allowing it here to not break + * for example all the DocBook processing chains. + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_fragment(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + (*cur == '[') || (*cur == ']') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->fragment != NULL) + g_free(uri->fragment); + if (uri->cleanup & 2) + uri->fragment = g_strndup(*str, cur - *str); + else + uri->fragment = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_query: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_query(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->query != NULL) + g_free (uri->query); + uri->query = g_strndup (*str, cur - *str); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_port: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse a port part and fills in the appropriate fields + * of the @uri structure + * + * port = *DIGIT + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_port(URI *uri, const char **str) +{ + const char *cur = *str; + + if (ISA_DIGIT(cur)) { + if (uri != NULL) + uri->port = 0; + while (ISA_DIGIT(cur)) { + if (uri != NULL) + uri->port = uri->port * 10 + (*cur - '0'); + cur++; + } + *str = cur; + return(0); + } + return(1); +} + +/** + * rfc3986_parse_user_info: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an user informations part and fills in the appropriate fields + * of the @uri structure + * + * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_user_info(URI *uri, const char **str) +{ + const char *cur; + + cur = *str; + while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || + ISA_SUB_DELIM(cur) || (*cur == ':')) + NEXT(cur); + if (*cur == '@') { + if (uri != NULL) { + if (uri->user != NULL) g_free(uri->user); + if (uri->cleanup & 2) + uri->user = g_strndup(*str, cur - *str); + else + uri->user = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return(0); + } + return(1); +} + +/** + * rfc3986_parse_dec_octet: + * @str: the string to analyze + * + * dec-octet = DIGIT ; 0-9 + * / %x31-39 DIGIT ; 10-99 + * / "1" 2DIGIT ; 100-199 + * / "2" %x30-34 DIGIT ; 200-249 + * / "25" %x30-35 ; 250-255 + * + * Skip a dec-octet. + * + * Returns 0 if found and skipped, 1 otherwise + */ +static int +rfc3986_parse_dec_octet(const char **str) { + const char *cur = *str; + + if (!(ISA_DIGIT(cur))) + return(1); + if (!ISA_DIGIT(cur+1)) + cur++; + else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) + cur += 2; + else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) + cur += 3; + else if ((*cur == '2') && (*(cur + 1) >= '0') && + (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) + cur += 3; + else if ((*cur == '2') && (*(cur + 1) == '5') && + (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) + cur += 3; + else + return(1); + *str = cur; + return(0); +} +/** + * rfc3986_parse_host: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an host part and fills in the appropriate fields + * of the @uri structure + * + * host = IP-literal / IPv4address / reg-name + * IP-literal = "[" ( IPv6address / IPvFuture ) "]" + * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + * reg-name = *( unreserved / pct-encoded / sub-delims ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_host(URI *uri, const char **str) +{ + const char *cur = *str; + const char *host; + + host = cur; + /* + * IPv6 and future adressing scheme are enclosed between brackets + */ + if (*cur == '[') { + cur++; + while ((*cur != ']') && (*cur != 0)) + cur++; + if (*cur != ']') + return(1); + cur++; + goto found; + } + /* + * try to parse an IPv4 + */ + if (ISA_DIGIT(cur)) { + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + cur++; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + goto found; +not_ipv4: + cur = *str; + } + /* + * then this should be a hostname which can be empty + */ + while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) + NEXT(cur); +found: + if (uri != NULL) { + if (uri->authority != NULL) g_free(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) g_free(uri->server); + if (cur != host) { + if (uri->cleanup & 2) + uri->server = g_strndup(host, cur - host); + else + uri->server = uri_string_unescape(host, cur - host, NULL); + } else + uri->server = NULL; + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_authority: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an authority part and fills in the appropriate fields + * of the @uri structure + * + * authority = [ userinfo "@" ] host [ ":" port ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_authority(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + /* + * try to parse an userinfo and check for the trailing @ + */ + ret = rfc3986_parse_user_info(uri, &cur); + if ((ret != 0) || (*cur != '@')) + cur = *str; + else + cur++; + ret = rfc3986_parse_host(uri, &cur); + if (ret != 0) return(ret); + if (*cur == ':') { + cur++; + ret = rfc3986_parse_port(uri, &cur); + if (ret != 0) return(ret); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_segment: + * @str: the string to analyze + * @forbid: an optional forbidden character + * @empty: allow an empty segment + * + * Parse a segment and fills in the appropriate fields + * of the @uri structure + * + * segment = *pchar + * segment-nz = 1*pchar + * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + * ; non-zero-length segment without any colon ":" + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_segment(const char **str, char forbid, int empty) +{ + const char *cur; + + cur = *str; + if (!ISA_PCHAR(cur)) { + if (empty) + return(0); + return(1); + } + while (ISA_PCHAR(cur) && (*cur != forbid)) + NEXT(cur); + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_ab_empty: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path absolute or empty and fills in the appropriate fields + * of the @uri structure + * + * path-abempty = *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_ab_empty(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (*str != cur) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_absolute: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path absolute and fills in the appropriate fields + * of the @uri structure + * + * path-absolute = "/" [ segment-nz *( "/" segment ) ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_absolute(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if (*cur != '/') + return(1); + cur++; + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret == 0) { + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_rootless: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path without root and fills in the appropriate fields + * of the @uri structure + * + * path-rootless = segment-nz *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_rootless(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_no_scheme: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path which is not a scheme and fills in the appropriate fields + * of the @uri structure + * + * path-noscheme = segment-nz-nc *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_no_scheme(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, ':', 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_hier_part: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an hierarchical part and fills in the appropriate fields + * of the @uri structure + * + * hier-part = "//" authority path-abempty + * / path-absolute + * / path-rootless + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_hier_part(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if ((*cur == '/') && (*(cur + 1) == '/')) { + cur += 2; + ret = rfc3986_parse_authority(uri, &cur); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &cur); + if (ret != 0) return(ret); + *str = cur; + return(0); + } else if (*cur == '/') { + ret = rfc3986_parse_path_absolute(uri, &cur); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(cur)) { + ret = rfc3986_parse_path_rootless(uri, &cur); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_relative_ref: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * relative-ref = relative-part [ "?" query ] [ "#" fragment ] + * relative-part = "//" authority path-abempty + * / path-absolute + * / path-noscheme + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_relative_ref(URI *uri, const char *str) { + int ret; + + if ((*str == '/') && (*(str + 1) == '/')) { + str += 2; + ret = rfc3986_parse_authority(uri, &str); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &str); + if (ret != 0) return(ret); + } else if (*str == '/') { + ret = rfc3986_parse_path_absolute(uri, &str); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(str)) { + ret = rfc3986_parse_path_no_scheme(uri, &str); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + } + } + + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + + +/** + * rfc3986_parse: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse(URI *uri, const char *str) { + int ret; + + ret = rfc3986_parse_scheme(uri, &str); + if (ret != 0) return(ret); + if (*str != ':') { + return(1); + } + str++; + ret = rfc3986_parse_hier_part(uri, &str); + if (ret != 0) return(ret); + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + +/** + * rfc3986_parse_uri_reference: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_uri_reference(URI *uri, const char *str) { + int ret; + + if (str == NULL) + return(-1); + uri_clean(uri); + + /* + * Try first to parse absolute refs, then fallback to relative if + * it fails. + */ + ret = rfc3986_parse(uri, str); + if (ret != 0) { + uri_clean(uri); + ret = rfc3986_parse_relative_ref(uri, str); + if (ret != 0) { + uri_clean(uri); + return(ret); + } + } + return(0); +} + +/** + * uri_parse: + * @str: the URI string to analyze + * + * Parse an URI based on RFC 3986 + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse(const char *str) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + if (uri != NULL) { + ret = rfc3986_parse_uri_reference(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + } + return(uri); +} + +/** + * uri_parse_into: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string based on RFC 3986 and fills in the + * appropriate fields of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +int +uri_parse_into(URI *uri, const char *str) { + return(rfc3986_parse_uri_reference(uri, str)); +} + +/** + * uri_parse_raw: + * @str: the URI string to analyze + * @raw: if 1 unescaping of URI pieces are disabled + * + * Parse an URI but allows to keep intact the original fragments. + * + * URI-reference = URI / relative-ref + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse_raw(const char *str, int raw) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + if (uri != NULL) { + if (raw) { + uri->cleanup |= 2; + } + ret = uri_parse_into(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + } + return(uri); +} + +/************************************************************************ + * * + * Generic URI structure functions * + * * + ************************************************************************/ + +/** + * uri_new: + * + * Simply creates an empty URI + * + * Returns the new structure or NULL in case of error + */ +URI * +uri_new(void) { + URI *ret; + + ret = (URI *) g_malloc(sizeof(URI)); + memset(ret, 0, sizeof(URI)); + return(ret); +} + +/** + * realloc2n: + * + * Function to handle properly a reallocation when saving an URI + * Also imposes some limit on the length of an URI string output + */ +static char * +realloc2n(char *ret, int *max) { + char *temp; + int tmp; + + tmp = *max * 2; + temp = g_realloc(ret, (tmp + 1)); + *max = tmp; + return(temp); +} + +/** + * uri_to_string: + * @uri: pointer to an URI + * + * Save the URI as an escaped string + * + * Returns a new string (to be deallocated by caller) + */ +char * +uri_to_string(URI *uri) { + char *ret = NULL; + char *temp; + const char *p; + int len; + int max; + + if (uri == NULL) return(NULL); + + + max = 80; + ret = g_malloc(max + 1); + len = 0; + + if (uri->scheme != NULL) { + p = uri->scheme; + while (*p != 0) { + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = ':'; + } + if (uri->opaque != NULL) { + p = uri->opaque; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else { + if (uri->server != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + if (uri->user != NULL) { + p = uri->user; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == ':')) || + ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '@'; + } + p = uri->server; + while (*p != 0) { + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + if (uri->port > 0) { + if (len + 10 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + len += snprintf(&ret[len], max - len, ":%d", uri->port); + } + } else if (uri->authority != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + p = uri->authority; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+'))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else if (uri->scheme != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + } + if (uri->path != NULL) { + p = uri->path; + /* + * the colon in file:///d: should not be escaped or + * Windows accesses fail later. + */ + if ((uri->scheme != NULL) && + (p[0] == '/') && + (((p[1] >= 'a') && (p[1] <= 'z')) || + ((p[1] >= 'A') && (p[1] <= 'Z'))) && + (p[2] == ':') && + (!strcmp(uri->scheme, "file"))) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + ret[len++] = *p++; + ret[len++] = *p++; + } + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (uri->query != NULL) { + if (len + 1 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '?'; + p = uri->query; + while (*p != 0) { + if (len + 1 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + } + } + if (uri->fragment != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '#'; + p = uri->fragment; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len] = 0; + return(ret); + +mem_error: + g_free(ret); + return(NULL); +} + +/** + * uri_clean: + * @uri: pointer to an URI + * + * Make sure the URI struct is free of content + */ +static void +uri_clean(URI *uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) g_free(uri->scheme); + uri->scheme = NULL; + if (uri->server != NULL) g_free(uri->server); + uri->server = NULL; + if (uri->user != NULL) g_free(uri->user); + uri->user = NULL; + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + if (uri->fragment != NULL) g_free(uri->fragment); + uri->fragment = NULL; + if (uri->opaque != NULL) g_free(uri->opaque); + uri->opaque = NULL; + if (uri->authority != NULL) g_free(uri->authority); + uri->authority = NULL; + if (uri->query != NULL) g_free(uri->query); + uri->query = NULL; +} + +/** + * uri_free: + * @uri: pointer to an URI + * + * Free up the URI struct + */ +void +uri_free(URI *uri) { + uri_clean(uri); + g_free(uri); +} + +/************************************************************************ + * * + * Helper functions * + * * + ************************************************************************/ + +/** + * normalize_uri_path: + * @path: pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. + * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +static int +normalize_uri_path(char *path) { + char *cur, *out; + + if (path == NULL) + return(-1); + + /* Skip all initial "/" chars. We want to get to the beginning of the + * first non-empty segment. + */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* Keep everything we've seen so far. */ + out = cur; + + /* + * Analyze each segment in sequence for cases (c) and (d). + */ + while (cur[0] != '\0') { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. + */ + if ((cur[0] == '.') && (cur[1] == '/')) { + cur += 2; + /* '//' normalization should be done at this point too */ + while (cur[0] == '/') + cur++; + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((cur[0] == '.') && (cur[1] == '\0')) + break; + + /* Otherwise keep the segment. */ + while (cur[0] != '/') { + if (cur[0] == '\0') + goto done_cd; + (out++)[0] = (cur++)[0]; + } + /* nomalize // */ + while ((cur[0] == '/') && (cur[1] == '/')) + cur++; + + (out++)[0] = (cur++)[0]; + } + done_cd: + out[0] = '\0'; + + /* Reset to the beginning of the first segment for the next sequence. */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* + * Analyze each segment in sequence for cases (e) and (f). + * + * e) All occurrences of "/../", where is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + * + * f) If the buffer string ends with "/..", where + * is a complete path segment not equal to "..", that + * "/.." is removed. + * + * To satisfy the "iterative" clause in (e), we need to collapse the + * string every time we find something that needs to be removed. Thus, + * we don't need to keep two pointers into the string: we only need a + * "current position" pointer. + */ + while (1) { + char *segp, *tmp; + + /* At the beginning of each iteration of this loop, "cur" points to + * the first character of the segment we want to examine. + */ + + /* Find the end of the current segment. */ + segp = cur; + while ((segp[0] != '/') && (segp[0] != '\0')) + ++segp; + + /* If this is the last segment, we're done (we need at least two + * segments to meet the criteria for the (e) and (f) cases). + */ + if (segp[0] == '\0') + break; + + /* If the first segment is "..", or if the next segment _isn't_ "..", + * keep this segment and try the next one. + */ + ++segp; + if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) + || ((segp[0] != '.') || (segp[1] != '.') + || ((segp[2] != '/') && (segp[2] != '\0')))) { + cur = segp; + continue; + } + + /* If we get here, remove this segment and the next one and back up + * to the previous segment (if there is one), to implement the + * "iteratively" clause. It's pretty much impossible to back up + * while maintaining two pointers into the buffer, so just compact + * the whole buffer now. + */ + + /* If this is the end of the buffer, we're done. */ + if (segp[2] == '\0') { + cur[0] = '\0'; + break; + } + /* Valgrind complained, strcpy(cur, segp + 3); */ + /* string will overlap, do not use strcpy */ + tmp = cur; + segp += 3; + while ((*tmp++ = *segp++) != 0) + ; + + /* If there are no previous segments, then keep going from here. */ + segp = cur; + while ((segp > path) && ((--segp)[0] == '/')) + ; + if (segp == path) + continue; + + /* "segp" is pointing to the end of a previous segment; find it's + * start. We need to back up to the previous segment and start + * over with that to handle things like "foo/bar/../..". If we + * don't do this, then on the first pass we'll remove the "bar/..", + * but be pointing at the second ".." so we won't realize we can also + * remove the "foo/..". + */ + cur = segp; + while ((cur > path) && (cur[-1] != '/')) + --cur; + } + out[0] = '\0'; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. + */ + if (path[0] == '/') { + cur = path; + while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') + && ((cur[3] == '/') || (cur[3] == '\0'))) + cur += 3; + + if (cur != path) { + out = path; + while (cur[0] != '\0') + (out++)[0] = (cur++)[0]; + out[0] = 0; + } + } + + return(0); +} + +static int is_hex(char c) { + if (((c >= '0') && (c <= '9')) || + ((c >= 'a') && (c <= 'f')) || + ((c >= 'A') && (c <= 'F'))) + return(1); + return(0); +} + + +/** + * uri_string_unescape: + * @str: the string to unescape + * @len: the length in bytes to unescape (or <= 0 to indicate full string) + * @target: optional destination buffer + * + * Unescaping routine, but does not check that the string is an URI. The + * output is a direct unsigned char translation of %XX values (no encoding) + * Note that the length of the result can only be smaller or same size as + * the input string. + * + * Returns a copy of the string, but unescaped, will return NULL only in case + * of error + */ +char * +uri_string_unescape(const char *str, int len, char *target) { + char *ret, *out; + const char *in; + + if (str == NULL) + return(NULL); + if (len <= 0) len = strlen(str); + if (len < 0) return(NULL); + + if (target == NULL) { + ret = g_malloc(len + 1); + } else + ret = target; + in = str; + out = ret; + while(len > 0) { + if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { + in++; + if ((*in >= '0') && (*in <= '9')) + *out = (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = (*in - 'A') + 10; + in++; + if ((*in >= '0') && (*in <= '9')) + *out = *out * 16 + (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = *out * 16 + (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = *out * 16 + (*in - 'A') + 10; + in++; + len -= 3; + out++; + } else { + *out++ = *in++; + len--; + } + } + *out = 0; + return(ret); +} + +/** + * uri_string_escape: + * @str: string to escape + * @list: exception list string of chars not to escape + * + * This routine escapes a string to hex, ignoring reserved characters (a-z) + * and the characters in the exception list. + * + * Returns a new escaped string or NULL in case of error. + */ +char * +uri_string_escape(const char *str, const char *list) { + char *ret, ch; + char *temp; + const char *in; + int len, out; + + if (str == NULL) + return(NULL); + if (str[0] == 0) + return(g_strdup(str)); + len = strlen(str); + if (!(len > 0)) return(NULL); + + len += 20; + ret = g_malloc(len); + in = str; + out = 0; + while(*in != 0) { + if (len - out <= 3) { + temp = realloc2n(ret, &len); + ret = temp; + } + + ch = *in; + + if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) { + unsigned char val; + ret[out++] = '%'; + val = ch >> 4; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + val = ch & 0xF; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + in++; + } else { + ret[out++] = *in++; + } + + } + ret[out] = 0; + return(ret); +} + +/************************************************************************ + * * + * Public functions * + * * + ************************************************************************/ + +/** + * uri_resolve: + * @URI: the URI instance found in the document + * @base: the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * of error. + */ +char * +uri_resolve(const char *uri, const char *base) { + char *val = NULL; + int ret, len, indx, cur, out; + URI *ref = NULL; + URI *bas = NULL; + URI *res = NULL; + + /* + * 1) The URI reference is parsed into the potential four components and + * fragment identifier, as described in Section 4.3. + * + * NOTE that a completely empty URI is treated by modern browsers + * as a reference to "." rather than as a synonym for the current + * URI. Should we do that here? + */ + if (uri == NULL) + ret = -1; + else { + if (*uri) { + ref = uri_new(); + if (ref == NULL) + goto done; + ret = uri_parse_into(ref, uri); + } + else + ret = 0; + } + if (ret != 0) + goto done; + if ((ref != NULL) && (ref->scheme != NULL)) { + /* + * The URI is absolute don't modify. + */ + val = g_strdup(uri); + goto done; + } + if (base == NULL) + ret = -1; + else { + bas = uri_new(); + if (bas == NULL) + goto done; + ret = uri_parse_into(bas, base); + } + if (ret != 0) { + if (ref) + val = uri_to_string(ref); + goto done; + } + if (ref == NULL) { + /* + * the base fragment must be ignored + */ + if (bas->fragment != NULL) { + g_free(bas->fragment); + bas->fragment = NULL; + } + val = uri_to_string(bas); + goto done; + } + + /* + * 2) If the path component is empty and the scheme, authority, and + * query components are undefined, then it is a reference to the + * current document and we are done. Otherwise, the reference URI's + * query and fragment components are defined as found (or not found) + * within the URI reference and not inherited from the base URI. + * + * NOTE that in modern browsers, the parsing differs from the above + * in the following aspect: the query component is allowed to be + * defined while still treating this as a reference to the current + * document. + */ + res = uri_new(); + if (res == NULL) + goto done; + if ((ref->scheme == NULL) && (ref->path == NULL) && + ((ref->authority == NULL) && (ref->server == NULL))) { + if (bas->scheme != NULL) + res->scheme = g_strdup(bas->scheme); + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + if (bas->user != NULL) + res->user = g_strdup(bas->user); + res->port = bas->port; + } + if (bas->path != NULL) + res->path = g_strdup(bas->path); + if (ref->query != NULL) + res->query = g_strdup (ref->query); + else if (bas->query != NULL) + res->query = g_strdup(bas->query); + if (ref->fragment != NULL) + res->fragment = g_strdup(ref->fragment); + goto step_7; + } + + /* + * 3) If the scheme component is defined, indicating that the reference + * starts with a scheme name, then the reference is interpreted as an + * absolute URI and we are done. Otherwise, the reference URI's + * scheme is inherited from the base URI's scheme component. + */ + if (ref->scheme != NULL) { + val = uri_to_string(ref); + goto done; + } + if (bas->scheme != NULL) + res->scheme = g_strdup(bas->scheme); + + if (ref->query != NULL) + res->query = g_strdup(ref->query); + if (ref->fragment != NULL) + res->fragment = g_strdup(ref->fragment); + + /* + * 4) If the authority component is defined, then the reference is a + * network-path and we skip to step 7. Otherwise, the reference + * URI's authority is inherited from the base URI's authority + * component, which will also be undefined if the URI scheme does not + * use an authority component. + */ + if ((ref->authority != NULL) || (ref->server != NULL)) { + if (ref->authority != NULL) + res->authority = g_strdup(ref->authority); + else { + res->server = g_strdup(ref->server); + if (ref->user != NULL) + res->user = g_strdup(ref->user); + res->port = ref->port; + } + if (ref->path != NULL) + res->path = g_strdup(ref->path); + goto step_7; + } + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + if (bas->user != NULL) + res->user = g_strdup(bas->user); + res->port = bas->port; + } + + /* + * 5) If the path component begins with a slash character ("/"), then + * the reference is an absolute-path and we skip to step 7. + */ + if ((ref->path != NULL) && (ref->path[0] == '/')) { + res->path = g_strdup(ref->path); + goto step_7; + } + + + /* + * 6) If this step is reached, then we are resolving a relative-path + * reference. The relative path needs to be merged with the base + * URI's path. Although there are many ways to do this, we will + * describe a simple method using a separate string buffer. + * + * Allocate a buffer large enough for the result string. + */ + len = 2; /* extra / and 0 */ + if (ref->path != NULL) + len += strlen(ref->path); + if (bas->path != NULL) + len += strlen(bas->path); + res->path = g_malloc(len); + res->path[0] = 0; + + /* + * a) All but the last segment of the base URI's path component is + * copied to the buffer. In other words, any characters after the + * last (right-most) slash character, if any, are excluded. + */ + cur = 0; + out = 0; + if (bas->path != NULL) { + while (bas->path[cur] != 0) { + while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) + cur++; + if (bas->path[cur] == 0) + break; + + cur++; + while (out < cur) { + res->path[out] = bas->path[out]; + out++; + } + } + } + res->path[out] = 0; + + /* + * b) The reference's path component is appended to the buffer + * string. + */ + if (ref->path != NULL && ref->path[0] != 0) { + indx = 0; + /* + * Ensure the path includes a '/' + */ + if ((out == 0) && (bas->server != NULL)) + res->path[out++] = '/'; + while (ref->path[indx] != 0) { + res->path[out++] = ref->path[indx++]; + } + } + res->path[out] = 0; + + /* + * Steps c) to h) are really path normalization steps + */ + normalize_uri_path(res->path); + +step_7: + + /* + * 7) The resulting URI components, including any inherited from the + * base URI, are recombined to give the absolute form of the URI + * reference. + */ + val = uri_to_string(res); + +done: + if (ref != NULL) + uri_free(ref); + if (bas != NULL) + uri_free(bas); + if (res != NULL) + uri_free(res); + return(val); +} + +/** + * uri_resolve_relative: + * @URI: the URI reference under consideration + * @base: the base value + * + * Expresses the URI of the reference in terms relative to the + * base. Some examples of this operation include: + * base = "http://site1.com/docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif pic1.gif + * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif + * + * base = "docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif + * + * + * Note: if the URI reference is really wierd or complicated, it may be + * worthwhile to first convert it into a "nice" one by calling + * uri_resolve (using 'base') before calling this routine, + * since this routine (for reasonable efficiency) assumes URI has + * already been through some validation. + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * error. + */ +char * +uri_resolve_relative (const char *uri, const char * base) +{ + char *val = NULL; + int ret; + int ix; + int pos = 0; + int nbslash = 0; + int len; + URI *ref = NULL; + URI *bas = NULL; + char *bptr, *uptr, *vptr; + int remove_path = 0; + + if ((uri == NULL) || (*uri == 0)) + return NULL; + + /* + * First parse URI into a standard form + */ + ref = uri_new (); + if (ref == NULL) + return NULL; + /* If URI not already in "relative" form */ + if (uri[0] != '.') { + ret = uri_parse_into (ref, uri); + if (ret != 0) + goto done; /* Error in URI, return NULL */ + } else + ref->path = g_strdup(uri); + + /* + * Next parse base into the same standard form + */ + if ((base == NULL) || (*base == 0)) { + val = g_strdup (uri); + goto done; + } + bas = uri_new (); + if (bas == NULL) + goto done; + if (base[0] != '.') { + ret = uri_parse_into (bas, base); + if (ret != 0) + goto done; /* Error in base, return NULL */ + } else + bas->path = g_strdup(base); + + /* + * If the scheme / server on the URI differs from the base, + * just return the URI + */ + if ((ref->scheme != NULL) && + ((bas->scheme == NULL) || + (strcmp (bas->scheme, ref->scheme)) || + (strcmp (bas->server, ref->server)))) { + val = g_strdup (uri); + goto done; + } + if (!strcmp(bas->path, ref->path)) { + val = g_strdup(""); + goto done; + } + if (bas->path == NULL) { + val = g_strdup(ref->path); + goto done; + } + if (ref->path == NULL) { + ref->path = (char *) "/"; + remove_path = 1; + } + + /* + * At this point (at last!) we can compare the two paths + * + * First we take care of the special case where either of the + * two path components may be missing (bug 316224) + */ + if (bas->path == NULL) { + if (ref->path != NULL) { + uptr = ref->path; + if (*uptr == '/') + uptr++; + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + } + goto done; + } + bptr = bas->path; + if (ref->path == NULL) { + for (ix = 0; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + uptr = NULL; + len = 1; /* this is for a string terminator only */ + } else { + /* + * Next we compare the two strings and find where they first differ + */ + if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) + pos += 2; + if ((*bptr == '.') && (bptr[1] == '/')) + bptr += 2; + else if ((*bptr == '/') && (ref->path[pos] != '/')) + bptr++; + while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) + pos++; + + if (bptr[pos] == ref->path[pos]) { + val = g_strdup(""); + goto done; /* (I can't imagine why anyone would do this) */ + } + + /* + * In URI, "back up" to the last '/' encountered. This will be the + * beginning of the "unique" suffix of URI + */ + ix = pos; + if ((ref->path[ix] == '/') && (ix > 0)) + ix--; + else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) + ix -= 2; + for (; ix > 0; ix--) { + if (ref->path[ix] == '/') + break; + } + if (ix == 0) { + uptr = ref->path; + } else { + ix++; + uptr = &ref->path[ix]; + } + + /* + * In base, count the number of '/' from the differing point + */ + if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ + for (; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + } + len = strlen (uptr) + 1; + } + + if (nbslash == 0) { + if (uptr != NULL) + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + goto done; + } + + /* + * Allocate just enough space for the returned string - + * length of the remainder of the URI, plus enough space + * for the "../" groups, plus one for the terminator + */ + val = g_malloc (len + 3 * nbslash); + vptr = val; + /* + * Put in as many "../" as needed + */ + for (; nbslash>0; nbslash--) { + *vptr++ = '.'; + *vptr++ = '.'; + *vptr++ = '/'; + } + /* + * Finish up with the end of the URI + */ + if (uptr != NULL) { + if ((vptr > val) && (len > 0) && + (uptr[0] == '/') && (vptr[-1] == '/')) { + memcpy (vptr, uptr + 1, len - 1); + vptr[len - 2] = 0; + } else { + memcpy (vptr, uptr, len); + vptr[len - 1] = 0; + } + } else { + vptr[len - 1] = 0; + } + + /* escape the freshly-built path */ + vptr = val; + /* exception characters from uri_to_string */ + val = uri_string_escape(vptr, "/;&=+$,"); + g_free(vptr); + +done: + /* + * Free the working variables + */ + if (remove_path != 0) + ref->path = NULL; + if (ref != NULL) + uri_free (ref); + if (bas != NULL) + uri_free (bas); + + return val; +} + +/* + * Utility functions to help parse and assemble query strings. + */ + +struct QueryParams * +query_params_new (int init_alloc) +{ + struct QueryParams *ps; + + if (init_alloc <= 0) init_alloc = 1; + + ps = g_new(QueryParams, 1); + ps->n = 0; + ps->alloc = init_alloc; + ps->p = g_new(QueryParam, ps->alloc); + + return ps; +} + +/* Ensure there is space to store at least one more parameter + * at the end of the set. + */ +static int +query_params_append (struct QueryParams *ps, + const char *name, const char *value) +{ + if (ps->n >= ps->alloc) { + ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2); + ps->alloc *= 2; + } + + ps->p[ps->n].name = g_strdup(name); + ps->p[ps->n].value = value ? g_strdup(value) : NULL; + ps->p[ps->n].ignore = 0; + ps->n++; + + return 0; +} + +void +query_params_free (struct QueryParams *ps) +{ + int i; + + for (i = 0; i < ps->n; ++i) { + g_free (ps->p[i].name); + g_free (ps->p[i].value); + } + g_free (ps->p); + g_free (ps); +} + +struct QueryParams * +query_params_parse (const char *query) +{ + struct QueryParams *ps; + const char *end, *eq; + + ps = query_params_new (0); + if (!query || query[0] == '\0') return ps; + + while (*query) { + char *name = NULL, *value = NULL; + + /* Find the next separator, or end of the string. */ + end = strchr (query, '&'); + if (!end) + end = strchr (query, ';'); + if (!end) + end = query + strlen (query); + + /* Find the first '=' character between here and end. */ + eq = strchr (query, '='); + if (eq && eq >= end) eq = NULL; + + /* Empty section (eg. "&&"). */ + if (end == query) + goto next; + + /* If there is no '=' character, then we have just "name" + * and consistent with CGI.pm we assume value is "". + */ + else if (!eq) { + name = uri_string_unescape (query, end - query, NULL); + value = NULL; + } + /* Or if we have "name=" here (works around annoying + * problem when calling uri_string_unescape with len = 0). + */ + else if (eq+1 == end) { + name = uri_string_unescape (query, eq - query, NULL); + value = g_new0(char, 1); + } + /* If the '=' character is at the beginning then we have + * "=value" and consistent with CGI.pm we _ignore_ this. + */ + else if (query == eq) + goto next; + + /* Otherwise it's "name=value". */ + else { + name = uri_string_unescape (query, eq - query, NULL); + value = uri_string_unescape (eq+1, end - (eq+1), NULL); + } + + /* Append to the parameter set. */ + query_params_append (ps, name, value); + g_free(name); + g_free(value); + + next: + query = end; + if (*query) query ++; /* skip '&' separator */ + } + + return ps; +} diff --git a/uri.h b/uri.h new file mode 100644 index 0000000..de99b3b --- /dev/null +++ b/uri.h @@ -0,0 +1,113 @@ +/** + * Summary: library of generic URI related routines + * Description: library of generic URI related routines + * Implements RFC 2396 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * Author: Daniel Veillard + ** + * Copyright (C) 2007 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones + * + * Utility functions to help parse and assemble query strings. + */ + +#ifndef QEMU_URI_H +#define QEMU_URI_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * URI: + * + * A parsed URI reference. This is a struct containing the various fields + * as described in RFC 2396 but separated for further processing. + */ +typedef struct URI { + char *scheme; /* the URI scheme */ + char *opaque; /* opaque part */ + char *authority; /* the authority part */ + char *server; /* the server part */ + char *user; /* the user part */ + int port; /* the port number */ + char *path; /* the path string */ + char *fragment; /* the fragment identifier */ + int cleanup; /* parsing potentially unclean URI */ + char *query; /* the query string (as it appears in the URI) */ +} URI; + +URI *uri_new(void); +char *uri_resolve(const char *URI, const char *base); +char *uri_resolve_relative(const char *URI, const char *base); +URI *uri_parse(const char *str); +URI *uri_parse_raw(const char *str, int raw); +int uri_parse_into(URI *uri, const char *str); +char *uri_to_string(URI *uri); +char *uri_string_escape(const char *str, const char *list); +char *uri_string_unescape(const char *str, int len, char *target); +void uri_free(URI *uri); + +/* Single web service query parameter 'name=value'. */ +typedef struct QueryParam { + char *name; /* Name (unescaped). */ + char *value; /* Value (unescaped). */ + int ignore; /* Ignore this field in qparam_get_query */ +} QueryParam; + +/* Set of parameters. */ +typedef struct QueryParams { + int n; /* number of parameters used */ + int alloc; /* allocated space */ + QueryParam *p; /* array of parameters */ +} QueryParams; + +struct QueryParams *query_params_new (int init_alloc); +int query_param_append (QueryParams *ps, const char *name, const char *value); +extern char *query_param_to_string (const QueryParams *ps); +extern QueryParams *query_params_parse (const char *query); +extern void query_params_free (QueryParams *ps); + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_URI_H */ -- cgit v1.1 From 2db2bfc0ccac5fd68dbf0ceb70fbc372c5d8a8c7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Sep 2012 19:27:43 +0530 Subject: aio: Another fix to the walking_handlers logic The AIO dispatch loop will call QLIST_REMOVE and g_free even if there are other pending calls to qemu_aio_wait outside the current one. Signed-off-by: Paolo Bonzini Signed-off-by: Bharata B Rao Signed-off-by: Kevin Wolf --- aio.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/aio.c b/aio.c index 99b8b72..c738a4e 100644 --- a/aio.c +++ b/aio.c @@ -159,14 +159,14 @@ bool qemu_aio_wait(void) /* if we have any readable fds, dispatch event */ if (ret > 0) { - walking_handlers++; - /* we have to walk very carefully in case * qemu_aio_set_fd_handler is called while we're walking */ node = QLIST_FIRST(&aio_handlers); while (node) { AioHandler *tmp; + walking_handlers++; + if (!node->deleted && FD_ISSET(node->fd, &rdfds) && node->io_read) { @@ -181,13 +181,13 @@ bool qemu_aio_wait(void) tmp = node; node = QLIST_NEXT(node, node); - if (tmp->deleted) { + walking_handlers--; + + if (!walking_handlers && tmp->deleted) { QLIST_REMOVE(tmp, node); g_free(tmp); } } - - walking_handlers--; } return true; -- cgit v1.1 From eb100396b9d2658e8bf9cdc14bfcb16a9152f464 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Mon, 24 Sep 2012 14:42:45 +0530 Subject: configure: Add a config option for GlusterFS as block backend GlusterFS support in QEMU depends on libgfapi, libgfrpc and libgfxdr provided by GlusterFS. Signed-off-by: Bharata B Rao Signed-off-by: Kevin Wolf --- configure | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/configure b/configure index 8f99b7b..72d4b0d 100755 --- a/configure +++ b/configure @@ -219,6 +219,7 @@ want_tools="yes" libiscsi="" coroutine="" seccomp="" +glusterfs="" # parse CC options first for opt do @@ -856,6 +857,10 @@ for opt do ;; --disable-seccomp) seccomp="no" ;; + --disable-glusterfs) glusterfs="no" + ;; + --enable-glusterfs) glusterfs="yes" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -1128,6 +1133,8 @@ echo " --disable-seccomp disable seccomp support" echo " --enable-seccomp enables seccomp support" echo " --with-coroutine=BACKEND coroutine backend. Supported options:" echo " gthread, ucontext, sigaltstack, windows" +echo " --enable-glusterfs enable GlusterFS backend" +echo " --disable-glusterfs disable GlusterFS backend" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2303,6 +2310,29 @@ EOF fi fi +########################################## +# glusterfs probe +if test "$glusterfs" != "no" ; then + cat > $TMPC < +int main(void) { + (void) glfs_new("volume"); + return 0; +} +EOF + glusterfs_libs="-lgfapi -lgfrpc -lgfxdr" + if compile_prog "" "$glusterfs_libs" ; then + glusterfs=yes + libs_tools="$glusterfs_libs $libs_tools" + libs_softmmu="$glusterfs_libs $libs_softmmu" + else + if test "$glusterfs" = "yes" ; then + feature_not_found "GlusterFS backend support" + fi + glusterfs=no + fi +fi + # # Check for xxxat() functions when we are building linux-user # emulator. This is done because older glibc versions don't @@ -3170,6 +3200,7 @@ echo "libiscsi support $libiscsi" echo "build guest agent $guest_agent" echo "seccomp support $seccomp" echo "coroutine backend $coroutine_backend" +echo "GlusterFS support $glusterfs" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -3516,6 +3547,10 @@ if test "$has_environ" = "yes" ; then echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak fi +if test "$glusterfs" = "yes" ; then + echo "CONFIG_GLUSTERFS=y" >> $config_host_mak +fi + # USB host support case "$usb" in linux) -- cgit v1.1 From 8d6d89cb63c57569864ecdeb84d3a1c2ebd031cc Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Thu, 27 Sep 2012 19:30:32 +0530 Subject: block: Support GlusterFS as a QEMU block backend. This patch adds gluster as the new block backend in QEMU. This gives QEMU the ability to boot VM images from gluster volumes. Its already possible to boot from VM images on gluster volumes using FUSE mount, but this patchset provides the ability to boot VM images from gluster volumes by by-passing the FUSE layer in gluster. This is made possible by using libgfapi routines to perform IO on gluster volumes directly. VM Image on gluster volume is specified like this: file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] 'gluster' is the protocol. 'transport' specifies the transport type used to connect to gluster management daemon (glusterd). Valid transport types are tcp, unix and rdma. If a transport type isn't specified, then tcp type is assumed. 'server' specifies the server where the volume file specification for the given volume resides. This can be either hostname, ipv4 address or ipv6 address. ipv6 address needs to be within square brackets [ ]. If transport type is 'unix', then 'server' field should not be specifed. The 'socket' field needs to be populated with the path to unix domain socket. 'port' is the port number on which glusterd is listening. This is optional and if not specified, QEMU will send 0 which will make gluster to use the default port. If the transport type is unix, then 'port' should not be specified. 'volname' is the name of the gluster volume which contains the VM image. 'image' is the path to the actual VM image that resides on gluster volume. Examples: file=gluster://1.2.3.4/testvol/a.img file=gluster+tcp://1.2.3.4/testvol/a.img file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket file=gluster+rdma://1.2.3.4:24007/testvol/a.img Signed-off-by: Bharata B Rao Signed-off-by: Kevin Wolf --- block/Makefile.objs | 1 + block/gluster.c | 624 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 625 insertions(+) create mode 100644 block/gluster.c diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d3..a1ae67f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -9,3 +9,4 @@ block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 0000000..3588d73 --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,624 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao + * + * Pipe handling mechanism in AIO implementation is derived from + * block/rbd.c. Hence, + * + * Copyright (C) 2010-2011 Christian Brunner , + * Josh Durgin + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include +#include "block_int.h" +#include "qemu_socket.h" +#include "uri.h" + +typedef struct GlusterAIOCB { + BlockDriverAIOCB common; + int64_t size; + int ret; + bool *finished; + QEMUBH *bh; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + int fds[2]; + struct glfs_fd *fd; + int qemu_aio_count; + int event_reader_pos; + GlusterAIOCB *event_acb; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +typedef struct GlusterConf { + char *server; + int port; + char *volname; + char *image; + char *transport; +} GlusterConf; + +static void qemu_gluster_gconf_free(GlusterConf *gconf) +{ + g_free(gconf->server); + g_free(gconf->volname); + g_free(gconf->image); + g_free(gconf->transport); + g_free(gconf); +} + +static int parse_volume_options(GlusterConf *gconf, char *path) +{ + char *p, *q; + + if (!path) { + return -EINVAL; + } + + /* volume */ + p = q = path + strspn(path, "/"); + p += strcspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->volname = g_strndup(q, p - q); + + /* image */ + p += strspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->image = g_strdup(p); + return 0; +} + +/* + * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] + * + * 'gluster' is the protocol. + * + * 'transport' specifies the transport type used to connect to gluster + * management daemon (glusterd). Valid transport types are + * tcp, unix and rdma. If a transport type isn't specified, then tcp + * type is assumed. + * + * 'server' specifies the server where the volume file specification for + * the given volume resides. This can be either hostname, ipv4 address + * or ipv6 address. ipv6 address needs to be within square brackets [ ]. + * If transport type is 'unix', then 'server' field should not be specifed. + * The 'socket' field needs to be populated with the path to unix domain + * socket. + * + * 'port' is the port number on which glusterd is listening. This is optional + * and if not specified, QEMU will send 0 which will make gluster to use the + * default port. If the transport type is unix, then 'port' should not be + * specified. + * + * 'volname' is the name of the gluster volume which contains the VM image. + * + * 'image' is the path to the actual VM image that resides on gluster volume. + * + * Examples: + * + * file=gluster://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img + * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + * file=gluster+rdma://1.2.3.4:24007/testvol/a.img + */ +static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +{ + URI *uri; + QueryParams *qp = NULL; + bool is_unix = false; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "gluster")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+tcp")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+unix")) { + gconf->transport = g_strdup("unix"); + is_unix = true; + } else if (!strcmp(uri->scheme, "gluster+rdma")) { + gconf->transport = g_strdup("rdma"); + } else { + ret = -EINVAL; + goto out; + } + + ret = parse_volume_options(gconf, uri->path); + if (ret < 0) { + goto out; + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + if (uri->server || uri->port) { + ret = -EINVAL; + goto out; + } + if (strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + gconf->server = g_strdup(qp->p[0].value); + } else { + gconf->server = g_strdup(uri->server); + gconf->port = uri->port; + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename) +{ + struct glfs *glfs = NULL; + int ret; + int old_errno; + + ret = qemu_gluster_parseuri(gconf, filename); + if (ret < 0) { + error_report("Usage: file=gluster[+transport]://[server[:port]]/" + "volname/image[?socket=...]"); + errno = -ret; + goto out; + } + + glfs = glfs_new(gconf->volname); + if (!glfs) { + goto out; + } + + ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, + gconf->port); + if (ret < 0) { + goto out; + } + + /* + * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when + * GlusterFS makes GF_LOG_* macros available to libgfapi users. + */ + ret = glfs_set_logging(glfs, "-", 4); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret) { + error_report("Gluster connection failed for server=%s port=%d " + "volume=%s image=%s transport=%s\n", gconf->server, gconf->port, + gconf->volname, gconf->image, gconf->transport); + goto out; + } + return glfs; + +out: + if (glfs) { + old_errno = errno; + glfs_fini(glfs); + errno = old_errno; + } + return NULL; +} + +static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +{ + int ret; + bool *finished = acb->finished; + BlockDriverCompletionFunc *cb = acb->common.cb; + void *opaque = acb->common.opaque; + + if (!acb->ret || acb->ret == acb->size) { + ret = 0; /* Success */ + } else if (acb->ret < 0) { + ret = acb->ret; /* Read/Write failed */ + } else { + ret = -EIO; /* Partial read/write - fail it */ + } + + s->qemu_aio_count--; + qemu_aio_release(acb); + cb(opaque, ret); + if (finished) { + *finished = true; + } +} + +static void qemu_gluster_aio_event_reader(void *opaque) +{ + BDRVGlusterState *s = opaque; + ssize_t ret; + + do { + char *p = (char *)&s->event_acb; + + ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, + sizeof(s->event_acb) - s->event_reader_pos); + if (ret > 0) { + s->event_reader_pos += ret; + if (s->event_reader_pos == sizeof(s->event_acb)) { + s->event_reader_pos = 0; + qemu_gluster_complete_aio(s->event_acb, s); + } + } + } while (ret < 0 && errno == EINTR); +} + +static int qemu_gluster_aio_flush_cb(void *opaque) +{ + BDRVGlusterState *s = opaque; + + return (s->qemu_aio_count > 0); +} + +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, + int bdrv_flags) +{ + BDRVGlusterState *s = bs->opaque; + int open_flags = O_BINARY; + int ret = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + s->glfs = qemu_gluster_init(gconf, filename); + if (!s->glfs) { + ret = -errno; + goto out; + } + + if (bdrv_flags & BDRV_O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + open_flags |= O_DIRECT; + } + + s->fd = glfs_open(s->glfs, gconf->image, open_flags); + if (!s->fd) { + ret = -errno; + goto out; + } + + ret = qemu_pipe(s->fds); + if (ret < 0) { + ret = -errno; + goto out; + } + fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], + qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + +out: + qemu_gluster_gconf_free(gconf); + if (!ret) { + return ret; + } + if (s->fd) { + glfs_close(s->fd); + } + if (s->glfs) { + glfs_fini(s->glfs); + } + return ret; +} + +static int qemu_gluster_create(const char *filename, + QEMUOptionParameter *options) +{ + struct glfs *glfs; + struct glfs_fd *fd; + int ret = 0; + int64_t total_size = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + glfs = qemu_gluster_init(gconf, filename); + if (!glfs) { + ret = -errno; + goto out; + } + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = glfs_creat(glfs, gconf->image, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + if (!fd) { + ret = -errno; + } else { + if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + ret = -errno; + } + if (glfs_close(fd) != 0) { + ret = -errno; + } + } +out: + qemu_gluster_gconf_free(gconf); + if (glfs) { + glfs_fini(glfs); + } + return ret; +} + +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static AIOPool gluster_aio_pool = { + .aiocb_size = sizeof(GlusterAIOCB), + .cancel = qemu_gluster_aio_cancel, +}; + +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + BlockDriverState *bs = acb->common.bs; + BDRVGlusterState *s = bs->opaque; + int retval; + + acb->ret = ret; + retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); + if (retval != sizeof(acb)) { + /* + * Gluster AIO callback thread failed to notify the waiting + * QEMU thread about IO completion. + * + * Complete this IO request and make the disk inaccessible for + * subsequent reads and writes. + */ + error_report("Gluster failed to notify QEMU about IO completion"); + + qemu_mutex_lock_iothread(); /* We are in gluster thread context */ + acb->common.cb(acb->common.opaque, -EIO); + qemu_aio_release(acb); + s->qemu_aio_count--; + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, + NULL); + bs->drv = NULL; /* Make the disk inaccessible */ + qemu_mutex_unlock_iothread(); + } +} + +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int write) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + s->qemu_aio_count++; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = size; + acb->ret = 0; + acb->finished = NULL; + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } + + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = 0; + acb->ret = 0; + acb->finished = NULL; + s->qemu_aio_count++; + + ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + int64_t ret; + + ret = glfs_lseek(s->fd, 0, SEEK_END); + if (ret < 0) { + return -errno; + } else { + return ret; + } +} + +static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_blocks * 512; + } +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL); + + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_fini(s->glfs); +} + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_tcp = { + .format_name = "gluster", + .protocol_name = "gluster+tcp", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_unix = { + .format_name = "gluster", + .protocol_name = "gluster+unix", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_rdma = { + .format_name = "gluster", + .protocol_name = "gluster+rdma", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster_rdma); + bdrv_register(&bdrv_gluster_unix); + bdrv_register(&bdrv_gluster_tcp); + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init); -- cgit v1.1 From 6ebdcee2d8e9e4b41ffe4e49039927550848b926 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 27 Sep 2012 13:29:12 -0400 Subject: block: add support functions for live commit, to find and delete images. Add bdrv_find_overlay(), and bdrv_drop_intermediate(). bdrv_find_overlay(): given 'bs' and the active (topmost) BDS of an image chain, find the image that is the immediate top of 'bs' bdrv_drop_intermediate(): Given 3 BDS (active, top, base), drop images above base up to and including top, and set base to be the backing file of top's overlay node. E.g., this converts: bottom <- base <- intermediate <- top <- active to bottom <- base <- active Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ block.h | 4 ++ 2 files changed, 147 insertions(+) diff --git a/block.c b/block.c index 751ebdc..54209a5 100644 --- a/block.c +++ b/block.c @@ -1724,6 +1724,149 @@ int bdrv_change_backing_file(BlockDriverState *bs, return ret; } +/* + * Finds the image layer in the chain that has 'bs' as its backing file. + * + * active is the current topmost image. + * + * Returns NULL if bs is not found in active's image chain, + * or if active == bs. + */ +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs) +{ + BlockDriverState *overlay = NULL; + BlockDriverState *intermediate; + + assert(active != NULL); + assert(bs != NULL); + + /* if bs is the same as active, then by definition it has no overlay + */ + if (active == bs) { + return NULL; + } + + intermediate = active; + while (intermediate->backing_hd) { + if (intermediate->backing_hd == bs) { + overlay = intermediate; + break; + } + intermediate = intermediate->backing_hd; + } + + return overlay; +} + +typedef struct BlkIntermediateStates { + BlockDriverState *bs; + QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; +} BlkIntermediateStates; + + +/* + * Drops images above 'base' up to and including 'top', and sets the image + * above 'top' to have base as its backing file. + * + * Requires that the overlay to 'top' is opened r/w, so that the backing file + * information in 'bs' can be properly updated. + * + * E.g., this will convert the following chain: + * bottom <- base <- intermediate <- top <- active + * + * to + * + * bottom <- base <- active + * + * It is allowed for bottom==base, in which case it converts: + * + * base <- intermediate <- top <- active + * + * to + * + * base <- active + * + * Error conditions: + * if active == top, that is considered an error + * + */ +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base) +{ + BlockDriverState *intermediate; + BlockDriverState *base_bs = NULL; + BlockDriverState *new_top_bs = NULL; + BlkIntermediateStates *intermediate_state, *next; + int ret = -EIO; + + QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; + QSIMPLEQ_INIT(&states_to_delete); + + if (!top->drv || !base->drv) { + goto exit; + } + + new_top_bs = bdrv_find_overlay(active, top); + + if (new_top_bs == NULL) { + /* we could not find the image above 'top', this is an error */ + goto exit; + } + + /* special case of new_top_bs->backing_hd already pointing to base - nothing + * to do, no intermediate images */ + if (new_top_bs->backing_hd == base) { + ret = 0; + goto exit; + } + + intermediate = top; + + /* now we will go down through the list, and add each BDS we find + * into our deletion queue, until we hit the 'base' + */ + while (intermediate) { + intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); + intermediate_state->bs = intermediate; + QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); + + if (intermediate->backing_hd == base) { + base_bs = intermediate->backing_hd; + break; + } + intermediate = intermediate->backing_hd; + } + if (base_bs == NULL) { + /* something went wrong, we did not end at the base. safely + * unravel everything, and exit with error */ + goto exit; + } + + /* success - we can delete the intermediate states, and link top->base */ + ret = bdrv_change_backing_file(new_top_bs, base_bs->filename, + base_bs->drv ? base_bs->drv->format_name : ""); + if (ret) { + goto exit; + } + new_top_bs->backing_hd = base_bs; + + + QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { + /* so that bdrv_close() does not recursively close the chain */ + intermediate_state->bs->backing_hd = NULL; + bdrv_delete(intermediate_state->bs); + } + ret = 0; + +exit: + QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { + g_free(intermediate_state); + } + return ret; +} + + static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { diff --git a/block.h b/block.h index b1095d8..8c9b424 100644 --- a/block.h +++ b/block.h @@ -203,6 +203,10 @@ int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); typedef struct BdrvCheckResult { -- cgit v1.1 From 747ff602636fd83daae7ee4b1dd6e8d257a89fea Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 27 Sep 2012 13:29:13 -0400 Subject: block: add live block commit functionality This adds the live commit coroutine. This iteration focuses on the commit only below the active layer, and not the active layer itself. The behaviour is similar to block streaming; the sectors are walked through, and anything that exists above 'base' is committed back down into base. At the end, intermediate images are deleted, and the chain stitched together. Images are restored to their original open flags upon completion. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/Makefile.objs | 1 + block/commit.c | 267 ++++++++++++++++++++++++++++++++++++++++++++++++++++ block_int.h | 16 ++++ trace-events | 2 + 4 files changed, 286 insertions(+) create mode 100644 block/commit.c diff --git a/block/Makefile.objs b/block/Makefile.objs index a1ae67f..81fd43c 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -4,6 +4,7 @@ block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o block-obj-y += stream.o +block-obj-y += commit.o block-obj-$(CONFIG_WIN32) += raw-win32.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o diff --git a/block/commit.c b/block/commit.c new file mode 100644 index 0000000..624ec5f --- /dev/null +++ b/block/commit.c @@ -0,0 +1,267 @@ +/* + * Live block commit + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Jeff Cody + * Based on stream.c by Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block_int.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct CommitBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *active; + BlockDriverState *top; + BlockDriverState *base; + BlockErrorAction on_error; + int base_flags; + int orig_overlay_flags; +} CommitBlockJob; + +static int coroutine_fn commit_populate(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, int nb_sectors, + void *buf) +{ + int ret = 0; + + ret = bdrv_read(bs, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + ret = bdrv_write(base, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + return 0; +} + +static void coroutine_fn commit_run(void *opaque) +{ + CommitBlockJob *s = opaque; + BlockDriverState *active = s->active; + BlockDriverState *top = s->top; + BlockDriverState *base = s->base; + BlockDriverState *overlay_bs = NULL; + int64_t sector_num, end; + int ret = 0; + int n = 0; + void *buf; + int bytes_written = 0; + int64_t base_len; + + ret = s->common.len = bdrv_getlength(top); + + + if (s->common.len < 0) { + goto exit_restore_reopen; + } + + ret = base_len = bdrv_getlength(base); + if (base_len < 0) { + goto exit_restore_reopen; + } + + if (base_len < s->common.len) { + ret = bdrv_truncate(base, s->common.len); + if (ret) { + goto exit_restore_reopen; + } + } + + overlay_bs = bdrv_find_overlay(active, top); + + end = s->common.len >> BDRV_SECTOR_BITS; + buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE); + + for (sector_num = 0; sector_num < end; sector_num += n) { + uint64_t delay_ns = 0; + bool copy; + +wait: + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + /* Copy if allocated above the base */ + ret = bdrv_co_is_allocated_above(top, base, sector_num, + COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); + copy = (ret == 1); + trace_commit_one_iteration(s, sector_num, n, ret); + if (copy) { + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, n); + if (delay_ns > 0) { + goto wait; + } + } + ret = commit_populate(top, base, sector_num, n, buf); + bytes_written += n * BDRV_SECTOR_SIZE; + } + if (ret < 0) { + if (s->on_error == BLOCK_ERR_STOP_ANY || + s->on_error == BLOCK_ERR_REPORT || + (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) { + goto exit_free_buf; + } else { + n = 0; + continue; + } + } + /* Publish progress */ + s->common.offset += n * BDRV_SECTOR_SIZE; + } + + ret = 0; + + if (!block_job_is_cancelled(&s->common) && sector_num == end) { + /* success */ + ret = bdrv_drop_intermediate(active, top, base); + } + +exit_free_buf: + qemu_vfree(buf); + +exit_restore_reopen: + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_flags != bdrv_get_flags(base)) { + bdrv_reopen(base, s->base_flags, NULL); + } + if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { + bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); + } + + block_job_complete(&s->common, ret); +} + +static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType commit_job_type = { + .instance_size = sizeof(CommitBlockJob), + .job_type = "commit", + .set_speed = commit_set_speed, +}; + +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockErrorAction on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + CommitBlockJob *s; + BlockReopenQueue *reopen_queue = NULL; + int orig_overlay_flags; + int orig_base_flags; + BlockDriverState *overlay_bs; + Error *local_err = NULL; + + if ((on_error == BLOCK_ERR_STOP_ANY || + on_error == BLOCK_ERR_STOP_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER_COMBINATION); + return; + } + + /* Once we support top == active layer, remove this check */ + if (top == bs) { + error_setg(errp, + "Top image as the active layer is currently unsupported"); + return; + } + + if (top == base) { + error_setg(errp, "Invalid files for merge: top and base are the same"); + return; + } + + /* top and base may be valid, but let's make sure that base is reachable + * from top */ + if (bdrv_find_backing_image(top, base->filename) != base) { + error_setg(errp, + "Base (%s) is not reachable from top (%s)", + base->filename, top->filename); + return; + } + + overlay_bs = bdrv_find_overlay(bs, top); + + if (overlay_bs == NULL) { + error_setg(errp, "Could not find overlay image for %s:", top->filename); + return; + } + + orig_base_flags = bdrv_get_flags(base); + orig_overlay_flags = bdrv_get_flags(overlay_bs); + + /* convert base & overlay_bs to r/w, if necessary */ + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, + orig_base_flags | BDRV_O_RDWR); + } + if (!(orig_overlay_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, + orig_overlay_flags | BDRV_O_RDWR); + } + if (reopen_queue) { + bdrv_reopen_multiple(reopen_queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + } + + + s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->base = base; + s->top = top; + s->active = bs; + + s->base_flags = orig_base_flags; + s->orig_overlay_flags = orig_overlay_flags; + + s->on_error = on_error; + s->common.co = qemu_coroutine_create(commit_run); + + trace_commit_start(bs, base, top, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block_int.h b/block_int.h index ac4245c..56164a7 100644 --- a/block_int.h +++ b/block_int.h @@ -463,4 +463,20 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/** + * commit_start: + * @bs: Top Block device + * @base: Block device that will be written into, and become the new top + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + */ +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockErrorAction on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + #endif /* BLOCK_INT_H */ diff --git a/trace-events b/trace-events index f5b5097..dbc3007 100644 --- a/trace-events +++ b/trace-events @@ -74,6 +74,8 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c # block/stream.c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" +commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" +commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" # blockdev.c qmp_block_job_cancel(void *job) "job %p" -- cgit v1.1 From 9abf2dbaf6ae990c498ec0245986bddcd6b013ea Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 27 Sep 2012 13:29:14 -0400 Subject: blockdev: rename block_stream_cb to a generic block_job_cb Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 6 +++--- trace-events | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/blockdev.c b/blockdev.c index 0267fa3..f910ac5 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1070,12 +1070,12 @@ static QObject *qobject_from_block_job(BlockJob *job) job->speed); } -static void block_stream_cb(void *opaque, int ret) +static void block_job_cb(void *opaque, int ret) { BlockDriverState *bs = opaque; QObject *obj; - trace_block_stream_cb(bs, bs->job, ret); + trace_block_job_cb(bs, bs->job, ret); assert(bs->job); obj = qobject_from_block_job(bs->job); @@ -1117,7 +1117,7 @@ void qmp_block_stream(const char *device, bool has_base, } stream_start(bs, base_bs, base, has_speed ? speed : 0, - block_stream_cb, bs, &local_err); + block_job_cb, bs, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); return; diff --git a/trace-events b/trace-events index dbc3007..29771a7 100644 --- a/trace-events +++ b/trace-events @@ -79,7 +79,7 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) " # blockdev.c qmp_block_job_cancel(void *job) "job %p" -block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" +block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" # hw/virtio-blk.c -- cgit v1.1 From 79fac5680d3680c9fb43d14a8d4e39ced25530f8 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 27 Sep 2012 13:29:15 -0400 Subject: block: helper function, to find the base image of a chain This is a simple helper function, that will return the base image of a given image chain. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 16 ++++++++++++++++ block.h | 1 + 2 files changed, 17 insertions(+) diff --git a/block.c b/block.c index 54209a5..d7a6d14 100644 --- a/block.c +++ b/block.c @@ -3117,6 +3117,22 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs) return 1 + bdrv_get_backing_file_depth(bs->backing_hd); } +BlockDriverState *bdrv_find_base(BlockDriverState *bs) +{ + BlockDriverState *curr_bs = NULL; + + if (!bs) { + return NULL; + } + + curr_bs = bs; + + while (curr_bs->backing_hd) { + curr_bs = curr_bs->backing_hd; + } + return curr_bs; +} + #define NB_SUFFIXES 4 char *get_human_readable_size(char *buf, int buf_size, int64_t size) diff --git a/block.h b/block.h index 8c9b424..e9249c4 100644 --- a/block.h +++ b/block.h @@ -207,6 +207,7 @@ int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, BlockDriverState *base); BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); typedef struct BdrvCheckResult { -- cgit v1.1 From ed61fc10e8c8d2d1287f7edae92e44f5c97c540d Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 27 Sep 2012 13:29:16 -0400 Subject: QAPI: add command for live block commit, 'block-commit' The command for live block commit is added, which has the following arguments: device: the block device to perform the commit on (mandatory) base: the base image to commit into; optional (if not specified, it is the underlying original image) top: the top image of the commit - all data from inside top down to base will be committed into base (mandatory for now; see note, below) speed: maximum speed, in bytes/sec Note: Eventually this command will support merging down the active layer, but that code is not yet complete. If the active layer is passed in as top, then an error will be returned. Once merging down the active layer is supported, the 'top' argument may become optional, and default to the active layer. The is done as a block job, so upon completion a BLOCK_JOB_COMPLETED will be emitted. Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- QMP/qmp-events.txt | 6 ++++-- blockdev.c | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ qapi-schema.json | 34 ++++++++++++++++++++++++++++++++ qmp-commands.hx | 6 ++++++ 4 files changed, 102 insertions(+), 2 deletions(-) diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 2878058..4491020 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -50,7 +50,8 @@ Emitted when a block job has been cancelled. Data: -- "type": Job type ("stream" for image streaming, json-string) +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) - "device": Device name (json-string) - "len": Maximum progress value (json-int) - "offset": Current progress value (json-int) @@ -73,7 +74,8 @@ Emitted when a block job has completed. Data: -- "type": Job type ("stream" for image streaming, json-string) +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) - "device": Device name (json-string) - "len": Maximum progress value (json-int) - "offset": Current progress value (json-int) diff --git a/blockdev.c b/blockdev.c index f910ac5..cea22e4 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1131,6 +1131,64 @@ void qmp_block_stream(const char *device, bool has_base, trace_qmp_block_stream(bs, bs->job); } +void qmp_block_commit(const char *device, + bool has_base, const char *base, const char *top, + bool has_speed, int64_t speed, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *base_bs, *top_bs; + Error *local_err = NULL; + /* This will be part of the QMP command, if/when the + * BlockdevOnError change for blkmirror makes it in + */ + BlockErrorAction on_error = BLOCK_ERR_REPORT; + + /* drain all i/o before commits */ + bdrv_drain_all(); + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + if (base && has_base) { + base_bs = bdrv_find_backing_image(bs, base); + } else { + base_bs = bdrv_find_base(bs); + } + + if (base_bs == NULL) { + error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); + return; + } + + /* default top_bs is the active layer */ + top_bs = bs; + + if (top) { + if (strcmp(bs->filename, top) != 0) { + top_bs = bdrv_find_backing_image(bs, top); + } + } + + if (top_bs == NULL) { + error_setg(errp, "Top image file %s not found", top ? top : "NULL"); + return; + } + + commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, + &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); +} + static BlockJob *find_block_job(const char *device) { BlockDriverState *bs; diff --git a/qapi-schema.json b/qapi-schema.json index 14e4419..5816545 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1468,6 +1468,40 @@ 'returns': 'str' } ## +# @block-commit +# +# Live commit of data from overlay image nodes into backing nodes - i.e., +# writes data between 'top' and 'base' into 'base'. +# +# @device: the name of the device +# +# @base: #optional The file name of the backing image to write data into. +# If not specified, this is the deepest backing image +# +# @top: The file name of the backing image within the image chain, +# which contains the topmost data to be committed down. +# Note, the active layer as 'top' is currently unsupported. +# +# If top == base, that is an error. +# +# +# @speed: #optional the maximum speed, in bytes per second +# +# Returns: Nothing on success +# If commit or stream is already active on this device, DeviceInUse +# If @device does not exist, DeviceNotFound +# If image commit is not supported by this device, NotSupported +# If @base or @top is invalid, a generic error is returned +# If @top is the active layer, or omitted, a generic error is returned +# If @speed is invalid, InvalidParameter +# +# Since: 1.3 +# +## +{ 'command': 'block-commit', + 'data': { 'device': 'str', '*base': 'str', 'top': 'str', + '*speed': 'int' } } + # @migrate_cancel # # Cancel the current executing migration process. diff --git a/qmp-commands.hx b/qmp-commands.hx index 6e21ddb..a55a3f5 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -792,6 +792,12 @@ EQMP }, { + .name = "block-commit", + .args_type = "device:B,base:s?,top:s,speed:o?", + .mhandler.cmd_new = qmp_marshal_input_block_commit, + }, + + { .name = "block-job-set-speed", .args_type = "device:B,speed:o", .mhandler.cmd_new = qmp_marshal_input_block_job_set_speed, -- cgit v1.1 From 747051cd97c384e70eec0ceb905f08e630b6a1c4 Mon Sep 17 00:00:00 2001 From: Jeff Cody Date: Thu, 27 Sep 2012 13:29:17 -0400 Subject: qemu-iotests: add initial tests for live block commit Derived from the streaming test cases (030), this adds the following 9 tests: 1. For the following image chain, commit [mid] into [backing], and use qemu-io to verify [backing] has its original data, as well as the data from [mid] [backing] <-- [mid] <-- [test] 2. Verifies that 'block-commit' with the 'speed' parameter sets the speed parameter, as reported by 'query-block-jobs' 3. Verifies that a bogus 'device' parameter to 'block-commit' results in error 4-9: Appropriate error values returned for the following argument errors: * top == base * top is nonexistent * base is nonexistent * top == active layer (this is currently not supported) * top and base arguments are reversed * top argument is omitted Signed-off-by: Jeff Cody Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/040 | 178 +++++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/040.out | 5 ++ tests/qemu-iotests/group | 1 + 3 files changed, 184 insertions(+) create mode 100755 tests/qemu-iotests/040 create mode 100644 tests/qemu-iotests/040.out diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 new file mode 100755 index 0000000..258e7ea --- /dev/null +++ b/tests/qemu-iotests/040 @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# +# Tests for image block commit. +# +# Copyright (C) 2012 IBM, Corp. +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Test for live block commit +# Derived from Image Streaming Test 030 + +import time +import os +import iotests +from iotests import qemu_img, qemu_io +import struct + +backing_img = os.path.join(iotests.test_dir, 'backing.img') +mid_img = os.path.join(iotests.test_dir, 'mid.img') +test_img = os.path.join(iotests.test_dir, 'test.img') + +class ImageCommitTestCase(iotests.QMPTestCase): + '''Abstract base class for image commit test cases''' + + def assert_no_active_commit(self): + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return', []) + + def cancel_and_wait(self, drive='drive0'): + '''Cancel a block job and wait for it to finish''' + result = self.vm.qmp('block-job-cancel', device=drive) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_CANCELLED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', drive) + cancelled = True + + self.assert_no_active_commit() + + def create_image(self, name, size): + file = open(name, 'w') + i = 0 + while i < size: + sector = struct.pack('>l504xl', i / 512, i / 512) + file.write(sector) + i = i + 512 + file.close() + + +class TestSingleDrive(ImageCommitTestCase): + image_len = 1 * 1024 * 1024 + test_len = 1 * 1024 * 256 + + def setUp(self): + self.create_image(backing_img, TestSingleDrive.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + qemu_io('-c', 'write -P 0xab 0 524288', backing_img) + qemu_io('-c', 'write -P 0xef 524288 524288', mid_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(mid_img) + os.remove(backing_img) + + def test_commit(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_commit() + self.vm.shutdown() + + self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) + self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + + def test_device_not_found(self): + result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_top_same_base(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same') + + def test_top_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image file badfile not found') + + def test_base_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') + + def test_top_is_active(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported') + + def test_top_and_base_reversed(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img}) + + def test_top_omitted(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing") + + +class TestSetSpeed(ImageCommitTestCase): + image_len = 80 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', backing_img, str(TestSetSpeed.image_len)) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(mid_img) + os.remove(backing_img) + + def test_set_speed(self): + self.assert_no_active_commit() + + result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024) + self.assert_qmp(result, 'return', {}) + + # Ensure the speed we set was accepted + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 1024 * 1024) + + self.cancel_and_wait() + + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out new file mode 100644 index 0000000..dae404e --- /dev/null +++ b/tests/qemu-iotests/040.out @@ -0,0 +1,5 @@ +......... +---------------------------------------------------------------------- +Ran 9 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index ebb5ca4..4b54fa6 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -46,3 +46,4 @@ 037 rw auto backing 038 rw auto backing 039 rw auto +040 rw auto -- cgit v1.1 From 7ef1507045d35bd4d220cf3bfe0e80e7ac101d00 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:44 +0200 Subject: qerror/block: introduce QERR_BLOCK_JOB_NOT_ACTIVE The DeviceNotActive text is not a particularly good match, add a separate text while keeping the same class. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 4 ++-- qerror.h | 3 +++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c index cea22e4..d824612 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1205,7 +1205,7 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) BlockJob *job = find_block_job(device); if (!job) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } @@ -1217,7 +1217,7 @@ void qmp_block_job_cancel(const char *device, Error **errp) BlockJob *job = find_block_job(device); if (!job) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } diff --git a/qerror.h b/qerror.h index d0a76a4..485c773 100644 --- a/qerror.h +++ b/qerror.h @@ -48,6 +48,9 @@ void assert_no_error(Error *err); #define QERR_BASE_NOT_FOUND \ ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found" +#define QERR_BLOCK_JOB_NOT_ACTIVE \ + ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'" -- cgit v1.1 From 7e03a9342fff50f2a6a4086906fa66e6c6d4351d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:46 +0200 Subject: block: fix documentation of block_job_cancel_sync Do this in a separate commit before we move the functions to blockjob.h. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block_int.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/block_int.h b/block_int.h index 56164a7..6b6b3ab 100644 --- a/block_int.h +++ b/block_int.h @@ -425,15 +425,13 @@ void block_job_cancel(BlockJob *job); bool block_job_is_cancelled(BlockJob *job); /** - * block_job_cancel: + * block_job_cancel_sync: * @job: The job to be canceled. * - * Asynchronously cancel the job and wait for it to reach a quiescent - * state. Note that the completion callback will still be called - * asynchronously, hence it is *not* valid to call #bdrv_delete - * immediately after #block_job_cancel_sync. Users of block jobs - * will usually protect the BlockDriverState objects with a reference - * count, should this be a concern. + * Synchronously cancel the job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. * * Returns the return value from the job if the job actually completed * during the call, or -ECANCELED if it was canceled. -- cgit v1.1 From 2f0c9fe64c6a2887047b7eab05cd85b2643234c8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:47 +0200 Subject: block: move job APIs to separate files Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- Makefile.objs | 5 +- block.c | 128 +------------------------------------ block.h | 2 + block/Makefile.objs | 5 +- block/commit.c | 1 + block/stream.c | 1 + block_int.h | 151 -------------------------------------------- blockdev.c | 1 + blockjob.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++ blockjob.h | 179 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 354 insertions(+), 282 deletions(-) create mode 100644 blockjob.c create mode 100644 blockjob.h diff --git a/Makefile.objs b/Makefile.objs index 7c1c682..b1f3e22 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -42,7 +42,8 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o # block-obj-y is code used by both qemu system emulation and qemu-img block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o -block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o uri.o +block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o +block-obj-y += qemu-progress.o qemu-sockets.o uri.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o @@ -59,7 +60,7 @@ endif # suppress *all* target specific code in case of system emulation, i.e. a # single QEMU executable should support all CPUs and machines. -common-obj-y = $(block-obj-y) blockdev.o +common-obj-y = $(block-obj-y) blockdev.o block/ common-obj-y += net.o net/ common-obj-y += qom/ common-obj-y += readline.o console.o cursor.o diff --git a/block.c b/block.c index d7a6d14..8202f27 100644 --- a/block.c +++ b/block.c @@ -26,6 +26,7 @@ #include "trace.h" #include "monitor.h" #include "block_int.h" +#include "blockjob.h" #include "module.h" #include "qjson.h" #include "qemu-coroutine.h" @@ -4406,130 +4407,3 @@ out: return ret; } - -void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, - int64_t speed, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp) -{ - BlockJob *job; - - if (bs->job || bdrv_in_use(bs)) { - error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); - return NULL; - } - bdrv_set_in_use(bs, 1); - - job = g_malloc0(job_type->instance_size); - job->job_type = job_type; - job->bs = bs; - job->cb = cb; - job->opaque = opaque; - job->busy = true; - bs->job = job; - - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - Error *local_err = NULL; - - block_job_set_speed(job, speed, &local_err); - if (error_is_set(&local_err)) { - bs->job = NULL; - g_free(job); - bdrv_set_in_use(bs, 0); - error_propagate(errp, local_err); - return NULL; - } - } - return job; -} - -void block_job_complete(BlockJob *job, int ret) -{ - BlockDriverState *bs = job->bs; - - assert(bs->job == job); - job->cb(job->opaque, ret); - bs->job = NULL; - g_free(job); - bdrv_set_in_use(bs, 0); -} - -void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) -{ - Error *local_err = NULL; - - if (!job->job_type->set_speed) { - error_set(errp, QERR_NOT_SUPPORTED); - return; - } - job->job_type->set_speed(job, speed, &local_err); - if (error_is_set(&local_err)) { - error_propagate(errp, local_err); - return; - } - - job->speed = speed; -} - -void block_job_cancel(BlockJob *job) -{ - job->cancelled = true; - if (job->co && !job->busy) { - qemu_coroutine_enter(job->co, NULL); - } -} - -bool block_job_is_cancelled(BlockJob *job) -{ - return job->cancelled; -} - -struct BlockCancelData { - BlockJob *job; - BlockDriverCompletionFunc *cb; - void *opaque; - bool cancelled; - int ret; -}; - -static void block_job_cancel_cb(void *opaque, int ret) -{ - struct BlockCancelData *data = opaque; - - data->cancelled = block_job_is_cancelled(data->job); - data->ret = ret; - data->cb(data->opaque, ret); -} - -int block_job_cancel_sync(BlockJob *job) -{ - struct BlockCancelData data; - BlockDriverState *bs = job->bs; - - assert(bs->job == job); - - /* Set up our own callback to store the result and chain to - * the original callback. - */ - data.job = job; - data.cb = job->cb; - data.opaque = job->opaque; - data.ret = -EINPROGRESS; - job->cb = block_job_cancel_cb; - job->opaque = &data; - block_job_cancel(job); - while (data.ret == -EINPROGRESS) { - qemu_aio_wait(); - } - return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; -} - -void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) -{ - /* Check cancellation *before* setting busy = false, too! */ - if (!block_job_is_cancelled(job)) { - job->busy = false; - co_sleep_ns(clock, ns); - job->busy = true; - } -} diff --git a/block.h b/block.h index e9249c4..bd002d5 100644 --- a/block.h +++ b/block.h @@ -6,9 +6,11 @@ #include "qemu-option.h" #include "qemu-coroutine.h" #include "qobject.h" +#include "qapi-types.h" /* block.c */ typedef struct BlockDriver BlockDriver; +typedef struct BlockJob BlockJob; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ diff --git a/block/Makefile.objs b/block/Makefile.objs index 81fd43c..554f429 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -3,11 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-obj-y += stream.o -block-obj-y += commit.o block-obj-$(CONFIG_WIN32) += raw-win32.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o block-obj-$(CONFIG_GLUSTERFS) += gluster.o + +common-obj-y += stream.o +common-obj-y += commit.o diff --git a/block/commit.c b/block/commit.c index 624ec5f..cabb470 100644 --- a/block/commit.c +++ b/block/commit.c @@ -14,6 +14,7 @@ #include "trace.h" #include "block_int.h" +#include "blockjob.h" #include "qemu/ratelimit.h" enum { diff --git a/block/stream.c b/block/stream.c index c4f87dd..57e4be7 100644 --- a/block/stream.c +++ b/block/stream.c @@ -13,6 +13,7 @@ #include "trace.h" #include "block_int.h" +#include "blockjob.h" #include "qemu/ratelimit.h" enum { diff --git a/block_int.h b/block_int.h index 6b6b3ab..61dc73b 100644 --- a/block_int.h +++ b/block_int.h @@ -67,73 +67,6 @@ typedef struct BlockIOBaseValue { uint64_t ios[2]; } BlockIOBaseValue; -typedef struct BlockJob BlockJob; - -/** - * BlockJobType: - * - * A class type for block job objects. - */ -typedef struct BlockJobType { - /** Derived BlockJob struct size */ - size_t instance_size; - - /** String describing the operation, part of query-block-jobs QMP API */ - const char *job_type; - - /** Optional callback for job types that support setting a speed limit */ - void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); -} BlockJobType; - -/** - * BlockJob: - * - * Long-running operation on a BlockDriverState. - */ -struct BlockJob { - /** The job type, including the job vtable. */ - const BlockJobType *job_type; - - /** The block device on which the job is operating. */ - BlockDriverState *bs; - - /** - * The coroutine that executes the job. If not NULL, it is - * reentered when busy is false and the job is cancelled. - */ - Coroutine *co; - - /** - * Set to true if the job should cancel itself. The flag must - * always be tested just before toggling the busy flag from false - * to true. After a job has been cancelled, it should only yield - * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. - */ - bool cancelled; - - /** - * Set to false by the job while it is in a quiescent state, where - * no I/O is pending and the job has yielded on any condition - * that is not detected by #qemu_aio_wait, such as a timer. - */ - bool busy; - - /** Offset that is published by the query-block-jobs QMP API */ - int64_t offset; - - /** Length that is published by the query-block-jobs QMP API */ - int64_t len; - - /** Speed that was set with @block_job_set_speed. */ - int64_t speed; - - /** The completion function that will be called when the job completes. */ - BlockDriverCompletionFunc *cb; - - /** The opaque value that is passed to the completion function. */ - void *opaque; -}; - struct BlockDriver { const char *format_name; int instance_size; @@ -355,90 +288,6 @@ int is_windows_drive(const char *filename); #endif /** - * block_job_create: - * @job_type: The class object for the newly-created job. - * @bs: The block - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @errp: Error object. - * - * Create a new long-running block device job and return it. The job - * will call @cb asynchronously when the job completes. Note that - * @bs may have been closed at the time the @cb it is called. If - * this is the case, the job may be reported as either cancelled or - * completed. - * - * This function is not part of the public job interface; it should be - * called from a wrapper that is specific to the job type. - */ -void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, - int64_t speed, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp); - -/** - * block_job_sleep_ns: - * @job: The job that calls the function. - * @clock: The clock to sleep on. - * @ns: How many nanoseconds to stop for. - * - * Put the job to sleep (assuming that it wasn't canceled) for @ns - * nanoseconds. Canceling the job will interrupt the wait immediately. - */ -void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); - -/** - * block_job_complete: - * @job: The job being completed. - * @ret: The status code. - * - * Call the completion function that was registered at creation time, and - * free @job. - */ -void block_job_complete(BlockJob *job, int ret); - -/** - * block_job_set_speed: - * @job: The job to set the speed for. - * @speed: The new value - * @errp: Error object. - * - * Set a rate-limiting parameter for the job; the actual meaning may - * vary depending on the job type. - */ -void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); - -/** - * block_job_cancel: - * @job: The job to be canceled. - * - * Asynchronously cancel the specified job. - */ -void block_job_cancel(BlockJob *job); - -/** - * block_job_is_cancelled: - * @job: The job being queried. - * - * Returns whether the job is scheduled for cancellation. - */ -bool block_job_is_cancelled(BlockJob *job); - -/** - * block_job_cancel_sync: - * @job: The job to be canceled. - * - * Synchronously cancel the job. The completion callback is called - * before the function returns. The job may actually complete - * instead of canceling itself; the circumstances under which this - * happens depend on the kind of job that is active. - * - * Returns the return value from the job if the job actually completed - * during the call, or -ECANCELED if it was canceled. - */ -int block_job_cancel_sync(BlockJob *job); - -/** * stream_start: * @bs: Block device to operate on. * @base: Block device that will become the new base, or %NULL to diff --git a/blockdev.c b/blockdev.c index d824612..d3f91c0 100644 --- a/blockdev.c +++ b/blockdev.c @@ -9,6 +9,7 @@ #include "blockdev.h" #include "hw/block-common.h" +#include "blockjob.h" #include "monitor.h" #include "qerror.h" #include "qemu-option.h" diff --git a/blockjob.c b/blockjob.c new file mode 100644 index 0000000..9737a43 --- /dev/null +++ b/blockjob.c @@ -0,0 +1,163 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "config-host.h" +#include "qemu-common.h" +#include "trace.h" +#include "monitor.h" +#include "block.h" +#include "blockjob.h" +#include "block_int.h" +#include "qjson.h" +#include "qemu-coroutine.h" +#include "qmp-commands.h" +#include "qemu-timer.h" + +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + BlockJob *job; + + if (bs->job || bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return NULL; + } + bdrv_set_in_use(bs, 1); + + job = g_malloc0(job_type->instance_size); + job->job_type = job_type; + job->bs = bs; + job->cb = cb; + job->opaque = opaque; + job->busy = true; + bs->job = job; + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (error_is_set(&local_err)) { + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); + error_propagate(errp, local_err); + return NULL; + } + } + return job; +} + +void block_job_complete(BlockJob *job, int ret) +{ + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + job->cb(job->opaque, ret); + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); +} + +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + Error *local_err = NULL; + + if (!job->job_type->set_speed) { + error_set(errp, QERR_NOT_SUPPORTED); + return; + } + job->job_type->set_speed(job, speed, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + job->speed = speed; +} + +void block_job_cancel(BlockJob *job) +{ + job->cancelled = true; + if (job->co && !job->busy) { + qemu_coroutine_enter(job->co, NULL); + } +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return job->cancelled; +} + +struct BlockCancelData { + BlockJob *job; + BlockDriverCompletionFunc *cb; + void *opaque; + bool cancelled; + int ret; +}; + +static void block_job_cancel_cb(void *opaque, int ret) +{ + struct BlockCancelData *data = opaque; + + data->cancelled = block_job_is_cancelled(data->job); + data->ret = ret; + data->cb(data->opaque, ret); +} + +int block_job_cancel_sync(BlockJob *job) +{ + struct BlockCancelData data; + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + + /* Set up our own callback to store the result and chain to + * the original callback. + */ + data.job = job; + data.cb = job->cb; + data.opaque = job->opaque; + data.ret = -EINPROGRESS; + job->cb = block_job_cancel_cb; + job->opaque = &data; + block_job_cancel(job); + while (data.ret == -EINPROGRESS) { + qemu_aio_wait(); + } + return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; +} + +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) +{ + /* Check cancellation *before* setting busy = false, too! */ + if (!block_job_is_cancelled(job)) { + job->busy = false; + co_sleep_ns(clock, ns); + job->busy = true; + } +} diff --git a/blockjob.h b/blockjob.h new file mode 100644 index 0000000..753f5bc --- /dev/null +++ b/blockjob.h @@ -0,0 +1,179 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCKJOB_H +#define BLOCKJOB_H 1 + +#include "block.h" + +/** + * BlockJobType: + * + * A class type for block job objects. + */ +typedef struct BlockJobType { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + const char *job_type; + + /** Optional callback for job types that support setting a speed limit */ + void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); +} BlockJobType; + +/** + * BlockJob: + * + * Long-running operation on a BlockDriverState. + */ +struct BlockJob { + /** The job type, including the job vtable. */ + const BlockJobType *job_type; + + /** The block device on which the job is operating. */ + BlockDriverState *bs; + + /** + * The coroutine that executes the job. If not NULL, it is + * reentered when busy is false and the job is cancelled. + */ + Coroutine *co; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has been cancelled, it should only yield + * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. + */ + bool cancelled; + + /** + * Set to false by the job while it is in a quiescent state, where + * no I/O is pending and the job has yielded on any condition + * that is not detected by #qemu_aio_wait, such as a timer. + */ + bool busy; + + /** Offset that is published by the query-block-jobs QMP API */ + int64_t offset; + + /** Length that is published by the query-block-jobs QMP API */ + int64_t len; + + /** Speed that was set with @block_job_set_speed. */ + int64_t speed; + + /** The completion function that will be called when the job completes. */ + BlockDriverCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ + void *opaque; +}; + +/** + * block_job_create: + * @job_type: The class object for the newly-created job. + * @bs: The block + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * block_job_sleep_ns: + * @job: The job that calls the function. + * @clock: The clock to sleep on. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * nanoseconds. Canceling the job will interrupt the wait immediately. + */ +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); + +/** + * block_job_complete: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ +void block_job_complete(BlockJob *job, int ret); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * @errp: Error object. + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); + +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the specified job. + */ +void block_job_cancel(BlockJob *job); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ +bool block_job_is_cancelled(BlockJob *job); + +/** + * block_job_cancel_sync: + * @job: The job to be canceled. + * + * Synchronously cancel the job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. + */ +int block_job_cancel_sync(BlockJob *job); + +#endif -- cgit v1.1 From 30e628b709fcd30db298878e435e3bc93919c48c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:48 +0200 Subject: block: add block_job_query Extract it out of the implementation of info block-jobs. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 15 ++------------- blockjob.c | 11 +++++++++++ blockjob.h | 8 ++++++++ 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/blockdev.c b/blockdev.c index d3f91c0..9a98ce9 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1232,19 +1232,8 @@ static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) BlockJob *job = bs->job; if (job) { - BlockJobInfoList *elem; - BlockJobInfo *info = g_new(BlockJobInfo, 1); - *info = (BlockJobInfo){ - .type = g_strdup(job->job_type->job_type), - .device = g_strdup(bdrv_get_device_name(bs)), - .len = job->len, - .offset = job->offset, - .speed = job->speed, - }; - - elem = g_new0(BlockJobInfoList, 1); - elem->value = info; - + BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); + elem->value = block_job_query(bs->job); (*prev)->next = elem; *prev = elem; } diff --git a/blockjob.c b/blockjob.c index 9737a43..dea63f8 100644 --- a/blockjob.c +++ b/blockjob.c @@ -161,3 +161,14 @@ void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) job->busy = true; } } + +BlockJobInfo *block_job_query(BlockJob *job) +{ + BlockJobInfo *info = g_new0(BlockJobInfo, 1); + info->type = g_strdup(job->job_type->job_type); + info->device = g_strdup(bdrv_get_device_name(job->bs)); + info->len = job->len; + info->offset = job->offset; + info->speed = job->speed; + return info; +} diff --git a/blockjob.h b/blockjob.h index 753f5bc..f3d8d58 100644 --- a/blockjob.h +++ b/blockjob.h @@ -163,6 +163,14 @@ void block_job_cancel(BlockJob *job); bool block_job_is_cancelled(BlockJob *job); /** + * block_job_query: + * @job: The job to get information about. + * + * Return information about a job. + */ +BlockJobInfo *block_job_query(BlockJob *job); + +/** * block_job_cancel_sync: * @job: The job to be canceled. * -- cgit v1.1 From 8d65883fff22e00d70f5880a26b7a1248c59a2d8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:49 +0200 Subject: qmp: add 'busy' member to BlockJobInfo Because pausing a job is asynchronous, we need to know whether it has completed. This is described by the "busy" field of BlockJob; copy it to BlockJobInfo. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockjob.c | 1 + qapi-schema.json | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/blockjob.c b/blockjob.c index dea63f8..64c9d2d 100644 --- a/blockjob.c +++ b/blockjob.c @@ -168,6 +168,7 @@ BlockJobInfo *block_job_query(BlockJob *job) info->type = g_strdup(job->job_type->job_type); info->device = g_strdup(bdrv_get_device_name(job->bs)); info->len = job->len; + info->busy = job->busy; info->offset = job->offset; info->speed = job->speed; return info; diff --git a/qapi-schema.json b/qapi-schema.json index 5816545..6fc6eda 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1098,6 +1098,9 @@ # # @len: the maximum progress value # +# @busy: false if the job is known to be in a quiescent state, with +# no pending I/O. Since 1.3. +# # @offset: the current progress value # # @speed: the rate limit, bytes per second @@ -1106,7 +1109,7 @@ ## { 'type': 'BlockJobInfo', 'data': {'type': 'str', 'device': 'str', 'len': 'int', - 'offset': 'int', 'speed': 'int'} } + 'offset': 'int', 'busy': 'bool', 'speed': 'int'} } ## # @query-block-jobs: -- cgit v1.1 From 8acc72a4d20910d522516dab31272fe66da8da28 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:50 +0200 Subject: block: add support for job pause/resume Job pausing reuses the existing support for cancellable sleeps. A pause happens at the next sleeping point and lasts until the coroutine is re-entered explicitly. Cancellation was already doing a forced resume, so implement it explicitly in terms of resume. Paused jobs cannot be canceled without first resuming them. This ensures that I/O errors are never missed by management. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 4 ++++ blockjob.c | 35 ++++++++++++++++++++++++++++++----- blockjob.h | 31 +++++++++++++++++++++++++++++++ qapi-schema.json | 5 ++++- qerror.h | 3 +++ 5 files changed, 72 insertions(+), 6 deletions(-) diff --git a/blockdev.c b/blockdev.c index 9a98ce9..612dd71 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1221,6 +1221,10 @@ void qmp_block_job_cancel(const char *device, Error **errp) error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } + if (job->paused) { + error_set(errp, QERR_BLOCK_JOB_PAUSED, device); + return; + } trace_qmp_block_job_cancel(job); block_job_cancel(job); diff --git a/blockjob.c b/blockjob.c index 64c9d2d..8219f73 100644 --- a/blockjob.c +++ b/blockjob.c @@ -99,14 +99,30 @@ void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) job->speed = speed; } -void block_job_cancel(BlockJob *job) +void block_job_pause(BlockJob *job) { - job->cancelled = true; + job->paused = true; +} + +bool block_job_is_paused(BlockJob *job) +{ + return job->paused; +} + +void block_job_resume(BlockJob *job) +{ + job->paused = false; if (job->co && !job->busy) { qemu_coroutine_enter(job->co, NULL); } } +void block_job_cancel(BlockJob *job) +{ + job->cancelled = true; + block_job_resume(job); +} + bool block_job_is_cancelled(BlockJob *job) { return job->cancelled; @@ -154,12 +170,20 @@ int block_job_cancel_sync(BlockJob *job) void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) { + assert(job->busy); + /* Check cancellation *before* setting busy = false, too! */ - if (!block_job_is_cancelled(job)) { - job->busy = false; + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (block_job_is_paused(job)) { + qemu_coroutine_yield(); + } else { co_sleep_ns(clock, ns); - job->busy = true; } + job->busy = true; } BlockJobInfo *block_job_query(BlockJob *job) @@ -169,6 +193,7 @@ BlockJobInfo *block_job_query(BlockJob *job) info->device = g_strdup(bdrv_get_device_name(job->bs)); info->len = job->len; info->busy = job->busy; + info->paused = job->paused; info->offset = job->offset; info->speed = job->speed; return info; diff --git a/blockjob.h b/blockjob.h index f3d8d58..ece5afa 100644 --- a/blockjob.h +++ b/blockjob.h @@ -70,6 +70,12 @@ struct BlockJob { bool cancelled; /** + * Set to true if the job is either paused, or will pause itself + * as soon as possible (if busy == true). + */ + bool paused; + + /** * Set to false by the job while it is in a quiescent state, where * no I/O is pending and the job has yielded on any condition * that is not detected by #qemu_aio_wait, such as a timer. @@ -171,6 +177,31 @@ bool block_job_is_cancelled(BlockJob *job); BlockJobInfo *block_job_query(BlockJob *job); /** + * block_job_pause: + * @job: The job to be paused. + * + * Asynchronously pause the specified job. + */ +void block_job_pause(BlockJob *job); + +/** + * block_job_resume: + * @job: The job to be resumed. + * + * Resume the specified job. + */ +void block_job_resume(BlockJob *job); + +/** + * block_job_is_paused: + * @job: The job being queried. + * + * Returns whether the job is currently paused, or will pause + * as soon as it reaches a sleeping point. + */ +bool block_job_is_paused(BlockJob *job); + +/** * block_job_cancel_sync: * @job: The job to be canceled. * diff --git a/qapi-schema.json b/qapi-schema.json index 6fc6eda..86a6c7f 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1101,6 +1101,9 @@ # @busy: false if the job is known to be in a quiescent state, with # no pending I/O. Since 1.3. # +# @paused: whether the job is paused or, if @busy is true, will +# pause itself as soon as possible. Since 1.3. +# # @offset: the current progress value # # @speed: the rate limit, bytes per second @@ -1109,7 +1112,7 @@ ## { 'type': 'BlockJobInfo', 'data': {'type': 'str', 'device': 'str', 'len': 'int', - 'offset': 'int', 'busy': 'bool', 'speed': 'int'} } + 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int'} } ## # @query-block-jobs: diff --git a/qerror.h b/qerror.h index 485c773..c91708c 100644 --- a/qerror.h +++ b/qerror.h @@ -51,6 +51,9 @@ void assert_no_error(Error *err); #define QERR_BLOCK_JOB_NOT_ACTIVE \ ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'" +#define QERR_BLOCK_JOB_PAUSED \ + ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'" -- cgit v1.1 From 6e37fb811ac86739e5ed30dba3a8e4848bd21b56 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:51 +0200 Subject: qmp: add block-job-pause and block-job-resume Add QMP commands matching the functionality. Paused jobs cannot be canceled without first resuming them. This ensures that I/O errors are never missed by management. However, an optional force argument can be specified to allow that. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- blockdev.c | 35 +++++++++++++++++++++++++++++++++-- hmp-commands.hx | 35 ++++++++++++++++++++++++++++++++--- hmp.c | 23 ++++++++++++++++++++++- hmp.h | 2 ++ qapi-schema.json | 46 +++++++++++++++++++++++++++++++++++++++++++++- qmp-commands.hx | 12 +++++++++++- trace-events | 2 ++ 7 files changed, 147 insertions(+), 8 deletions(-) diff --git a/blockdev.c b/blockdev.c index 612dd71..f097e57 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1213,15 +1213,20 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) block_job_set_speed(job, speed, errp); } -void qmp_block_job_cancel(const char *device, Error **errp) +void qmp_block_job_cancel(const char *device, + bool has_force, bool force, Error **errp) { BlockJob *job = find_block_job(device); + if (!has_force) { + force = false; + } + if (!job) { error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } - if (job->paused) { + if (job->paused && !force) { error_set(errp, QERR_BLOCK_JOB_PAUSED, device); return; } @@ -1230,6 +1235,32 @@ void qmp_block_job_cancel(const char *device, Error **errp) block_job_cancel(job); } +void qmp_block_job_pause(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_pause(job); + block_job_pause(job); +} + +void qmp_block_job_resume(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_resume(job); + block_job_resume(job); +} + static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) { BlockJobInfoList **prev = opaque; diff --git a/hmp-commands.hx b/hmp-commands.hx index ed67e99..27d90a2 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -99,9 +99,10 @@ ETEXI { .name = "block_job_cancel", - .args_type = "device:B", - .params = "device", - .help = "stop an active background block operation", + .args_type = "force:-f,device:B", + .params = "[-f] device", + .help = "stop an active background block operation (use -f" + "\n\t\t\t if the operation is currently paused)", .mhandler.cmd = hmp_block_job_cancel, }, @@ -112,6 +113,34 @@ Stop an active block streaming operation. ETEXI { + .name = "block_job_pause", + .args_type = "device:B", + .params = "device", + .help = "pause an active background block operation", + .mhandler.cmd = hmp_block_job_pause, + }, + +STEXI +@item block_job_pause +@findex block_job_pause +Pause an active block streaming operation. +ETEXI + + { + .name = "block_job_resume", + .args_type = "device:B", + .params = "device", + .help = "resume a paused background block operation", + .mhandler.cmd = hmp_block_job_resume, + }, + +STEXI +@item block_job_resume +@findex block_job_resume +Resume a paused block streaming operation. +ETEXI + + { .name = "eject", .args_type = "force:-f,device:B", .params = "[-f] device", diff --git a/hmp.c b/hmp.c index ba6fbd3..55601f7 100644 --- a/hmp.c +++ b/hmp.c @@ -950,8 +950,29 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict) { Error *error = NULL; const char *device = qdict_get_str(qdict, "device"); + bool force = qdict_get_try_bool(qdict, "force", 0); - qmp_block_job_cancel(device, &error); + qmp_block_job_cancel(device, true, force, &error); + + hmp_handle_error(mon, &error); +} + +void hmp_block_job_pause(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_pause(device, &error); + + hmp_handle_error(mon, &error); +} + +void hmp_block_job_resume(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_resume(device, &error); hmp_handle_error(mon, &error); } diff --git a/hmp.h b/hmp.h index 48b9c59..71ea384 100644 --- a/hmp.h +++ b/hmp.h @@ -64,6 +64,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); void hmp_block_stream(Monitor *mon, const QDict *qdict); void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); +void hmp_block_job_pause(Monitor *mon, const QDict *qdict); +void hmp_block_job_resume(Monitor *mon, const QDict *qdict); void hmp_migrate(Monitor *mon, const QDict *qdict); void hmp_device_del(Monitor *mon, const QDict *qdict); void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict); diff --git a/qapi-schema.json b/qapi-schema.json index 86a6c7f..0f2b1a0 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1893,12 +1893,56 @@ # # @device: the device name # +# @force: #optional whether to allow cancellation of a paused job (default +# false). Since 1.3. +# # Returns: Nothing on success # If no background operation is active on this device, DeviceNotActive # # Since: 1.1 ## -{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } } +{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } } + +## +# @block-job-pause: +# +# Pause an active background block operation. +# +# This command returns immediately after marking the active background block +# operation for pausing. It is an error to call this command if no +# operation is in progress. Pausing an already paused job has no cumulative +# effect; a single block-job-resume command will resume the job. +# +# The operation will pause as soon as possible. No event is emitted when +# the operation is actually paused. Cancelling a paused job automatically +# resumes it. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-pause', 'data': { 'device': 'str' } } + +## +# @block-job-resume: +# +# Resume an active background block operation. +# +# This command returns immediately after resuming a paused background block +# operation. It is an error to call this command if no operation is in +# progress. Resuming an already running job is not an error. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-resume', 'data': { 'device': 'str' } } ## # @ObjectTypeInfo: diff --git a/qmp-commands.hx b/qmp-commands.hx index a55a3f5..71d7c25 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -805,10 +805,20 @@ EQMP { .name = "block-job-cancel", - .args_type = "device:B", + .args_type = "device:B,force:b?", .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, }, { + .name = "block-job-pause", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_pause, + }, + { + .name = "block-job-resume", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_resume, + }, + { .name = "transaction", .args_type = "actions:q", .mhandler.cmd_new = qmp_marshal_input_transaction, diff --git a/trace-events b/trace-events index 29771a7..42b66f1 100644 --- a/trace-events +++ b/trace-events @@ -79,6 +79,8 @@ commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) " # blockdev.c qmp_block_job_cancel(void *job) "job %p" +qmp_block_job_pause(void *job) "job %p" +qmp_block_job_resume(void *job) "job %p" block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" -- cgit v1.1 From 0c81734765c9af1705f8e531b9431d63ee8ffd3d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:52 +0200 Subject: qemu-iotests: add test for pausing a streaming operation These check that a paused streaming job does not advance its offset. Sometimes the new test fails; the map is different between the source and the destination of the streaming because qemu-io does not always pack adjacent clusters that have the same allocated/unallocated state. However, this also happens with the existing test_stream testcase, and is better fixed in qemu-io. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 40 ++++++++++++++++++++++++++++++++++++++-- tests/qemu-iotests/030.out | 4 ++-- tests/qemu-iotests/group | 2 +- 3 files changed, 41 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 55b16f8..dfacdf1 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -18,6 +18,7 @@ # along with this program. If not, see . # +import time import os import iotests from iotests import qemu_img, qemu_io @@ -98,6 +99,43 @@ class TestSingleDrive(ImageStreamingTestCase): qemu_io('-c', 'map', test_img), 'image file map does not match backing file after streaming') + def test_stream_pause(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + self.assertEqual(qemu_io('-c', 'map', backing_img), + qemu_io('-c', 'map', test_img), + 'image file map does not match backing file after streaming') + def test_stream_partial(self): self.assert_no_active_streams() @@ -173,8 +211,6 @@ class TestStreamStop(ImageStreamingTestCase): os.remove(backing_img) def test_stream_stop(self): - import time - self.assert_no_active_streams() result = self.vm.qmp('block-stream', device='drive0') diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 2f7d390..594c16f 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -....... +........ ---------------------------------------------------------------------- -Ran 7 tests +Ran 8 tests OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 4b54fa6..66d2ba9 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -36,7 +36,7 @@ 027 rw auto quick 028 rw backing auto 029 rw auto quick -030 rw auto +030 rw auto backing 031 rw auto quick 032 rw auto 033 rw auto -- cgit v1.1 From ff06f5f351c3b19d5cdcb8bcb9f9cc9a01cac066 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:54 +0200 Subject: iostatus: rename BlockErrorAction, BlockQMPEventAction We want to remove knowledge of BLOCK_ERR_STOP_ENOSPC from drivers; drivers should only be told whether to stop/report/ignore the error. On the other hand, we want to keep using the nicer BlockErrorAction name in the drivers. So rename the enums, while leaving aside the names of the enum values for now. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 8 ++++---- block.h | 12 ++++++------ block_int.h | 2 +- hw/ide/core.c | 2 +- hw/scsi-disk.c | 2 +- hw/virtio-blk.c | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/block.c b/block.c index 8202f27..7b45082 100644 --- a/block.c +++ b/block.c @@ -1387,7 +1387,7 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, } void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockQMPEventAction action, int is_read) + BlockErrorAction action, int is_read) { QObject *data; const char *action_str; @@ -2474,14 +2474,14 @@ void bdrv_set_io_limits(BlockDriverState *bs, bs->io_limits_enabled = bdrv_io_limits_enabled(bs); } -void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, - BlockErrorAction on_write_error) +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) { bs->on_read_error = on_read_error; bs->on_write_error = on_write_error; } -BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read) +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read) { return is_read ? bs->on_read_error : bs->on_write_error; } diff --git a/block.h b/block.h index bd002d5..038621f 100644 --- a/block.h +++ b/block.h @@ -93,11 +93,11 @@ typedef struct BlockDevOps { typedef enum { BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC, BLOCK_ERR_STOP_ANY -} BlockErrorAction; +} BlockdevOnError; typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP -} BlockQMPEventAction; +} BlockErrorAction; typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; @@ -114,7 +114,7 @@ void bdrv_iostatus_disable(BlockDriverState *bs); bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); void bdrv_iostatus_set_err(BlockDriverState *bs, int error); void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockQMPEventAction action, int is_read); + BlockErrorAction action, int is_read); void bdrv_info_print(Monitor *mon, const QObject *data); void bdrv_info(Monitor *mon, QObject **ret_data); void bdrv_stats_print(Monitor *mon, const QObject *data); @@ -284,9 +284,9 @@ int bdrv_has_zero_init(BlockDriverState *bs); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); -void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, - BlockErrorAction on_write_error); -BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read); +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_sg(BlockDriverState *bs); int bdrv_enable_write_cache(BlockDriverState *bs); diff --git a/block_int.h b/block_int.h index 61dc73b..b98c770 100644 --- a/block_int.h +++ b/block_int.h @@ -262,7 +262,7 @@ struct BlockDriverState { /* NOTE: the following infos are only hints for real hardware drivers. They are not used by the block driver */ - BlockErrorAction on_read_error, on_write_error; + BlockdevOnError on_read_error, on_write_error; bool iostatus_enabled; BlockDeviceIoStatus iostatus; char device_name[32]; diff --git a/hw/ide/core.c b/hw/ide/core.c index d6fb69c..57b9fa4 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -557,7 +557,7 @@ void ide_dma_error(IDEState *s) static int ide_handle_rw_error(IDEState *s, int error, int op) { int is_read = (op & BM_STATUS_RETRY_READ); - BlockErrorAction action = bdrv_get_on_error(s->bs, is_read); + BlockdevOnError action = bdrv_get_on_error(s->bs, is_read); if (action == BLOCK_ERR_IGNORE) { bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 95e9158..fef83a3 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -388,7 +388,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) { int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read); + BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read); if (action == BLOCK_ERR_IGNORE) { bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read); diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 6f6d172..01e537d 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -66,7 +66,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, int is_read) { - BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read); + BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read); VirtIOBlock *s = req->dev; if (action == BLOCK_ERR_IGNORE) { -- cgit v1.1 From 92aa5c6d77ac29574c1717bcf57827fa1e586f31 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:55 +0200 Subject: iostatus: move BlockdevOnError declaration to QAPI This will let block-stream reuse the enum. Places that used the enums are renamed accordingly. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 6 +++--- block.h | 5 ----- block/commit.c | 14 +++++++------- block_int.h | 2 +- blockdev.c | 14 +++++++------- hw/fdc.c | 4 ++-- hw/ide/core.c | 6 +++--- hw/scsi-disk.c | 6 +++--- hw/scsi-generic.c | 4 ++-- hw/virtio-blk.c | 6 +++--- qapi-schema.json | 23 +++++++++++++++++++++++ 11 files changed, 54 insertions(+), 36 deletions(-) diff --git a/block.c b/block.c index 7b45082..1c3ebd7 100644 --- a/block.c +++ b/block.c @@ -4209,9 +4209,9 @@ void bdrv_iostatus_enable(BlockDriverState *bs) bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) { return (bs->iostatus_enabled && - (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC || - bs->on_write_error == BLOCK_ERR_STOP_ANY || - bs->on_read_error == BLOCK_ERR_STOP_ANY)); + (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || + bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || + bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); } void bdrv_iostatus_disable(BlockDriverState *bs) diff --git a/block.h b/block.h index 038621f..ee81129 100644 --- a/block.h +++ b/block.h @@ -91,11 +91,6 @@ typedef struct BlockDevOps { #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) typedef enum { - BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC, - BLOCK_ERR_STOP_ANY -} BlockdevOnError; - -typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP } BlockErrorAction; diff --git a/block/commit.c b/block/commit.c index cabb470..733c914 100644 --- a/block/commit.c +++ b/block/commit.c @@ -34,7 +34,7 @@ typedef struct CommitBlockJob { BlockDriverState *active; BlockDriverState *top; BlockDriverState *base; - BlockErrorAction on_error; + BlockdevOnError on_error; int base_flags; int orig_overlay_flags; } CommitBlockJob; @@ -126,9 +126,9 @@ wait: bytes_written += n * BDRV_SECTOR_SIZE; } if (ret < 0) { - if (s->on_error == BLOCK_ERR_STOP_ANY || - s->on_error == BLOCK_ERR_REPORT || - (s->on_error == BLOCK_ERR_STOP_ENOSPC && ret == -ENOSPC)) { + if (s->on_error == BLOCKDEV_ON_ERROR_STOP || + s->on_error == BLOCKDEV_ON_ERROR_REPORT|| + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) { goto exit_free_buf; } else { n = 0; @@ -182,7 +182,7 @@ static BlockJobType commit_job_type = { void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, - BlockErrorAction on_error, BlockDriverCompletionFunc *cb, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { CommitBlockJob *s; @@ -192,8 +192,8 @@ void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *overlay_bs; Error *local_err = NULL; - if ((on_error == BLOCK_ERR_STOP_ANY || - on_error == BLOCK_ERR_STOP_ENOSPC) && + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && !bdrv_iostatus_is_enabled(bs)) { error_set(errp, QERR_INVALID_PARAMETER_COMBINATION); return; diff --git a/block_int.h b/block_int.h index b98c770..615aafc 100644 --- a/block_int.h +++ b/block_int.h @@ -323,7 +323,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, */ void commit_start(BlockDriverState *bs, BlockDriverState *base, BlockDriverState *top, int64_t speed, - BlockErrorAction on_error, BlockDriverCompletionFunc *cb, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); #endif /* BLOCK_INT_H */ diff --git a/blockdev.c b/blockdev.c index f097e57..6330715 100644 --- a/blockdev.c +++ b/blockdev.c @@ -241,13 +241,13 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo) static int parse_block_error_action(const char *buf, int is_read) { if (!strcmp(buf, "ignore")) { - return BLOCK_ERR_IGNORE; + return BLOCKDEV_ON_ERROR_IGNORE; } else if (!is_read && !strcmp(buf, "enospc")) { - return BLOCK_ERR_STOP_ENOSPC; + return BLOCKDEV_ON_ERROR_ENOSPC; } else if (!strcmp(buf, "stop")) { - return BLOCK_ERR_STOP_ANY; + return BLOCKDEV_ON_ERROR_STOP; } else if (!strcmp(buf, "report")) { - return BLOCK_ERR_REPORT; + return BLOCKDEV_ON_ERROR_REPORT; } else { error_report("'%s' invalid %s error action", buf, is_read ? "read" : "write"); @@ -433,7 +433,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) return NULL; } - on_write_error = BLOCK_ERR_STOP_ENOSPC; + on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { error_report("werror is not supported by this bus type"); @@ -446,7 +446,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) } } - on_read_error = BLOCK_ERR_REPORT; + on_read_error = BLOCKDEV_ON_ERROR_REPORT; if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) { error_report("rerror is not supported by this bus type"); @@ -1143,7 +1143,7 @@ void qmp_block_commit(const char *device, /* This will be part of the QMP command, if/when the * BlockdevOnError change for blkmirror makes it in */ - BlockErrorAction on_error = BLOCK_ERR_REPORT; + BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT; /* drain all i/o before commits */ bdrv_drain_all(); diff --git a/hw/fdc.c b/hw/fdc.c index 08830c1..43b0f20 100644 --- a/hw/fdc.c +++ b/hw/fdc.c @@ -1994,11 +1994,11 @@ static int fdctrl_connect_drives(FDCtrl *fdctrl) drive->fdctrl = fdctrl; if (drive->bs) { - if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) { + if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { error_report("fdc doesn't support drive option werror"); return -1; } - if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) { + if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) { error_report("fdc doesn't support drive option rerror"); return -1; } diff --git a/hw/ide/core.c b/hw/ide/core.c index 57b9fa4..2620e87 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -559,13 +559,13 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) int is_read = (op & BM_STATUS_RETRY_READ); BlockdevOnError action = bdrv_get_on_error(s->bs, is_read); - if (action == BLOCK_ERR_IGNORE) { + if (action == BLOCKDEV_ON_ERROR_IGNORE) { bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); return 0; } - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC) + || action == BLOCKDEV_ON_ERROR_STOP) { s->bus->dma->ops->set_unit(s->bus->dma, s->unit); s->bus->error_status = op; bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index fef83a3..c295326 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -390,13 +390,13 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read); - if (action == BLOCK_ERR_IGNORE) { + if (action == BLOCKDEV_ON_ERROR_IGNORE) { bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read); return 0; } - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC) + || action == BLOCKDEV_ON_ERROR_STOP) { bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read); vm_stop(RUN_STATE_IO_ERROR); diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index a5eb663..d904534 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -400,11 +400,11 @@ static int scsi_generic_initfn(SCSIDevice *s) return -1; } - if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) { + if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { error_report("Device doesn't support drive option werror"); return -1; } - if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) { + if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) { error_report("Device doesn't support drive option rerror"); return -1; } diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 01e537d..f178fa8 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -69,13 +69,13 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read); VirtIOBlock *s = req->dev; - if (action == BLOCK_ERR_IGNORE) { + if (action == BLOCKDEV_ON_ERROR_IGNORE) { bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); return 0; } - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC) + || action == BLOCKDEV_ON_ERROR_STOP) { req->next = s->rq; s->rq = req; bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); diff --git a/qapi-schema.json b/qapi-schema.json index 0f2b1a0..a726413 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1088,6 +1088,29 @@ { 'command': 'query-pci', 'returns': ['PciInfo'] } ## +# @BlockdevOnError: +# +# An enumeration of possible behaviors for errors on I/O operations. +# The exact meaning depends on whether the I/O was initiated by a guest +# or by a block job +# +# @report: for guest operations, report the error to the guest; +# for jobs, cancel the job +# +# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR +# or BLOCK_JOB_ERROR) +# +# @enospc: same as @stop on ENOSPC, same as @report otherwise. +# +# @stop: for guest operations, stop the virtual machine; +# for jobs, pause the job +# +# Since: 1.3 +## +{ 'enum': 'BlockdevOnError', + 'data': ['report', 'ignore', 'enospc', 'stop'] } + +## # @BlockJobInfo: # # Information about a long-running block device operation. -- cgit v1.1 From 1ceee0d5cc841fc9ca8e72b81450b598ab307f14 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:56 +0200 Subject: iostatus: change is_read to a bool Do this while we are touching this part of the code, before introducing more uses of "int is_read". Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 4 ++-- block.h | 4 ++-- blockdev.c | 2 +- hw/ide/core.c | 2 +- hw/ide/pci.c | 4 ++-- hw/scsi-disk.c | 2 +- hw/virtio-blk.c | 4 ++-- 7 files changed, 11 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index 1c3ebd7..0bae046 100644 --- a/block.c +++ b/block.c @@ -1387,7 +1387,7 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, } void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockErrorAction action, int is_read) + BlockErrorAction action, bool is_read) { QObject *data; const char *action_str; @@ -2481,7 +2481,7 @@ void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, bs->on_write_error = on_write_error; } -BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read) +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) { return is_read ? bs->on_read_error : bs->on_write_error; } diff --git a/block.h b/block.h index ee81129..47dd905 100644 --- a/block.h +++ b/block.h @@ -109,7 +109,7 @@ void bdrv_iostatus_disable(BlockDriverState *bs); bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); void bdrv_iostatus_set_err(BlockDriverState *bs, int error); void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockErrorAction action, int is_read); + BlockErrorAction action, bool is_read); void bdrv_info_print(Monitor *mon, const QObject *data); void bdrv_info(Monitor *mon, QObject **ret_data); void bdrv_stats_print(Monitor *mon, const QObject *data); @@ -281,7 +281,7 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, BlockdevOnError on_write_error); -BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, int is_read); +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_sg(BlockDriverState *bs); int bdrv_enable_write_cache(BlockDriverState *bs); diff --git a/blockdev.c b/blockdev.c index 6330715..d52a830 100644 --- a/blockdev.c +++ b/blockdev.c @@ -238,7 +238,7 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo) qemu_bh_schedule(s->bh); } -static int parse_block_error_action(const char *buf, int is_read) +static int parse_block_error_action(const char *buf, bool is_read) { if (!strcmp(buf, "ignore")) { return BLOCKDEV_ON_ERROR_IGNORE; diff --git a/hw/ide/core.c b/hw/ide/core.c index 2620e87..c03db4a 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -556,7 +556,7 @@ void ide_dma_error(IDEState *s) static int ide_handle_rw_error(IDEState *s, int error, int op) { - int is_read = (op & BM_STATUS_RETRY_READ); + bool is_read = (op & BM_STATUS_RETRY_READ) != 0; BlockdevOnError action = bdrv_get_on_error(s->bs, is_read); if (action == BLOCKDEV_ON_ERROR_IGNORE) { diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 88c0942..644533f 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -188,7 +188,7 @@ static void bmdma_restart_bh(void *opaque) { BMDMAState *bm = opaque; IDEBus *bus = bm->bus; - int is_read; + bool is_read; int error_status; qemu_bh_delete(bm->bh); @@ -198,7 +198,7 @@ static void bmdma_restart_bh(void *opaque) return; } - is_read = !!(bus->error_status & BM_STATUS_RETRY_READ); + is_read = (bus->error_status & BM_STATUS_RETRY_READ) != 0; /* The error status must be cleared before resubmitting the request: The * request may fail again, and this case can only be distinguished if the diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index c295326..2dd99a9 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -386,7 +386,7 @@ static void scsi_read_data(SCSIRequest *req) */ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) { - int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); + bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read); diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index f178fa8..1ac2483 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -64,7 +64,7 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) } static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - int is_read) + bool is_read) { BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read); VirtIOBlock *s = req->dev; @@ -98,7 +98,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) trace_virtio_blk_rw_complete(req, ret); if (ret) { - int is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); + bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); if (virtio_blk_handle_rw_error(req, -ret, is_read)) return; } -- cgit v1.1 From 3e1caa5f76a9104a0d574b0f28b3dafe986a8408 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:57 +0200 Subject: iostatus: reorganize io error code Move the common part of IDE/SCSI/virtio error handling to the block layer. The new function bdrv_error_action subsumes all three of bdrv_emit_qmp_error_event, vm_stop, bdrv_iostatus_set_err. The same scheme will be used for errors in block jobs. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block.c | 46 ++++++++++++++++++++++++++++++++++++++-------- block.h | 5 +++-- hw/ide/core.c | 20 +++++--------------- hw/scsi-disk.c | 23 +++++++---------------- hw/virtio-blk.c | 19 +++++-------------- qemu-tool.c | 6 ++++++ 6 files changed, 64 insertions(+), 55 deletions(-) diff --git a/block.c b/block.c index 0bae046..8b0ba67 100644 --- a/block.c +++ b/block.c @@ -29,6 +29,7 @@ #include "blockjob.h" #include "module.h" #include "qjson.h" +#include "sysemu.h" #include "qemu-coroutine.h" #include "qmp-commands.h" #include "qemu-timer.h" @@ -1386,8 +1387,8 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, } } -void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockErrorAction action, bool is_read) +static void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, + BlockErrorAction action, bool is_read) { QObject *data; const char *action_str; @@ -2486,6 +2487,39 @@ BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) return is_read ? bs->on_read_error : bs->on_write_error; } +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) +{ + BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_STOP: + return BDRV_ACTION_STOP; + case BLOCKDEV_ON_ERROR_REPORT: + return BDRV_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BDRV_ACTION_IGNORE; + default: + abort(); + } +} + +/* This is done by device models because, while the block layer knows + * about the error, it does not know whether an operation comes from + * the device or the block layer (from a job, for example). + */ +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error) +{ + assert(error >= 0); + bdrv_emit_qmp_error_event(bs, action, is_read); + if (action == BDRV_ACTION_STOP) { + vm_stop(RUN_STATE_IO_ERROR); + bdrv_iostatus_set_err(bs, error); + } +} + int bdrv_is_read_only(BlockDriverState *bs) { return bs->read_only; @@ -4226,14 +4260,10 @@ void bdrv_iostatus_reset(BlockDriverState *bs) } } -/* XXX: Today this is set by device models because it makes the implementation - quite simple. However, the block layer knows about the error, so it's - possible to implement this without device models being involved */ void bdrv_iostatus_set_err(BlockDriverState *bs, int error) { - if (bdrv_iostatus_is_enabled(bs) && - bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { - assert(error >= 0); + assert(bdrv_iostatus_is_enabled(bs)); + if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : BLOCK_DEVICE_IO_STATUS_FAILED; } diff --git a/block.h b/block.h index 47dd905..e2d89d7 100644 --- a/block.h +++ b/block.h @@ -108,8 +108,6 @@ void bdrv_iostatus_reset(BlockDriverState *bs); void bdrv_iostatus_disable(BlockDriverState *bs); bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); void bdrv_iostatus_set_err(BlockDriverState *bs, int error); -void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockErrorAction action, bool is_read); void bdrv_info_print(Monitor *mon, const QObject *data); void bdrv_info(Monitor *mon, QObject **ret_data); void bdrv_stats_print(Monitor *mon, const QObject *data); @@ -282,6 +280,9 @@ int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, BlockdevOnError on_write_error); BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read); +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error); +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_sg(BlockDriverState *bs); int bdrv_enable_write_cache(BlockDriverState *bs); diff --git a/hw/ide/core.c b/hw/ide/core.c index c03db4a..d683a8c 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -557,31 +557,21 @@ void ide_dma_error(IDEState *s) static int ide_handle_rw_error(IDEState *s, int error, int op) { bool is_read = (op & BM_STATUS_RETRY_READ) != 0; - BlockdevOnError action = bdrv_get_on_error(s->bs, is_read); + BlockErrorAction action = bdrv_get_error_action(s->bs, is_read, error); - if (action == BLOCKDEV_ON_ERROR_IGNORE) { - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC) - || action == BLOCKDEV_ON_ERROR_STOP) { + if (action == BDRV_ACTION_STOP) { s->bus->dma->ops->set_unit(s->bus->dma, s->unit); s->bus->error_status = op; - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->bs, error); - } else { + } else if (action == BDRV_ACTION_REPORT) { if (op & BM_STATUS_DMA_RETRY) { dma_buf_commit(s); ide_dma_error(s); } else { ide_rw_error(s); } - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read); } - - return 1; + bdrv_error_action(s->bs, action, is_read, error); + return action != BDRV_ACTION_IGNORE; } void ide_dma_cb(void *opaque, int ret) diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 2dd99a9..99bb02e 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -388,21 +388,9 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) { bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - BlockdevOnError action = bdrv_get_on_error(s->qdev.conf.bs, is_read); + BlockErrorAction action = bdrv_get_error_action(s->qdev.conf.bs, is_read, error); - if (action == BLOCKDEV_ON_ERROR_IGNORE) { - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC) - || action == BLOCKDEV_ON_ERROR_STOP) { - - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->qdev.conf.bs, error); - scsi_req_retry(&r->req); - } else { + if (action == BDRV_ACTION_REPORT) { switch (error) { case ENOMEDIUM: scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); @@ -417,9 +405,12 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) scsi_check_condition(r, SENSE_CODE(IO_ERROR)); break; } - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read); } - return 1; + bdrv_error_action(s->qdev.conf.bs, action, is_read, error); + if (action == BDRV_ACTION_STOP) { + scsi_req_retry(&r->req); + } + return action != BDRV_ACTION_IGNORE; } static void scsi_write_complete(void * opaque, int ret) diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 1ac2483..e25cc96 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -66,29 +66,20 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, bool is_read) { - BlockdevOnError action = bdrv_get_on_error(req->dev->bs, is_read); + BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error); VirtIOBlock *s = req->dev; - if (action == BLOCKDEV_ON_ERROR_IGNORE) { - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCKDEV_ON_ERROR_ENOSPC) - || action == BLOCKDEV_ON_ERROR_STOP) { + if (action == BDRV_ACTION_STOP) { req->next = s->rq; s->rq = req; - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->bs, error); - } else { + } else if (action == BDRV_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); bdrv_acct_done(s->bs, &req->acct); g_free(req); - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read); } - return 1; + bdrv_error_action(s->bs, action, is_read, error); + return action != BDRV_ACTION_IGNORE; } static void virtio_blk_rw_complete(void *opaque, int ret) diff --git a/qemu-tool.c b/qemu-tool.c index 18205ba..f2f9813 100644 --- a/qemu-tool.c +++ b/qemu-tool.c @@ -19,6 +19,7 @@ #include "qemu-log.h" #include "migration.h" #include "main-loop.h" +#include "sysemu.h" #include "qemu_socket.h" #include "slirp/libslirp.h" @@ -37,6 +38,11 @@ const char *qemu_get_vm_name(void) Monitor *cur_mon; +void vm_stop(RunState state) +{ + abort(); +} + int monitor_cur_is_qmp(void) { return 0; -- cgit v1.1 From 32c81a4a6ecc3f50efc9c270a269e4d3d8a9fbd5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:58 +0200 Subject: block: introduce block job error The following behaviors are possible: 'report': The behavior is the same as in 1.1. An I/O error, respectively during a read or a write, will complete the job immediately with an error code. 'ignore': An I/O error, respectively during a read or a write, will be ignored. For streaming, the job will complete with an error and the backing file will be left in place. For mirroring, the sector will be marked again as dirty and re-examined later. 'stop': The job will be paused and the job iostatus will be set to failed or nospace, while the VM will keep running. This can only be specified if the block device has rerror=stop and werror=stop or enospc. 'enospc': Behaves as 'stop' for ENOSPC errors, 'report' for others. In all cases, even for 'report', the I/O error is reported as a QMP event BLOCK_JOB_ERROR, with the same arguments as BLOCK_IO_ERROR. It is possible that while stopping the VM a BLOCK_IO_ERROR event will be reported and will clobber the event from BLOCK_JOB_ERROR, or vice versa. This is not really avoidable since stopping the VM completes all pending I/O requests. In fact, it is already possible now that a series of BLOCK_IO_ERROR events are reported with rerror=stop, because vm_stop calls bdrv_drain_all and this can generate further errors. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- QMP/qmp-events.txt | 22 +++++++++++++++++++ block.c | 9 ++++---- block_int.h | 4 ++++ blockjob.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++------ blockjob.h | 25 ++++++++++++++++++++++ monitor.c | 1 + monitor.h | 1 + qapi-schema.json | 7 +++++- 8 files changed, 120 insertions(+), 12 deletions(-) diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 4491020..987c575 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -96,6 +96,28 @@ Example: "speed": 0 }, "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } +BLOCK_JOB_ERROR +--------------- + +Emitted when a block job encounters an error. + +Data: + +- "device": device name (json-string) +- "operation": I/O operation (json-string, "read" or "write") +- "action": action that has been taken, it's one of the following (json-string): + "ignore": error has been ignored, the job may fail later + "report": error will be reported and the job canceled + "stop": error caused job to be paused + +Example: + +{ "event": "BLOCK_JOB_ERROR", + "data": { "device": "ide0-hd1", + "operation": "write", + "action": "stop" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + DEVICE_TRAY_MOVED ----------------- diff --git a/block.c b/block.c index 8b0ba67..c108a76 100644 --- a/block.c +++ b/block.c @@ -1387,8 +1387,9 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, } } -static void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockErrorAction action, bool is_read) +void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, + enum MonitorEvent ev, + BlockErrorAction action, bool is_read) { QObject *data; const char *action_str; @@ -1411,7 +1412,7 @@ static void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, bdrv->device_name, action_str, is_read ? "read" : "write"); - monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data); + monitor_protocol_event(ev, data); qobject_decref(data); } @@ -2513,7 +2514,7 @@ void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, bool is_read, int error) { assert(error >= 0); - bdrv_emit_qmp_error_event(bs, action, is_read); + bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); if (action == BDRV_ACTION_STOP) { vm_stop(RUN_STATE_IO_ERROR); bdrv_iostatus_set_err(bs, error); diff --git a/block_int.h b/block_int.h index 615aafc..785d43d 100644 --- a/block_int.h +++ b/block_int.h @@ -31,6 +31,7 @@ #include "qemu-timer.h" #include "qapi-types.h" #include "qerror.h" +#include "monitor.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 @@ -286,6 +287,9 @@ void bdrv_set_io_limits(BlockDriverState *bs, #ifdef _WIN32 int is_windows_drive(const char *filename); #endif +void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, + enum MonitorEvent ev, + BlockErrorAction action, bool is_read); /** * stream_start: diff --git a/blockjob.c b/blockjob.c index 8219f73..f55f55a 100644 --- a/blockjob.c +++ b/blockjob.c @@ -112,6 +112,7 @@ bool block_job_is_paused(BlockJob *job) void block_job_resume(BlockJob *job) { job->paused = false; + block_job_iostatus_reset(job); if (job->co && !job->busy) { qemu_coroutine_enter(job->co, NULL); } @@ -128,6 +129,11 @@ bool block_job_is_cancelled(BlockJob *job) return job->cancelled; } +void block_job_iostatus_reset(BlockJob *job) +{ + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + struct BlockCancelData { BlockJob *job; BlockDriverCompletionFunc *cb; @@ -189,12 +195,55 @@ void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) BlockJobInfo *block_job_query(BlockJob *job) { BlockJobInfo *info = g_new0(BlockJobInfo, 1); - info->type = g_strdup(job->job_type->job_type); - info->device = g_strdup(bdrv_get_device_name(job->bs)); - info->len = job->len; - info->busy = job->busy; - info->paused = job->paused; - info->offset = job->offset; - info->speed = job->speed; + info->type = g_strdup(job->job_type->job_type); + info->device = g_strdup(bdrv_get_device_name(job->bs)); + info->len = job->len; + info->busy = job->busy; + info->paused = job->paused; + info->offset = job->offset; + info->speed = job->speed; + info->io_status = job->iostatus; return info; } + +static void block_job_iostatus_set_err(BlockJob *job, int error) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : + BLOCK_DEVICE_IO_STATUS_FAILED; + } +} + + +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error) +{ + BlockErrorAction action; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_STOP: + action = BDRV_ACTION_STOP; + break; + case BLOCKDEV_ON_ERROR_REPORT: + action = BDRV_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_IGNORE: + action = BDRV_ACTION_IGNORE; + break; + default: + abort(); + } + bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read); + if (action == BDRV_ACTION_STOP) { + block_job_pause(job); + block_job_iostatus_set_err(job, error); + if (bs != job->bs) { + bdrv_iostatus_set_err(bs, error); + } + } + return action; +} diff --git a/blockjob.h b/blockjob.h index ece5afa..930cc3c 100644 --- a/blockjob.h +++ b/blockjob.h @@ -82,6 +82,9 @@ struct BlockJob { */ bool busy; + /** Status that is published by the query-block-jobs QMP API */ + BlockDeviceIoStatus iostatus; + /** Offset that is published by the query-block-jobs QMP API */ int64_t offset; @@ -215,4 +218,26 @@ bool block_job_is_paused(BlockJob *job); */ int block_job_cancel_sync(BlockJob *job); +/** + * block_job_iostatus_reset: + * @job: The job whose I/O status should be reset. + * + * Reset I/O status on @job. + */ +void block_job_iostatus_reset(BlockJob *job); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @bs: The block device on which to set an I/O error. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. + */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error); #endif diff --git a/monitor.c b/monitor.c index 67064e2..d4bd5fe 100644 --- a/monitor.c +++ b/monitor.c @@ -450,6 +450,7 @@ static const char *monitor_event_names[] = { [QEVENT_SPICE_DISCONNECTED] = "SPICE_DISCONNECTED", [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED", [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED", + [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR", [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED", [QEVENT_SUSPEND] = "SUSPEND", [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK", diff --git a/monitor.h b/monitor.h index 64c1561..43040af 100644 --- a/monitor.h +++ b/monitor.h @@ -38,6 +38,7 @@ typedef enum MonitorEvent { QEVENT_SPICE_DISCONNECTED, QEVENT_BLOCK_JOB_COMPLETED, QEVENT_BLOCK_JOB_CANCELLED, + QEVENT_BLOCK_JOB_ERROR, QEVENT_DEVICE_TRAY_MOVED, QEVENT_SUSPEND, QEVENT_SUSPEND_DISK, diff --git a/qapi-schema.json b/qapi-schema.json index a726413..14e7a0d 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1131,11 +1131,14 @@ # # @speed: the rate limit, bytes per second # +# @io-status: the status of the job (since 1.3) +# # Since: 1.1 ## { 'type': 'BlockJobInfo', 'data': {'type': 'str', 'device': 'str', 'len': 'int', - 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int'} } + 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int', + 'io-status': 'BlockDeviceIoStatus'} } ## # @query-block-jobs: @@ -1958,6 +1961,8 @@ # operation. It is an error to call this command if no operation is in # progress. Resuming an already running job is not an error. # +# This command also clears the error status of the job. +# # @device: the device name # # Returns: Nothing on success -- cgit v1.1 From 1d809098aa9518cda41c2cf6e660d3d602614907 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:22:59 +0200 Subject: stream: add on-error argument This patch adds support for error management to streaming. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/stream.c | 28 +++++++++++++++++++++++++++- block_int.h | 3 ++- blockdev.c | 11 ++++++++--- hmp.c | 3 ++- qapi-schema.json | 9 +++++++-- qmp-commands.hx | 2 +- 6 files changed, 47 insertions(+), 9 deletions(-) diff --git a/block/stream.c b/block/stream.c index 57e4be7..7926652 100644 --- a/block/stream.c +++ b/block/stream.c @@ -31,6 +31,7 @@ typedef struct StreamBlockJob { BlockJob common; RateLimit limit; BlockDriverState *base; + BlockdevOnError on_error; char backing_file_id[1024]; } StreamBlockJob; @@ -78,6 +79,7 @@ static void coroutine_fn stream_run(void *opaque) BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; int64_t sector_num, end; + int error = 0; int ret = 0; int n = 0; void *buf; @@ -142,7 +144,19 @@ wait: ret = stream_populate(bs, sector_num, n, buf); } if (ret < 0) { - break; + BlockErrorAction action = + block_job_error_action(&s->common, s->common.bs, s->on_error, + true, -ret); + if (action == BDRV_ACTION_STOP) { + n = 0; + continue; + } + if (error == 0) { + error = ret; + } + if (action == BDRV_ACTION_REPORT) { + break; + } } ret = 0; @@ -154,6 +168,9 @@ wait: bdrv_disable_copy_on_read(bs); } + /* Do not remove the backing file if an error was there but ignored. */ + ret = error; + if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { @@ -189,11 +206,19 @@ static BlockJobType stream_job_type = { void stream_start(BlockDriverState *bs, BlockDriverState *base, const char *base_id, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { StreamBlockJob *s; + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-error"); + return; + } + s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp); if (!s) { return; @@ -204,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); } + s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run); trace_stream_start(bs, base, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block_int.h b/block_int.h index 785d43d..f4bae04 100644 --- a/block_int.h +++ b/block_int.h @@ -299,6 +299,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, * @base_id: The file name that will be written to @bs as the new * backing file if the job completes. Ignored if @base is %NULL. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -310,7 +311,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, * @base_id in the written image and to @base in the live BlockDriverState. */ void stream_start(BlockDriverState *bs, BlockDriverState *base, - const char *base_id, int64_t speed, + const char *base_id, int64_t speed, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); diff --git a/blockdev.c b/blockdev.c index d52a830..5f18dfa 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1096,13 +1096,18 @@ static void block_job_cb(void *opaque, int ret) } void qmp_block_stream(const char *device, bool has_base, - const char *base, bool has_speed, - int64_t speed, Error **errp) + const char *base, bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, + Error **errp) { BlockDriverState *bs; BlockDriverState *base_bs = NULL; Error *local_err = NULL; + if (!has_on_error) { + on_error = BLOCKDEV_ON_ERROR_REPORT; + } + bs = bdrv_find(device); if (!bs) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); @@ -1118,7 +1123,7 @@ void qmp_block_stream(const char *device, bool has_base, } stream_start(bs, base_bs, base, has_speed ? speed : 0, - block_job_cb, bs, &local_err); + on_error, block_job_cb, bs, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); return; diff --git a/hmp.c b/hmp.c index 55601f7..df789b2 100644 --- a/hmp.c +++ b/hmp.c @@ -930,7 +930,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) int64_t speed = qdict_get_try_int(qdict, "speed", 0); qmp_block_stream(device, base != NULL, base, - qdict_haskey(qdict, "speed"), speed, &error); + qdict_haskey(qdict, "speed"), speed, + BLOCKDEV_ON_ERROR_REPORT, true, &error); hmp_handle_error(mon, &error); } diff --git a/qapi-schema.json b/qapi-schema.json index 14e7a0d..768b4c7 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1869,13 +1869,18 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @on-error: #optional the action to take on an error (default report). +# 'stop' and 'enospc' can only be used if the block device +# supports io-status (see BlockInfo). Since 1.3. +# # Returns: Nothing on success # If @device does not exist, DeviceNotFound # # Since: 1.1 ## -{ 'command': 'block-stream', 'data': { 'device': 'str', '*base': 'str', - '*speed': 'int' } } +{ 'command': 'block-stream', + 'data': { 'device': 'str', '*base': 'str', '*speed': 'int', + '*on-error': 'BlockdevOnError' } } ## # @block-job-set-speed: diff --git a/qmp-commands.hx b/qmp-commands.hx index 71d7c25..ea93b1d 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -787,7 +787,7 @@ EQMP { .name = "block-stream", - .args_type = "device:B,base:s?,speed:o?", + .args_type = "device:B,base:s?,speed:o?,on-error:s?", .mhandler.cmd_new = qmp_marshal_input_block_stream, }, -- cgit v1.1 From 8f96b5be92fbd74798b97b1dc1ff5fbbe249ed11 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:23:00 +0200 Subject: blkdebug: process all set_state rules in the old state Currently it is impossible to write a blkdebug script that ping-pongs between two states, because the second set-state rule will use the state that is set in the first. If you have [set-state] event = "..." state = "1" new_state = "2" [set-state] event = "..." state = "2" new_state = "1" for example the state will remain locked at 1. This can be fixed by first processing all rules, and then setting the state. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- block/blkdebug.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 59dcea0..1206d52 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -28,6 +28,7 @@ typedef struct BDRVBlkdebugState { int state; + int new_state; QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; } BDRVBlkdebugState; @@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs) } static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - int old_state, bool injected) + bool injected) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ - if (rule->state && rule->state != old_state) { + if (rule->state && rule->state != s->state) { return injected; } @@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, break; case ACTION_SET_STATE: - s->state = rule->options.set_state.new_state; + s->new_state = rule->options.set_state.new_state; break; } return injected; @@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule; - int old_state = s->state; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; + s->new_state = s->state; QLIST_FOREACH(rule, &s->rules[event], next) { - injected = process_rule(bs, rule, old_state, injected); + injected = process_rule(bs, rule, injected); } + s->state = s->new_state; } static int64_t blkdebug_getlength(BlockDriverState *bs) -- cgit v1.1 From 4f45056841abced5d57485edf0ff1d2ffc042cb1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:23:01 +0200 Subject: qemu-iotests: map underscore to dash in QMP argument names iotests.py provides a convenience function that uses Python keyword arguments to represent QMP command arguments. However, almost all QMP commands use dashes for argument names (the sole exception is block_set_io_throttle), and dashes are not allowed in a keyword argument name. Hence provide automatic conversion of underscores to dashes. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/iotests.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index e05b1d6..a94ea75 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -19,6 +19,7 @@ import os import re import subprocess +import string import unittest import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP')) import qmp @@ -96,9 +97,14 @@ class VM(object): os.remove(self._qemu_log_path) self._popen = None + underscore_to_dash = string.maketrans('_', '-') def qmp(self, cmd, **args): '''Invoke a QMP command and return the result dict''' - return self._qmp.cmd(cmd, args=args) + qmp_args = dict() + for k in args.keys(): + qmp_args[k.translate(self.underscore_to_dash)] = args[k] + + return self._qmp.cmd(cmd, args=qmp_args) def get_qmp_events(self, wait=False): '''Poll for queued QMP events and return a list of dicts''' -- cgit v1.1 From 90f0b71153c6a85d03967244b9889f892841d835 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 28 Sep 2012 17:23:02 +0200 Subject: qemu-iotests: add tests for streaming error handling Add a test for each of report/ignore/stop. The tests use blkdebug to generate an error in the middle of a script. The error is recoverable (once = "on") so that we can test resuming a job after stopping for an error. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Kevin Wolf --- tests/qemu-iotests/030 | 220 ++++++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/030.out | 4 +- tests/qemu-iotests/iotests.py | 7 ++ 3 files changed, 229 insertions(+), 2 deletions(-) diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index dfacdf1..dd4ef11 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -195,6 +195,226 @@ class TestSmallerBackingFile(ImageStreamingTestCase): self.assert_no_active_streams() self.vm.shutdown() +class TestErrors(ImageStreamingTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should match STREAM_BUFFER_SIZE/512 in block/stream.c + STREAM_BUFFER_SIZE = 512 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, errno, self.STREAM_BUFFER_SIZE / 512, event, event)) + file.close() + +class TestEIO(TestErrors): + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0') + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_ignore(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='ignore') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_stop(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_enospc(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='enospc') + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + +class TestENOSPC(TestErrors): + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 28) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_enospc(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='enospc') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(result, 'return[0]/io-status', 'nospace') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() class TestStreamStop(ImageStreamingTestCase): image_len = 8 * 1024 * 1024 * 1024 # GB diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 594c16f..fa16b5c 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -........ +............. ---------------------------------------------------------------------- -Ran 8 tests +Ran 13 tests OK diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index a94ea75..3c60b2d 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -138,6 +138,13 @@ class QMPTestCase(unittest.TestCase): self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d))) return d + def assert_qmp_absent(self, d, path): + try: + result = self.dictpath(d, path) + except AssertionError: + return + self.fail('path "%s" has value "%s"' % (path, str(result))) + def assert_qmp(self, d, path, value): '''Assert that the value for a specific path in a QMP dict matches''' result = self.dictpath(d, path) -- cgit v1.1