author     Timothy Pearson <tpearson@raptorengineering.com>  2019-05-11 15:12:49 -0500
committer  Timothy Pearson <tpearson@raptorengineering.com>  2019-05-11 15:12:49 -0500
commit     9e80202352dd49bdd9e67b8b906d86f058431505 (patch)
tree       5673c17aad6e3833da8c4ff21b5a11f666ec9fbe /src/util
Initial import of abandoned HQEMU version 2.5.2 (HEAD, master)
Diffstat (limited to 'src/util')
-rw-r--r--  src/util/Makefile.objs            |   32
-rw-r--r--  src/util/acl.c                    |  187
-rw-r--r--  src/util/bitmap.c                 |  339
-rw-r--r--  src/util/bitops.c                 |  158
-rw-r--r--  src/util/buffer.c                 |  171
-rw-r--r--  src/util/compatfd.c               |  110
-rw-r--r--  src/util/coroutine-gthread.c      |  198
-rw-r--r--  src/util/coroutine-sigaltstack.c  |  293
-rw-r--r--  src/util/coroutine-ucontext.c     |  194
-rw-r--r--  src/util/coroutine-win32.c        |  101
-rw-r--r--  src/util/crc32c.c                 |  115
-rw-r--r--  src/util/cutils.c                 |  691
-rw-r--r--  src/util/envlist.c                |  241
-rw-r--r--  src/util/error.c                  |  241
-rw-r--r--  src/util/event_notifier-posix.c   |  122
-rw-r--r--  src/util/event_notifier-win32.c   |   59
-rw-r--r--  src/util/fifo8.c                  |  125
-rw-r--r--  src/util/getauxval.c              |  109
-rw-r--r--  src/util/hbitmap.c                |  495
-rw-r--r--  src/util/hexdump.c                |   37
-rw-r--r--  src/util/host-utils.c             |  162
-rw-r--r--  src/util/id.c                     |   65
-rw-r--r--  src/util/iov.c                    |  577
-rw-r--r--  src/util/memfd.c                  |  162
-rw-r--r--  src/util/mmap-alloc.c             |  110
-rw-r--r--  src/util/module.c                 |  219
-rw-r--r--  src/util/notify.c                 |   71
-rw-r--r--  src/util/osdep.c                  |  437
-rw-r--r--  src/util/oslib-posix.c            |  521
-rw-r--r--  src/util/oslib-win32.c            |  507
-rw-r--r--  src/util/path.c                   |  181
-rw-r--r--  src/util/qemu-config.c            |  573
-rw-r--r--  src/util/qemu-coroutine-io.c      |   91
-rw-r--r--  src/util/qemu-coroutine-lock.c    |  186
-rw-r--r--  src/util/qemu-coroutine-sleep.c   |   41
-rw-r--r--  src/util/qemu-coroutine.c         |  146
-rw-r--r--  src/util/qemu-error.c             |  239
-rw-r--r--  src/util/qemu-openpty.c           |  137
-rw-r--r--  src/util/qemu-option.c            | 1205
-rw-r--r--  src/util/qemu-progress.c          |  159
-rw-r--r--  src/util/qemu-sockets.c           | 1167
-rw-r--r--  src/util/qemu-thread-posix.c      |  518
-rw-r--r--  src/util/qemu-thread-win32.c      |  479
-rw-r--r--  src/util/qemu-timer-common.c      |   61
-rw-r--r--  src/util/rcu.c                    |  352
-rw-r--r--  src/util/readline.c               |  515
-rw-r--r--  src/util/rfifolock.c              |   78
-rw-r--r--  src/util/throttle.c               |  445
-rw-r--r--  src/util/timed-average.c          |  231
-rw-r--r--  src/util/unicode.c                |  100
-rw-r--r--  src/util/uri.c                    | 2204
51 files changed, 15957 insertions, 0 deletions
diff --git a/src/util/Makefile.objs b/src/util/Makefile.objs
new file mode 100644
index 0000000..89dd80e
--- /dev/null
+++ b/src/util/Makefile.objs
@@ -0,0 +1,32 @@
+util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
+util-obj-$(CONFIG_POSIX) += compatfd.o
+util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
+util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+util-obj-$(CONFIG_POSIX) += oslib-posix.o
+util-obj-$(CONFIG_POSIX) += qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
+util-obj-$(CONFIG_POSIX) += memfd.o
+util-obj-$(CONFIG_WIN32) += oslib-win32.o
+util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
+util-obj-y += envlist.o path.o module.o
+util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
+util-obj-y += bitmap.o bitops.o hbitmap.o
+util-obj-y += fifo8.o
+util-obj-y += acl.o
+util-obj-y += error.o qemu-error.o
+util-obj-y += id.o
+util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
+util-obj-y += qemu-option.o qemu-progress.o
+util-obj-y += hexdump.o
+util-obj-y += crc32c.o
+util-obj-y += throttle.o
+util-obj-y += getauxval.o
+util-obj-y += readline.o
+util-obj-y += rfifolock.o
+util-obj-y += rcu.o
+util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
+util-obj-y += qemu-coroutine-sleep.o
+util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
+util-obj-y += buffer.o
+util-obj-y += timed-average.o
diff --git a/src/util/acl.c b/src/util/acl.c
new file mode 100644
index 0000000..571d686
--- /dev/null
+++ b/src/util/acl.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU access control list management
+ *
+ * Copyright (C) 2009 Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+#include "qemu-common.h"
+#include "qemu/acl.h"
+
+#ifdef CONFIG_FNMATCH
+#include <fnmatch.h>
+#endif
+
+
+static unsigned int nacls = 0;
+static qemu_acl **acls = NULL;
+
+
+
+qemu_acl *qemu_acl_find(const char *aclname)
+{
+ int i;
+ for (i = 0 ; i < nacls ; i++) {
+ if (strcmp(acls[i]->aclname, aclname) == 0)
+ return acls[i];
+ }
+
+ return NULL;
+}
+
+qemu_acl *qemu_acl_init(const char *aclname)
+{
+ qemu_acl *acl;
+
+ acl = qemu_acl_find(aclname);
+ if (acl)
+ return acl;
+
+ acl = g_malloc(sizeof(*acl));
+ acl->aclname = g_strdup(aclname);
+ /* Deny by default, so there is no window of "open
+ * access" between QEMU starting, and the user setting
+ * up ACLs in the monitor */
+ acl->defaultDeny = 1;
+
+ acl->nentries = 0;
+ QTAILQ_INIT(&acl->entries);
+
+ acls = g_realloc(acls, sizeof(*acls) * (nacls +1));
+ acls[nacls] = acl;
+ nacls++;
+
+ return acl;
+}
+
+int qemu_acl_party_is_allowed(qemu_acl *acl,
+ const char *party)
+{
+ qemu_acl_entry *entry;
+
+ QTAILQ_FOREACH(entry, &acl->entries, next) {
+#ifdef CONFIG_FNMATCH
+ if (fnmatch(entry->match, party, 0) == 0)
+ return entry->deny ? 0 : 1;
+#else
+ /* No fnmatch, so fallback to exact string matching
+ * instead of allowing wildcards */
+ if (strcmp(entry->match, party) == 0)
+ return entry->deny ? 0 : 1;
+#endif
+ }
+
+ return acl->defaultDeny ? 0 : 1;
+}
+
+
+void qemu_acl_reset(qemu_acl *acl)
+{
+ qemu_acl_entry *entry, *next_entry;
+
+ /* Put back to deny by default, so there is no window
+ * of "open access" while the user re-initializes the
+ * access control list */
+ acl->defaultDeny = 1;
+ QTAILQ_FOREACH_SAFE(entry, &acl->entries, next, next_entry) {
+ QTAILQ_REMOVE(&acl->entries, entry, next);
+ g_free(entry->match);
+ g_free(entry);
+ }
+ acl->nentries = 0;
+}
+
+
+int qemu_acl_append(qemu_acl *acl,
+ int deny,
+ const char *match)
+{
+ qemu_acl_entry *entry;
+
+ entry = g_malloc(sizeof(*entry));
+ entry->match = g_strdup(match);
+ entry->deny = deny;
+
+ QTAILQ_INSERT_TAIL(&acl->entries, entry, next);
+ acl->nentries++;
+
+ return acl->nentries;
+}
+
+
+int qemu_acl_insert(qemu_acl *acl,
+ int deny,
+ const char *match,
+ int index)
+{
+ qemu_acl_entry *tmp;
+ int i = 0;
+
+ if (index <= 0)
+ return -1;
+ if (index > acl->nentries) {
+ return qemu_acl_append(acl, deny, match);
+ }
+
+ QTAILQ_FOREACH(tmp, &acl->entries, next) {
+ i++;
+ if (i == index) {
+ qemu_acl_entry *entry;
+ entry = g_malloc(sizeof(*entry));
+ entry->match = g_strdup(match);
+ entry->deny = deny;
+
+ QTAILQ_INSERT_BEFORE(tmp, entry, next);
+ acl->nentries++;
+ break;
+ }
+ }
+
+ return i;
+}
+
+int qemu_acl_remove(qemu_acl *acl,
+ const char *match)
+{
+ qemu_acl_entry *entry;
+ int i = 0;
+
+ QTAILQ_FOREACH(entry, &acl->entries, next) {
+ i++;
+ if (strcmp(entry->match, match) == 0) {
+ QTAILQ_REMOVE(&acl->entries, entry, next);
+ acl->nentries--;
+ g_free(entry->match);
+ g_free(entry);
+ return i;
+ }
+ }
+ return -1;
+}
+
+
+/*
+ * Local variables:
+ * c-indent-level: 4
+ * c-basic-offset: 4
+ * tab-width: 8
+ * End:
+ */
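
The ACL API introduced above is small enough to show end to end. A hypothetical usage sketch (the list name is illustrative, not taken from this patch): create a named list, append an allow rule, and query two parties; anything that matches no rule falls back to the default-deny policy set in qemu_acl_init().

#include "qemu/acl.h"
#include <stdio.h>

static void acl_demo(void)
{
    qemu_acl *acl = qemu_acl_init("vnc.username");   /* illustrative name */

    /* deny=0 appends an allow rule, deny=1 a deny rule */
    qemu_acl_append(acl, 0, "fred");

    /* "fred" matches the allow rule; "bob" falls through to defaultDeny */
    printf("fred allowed: %d\n", qemu_acl_party_is_allowed(acl, "fred"));
    printf("bob allowed:  %d\n", qemu_acl_party_is_allowed(acl, "bob"));
}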
diff --git a/src/util/bitmap.c b/src/util/bitmap.c
new file mode 100644
index 0000000..44f0f48
--- /dev/null
+++ b/src/util/bitmap.c
@@ -0,0 +1,339 @@
+/*
+ * Bitmap Module
+ *
+ * Stolen from linux/src/lib/bitmap.c
+ *
+ * Copyright (C) 2010 Corentin Chary
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
+ */
+
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "qemu/atomic.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs. The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'. The implementation makes
+ * no particular effort to keep them zero. It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures.
+ */
+
+int slow_bitmap_empty(const unsigned long *bitmap, long bits)
+{
+ long k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap[k]) {
+ return 0;
+ }
+ }
+ if (bits % BITS_PER_LONG) {
+ if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+int slow_bitmap_full(const unsigned long *bitmap, long bits)
+{
+ long k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (~bitmap[k]) {
+ return 0;
+ }
+ }
+
+ if (bits % BITS_PER_LONG) {
+ if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+int slow_bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, long bits)
+{
+ long k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap1[k] != bitmap2[k]) {
+ return 0;
+ }
+ }
+
+ if (bits % BITS_PER_LONG) {
+ if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+ long bits)
+{
+ long k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ dst[k] = ~src[k];
+ }
+
+ if (bits % BITS_PER_LONG) {
+ dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+ }
+}
+
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, long bits)
+{
+ long k;
+ long nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++) {
+ result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+ }
+ return result != 0;
+}
+
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, long bits)
+{
+ long k;
+ long nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++) {
+ dst[k] = bitmap1[k] | bitmap2[k];
+ }
+}
+
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, long bits)
+{
+ long k;
+ long nr = BITS_TO_LONGS(bits);
+
+ for (k = 0; k < nr; k++) {
+ dst[k] = bitmap1[k] ^ bitmap2[k];
+ }
+}
+
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, long bits)
+{
+ long k;
+ long nr = BITS_TO_LONGS(bits);
+ unsigned long result = 0;
+
+ for (k = 0; k < nr; k++) {
+ result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+ }
+ return result != 0;
+}
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, long start, long nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const long size = start + nr;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_set >= 0) {
+ *p |= mask_to_set;
+ nr -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ *p |= mask_to_set;
+ }
+}
+
+void bitmap_set_atomic(unsigned long *map, long start, long nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const long size = start + nr;
+ int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+ /* First word */
+ if (nr - bits_to_set > 0) {
+ atomic_or(p, mask_to_set);
+ nr -= bits_to_set;
+ bits_to_set = BITS_PER_LONG;
+ mask_to_set = ~0UL;
+ p++;
+ }
+
+ /* Full words */
+ if (bits_to_set == BITS_PER_LONG) {
+ while (nr >= BITS_PER_LONG) {
+ *p = ~0UL;
+ nr -= BITS_PER_LONG;
+ p++;
+ }
+ }
+
+ /* Last word */
+ if (nr) {
+ mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+ atomic_or(p, mask_to_set);
+ } else {
+ /* If we avoided the full barrier in atomic_or(), issue a
+ * barrier to account for the assignments in the while loop.
+ */
+ smp_mb();
+ }
+}
+
+void bitmap_clear(unsigned long *map, long start, long nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const long size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+ while (nr - bits_to_clear >= 0) {
+ *p &= ~mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ *p &= ~mask_to_clear;
+ }
+}
+
+bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
+{
+ unsigned long *p = map + BIT_WORD(start);
+ const long size = start + nr;
+ int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+ unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+ unsigned long dirty = 0;
+ unsigned long old_bits;
+
+ /* First word */
+ if (nr - bits_to_clear > 0) {
+ old_bits = atomic_fetch_and(p, ~mask_to_clear);
+ dirty |= old_bits & mask_to_clear;
+ nr -= bits_to_clear;
+ bits_to_clear = BITS_PER_LONG;
+ mask_to_clear = ~0UL;
+ p++;
+ }
+
+ /* Full words */
+ if (bits_to_clear == BITS_PER_LONG) {
+ while (nr >= BITS_PER_LONG) {
+ if (*p) {
+ old_bits = atomic_xchg(p, 0);
+ dirty |= old_bits;
+ }
+ nr -= BITS_PER_LONG;
+ p++;
+ }
+ }
+
+ /* Last word */
+ if (nr) {
+ mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+ old_bits = atomic_fetch_and(p, ~mask_to_clear);
+ dirty |= old_bits & mask_to_clear;
+ } else {
+ if (!dirty) {
+ smp_mb();
+ }
+ }
+
+ return dirty != 0;
+}
+
+#define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask))
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned long nr,
+ unsigned long align_mask)
+{
+ unsigned long index, end, i;
+again:
+ index = find_next_zero_bit(map, size, start);
+
+ /* Align allocation */
+ index = ALIGN_MASK(index, align_mask);
+
+ end = index + nr;
+ if (end > size) {
+ return end;
+ }
+ i = find_next_bit(map, end, index);
+ if (i < end) {
+ start = i + 1;
+ goto again;
+ }
+ return index;
+}
+
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, long bits)
+{
+ long k, lim = bits/BITS_PER_LONG;
+
+ for (k = 0; k < lim; ++k) {
+ if (bitmap1[k] & bitmap2[k]) {
+ return 1;
+ }
+ }
+
+ if (bits % BITS_PER_LONG) {
+ if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+ return 1;
+ }
+ }
+ return 0;
+}
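
To make the "don't care" rule described in the header comment concrete, here is a standalone sketch (not part of the patch) of how the last, partially used word is masked before it is inspected; LAST_WORD_MASK is an assumed equivalent of the BITMAP_LAST_WORD_MASK used by the predicates above.

#include <stdio.h>
#include <limits.h>

#define BITS_PER_LONG ((int)(CHAR_BIT * sizeof(unsigned long)))
/* Assumed form with the same semantics as BITMAP_LAST_WORD_MASK above. */
#define LAST_WORD_MASK(nbits) \
    (((nbits) % BITS_PER_LONG) ? (1UL << ((nbits) % BITS_PER_LONG)) - 1 : ~0UL)

int main(void)
{
    long bits = BITS_PER_LONG + 3;   /* e.g. a 67-bit bitmap on 64-bit longs */
    unsigned long last = ~0UL;       /* unused high bits deliberately dirty  */

    /* Only the low (bits % BITS_PER_LONG) bits of the final word matter. */
    printf("mask      = 0x%lx\n", LAST_WORD_MASK(bits));
    printf("live bits = 0x%lx\n", last & LAST_WORD_MASK(bits));
    return 0;
}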
diff --git a/src/util/bitops.c b/src/util/bitops.c
new file mode 100644
index 0000000..227c38b
--- /dev/null
+++ b/src/util/bitops.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/bitops.h"
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size) {
+ return size;
+ }
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG) {
+ goto found_first;
+ }
+ if (tmp) {
+ goto found_middle;
+ }
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size >= 4*BITS_PER_LONG) {
+ unsigned long d1, d2, d3;
+ tmp = *p;
+ d1 = *(p+1);
+ d2 = *(p+2);
+ d3 = *(p+3);
+ if (tmp) {
+ goto found_middle;
+ }
+ if (d1 | d2 | d3) {
+ break;
+ }
+ p += 4;
+ result += 4*BITS_PER_LONG;
+ size -= 4*BITS_PER_LONG;
+ }
+ while (size >= BITS_PER_LONG) {
+ if ((tmp = *(p++))) {
+ goto found_middle;
+ }
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size) {
+ return result;
+ }
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) { /* Are any bits set? */
+ return result + size; /* Nope. */
+ }
+found_middle:
+ return result + ctzl(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size) {
+ return size;
+ }
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG) {
+ goto found_first;
+ }
+ if (~tmp) {
+ goto found_middle;
+ }
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++))) {
+ goto found_middle;
+ }
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size) {
+ return result;
+ }
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) { /* Are any bits zero? */
+ return result + size; /* Nope. */
+ }
+found_middle:
+ return result + ctzl(~tmp);
+}
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ unsigned long words;
+ unsigned long tmp;
+
+ /* Start at final word. */
+ words = size / BITS_PER_LONG;
+
+ /* Partial final word? */
+ if (size & (BITS_PER_LONG-1)) {
+ tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+ - (size & (BITS_PER_LONG-1)))));
+ if (tmp) {
+ goto found;
+ }
+ }
+
+ while (words) {
+ tmp = addr[--words];
+ if (tmp) {
+ found:
+ return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp);
+ }
+ }
+
+ /* Not found */
+ return size;
+}
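
As a usage sketch of the search helpers above (hypothetical caller, QEMU-internal header assumed): find_next_bit() returns the index of the next set bit at or after the offset, or size when there is none, so a scan loop looks like this.

#include "qemu/bitops.h"
#include <stdio.h>

static void dump_set_bits(const unsigned long *map, unsigned long nbits)
{
    unsigned long bit;

    for (bit = find_next_bit(map, nbits, 0);
         bit < nbits;                              /* nbits means "not found" */
         bit = find_next_bit(map, nbits, bit + 1)) {
        printf("bit %lu is set\n", bit);
    }
}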
diff --git a/src/util/buffer.c b/src/util/buffer.c
new file mode 100644
index 0000000..8b27c08
--- /dev/null
+++ b/src/util/buffer.c
@@ -0,0 +1,171 @@
+/*
+ * QEMU generic buffers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/buffer.h"
+#include "trace.h"
+
+#define BUFFER_MIN_INIT_SIZE 4096
+#define BUFFER_MIN_SHRINK_SIZE 65536
+
+/* define the factor alpha for the exponential smoothing
+ * that is used in the average size calculation. A shift
+ * of 7 results in an alpha of 1/2^7. */
+#define BUFFER_AVG_SIZE_SHIFT 7
+
+static size_t buffer_req_size(Buffer *buffer, size_t len)
+{
+ return MAX(BUFFER_MIN_INIT_SIZE,
+ pow2ceil(buffer->offset + len));
+}
+
+static void buffer_adj_size(Buffer *buffer, size_t len)
+{
+ size_t old = buffer->capacity;
+ buffer->capacity = buffer_req_size(buffer, len);
+ buffer->buffer = g_realloc(buffer->buffer, buffer->capacity);
+ trace_buffer_resize(buffer->name ?: "unnamed",
+ old, buffer->capacity);
+
+ /* make it even harder for the buffer to shrink, reset average size
+ * to current capacity if it is larger than the average. */
+ buffer->avg_size = MAX(buffer->avg_size,
+ buffer->capacity << BUFFER_AVG_SIZE_SHIFT);
+}
+
+void buffer_init(Buffer *buffer, const char *name, ...)
+{
+ va_list ap;
+
+ va_start(ap, name);
+ buffer->name = g_strdup_vprintf(name, ap);
+ va_end(ap);
+}
+
+static uint64_t buffer_get_avg_size(Buffer *buffer)
+{
+ return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT;
+}
+
+void buffer_shrink(Buffer *buffer)
+{
+ size_t new;
+
+ /* Calculate the average size of the buffer as
+ * avg_size = avg_size * ( 1 - a ) + required_size * a
+ * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */
+ buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1;
+ buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT;
+ buffer->avg_size += buffer_req_size(buffer, 0);
+
+ /* And then only shrink if the average size of the buffer is much
+ * too big, to avoid bumping up & down the buffers all the time.
+ * realloc() isn't exactly cheap ... */
+ new = buffer_req_size(buffer, buffer_get_avg_size(buffer));
+ if (new < buffer->capacity >> 3 &&
+ new >= BUFFER_MIN_SHRINK_SIZE) {
+ buffer_adj_size(buffer, buffer_get_avg_size(buffer));
+ }
+
+ buffer_adj_size(buffer, 0);
+}
+
+void buffer_reserve(Buffer *buffer, size_t len)
+{
+ if ((buffer->capacity - buffer->offset) < len) {
+ buffer_adj_size(buffer, len);
+ }
+}
+
+gboolean buffer_empty(Buffer *buffer)
+{
+ return buffer->offset == 0;
+}
+
+uint8_t *buffer_end(Buffer *buffer)
+{
+ return buffer->buffer + buffer->offset;
+}
+
+void buffer_reset(Buffer *buffer)
+{
+ buffer->offset = 0;
+ buffer_shrink(buffer);
+}
+
+void buffer_free(Buffer *buffer)
+{
+ trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity);
+ g_free(buffer->buffer);
+ g_free(buffer->name);
+ buffer->offset = 0;
+ buffer->capacity = 0;
+ buffer->buffer = NULL;
+ buffer->name = NULL;
+}
+
+void buffer_append(Buffer *buffer, const void *data, size_t len)
+{
+ memcpy(buffer->buffer + buffer->offset, data, len);
+ buffer->offset += len;
+}
+
+void buffer_advance(Buffer *buffer, size_t len)
+{
+ memmove(buffer->buffer, buffer->buffer + len,
+ (buffer->offset - len));
+ buffer->offset -= len;
+ buffer_shrink(buffer);
+}
+
+void buffer_move_empty(Buffer *to, Buffer *from)
+{
+ trace_buffer_move_empty(to->name ?: "unnamed",
+ from->offset,
+ from->name ?: "unnamed");
+ assert(to->offset == 0);
+
+ g_free(to->buffer);
+ to->offset = from->offset;
+ to->capacity = from->capacity;
+ to->buffer = from->buffer;
+
+ from->offset = 0;
+ from->capacity = 0;
+ from->buffer = NULL;
+}
+
+void buffer_move(Buffer *to, Buffer *from)
+{
+ if (to->offset == 0) {
+ buffer_move_empty(to, from);
+ return;
+ }
+
+ trace_buffer_move(to->name ?: "unnamed",
+ from->offset,
+ from->name ?: "unnamed");
+ buffer_reserve(to, from->offset);
+ buffer_append(to, from->buffer, from->offset);
+
+ g_free(from->buffer);
+ from->offset = 0;
+ from->capacity = 0;
+ from->buffer = NULL;
+}
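
The smoothing in buffer_shrink() is plain fixed-point arithmetic: avg_size is kept scaled by 2^BUFFER_AVG_SIZE_SHIFT, so avg = avg * (1 - a) + required * a with a = 1/128 reduces to a multiply, a shift and an add. A standalone sketch (not part of the patch) of that update:

#include <stdio.h>
#include <stdint.h>

#define AVG_SHIFT 7                        /* alpha = 1/2^7 = 1/128 */

static uint64_t avg_update(uint64_t avg_scaled, uint64_t required)
{
    avg_scaled *= (1u << AVG_SHIFT) - 1;   /* avg * (1 - alpha), still scaled  */
    avg_scaled >>= AVG_SHIFT;
    avg_scaled += required;                /* adding unscaled == + required * alpha */
    return avg_scaled;
}

int main(void)
{
    uint64_t avg = 0;
    int i;

    for (i = 0; i < 4; i++) {
        avg = avg_update(avg, 65536);      /* feed a constant required size */
        printf("avg = %llu\n", (unsigned long long)(avg >> AVG_SHIFT));
    }
    return 0;                              /* slowly converges towards 65536 */
}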
diff --git a/src/util/compatfd.c b/src/util/compatfd.c
new file mode 100644
index 0000000..e857150
--- /dev/null
+++ b/src/util/compatfd.c
@@ -0,0 +1,110 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "qemu/compatfd.h"
+#include "qemu/thread.h"
+
+#include <sys/syscall.h>
+
+struct sigfd_compat_info
+{
+ sigset_t mask;
+ int fd;
+};
+
+static void *sigwait_compat(void *opaque)
+{
+ struct sigfd_compat_info *info = opaque;
+
+ while (1) {
+ int sig;
+ int err;
+
+ err = sigwait(&info->mask, &sig);
+ if (err != 0) {
+ if (errno == EINTR) {
+ continue;
+ } else {
+ return NULL;
+ }
+ } else {
+ struct qemu_signalfd_siginfo buffer;
+ size_t offset = 0;
+
+ memset(&buffer, 0, sizeof(buffer));
+ buffer.ssi_signo = sig;
+
+ while (offset < sizeof(buffer)) {
+ ssize_t len;
+
+ len = write(info->fd, (char *)&buffer + offset,
+ sizeof(buffer) - offset);
+ if (len == -1 && errno == EINTR)
+ continue;
+
+ if (len <= 0) {
+ return NULL;
+ }
+
+ offset += len;
+ }
+ }
+ }
+}
+
+static int qemu_signalfd_compat(const sigset_t *mask)
+{
+ struct sigfd_compat_info *info;
+ QemuThread thread;
+ int fds[2];
+
+ info = malloc(sizeof(*info));
+ if (info == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ if (pipe(fds) == -1) {
+ free(info);
+ return -1;
+ }
+
+ qemu_set_cloexec(fds[0]);
+ qemu_set_cloexec(fds[1]);
+
+ memcpy(&info->mask, mask, sizeof(*mask));
+ info->fd = fds[1];
+
+ qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info,
+ QEMU_THREAD_DETACHED);
+
+ return fds[0];
+}
+
+int qemu_signalfd(const sigset_t *mask)
+{
+#if defined(CONFIG_SIGNALFD)
+ int ret;
+
+ ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+ if (ret != -1) {
+ qemu_set_cloexec(ret);
+ return ret;
+ }
+#endif
+
+ return qemu_signalfd_compat(mask);
+}
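
A hypothetical caller of qemu_signalfd() (sketch only, assuming the qemu/compatfd.h header included above): the signal must be blocked first, and each delivery is then read from the descriptor as a qemu_signalfd_siginfo record, whether the kernel signalfd or the pipe-based fallback thread is behind it.

#include "qemu/compatfd.h"
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
    sigset_t mask;
    struct qemu_signalfd_siginfo info;
    int fd;

    sigemptyset(&mask);
    sigaddset(&mask, SIGTERM);
    sigprocmask(SIG_BLOCK, &mask, NULL);   /* signals must be blocked first */

    fd = qemu_signalfd(&mask);
    if (fd < 0) {
        return 1;
    }

    /* Blocks until SIGTERM arrives, then reports which signal was read. */
    if (read(fd, &info, sizeof(info)) == sizeof(info)) {
        printf("got signal %u\n", (unsigned)info.ssi_signo);
    }
    return 0;
}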
diff --git a/src/util/coroutine-gthread.c b/src/util/coroutine-gthread.c
new file mode 100644
index 0000000..0bcd778
--- /dev/null
+++ b/src/util/coroutine-gthread.c
@@ -0,0 +1,198 @@
+/*
+ * GThread coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+ Coroutine base;
+ GThread *thread;
+ bool runnable;
+ bool free_on_thread_exit;
+ CoroutineAction action;
+} CoroutineGThread;
+
+static CompatGMutex coroutine_lock;
+static CompatGCond coroutine_cond;
+
+/* GLib 2.31 and beyond deprecated various parts of the thread API,
+ * but the new interfaces are not available in older GLib versions
+ * so we have to cope with both.
+ */
+#if GLIB_CHECK_VERSION(2, 31, 0)
+/* Awkwardly, the GPrivate API doesn't provide a way to update the
+ * GDestroyNotify handler for the coroutine key dynamically. So instead
+ * we track whether or not the CoroutineGThread should be freed on
+ * thread exit / coroutine key update using the free_on_thread_exit
+ * field.
+ */
+static void coroutine_destroy_notify(gpointer data)
+{
+ CoroutineGThread *co = data;
+ if (co && co->free_on_thread_exit) {
+ g_free(co);
+ }
+}
+
+static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify);
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+ return g_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+ bool free_on_thread_exit)
+{
+ /* Unlike g_static_private_set() this does not call the GDestroyNotify
+ * if the previous value of the key was NULL. Fortunately we only need
+ * the GDestroyNotify in the non-NULL key case.
+ */
+ co->free_on_thread_exit = free_on_thread_exit;
+ g_private_replace(&coroutine_key, co);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+ return g_thread_new("coroutine", func, data);
+}
+
+#else
+
+/* Handle older GLib versions */
+
+static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+ return g_static_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+ bool free_on_thread_exit)
+{
+ g_static_private_set(&coroutine_key, co,
+ free_on_thread_exit ? (GDestroyNotify)g_free : NULL);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+ return g_thread_create_full(func, data, 0, TRUE, TRUE,
+ G_THREAD_PRIORITY_NORMAL, NULL);
+}
+
+#endif
+
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+ if (!g_thread_supported()) {
+ g_thread_init(NULL);
+ }
+#endif
+}
+
+static void coroutine_wait_runnable_locked(CoroutineGThread *co)
+{
+ while (!co->runnable) {
+ g_cond_wait(&coroutine_cond, &coroutine_lock);
+ }
+}
+
+static void coroutine_wait_runnable(CoroutineGThread *co)
+{
+ g_mutex_lock(&coroutine_lock);
+ coroutine_wait_runnable_locked(co);
+ g_mutex_unlock(&coroutine_lock);
+}
+
+static gpointer coroutine_thread(gpointer opaque)
+{
+ CoroutineGThread *co = opaque;
+
+ set_coroutine_key(co, false);
+ coroutine_wait_runnable(co);
+ co->base.entry(co->base.entry_arg);
+ qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
+ return NULL;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ CoroutineGThread *co;
+
+ co = g_malloc0(sizeof(*co));
+ co->thread = create_thread(coroutine_thread, co);
+ if (!co->thread) {
+ g_free(co);
+ return NULL;
+ }
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
+
+ g_thread_join(co->thread);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_,
+ Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
+ CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
+
+ g_mutex_lock(&coroutine_lock);
+ from->runnable = false;
+ from->action = action;
+ to->runnable = true;
+ to->action = action;
+ g_cond_broadcast(&coroutine_cond);
+
+ if (action != COROUTINE_TERMINATE) {
+ coroutine_wait_runnable_locked(from);
+ }
+ g_mutex_unlock(&coroutine_lock);
+ return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineGThread *co = get_coroutine_key();
+ if (!co) {
+ co = g_malloc0(sizeof(*co));
+ co->runnable = true;
+ set_coroutine_key(co, true);
+ }
+
+ return &co->base;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineGThread *co = get_coroutine_key();
+
+ return co && co->base.caller;
+}
diff --git a/src/util/coroutine-sigaltstack.c b/src/util/coroutine-sigaltstack.c
new file mode 100644
index 0000000..39842a4
--- /dev/null
+++ b/src/util/coroutine-sigaltstack.c
@@ -0,0 +1,293 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012 Alex Barcelo <abarcelo@ac.upc.edu>
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+** Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+ /** Currently executing coroutine */
+ Coroutine *current;
+
+ /** The default coroutine */
+ CoroutineUContext leader;
+
+ /** Information for the signal handler (trampoline) */
+ sigjmp_buf tr_reenter;
+ volatile sig_atomic_t tr_called;
+ void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ if (!s) {
+ s = g_malloc0(sizeof(*s));
+ s->current = &s->leader.base;
+ pthread_setspecific(thread_state_key, s);
+ }
+ return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+ CoroutineThreadState *s = opaque;
+
+ g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+ int ret;
+
+ ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+ if (ret != 0) {
+ fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+ abort();
+ }
+}
+
+/* "boot" function
+ * This is what starts the coroutine; it is called from the trampoline
+ * (from the signal handler when it is not signal handling; read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+ /* Initialize longjmp environment and switch back the caller */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+ CoroutineUContext *self;
+ Coroutine *co;
+ CoroutineThreadState *coTS;
+
+ /* Get the thread specific information */
+ coTS = coroutine_get_thread_state();
+ self = coTS->tr_handler;
+ coTS->tr_called = 1;
+ co = &self->base;
+
+ /*
+ * Here we have to do a bit of a ping pong with the caller, given that
+ * this is a signal handler and we have to do a return "soon". Then the
+ * caller can reestablish everything and do a siglongjmp here again.
+ */
+ if (!sigsetjmp(coTS->tr_reenter, 0)) {
+ return;
+ }
+
+ /*
+ * Ok, the caller has siglongjmp'ed back to us, so now prepare
+ * us for the real machine state switching. We have to jump
+ * into another function here to get a new stack context for
+ * the auto variables (which have to be auto-variables
+ * because the start of the thread happens later). Else with
+ * PIC (i.e. Position Independent Code which is used when PTH
+ * is built as a shared library) most platforms would
+ * horribly core dump, as experience showed.
+ */
+ coroutine_bootstrap(self, co);
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ CoroutineThreadState *coTS;
+ struct sigaction sa;
+ struct sigaction osa;
+ stack_t ss;
+ stack_t oss;
+ sigset_t sigs;
+ sigset_t osigs;
+ sigjmp_buf old_env;
+
+ /* The way to manipulate stack is with the sigaltstack function. We
+ * prepare a stack, with it delivering a signal to ourselves and then
+ * put sigsetjmp/siglongjmp where needed.
+ * This has been done keeping coroutine-ucontext as a model and with the
+ * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+ * of the coroutines and see pth_mctx.c (from the pth project) for the
+ * sigaltstack way of manipulating stacks.
+ */
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ coTS = coroutine_get_thread_state();
+ coTS->tr_handler = co;
+
+ /*
+ * Preserve the SIGUSR2 signal state, block SIGUSR2,
+ * and establish our signal handler. The signal will
+ * later transfer control onto the signal stack.
+ */
+ sigemptyset(&sigs);
+ sigaddset(&sigs, SIGUSR2);
+ pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+ sa.sa_handler = coroutine_trampoline;
+ sigfillset(&sa.sa_mask);
+ sa.sa_flags = SA_ONSTACK;
+ if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+ abort();
+ }
+
+ /*
+ * Set the new stack.
+ */
+ ss.ss_sp = co->stack;
+ ss.ss_size = stack_size;
+ ss.ss_flags = 0;
+ if (sigaltstack(&ss, &oss) < 0) {
+ abort();
+ }
+
+ /*
+ * Now transfer control onto the signal stack and set it up.
+ * It will return immediately via "return" after the sigsetjmp()
+ * was performed. Be careful here with race conditions. The
+ * signal can be delivered the first time sigsuspend() is
+ * called.
+ */
+ coTS->tr_called = 0;
+ pthread_kill(pthread_self(), SIGUSR2);
+ sigfillset(&sigs);
+ sigdelset(&sigs, SIGUSR2);
+ while (!coTS->tr_called) {
+ sigsuspend(&sigs);
+ }
+
+ /*
+ * Inform the system that we are back off the signal stack by
+ * removing the alternative signal stack. Be careful here: It
+ * first has to be disabled, before it can be removed.
+ */
+ sigaltstack(NULL, &ss);
+ ss.ss_flags = SS_DISABLE;
+ if (sigaltstack(&ss, NULL) < 0) {
+ abort();
+ }
+ sigaltstack(NULL, &ss);
+ if (!(oss.ss_flags & SS_DISABLE)) {
+ sigaltstack(&oss, NULL);
+ }
+
+ /*
+ * Restore the old SIGUSR2 signal handler and mask
+ */
+ sigaction(SIGUSR2, &osa, NULL);
+ pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+ /*
+ * Now enter the trampoline again, but this time not as a signal
+ * handler. Instead we jump into it directly. The functionally
+ * redundant ping-pong pointer arithmetic is necessary to avoid
+ * type-conversion warnings related to the `volatile' qualifier and
+ * the fact that `jmp_buf' usually is an array type.
+ */
+ if (!sigsetjmp(old_env, 0)) {
+ siglongjmp(coTS->tr_reenter, 1);
+ }
+
+ /*
+ * Ok, we returned again, so now we're finished
+ */
+
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ CoroutineThreadState *s = coroutine_get_thread_state();
+ int ret;
+
+ s->current = to_;
+
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ CoroutineThreadState *s = coroutine_get_thread_state();
+
+ return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+ return s && s->current->caller;
+}
+
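The trampoline dance above is easier to see in isolation. The following standalone sketch (not part of the patch, and deliberately relying on the same formally non-portable trick as pth) delivers a signal onto an alternate stack, captures a sigsetjmp() context inside the handler, returns normally, and later siglongjmp()s back into that context to run ordinary code on the alternate stack.

#include <stdio.h>
#include <stdlib.h>
#include <setjmp.h>
#include <signal.h>

static sigjmp_buf on_altstack;
static sigjmp_buf back_to_main;

static void trampoline(int signo)
{
    (void)signo;

    /* First entry: running as a signal handler on the alternate stack.
     * Stash a jump target there and return so signal state is unwound. */
    if (!sigsetjmp(on_altstack, 0)) {
        return;
    }

    /* Second entry, via siglongjmp(): same stack, but no longer in
     * signal context, so normal code can run here. */
    printf("running on the alternate stack\n");
    siglongjmp(back_to_main, 1);
}

int main(void)
{
    stack_t ss = { .ss_sp = malloc(SIGSTKSZ), .ss_size = SIGSTKSZ };
    struct sigaction sa = { .sa_handler = trampoline, .sa_flags = SA_ONSTACK };

    sigaltstack(&ss, NULL);
    sigfillset(&sa.sa_mask);
    sigaction(SIGUSR2, &sa, NULL);

    raise(SIGUSR2);                    /* runs trampoline() once, then returns */

    if (!sigsetjmp(back_to_main, 0)) {
        siglongjmp(on_altstack, 1);    /* hop onto the alternate stack */
    }
    printf("back on the normal stack\n");
    return 0;
}
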
diff --git a/src/util/coroutine-ucontext.c b/src/util/coroutine-ucontext.c
new file mode 100644
index 0000000..26cbebb
--- /dev/null
+++ b/src/util/coroutine-ucontext.c
@@ -0,0 +1,194 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+ Coroutine base;
+ void *stack;
+ sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+ unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+ void *p;
+ int i[2];
+};
+
+static void coroutine_trampoline(int i0, int i1)
+{
+ union cc_arg arg;
+ CoroutineUContext *self;
+ Coroutine *co;
+
+ arg.i[0] = i0;
+ arg.i[1] = i1;
+ self = arg.p;
+ co = &self->base;
+
+ /* Initialize longjmp environment and switch back the caller */
+ if (!sigsetjmp(self->env, 0)) {
+ siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+ }
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineUContext *co;
+ ucontext_t old_uc, uc;
+ sigjmp_buf old_env;
+ union cc_arg arg = {0};
+
+ /* The ucontext functions preserve signal masks which incurs a
+ * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not
+ * preserve signal masks but only works on the current stack.
+ * Since we need a way to create and switch to a new stack, use
+ * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+ * everything else.
+ */
+
+ if (getcontext(&uc) == -1) {
+ abort();
+ }
+
+ co = g_malloc0(sizeof(*co));
+ co->stack = g_malloc(stack_size);
+ co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+ uc.uc_link = &old_uc;
+ uc.uc_stack.ss_sp = co->stack;
+ uc.uc_stack.ss_size = stack_size;
+ uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+ co->valgrind_stack_id =
+ VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+ arg.p = co;
+
+ makecontext(&uc, (void (*)(void))coroutine_trampoline,
+ 2, arg.i[0], arg.i[1]);
+
+ /* swapcontext() in, siglongjmp() back out */
+ if (!sigsetjmp(old_env, 0)) {
+ swapcontext(&old_uc, &uc);
+ }
+ return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+ VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+ valgrind_stack_deregister(co);
+#endif
+
+ g_free(co->stack);
+ g_free(co);
+}
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+ CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+ int ret;
+
+ current = to_;
+
+ ret = sigsetjmp(from->env, 0);
+ if (ret == 0) {
+ siglongjmp(to->env, action);
+ }
+ return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ if (!current) {
+ current = &leader.base;
+ }
+ return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ return current && current->caller;
+}
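
The cc_arg union above exists only because makecontext() takes plain int arguments. A standalone sketch (not part of the patch) of that pointer-splitting trick, under the same assumption as the comment above that a pointer fits in two ints:

#include <stdio.h>
#include <ucontext.h>

union cc_arg {
    void *p;
    int i[2];
};

static ucontext_t main_uc, work_uc;
static char stack[64 * 1024];

static void trampoline(int i0, int i1)
{
    union cc_arg arg;

    arg.i[0] = i0;                      /* reassemble the pointer */
    arg.i[1] = i1;
    printf("trampoline got: %s\n", (const char *)arg.p);
}

int main(void)
{
    union cc_arg arg = { 0 };

    arg.p = "hello from the new context";

    getcontext(&work_uc);
    work_uc.uc_link = &main_uc;         /* return here when trampoline ends */
    work_uc.uc_stack.ss_sp = stack;
    work_uc.uc_stack.ss_size = sizeof(stack);

    makecontext(&work_uc, (void (*)(void))trampoline, 2, arg.i[0], arg.i[1]);
    swapcontext(&main_uc, &work_uc);
    printf("back on the original stack\n");
    return 0;
}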
diff --git a/src/util/coroutine-win32.c b/src/util/coroutine-win32.c
new file mode 100644
index 0000000..4f922c5
--- /dev/null
+++ b/src/util/coroutine-win32.c
@@ -0,0 +1,101 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct
+{
+ Coroutine base;
+
+ LPVOID fiber;
+ CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+ CoroutineAction action)
+{
+ CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+ CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+ current = to_;
+
+ to->action = action;
+ SwitchToFiber(to->fiber);
+ return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+ Coroutine *co = co_;
+
+ while (true) {
+ co->entry(co->entry_arg);
+ qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+ }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+ const size_t stack_size = 1 << 20;
+ CoroutineWin32 *co;
+
+ co = g_malloc0(sizeof(*co));
+ co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+ return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+ CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+ DeleteFiber(co->fiber);
+ g_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+ if (!current) {
+ current = &leader.base;
+ leader.fiber = ConvertThreadToFiber(NULL);
+ }
+ return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+ return current && current->caller;
+}
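
For readers unfamiliar with the Win32 fiber API this backend builds on, here is a minimal standalone sketch (not part of the patch): the starting thread converts itself to a fiber, creates a second one, and control moves only at explicit SwitchToFiber() calls, which is exactly the property the coroutine switch relies on.

#include <windows.h>
#include <stdio.h>

static LPVOID main_fiber;

static void CALLBACK worker(void *arg)
{
    printf("in worker fiber: %s\n", (const char *)arg);
    SwitchToFiber(main_fiber);          /* a fiber must switch away, not return */
}

int main(void)
{
    LPVOID worker_fiber;

    main_fiber = ConvertThreadToFiber(NULL);
    worker_fiber = CreateFiber(1 << 20, worker, "hello");

    SwitchToFiber(worker_fiber);        /* run worker until it switches back */
    printf("back in the main fiber\n");

    DeleteFiber(worker_fiber);
    return 0;
}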
diff --git a/src/util/crc32c.c b/src/util/crc32c.c
new file mode 100644
index 0000000..8866327
--- /dev/null
+++ b/src/util/crc32c.c
@@ -0,0 +1,115 @@
+/*
+ * Castagnoli CRC32C Checksum Algorithm
+ *
+ * Polynomial: 0x11EDC6F41
+ *
+ * Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman
+ * "Optimization of Cyclic Redundancy-Check Codes with 24
+ * and 32 Parity Bits",IEEE Transactions on Communication,
+ * Volume 41, Number 6, June 1993
+ *
+ * Copyright (c) 2013 Red Hat, Inc.,
+ *
+ * Authors:
+ * Jeff Cody <jcody@redhat.com>
+ *
+ * Based on the Linux kernel cryptographic crc32c module,
+ *
+ * Copyright (c) 2004 Cisco Systems, Inc.
+ * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/crc32c.h"
+
+/*
+ * This is the CRC-32C table
+ * Generated with:
+ * width = 32 bits
+ * poly = 0x1EDC6F41
+ * reflect input bytes = true
+ * reflect output bytes = true
+ */
+
+static const uint32_t crc32c_table[256] = {
+ 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+ 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+ 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+ 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+ 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+ 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+ 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+ 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+ 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+ 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+ 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+ 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+ 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+ 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+ 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+ 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+ 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+ 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+ 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+ 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+ 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+ 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+ 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+ 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+ 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+ 0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+ 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+ 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+ 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+ 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+ 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+ 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+ 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+ 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+ 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+ 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+ 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+ 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+ 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+ 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+ 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+ 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+ 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+ 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+ 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+ 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+ 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+ 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+ 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+ 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+ 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+ 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+ 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+ 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+ 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+ 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+ 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+ 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+ 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+ 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+ 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+ 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+ 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+ 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+
+uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length)
+{
+ while (length--) {
+ crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
+ }
+ return crc^0xffffffff;
+}
+
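A hypothetical caller of the routine above (sketch only): the CRC is seeded with 0xffffffff and the final inversion happens inside crc32c(), so a single call yields the standard CRC-32C check value.

#include <stdio.h>
#include <stdint.h>
#include "qemu/crc32c.h"

int main(void)
{
    const uint8_t msg[] = "123456789";

    /* The standard CRC-32C check value for "123456789" is 0xe3069283. */
    uint32_t crc = crc32c(0xffffffff, msg, sizeof(msg) - 1);

    printf("crc32c = 0x%08x\n", crc);
    return 0;
}
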
diff --git a/src/util/cutils.c b/src/util/cutils.c
new file mode 100644
index 0000000..cfeb848
--- /dev/null
+++ b/src/util/cutils.c
@@ -0,0 +1,691 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+#include <math.h>
+#include <limits.h>
+#include <errno.h>
+
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "net/net.h"
+
+void strpadcpy(char *buf, int buf_size, const char *str, char pad)
+{
+ int len = qemu_strnlen(str, buf_size);
+ memcpy(buf, str, len);
+ memset(buf + len, pad, buf_size - len);
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+ int c;
+ char *q = buf;
+
+ if (buf_size <= 0)
+ return;
+
+ for(;;) {
+ c = *str++;
+ if (c == 0 || q >= buf + buf_size - 1)
+ break;
+ *q++ = c;
+ }
+ *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+ int len;
+ len = strlen(buf);
+ if (len < buf_size)
+ pstrcpy(buf + len, buf_size - len, s);
+ return buf;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (*p != *q)
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+int stristart(const char *str, const char *val, const char **ptr)
+{
+ const char *p, *q;
+ p = str;
+ q = val;
+ while (*q != '\0') {
+ if (qemu_toupper(*p) != qemu_toupper(*q))
+ return 0;
+ p++;
+ q++;
+ }
+ if (ptr)
+ *ptr = p;
+ return 1;
+}
+
+/* XXX: use host strnlen if available ? */
+int qemu_strnlen(const char *s, int max_len)
+{
+ int i;
+
+ for(i = 0; i < max_len; i++) {
+ if (s[i] == '\0') {
+ break;
+ }
+ }
+ return i;
+}
+
+char *qemu_strsep(char **input, const char *delim)
+{
+ char *result = *input;
+ if (result != NULL) {
+ char *p;
+
+ for (p = result; *p != '\0'; p++) {
+ if (strchr(delim, *p)) {
+ break;
+ }
+ }
+ if (*p == '\0') {
+ *input = NULL;
+ } else {
+ *p = '\0';
+ *input = p + 1;
+ }
+ }
+ return result;
+}
+
+time_t mktimegm(struct tm *tm)
+{
+ time_t t;
+ int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday;
+ if (m < 3) {
+ m += 12;
+ y--;
+ }
+ t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 +
+ y / 400 - 719469);
+ t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
+ return t;
+}
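+
+/*
+ * Example (illustrative, not part of the original sources): the Unix epoch
+ * maps to zero, which is a quick sanity check of the formula above:
+ *
+ *     struct tm tm = { .tm_year = 70, .tm_mon = 0, .tm_mday = 1 };
+ *     time_t t = mktimegm(&tm);   // t == 0 for 1970-01-01 00:00:00 UTC
+ */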
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+ return fdatasync(fd);
+#else
+ return fsync(fd);
+#endif
+}
+
+/*
+ * Searches for an area with non-zero content in a buffer
+ *
+ * Attention! The len must be a multiple of
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * and the buffer address must be a multiple of sizeof(VECTYPE) due to
+ * restrictions of the optimizations in this function.
+ *
+ * can_use_buffer_find_nonzero_offset() can be used to check
+ * these requirements.
+ *
+ * The return value is the offset of the non-zero area rounded
+ * down to a multiple of sizeof(VECTYPE) for the first
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * afterwards.
+ *
+ * If the buffer is all zero the return value is equal to len.
+ */
+
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+ const VECTYPE *p = buf;
+ const VECTYPE zero = (VECTYPE){0};
+ size_t i;
+
+ assert(can_use_buffer_find_nonzero_offset(buf, len));
+
+ if (!len) {
+ return 0;
+ }
+
+ for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
+ if (!ALL_EQ(p[i], zero)) {
+ return i * sizeof(VECTYPE);
+ }
+ }
+
+ for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
+ i < len / sizeof(VECTYPE);
+ i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+ VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]);
+ VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]);
+ VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]);
+ VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]);
+ VECTYPE tmp01 = VEC_OR(tmp0, tmp1);
+ VECTYPE tmp23 = VEC_OR(tmp2, tmp3);
+ if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) {
+ break;
+ }
+ }
+
+ return i * sizeof(VECTYPE);
+}
+
+/*
+ * Checks if a buffer is all zeroes
+ *
+ * Attention! The len must be a multiple of 4 * sizeof(long) due to
+ * restrictions of the optimizations in this function.
+ */
+bool buffer_is_zero(const void *buf, size_t len)
+{
+ /*
+ * Use long as the biggest available internal data type that fits into the
+ * CPU register and unroll the loop to smooth out the effect of memory
+ * latency.
+ */
+
+ size_t i;
+ long d0, d1, d2, d3;
+ const long * const data = buf;
+
+ /* use vector optimized zero check if possible */
+ if (can_use_buffer_find_nonzero_offset(buf, len)) {
+ return buffer_find_nonzero_offset(buf, len) == len;
+ }
+
+ assert(len % (4 * sizeof(long)) == 0);
+ len /= sizeof(long);
+
+ for (i = 0; i < len; i += 4) {
+ d0 = data[i + 0];
+ d1 = data[i + 1];
+ d2 = data[i + 2];
+ d3 = data[i + 3];
+
+ if (d0 || d1 || d2 || d3) {
+ return false;
+ }
+ }
+
+ return true;
+}
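+
+/*
+ * Usage sketch (illustrative, not part of the original sources): the length
+ * restriction above is easy to satisfy for page-sized buffers, e.g.
+ *
+ *     uint8_t page[4096] = { 0 };
+ *
+ *     if (buffer_is_zero(page, sizeof(page))) {
+ *         // 4096 is a multiple of 4 * sizeof(long) on 32- and 64-bit hosts
+ *     }
+ */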
+
+#ifndef _WIN32
+/* Sets a specific flag */
+int fcntl_setfl(int fd, int flag)
+{
+ int flags;
+
+ flags = fcntl(fd, F_GETFL);
+ if (flags == -1)
+ return -errno;
+
+ if (fcntl(fd, F_SETFL, flags | flag) == -1)
+ return -errno;
+
+ return 0;
+}
+#endif
+
+static int64_t suffix_mul(char suffix, int64_t unit)
+{
+ switch (qemu_toupper(suffix)) {
+ case QEMU_STRTOSZ_DEFSUFFIX_B:
+ return 1;
+ case QEMU_STRTOSZ_DEFSUFFIX_KB:
+ return unit;
+ case QEMU_STRTOSZ_DEFSUFFIX_MB:
+ return unit * unit;
+ case QEMU_STRTOSZ_DEFSUFFIX_GB:
+ return unit * unit * unit;
+ case QEMU_STRTOSZ_DEFSUFFIX_TB:
+ return unit * unit * unit * unit;
+ case QEMU_STRTOSZ_DEFSUFFIX_PB:
+ return unit * unit * unit * unit * unit;
+ case QEMU_STRTOSZ_DEFSUFFIX_EB:
+ return unit * unit * unit * unit * unit * unit;
+ }
+ return -1;
+}
+
+/*
+ * Convert a string to a byte count, allowing B/b for bytes, K/k for KB,
+ * M/m for MB, G/g for GB, T/t for TB, P/p for PB and E/e for EB.  The end
+ * pointer will be returned in *end, if not NULL.  Returns -ERANGE on
+ * overflow and -EINVAL on other errors.
+ */
+int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end,
+ const char default_suffix, int64_t unit)
+{
+ int64_t retval = -EINVAL;
+ char *endptr;
+ unsigned char c;
+ int mul_required = 0;
+ double val, mul, integral, fraction;
+
+ errno = 0;
+ val = strtod(nptr, &endptr);
+ if (isnan(val) || endptr == nptr || errno != 0) {
+ goto fail;
+ }
+ fraction = modf(val, &integral);
+ if (fraction != 0) {
+ mul_required = 1;
+ }
+ c = *endptr;
+ mul = suffix_mul(c, unit);
+ if (mul >= 0) {
+ endptr++;
+ } else {
+ mul = suffix_mul(default_suffix, unit);
+ assert(mul >= 0);
+ }
+ if (mul == 1 && mul_required) {
+ goto fail;
+ }
+ if ((val * mul >= INT64_MAX) || val < 0) {
+ retval = -ERANGE;
+ goto fail;
+ }
+ retval = val * mul;
+
+fail:
+ if (end) {
+ *end = endptr;
+ }
+
+ return retval;
+}
+
+int64_t qemu_strtosz_suffix(const char *nptr, char **end,
+ const char default_suffix)
+{
+ return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024);
+}
+
+int64_t qemu_strtosz(const char *nptr, char **end)
+{
+ return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB);
+}
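+
+/*
+ * Usage sketch (illustrative, not part of the original sources): with the
+ * default MB suffix and 1024-based units,
+ *
+ *     int64_t val = qemu_strtosz("1.5G", NULL);   // 1610612736
+ *     int64_t err = qemu_strtosz("foo", NULL);    // -EINVAL
+ */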
+
+/**
+ * Helper function for qemu_strto*l() functions.
+ */
+static int check_strtox_error(const char *p, char *endptr, const char **next,
+ int err)
+{
+ /* If no conversion was performed, prefer BSD behavior over glibc
+ * behavior.
+ */
+ if (err == 0 && endptr == p) {
+ err = EINVAL;
+ }
+ if (!next && *endptr) {
+ return -EINVAL;
+ }
+ if (next) {
+ *next = endptr;
+ }
+ return -err;
+}
+
+/**
+ * QEMU wrappers for the strtol(), strtoll(), strtoul() and strtoull() C
+ * functions.
+ *
+ * Convert the ASCII string @nptr to a long integer value
+ * in the given @base.  The parameters @nptr, @endptr and @base
+ * follow the same semantics as the strtol() C function.
+ *
+ * Unlike strtol(), if @endptr is not NULL, this
+ * function will return -EINVAL whenever it cannot fully convert
+ * the string in @nptr with the given @base to a long.  This function returns
+ * the result of the conversion only through the @result parameter.
+ *
+ * If NULL is passed in @endptr, then the whole string in @nptr
+ * must be a valid number, otherwise -EINVAL is returned.
+ *
+ * RETURN VALUE
+ * Unlike strtol(), this wrapper returns either
+ * -EINVAL or the negated errno set by the strtol() function (e.g. -ERANGE).
+ * If the conversion overflows, -ERANGE is returned, and @result
+ * is set to the max value of the desired type
+ * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX).  In the case
+ * of underflow, -ERANGE is returned, and @result is set to the min
+ * value of the desired type.  For strtol() and strtoll(), @result is set to
+ * LONG_MIN and LLONG_MIN, respectively, and for strtoul() and strtoull() it
+ * is set to 0.
+ */
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+ long *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtol(nptr, &p, base);
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
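+
+/*
+ * Usage sketch (illustrative, not part of the original sources):
+ *
+ *     long value;
+ *     const char *rest;
+ *
+ *     if (qemu_strtol("42abc", &rest, 10, &value) == 0) {
+ *         // value == 42, rest points at "abc"
+ *     }
+ *     if (qemu_strtol("42abc", NULL, 10, &value) == -EINVAL) {
+ *         // trailing garbage is rejected when no end pointer is given
+ *     }
+ */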
+
+/**
+ * Converts ASCII string to an unsigned long integer.
+ *
+ * If the string contains a negative number, the value will be converted to
+ * the unsigned representation of the signed value; if the original
+ * (non-negated) value would overflow, @result is set
+ * to ULONG_MAX and -ERANGE is returned.
+ *
+ * The same behavior holds for qemu_strtoull(), except that @result is set to
+ * ULLONG_MAX instead of ULONG_MAX.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+ unsigned long *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtoul(nptr, &p, base);
+ /* Windows returns 1 for negative out-of-range values. */
+ if (errno == ERANGE) {
+ *result = -1;
+ }
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
+
+/**
+ * Converts ASCII string to a long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoll(const char *nptr, const char **endptr, int base,
+ int64_t *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtoll(nptr, &p, base);
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoull(const char *nptr, const char **endptr, int base,
+ uint64_t *result)
+{
+ char *p;
+ int err = 0;
+ if (!nptr) {
+ if (endptr) {
+ *endptr = nptr;
+ }
+ err = -EINVAL;
+ } else {
+ errno = 0;
+ *result = strtoull(nptr, &p, base);
+ /* Windows returns 1 for negative out-of-range values. */
+ if (errno == ERANGE) {
+ *result = -1;
+ }
+ err = check_strtox_error(nptr, p, endptr, errno);
+ }
+ return err;
+}
+
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
+ * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer (even if the integer
+ * overflows or is negative, all digits will be parsed and *@endptr will
+ * point right beyond them).
+ *
+ * If the integer is negative, set *@value to 0, and return -ERANGE.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ *
+ * Else, set *@value to the parsed integer, and return 0.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+ int base)
+{
+ int r = 0;
+ char *endp = (char *)s;
+ unsigned long long val = 0;
+
+ if (!s) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ errno = 0;
+ val = strtoull(s, &endp, base);
+ if (errno) {
+ r = -errno;
+ goto out;
+ }
+
+ if (endp == s) {
+ r = -EINVAL;
+ goto out;
+ }
+
+ /* make sure we reject negative numbers: */
+ while (isspace((unsigned char)*s)) {
+ s++;
+ }
+ if (*s == '-') {
+ val = 0;
+ r = -ERANGE;
+ goto out;
+ }
+
+out:
+ *value = val;
+ *endptr = endp;
+ return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Has the same behavior as parse_uint(), but with an additional check
+ * that there is no extra data after the parsed number.  If extra characters
+ * are present after the parsed number, the function will return -EINVAL,
+ * and *@value will be set to 0.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+ char *endp;
+ int r;
+
+ r = parse_uint(s, value, &endp, base);
+ if (r < 0) {
+ return r;
+ }
+ if (*endp) {
+ *value = 0;
+ return -EINVAL;
+ }
+
+ return 0;
+}
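+
+/*
+ * Usage sketch (illustrative, not part of the original sources):
+ *
+ *     unsigned long long val;
+ *
+ *     parse_uint_full("0x20", &val, 0);     // 0, val == 32
+ *     parse_uint_full("20cm", &val, 10);    // -EINVAL, val == 0
+ *     parse_uint_full("-3", &val, 10);      // -ERANGE, val == 0
+ */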
+
+int qemu_parse_fd(const char *param)
+{
+ long fd;
+ char *endptr;
+
+ errno = 0;
+ fd = strtol(param, &endptr, 10);
+ if (param == endptr /* no conversion performed */ ||
+ errno != 0 /* not representable as long; possibly others */ ||
+ *endptr != '\0' /* final string not empty */ ||
+ fd < 0 /* invalid as file descriptor */ ||
+ fd > INT_MAX /* not representable as int */) {
+ return -1;
+ }
+ return fd;
+}
+
+/*
+ * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ */
+int uleb128_encode_small(uint8_t *out, uint32_t n)
+{
+ g_assert(n <= 0x3fff);
+ if (n < 0x80) {
+ *out++ = n;
+ return 1;
+ } else {
+ *out++ = (n & 0x7f) | 0x80;
+ *out++ = n >> 7;
+ return 2;
+ }
+}
+
+int uleb128_decode_small(const uint8_t *in, uint32_t *n)
+{
+ if (!(*in & 0x80)) {
+ *n = *in++;
+ return 1;
+ } else {
+ *n = *in++ & 0x7f;
+        /* we exceed a 14-bit number */
+ if (*in & 0x80) {
+ return -1;
+ }
+ *n |= *in++ << 7;
+ return 2;
+ }
+}
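+
+/*
+ * Usage sketch (illustrative, not part of the original sources): a 14-bit
+ * value round-trips through one or two bytes,
+ *
+ *     uint8_t buf[2];
+ *     uint32_t out;
+ *     int len = uleb128_encode_small(buf, 300);   // len == 2
+ *
+ *     uleb128_decode_small(buf, &out);            // out == 300
+ */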
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial)
+{
+ char *debug_env = getenv(name);
+ char *inv = NULL;
+ long debug;
+
+ if (!debug_env) {
+ return initial;
+ }
+ errno = 0;
+ debug = strtol(debug_env, &inv, 10);
+ if (inv == debug_env) {
+ return initial;
+ }
+ if (debug < 0 || debug > max || errno != 0) {
+        fprintf(stderr, "warning: %s not in [0, %d]\n", name, max);
+ return initial;
+ }
+ return debug;
+}
+
+/*
+ * Helper to print ethernet mac address
+ */
+const char *qemu_ether_ntoa(const MACAddr *mac)
+{
+ static char ret[18];
+
+ snprintf(ret, sizeof(ret), "%02x:%02x:%02x:%02x:%02x:%02x",
+ mac->a[0], mac->a[1], mac->a[2], mac->a[3], mac->a[4], mac->a[5]);
+
+ return ret;
+}
diff --git a/src/util/envlist.c b/src/util/envlist.c
new file mode 100644
index 0000000..099a544
--- /dev/null
+++ b/src/util/envlist.c
@@ -0,0 +1,241 @@
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/envlist.h"
+
+struct envlist_entry {
+ const char *ev_var; /* actual env value */
+ QLIST_ENTRY(envlist_entry) ev_link;
+};
+
+struct envlist {
+ QLIST_HEAD(, envlist_entry) el_entries; /* actual entries */
+ size_t el_count; /* number of entries */
+};
+
+static int envlist_parse(envlist_t *envlist,
+ const char *env, int (*)(envlist_t *, const char *));
+
+/*
+ * Allocates new envlist and returns pointer to that or
+ * NULL in case of error.
+ */
+envlist_t *
+envlist_create(void)
+{
+ envlist_t *envlist;
+
+ if ((envlist = malloc(sizeof (*envlist))) == NULL)
+ return (NULL);
+
+ QLIST_INIT(&envlist->el_entries);
+ envlist->el_count = 0;
+
+ return (envlist);
+}
+
+/*
+ * Releases given envlist and its entries.
+ */
+void
+envlist_free(envlist_t *envlist)
+{
+ struct envlist_entry *entry;
+
+ assert(envlist != NULL);
+
+ while (envlist->el_entries.lh_first != NULL) {
+ entry = envlist->el_entries.lh_first;
+ QLIST_REMOVE(entry, ev_link);
+
+ free((char *)entry->ev_var);
+ free(entry);
+ }
+ free(envlist);
+}
+
+/*
+ * Parses a comma-separated list of set/modify environment
+ * variable entries and updates the given envlist accordingly.
+ *
+ * For example:
+ * envlist_parse(el, "HOME=foo,SHELL=/bin/sh");
+ *
+ * inserts/sets environment variables HOME and SHELL.
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_parse_set(envlist_t *envlist, const char *env)
+{
+ return (envlist_parse(envlist, env, &envlist_setenv));
+}
+
+/*
+ * Parses a comma-separated list of unset environment variable
+ * entries and removes the given variables from the given envlist.
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_parse_unset(envlist_t *envlist, const char *env)
+{
+ return (envlist_parse(envlist, env, &envlist_unsetenv));
+}
+
+/*
+ * Parses a comma-separated list of set, modify or unset entries
+ * and calls the given callback for each entry.
+ *
+ * Returns 0 in case of success, errno otherwise.
+ */
+static int
+envlist_parse(envlist_t *envlist, const char *env,
+ int (*callback)(envlist_t *, const char *))
+{
+ char *tmpenv, *envvar;
+ char *envsave = NULL;
+ int ret = 0;
+ assert(callback != NULL);
+
+ if ((envlist == NULL) || (env == NULL))
+ return (EINVAL);
+
+ if ((tmpenv = strdup(env)) == NULL)
+ return (errno);
+ envsave = tmpenv;
+
+ do {
+ envvar = strchr(tmpenv, ',');
+ if (envvar != NULL) {
+ *envvar = '\0';
+ }
+ if ((*callback)(envlist, tmpenv) != 0) {
+ ret = errno;
+ break;
+ }
+ tmpenv = envvar + 1;
+ } while (envvar != NULL);
+
+ free(envsave);
+ return ret;
+}
+
+/*
+ * Sets an environment value in the envlist in a manner
+ * similar to putenv(3).
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_setenv(envlist_t *envlist, const char *env)
+{
+ struct envlist_entry *entry = NULL;
+ const char *eq_sign;
+ size_t envname_len;
+
+ if ((envlist == NULL) || (env == NULL))
+ return (EINVAL);
+
+    /* find the first equals sign in the given env */
+ if ((eq_sign = strchr(env, '=')) == NULL)
+ return (EINVAL);
+ envname_len = eq_sign - env + 1;
+
+ /*
+     * If a variable with the given name already exists,
+     * we remove and release it before allocating a whole
+     * new entry.
+ */
+ for (entry = envlist->el_entries.lh_first; entry != NULL;
+ entry = entry->ev_link.le_next) {
+ if (strncmp(entry->ev_var, env, envname_len) == 0)
+ break;
+ }
+
+ if (entry != NULL) {
+ QLIST_REMOVE(entry, ev_link);
+ free((char *)entry->ev_var);
+ free(entry);
+ } else {
+ envlist->el_count++;
+ }
+
+ if ((entry = malloc(sizeof (*entry))) == NULL)
+ return (errno);
+ if ((entry->ev_var = strdup(env)) == NULL) {
+ free(entry);
+ return (errno);
+ }
+ QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
+
+ return (0);
+}
+
+/*
+ * Removes the given env value from the envlist in a manner
+ * similar to unsetenv(3).  Returns 0 on success, errno otherwise.
+ */
+int
+envlist_unsetenv(envlist_t *envlist, const char *env)
+{
+ struct envlist_entry *entry;
+ size_t envname_len;
+
+ if ((envlist == NULL) || (env == NULL))
+ return (EINVAL);
+
+ /* env is not allowed to contain '=' */
+ if (strchr(env, '=') != NULL)
+ return (EINVAL);
+
+ /*
+ * Find out the requested entry and remove
+ * it from the list.
+ */
+ envname_len = strlen(env);
+ for (entry = envlist->el_entries.lh_first; entry != NULL;
+ entry = entry->ev_link.le_next) {
+ if (strncmp(entry->ev_var, env, envname_len) == 0)
+ break;
+ }
+ if (entry != NULL) {
+ QLIST_REMOVE(entry, ev_link);
+ free((char *)entry->ev_var);
+ free(entry);
+
+ envlist->el_count--;
+ }
+ return (0);
+}
+
+/*
+ * Returns the given envlist as an array of strings (in the same form as
+ * the global variable environ).  The caller must free the returned memory
+ * by calling free(3) for each element and for the array itself.  The
+ * returned array and the given envlist are not related (no common
+ * references).
+ *
+ * If the caller provides a count pointer, the number of items in the array
+ * is stored there.  In case of error, NULL is returned and no memory
+ * is allocated.
+ */
+char **
+envlist_to_environ(const envlist_t *envlist, size_t *count)
+{
+ struct envlist_entry *entry;
+ char **env, **penv;
+
+ penv = env = malloc((envlist->el_count + 1) * sizeof (char *));
+ if (env == NULL)
+ return (NULL);
+
+ for (entry = envlist->el_entries.lh_first; entry != NULL;
+ entry = entry->ev_link.le_next) {
+ *(penv++) = strdup(entry->ev_var);
+ }
+ *penv = NULL; /* NULL terminate the list */
+
+ if (count != NULL)
+ *count = envlist->el_count;
+
+ return (env);
+}
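+
+/*
+ * Usage sketch (illustrative, not part of the original sources): build a
+ * small environment and flatten it into an environ-style array (each
+ * element and the array itself must later be freed),
+ *
+ *     envlist_t *el = envlist_create();
+ *     size_t n;
+ *     char **envp;
+ *
+ *     envlist_parse_set(el, "HOME=/tmp,SHELL=/bin/sh");
+ *     envlist_unsetenv(el, "SHELL");
+ *     envp = envlist_to_environ(el, &n);   // n == 1
+ *     envlist_free(el);
+ */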
diff --git a/src/util/error.c b/src/util/error.c
new file mode 100644
index 0000000..80c89a2
--- /dev/null
+++ b/src/util/error.c
@@ -0,0 +1,241 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ * Copyright (C) 2011-2015 Red Hat, Inc.
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2. See
+ * the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+struct Error
+{
+ char *msg;
+ ErrorClass err_class;
+ const char *src, *func;
+ int line;
+ GString *hint;
+};
+
+Error *error_abort;
+Error *error_fatal;
+
+static void error_handle_fatal(Error **errp, Error *err)
+{
+ if (errp == &error_abort) {
+ fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
+ err->func, err->src, err->line);
+ error_report_err(err);
+ abort();
+ }
+ if (errp == &error_fatal) {
+ error_report_err(err);
+ exit(1);
+ }
+}
+
+static void error_setv(Error **errp,
+ const char *src, int line, const char *func,
+ ErrorClass err_class, const char *fmt, va_list ap)
+{
+ Error *err;
+ int saved_errno = errno;
+
+ if (errp == NULL) {
+ return;
+ }
+ assert(*errp == NULL);
+
+ err = g_malloc0(sizeof(*err));
+ err->msg = g_strdup_vprintf(fmt, ap);
+ err->err_class = err_class;
+ err->src = src;
+ err->line = line;
+ err->func = func;
+
+ error_handle_fatal(errp, err);
+ *errp = err;
+
+ errno = saved_errno;
+}
+
+void error_set_internal(Error **errp,
+ const char *src, int line, const char *func,
+ ErrorClass err_class, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_setv(errp, src, line, func, err_class, fmt, ap);
+ va_end(ap);
+}
+
+void error_setg_internal(Error **errp,
+ const char *src, int line, const char *func,
+ const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+ va_end(ap);
+}
+
+void error_setg_errno_internal(Error **errp,
+ const char *src, int line, const char *func,
+ int os_errno, const char *fmt, ...)
+{
+ va_list ap;
+ char *msg;
+ int saved_errno = errno;
+
+ if (errp == NULL) {
+ return;
+ }
+
+ va_start(ap, fmt);
+ error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+ va_end(ap);
+
+ if (os_errno != 0) {
+ msg = (*errp)->msg;
+ (*errp)->msg = g_strdup_printf("%s: %s", msg, strerror(os_errno));
+ g_free(msg);
+ }
+
+ errno = saved_errno;
+}
+
+void error_setg_file_open_internal(Error **errp,
+ const char *src, int line, const char *func,
+ int os_errno, const char *filename)
+{
+ error_setg_errno_internal(errp, src, line, func, os_errno,
+ "Could not open '%s'", filename);
+}
+
+void error_append_hint(Error **errp, const char *fmt, ...)
+{
+ va_list ap;
+ int saved_errno = errno;
+ Error *err;
+
+ if (!errp) {
+ return;
+ }
+ err = *errp;
+ assert(err && errp != &error_abort);
+
+ if (!err->hint) {
+ err->hint = g_string_new(NULL);
+ }
+ va_start(ap, fmt);
+ g_string_append_vprintf(err->hint, fmt, ap);
+ va_end(ap);
+
+ errno = saved_errno;
+}
+
+#ifdef _WIN32
+
+void error_setg_win32_internal(Error **errp,
+ const char *src, int line, const char *func,
+ int win32_err, const char *fmt, ...)
+{
+ va_list ap;
+ char *msg1, *msg2;
+
+ if (errp == NULL) {
+ return;
+ }
+
+ va_start(ap, fmt);
+ error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+ va_end(ap);
+
+ if (win32_err != 0) {
+ msg1 = (*errp)->msg;
+ msg2 = g_win32_error_message(win32_err);
+ (*errp)->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
+ (unsigned)win32_err);
+ g_free(msg2);
+ g_free(msg1);
+ }
+}
+
+#endif
+
+Error *error_copy(const Error *err)
+{
+ Error *err_new;
+
+ err_new = g_malloc0(sizeof(*err));
+ err_new->msg = g_strdup(err->msg);
+ err_new->err_class = err->err_class;
+ err_new->src = err->src;
+ err_new->line = err->line;
+ err_new->func = err->func;
+ if (err->hint) {
+ err_new->hint = g_string_new(err->hint->str);
+ }
+
+ return err_new;
+}
+
+ErrorClass error_get_class(const Error *err)
+{
+ return err->err_class;
+}
+
+const char *error_get_pretty(Error *err)
+{
+ return err->msg;
+}
+
+void error_report_err(Error *err)
+{
+ error_report("%s", error_get_pretty(err));
+ if (err->hint) {
+ error_printf_unless_qmp("%s\n", err->hint->str);
+ }
+ error_free(err);
+}
+
+void error_free(Error *err)
+{
+ if (err) {
+ g_free(err->msg);
+ if (err->hint) {
+ g_string_free(err->hint, true);
+ }
+ g_free(err);
+ }
+}
+
+void error_free_or_abort(Error **errp)
+{
+ assert(errp && *errp);
+ error_free(*errp);
+ *errp = NULL;
+}
+
+void error_propagate(Error **dst_errp, Error *local_err)
+{
+ if (!local_err) {
+ return;
+ }
+ error_handle_fatal(dst_errp, local_err);
+ if (dst_errp && !*dst_errp) {
+ *dst_errp = local_err;
+ } else {
+ error_free(local_err);
+ }
+}
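+
+/*
+ * Usage sketch (illustrative, not part of the original sources): the common
+ * pattern is to set an error in a callee and propagate it in the caller.
+ * error_setg() is the convenience macro from the corresponding header that
+ * expands to error_setg_internal(); do_thing() and caller() are hypothetical
+ * names.
+ *
+ *     void do_thing(Error **errp)
+ *     {
+ *         error_setg(errp, "thing failed: %s", "reason");
+ *     }
+ *
+ *     void caller(Error **errp)
+ *     {
+ *         Error *local_err = NULL;
+ *
+ *         do_thing(&local_err);
+ *         if (local_err) {
+ *             error_propagate(errp, local_err);
+ *             return;
+ *         }
+ *     }
+ */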
diff --git a/src/util/event_notifier-posix.c b/src/util/event_notifier-posix.c
new file mode 100644
index 0000000..d4a0c63
--- /dev/null
+++ b/src/util/event_notifier-posix.c
@@ -0,0 +1,122 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu/event_notifier.h"
+#include "sysemu/char.h"
+#include "qemu/main-loop.h"
+
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
+void event_notifier_init_fd(EventNotifier *e, int fd)
+{
+ e->rfd = fd;
+ e->wfd = fd;
+}
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+ int fds[2];
+ int ret;
+
+#ifdef CONFIG_EVENTFD
+ ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+#else
+ ret = -1;
+ errno = ENOSYS;
+#endif
+ if (ret >= 0) {
+ e->rfd = e->wfd = ret;
+ } else {
+ if (errno != ENOSYS) {
+ return -errno;
+ }
+ if (qemu_pipe(fds) < 0) {
+ return -errno;
+ }
+ ret = fcntl_setfl(fds[0], O_NONBLOCK);
+ if (ret < 0) {
+ ret = -errno;
+ goto fail;
+ }
+ ret = fcntl_setfl(fds[1], O_NONBLOCK);
+ if (ret < 0) {
+ ret = -errno;
+ goto fail;
+ }
+ e->rfd = fds[0];
+ e->wfd = fds[1];
+ }
+ if (active) {
+ event_notifier_set(e);
+ }
+ return 0;
+
+fail:
+ close(fds[0]);
+ close(fds[1]);
+ return ret;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+ if (e->rfd != e->wfd) {
+ close(e->rfd);
+ }
+ close(e->wfd);
+}
+
+int event_notifier_get_fd(const EventNotifier *e)
+{
+ return e->rfd;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+ EventNotifierHandler *handler)
+{
+ qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e);
+ return 0;
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+ static const uint64_t value = 1;
+ ssize_t ret;
+
+ do {
+ ret = write(e->wfd, &value, sizeof(value));
+ } while (ret < 0 && errno == EINTR);
+
+ /* EAGAIN is fine, a read must be pending. */
+ if (ret < 0 && errno != EAGAIN) {
+ return -errno;
+ }
+ return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+ int value;
+ ssize_t len;
+ char buffer[512];
+
+ /* Drain the notify pipe. For eventfd, only 8 bytes will be read. */
+ value = 0;
+ do {
+ len = read(e->rfd, buffer, sizeof(buffer));
+ value |= (len > 0);
+ } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
+
+ return value;
+}
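+
+/*
+ * Usage sketch (illustrative, not part of the original sources):
+ *
+ *     EventNotifier e;
+ *
+ *     if (event_notifier_init(&e, 0) == 0) {
+ *         event_notifier_set(&e);
+ *         event_notifier_test_and_clear(&e);   // returns 1 (was set)
+ *         event_notifier_cleanup(&e);
+ *     }
+ */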
diff --git a/src/util/event_notifier-win32.c b/src/util/event_notifier-win32.c
new file mode 100644
index 0000000..6dbb530
--- /dev/null
+++ b/src/util/event_notifier-win32.c
@@ -0,0 +1,59 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu/event_notifier.h"
+#include "qemu/main-loop.h"
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+ e->event = CreateEvent(NULL, TRUE, FALSE, NULL);
+ assert(e->event);
+ return 0;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+ CloseHandle(e->event);
+}
+
+HANDLE event_notifier_get_handle(EventNotifier *e)
+{
+ return e->event;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+ EventNotifierHandler *handler)
+{
+ if (handler) {
+ return qemu_add_wait_object(e->event, (IOHandler *)handler, e);
+ } else {
+ qemu_del_wait_object(e->event, (IOHandler *)handler, e);
+ return 0;
+ }
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+ SetEvent(e->event);
+ return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+ int ret = WaitForSingleObject(e->event, 0);
+ if (ret == WAIT_OBJECT_0) {
+ ResetEvent(e->event);
+ return true;
+ }
+ return false;
+}
diff --git a/src/util/fifo8.c b/src/util/fifo8.c
new file mode 100644
index 0000000..0ea5ad9
--- /dev/null
+++ b/src/util/fifo8.c
@@ -0,0 +1,125 @@
+/*
+ * Generic FIFO component, implemented as a circular buffer.
+ *
+ * Copyright (c) 2012 Peter A. G. Crosthwaite
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu-common.h"
+#include "qemu/fifo8.h"
+
+void fifo8_create(Fifo8 *fifo, uint32_t capacity)
+{
+ fifo->data = g_new(uint8_t, capacity);
+ fifo->capacity = capacity;
+ fifo->head = 0;
+ fifo->num = 0;
+}
+
+void fifo8_destroy(Fifo8 *fifo)
+{
+ g_free(fifo->data);
+}
+
+void fifo8_push(Fifo8 *fifo, uint8_t data)
+{
+ if (fifo->num == fifo->capacity) {
+ abort();
+ }
+ fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data;
+ fifo->num++;
+}
+
+void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num)
+{
+ uint32_t start, avail;
+
+ if (fifo->num + num > fifo->capacity) {
+ abort();
+ }
+
+ start = (fifo->head + fifo->num) % fifo->capacity;
+
+ if (start + num <= fifo->capacity) {
+ memcpy(&fifo->data[start], data, num);
+ } else {
+ avail = fifo->capacity - start;
+ memcpy(&fifo->data[start], data, avail);
+ memcpy(&fifo->data[0], &data[avail], num - avail);
+ }
+
+ fifo->num += num;
+}
+
+uint8_t fifo8_pop(Fifo8 *fifo)
+{
+ uint8_t ret;
+
+ if (fifo->num == 0) {
+ abort();
+ }
+ ret = fifo->data[fifo->head++];
+ fifo->head %= fifo->capacity;
+ fifo->num--;
+ return ret;
+}
+
+const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num)
+{
+ uint8_t *ret;
+
+ if (max == 0 || max > fifo->num) {
+ abort();
+ }
+ *num = MIN(fifo->capacity - fifo->head, max);
+ ret = &fifo->data[fifo->head];
+ fifo->head += *num;
+ fifo->head %= fifo->capacity;
+ fifo->num -= *num;
+ return ret;
+}
+
+void fifo8_reset(Fifo8 *fifo)
+{
+ fifo->num = 0;
+ fifo->head = 0;
+}
+
+bool fifo8_is_empty(Fifo8 *fifo)
+{
+ return (fifo->num == 0);
+}
+
+bool fifo8_is_full(Fifo8 *fifo)
+{
+ return (fifo->num == fifo->capacity);
+}
+
+uint32_t fifo8_num_free(Fifo8 *fifo)
+{
+ return fifo->capacity - fifo->num;
+}
+
+uint32_t fifo8_num_used(Fifo8 *fifo)
+{
+ return fifo->num;
+}
+
+const VMStateDescription vmstate_fifo8 = {
+ .name = "Fifo8",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity),
+ VMSTATE_UINT32(head, Fifo8),
+ VMSTATE_UINT32(num, Fifo8),
+ VMSTATE_END_OF_LIST()
+ }
+};
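+
+/*
+ * Usage sketch (illustrative, not part of the original sources):
+ *
+ *     Fifo8 fifo;
+ *
+ *     fifo8_create(&fifo, 16);
+ *     fifo8_push(&fifo, 0xab);
+ *     if (!fifo8_is_empty(&fifo)) {
+ *         uint8_t v = fifo8_pop(&fifo);   // v == 0xab
+ *     }
+ *     fifo8_destroy(&fifo);
+ */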
diff --git a/src/util/getauxval.c b/src/util/getauxval.c
new file mode 100644
index 0000000..1732ace
--- /dev/null
+++ b/src/util/getauxval.c
@@ -0,0 +1,109 @@
+/*
+ * QEMU access to the auxiliary vector
+ *
+ * Copyright (C) 2013 Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_GETAUXVAL
+/* Don't inline this in qemu/osdep.h, because pulling in <sys/auxv.h> for
+ the system declaration of getauxval pulls in the system <elf.h>, which
+ conflicts with qemu's version. */
+
+#include <sys/auxv.h>
+
+unsigned long qemu_getauxval(unsigned long key)
+{
+ return getauxval(key);
+}
+#elif defined(__linux__)
+#include "elf.h"
+
+/* Our elf.h doesn't contain Elf32_auxv_t and Elf64_auxv_t, which is ok because
+ that just makes it easier to define it properly for the host here. */
+typedef struct {
+ unsigned long a_type;
+ unsigned long a_val;
+} ElfW_auxv_t;
+
+static const ElfW_auxv_t *auxv;
+
+static const ElfW_auxv_t *qemu_init_auxval(void)
+{
+ ElfW_auxv_t *a;
+ ssize_t size = 512, r, ofs;
+ int fd;
+
+ /* Allocate some initial storage. Make sure the first entry is set
+ to end-of-list, so that we've got a valid list in case of error. */
+ auxv = a = g_malloc(size);
+ a[0].a_type = 0;
+ a[0].a_val = 0;
+
+ fd = open("/proc/self/auxv", O_RDONLY);
+ if (fd < 0) {
+ return a;
+ }
+
+ /* Read the first SIZE bytes. Hopefully, this covers everything. */
+ r = read(fd, a, size);
+
+ if (r == size) {
+ /* Continue to expand until we do get a partial read. */
+ do {
+ ofs = size;
+ size *= 2;
+ auxv = a = g_realloc(a, size);
+ r = read(fd, (char *)a + ofs, ofs);
+ } while (r == ofs);
+ }
+
+ close(fd);
+ return a;
+}
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+ const ElfW_auxv_t *a = auxv;
+
+ if (unlikely(a == NULL)) {
+ a = qemu_init_auxval();
+ }
+
+ for (; a->a_type != 0; a++) {
+ if (a->a_type == type) {
+ return a->a_val;
+ }
+ }
+
+ return 0;
+}
+
+#else
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+ return 0;
+}
+
+#endif
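+
+/*
+ * Usage sketch (illustrative, not part of the original sources): AT_HWCAP is
+ * assumed to be available from the elf headers pulled in above,
+ *
+ *     unsigned long hwcap = qemu_getauxval(AT_HWCAP);
+ *     if (hwcap == 0) {
+ *         // either the key is absent or this platform has no auxv
+ *     }
+ */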
diff --git a/src/util/hbitmap.c b/src/util/hbitmap.c
new file mode 100644
index 0000000..50b888f
--- /dev/null
+++ b/src/util/hbitmap.c
@@ -0,0 +1,495 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+
+/* An HBitmap provides an array of bits.  The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level. When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split in group of bits like
+ * this (for the 64-bit case):
+ *
+ * bits 0-57 => word in the last bitmap | bits 58-63 => bit in the word
+ * bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ * bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits. To move down, you shift the index left
+ * similarly, and add the word index within the group. Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * Setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once.  Hence, the total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps. Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+struct HBitmap {
+ /* Number of total bits in the bottom level. */
+ uint64_t size;
+
+ /* Number of set bits in the bottom level. */
+ uint64_t count;
+
+    /* A scaling factor.  Given a granularity of G, each bit in the bitmap
+     * will actually represent a group of 2^G elements.  Each operation on a
+     * range of bits first rounds the bits to determine which group they land
+     * in, and then affects the entire group; iteration will only visit the first
+ * bit of each group. Here is an example of operations in a size-16,
+ * granularity-1 HBitmap:
+ *
+ * initial state 00000000
+ * set(start=0, count=9) 11111000 (iter: 0, 2, 4, 6, 8)
+ * reset(start=1, count=3) 00111000 (iter: 4, 6, 8)
+ * set(start=9, count=2) 00111100 (iter: 4, 6, 8, 10)
+ * reset(start=5, count=5) 00000000
+ *
+ * From an implementation point of view, when setting or resetting bits,
+ * the bitmap will scale bit numbers right by this amount of bits. When
+ * iterating, the bitmap will scale bit numbers left by this amount of
+ * bits.
+ */
+ int granularity;
+
+ /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+ * coarsest). Each bit in level N represents a word in level N+1 that
+ * has a set bit, except the last level where each bit represents the
+ * actual bitmap.
+ *
+ * Note that all bitmaps have the same number of levels. Even a 1-bit
+ * bitmap will still allocate HBITMAP_LEVELS arrays.
+ */
+ unsigned long *levels[HBITMAP_LEVELS];
+
+ /* The length of each levels[] array. */
+ uint64_t sizes[HBITMAP_LEVELS];
+};
+
+/* Advance hbi to the next nonzero word and return it. hbi->pos
+ * is updated. Returns zero if we reach the end of the bitmap.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+ size_t pos = hbi->pos;
+ const HBitmap *hb = hbi->hb;
+ unsigned i = HBITMAP_LEVELS - 1;
+
+ unsigned long cur;
+ do {
+ cur = hbi->cur[--i];
+ pos >>= BITS_PER_LEVEL;
+ } while (cur == 0);
+
+ /* Check for end of iteration. We always use fewer than BITS_PER_LONG
+ * bits in the level 0 bitmap; thus we can repurpose the most significant
+ * bit as a sentinel. The sentinel is set in hbitmap_alloc and ensures
+ * that the above loop ends even without an explicit check on i.
+ */
+
+ if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+ return 0;
+ }
+ for (; i < HBITMAP_LEVELS - 1; i++) {
+ /* Shift back pos to the left, matching the right shifts above.
+ * The index of this word's least significant set bit provides
+ * the low-order bits.
+ */
+ assert(cur);
+ pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
+ hbi->cur[i] = cur & (cur - 1);
+
+ /* Set up next level for iteration. */
+ cur = hb->levels[i + 1][pos];
+ }
+
+ hbi->pos = pos;
+ trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+ assert(cur);
+ return cur;
+}
+
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+ unsigned i, bit;
+ uint64_t pos;
+
+ hbi->hb = hb;
+ pos = first >> hb->granularity;
+ assert(pos < hb->size);
+ hbi->pos = pos >> BITS_PER_LEVEL;
+ hbi->granularity = hb->granularity;
+
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ bit = pos & (BITS_PER_LONG - 1);
+ pos >>= BITS_PER_LEVEL;
+
+ /* Drop bits representing items before first. */
+ hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+ /* We have already added level i+1, so the lowest set bit has
+ * been processed. Clear it.
+ */
+ if (i != HBITMAP_LEVELS - 1) {
+ hbi->cur[i] &= ~(1UL << bit);
+ }
+ }
+}
+
+bool hbitmap_empty(const HBitmap *hb)
+{
+ return hb->count == 0;
+}
+
+int hbitmap_granularity(const HBitmap *hb)
+{
+ return hb->granularity;
+}
+
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+ return hb->count << hb->granularity;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity. Also an example of how to use hbitmap_iter_next_word.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+ HBitmapIter hbi;
+ uint64_t count = 0;
+ uint64_t end = last + 1;
+ unsigned long cur;
+ size_t pos;
+
+ hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+ for (;;) {
+ pos = hbitmap_iter_next_word(&hbi, &cur);
+ if (pos >= (end >> BITS_PER_LEVEL)) {
+ break;
+ }
+ count += ctpopl(cur);
+ }
+
+ if (pos == (end >> BITS_PER_LEVEL)) {
+ /* Drop bits representing the END-th and subsequent items. */
+ int bit = end & (BITS_PER_LONG - 1);
+ cur &= (1UL << bit) - 1;
+ count += ctpopl(cur);
+ }
+
+ return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes from zero to non-zero.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool changed;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ changed = (*elem == 0);
+ *elem |= mask;
+ return changed;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+ changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ changed |= (hb->levels[level][i] == 0);
+ hb->levels[level][i] = ~0UL;
+ }
+ }
+ changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+ /* If there was any change in this layer, we may have to update
+ * the one above.
+ */
+ if (level > 0 && changed) {
+ hb_set_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+ /* Compute range in the last layer. */
+ uint64_t last = start + count - 1;
+
+ trace_hbitmap_set(hb, start, count,
+ start >> hb->granularity, last >> hb->granularity);
+
+ start >>= hb->granularity;
+ last >>= hb->granularity;
+ count = last - start + 1;
+
+ hb->count += count - hb_count_between(hb, start, last);
+ hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+/* Resetting works the other way round: propagate up if the new
+ * value is zero.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+ unsigned long mask;
+ bool blanked;
+
+ assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+ assert(start <= last);
+
+ mask = 2UL << (last & (BITS_PER_LONG - 1));
+ mask -= 1UL << (start & (BITS_PER_LONG - 1));
+ blanked = *elem != 0 && ((*elem & ~mask) == 0);
+ *elem &= ~mask;
+ return blanked;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+ size_t pos = start >> BITS_PER_LEVEL;
+ size_t lastpos = last >> BITS_PER_LEVEL;
+ bool changed = false;
+ size_t i;
+
+ i = pos;
+ if (i < lastpos) {
+ uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+ /* Here we need a more complex test than when setting bits. Even if
+ * something was changed, we must not blank bits in the upper level
+ * unless the lower-level word became entirely zero. So, remove pos
+ * from the upper-level range if bits remain set.
+ */
+ if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+ changed = true;
+ } else {
+ pos++;
+ }
+
+ for (;;) {
+ start = next;
+ next += BITS_PER_LONG;
+ if (++i == lastpos) {
+ break;
+ }
+ changed |= (hb->levels[level][i] != 0);
+ hb->levels[level][i] = 0UL;
+ }
+ }
+
+ /* Same as above, this time for lastpos. */
+ if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+ changed = true;
+ } else {
+ lastpos--;
+ }
+
+ if (level > 0 && changed) {
+ hb_reset_between(hb, level - 1, pos, lastpos);
+ }
+}
+
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+ /* Compute range in the last layer. */
+ uint64_t last = start + count - 1;
+
+ trace_hbitmap_reset(hb, start, count,
+ start >> hb->granularity, last >> hb->granularity);
+
+ start >>= hb->granularity;
+ last >>= hb->granularity;
+
+ hb->count -= hb_count_between(hb, start, last);
+ hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+void hbitmap_reset_all(HBitmap *hb)
+{
+ unsigned int i;
+
+ /* Same as hbitmap_alloc() except for memset() instead of malloc() */
+ for (i = HBITMAP_LEVELS; --i >= 1; ) {
+ memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long));
+ }
+
+ hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1);
+ hb->count = 0;
+}
+
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+ /* Compute position and bit in the last layer. */
+ uint64_t pos = item >> hb->granularity;
+ unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+
+ return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
+}
+
+void hbitmap_free(HBitmap *hb)
+{
+ unsigned i;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ g_free(hb->levels[i]);
+ }
+ g_free(hb);
+}
+
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+ HBitmap *hb = g_new0(struct HBitmap, 1);
+ unsigned i;
+
+ assert(granularity >= 0 && granularity < 64);
+ size = (size + (1ULL << granularity) - 1) >> granularity;
+ assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+ hb->size = size;
+ hb->granularity = granularity;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+ hb->sizes[i] = size;
+ hb->levels[i] = g_new0(unsigned long, size);
+ }
+
+ /* We necessarily have free bits in level 0 due to the definition
+ * of HBITMAP_LEVELS, so use one for a sentinel. This speeds up
+ * hbitmap_iter_skip_words.
+ */
+ assert(size == 1);
+ hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+ return hb;
+}
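+
+/*
+ * Usage sketch (illustrative, not part of the original sources): with
+ * granularity 0 every bit maps to one element.  hbitmap_iter_next() is
+ * assumed to come from the corresponding header.
+ *
+ *     HBitmap *hb = hbitmap_alloc(1024, 0);
+ *     HBitmapIter hbi;
+ *     int64_t next;
+ *
+ *     hbitmap_set(hb, 10, 3);                       // bits 10, 11, 12
+ *     hbitmap_iter_init(&hbi, hb, 0);
+ *     while ((next = hbitmap_iter_next(&hbi)) >= 0) {
+ *         // visits 10, 11, 12
+ *     }
+ *     hbitmap_free(hb);
+ */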
+
+void hbitmap_truncate(HBitmap *hb, uint64_t size)
+{
+ bool shrink;
+ unsigned i;
+ uint64_t num_elements = size;
+ uint64_t old;
+
+ /* Size comes in as logical elements, adjust for granularity. */
+ size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity;
+ assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+ shrink = size < hb->size;
+
+ /* bit sizes are identical; nothing to do. */
+ if (size == hb->size) {
+ return;
+ }
+
+ /* If we're losing bits, let's clear those bits before we invalidate all of
+ * our invariants. This helps keep the bitcount consistent, and will prevent
+ * us from carrying around garbage bits beyond the end of the map.
+ */
+ if (shrink) {
+ /* Don't clear partial granularity groups;
+ * start at the first full one. */
+ uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity);
+ uint64_t fix_count = (hb->size << hb->granularity) - start;
+
+ assert(fix_count);
+ hbitmap_reset(hb, start, fix_count);
+ }
+
+ hb->size = size;
+ for (i = HBITMAP_LEVELS; i-- > 0; ) {
+ size = MAX(BITS_TO_LONGS(size), 1);
+ if (hb->sizes[i] == size) {
+ break;
+ }
+ old = hb->sizes[i];
+ hb->sizes[i] = size;
+ hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+ if (!shrink) {
+ memset(&hb->levels[i][old], 0x00,
+ (size - old) * sizeof(*hb->levels[i]));
+ }
+ }
+}
+
+
+/**
+ * Given HBitmaps A and B, let A := A (BITOR) B.
+ * Bitmap B will not be modified.
+ *
+ * @return true if the merge was successful,
+ * false if it was not attempted.
+ */
+bool hbitmap_merge(HBitmap *a, const HBitmap *b)
+{
+ int i;
+ uint64_t j;
+
+ if ((a->size != b->size) || (a->granularity != b->granularity)) {
+ return false;
+ }
+
+ if (hbitmap_count(b) == 0) {
+ return true;
+ }
+
+ /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
+ * It may be possible to improve running times for sparsely populated maps
+ * by using hbitmap_iter_next, but this is suboptimal for dense maps.
+ */
+ for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
+ for (j = 0; j < a->sizes[i]; j++) {
+ a->levels[i][j] |= b->levels[i][j];
+ }
+ }
+
+ return true;
+}
diff --git a/src/util/hexdump.c b/src/util/hexdump.c
new file mode 100644
index 0000000..969b340
--- /dev/null
+++ b/src/util/hexdump.c
@@ -0,0 +1,37 @@
+/*
+ * Helper to hexdump a buffer
+ *
+ * Copyright (c) 2013 Red Hat, Inc.
+ * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright (c) 2013 Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ * Copyright (c) 2013 Xilinx, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+
+void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
+{
+ unsigned int b;
+
+ for (b = 0; b < size; b++) {
+ if ((b % 16) == 0) {
+ fprintf(fp, "%s: %04x:", prefix, b);
+ }
+ if ((b % 4) == 0) {
+ fprintf(fp, " ");
+ }
+ fprintf(fp, " %02x", (unsigned char)buf[b]);
+ if ((b % 16) == 15) {
+ fprintf(fp, "\n");
+ }
+ }
+ if ((b % 16) != 0) {
+ fprintf(fp, "\n");
+ }
+}
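+
+/*
+ * Usage sketch (illustrative, not part of the original sources): dump a
+ * packet buffer to stderr with a tag in front of every line; pkt and
+ * pkt_len are placeholder names,
+ *
+ *     qemu_hexdump((const char *)pkt, stderr, "net", pkt_len);
+ */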
diff --git a/src/util/host-utils.c b/src/util/host-utils.c
new file mode 100644
index 0000000..102e5bf
--- /dev/null
+++ b/src/util/host-utils.c
@@ -0,0 +1,162 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2007 Aurelien Jarno
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include "qemu/host-utils.h"
+
+/* Long integer helpers */
+static inline void mul64(uint64_t *plow, uint64_t *phigh,
+ uint64_t a, uint64_t b)
+{
+ typedef union {
+ uint64_t ll;
+ struct {
+#ifdef HOST_WORDS_BIGENDIAN
+ uint32_t high, low;
+#else
+ uint32_t low, high;
+#endif
+ } l;
+ } LL;
+ LL rl, rm, rn, rh, a0, b0;
+ uint64_t c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rl.ll = (uint64_t)a0.l.low * b0.l.low;
+ rm.ll = (uint64_t)a0.l.low * b0.l.high;
+ rn.ll = (uint64_t)a0.l.high * b0.l.low;
+ rh.ll = (uint64_t)a0.l.high * b0.l.high;
+
+ c = (uint64_t)rl.l.high + rm.l.low + rn.l.low;
+ rl.l.high = c;
+ c >>= 32;
+ c = c + rm.l.high + rn.l.high + rh.l.low;
+ rh.l.low = c;
+ rh.l.high += (uint32_t)(c >> 32);
+
+ *plow = rl.ll;
+ *phigh = rh.ll;
+}
+
+/* Unsigned 64x64 -> 128 multiplication */
+void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+ mul64(plow, phigh, a, b);
+}
+
+/* Signed 64x64 -> 128 multiplication */
+void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
+{
+ uint64_t rh;
+
+ mul64(plow, &rh, a, b);
+
+ /* Adjust for signs. */
+ if (b < 0) {
+ rh -= a;
+ }
+ if (a < 0) {
+ rh -= b;
+ }
+ *phigh = rh;
+}
+
+/*
+ * Unsigned 128x64 division.  Returns 1 if overflow (divide by zero or
+ * quotient exceeds 64 bits).  Otherwise returns quotient via plow and
+ * remainder via phigh.
+ */
+int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+{
+ uint64_t dhi = *phigh;
+ uint64_t dlo = *plow;
+ unsigned i;
+ uint64_t carry = 0;
+
+ if (divisor == 0) {
+ return 1;
+ } else if (dhi == 0) {
+ *plow = dlo / divisor;
+ *phigh = dlo % divisor;
+ return 0;
+ } else if (dhi > divisor) {
+ return 1;
+ } else {
+
+ for (i = 0; i < 64; i++) {
+ carry = dhi >> 63;
+ dhi = (dhi << 1) | (dlo >> 63);
+ if (carry || (dhi >= divisor)) {
+ dhi -= divisor;
+ carry = 1;
+ } else {
+ carry = 0;
+ }
+ dlo = (dlo << 1) | carry;
+ }
+
+ *plow = dlo;
+ *phigh = dhi;
+ return 0;
+ }
+}
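+
+/*
+ * Example (illustrative, not part of the original sources): dividing the
+ * 128-bit value 2^64 by 2,
+ *
+ *     uint64_t lo = 0, hi = 1;
+ *
+ *     divu128(&lo, &hi, 2);   // returns 0; lo == 1ULL << 63, hi == 0
+ */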
+
+int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+{
+ int sgn_dvdnd = *phigh < 0;
+ int sgn_divsr = divisor < 0;
+ int overflow = 0;
+
+ if (sgn_dvdnd) {
+ *plow = ~(*plow);
+ *phigh = ~(*phigh);
+ if (*plow == (int64_t)-1) {
+ *plow = 0;
+ (*phigh)++;
+ } else {
+ (*plow)++;
+ }
+ }
+
+ if (sgn_divsr) {
+ divisor = 0 - divisor;
+ }
+
+ overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+
+ if (sgn_dvdnd ^ sgn_divsr) {
+ *plow = 0 - *plow;
+ }
+
+ if (!overflow) {
+ if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
+ overflow = 1;
+ }
+ }
+
+ return overflow;
+}
+
diff --git a/src/util/id.c b/src/util/id.c
new file mode 100644
index 0000000..7883fbe
--- /dev/null
+++ b/src/util/id.c
@@ -0,0 +1,65 @@
+/*
+ * Dealing with identifiers
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later. See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+bool id_wellformed(const char *id)
+{
+ int i;
+
+ if (!qemu_isalpha(id[0])) {
+ return false;
+ }
+ for (i = 1; id[i]; i++) {
+ if (!qemu_isalnum(id[i]) && !strchr("-._", id[i])) {
+ return false;
+ }
+ }
+ return true;
+}
+
+#define ID_SPECIAL_CHAR '#'
+
+static const char *const id_subsys_str[ID_MAX] = {
+ [ID_QDEV] = "qdev",
+ [ID_BLOCK] = "block",
+};
+
+/*
+ * Generates an ID of the form PREFIX SUBSYSTEM NUMBER
+ * where:
+ *
+ * - PREFIX is the reserved character '#'
+ * - SUBSYSTEM identifies the subsystem creating the ID
+ * - NUMBER is a decimal counter unique within SUBSYSTEM, followed by two
+ *   random digits
+ *
+ * Example: "#block146"
+ *
+ * Note that these IDs do not satisfy id_wellformed().
+ *
+ * The caller is responsible for freeing the returned string with g_free()
+ */
+char *id_generate(IdSubSystems id)
+{
+ static uint64_t id_counters[ID_MAX];
+ uint32_t rnd;
+
+ assert(id < ARRAY_SIZE(id_subsys_str));
+ assert(id_subsys_str[id]);
+
+ rnd = g_random_int_range(0, 100);
+
+ return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR,
+ id_subsys_str[id],
+ id_counters[id]++,
+ rnd);
+}
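+
+/*
+ * Usage sketch (illustrative, not part of the original sources):
+ *
+ *     char *name = id_generate(ID_BLOCK);   // e.g. "#block042"
+ *     ...
+ *     g_free(name);
+ */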
diff --git a/src/util/iov.c b/src/util/iov.c
new file mode 100644
index 0000000..a0d5934
--- /dev/null
+++ b/src/util/iov.c
@@ -0,0 +1,577 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright IBM, Corp. 2007, 2008
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ * Anthony Liguori <aliguori@us.ibm.com>
+ * Amit Shah <amit.shah@redhat.com>
+ * Michael Tokarev <mjt@tls.msk.ru>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, const void *buf, size_t bytes)
+{
+ size_t done;
+ unsigned int i;
+ for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+ if (offset < iov[i].iov_len) {
+ size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+ memcpy(iov[i].iov_base + offset, buf + done, len);
+ done += len;
+ offset = 0;
+ } else {
+ offset -= iov[i].iov_len;
+ }
+ }
+ assert(offset == 0);
+ return done;
+}
+
+size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, void *buf, size_t bytes)
+{
+ size_t done;
+ unsigned int i;
+ for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+ if (offset < iov[i].iov_len) {
+ size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+ memcpy(buf + done, iov[i].iov_base + offset, len);
+ done += len;
+ offset = 0;
+ } else {
+ offset -= iov[i].iov_len;
+ }
+ }
+ assert(offset == 0);
+ return done;
+}
+
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+ size_t offset, int fillc, size_t bytes)
+{
+ size_t done;
+ unsigned int i;
+ for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+ if (offset < iov[i].iov_len) {
+ size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+ memset(iov[i].iov_base + offset, fillc, len);
+ done += len;
+ offset = 0;
+ } else {
+ offset -= iov[i].iov_len;
+ }
+ }
+ assert(offset == 0);
+ return done;
+}
+
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
+{
+ size_t len;
+ unsigned int i;
+
+ len = 0;
+ for (i = 0; i < iov_cnt; i++) {
+ len += iov[i].iov_len;
+ }
+ return len;
+}
+
+/* helper function for iov_send_recv() */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#ifdef CONFIG_POSIX
+ ssize_t ret;
+ struct msghdr msg;
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = iov_cnt;
+ do {
+ ret = do_send
+ ? sendmsg(sockfd, &msg, 0)
+ : recvmsg(sockfd, &msg, 0);
+ } while (ret < 0 && errno == EINTR);
+ return ret;
+#else
+ /* else send piece-by-piece */
+ /*XXX Note: windows has WSASend() and WSARecv() */
+ unsigned i = 0;
+ ssize_t ret = 0;
+ while (i < iov_cnt) {
+ ssize_t r = do_send
+ ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
+ : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+ if (r > 0) {
+ ret += r;
+ } else if (!r) {
+ break;
+ } else if (errno == EINTR) {
+ continue;
+ } else {
+ /* some "other" error; report it to the caller only if
+ * no data was processed so far */
+ if (ret == 0) {
+ ret = -1;
+ }
+ break;
+ }
+ i++;
+ }
+ return ret;
+#endif
+}
+
+ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt,
+ size_t offset, size_t bytes,
+ bool do_send)
+{
+ ssize_t total = 0;
+ ssize_t ret;
+ size_t orig_len, tail;
+ unsigned niov;
+ struct iovec *local_iov, *iov;
+
+ if (bytes <= 0) {
+ return 0;
+ }
+
+ local_iov = g_new0(struct iovec, iov_cnt);
+ iov_copy(local_iov, iov_cnt, _iov, iov_cnt, offset, bytes);
+ offset = 0;
+ iov = local_iov;
+
+ while (bytes > 0) {
+ /* Find the start position, skipping `offset' bytes:
+ * first, skip all full-sized vector elements, */
+ for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
+ offset -= iov[niov].iov_len;
+ }
+
+ /* niov == iov_cnt would only be valid if bytes == 0, which
+ * we already ruled out in the loop condition. */
+ assert(niov < iov_cnt);
+ iov += niov;
+ iov_cnt -= niov;
+
+ if (offset) {
+ /* second, skip `offset' bytes from the (now) first element,
+ * undo it on exit */
+ iov[0].iov_base += offset;
+ iov[0].iov_len -= offset;
+ }
+ /* Find the end position skipping `bytes' bytes: */
+ /* first, skip all full-sized elements */
+ tail = bytes;
+ for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
+ tail -= iov[niov].iov_len;
+ }
+ if (tail) {
+ /* second, fixup the last element, and remember the original
+ * length */
+ assert(niov < iov_cnt);
+ assert(iov[niov].iov_len > tail);
+ orig_len = iov[niov].iov_len;
+ iov[niov++].iov_len = tail;
+ ret = do_send_recv(sockfd, iov, niov, do_send);
+ /* Undo the changes above before checking for errors */
+ iov[niov-1].iov_len = orig_len;
+ } else {
+ ret = do_send_recv(sockfd, iov, niov, do_send);
+ }
+ if (offset) {
+ iov[0].iov_base -= offset;
+ iov[0].iov_len += offset;
+ }
+
+ if (ret < 0) {
+ assert(errno != EINTR);
+ g_free(local_iov);
+ if (errno == EAGAIN && total > 0) {
+ return total;
+ }
+ return -1;
+ }
+
+ if (ret == 0 && !do_send) {
+ /* recv returns 0 when the peer has performed an orderly
+ * shutdown. */
+ break;
+ }
+
+ /* Prepare for the next iteration */
+ offset += ret;
+ total += ret;
+ bytes -= ret;
+ }
+
+ g_free(local_iov);
+ return total;
+}
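+
+/*
+ * Usage sketch (illustrative; 'sockfd', 'iov', 'iov_cnt' and 'n' are
+ * hypothetical caller values): send the first 'n' bytes described by an
+ * iovec array over a socket.  A return value smaller than 'n' with
+ * errno == EAGAIN typically means the socket would block after that
+ * many bytes.
+ *
+ *     ssize_t sent = iov_send_recv(sockfd, iov, iov_cnt, 0, n, true);
+ */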
+
+
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+ FILE *fp, const char *prefix, size_t limit)
+{
+ int v;
+ size_t size = 0;
+ char *buf;
+
+ for (v = 0; v < iov_cnt; v++) {
+ size += iov[v].iov_len;
+ }
+ size = size > limit ? limit : size;
+ buf = g_malloc(size);
+ iov_to_buf(iov, iov_cnt, 0, buf, size);
+ qemu_hexdump(buf, fp, prefix, size);
+ g_free(buf);
+}
+
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+ const struct iovec *iov, unsigned int iov_cnt,
+ size_t offset, size_t bytes)
+{
+ size_t len;
+ unsigned int i, j;
+ for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
+ if (offset >= iov[i].iov_len) {
+ offset -= iov[i].iov_len;
+ continue;
+ }
+ len = MIN(bytes, iov[i].iov_len - offset);
+
+ dst_iov[j].iov_base = iov[i].iov_base + offset;
+ dst_iov[j].iov_len = len;
+ j++;
+ bytes -= len;
+ offset = 0;
+ }
+ assert(offset == 0);
+ return j;
+}
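+
+/*
+ * Usage sketch (illustrative; 'src', 'src_cnt', 'off' and 'n' are
+ * hypothetical): build a sub-view covering bytes [off, off + n) of an
+ * existing iovec array without copying any data.
+ *
+ *     struct iovec view[src_cnt];
+ *     unsigned view_cnt = iov_copy(view, src_cnt, src, src_cnt, off, n);
+ */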
+
+/* io vectors */
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+ qiov->iov = g_new(struct iovec, alloc_hint);
+ qiov->niov = 0;
+ qiov->nalloc = alloc_hint;
+ qiov->size = 0;
+}
+
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+ int i;
+
+ qiov->iov = iov;
+ qiov->niov = niov;
+ qiov->nalloc = -1;
+ qiov->size = 0;
+ for (i = 0; i < niov; i++)
+ qiov->size += iov[i].iov_len;
+}
+
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+ assert(qiov->nalloc != -1);
+
+ if (qiov->niov == qiov->nalloc) {
+ qiov->nalloc = 2 * qiov->nalloc + 1;
+ qiov->iov = g_renew(struct iovec, qiov->iov, qiov->nalloc);
+ }
+ qiov->iov[qiov->niov].iov_base = base;
+ qiov->iov[qiov->niov].iov_len = len;
+ qiov->size += len;
+ ++qiov->niov;
+}
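+
+/*
+ * Usage sketch (illustrative; 'hdr', 'payload' and 'payload_len' are
+ * hypothetical): build a two-element QEMUIOVector, use it, then release it.
+ *
+ *     QEMUIOVector qiov;
+ *
+ *     qemu_iovec_init(&qiov, 2);
+ *     qemu_iovec_add(&qiov, hdr, sizeof(*hdr));
+ *     qemu_iovec_add(&qiov, payload, payload_len);
+ *     ...
+ *     qemu_iovec_destroy(&qiov);
+ */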
+
+/*
+ * Concatenates (partial) iovecs from src_iov to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src_iov and adds individual vectors from src_iov to
+ * dst, copying up to `sbytes' bytes total, or up to the end
+ * of src_iov if that comes first. This way, it is okay to specify
+ * a very large value for `sbytes' to indicate "up to the end
+ * of src_iov".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
+ struct iovec *src_iov, unsigned int src_cnt,
+ size_t soffset, size_t sbytes)
+{
+ int i;
+ size_t done;
+
+ if (!sbytes) {
+ return 0;
+ }
+ assert(dst->nalloc != -1);
+ for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
+ if (soffset < src_iov[i].iov_len) {
+ size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
+ qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
+ done += len;
+ soffset = 0;
+ } else {
+ soffset -= src_iov[i].iov_len;
+ }
+ }
+ assert(soffset == 0); /* offset beyond end of src */
+
+ return done;
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst, copying up to `sbytes' bytes total, or up to the end
+ * of src if that comes first. This way, it is okay to specify
+ * a very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+ QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+ qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
+}
+
+/*
+ * Check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+ int i;
+ for (i = 0; i < qiov->niov; i++) {
+ size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long));
+ uint8_t *ptr = qiov->iov[i].iov_base;
+ if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+ return false;
+ }
+ for (; offs < qiov->iov[i].iov_len; offs++) {
+ if (ptr[offs]) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+ assert(qiov->nalloc != -1);
+
+ qemu_iovec_reset(qiov);
+ g_free(qiov->iov);
+ qiov->nalloc = 0;
+ qiov->iov = NULL;
+}
+
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+ assert(qiov->nalloc != -1);
+
+ qiov->niov = 0;
+ qiov->size = 0;
+}
+
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+ void *buf, size_t bytes)
+{
+ return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+ const void *buf, size_t bytes)
+{
+ return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+ int fillc, size_t bytes)
+{
+ return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * The IO vectors must have the same structure (same length of all parts).
+ * A typical usage is to compare vectors created with qemu_iovec_clone().
+ *
+ * @a: I/O vector
+ * @b: I/O vector
+ * @ret: Offset to first mismatching byte or -1 if match
+ */
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+ int i;
+ ssize_t offset = 0;
+
+ assert(a->niov == b->niov);
+ for (i = 0; i < a->niov; i++) {
+ size_t len = 0;
+ uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+ uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+ assert(a->iov[i].iov_len == b->iov[i].iov_len);
+ while (len < a->iov[i].iov_len && *p++ == *q++) {
+ len++;
+ }
+
+ offset += len;
+
+ if (len != a->iov[i].iov_len) {
+ return offset;
+ }
+ }
+ return -1;
+}
+
+typedef struct {
+ int src_index;
+ struct iovec *src_iov;
+ void *dest_base;
+} IOVectorSortElem;
+
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+ const IOVectorSortElem *elem_a = a;
+ const IOVectorSortElem *elem_b = b;
+
+ /* Don't overflow */
+ if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
+ return -1;
+ } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
+ return 1;
+ } else {
+ return 0;
+ }
+}
+
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+ const IOVectorSortElem *elem_a = a;
+ const IOVectorSortElem *elem_b = b;
+
+ return elem_a->src_index - elem_b->src_index;
+}
+
+/**
+ * Copy contents of I/O vector
+ *
+ * The relative relationships of overlapping iovecs are preserved. This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ */
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
+{
+ IOVectorSortElem sortelems[src->niov];
+ void *last_end;
+ int i;
+
+ /* Sort the source iovecs by base address */
+ for (i = 0; i < src->niov; i++) {
+ sortelems[i].src_index = i;
+ sortelems[i].src_iov = &src->iov[i];
+ }
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+ /* Allocate buffer space taking into account overlapping iovecs */
+ last_end = NULL;
+ for (i = 0; i < src->niov; i++) {
+ struct iovec *cur = sortelems[i].src_iov;
+ ptrdiff_t rewind = 0;
+
+ /* Detect overlap */
+ if (last_end && last_end > cur->iov_base) {
+ rewind = last_end - cur->iov_base;
+ }
+
+ sortelems[i].dest_base = buf - rewind;
+ buf += cur->iov_len - MIN(rewind, cur->iov_len);
+ last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+ }
+
+ /* Sort by source iovec index and build destination iovec */
+ qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+ for (i = 0; i < src->niov; i++) {
+ qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+ }
+}
+
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+ size_t bytes)
+{
+ size_t total = 0;
+ struct iovec *cur;
+
+ for (cur = *iov; *iov_cnt > 0; cur++) {
+ if (cur->iov_len > bytes) {
+ cur->iov_base += bytes;
+ cur->iov_len -= bytes;
+ total += bytes;
+ break;
+ }
+
+ bytes -= cur->iov_len;
+ total += cur->iov_len;
+ *iov_cnt -= 1;
+ }
+
+ *iov = cur;
+ return total;
+}
+
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+ size_t bytes)
+{
+ size_t total = 0;
+ struct iovec *cur;
+
+ if (*iov_cnt == 0) {
+ return 0;
+ }
+
+ cur = iov + (*iov_cnt - 1);
+
+ while (*iov_cnt > 0) {
+ if (cur->iov_len > bytes) {
+ cur->iov_len -= bytes;
+ total += bytes;
+ break;
+ }
+
+ bytes -= cur->iov_len;
+ total += cur->iov_len;
+ cur--;
+ *iov_cnt -= 1;
+ }
+
+ return total;
+}
+
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
+{
+ size_t total;
+ unsigned int niov = qiov->niov;
+
+ assert(qiov->size >= bytes);
+ total = iov_discard_back(qiov->iov, &niov, bytes);
+ assert(total == bytes);
+
+ qiov->niov = niov;
+ qiov->size -= bytes;
+}
diff --git a/src/util/memfd.c b/src/util/memfd.c
new file mode 100644
index 0000000..587ef5a
--- /dev/null
+++ b/src/util/memfd.c
@@ -0,0 +1,162 @@
+/*
+ * memfd.c
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include <glib.h>
+#include <glib/gprintf.h>
+
+#include <sys/mman.h>
+
+#include "qemu/memfd.h"
+
+#ifdef CONFIG_MEMFD
+#include <sys/memfd.h>
+#elif defined CONFIG_LINUX
+#include <sys/syscall.h>
+#include <asm/unistd.h>
+
+static int qemu_memfd_create(const char *name, unsigned int flags)
+{
+#ifdef __NR_memfd_create
+ return syscall(__NR_memfd_create, name, flags);
+#else
+ return -1;
+#endif
+}
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+/*
+ * This is a best-effort helper for shared memory allocation, with
+ * optional sealing. The helper will do its best to allocate using
+ * memfd with sealing, but may fall back on other methods without
+ * sealing.
+ */
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+ int *fd)
+{
+ void *ptr;
+ int mfd = -1;
+
+ *fd = -1;
+
+#ifdef CONFIG_LINUX
+ if (seals) {
+ mfd = qemu_memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+ }
+
+ if (mfd == -1) {
+ /* some systems have memfd without sealing */
+ mfd = qemu_memfd_create(name, MFD_CLOEXEC);
+ seals = 0;
+ }
+#endif
+
+ if (mfd != -1) {
+ if (ftruncate(mfd, size) == -1) {
+ perror("ftruncate");
+ close(mfd);
+ return NULL;
+ }
+
+ if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+ perror("fcntl");
+ close(mfd);
+ return NULL;
+ }
+ } else {
+ const char *tmpdir = g_get_tmp_dir();
+ gchar *fname;
+
+ fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
+ mfd = mkstemp(fname);
+ unlink(fname);
+ g_free(fname);
+
+ if (mfd == -1) {
+ perror("mkstemp");
+ return NULL;
+ }
+
+ if (ftruncate(mfd, size) == -1) {
+ perror("ftruncate");
+ close(mfd);
+ return NULL;
+ }
+ }
+
+ ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ close(mfd);
+ return NULL;
+ }
+
+ *fd = mfd;
+ return ptr;
+}
+
+void qemu_memfd_free(void *ptr, size_t size, int fd)
+{
+ if (ptr) {
+ munmap(ptr, size);
+ }
+
+ if (fd != -1) {
+ close(fd);
+ }
+}
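+
+/*
+ * Usage sketch (illustrative; the name and size are arbitrary examples):
+ * allocate one page of shared memory without sealing and release it again.
+ *
+ *     int fd;
+ *     void *p = qemu_memfd_alloc("example", 4096, 0, &fd);
+ *
+ *     if (p) {
+ *         ...
+ *         qemu_memfd_free(p, 4096, fd);
+ *     }
+ */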
+
+enum {
+ MEMFD_KO,
+ MEMFD_OK,
+ MEMFD_TODO
+};
+
+bool qemu_memfd_check(void)
+{
+ static int memfd_check = MEMFD_TODO;
+
+ if (memfd_check == MEMFD_TODO) {
+ int fd;
+ void *ptr;
+
+ ptr = qemu_memfd_alloc("test", 4096, 0, &fd);
+ memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
+ qemu_memfd_free(ptr, 4096, fd);
+ }
+
+ return memfd_check == MEMFD_OK;
+}
diff --git a/src/util/mmap-alloc.c b/src/util/mmap-alloc.c
new file mode 100644
index 0000000..54793a5
--- /dev/null
+++ b/src/util/mmap-alloc.c
@@ -0,0 +1,110 @@
+/*
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ * Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+#include <qemu/mmap-alloc.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+#define HUGETLBFS_MAGIC 0x958458f6
+
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+size_t qemu_fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+ struct statfs fs;
+ int ret;
+
+ if (fd != -1) {
+ do {
+ ret = fstatfs(fd, &fs);
+ } while (ret != 0 && errno == EINTR);
+
+ if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+ return fs.f_bsize;
+ }
+ }
+#endif
+
+ return getpagesize();
+}
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+{
+ /*
+ * Note: this always allocates at least one extra page of virtual address
+ * space, even if size is already aligned.
+ */
+ size_t total = size + align;
+#if defined(__powerpc64__) && defined(__linux__)
+ /* On ppc64 mappings in the same segment (aka slice) must share the same
+ * page size. Since we will be re-allocating part of this segment
+ * from the supplied fd, we should make sure to use the same page size,
+ * unless we are using the system page size, in which case anonymous memory
+ * is OK. Use align as a hint for the page size.
+ * In this case, set MAP_NORESERVE to avoid allocating backing store memory.
+ */
+ int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
+ int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
+ void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
+#else
+ void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#endif
+ size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+ void *ptr1;
+
+ if (ptr == MAP_FAILED) {
+ return MAP_FAILED;
+ }
+
+ /* Make sure align is a power of 2 */
+ assert(!(align & (align - 1)));
+ /* Always align to host page size */
+ assert(align >= getpagesize());
+
+ ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED |
+ (fd == -1 ? MAP_ANONYMOUS : 0) |
+ (shared ? MAP_SHARED : MAP_PRIVATE),
+ fd, 0);
+ if (ptr1 == MAP_FAILED) {
+ munmap(ptr, total);
+ return MAP_FAILED;
+ }
+
+ ptr += offset;
+ total -= offset;
+
+ if (offset > 0) {
+ munmap(ptr - offset, offset);
+ }
+
+ /*
+ * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+ * a guard page guarding against potential buffer overflows.
+ */
+ if (total > size + getpagesize()) {
+ munmap(ptr + size + getpagesize(), total - size - getpagesize());
+ }
+
+ return ptr;
+}
+
+void qemu_ram_munmap(void *ptr, size_t size)
+{
+ if (ptr) {
+ /* Unmap both the RAM block and the guard page */
+ munmap(ptr, size + getpagesize());
+ }
+}
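+
+/*
+ * Usage sketch (illustrative; the size and alignment are arbitrary
+ * examples): map a 16 MiB anonymous RAM block aligned to 2 MiB, then
+ * unmap it together with its guard page.
+ *
+ *     void *block = qemu_ram_mmap(-1, 16 * 1024 * 1024,
+ *                                 2 * 1024 * 1024, false);
+ *
+ *     if (block != MAP_FAILED) {
+ *         ...
+ *         qemu_ram_munmap(block, 16 * 1024 * 1024);
+ *     }
+ */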
diff --git a/src/util/module.c b/src/util/module.c
new file mode 100644
index 0000000..4bd4a94
--- /dev/null
+++ b/src/util/module.c
@@ -0,0 +1,219 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+#include "qemu-common.h"
+#ifdef CONFIG_MODULES
+#include <gmodule.h>
+#endif
+#include "qemu/queue.h"
+#include "qemu/module.h"
+
+typedef struct ModuleEntry
+{
+ void (*init)(void);
+ QTAILQ_ENTRY(ModuleEntry) node;
+ module_init_type type;
+} ModuleEntry;
+
+typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
+
+static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+
+static ModuleTypeList dso_init_list;
+
+static void init_lists(void)
+{
+ static int inited;
+ int i;
+
+ if (inited) {
+ return;
+ }
+
+ for (i = 0; i < MODULE_INIT_MAX; i++) {
+ QTAILQ_INIT(&init_type_list[i]);
+ }
+
+ QTAILQ_INIT(&dso_init_list);
+
+ inited = 1;
+}
+
+
+static ModuleTypeList *find_type(module_init_type type)
+{
+ ModuleTypeList *l;
+
+ init_lists();
+
+ l = &init_type_list[type];
+
+ return l;
+}
+
+void register_module_init(void (*fn)(void), module_init_type type)
+{
+ ModuleEntry *e;
+ ModuleTypeList *l;
+
+ e = g_malloc0(sizeof(*e));
+ e->init = fn;
+ e->type = type;
+
+ l = find_type(type);
+
+ QTAILQ_INSERT_TAIL(l, e, node);
+}
+
+void register_dso_module_init(void (*fn)(void), module_init_type type)
+{
+ ModuleEntry *e;
+
+ init_lists();
+
+ e = g_malloc0(sizeof(*e));
+ e->init = fn;
+ e->type = type;
+
+ QTAILQ_INSERT_TAIL(&dso_init_list, e, node);
+}
+
+static void module_load(module_init_type type);
+
+void module_call_init(module_init_type type)
+{
+ ModuleTypeList *l;
+ ModuleEntry *e;
+
+ module_load(type);
+ l = find_type(type);
+
+ QTAILQ_FOREACH(e, l, node) {
+ e->init();
+ }
+}
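+
+/*
+ * Usage sketch (illustrative; it assumes the module_init()/block_init()
+ * macros declared in qemu/module.h): code normally does not call
+ * register_module_init() directly, e.g.
+ *
+ *     static void my_block_driver_init(void)
+ *     {
+ *         ...
+ *     }
+ *     block_init(my_block_driver_init);
+ *
+ * The registered initializers of one group are later run together via
+ * module_call_init(MODULE_INIT_BLOCK).
+ */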
+
+#ifdef CONFIG_MODULES
+static int module_load_file(const char *fname)
+{
+ GModule *g_module;
+ void (*sym)(void);
+ const char *dsosuf = HOST_DSOSUF;
+ int len = strlen(fname);
+ int suf_len = strlen(dsosuf);
+ ModuleEntry *e, *next;
+ int ret;
+
+ if (len <= suf_len || strcmp(&fname[len - suf_len], dsosuf)) {
+ /* wrong suffix */
+ ret = -EINVAL;
+ goto out;
+ }
+ if (access(fname, F_OK)) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ assert(QTAILQ_EMPTY(&dso_init_list));
+
+ g_module = g_module_open(fname, G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL);
+ if (!g_module) {
+ fprintf(stderr, "Failed to open module: %s\n",
+ g_module_error());
+ ret = -EINVAL;
+ goto out;
+ }
+ if (!g_module_symbol(g_module, DSO_STAMP_FUN_STR, (gpointer *)&sym)) {
+ fprintf(stderr, "Failed to initialize module: %s\n",
+ fname);
+ /* Print some info if this is a QEMU module (but from a different
+ * build), which will make debugging user problems easier. */
+ if (g_module_symbol(g_module, "qemu_module_dummy", (gpointer *)&sym)) {
+ fprintf(stderr,
+ "Note: only modules from the same build can be loaded.\n");
+ }
+ g_module_close(g_module);
+ ret = -EINVAL;
+ } else {
+ QTAILQ_FOREACH(e, &dso_init_list, node) {
+ register_module_init(e->init, e->type);
+ }
+ ret = 0;
+ }
+
+ QTAILQ_FOREACH_SAFE(e, &dso_init_list, node, next) {
+ QTAILQ_REMOVE(&dso_init_list, e, node);
+ g_free(e);
+ }
+out:
+ return ret;
+}
+#endif
+
+static void module_load(module_init_type type)
+{
+#ifdef CONFIG_MODULES
+ char *fname = NULL;
+ const char **mp;
+ static const char *block_modules[] = {
+ CONFIG_BLOCK_MODULES
+ };
+ char *exec_dir;
+ char *dirs[3];
+ int i = 0;
+ int ret;
+
+ if (!g_module_supported()) {
+ fprintf(stderr, "Module is not supported by system.\n");
+ return;
+ }
+
+ switch (type) {
+ case MODULE_INIT_BLOCK:
+ mp = block_modules;
+ break;
+ default:
+ /* no other types have dynamic modules for now */
+ return;
+ }
+
+ exec_dir = qemu_get_exec_dir();
+ dirs[i++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR);
+ dirs[i++] = g_strdup_printf("%s/..", exec_dir ? : "");
+ dirs[i++] = g_strdup_printf("%s", exec_dir ? : "");
+ assert(i == ARRAY_SIZE(dirs));
+ g_free(exec_dir);
+ exec_dir = NULL;
+
+ for ( ; *mp; mp++) {
+ for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+ fname = g_strdup_printf("%s/%s%s", dirs[i], *mp, HOST_DSOSUF);
+ ret = module_load_file(fname);
+ g_free(fname);
+ fname = NULL;
+ /* Stop at the first directory from which the module loads */
+ if (!ret) {
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+ g_free(dirs[i]);
+ }
+
+#endif
+}
diff --git a/src/util/notify.c b/src/util/notify.c
new file mode 100644
index 0000000..f215dfc
--- /dev/null
+++ b/src/util/notify.c
@@ -0,0 +1,71 @@
+/*
+ * Notifier lists
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ * Anthony Liguori <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2. See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "qemu/notify.h"
+
+void notifier_list_init(NotifierList *list)
+{
+ QLIST_INIT(&list->notifiers);
+}
+
+void notifier_list_add(NotifierList *list, Notifier *notifier)
+{
+ QLIST_INSERT_HEAD(&list->notifiers, notifier, node);
+}
+
+void notifier_remove(Notifier *notifier)
+{
+ QLIST_REMOVE(notifier, node);
+}
+
+void notifier_list_notify(NotifierList *list, void *data)
+{
+ Notifier *notifier, *next;
+
+ QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
+ notifier->notify(notifier, data);
+ }
+}
+
+void notifier_with_return_list_init(NotifierWithReturnList *list)
+{
+ QLIST_INIT(&list->notifiers);
+}
+
+void notifier_with_return_list_add(NotifierWithReturnList *list,
+ NotifierWithReturn *notifier)
+{
+ QLIST_INSERT_HEAD(&list->notifiers, notifier, node);
+}
+
+void notifier_with_return_remove(NotifierWithReturn *notifier)
+{
+ QLIST_REMOVE(notifier, node);
+}
+
+int notifier_with_return_list_notify(NotifierWithReturnList *list, void *data)
+{
+ NotifierWithReturn *notifier, *next;
+ int ret = 0;
+
+ QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
+ ret = notifier->notify(notifier, data);
+ if (ret != 0) {
+ break;
+ }
+ }
+ return ret;
+}
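+
+/*
+ * Usage sketch (illustrative; the list, notifier and callback names are
+ * hypothetical, and NOTIFIER_LIST_INITIALIZER is assumed to come from
+ * qemu/notify.h): a subsystem keeps a NotifierList, clients hook a
+ * Notifier into it, and the subsystem later fires all callbacks at once.
+ *
+ *     static NotifierList shutdown_notifiers =
+ *         NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
+ *
+ *     static void my_cleanup(Notifier *n, void *data) { ... }
+ *     static Notifier my_notifier = { .notify = my_cleanup };
+ *
+ *     notifier_list_add(&shutdown_notifiers, &my_notifier);
+ *     ...
+ *     notifier_list_notify(&shutdown_notifiers, NULL);
+ */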
diff --git a/src/util/osdep.c b/src/util/osdep.c
new file mode 100644
index 0000000..534b511
--- /dev/null
+++ b/src/util/osdep.c
@@ -0,0 +1,437 @@
+/*
+ * QEMU low level functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+/* Needed early for CONFIG_BSD etc. */
+#include "config-host.h"
+
+#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
+#include <sys/mman.h>
+#endif
+
+#ifdef CONFIG_SOLARIS
+#include <sys/types.h>
+#include <sys/statvfs.h>
+/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
+ discussion about Solaris header problems */
+extern int madvise(caddr_t, size_t, int);
+#endif
+
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+#include "monitor/monitor.h"
+
+static bool fips_enabled = false;
+
+/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
+ * instead of QEMU_VERSION, so setting hw_version on MachineClass
+ * is no longer mandatory.
+ *
+ * Do NOT change this string, or it will break compatibility on all
+ * machine classes that don't set hw_version.
+ */
+static const char *hw_version = "2.5+";
+
+int socket_set_cork(int fd, int v)
+{
+#if defined(SOL_TCP) && defined(TCP_CORK)
+ return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
+#else
+ return 0;
+#endif
+}
+
+int socket_set_nodelay(int fd)
+{
+ int v = 1;
+ return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
+}
+
+int qemu_madvise(void *addr, size_t len, int advice)
+{
+ if (advice == QEMU_MADV_INVALID) {
+ errno = EINVAL;
+ return -1;
+ }
+#if defined(CONFIG_MADVISE)
+ return madvise(addr, len, advice);
+#elif defined(CONFIG_POSIX_MADVISE)
+ return posix_madvise(addr, len, advice);
+#else
+ errno = EINVAL;
+ return -1;
+#endif
+}
+
+#ifndef _WIN32
+/*
+ * Dups an fd and sets the flags
+ */
+static int qemu_dup_flags(int fd, int flags)
+{
+ int ret;
+ int serrno;
+ int dup_flags;
+
+#ifdef F_DUPFD_CLOEXEC
+ ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+ ret = dup(fd);
+ if (ret != -1) {
+ qemu_set_cloexec(ret);
+ }
+#endif
+ if (ret == -1) {
+ goto fail;
+ }
+
+ dup_flags = fcntl(ret, F_GETFL);
+ if (dup_flags == -1) {
+ goto fail;
+ }
+
+ if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
+ errno = EINVAL;
+ goto fail;
+ }
+
+ /* Set/unset flags that we can with fcntl */
+ if (fcntl(ret, F_SETFL, flags) == -1) {
+ goto fail;
+ }
+
+ /* Truncate the file in the cases that open() would truncate it */
+ if (flags & O_TRUNC ||
+ ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
+ if (ftruncate(ret, 0) == -1) {
+ goto fail;
+ }
+ }
+
+ return ret;
+
+fail:
+ serrno = errno;
+ if (ret != -1) {
+ close(ret);
+ }
+ errno = serrno;
+ return -1;
+}
+
+static int qemu_parse_fdset(const char *param)
+{
+ return qemu_parse_fd(param);
+}
+#endif
+
+/*
+ * Opens a file with FD_CLOEXEC set
+ */
+int qemu_open(const char *name, int flags, ...)
+{
+ int ret;
+ int mode = 0;
+
+#ifndef _WIN32
+ const char *fdset_id_str;
+
+ /* Attempt dup of fd from fd set */
+ if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+ int64_t fdset_id;
+ int fd, dupfd;
+
+ fdset_id = qemu_parse_fdset(fdset_id_str);
+ if (fdset_id == -1) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ fd = monitor_fdset_get_fd(fdset_id, flags);
+ if (fd == -1) {
+ return -1;
+ }
+
+ dupfd = qemu_dup_flags(fd, flags);
+ if (dupfd == -1) {
+ return -1;
+ }
+
+ ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
+ if (ret == -1) {
+ close(dupfd);
+ errno = EINVAL;
+ return -1;
+ }
+
+ return dupfd;
+ }
+#endif
+
+ if (flags & O_CREAT) {
+ va_list ap;
+
+ va_start(ap, flags);
+ mode = va_arg(ap, int);
+ va_end(ap);
+ }
+
+#ifdef O_CLOEXEC
+ ret = open(name, flags | O_CLOEXEC, mode);
+#else
+ ret = open(name, flags, mode);
+ if (ret >= 0) {
+ qemu_set_cloexec(ret);
+ }
+#endif
+
+#ifdef O_DIRECT
+ if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
+ error_report("file system may not support O_DIRECT");
+ errno = EINVAL; /* in case it was clobbered */
+ }
+#endif /* O_DIRECT */
+
+ return ret;
+}
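+
+/*
+ * Usage sketch (illustrative; the fdset id is an arbitrary example):
+ * besides plain filenames, qemu_open() understands the special
+ * "/dev/fdset/<id>" syntax, which duplicates a descriptor previously
+ * added to that fd set (e.g. via the QMP 'add-fd' command).
+ *
+ *     int fd = qemu_open("/dev/fdset/3", O_RDWR);
+ *     ...
+ *     qemu_close(fd);
+ */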
+
+int qemu_close(int fd)
+{
+ int64_t fdset_id;
+
+ /* Close fd that was dup'd from an fdset */
+ fdset_id = monitor_fdset_dup_fd_find(fd);
+ if (fdset_id != -1) {
+ int ret;
+
+ ret = close(fd);
+ if (ret == 0) {
+ monitor_fdset_dup_fd_remove(fd);
+ }
+
+ return ret;
+ }
+
+ return close(fd);
+}
+
+/*
+ * A variant of write(2) which handles partial writes.
+ *
+ * Return the number of bytes transferred.
+ * Set errno if fewer than `count' bytes are written.
+ *
+ * This function does not work with non-blocking fds.
+ * Any of the possibilities with non-blocking fds would be bad:
+ * - return a short write (then the name is wrong)
+ * - busy wait, adding (errno == EAGAIN) to the loop
+ */
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t total = 0;
+
+ while (count) {
+ ret = write(fd, buf, count);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ break;
+ }
+
+ count -= ret;
+ buf += ret;
+ total += ret;
+ }
+
+ return total;
+}
+
+/*
+ * Opens a socket with FD_CLOEXEC set
+ */
+int qemu_socket(int domain, int type, int protocol)
+{
+ int ret;
+
+#ifdef SOCK_CLOEXEC
+ ret = socket(domain, type | SOCK_CLOEXEC, protocol);
+ if (ret != -1 || errno != EINVAL) {
+ return ret;
+ }
+#endif
+ ret = socket(domain, type, protocol);
+ if (ret >= 0) {
+ qemu_set_cloexec(ret);
+ }
+
+ return ret;
+}
+
+/*
+ * Accept a connection and set FD_CLOEXEC
+ */
+int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
+{
+ int ret;
+
+#ifdef CONFIG_ACCEPT4
+ ret = accept4(s, addr, addrlen, SOCK_CLOEXEC);
+ if (ret != -1 || errno != ENOSYS) {
+ return ret;
+ }
+#endif
+ ret = accept(s, addr, addrlen);
+ if (ret >= 0) {
+ qemu_set_cloexec(ret);
+ }
+
+ return ret;
+}
+
+void qemu_set_hw_version(const char *version)
+{
+ hw_version = version;
+}
+
+const char *qemu_hw_version(void)
+{
+ return hw_version;
+}
+
+void fips_set_state(bool requested)
+{
+#ifdef __linux__
+ if (requested) {
+ FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
+ if (fds != NULL) {
+ fips_enabled = (fgetc(fds) == '1');
+ fclose(fds);
+ }
+ }
+#else
+ fips_enabled = false;
+#endif /* __linux__ */
+
+#ifdef _FIPS_DEBUG
+ fprintf(stderr, "FIPS mode %s (requested %s)\n",
+ (fips_enabled ? "enabled" : "disabled"),
+ (requested ? "enabled" : "disabled"));
+#endif
+}
+
+bool fips_get_state(void)
+{
+ return fips_enabled;
+}
+
+#ifdef _WIN32
+static void socket_cleanup(void)
+{
+ WSACleanup();
+}
+#endif
+
+int socket_init(void)
+{
+#ifdef _WIN32
+ WSADATA Data;
+ int ret, err;
+
+ ret = WSAStartup(MAKEWORD(2, 2), &Data);
+ if (ret != 0) {
+ err = WSAGetLastError();
+ fprintf(stderr, "WSAStartup: %d\n", err);
+ return -1;
+ }
+ atexit(socket_cleanup);
+#endif
+ return 0;
+}
+
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+/* Ensure that glib is running in multi-threaded mode
+ * Old versions of glib require explicit initialization. Failure to do
+ * this results in the single-threaded code paths being taken inside
+ * glib. For example, the g_slice allocator will not be thread-safe
+ * and cause crashes.
+ */
+static void __attribute__((constructor)) thread_init(void)
+{
+ if (!g_thread_supported()) {
+ g_thread_init(NULL);
+ }
+}
+#endif
+
+#ifndef CONFIG_IOVEC
+/* helper function for iov_send_recv() */
+static ssize_t
+readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
+{
+ unsigned i = 0;
+ ssize_t ret = 0;
+ while (i < iov_cnt) {
+ ssize_t r = do_write
+ ? write(fd, iov[i].iov_base, iov[i].iov_len)
+ : read(fd, iov[i].iov_base, iov[i].iov_len);
+ if (r > 0) {
+ ret += r;
+ } else if (!r) {
+ break;
+ } else if (errno == EINTR) {
+ continue;
+ } else {
+ /* some "other" error; report it to the caller only if
+ * no data was processed so far */
+ if (ret == 0) {
+ ret = -1;
+ }
+ break;
+ }
+ i++;
+ }
+ return ret;
+}
+
+ssize_t
+readv(int fd, const struct iovec *iov, int iov_cnt)
+{
+ return readv_writev(fd, iov, iov_cnt, false);
+}
+
+ssize_t
+writev(int fd, const struct iovec *iov, int iov_cnt)
+{
+ return readv_writev(fd, iov, iov_cnt, true);
+}
+#endif
diff --git a/src/util/oslib-posix.c b/src/util/oslib-posix.c
new file mode 100644
index 0000000..d25f671
--- /dev/null
+++ b/src/util/oslib-posix.c
@@ -0,0 +1,521 @@
+/*
+ * os-posix-lib.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* The following block of code temporarily renames the daemon() function so the
+ compiler does not see the warning associated with it in stdlib.h on OSX */
+#ifdef __APPLE__
+#define daemon qemu_fake_daemon_function
+#include <stdlib.h>
+#undef daemon
+extern int daemon(int, int);
+#endif
+
+#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
+ /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
+ Valgrind does not support alignments larger than 1 MiB,
+ therefore we need special code which handles running on Valgrind. */
+# define QEMU_VMALLOC_ALIGN (512 * 4096)
+#elif defined(__linux__) && defined(__s390x__)
+ /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+# define QEMU_VMALLOC_ALIGN (256 * 4096)
+#else
+# define QEMU_VMALLOC_ALIGN getpagesize()
+#endif
+
+#include <termios.h>
+#include <unistd.h>
+
+#include <glib/gprintf.h>
+
+#include "config-host.h"
+#include "sysemu/sysemu.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+#include <sys/mman.h>
+#include <libgen.h>
+#include <setjmp.h>
+#include <sys/signal.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/syscall.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#endif
+
+#include <qemu/mmap-alloc.h>
+
+int qemu_get_thread_id(void)
+{
+#if defined(__linux__)
+ return syscall(SYS_gettid);
+#else
+ return getpid();
+#endif
+}
+
+int qemu_daemon(int nochdir, int noclose)
+{
+ return daemon(nochdir, noclose);
+}
+
+void *qemu_oom_check(void *ptr)
+{
+ if (ptr == NULL) {
+ fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+ abort();
+ }
+ return ptr;
+}
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+ void *ptr;
+
+ if (alignment < sizeof(void*)) {
+ alignment = sizeof(void*);
+ }
+
+#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
+ int ret;
+ ret = posix_memalign(&ptr, alignment, size);
+ if (ret != 0) {
+ errno = ret;
+ ptr = NULL;
+ }
+#elif defined(CONFIG_BSD)
+ ptr = valloc(size);
+#else
+ ptr = memalign(alignment, size);
+#endif
+ trace_qemu_memalign(alignment, size, ptr);
+ return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
+/* alloc shared memory pages */
+void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
+{
+ size_t align = QEMU_VMALLOC_ALIGN;
+ void *ptr = qemu_ram_mmap(-1, size, align, false);
+
+ if (ptr == MAP_FAILED) {
+ return NULL;
+ }
+
+ if (alignment) {
+ *alignment = align;
+ }
+
+ trace_qemu_anon_ram_alloc(size, ptr);
+ return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+ trace_qemu_vfree(ptr);
+ free(ptr);
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+ trace_qemu_anon_ram_free(ptr, size);
+ qemu_ram_munmap(ptr, size);
+}
+
+void qemu_set_block(int fd)
+{
+ int f;
+ f = fcntl(fd, F_GETFL);
+ fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+}
+
+void qemu_set_nonblock(int fd)
+{
+ int f;
+ f = fcntl(fd, F_GETFL);
+ fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+ int val = 1, ret;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+ (const char *)&val, sizeof(val));
+
+ assert(ret == 0);
+
+ return ret;
+}
+
+void qemu_set_cloexec(int fd)
+{
+ int f;
+ f = fcntl(fd, F_GETFD);
+ fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+}
+
+/*
+ * Creates a pipe with FD_CLOEXEC set on both file descriptors
+ */
+int qemu_pipe(int pipefd[2])
+{
+ int ret;
+
+#ifdef CONFIG_PIPE2
+ ret = pipe2(pipefd, O_CLOEXEC);
+ if (ret != -1 || errno != ENOSYS) {
+ return ret;
+ }
+#endif
+ ret = pipe(pipefd);
+ if (ret == 0) {
+ qemu_set_cloexec(pipefd[0]);
+ qemu_set_cloexec(pipefd[1]);
+ }
+
+ return ret;
+}
+
+int qemu_utimens(const char *path, const struct timespec *times)
+{
+ struct timeval tv[2], tv_now;
+ struct stat st;
+ int i;
+#ifdef CONFIG_UTIMENSAT
+ int ret;
+
+ ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
+ if (ret != -1 || errno != ENOSYS) {
+ return ret;
+ }
+#endif
+ /* Fallback: use utimes() instead of utimensat() */
+
+ /* handle the easy special cases first */
+ if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
+ return 0;
+ }
+ if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
+ return utimes(path, NULL);
+ }
+
+ /* prepare for hard cases */
+ if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
+ gettimeofday(&tv_now, NULL);
+ }
+ if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
+ stat(path, &st);
+ }
+
+ for (i = 0; i < 2; i++) {
+ if (times[i].tv_nsec == UTIME_NOW) {
+ tv[i].tv_sec = tv_now.tv_sec;
+ tv[i].tv_usec = tv_now.tv_usec;
+ } else if (times[i].tv_nsec == UTIME_OMIT) {
+ tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
+ tv[i].tv_usec = 0;
+ } else {
+ tv[i].tv_sec = times[i].tv_sec;
+ tv[i].tv_usec = times[i].tv_nsec / 1000;
+ }
+ }
+
+ return utimes(path, &tv[0]);
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+ return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
+ relative_pathname);
+}
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+ struct termios tty;
+
+ tcgetattr(fd, &tty);
+
+ if (echo) {
+ tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
+ } else {
+ tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+ }
+
+ tcsetattr(fd, TCSANOW, &tty);
+}
+
+static char exec_dir[PATH_MAX];
+
+void qemu_init_exec_dir(const char *argv0)
+{
+ char *dir;
+ char *p = NULL;
+ char buf[PATH_MAX];
+
+ assert(!exec_dir[0]);
+
+#if defined(__linux__)
+ {
+ int len;
+ len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+ if (len > 0) {
+ buf[len] = 0;
+ p = buf;
+ }
+ }
+#elif defined(__FreeBSD__)
+ {
+ static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+ size_t len = sizeof(buf) - 1;
+
+ *buf = '\0';
+ if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
+ *buf) {
+ buf[sizeof(buf) - 1] = '\0';
+ p = buf;
+ }
+ }
+#endif
+ /* If we don't have any way of figuring out the actual executable
+ location then try argv[0]. */
+ if (!p) {
+ if (!argv0) {
+ return;
+ }
+ p = realpath(argv0, buf);
+ if (!p) {
+ return;
+ }
+ }
+ dir = dirname(p);
+
+ pstrcpy(exec_dir, sizeof(exec_dir), dir);
+}
+
+char *qemu_get_exec_dir(void)
+{
+ return g_strdup(exec_dir);
+}
+
+static sigjmp_buf sigjump;
+
+static void sigbus_handler(int signal)
+{
+ siglongjmp(sigjump, 1);
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory)
+{
+ int ret;
+ struct sigaction act, oldact;
+ sigset_t set, oldset;
+
+ memset(&act, 0, sizeof(act));
+ act.sa_handler = &sigbus_handler;
+ act.sa_flags = 0;
+
+ ret = sigaction(SIGBUS, &act, &oldact);
+ if (ret) {
+ perror("os_mem_prealloc: failed to install signal handler");
+ exit(1);
+ }
+
+ /* unblock SIGBUS */
+ sigemptyset(&set);
+ sigaddset(&set, SIGBUS);
+ pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+ if (sigsetjmp(sigjump, 1)) {
+ fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
+ "pages available to allocate guest RAM\n");
+ exit(1);
+ } else {
+ int i;
+ size_t hpagesize = qemu_fd_getpagesize(fd);
+ size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+
+ /* MAP_POPULATE silently ignores failures */
+ for (i = 0; i < numpages; i++) {
+ memset(area + (hpagesize * i), 0, 1);
+ }
+
+ ret = sigaction(SIGBUS, &oldact, NULL);
+ if (ret) {
+ perror("os_mem_prealloc: failed to reinstall signal handler");
+ exit(1);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+ }
+}
+
+
+static struct termios oldtty;
+
+static void term_exit(void)
+{
+ tcsetattr(0, TCSANOW, &oldtty);
+}
+
+static void term_init(void)
+{
+ struct termios tty;
+
+ tcgetattr(0, &tty);
+ oldtty = tty;
+
+ tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
+ |INLCR|IGNCR|ICRNL|IXON);
+ tty.c_oflag |= OPOST;
+ tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
+ tty.c_cflag &= ~(CSIZE|PARENB);
+ tty.c_cflag |= CS8;
+ tty.c_cc[VMIN] = 1;
+ tty.c_cc[VTIME] = 0;
+
+ tcsetattr(0, TCSANOW, &tty);
+
+ atexit(term_exit);
+}
+
+int qemu_read_password(char *buf, int buf_size)
+{
+ uint8_t ch;
+ int i, ret;
+
+ printf("password: ");
+ fflush(stdout);
+ term_init();
+ i = 0;
+ for (;;) {
+ ret = read(0, &ch, 1);
+ if (ret == -1) {
+ if (errno == EAGAIN || errno == EINTR) {
+ continue;
+ } else {
+ break;
+ }
+ } else if (ret == 0) {
+ ret = -1;
+ break;
+ } else {
+ if (ch == '\r' ||
+ ch == '\n') {
+ ret = 0;
+ break;
+ }
+ if (i < (buf_size - 1)) {
+ buf[i++] = ch;
+ }
+ }
+ }
+ term_exit();
+ buf[i] = '\0';
+ printf("\n");
+ return ret;
+}
+
+
+pid_t qemu_fork(Error **errp)
+{
+ sigset_t oldmask, newmask;
+ struct sigaction sig_action;
+ int saved_errno;
+ pid_t pid;
+
+ /*
+ * Need to block signals now, so that child process can safely
+ * kill off caller's signal handlers without a race.
+ */
+ sigfillset(&newmask);
+ if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
+ error_setg_errno(errp, errno,
+ "cannot block signals");
+ return -1;
+ }
+
+ pid = fork();
+ saved_errno = errno;
+
+ if (pid < 0) {
+ /* attempt to restore signal mask, but ignore failure, to
+ * avoid obscuring the fork failure */
+ (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+ error_setg_errno(errp, saved_errno,
+ "cannot fork child process");
+ errno = saved_errno;
+ return -1;
+ } else if (pid) {
+ /* parent process */
+
+ /* Restore our original signal mask now that the child is
+ * safely running. Only documented failures are EFAULT (not
+ * possible, since we are using just-grabbed mask) or EINVAL
+ * (not possible, since we are using correct arguments). */
+ (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+ } else {
+ /* child process */
+ size_t i;
+
+ /* Clear out all signal handlers from parent so nothing
+ * unexpected can happen in our child once we unblock
+ * signals */
+ sig_action.sa_handler = SIG_DFL;
+ sig_action.sa_flags = 0;
+ sigemptyset(&sig_action.sa_mask);
+
+ for (i = 1; i < NSIG; i++) {
+ /* The only possible errors are EFAULT or EINVAL. The former
+ * won't happen, the latter we expect, so there is no need to
+ * check the return value. */
+ (void)sigaction(i, &sig_action, NULL);
+ }
+
+ /* Unmask all signals in child, since we've no idea what the
+ * caller's done with their signal mask and don't want to
+ * propagate that to children */
+ sigemptyset(&newmask);
+ if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
+ Error *local_err = NULL;
+ error_setg_errno(&local_err, errno,
+ "cannot unblock signals");
+ error_report_err(local_err);
+ _exit(1);
+ }
+ }
+ return pid;
+}
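+
+/*
+ * Usage sketch (illustrative): qemu_fork() is used like fork(), but the
+ * child starts with default signal handlers and an empty signal mask, and
+ * failures are reported through an Error object.
+ *
+ *     Error *local_err = NULL;
+ *     pid_t pid = qemu_fork(&local_err);
+ *
+ *     if (pid < 0) {
+ *         error_report_err(local_err);
+ *     } else if (pid == 0) {
+ *         ... child-only code, ending in exec or _exit() ...
+ *     }
+ */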
diff --git a/src/util/oslib-win32.c b/src/util/oslib-win32.c
new file mode 100644
index 0000000..6a47019
--- /dev/null
+++ b/src/util/oslib-win32.c
@@ -0,0 +1,507 @@
+/*
+ * os-win32.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions for win32 which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * The implementation of g_poll (functions poll_rest, g_poll) at the end of
+ * this file are based on code from GNOME glib-2 and use a different license,
+ * see the license comment there.
+ */
+#include <windows.h>
+#include <glib.h>
+#include <stdlib.h>
+#include "config-host.h"
+#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+
+/* this must come after including "trace.h" */
+#include <shlobj.h>
+
+void *qemu_oom_check(void *ptr)
+{
+ if (ptr == NULL) {
+ fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
+ abort();
+ }
+ return ptr;
+}
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+ void *ptr;
+
+ if (!size) {
+ abort();
+ }
+ ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ trace_qemu_memalign(alignment, size, ptr);
+ return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+ return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align)
+{
+ void *ptr;
+
+ /* FIXME: this is not exactly an optimal solution since VirtualAlloc
+ has 64 KiB granularity, but at least it guarantees that the
+ memory is page aligned. */
+ ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+ trace_qemu_anon_ram_alloc(size, ptr);
+ return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+ trace_qemu_vfree(ptr);
+ if (ptr) {
+ VirtualFree(ptr, 0, MEM_RELEASE);
+ }
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+ trace_qemu_anon_ram_free(ptr, size);
+ if (ptr) {
+ VirtualFree(ptr, 0, MEM_RELEASE);
+ }
+}
+
+#ifndef CONFIG_LOCALTIME_R
+/* FIXME: add proper locking */
+struct tm *gmtime_r(const time_t *timep, struct tm *result)
+{
+ struct tm *p = gmtime(timep);
+ memset(result, 0, sizeof(*result));
+ if (p) {
+ *result = *p;
+ p = result;
+ }
+ return p;
+}
+
+/* FIXME: add proper locking */
+struct tm *localtime_r(const time_t *timep, struct tm *result)
+{
+ struct tm *p = localtime(timep);
+ memset(result, 0, sizeof(*result));
+ if (p) {
+ *result = *p;
+ p = result;
+ }
+ return p;
+}
+#endif /* CONFIG_LOCALTIME_R */
+
+void qemu_set_block(int fd)
+{
+ unsigned long opt = 0;
+ WSAEventSelect(fd, NULL, 0);
+ ioctlsocket(fd, FIONBIO, &opt);
+}
+
+void qemu_set_nonblock(int fd)
+{
+ unsigned long opt = 1;
+ ioctlsocket(fd, FIONBIO, &opt);
+ qemu_fd_register(fd);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+ /* Enabling the reuse of an endpoint that was used by a socket still in
+ * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows
+ * fast reuse is the default and SO_REUSEADDR does strange things. So we
+ * don't have to do anything here. More info can be found at:
+ * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */
+ return 0;
+}
+
+int inet_aton(const char *cp, struct in_addr *ia)
+{
+ uint32_t addr = inet_addr(cp);
+ if (addr == 0xffffffff) {
+ return 0;
+ }
+ ia->s_addr = addr;
+ return 1;
+}
+
+void qemu_set_cloexec(int fd)
+{
+}
+
+/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
+#define _W32_FT_OFFSET (116444736000000000ULL)
+
+int qemu_gettimeofday(qemu_timeval *tp)
+{
+ union {
+ unsigned long long ns100; /* time since 1 Jan 1601 in 100 ns units */
+ FILETIME ft;
+ } _now;
+
+ if(tp) {
+ GetSystemTimeAsFileTime (&_now.ft);
+ tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL );
+ tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL);
+ }
+ /* Always return 0 as per Open Group Base Specifications Issue 6.
+ Do not set errno on error. */
+ return 0;
+}
+
+int qemu_get_thread_id(void)
+{
+ return GetCurrentThreadId();
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+ HRESULT result;
+ char base_path[MAX_PATH+1] = "";
+
+ result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL,
+ /* SHGFP_TYPE_CURRENT */ 0, base_path);
+ if (result != S_OK) {
+ /* misconfigured environment */
+ g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result);
+ abort();
+ }
+ return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path,
+ relative_pathname);
+}
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+ HANDLE handle = (HANDLE)_get_osfhandle(fd);
+ DWORD dwMode = 0;
+
+ if (handle == INVALID_HANDLE_VALUE) {
+ return;
+ }
+
+ GetConsoleMode(handle, &dwMode);
+
+ if (echo) {
+ SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT);
+ } else {
+ SetConsoleMode(handle,
+ dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT));
+ }
+}
+
+static char exec_dir[PATH_MAX];
+
+void qemu_init_exec_dir(const char *argv0)
+{
+
+ char *p;
+ char buf[MAX_PATH];
+ DWORD len;
+
+ len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
+ if (len == 0) {
+ return;
+ }
+
+ buf[len] = 0;
+ p = buf + len - 1;
+ while (p != buf && *p != '\\') {
+ p--;
+ }
+ *p = 0;
+ if (access(buf, R_OK) == 0) {
+ pstrcpy(exec_dir, sizeof(exec_dir), buf);
+ }
+}
+
+char *qemu_get_exec_dir(void)
+{
+ return g_strdup(exec_dir);
+}
+
+/*
+ * The original implementation of g_poll from glib has a problem on Windows
+ * when using timeouts < 10 ms.
+ *
+ * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead
+ * of wait. This causes significant performance degradation of QEMU.
+ *
+ * The following code is a copy of the original code from glib/gpoll.c
+ * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19).
+ * Some debug code was removed and the code was reformatted.
+ * All other code modifications are marked with 'QEMU'.
+ */
+
+/*
+ * gpoll.c: poll(2) abstraction
+ * Copyright 1998 Owen Taylor
+ * Copyright 2008 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles,
+ GPollFD *fds, guint nfds, gint timeout)
+{
+ DWORD ready;
+ GPollFD *f;
+ int recursed_result;
+
+ if (poll_msgs) {
+ /* Wait for either messages or handles
+ * -> Use MsgWaitForMultipleObjectsEx
+ */
+ ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout,
+ QS_ALLINPUT, MWMO_ALERTABLE);
+
+ if (ready == WAIT_FAILED) {
+ gchar *emsg = g_win32_error_message(GetLastError());
+ g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg);
+ g_free(emsg);
+ }
+ } else if (nhandles == 0) {
+ /* No handles to wait for, just the timeout */
+ if (timeout == INFINITE) {
+ ready = WAIT_FAILED;
+ } else {
+ SleepEx(timeout, TRUE);
+ ready = WAIT_TIMEOUT;
+ }
+ } else {
+ /* Wait for just handles
+ * -> Use WaitForMultipleObjectsEx
+ */
+ ready =
+ WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE);
+ if (ready == WAIT_FAILED) {
+ gchar *emsg = g_win32_error_message(GetLastError());
+ g_warning("WaitForMultipleObjectsEx failed: %s", emsg);
+ g_free(emsg);
+ }
+ }
+
+ if (ready == WAIT_FAILED) {
+ return -1;
+ } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) {
+ return 0;
+ } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) {
+ for (f = fds; f < &fds[nfds]; ++f) {
+ if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) {
+ f->revents |= G_IO_IN;
+ }
+ }
+
+ /* If we have a timeout, or no handles to poll, be satisfied
+ * with just noticing we have messages waiting.
+ */
+ if (timeout != 0 || nhandles == 0) {
+ return 1;
+ }
+
+ /* If no timeout and handles to poll, recurse to poll them,
+ * too.
+ */
+ recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
+ return (recursed_result == -1) ? -1 : 1 + recursed_result;
+ } else if (/* QEMU: removed the following unneeded statement which causes
+ * a compiler warning: ready >= WAIT_OBJECT_0 && */
+ ready < WAIT_OBJECT_0 + nhandles) {
+ for (f = fds; f < &fds[nfds]; ++f) {
+ if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) {
+ f->revents = f->events;
+ }
+ }
+
+ /* If no timeout and polling several handles, recurse to poll
+ * the rest of them.
+ */
+ if (timeout == 0 && nhandles > 1) {
+ /* Remove the handle that fired */
+ int i;
+ if (ready < nhandles - 1) {
+ for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) {
+ handles[i-1] = handles[i];
+ }
+ }
+ nhandles--;
+ recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
+ return (recursed_result == -1) ? -1 : 1 + recursed_result;
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
+gint g_poll(GPollFD *fds, guint nfds, gint timeout)
+{
+ HANDLE handles[MAXIMUM_WAIT_OBJECTS];
+ gboolean poll_msgs = FALSE;
+ GPollFD *f;
+ gint nhandles = 0;
+ int retval;
+
+ for (f = fds; f < &fds[nfds]; ++f) {
+ if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) {
+ poll_msgs = TRUE;
+ } else if (f->fd > 0) {
+ /* Don't add the same handle several times into the array, as
+ * docs say that is not allowed, even if it actually does seem
+ * to work.
+ */
+ gint i;
+
+ for (i = 0; i < nhandles; i++) {
+ if (handles[i] == (HANDLE) f->fd) {
+ break;
+ }
+ }
+
+ if (i == nhandles) {
+ if (nhandles == MAXIMUM_WAIT_OBJECTS) {
+ g_warning("Too many handles to wait for!\n");
+ break;
+ } else {
+ handles[nhandles++] = (HANDLE) f->fd;
+ }
+ }
+ }
+ }
+
+ for (f = fds; f < &fds[nfds]; ++f) {
+ f->revents = 0;
+ }
+
+ if (timeout == -1) {
+ timeout = INFINITE;
+ }
+
+ /* Polling for several things? */
+ if (nhandles > 1 || (nhandles > 0 && poll_msgs)) {
+ /* First check if one or several of them are immediately
+ * available
+ */
+ retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0);
+
+ /* If not, and we have a significant timeout, poll again with
+         * that timeout. Note that this will report readiness for only one
+         * event, or only for messages. We ignore timeouts of less than ten
+         * milliseconds because they are mostly pointless on Windows: the
+         * MsgWaitForMultipleObjectsEx() call will time out right away
+         * anyway.
+ *
+ * Modification for QEMU: replaced timeout >= 10 by timeout > 0.
+ */
+ if (retval == 0 && (timeout == INFINITE || timeout > 0)) {
+ retval = poll_rest(poll_msgs, handles, nhandles,
+ fds, nfds, timeout);
+ }
+ } else {
+ /* Just polling for one thing, so no need to check first if
+ * available immediately
+ */
+ retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout);
+ }
+
+ if (retval == -1) {
+ for (f = fds; f < &fds[nfds]; ++f) {
+ f->revents = 0;
+ }
+ }
+
+ return retval;
+}
+
+int getpagesize(void)
+{
+ SYSTEM_INFO system_info;
+
+ GetSystemInfo(&system_info);
+ return system_info.dwPageSize;
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory)
+{
+ int i;
+ size_t pagesize = getpagesize();
+
+ memory = (memory + pagesize - 1) & -pagesize;
+ for (i = 0; i < memory / pagesize; i++) {
+ memset(area + pagesize * i, 0, 1);
+ }
+}
+
+
+/* XXX: add proper win32 support */
+int qemu_read_password(char *buf, int buf_size)
+{
+ int c, i;
+
+ printf("Password: ");
+ fflush(stdout);
+ i = 0;
+ for (;;) {
+ c = getchar();
+ if (c < 0) {
+ buf[i] = '\0';
+ return -1;
+ } else if (c == '\n') {
+ break;
+ } else if (i < (buf_size - 1)) {
+ buf[i++] = c;
+ }
+ }
+ buf[i] = '\0';
+ return 0;
+}
+
+
+pid_t qemu_fork(Error **errp)
+{
+ errno = ENOSYS;
+ error_setg_errno(errp, errno,
+ "cannot fork child process");
+ return -1;
+}
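
As a rough usage sketch for the g_poll() replacement above (the handle, timeout and wrapper function are hypothetical), a caller can poll one waitable kernel object together with the thread's message queue:

    #include <glib.h>
    #include <windows.h>

    /* Hypothetical sketch: wait up to 1 s for a waitable handle or window messages. */
    static gboolean wait_handle_or_messages(HANDLE event_handle)
    {
        GPollFD pfds[2];

        pfds[0].fd = (gintptr) event_handle;   /* ordinary waitable HANDLE */
        pfds[0].events = G_IO_IN;
        pfds[0].revents = 0;
        pfds[1].fd = G_WIN32_MSG_HANDLE;       /* pseudo-fd: also poll the message queue */
        pfds[1].events = G_IO_IN;
        pfds[1].revents = 0;

        return g_poll(pfds, 2, 1000) > 0;      /* >0: handle signalled or messages pending */
    }
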
diff --git a/src/util/path.c b/src/util/path.c
new file mode 100644
index 0000000..4e4877e
--- /dev/null
+++ b/src/util/path.c
@@ -0,0 +1,181 @@
+/* Code to mangle pathnames into those matching a given prefix.
+ eg. open("/lib/foo.so") => open("/usr/gnemul/i386-linux/lib/foo.so");
+
+ The assumption is that this area does not change.
+*/
+#include <sys/types.h>
+#include <sys/param.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include "qemu-common.h"
+
+struct pathelem
+{
+ /* Name of this, eg. lib */
+ char *name;
+ /* Full path name, eg. /usr/gnemul/x86-linux/lib. */
+ char *pathname;
+ struct pathelem *parent;
+ /* Children */
+ unsigned int num_entries;
+ struct pathelem *entries[0];
+};
+
+static struct pathelem *base;
+
+/* First N chars of S1 match S2, and S2 is N chars long. */
+static int strneq(const char *s1, unsigned int n, const char *s2)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ if (s1[i] != s2[i])
+ return 0;
+ return s2[i] == 0;
+}
+
+static struct pathelem *add_entry(struct pathelem *root, const char *name,
+ unsigned type);
+
+static struct pathelem *new_entry(const char *root,
+ struct pathelem *parent,
+ const char *name)
+{
+ struct pathelem *new = g_malloc(sizeof(*new));
+ new->name = g_strdup(name);
+ new->pathname = g_strdup_printf("%s/%s", root, name);
+ new->num_entries = 0;
+ return new;
+}
+
+#define streq(a,b) (strcmp((a), (b)) == 0)
+
+/* Not all systems provide this feature */
+#if defined(DT_DIR) && defined(DT_UNKNOWN) && defined(DT_LNK)
+# define dirent_type(dirent) ((dirent)->d_type)
+# define is_dir_maybe(type) \
+ ((type) == DT_DIR || (type) == DT_UNKNOWN || (type) == DT_LNK)
+#else
+# define dirent_type(dirent) (1)
+# define is_dir_maybe(type) (type)
+#endif
+
+static struct pathelem *add_dir_maybe(struct pathelem *path)
+{
+ DIR *dir;
+
+ if ((dir = opendir(path->pathname)) != NULL) {
+ struct dirent *dirent;
+
+ while ((dirent = readdir(dir)) != NULL) {
+ if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")){
+ path = add_entry(path, dirent->d_name, dirent_type(dirent));
+ }
+ }
+ closedir(dir);
+ }
+ return path;
+}
+
+static struct pathelem *add_entry(struct pathelem *root, const char *name,
+ unsigned type)
+{
+ struct pathelem **e;
+
+ root->num_entries++;
+
+ root = g_realloc(root, sizeof(*root)
+ + sizeof(root->entries[0])*root->num_entries);
+ e = &root->entries[root->num_entries-1];
+
+ *e = new_entry(root->pathname, root, name);
+ if (is_dir_maybe(type)) {
+ *e = add_dir_maybe(*e);
+ }
+
+ return root;
+}
+
+/* This needs to be done after the tree is stabilized (i.e. no more reallocs!). */
+static void set_parents(struct pathelem *child, struct pathelem *parent)
+{
+ unsigned int i;
+
+ child->parent = parent;
+ for (i = 0; i < child->num_entries; i++)
+ set_parents(child->entries[i], child);
+}
+
+/* FIXME: Doesn't handle DIR/.. where DIR is not in emulated dir. */
+static const char *
+follow_path(const struct pathelem *cursor, const char *name)
+{
+ unsigned int i, namelen;
+
+ name += strspn(name, "/");
+ namelen = strcspn(name, "/");
+
+ if (namelen == 0)
+ return cursor->pathname;
+
+ if (strneq(name, namelen, ".."))
+ return follow_path(cursor->parent, name + namelen);
+
+ if (strneq(name, namelen, "."))
+ return follow_path(cursor, name + namelen);
+
+ for (i = 0; i < cursor->num_entries; i++)
+ if (strneq(name, namelen, cursor->entries[i]->name))
+ return follow_path(cursor->entries[i], name + namelen);
+
+ /* Not found */
+ return NULL;
+}
+
+void init_paths(const char *prefix)
+{
+ char pref_buf[PATH_MAX];
+
+ if (prefix[0] == '\0' ||
+ !strcmp(prefix, "/"))
+ return;
+
+ if (prefix[0] != '/') {
+ char *cwd = getcwd(NULL, 0);
+ size_t pref_buf_len = sizeof(pref_buf);
+
+ if (!cwd)
+ abort();
+ pstrcpy(pref_buf, sizeof(pref_buf), cwd);
+ pstrcat(pref_buf, pref_buf_len, "/");
+ pstrcat(pref_buf, pref_buf_len, prefix);
+ free(cwd);
+ } else
+ pstrcpy(pref_buf, sizeof(pref_buf), prefix + 1);
+
+ base = new_entry("", NULL, pref_buf);
+ base = add_dir_maybe(base);
+ if (base->num_entries == 0) {
+ g_free(base->pathname);
+ g_free(base->name);
+ g_free(base);
+ base = NULL;
+ } else {
+ set_parents(base, base);
+ }
+}
+
+/* Look for path in emulation dir, otherwise return name. */
+const char *path(const char *name)
+{
+ /* Only do absolute paths: quick and dirty, but should mostly be OK.
+ Could do relative by tracking cwd. */
+ if (!base || !name || name[0] != '/')
+ return name;
+
+ return follow_path(base, name) ?: name;
+}
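
A minimal usage sketch for the two entry points above; the sysroot prefix, the looked-up file and the wrapper function are hypothetical:

    #include <fcntl.h>

    static int open_guest_file(void)
    {
        /* Build the cached directory tree once, early in user-mode emulation setup. */
        init_paths("/usr/gnemul/x86_64-linux");

        /* path() returns the prefixed name when it exists under the sysroot,
         * otherwise the argument unchanged. */
        return open(path("/lib/ld-linux.so.2"), O_RDONLY);
    }
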
diff --git a/src/util/qemu-config.c b/src/util/qemu-config.c
new file mode 100644
index 0000000..687fd34
--- /dev/null
+++ b/src/util/qemu-config.c
@@ -0,0 +1,573 @@
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "qapi/error.h"
+#include "qmp-commands.h"
+
+static QemuOptsList *vm_config_groups[48];
+static QemuOptsList *drive_config_groups[4];
+
+static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
+ Error **errp)
+{
+ int i;
+
+ for (i = 0; lists[i] != NULL; i++) {
+ if (strcmp(lists[i]->name, group) == 0)
+ break;
+ }
+ if (lists[i] == NULL) {
+ error_setg(errp, "There is no option group '%s'", group);
+ }
+ return lists[i];
+}
+
+QemuOptsList *qemu_find_opts(const char *group)
+{
+ QemuOptsList *ret;
+ Error *local_err = NULL;
+
+ ret = find_list(vm_config_groups, group, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ }
+
+ return ret;
+}
+
+QemuOpts *qemu_find_opts_singleton(const char *group)
+{
+ QemuOptsList *list;
+ QemuOpts *opts;
+
+ list = qemu_find_opts(group);
+ assert(list);
+ opts = qemu_opts_find(list, NULL);
+ if (!opts) {
+ opts = qemu_opts_create(list, NULL, 0, &error_abort);
+ }
+ return opts;
+}
+
+static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc)
+{
+ CommandLineParameterInfoList *param_list = NULL, *entry;
+ CommandLineParameterInfo *info;
+ int i;
+
+ for (i = 0; desc[i].name != NULL; i++) {
+ info = g_malloc0(sizeof(*info));
+ info->name = g_strdup(desc[i].name);
+
+ switch (desc[i].type) {
+ case QEMU_OPT_STRING:
+ info->type = COMMAND_LINE_PARAMETER_TYPE_STRING;
+ break;
+ case QEMU_OPT_BOOL:
+ info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN;
+ break;
+ case QEMU_OPT_NUMBER:
+ info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER;
+ break;
+ case QEMU_OPT_SIZE:
+ info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE;
+ break;
+ }
+
+ if (desc[i].help) {
+ info->has_help = true;
+ info->help = g_strdup(desc[i].help);
+ }
+ if (desc[i].def_value_str) {
+ info->has_q_default = true;
+ info->q_default = g_strdup(desc[i].def_value_str);
+ }
+
+ entry = g_malloc0(sizeof(*entry));
+ entry->value = info;
+ entry->next = param_list;
+ param_list = entry;
+ }
+
+ return param_list;
+}
+
+/* remove repeated entries from the info list */
+static void cleanup_infolist(CommandLineParameterInfoList *head)
+{
+ CommandLineParameterInfoList *pre_entry, *cur, *del_entry;
+
+ cur = head;
+ while (cur->next) {
+ pre_entry = head;
+ while (pre_entry != cur->next) {
+ if (!strcmp(pre_entry->value->name, cur->next->value->name)) {
+ del_entry = cur->next;
+ cur->next = cur->next->next;
+ g_free(del_entry);
+ break;
+ }
+ pre_entry = pre_entry->next;
+ }
+ cur = cur->next;
+ }
+}
+
+/* merge the description items of two parameter infolists */
+static void connect_infolist(CommandLineParameterInfoList *head,
+ CommandLineParameterInfoList *new)
+{
+ CommandLineParameterInfoList *cur;
+
+ cur = head;
+ while (cur->next) {
+ cur = cur->next;
+ }
+ cur->next = new;
+}
+
+/* collect the option descriptions from all registered drive QemuOptsLists */
+static CommandLineParameterInfoList *get_drive_infolist(void)
+{
+ CommandLineParameterInfoList *head = NULL, *cur;
+ int i;
+
+ for (i = 0; drive_config_groups[i] != NULL; i++) {
+ if (!head) {
+ head = query_option_descs(drive_config_groups[i]->desc);
+ } else {
+ cur = query_option_descs(drive_config_groups[i]->desc);
+ connect_infolist(head, cur);
+ }
+ }
+ cleanup_infolist(head);
+
+ return head;
+}
+
+/* Descriptions of machine options that are now properties of the machine
+ * object, kept here so query-command-line-options can still report them. */
+static QemuOptsList machine_opts = {
+ .merge_lists = true,
+ .head = QTAILQ_HEAD_INITIALIZER(machine_opts.head),
+ .desc = {
+ {
+ .name = "type",
+ .type = QEMU_OPT_STRING,
+ .help = "emulated machine"
+ },{
+ .name = "accel",
+ .type = QEMU_OPT_STRING,
+ .help = "accelerator list",
+ },{
+ .name = "kernel_irqchip",
+ .type = QEMU_OPT_BOOL,
+ .help = "use KVM in-kernel irqchip",
+ },{
+ .name = "kvm_shadow_mem",
+ .type = QEMU_OPT_SIZE,
+ .help = "KVM shadow MMU size",
+ },{
+ .name = "kernel",
+ .type = QEMU_OPT_STRING,
+ .help = "Linux kernel image file",
+ },{
+ .name = "initrd",
+ .type = QEMU_OPT_STRING,
+ .help = "Linux initial ramdisk file",
+ },{
+ .name = "append",
+ .type = QEMU_OPT_STRING,
+ .help = "Linux kernel command line",
+ },{
+ .name = "dtb",
+ .type = QEMU_OPT_STRING,
+ .help = "Linux kernel device tree file",
+ },{
+ .name = "dumpdtb",
+ .type = QEMU_OPT_STRING,
+ .help = "Dump current dtb to a file and quit",
+ },{
+ .name = "phandle_start",
+ .type = QEMU_OPT_NUMBER,
+ .help = "The first phandle ID we may generate dynamically",
+ },{
+ .name = "dt_compatible",
+ .type = QEMU_OPT_STRING,
+ .help = "Overrides the \"compatible\" property of the dt root node",
+ },{
+ .name = "dump-guest-core",
+ .type = QEMU_OPT_BOOL,
+ .help = "Include guest memory in a core dump",
+ },{
+ .name = "mem-merge",
+ .type = QEMU_OPT_BOOL,
+ .help = "enable/disable memory merge support",
+ },{
+ .name = "usb",
+ .type = QEMU_OPT_BOOL,
+ .help = "Set on/off to enable/disable usb",
+ },{
+ .name = "firmware",
+ .type = QEMU_OPT_STRING,
+ .help = "firmware image",
+ },{
+ .name = "iommu",
+ .type = QEMU_OPT_BOOL,
+ .help = "Set on/off to enable/disable Intel IOMMU (VT-d)",
+ },{
+ .name = "suppress-vmdesc",
+ .type = QEMU_OPT_BOOL,
+ .help = "Set on to disable self-describing migration",
+ },{
+ .name = "aes-key-wrap",
+ .type = QEMU_OPT_BOOL,
+ .help = "enable/disable AES key wrapping using the CPACF wrapping key",
+ },{
+ .name = "dea-key-wrap",
+ .type = QEMU_OPT_BOOL,
+ .help = "enable/disable DEA key wrapping using the CPACF wrapping key",
+ },
+ { /* End of list */ }
+ }
+};
+
+CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
+ const char *option,
+ Error **errp)
+{
+ CommandLineOptionInfoList *conf_list = NULL, *entry;
+ CommandLineOptionInfo *info;
+ int i;
+
+ for (i = 0; vm_config_groups[i] != NULL; i++) {
+ if (!has_option || !strcmp(option, vm_config_groups[i]->name)) {
+ info = g_malloc0(sizeof(*info));
+ info->option = g_strdup(vm_config_groups[i]->name);
+ if (!strcmp("drive", vm_config_groups[i]->name)) {
+ info->parameters = get_drive_infolist();
+ } else if (!strcmp("machine", vm_config_groups[i]->name)) {
+ info->parameters = query_option_descs(machine_opts.desc);
+ } else {
+ info->parameters =
+ query_option_descs(vm_config_groups[i]->desc);
+ }
+ entry = g_malloc0(sizeof(*entry));
+ entry->value = info;
+ entry->next = conf_list;
+ conf_list = entry;
+ }
+ }
+
+ if (conf_list == NULL) {
+ error_setg(errp, "invalid option name: %s", option);
+ }
+
+ return conf_list;
+}
+
+QemuOptsList *qemu_find_opts_err(const char *group, Error **errp)
+{
+ return find_list(vm_config_groups, group, errp);
+}
+
+void qemu_add_drive_opts(QemuOptsList *list)
+{
+ int entries, i;
+
+ entries = ARRAY_SIZE(drive_config_groups);
+ entries--; /* keep list NULL terminated */
+ for (i = 0; i < entries; i++) {
+ if (drive_config_groups[i] == NULL) {
+ drive_config_groups[i] = list;
+ return;
+ }
+ }
+ fprintf(stderr, "ran out of space in drive_config_groups");
+ abort();
+}
+
+void qemu_add_opts(QemuOptsList *list)
+{
+ int entries, i;
+
+ entries = ARRAY_SIZE(vm_config_groups);
+ entries--; /* keep list NULL terminated */
+ for (i = 0; i < entries; i++) {
+ if (vm_config_groups[i] == NULL) {
+ vm_config_groups[i] = list;
+ return;
+ }
+ }
+ fprintf(stderr, "ran out of space in vm_config_groups");
+ abort();
+}
+
+int qemu_set_option(const char *str)
+{
+ Error *local_err = NULL;
+ char group[64], id[64], arg[64];
+ QemuOptsList *list;
+ QemuOpts *opts;
+ int rc, offset;
+
+ rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset);
+ if (rc < 3 || str[offset] != '=') {
+ error_report("can't parse: \"%s\"", str);
+ return -1;
+ }
+
+ list = qemu_find_opts(group);
+ if (list == NULL) {
+ return -1;
+ }
+
+ opts = qemu_opts_find(list, id);
+ if (!opts) {
+ error_report("there is no %s \"%s\" defined",
+ list->name, id);
+ return -1;
+ }
+
+ qemu_opt_set(opts, arg, str + offset + 1, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ return -1;
+ }
+ return 0;
+}
+
+struct ConfigWriteData {
+ QemuOptsList *list;
+ FILE *fp;
+};
+
+static int config_write_opt(void *opaque, const char *name, const char *value,
+ Error **errp)
+{
+ struct ConfigWriteData *data = opaque;
+
+ fprintf(data->fp, " %s = \"%s\"\n", name, value);
+ return 0;
+}
+
+static int config_write_opts(void *opaque, QemuOpts *opts, Error **errp)
+{
+ struct ConfigWriteData *data = opaque;
+ const char *id = qemu_opts_id(opts);
+
+ if (id) {
+ fprintf(data->fp, "[%s \"%s\"]\n", data->list->name, id);
+ } else {
+ fprintf(data->fp, "[%s]\n", data->list->name);
+ }
+ qemu_opt_foreach(opts, config_write_opt, data, NULL);
+ fprintf(data->fp, "\n");
+ return 0;
+}
+
+void qemu_config_write(FILE *fp)
+{
+ struct ConfigWriteData data = { .fp = fp };
+ QemuOptsList **lists = vm_config_groups;
+ int i;
+
+ fprintf(fp, "# qemu config file\n\n");
+ for (i = 0; lists[i] != NULL; i++) {
+ data.list = lists[i];
+ qemu_opts_foreach(data.list, config_write_opts, &data, NULL);
+ }
+}
+
+int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
+{
+ char line[1024], group[64], id[64], arg[64], value[1024];
+ Location loc;
+ QemuOptsList *list = NULL;
+ Error *local_err = NULL;
+ QemuOpts *opts = NULL;
+ int res = -1, lno = 0;
+
+ loc_push_none(&loc);
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ loc_set_file(fname, ++lno);
+ if (line[0] == '\n') {
+ /* skip empty lines */
+ continue;
+ }
+ if (line[0] == '#') {
+ /* comment */
+ continue;
+ }
+ if (sscanf(line, "[%63s \"%63[^\"]\"]", group, id) == 2) {
+ /* group with id */
+ list = find_list(lists, group, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ goto out;
+ }
+ opts = qemu_opts_create(list, id, 1, NULL);
+ continue;
+ }
+ if (sscanf(line, "[%63[^]]]", group) == 1) {
+ /* group without id */
+ list = find_list(lists, group, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ goto out;
+ }
+ opts = qemu_opts_create(list, NULL, 0, &error_abort);
+ continue;
+ }
+ value[0] = '\0';
+ if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 ||
+ sscanf(line, " %63s = \"\"", arg) == 1) {
+ /* arg = value */
+ if (opts == NULL) {
+ error_report("no group defined");
+ goto out;
+ }
+ qemu_opt_set(opts, arg, value, &local_err);
+ if (local_err) {
+ error_report_err(local_err);
+ goto out;
+ }
+ continue;
+ }
+ error_report("parse error");
+ goto out;
+ }
+ if (ferror(fp)) {
+ error_report("error reading file");
+ goto out;
+ }
+ res = 0;
+out:
+ loc_pop(&loc);
+ return res;
+}
+
+int qemu_read_config_file(const char *filename)
+{
+ FILE *f = fopen(filename, "r");
+ int ret;
+
+ if (f == NULL) {
+ return -errno;
+ }
+
+ ret = qemu_config_parse(f, vm_config_groups, filename);
+ fclose(f);
+
+ if (ret == 0) {
+ return 0;
+ } else {
+ return -EINVAL;
+ }
+}
+
+static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
+ Error **errp)
+{
+ QemuOpts *subopts;
+ QDict *subqdict;
+ QList *list = NULL;
+ Error *local_err = NULL;
+ size_t orig_size, enum_size;
+ char *prefix;
+
+ prefix = g_strdup_printf("%s.", opts->name);
+ qdict_extract_subqdict(options, &subqdict, prefix);
+ g_free(prefix);
+ orig_size = qdict_size(subqdict);
+ if (!orig_size) {
+ goto out;
+ }
+
+ subopts = qemu_opts_create(opts, NULL, 0, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto out;
+ }
+
+ qemu_opts_absorb_qdict(subopts, subqdict, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto out;
+ }
+
+ enum_size = qdict_size(subqdict);
+ if (enum_size < orig_size && enum_size) {
+ error_setg(errp, "Unknown option '%s' for [%s]",
+ qdict_first(subqdict)->key, opts->name);
+ goto out;
+ }
+
+ if (enum_size) {
+ /* Multiple, enumerated sections */
+ QListEntry *list_entry;
+ unsigned i = 0;
+
+ /* Not required anymore */
+ qemu_opts_del(subopts);
+
+ qdict_array_split(subqdict, &list);
+ if (qdict_size(subqdict)) {
+ error_setg(errp, "Unused option '%s' for [%s]",
+ qdict_first(subqdict)->key, opts->name);
+ goto out;
+ }
+
+ QLIST_FOREACH_ENTRY(list, list_entry) {
+ QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry));
+ char *opt_name;
+
+ if (!section) {
+ error_setg(errp, "[%s] section (index %u) does not consist of "
+ "keys", opts->name, i);
+ goto out;
+ }
+
+ opt_name = g_strdup_printf("%s.%u", opts->name, i++);
+ subopts = qemu_opts_create(opts, opt_name, 1, &local_err);
+ g_free(opt_name);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ goto out;
+ }
+
+ qemu_opts_absorb_qdict(subopts, section, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qemu_opts_del(subopts);
+ goto out;
+ }
+
+ if (qdict_size(section)) {
+ error_setg(errp, "[%s] section doesn't support the option '%s'",
+ opts->name, qdict_first(section)->key);
+ qemu_opts_del(subopts);
+ goto out;
+ }
+ }
+ }
+
+out:
+ QDECREF(subqdict);
+ QDECREF(list);
+}
+
+void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+ Error **errp)
+{
+ int i;
+ Error *local_err = NULL;
+
+ for (i = 0; lists[i]; i++) {
+ config_parse_qdict_section(options, lists[i], &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+}
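
The sscanf() patterns in qemu_config_parse() accept configuration files of the following shape, which is also the format qemu_config_write() emits; the group names, ID and values below are only illustrative:

    # qemu config file

    [drive "disk0"]
      file = "disk0.qcow2"
      format = "qcow2"

    [machine]
      accel = "kvm"
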
diff --git a/src/util/qemu-coroutine-io.c b/src/util/qemu-coroutine-io.c
new file mode 100644
index 0000000..e1eae73
--- /dev/null
+++ b/src/util/qemu-coroutine-io.c
@@ -0,0 +1,91 @@
+/*
+ * Coroutine-aware I/O functions
+ *
+ * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
+ * Copyright (c) 2011, Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+ssize_t coroutine_fn
+qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+ size_t offset, size_t bytes, bool do_send)
+{
+ size_t done = 0;
+ ssize_t ret;
+ int err;
+ while (done < bytes) {
+ ret = iov_send_recv(sockfd, iov, iov_cnt,
+ offset + done, bytes - done, do_send);
+ if (ret > 0) {
+ done += ret;
+ } else if (ret < 0) {
+ err = socket_error();
+ if (err == EAGAIN || err == EWOULDBLOCK) {
+ qemu_coroutine_yield();
+ } else if (done == 0) {
+ return -err;
+ } else {
+ break;
+ }
+ } else if (ret == 0 && !do_send) {
+ /* write (send) should never return 0.
+             * read (recv) returns 0 at end-of-file (no more data).
+ * In both cases there's little point retrying,
+ * but we do for write anyway, just in case */
+ break;
+ }
+ }
+ return done;
+}
+
+ssize_t coroutine_fn
+qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send)
+{
+ struct iovec iov = { .iov_base = buf, .iov_len = bytes };
+ return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send);
+}
+
+typedef struct {
+ Coroutine *co;
+ int fd;
+} FDYieldUntilData;
+
+static void fd_coroutine_enter(void *opaque)
+{
+ FDYieldUntilData *data = opaque;
+ qemu_set_fd_handler(data->fd, NULL, NULL, NULL);
+ qemu_coroutine_enter(data->co, NULL);
+}
+
+void coroutine_fn yield_until_fd_readable(int fd)
+{
+ FDYieldUntilData data;
+
+ assert(qemu_in_coroutine());
+ data.co = qemu_coroutine_self();
+ data.fd = fd;
+ qemu_set_fd_handler(fd, fd_coroutine_enter, NULL, &data);
+ qemu_coroutine_yield();
+}
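
A minimal sketch (hypothetical socket, buffer and function name) of combining these helpers inside a coroutine:

    /* Runs in coroutine context: yield to the main loop until data arrives,
     * then receive it without blocking the thread. */
    static void coroutine_fn read_reply(void *opaque)
    {
        int sockfd = *(int *) opaque;
        char buf[64];

        yield_until_fd_readable(sockfd);                      /* suspend only this coroutine */
        qemu_co_send_recv(sockfd, buf, sizeof(buf), false);   /* do_send = false: receive */
    }
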
diff --git a/src/util/qemu-coroutine-lock.c b/src/util/qemu-coroutine-lock.c
new file mode 100644
index 0000000..130ee19
--- /dev/null
+++ b/src/util/qemu-coroutine-lock.c
@@ -0,0 +1,186 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/queue.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+ QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+ Coroutine *self = qemu_coroutine_self();
+ QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+ qemu_coroutine_yield();
+ assert(qemu_in_coroutine());
+}
+
+/**
+ * qemu_co_queue_run_restart:
+ *
+ * Enter each coroutine that was previously marked for restart by
+ * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is
+ * invoked by the core coroutine code when the current coroutine yields or
+ * terminates.
+ */
+void qemu_co_queue_run_restart(Coroutine *co)
+{
+ Coroutine *next;
+
+ trace_qemu_co_queue_run_restart(co);
+ while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) {
+ QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next);
+ qemu_coroutine_enter(next, NULL);
+ }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *next;
+
+ if (QTAILQ_EMPTY(&queue->entries)) {
+ return false;
+ }
+
+ while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
+ QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+ QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
+ trace_qemu_co_queue_next(next);
+ if (single) {
+ break;
+ }
+ }
+ return true;
+}
+
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
+{
+ assert(qemu_in_coroutine());
+ return qemu_co_queue_do_restart(queue, true);
+}
+
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
+{
+ assert(qemu_in_coroutine());
+ qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_enter_next(CoQueue *queue)
+{
+ Coroutine *next;
+
+ next = QTAILQ_FIRST(&queue->entries);
+ if (!next) {
+ return false;
+ }
+
+ QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+ qemu_coroutine_enter(next, NULL);
+ return true;
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+ return QTAILQ_FIRST(&queue->entries) == NULL;
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+ memset(mutex, 0, sizeof(*mutex));
+ qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_lock_entry(mutex, self);
+
+ while (mutex->locked) {
+ qemu_co_queue_wait(&mutex->queue);
+ }
+
+ mutex->locked = true;
+
+ trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+ Coroutine *self = qemu_coroutine_self();
+
+ trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+ assert(mutex->locked == true);
+ assert(qemu_in_coroutine());
+
+ mutex->locked = false;
+ qemu_co_queue_next(&mutex->queue);
+
+ trace_qemu_co_mutex_unlock_return(mutex, self);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+ memset(lock, 0, sizeof(*lock));
+ qemu_co_queue_init(&lock->queue);
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+ while (lock->writer) {
+ qemu_co_queue_wait(&lock->queue);
+ }
+ lock->reader++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+ assert(qemu_in_coroutine());
+ if (lock->writer) {
+ lock->writer = false;
+ qemu_co_queue_restart_all(&lock->queue);
+ } else {
+ lock->reader--;
+ assert(lock->reader >= 0);
+ /* Wakeup only one waiting writer */
+ if (!lock->reader) {
+ qemu_co_queue_next(&lock->queue);
+ }
+ }
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+ while (lock->writer || lock->reader) {
+ qemu_co_queue_wait(&lock->queue);
+ }
+ lock->writer = true;
+}
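
A short sketch (the shared state and function names are hypothetical) of the CoMutex pattern these primitives support:

    static CoMutex table_lock;      /* initialised once with qemu_co_mutex_init() */
    static int table_users;

    static void coroutine_fn use_table(void *opaque)
    {
        (void) opaque;

        qemu_co_mutex_lock(&table_lock);    /* contending coroutines queue and yield here */
        table_users++;
        /* ... touch the shared table ... */
        qemu_co_mutex_unlock(&table_lock);  /* wakes the next queued coroutine, if any */
    }
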
diff --git a/src/util/qemu-coroutine-sleep.c b/src/util/qemu-coroutine-sleep.c
new file mode 100644
index 0000000..b35db56
--- /dev/null
+++ b/src/util/qemu-coroutine-sleep.c
@@ -0,0 +1,41 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/coroutine.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+typedef struct CoSleepCB {
+ QEMUTimer *ts;
+ Coroutine *co;
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+ CoSleepCB *sleep_cb = opaque;
+
+ qemu_coroutine_enter(sleep_cb->co, NULL);
+}
+
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+ int64_t ns)
+{
+ CoSleepCB sleep_cb = {
+ .co = qemu_coroutine_self(),
+ };
+ sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
+ timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
+ qemu_coroutine_yield();
+ timer_del(sleep_cb.ts);
+ timer_free(sleep_cb.ts);
+}
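
For example (context, delay and function name are assumptions), a coroutine can pause itself for 100 ms with:

    /* Runs in coroutine context; the timer callback re-enters us after 100 ms. */
    static void coroutine_fn throttle_pause(void)
    {
        co_aio_sleep_ns(qemu_get_aio_context(), QEMU_CLOCK_REALTIME, 100 * SCALE_MS);
    }
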
diff --git a/src/util/qemu-coroutine.c b/src/util/qemu-coroutine.c
new file mode 100644
index 0000000..8953560
--- /dev/null
+++ b/src/util/qemu-coroutine.c
@@ -0,0 +1,146 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
+ * Kevin Wolf <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+
+enum {
+ POOL_BATCH_SIZE = 64,
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+ Coroutine *co;
+ Coroutine *tmp;
+
+ QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+ QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+ qemu_coroutine_delete(co);
+ }
+}
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+ Coroutine *co = NULL;
+
+ if (CONFIG_COROUTINE_POOL) {
+ co = QSLIST_FIRST(&alloc_pool);
+ if (!co) {
+ if (release_pool_size > POOL_BATCH_SIZE) {
+ /* Slow path; a good place to register the destructor, too. */
+ if (!coroutine_pool_cleanup_notifier.notify) {
+ coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+ qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+ }
+
+ /* This is not exact; there could be a little skew between
+ * release_pool_size and the actual size of release_pool. But
+ * it is just a heuristic, it does not need to be perfect.
+ */
+ alloc_pool_size = atomic_xchg(&release_pool_size, 0);
+ QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+ co = QSLIST_FIRST(&alloc_pool);
+ }
+ }
+ if (co) {
+ QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+ alloc_pool_size--;
+ }
+ }
+
+ if (!co) {
+ co = qemu_coroutine_new();
+ }
+
+ co->entry = entry;
+ QTAILQ_INIT(&co->co_queue_wakeup);
+ return co;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+ co->caller = NULL;
+
+ if (CONFIG_COROUTINE_POOL) {
+ if (release_pool_size < POOL_BATCH_SIZE * 2) {
+ QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+ atomic_inc(&release_pool_size);
+ return;
+ }
+ if (alloc_pool_size < POOL_BATCH_SIZE) {
+ QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+ alloc_pool_size++;
+ return;
+ }
+ }
+
+ qemu_coroutine_delete(co);
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+ Coroutine *self = qemu_coroutine_self();
+ CoroutineAction ret;
+
+ trace_qemu_coroutine_enter(self, co, opaque);
+
+ if (co->caller) {
+ fprintf(stderr, "Co-routine re-entered recursively\n");
+ abort();
+ }
+
+ co->caller = self;
+ co->entry_arg = opaque;
+ ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+
+ qemu_co_queue_run_restart(co);
+
+ switch (ret) {
+ case COROUTINE_YIELD:
+ return;
+ case COROUTINE_TERMINATE:
+ trace_qemu_coroutine_terminate(co);
+ coroutine_delete(co);
+ return;
+ default:
+ abort();
+ }
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+ Coroutine *self = qemu_coroutine_self();
+ Coroutine *to = self->caller;
+
+ trace_qemu_coroutine_yield(self, to);
+
+ if (!to) {
+ fprintf(stderr, "Co-routine is yielding to no one\n");
+ abort();
+ }
+
+ self->caller = NULL;
+ qemu_coroutine_switch(self, to, COROUTINE_YIELD);
+}
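
A minimal sketch (entry function and argument are hypothetical) of the create/enter lifecycle implemented above:

    static void coroutine_fn greet_co(void *opaque)
    {
        const char *who = opaque;    /* delivered via the second argument of enter */
        /* ... may qemu_coroutine_yield() and be re-entered later ... */
        (void) who;
    }                                /* returning here terminates the coroutine */

    void start_greeting(void)
    {
        Coroutine *co = qemu_coroutine_create(greet_co);
        qemu_coroutine_enter(co, (void *) "world");   /* runs until first yield or termination */
    }
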
diff --git a/src/util/qemu-error.c b/src/util/qemu-error.c
new file mode 100644
index 0000000..c1574bb
--- /dev/null
+++ b/src/util/qemu-error.c
@@ -0,0 +1,239 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include "monitor/monitor.h"
+#include "qemu/error-report.h"
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO should return int, so callers can calculate width, but that
+ * requires surgery to monitor_vprintf(). Left for another day.
+ */
+void error_vprintf(const char *fmt, va_list ap)
+{
+ if (cur_mon && !monitor_cur_is_qmp()) {
+ monitor_vprintf(cur_mon, fmt, ap);
+ } else {
+ vfprintf(stderr, fmt, ap);
+ }
+}
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO just like error_vprintf()
+ */
+void error_printf(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_vprintf(fmt, ap);
+ va_end(ap);
+}
+
+void error_printf_unless_qmp(const char *fmt, ...)
+{
+ va_list ap;
+
+ if (!monitor_cur_is_qmp()) {
+ va_start(ap, fmt);
+ error_vprintf(fmt, ap);
+ va_end(ap);
+ }
+}
+
+static Location std_loc = {
+ .kind = LOC_NONE
+};
+static Location *cur_loc = &std_loc;
+
+/*
+ * Push location saved in LOC onto the location stack, return it.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ */
+Location *loc_push_restore(Location *loc)
+{
+ assert(!loc->prev);
+ loc->prev = cur_loc;
+ cur_loc = loc;
+ return loc;
+}
+
+/*
+ * Initialize *LOC to "nowhere", push it onto the location stack.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ * Return LOC.
+ */
+Location *loc_push_none(Location *loc)
+{
+ loc->kind = LOC_NONE;
+ loc->prev = NULL;
+ return loc_push_restore(loc);
+}
+
+/*
+ * Pop the location stack.
+ * LOC must be the current location, i.e. the top of the stack.
+ */
+Location *loc_pop(Location *loc)
+{
+ assert(cur_loc == loc && loc->prev);
+ cur_loc = loc->prev;
+ loc->prev = NULL;
+ return loc;
+}
+
+/*
+ * Save the current location in LOC, return LOC.
+ */
+Location *loc_save(Location *loc)
+{
+ *loc = *cur_loc;
+ loc->prev = NULL;
+ return loc;
+}
+
+/*
+ * Change the current location to the one saved in LOC.
+ */
+void loc_restore(Location *loc)
+{
+ Location *prev = cur_loc->prev;
+ assert(!loc->prev);
+ *cur_loc = *loc;
+ cur_loc->prev = prev;
+}
+
+/*
+ * Change the current location to "nowhere in particular".
+ */
+void loc_set_none(void)
+{
+ cur_loc->kind = LOC_NONE;
+}
+
+/*
+ * Change the current location to argument ARGV[IDX..IDX+CNT-1].
+ */
+void loc_set_cmdline(char **argv, int idx, int cnt)
+{
+ cur_loc->kind = LOC_CMDLINE;
+ cur_loc->num = cnt;
+ cur_loc->ptr = argv + idx;
+}
+
+/*
+ * Change the current location to file FNAME, line LNO.
+ */
+void loc_set_file(const char *fname, int lno)
+{
+ assert (fname || cur_loc->kind == LOC_FILE);
+ cur_loc->kind = LOC_FILE;
+ cur_loc->num = lno;
+ if (fname) {
+ cur_loc->ptr = fname;
+ }
+}
+
+static const char *progname;
+
+/*
+ * Set the program name for error_print_loc().
+ */
+void error_set_progname(const char *argv0)
+{
+ const char *p = strrchr(argv0, '/');
+ progname = p ? p + 1 : argv0;
+}
+
+const char *error_get_progname(void)
+{
+ return progname;
+}
+
+/*
+ * Print current location to current monitor if we have one, else to stderr.
+ */
+static void error_print_loc(void)
+{
+ const char *sep = "";
+ int i;
+ const char *const *argp;
+
+ if (!cur_mon && progname) {
+ fprintf(stderr, "%s:", progname);
+ sep = " ";
+ }
+ switch (cur_loc->kind) {
+ case LOC_CMDLINE:
+ argp = cur_loc->ptr;
+ for (i = 0; i < cur_loc->num; i++) {
+ error_printf("%s%s", sep, argp[i]);
+ sep = " ";
+ }
+ error_printf(": ");
+ break;
+ case LOC_FILE:
+ error_printf("%s:", (const char *)cur_loc->ptr);
+ if (cur_loc->num) {
+ error_printf("%d:", cur_loc->num);
+ }
+ error_printf(" ");
+ break;
+ default:
+ error_printf("%s", sep);
+ }
+}
+
+bool enable_timestamp_msg;
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like vsprintf(). The result should not contain
+ * newlines.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor. Use error_setg() there.
+ */
+void error_vreport(const char *fmt, va_list ap)
+{
+ GTimeVal tv;
+ gchar *timestr;
+
+ if (enable_timestamp_msg && !cur_mon) {
+ g_get_current_time(&tv);
+ timestr = g_time_val_to_iso8601(&tv);
+ error_printf("%s ", timestr);
+ g_free(timestr);
+ }
+
+ error_print_loc();
+ error_vprintf(fmt, ap);
+ error_printf("\n");
+}
+
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf(). The result should not contain
+ * newlines.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor. Use error_setg() there.
+ */
+void error_report(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ error_vreport(fmt, ap);
+ va_end(ap);
+}
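
A short sketch (file name, line number and message are hypothetical) of how the location stack combines with error_report():

    static void report_bad_key(void)
    {
        Location loc;

        loc_push_none(&loc);
        loc_set_file("guest.cfg", 12);
        error_report("unknown key '%s'", "colour");
        /* prints roughly: <progname>: guest.cfg:12: unknown key 'colour' */
        loc_pop(&loc);
    }
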
diff --git a/src/util/qemu-openpty.c b/src/util/qemu-openpty.c
new file mode 100644
index 0000000..4c53211
--- /dev/null
+++ b/src/util/qemu-openpty.c
@@ -0,0 +1,137 @@
+/*
+ * qemu-openpty.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * Wrapper function qemu_openpty() implementation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This is not part of oslib-posix.c because this function
+ * uses openpty() which often in -lutil, and if we add this
+ * dependency to oslib-posix.o, every app will have to be
+ * linked with -lutil.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+
+#if defined(__GLIBC__)
+# include <pty.h>
+#elif defined CONFIG_BSD
+# include <termios.h>
+# if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+# include <libutil.h>
+# else
+# include <util.h>
+# endif
+#elif defined CONFIG_SOLARIS
+# include <termios.h>
+# include <stropts.h>
+#else
+# include <termios.h>
+#endif
+
+#ifdef __sun__
+/* Once Solaris has openpty(), this is going to be removed. */
+static int openpty(int *amaster, int *aslave, char *name,
+ struct termios *termp, struct winsize *winp)
+{
+ const char *slave;
+ int mfd = -1, sfd = -1;
+
+ *amaster = *aslave = -1;
+
+ mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
+ if (mfd < 0)
+ goto err;
+
+ if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
+ goto err;
+
+ if ((slave = ptsname(mfd)) == NULL)
+ goto err;
+
+ if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1)
+ goto err;
+
+ if (ioctl(sfd, I_PUSH, "ptem") == -1 ||
+ (termp != NULL && tcgetattr(sfd, termp) < 0))
+ goto err;
+
+ if (amaster)
+ *amaster = mfd;
+ if (aslave)
+ *aslave = sfd;
+ if (winp)
+ ioctl(sfd, TIOCSWINSZ, winp);
+
+ return 0;
+
+err:
+ if (sfd != -1)
+ close(sfd);
+ close(mfd);
+ return -1;
+}
+
+static void cfmakeraw (struct termios *termios_p)
+{
+ termios_p->c_iflag &=
+ ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON);
+ termios_p->c_oflag &= ~OPOST;
+ termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN);
+ termios_p->c_cflag &= ~(CSIZE|PARENB);
+ termios_p->c_cflag |= CS8;
+
+ termios_p->c_cc[VMIN] = 0;
+ termios_p->c_cc[VTIME] = 0;
+}
+#endif
+
+int qemu_openpty_raw(int *aslave, char *pty_name)
+{
+ int amaster;
+ struct termios tty;
+#if defined(__OpenBSD__) || defined(__DragonFly__)
+ char pty_buf[PATH_MAX];
+#define q_ptsname(x) pty_buf
+#else
+ char *pty_buf = NULL;
+#define q_ptsname(x) ptsname(x)
+#endif
+
+ if (openpty(&amaster, aslave, pty_buf, NULL, NULL) < 0) {
+ return -1;
+ }
+
+ /* Set raw attributes on the pty. */
+ tcgetattr(*aslave, &tty);
+ cfmakeraw(&tty);
+ tcsetattr(*aslave, TCSAFLUSH, &tty);
+
+ if (pty_name) {
+ strcpy(pty_name, q_ptsname(amaster));
+ }
+
+ return amaster;
+}
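
A usage sketch for the wrapper above, with error handling reduced to the essentials (the function name and message are hypothetical):

    static int open_raw_pty(void)
    {
        int slave_fd;
        char slave_name[PATH_MAX];
        int master_fd = qemu_openpty_raw(&slave_fd, slave_name);

        if (master_fd >= 0) {
            printf("pty slave is %s\n", slave_name);   /* hand the name to the user */
        }
        return master_fd;                              /* drives the emulated device */
    }
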
diff --git a/src/util/qemu-option.c b/src/util/qemu-option.c
new file mode 100644
index 0000000..a50ecea
--- /dev/null
+++ b/src/util/qemu-option.c
@@ -0,0 +1,1205 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/types.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/option_int.h"
+
+/*
+ * Extracts the name of an option from the parameter string (p points at the
+ * first byte of the option name)
+ *
+ * The option name is delimited by delim (usually , or =) or the string end
+ * and is copied into buf. If the option name is longer than buf_size, it is
+ * truncated. buf is always zero terminated.
+ *
+ * The return value is the position of the delimiter/zero byte after the option
+ * name in p.
+ */
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
+{
+ char *q;
+
+ q = buf;
+ while (*p != '\0' && *p != delim) {
+ if (q && (q - buf) < buf_size - 1)
+ *q++ = *p;
+ p++;
+ }
+ if (q)
+ *q = '\0';
+
+ return p;
+}
+
+/*
+ * Extracts the value of an option from the parameter string p (p points at the
+ * first byte of the option value)
+ *
+ * This function is comparable to get_opt_name with the difference that the
+ * delimiter is fixed to a comma, which starts a new option. To specify an
+ * option value that contains commas, double each comma.
+ */
+const char *get_opt_value(char *buf, int buf_size, const char *p)
+{
+ char *q;
+
+ q = buf;
+ while (*p != '\0') {
+ if (*p == ',') {
+ if (*(p + 1) != ',')
+ break;
+ p++;
+ }
+ if (q && (q - buf) < buf_size - 1)
+ *q++ = *p;
+ p++;
+ }
+ if (q)
+ *q = '\0';
+
+ return p;
+}
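+
+/* Illustrative example of the escaping rule (hypothetical values): called on
+ * the value part of "file=foo,,bar,cache=none", i.e. on "foo,,bar,cache=none",
+ * this stores "foo,bar" in buf and returns a pointer to the ',' before
+ * "cache=none", so the caller continues with the next option after skipping
+ * that comma.
+ */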
+
+int get_next_param_value(char *buf, int buf_size,
+ const char *tag, const char **pstr)
+{
+ const char *p;
+ char option[128];
+
+ p = *pstr;
+ for(;;) {
+ p = get_opt_name(option, sizeof(option), p, '=');
+ if (*p != '=')
+ break;
+ p++;
+ if (!strcmp(tag, option)) {
+ *pstr = get_opt_value(buf, buf_size, p);
+ if (**pstr == ',') {
+ (*pstr)++;
+ }
+ return strlen(buf);
+ } else {
+ p = get_opt_value(NULL, 0, p);
+ }
+ if (*p != ',')
+ break;
+ p++;
+ }
+ return 0;
+}
+
+int get_param_value(char *buf, int buf_size,
+ const char *tag, const char *str)
+{
+ return get_next_param_value(buf, buf_size, tag, &str);
+}
+
+static void parse_option_bool(const char *name, const char *value, bool *ret,
+ Error **errp)
+{
+ if (value != NULL) {
+ if (!strcmp(value, "on")) {
+ *ret = 1;
+ } else if (!strcmp(value, "off")) {
+ *ret = 0;
+ } else {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+ name, "'on' or 'off'");
+ }
+ } else {
+ *ret = 1;
+ }
+}
+
+static void parse_option_number(const char *name, const char *value,
+ uint64_t *ret, Error **errp)
+{
+ char *postfix;
+ uint64_t number;
+
+ if (value != NULL) {
+ number = strtoull(value, &postfix, 0);
+ if (*postfix != '\0') {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+ return;
+ }
+ *ret = number;
+ } else {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+ }
+}
+
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+ const char *name)
+{
+ int i;
+
+ for (i = 0; desc[i].name != NULL; i++) {
+ if (strcmp(desc[i].name, name) == 0) {
+ return &desc[i];
+ }
+ }
+
+ return NULL;
+}
+
+void parse_option_size(const char *name, const char *value,
+ uint64_t *ret, Error **errp)
+{
+ char *postfix;
+ double sizef;
+
+ if (value != NULL) {
+ sizef = strtod(value, &postfix);
+ if (sizef < 0 || sizef > UINT64_MAX) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
+ "a non-negative number below 2^64");
+ return;
+ }
+ switch (*postfix) {
+ case 'T':
+ sizef *= 1024;
+ /* fall through */
+ case 'G':
+ sizef *= 1024;
+ /* fall through */
+ case 'M':
+ sizef *= 1024;
+ /* fall through */
+ case 'K':
+ case 'k':
+ sizef *= 1024;
+ /* fall through */
+ case 'b':
+ case '\0':
+ *ret = (uint64_t) sizef;
+ break;
+ default:
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+ error_append_hint(errp, "You may use k, M, G or T suffixes for "
+ "kilobytes, megabytes, gigabytes and terabytes.");
+ return;
+ }
+ } else {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+ }
+}
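+
+/* Illustrative example (hypothetical value): parse_option_size("size", "1.5G",
+ * &val, &err) multiplies 1.5 by 1024 three times and stores 1610612736 in val;
+ * an unknown suffix such as in "1x" instead reports that "a size" was expected.
+ */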
+
+bool has_help_option(const char *param)
+{
+ size_t buflen = strlen(param) + 1;
+ char *buf = g_malloc(buflen);
+ const char *p = param;
+ bool result = false;
+
+ while (*p) {
+ p = get_opt_value(buf, buflen, p);
+ if (*p) {
+ p++;
+ }
+
+ if (is_help_option(buf)) {
+ result = true;
+ goto out;
+ }
+ }
+
+out:
+ g_free(buf);
+ return result;
+}
+
+bool is_valid_option_list(const char *param)
+{
+ size_t buflen = strlen(param) + 1;
+ char *buf = g_malloc(buflen);
+ const char *p = param;
+ bool result = true;
+
+ while (*p) {
+ p = get_opt_value(buf, buflen, p);
+ if (*p && !*++p) {
+ result = false;
+ goto out;
+ }
+
+ if (!*buf || *buf == ',') {
+ result = false;
+ goto out;
+ }
+ }
+
+out:
+ g_free(buf);
+ return result;
+}
+
+void qemu_opts_print_help(QemuOptsList *list)
+{
+ QemuOptDesc *desc;
+
+ assert(list);
+ desc = list->desc;
+ printf("Supported options:\n");
+ while (desc && desc->name) {
+ printf("%-16s %s\n", desc->name,
+ desc->help ? desc->help : "No description available");
+ desc++;
+ }
+}
+/* ------------------------------------------------------------------ */
+
+QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt;
+
+ QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) {
+ if (strcmp(opt->name, name) != 0)
+ continue;
+ return opt;
+ }
+ return NULL;
+}
+
+static void qemu_opt_del(QemuOpt *opt)
+{
+ QTAILQ_REMOVE(&opt->opts->head, opt, next);
+ g_free(opt->name);
+ g_free(opt->str);
+ g_free(opt);
+}
+
+/* qemu_opt_set allows many settings for the same option.
+ * This function deletes all settings for an option.
+ */
+static void qemu_opt_del_all(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt, *next_opt;
+
+ QTAILQ_FOREACH_SAFE(opt, &opts->head, next, next_opt) {
+ if (!strcmp(opt->name, name)) {
+ qemu_opt_del(opt);
+ }
+ }
+}
+
+const char *qemu_opt_get(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt;
+
+ if (opts == NULL) {
+ return NULL;
+ }
+
+ opt = qemu_opt_find(opts, name);
+ if (!opt) {
+ const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+ if (desc && desc->def_value_str) {
+ return desc->def_value_str;
+ }
+ }
+ return opt ? opt->str : NULL;
+}
+
+/* Get a known option (or its default) and remove it from the list
+ * all in one action. Return a malloced string of the option value.
+ * Result must be freed by caller with g_free().
+ */
+char *qemu_opt_get_del(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc;
+ char *str = NULL;
+
+ if (opts == NULL) {
+ return NULL;
+ }
+
+ opt = qemu_opt_find(opts, name);
+ if (!opt) {
+ desc = find_desc_by_name(opts->list->desc, name);
+ if (desc && desc->def_value_str) {
+ str = g_strdup(desc->def_value_str);
+ }
+ return str;
+ }
+ str = opt->str;
+ opt->str = NULL;
+ qemu_opt_del_all(opts, name);
+ return str;
+}
+
+bool qemu_opt_has_help_opt(QemuOpts *opts)
+{
+ QemuOpt *opt;
+
+ QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) {
+ if (is_help_option(opt->name)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+static bool qemu_opt_get_bool_helper(QemuOpts *opts, const char *name,
+ bool defval, bool del)
+{
+ QemuOpt *opt;
+ bool ret = defval;
+
+ if (opts == NULL) {
+ return ret;
+ }
+
+ opt = qemu_opt_find(opts, name);
+ if (opt == NULL) {
+ const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+ if (desc && desc->def_value_str) {
+ parse_option_bool(name, desc->def_value_str, &ret, &error_abort);
+ }
+ return ret;
+ }
+ assert(opt->desc && opt->desc->type == QEMU_OPT_BOOL);
+ ret = opt->value.boolean;
+ if (del) {
+ qemu_opt_del_all(opts, name);
+ }
+ return ret;
+}
+
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval)
+{
+ return qemu_opt_get_bool_helper(opts, name, defval, false);
+}
+
+bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval)
+{
+ return qemu_opt_get_bool_helper(opts, name, defval, true);
+}
+
+static uint64_t qemu_opt_get_number_helper(QemuOpts *opts, const char *name,
+ uint64_t defval, bool del)
+{
+ QemuOpt *opt;
+ uint64_t ret = defval;
+
+ if (opts == NULL) {
+ return ret;
+ }
+
+ opt = qemu_opt_find(opts, name);
+ if (opt == NULL) {
+ const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+ if (desc && desc->def_value_str) {
+ parse_option_number(name, desc->def_value_str, &ret, &error_abort);
+ }
+ return ret;
+ }
+ assert(opt->desc && opt->desc->type == QEMU_OPT_NUMBER);
+ ret = opt->value.uint;
+ if (del) {
+ qemu_opt_del_all(opts, name);
+ }
+ return ret;
+}
+
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval)
+{
+ return qemu_opt_get_number_helper(opts, name, defval, false);
+}
+
+uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name,
+ uint64_t defval)
+{
+ return qemu_opt_get_number_helper(opts, name, defval, true);
+}
+
+static uint64_t qemu_opt_get_size_helper(QemuOpts *opts, const char *name,
+ uint64_t defval, bool del)
+{
+ QemuOpt *opt;
+ uint64_t ret = defval;
+
+ if (opts == NULL) {
+ return ret;
+ }
+
+ opt = qemu_opt_find(opts, name);
+ if (opt == NULL) {
+ const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+ if (desc && desc->def_value_str) {
+ parse_option_size(name, desc->def_value_str, &ret, &error_abort);
+ }
+ return ret;
+ }
+ assert(opt->desc && opt->desc->type == QEMU_OPT_SIZE);
+ ret = opt->value.uint;
+ if (del) {
+ qemu_opt_del_all(opts, name);
+ }
+ return ret;
+}
+
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval)
+{
+ return qemu_opt_get_size_helper(opts, name, defval, false);
+}
+
+uint64_t qemu_opt_get_size_del(QemuOpts *opts, const char *name,
+ uint64_t defval)
+{
+ return qemu_opt_get_size_helper(opts, name, defval, true);
+}
+
+static void qemu_opt_parse(QemuOpt *opt, Error **errp)
+{
+ if (opt->desc == NULL)
+ return;
+
+ switch (opt->desc->type) {
+ case QEMU_OPT_STRING:
+ /* nothing */
+ return;
+ case QEMU_OPT_BOOL:
+ parse_option_bool(opt->name, opt->str, &opt->value.boolean, errp);
+ break;
+ case QEMU_OPT_NUMBER:
+ parse_option_number(opt->name, opt->str, &opt->value.uint, errp);
+ break;
+ case QEMU_OPT_SIZE:
+ parse_option_size(opt->name, opt->str, &opt->value.uint, errp);
+ break;
+ default:
+ abort();
+ }
+}
+
+static bool opts_accepts_any(const QemuOpts *opts)
+{
+ return opts->list->desc[0].name == NULL;
+}
+
+int qemu_opt_unset(QemuOpts *opts, const char *name)
+{
+ QemuOpt *opt = qemu_opt_find(opts, name);
+
+ assert(opts_accepts_any(opts));
+
+ if (opt == NULL) {
+ return -1;
+ } else {
+ qemu_opt_del(opt);
+ return 0;
+ }
+}
+
+static void opt_set(QemuOpts *opts, const char *name, const char *value,
+ bool prepend, Error **errp)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc;
+ Error *local_err = NULL;
+
+ desc = find_desc_by_name(opts->list->desc, name);
+ if (!desc && !opts_accepts_any(opts)) {
+ error_setg(errp, QERR_INVALID_PARAMETER, name);
+ return;
+ }
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ if (prepend) {
+ QTAILQ_INSERT_HEAD(&opts->head, opt, next);
+ } else {
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+ }
+ opt->desc = desc;
+ opt->str = g_strdup(value);
+ qemu_opt_parse(opt, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qemu_opt_del(opt);
+ }
+}
+
+void qemu_opt_set(QemuOpts *opts, const char *name, const char *value,
+ Error **errp)
+{
+ opt_set(opts, name, value, false, errp);
+}
+
+void qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val,
+ Error **errp)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc = opts->list->desc;
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->desc = find_desc_by_name(desc, name);
+ if (!opt->desc && !opts_accepts_any(opts)) {
+ error_setg(errp, QERR_INVALID_PARAMETER, name);
+ g_free(opt);
+ return;
+ }
+
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ opt->value.boolean = !!val;
+ opt->str = g_strdup(val ? "on" : "off");
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+}
+
+void qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val,
+ Error **errp)
+{
+ QemuOpt *opt;
+ const QemuOptDesc *desc = opts->list->desc;
+
+ opt = g_malloc0(sizeof(*opt));
+ opt->desc = find_desc_by_name(desc, name);
+ if (!opt->desc && !opts_accepts_any(opts)) {
+ error_setg(errp, QERR_INVALID_PARAMETER, name);
+ g_free(opt);
+ return;
+ }
+
+ opt->name = g_strdup(name);
+ opt->opts = opts;
+ opt->value.uint = val;
+ opt->str = g_strdup_printf("%" PRId64, val);
+ QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+}
+
+/**
+ * For each member of @opts, call @func(@opaque, name, value, @errp).
+ * @func() may store an Error through @errp, but must return non-zero then.
+ * When @func() returns non-zero, break the loop and return that value.
+ * Return zero when the loop completes.
+ */
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+ Error **errp)
+{
+ QemuOpt *opt;
+ int rc;
+
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ rc = func(opaque, opt->name, opt->str, errp);
+ if (rc) {
+ return rc;
+ }
+ assert(!errp || !*errp);
+ }
+ return 0;
+}
+
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id)
+{
+ QemuOpts *opts;
+
+ QTAILQ_FOREACH(opts, &list->head, next) {
+ if (!opts->id && !id) {
+ return opts;
+ }
+ if (opts->id && id && !strcmp(opts->id, id)) {
+ return opts;
+ }
+ }
+ return NULL;
+}
+
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+ int fail_if_exists, Error **errp)
+{
+ QemuOpts *opts = NULL;
+
+ if (id) {
+ if (!id_wellformed(id)) {
+ error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id",
+ "an identifier");
+ error_append_hint(errp, "Identifiers consist of letters, digits, "
+ "'-', '.', '_', starting with a letter.");
+ return NULL;
+ }
+ opts = qemu_opts_find(list, id);
+ if (opts != NULL) {
+ if (fail_if_exists && !list->merge_lists) {
+ error_setg(errp, "Duplicate ID '%s' for %s", id, list->name);
+ return NULL;
+ } else {
+ return opts;
+ }
+ }
+ } else if (list->merge_lists) {
+ opts = qemu_opts_find(list, NULL);
+ if (opts) {
+ return opts;
+ }
+ }
+ opts = g_malloc0(sizeof(*opts));
+ opts->id = g_strdup(id);
+ opts->list = list;
+ loc_save(&opts->loc);
+ QTAILQ_INIT(&opts->head);
+ QTAILQ_INSERT_TAIL(&list->head, opts, next);
+ return opts;
+}
+
+void qemu_opts_reset(QemuOptsList *list)
+{
+ QemuOpts *opts, *next_opts;
+
+ QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) {
+ qemu_opts_del(opts);
+ }
+}
+
+void qemu_opts_loc_restore(QemuOpts *opts)
+{
+ loc_restore(&opts->loc);
+}
+
+void qemu_opts_set(QemuOptsList *list, const char *id,
+ const char *name, const char *value, Error **errp)
+{
+ QemuOpts *opts;
+ Error *local_err = NULL;
+
+ opts = qemu_opts_create(list, id, 1, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ qemu_opt_set(opts, name, value, errp);
+}
+
+const char *qemu_opts_id(QemuOpts *opts)
+{
+ return opts->id;
+}
+
+/* The id string will be g_free()d by qemu_opts_del */
+void qemu_opts_set_id(QemuOpts *opts, char *id)
+{
+ opts->id = id;
+}
+
+void qemu_opts_del(QemuOpts *opts)
+{
+ QemuOpt *opt;
+
+ if (opts == NULL) {
+ return;
+ }
+
+ for (;;) {
+ opt = QTAILQ_FIRST(&opts->head);
+ if (opt == NULL)
+ break;
+ qemu_opt_del(opt);
+ }
+ QTAILQ_REMOVE(&opts->list->head, opts, next);
+ g_free(opts->id);
+ g_free(opts);
+}
+
+/* print value, escaping any commas in value */
+static void escaped_print(const char *value)
+{
+ const char *ptr;
+
+ for (ptr = value; *ptr; ++ptr) {
+ if (*ptr == ',') {
+ putchar(',');
+ }
+ putchar(*ptr);
+ }
+}
+
+void qemu_opts_print(QemuOpts *opts, const char *separator)
+{
+ QemuOpt *opt;
+ QemuOptDesc *desc = opts->list->desc;
+ const char *sep = "";
+
+ if (opts->id) {
+ printf("id=%s", opts->id); /* passed id_wellformed -> no commas */
+ sep = separator;
+ }
+
+ if (desc[0].name == NULL) {
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ printf("%s%s=", sep, opt->name);
+ escaped_print(opt->str);
+ sep = separator;
+ }
+ return;
+ }
+ for (; desc && desc->name; desc++) {
+ const char *value;
+ QemuOpt *opt = qemu_opt_find(opts, desc->name);
+
+ value = opt ? opt->str : desc->def_value_str;
+ if (!value) {
+ continue;
+ }
+ if (desc->type == QEMU_OPT_STRING) {
+ printf("%s%s=", sep, desc->name);
+ escaped_print(value);
+ } else if ((desc->type == QEMU_OPT_SIZE ||
+ desc->type == QEMU_OPT_NUMBER) && opt) {
+ printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint);
+ } else {
+ printf("%s%s=%s", sep, desc->name, value);
+ }
+ sep = separator;
+ }
+}
+
+static void opts_do_parse(QemuOpts *opts, const char *params,
+ const char *firstname, bool prepend, Error **errp)
+{
+ char option[128], value[1024];
+    const char *p, *pe, *pc;
+ Error *local_err = NULL;
+
+ for (p = params; *p != '\0'; p++) {
+ pe = strchr(p, '=');
+ pc = strchr(p, ',');
+ if (!pe || (pc && pc < pe)) {
+ /* found "foo,more" */
+ if (p == params && firstname) {
+ /* implicitly named first option */
+ pstrcpy(option, sizeof(option), firstname);
+ p = get_opt_value(value, sizeof(value), p);
+ } else {
+ /* option without value, probably a flag */
+ p = get_opt_name(option, sizeof(option), p, ',');
+ if (strncmp(option, "no", 2) == 0) {
+ memmove(option, option+2, strlen(option+2)+1);
+ pstrcpy(value, sizeof(value), "off");
+ } else {
+ pstrcpy(value, sizeof(value), "on");
+ }
+ }
+ } else {
+ /* found "foo=bar,more" */
+ p = get_opt_name(option, sizeof(option), p, '=');
+ if (*p != '=') {
+ break;
+ }
+ p++;
+ p = get_opt_value(value, sizeof(value), p);
+ }
+ if (strcmp(option, "id") != 0) {
+ /* store and parse */
+ opt_set(opts, option, value, prepend, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+ if (*p != ',') {
+ break;
+ }
+ }
+}
+
+/**
+ * Store options parsed from @params into @opts.
+ * If @firstname is non-null, the first key=value in @params may omit
+ * key=, and is treated as if key was @firstname.
+ * On error, store an error object through @errp if non-null.
+ */
+void qemu_opts_do_parse(QemuOpts *opts, const char *params,
+ const char *firstname, Error **errp)
+{
+ opts_do_parse(opts, params, firstname, false, errp);
+}
+
+static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
+ bool permit_abbrev, bool defaults, Error **errp)
+{
+ const char *firstname;
+ char value[1024], *id = NULL;
+ const char *p;
+ QemuOpts *opts;
+ Error *local_err = NULL;
+
+ assert(!permit_abbrev || list->implied_opt_name);
+ firstname = permit_abbrev ? list->implied_opt_name : NULL;
+
+ if (strncmp(params, "id=", 3) == 0) {
+ get_opt_value(value, sizeof(value), params+3);
+ id = value;
+ } else if ((p = strstr(params, ",id=")) != NULL) {
+ get_opt_value(value, sizeof(value), p+4);
+ id = value;
+ }
+
+ /*
+ * This code doesn't work for defaults && !list->merge_lists: when
+ * params has no id=, and list has an element with !opts->id, it
+ * appends a new element instead of returning the existing opts.
+     * However, we have no use for this case.  Guard against possible
+ * (if unlikely) future misuse:
+ */
+ assert(!defaults || list->merge_lists);
+ opts = qemu_opts_create(list, id, !defaults, &local_err);
+ if (opts == NULL) {
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+
+ opts_do_parse(opts, params, firstname, defaults, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qemu_opts_del(opts);
+ return NULL;
+ }
+
+ return opts;
+}
+
+/**
+ * Create a QemuOpts in @list and with options parsed from @params.
+ * If @permit_abbrev, the first key=value in @params may omit key=,
+ * and is treated as if key was @list->implied_opt_name.
+ * On error, store an error object through @errp if non-null.
+ * Return the new QemuOpts on success, null pointer on error.
+ */
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
+ bool permit_abbrev, Error **errp)
+{
+ return opts_parse(list, params, permit_abbrev, false, errp);
+}
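+
+/*
+ * A small sketch of the behaviour above; the option list and the params
+ * string are assumptions, not part of this file.  With an implied option
+ * name of "file", the leading bare value is stored under "file", the bare
+ * flag "readonly" becomes the value "on", and "id=drv0" names the QemuOpts
+ * rather than becoming an option.
+ */
+static QemuOpts *example_parse_params(QemuOptsList *example_list,
+                                      Error **errp)
+{
+    return qemu_opts_parse(example_list, "disk.img,readonly=on,id=drv0",
+                           true, errp);
+}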
+
+/**
+ * Create a QemuOpts in @list and with options parsed from @params.
+ * If @permit_abbrev, the first key=value in @params may omit key=,
+ * and is treated as if key was @list->implied_opt_name.
+ * Report errors with error_report_err(). This is inappropriate in
+ * QMP context. Do not use this function there!
+ * Return the new QemuOpts on success, null pointer on error.
+ */
+QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params,
+ bool permit_abbrev)
+{
+ Error *err = NULL;
+ QemuOpts *opts;
+
+ opts = opts_parse(list, params, permit_abbrev, false, &err);
+ if (err) {
+ error_report_err(err);
+ }
+ return opts;
+}
+
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+ int permit_abbrev)
+{
+ QemuOpts *opts;
+
+ opts = opts_parse(list, params, permit_abbrev, true, NULL);
+ assert(opts);
+}
+
+typedef struct OptsFromQDictState {
+ QemuOpts *opts;
+ Error **errp;
+} OptsFromQDictState;
+
+static void qemu_opts_from_qdict_1(const char *key, QObject *obj, void *opaque)
+{
+ OptsFromQDictState *state = opaque;
+ char buf[32];
+ const char *value;
+ int n;
+
+ if (!strcmp(key, "id") || *state->errp) {
+ return;
+ }
+
+ switch (qobject_type(obj)) {
+ case QTYPE_QSTRING:
+ value = qstring_get_str(qobject_to_qstring(obj));
+ break;
+ case QTYPE_QINT:
+ n = snprintf(buf, sizeof(buf), "%" PRId64,
+ qint_get_int(qobject_to_qint(obj)));
+ assert(n < sizeof(buf));
+ value = buf;
+ break;
+ case QTYPE_QFLOAT:
+ n = snprintf(buf, sizeof(buf), "%.17g",
+ qfloat_get_double(qobject_to_qfloat(obj)));
+ assert(n < sizeof(buf));
+ value = buf;
+ break;
+ case QTYPE_QBOOL:
+ pstrcpy(buf, sizeof(buf),
+ qbool_get_bool(qobject_to_qbool(obj)) ? "on" : "off");
+ value = buf;
+ break;
+ default:
+ return;
+ }
+
+ qemu_opt_set(state->opts, key, value, state->errp);
+}
+
+/*
+ * Create QemuOpts from a QDict.
+ * Use value of key "id" as ID if it exists and is a QString.
+ * Only QStrings, QInts, QFloats and QBools are copied. Entries with
+ * other types are silently ignored.
+ */
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+ Error **errp)
+{
+ OptsFromQDictState state;
+ Error *local_err = NULL;
+ QemuOpts *opts;
+
+ opts = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1,
+ &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return NULL;
+ }
+
+ assert(opts != NULL);
+
+ state.errp = &local_err;
+ state.opts = opts;
+ qdict_iter(qdict, qemu_opts_from_qdict_1, &state);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ qemu_opts_del(opts);
+ return NULL;
+ }
+
+ return opts;
+}
+
+/*
+ * Add to the QemuOpts every QDict entry that can be added, and remove those
+ * entries from the QDict.  When this function returns, the QDict contains
+ * only the entries that could not be added to the QemuOpts.
+ */
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
+{
+ const QDictEntry *entry, *next;
+
+ entry = qdict_first(qdict);
+
+ while (entry != NULL) {
+ Error *local_err = NULL;
+ OptsFromQDictState state = {
+ .errp = &local_err,
+ .opts = opts,
+ };
+
+ next = qdict_next(qdict, entry);
+
+ if (find_desc_by_name(opts->list->desc, entry->key)) {
+ qemu_opts_from_qdict_1(entry->key, entry->value, &state);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ } else {
+ qdict_del(qdict, entry->key);
+ }
+ }
+
+ entry = next;
+ }
+}
+
+/*
+ * Convert from QemuOpts to QDict.
+ * The QDict values are of type QString.
+ * TODO We'll want to use types appropriate for opt->desc->type, but
+ * this is enough for now.
+ */
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict)
+{
+ QemuOpt *opt;
+ QObject *val;
+
+ if (!qdict) {
+ qdict = qdict_new();
+ }
+ if (opts->id) {
+ qdict_put(qdict, "id", qstring_from_str(opts->id));
+ }
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ val = QOBJECT(qstring_from_str(opt->str));
+ qdict_put_obj(qdict, opt->name, val);
+ }
+ return qdict;
+}
+
+/* Validate parsed opts against @desc.  Intended for QemuOpts whose
+ * QemuOptsList provided no descriptions of its own (i.e. accepts any
+ * option); the assertion below enforces that.
+ */
+void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
+{
+ QemuOpt *opt;
+ Error *local_err = NULL;
+
+ assert(opts_accepts_any(opts));
+
+ QTAILQ_FOREACH(opt, &opts->head, next) {
+ opt->desc = find_desc_by_name(desc, opt->name);
+ if (!opt->desc) {
+ error_setg(errp, QERR_INVALID_PARAMETER, opt->name);
+ return;
+ }
+
+ qemu_opt_parse(opt, &local_err);
+ if (local_err) {
+ error_propagate(errp, local_err);
+ return;
+ }
+ }
+}
+
+/**
+ * For each member of @list, call @func(@opaque, member, @errp).
+ * Call it with the current location temporarily set to the member's.
+ * @func() may store an Error through @errp, but must return non-zero then.
+ * When @func() returns non-zero, break the loop and return that value.
+ * Return zero when the loop completes.
+ */
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func,
+ void *opaque, Error **errp)
+{
+ Location loc;
+ QemuOpts *opts;
+ int rc;
+
+ loc_push_none(&loc);
+ QTAILQ_FOREACH(opts, &list->head, next) {
+ loc_restore(&opts->loc);
+ rc = func(opaque, opts, errp);
+ if (rc) {
+ return rc;
+ }
+ assert(!errp || !*errp);
+ }
+ loc_pop(&loc);
+ return 0;
+}
+
+static size_t count_opts_list(QemuOptsList *list)
+{
+ QemuOptDesc *desc = NULL;
+ size_t num_opts = 0;
+
+ if (!list) {
+ return 0;
+ }
+
+ desc = list->desc;
+ while (desc && desc->name) {
+ num_opts++;
+ desc++;
+ }
+
+ return num_opts;
+}
+
+void qemu_opts_free(QemuOptsList *list)
+{
+ g_free(list);
+}
+
+/* Realloc dst option list and append options from an option list (list)
+ * to it. dst could be NULL or a malloced list.
+ * The lifetime of dst must be shorter than the input list because the
+ * QemuOptDesc->name, ->help, and ->def_value_str strings are shared.
+ */
+QemuOptsList *qemu_opts_append(QemuOptsList *dst,
+ QemuOptsList *list)
+{
+ size_t num_opts, num_dst_opts;
+ QemuOptDesc *desc;
+ bool need_init = false;
+ bool need_head_update;
+
+ if (!list) {
+ return dst;
+ }
+
+    /* If dst is NULL, the list header fields of the freshly allocated dst
+     * must be initialized before any options are added to it.
+     */
+ if (!dst) {
+ need_init = true;
+ need_head_update = true;
+ } else {
+ /* Moreover, even if dst is not NULL, the realloc may move it to a
+ * different address in which case we may get a stale tail pointer
+ * in dst->head. */
+ need_head_update = QTAILQ_EMPTY(&dst->head);
+ }
+
+ num_opts = count_opts_list(dst);
+ num_dst_opts = num_opts;
+ num_opts += count_opts_list(list);
+ dst = g_realloc(dst, sizeof(QemuOptsList) +
+ (num_opts + 1) * sizeof(QemuOptDesc));
+ if (need_init) {
+ dst->name = NULL;
+ dst->implied_opt_name = NULL;
+ dst->merge_lists = false;
+ }
+ if (need_head_update) {
+ QTAILQ_INIT(&dst->head);
+ }
+ dst->desc[num_dst_opts].name = NULL;
+
+ /* append list->desc to dst->desc */
+ if (list) {
+ desc = list->desc;
+ while (desc && desc->name) {
+ if (find_desc_by_name(dst->desc, desc->name) == NULL) {
+ dst->desc[num_dst_opts++] = *desc;
+ dst->desc[num_dst_opts].name = NULL;
+ }
+ desc++;
+ }
+ }
+
+ return dst;
+}
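+
+/*
+ * A usage sketch for qemu_opts_append(); the list names are assumptions,
+ * not part of this file.  The merged list shares the desc strings with its
+ * inputs and is released with qemu_opts_free().
+ */
+static QemuOptsList *example_merge_lists(QemuOptsList *base_opts,
+                                         QemuOptsList *extra_opts)
+{
+    QemuOptsList *merged = NULL;
+
+    merged = qemu_opts_append(merged, base_opts);  /* allocates the list */
+    merged = qemu_opts_append(merged, extra_opts); /* grows it in place  */
+    return merged;  /* caller eventually calls qemu_opts_free(merged) */
+}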
diff --git a/src/util/qemu-progress.c b/src/util/qemu-progress.c
new file mode 100644
index 0000000..4ee5cd0
--- /dev/null
+++ b/src/util/qemu-progress.c
@@ -0,0 +1,159 @@
+/*
+ * QEMU progress printing utility functions
+ *
+ * Copyright (C) 2011 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include <stdio.h>
+
+struct progress_state {
+ float current;
+ float last_print;
+ float min_skip;
+ void (*print)(void);
+ void (*end)(void);
+};
+
+static struct progress_state state;
+static volatile sig_atomic_t print_pending;
+
+/*
+ * Simple progress print function: prints the current overall progress
+ * (state.current, in percent) to stdout and rewinds the line.
+ */
+static void progress_simple_print(void)
+{
+ printf(" (%3.2f/100%%)\r", state.current);
+ fflush(stdout);
+}
+
+static void progress_simple_end(void)
+{
+ printf("\n");
+}
+
+static void progress_simple_init(void)
+{
+ state.print = progress_simple_print;
+ state.end = progress_simple_end;
+}
+
+#ifdef CONFIG_POSIX
+static void sigusr_print(int signal)
+{
+ print_pending = 1;
+}
+#endif
+
+static void progress_dummy_print(void)
+{
+ if (print_pending) {
+ fprintf(stderr, " (%3.2f/100%%)\n", state.current);
+ print_pending = 0;
+ }
+}
+
+static void progress_dummy_end(void)
+{
+}
+
+static void progress_dummy_init(void)
+{
+#ifdef CONFIG_POSIX
+ struct sigaction action;
+ sigset_t set;
+
+ memset(&action, 0, sizeof(action));
+ sigfillset(&action.sa_mask);
+ action.sa_handler = sigusr_print;
+ action.sa_flags = 0;
+ sigaction(SIGUSR1, &action, NULL);
+
+ /*
+     * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop().  The
+     * tools that use the progress report do not use SIGUSR1 in that role;
+     * for them it should trigger a progress print instead, so re-enable it.
+ */
+ sigemptyset(&set);
+ sigaddset(&set, SIGUSR1);
+ pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+#endif
+
+ state.print = progress_dummy_print;
+ state.end = progress_dummy_end;
+}
+
+/*
+ * Initialize progress reporting.
+ * If @enabled is false, actual reporting is suppressed. The user can
+ * still trigger a report by sending a SIGUSR1.
+ * Reports are also suppressed unless we've had at least @min_skip
+ * percent progress since the last report.
+ */
+void qemu_progress_init(int enabled, float min_skip)
+{
+ state.min_skip = min_skip;
+ if (enabled) {
+ progress_simple_init();
+ } else {
+ progress_dummy_init();
+ }
+}
+
+void qemu_progress_end(void)
+{
+ state.end();
+}
+
+/*
+ * Report progress.
+ * @delta is how much progress we made.
+ * If @max is zero, @delta is the absolute amount of the total job done so far.
+ * Else, @delta is a progress delta since the last call, as a fraction
+ * of @max. I.e. the delta is @delta * @max / 100. This allows
+ * relative accounting of functions which may be a different fraction of
+ * the full job, depending on the context they are called in. I.e.
+ * a function might be considered 40% of the full job if used from
+ * bdrv_img_create() but only 20% if called from img_convert().
+ */
+void qemu_progress_print(float delta, int max)
+{
+ float current;
+
+ if (max == 0) {
+ current = delta;
+ } else {
+ current = state.current + delta / 100 * max;
+ }
+ if (current > 100) {
+ current = 100;
+ }
+ state.current = current;
+
+ if (current > (state.last_print + state.min_skip) ||
+ (current == 100) || (current == 0)) {
+ state.last_print = state.current;
+ state.print();
+ }
+}
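+
+/*
+ * A usage sketch for the functions above; the loop and the percentages are
+ * assumptions, not part of this file.  Each iteration reports 1/100th of a
+ * sub-task that accounts for 50% of the whole job.
+ */
+static void example_report_progress(void)
+{
+    int i;
+
+    qemu_progress_init(1, 0.5);     /* print at most every 0.5% of progress */
+    for (i = 0; i < 100; i++) {
+        /* ... do one slice of the sub-task ... */
+        qemu_progress_print(1, 50); /* adds 1 * 50 / 100 = 0.5% each time */
+    }
+    qemu_progress_end();
+}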
diff --git a/src/util/qemu-sockets.c b/src/util/qemu-sockets.c
new file mode 100644
index 0000000..5a31d16
--- /dev/null
+++ b/src/util/qemu-sockets.c
@@ -0,0 +1,1167 @@
+/*
+ * inet and unix socket functions for qemu
+ *
+ * (c) 2008 Gerd Hoffmann <kraxel@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; under version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "monitor/monitor.h"
+#include "qemu/sockets.h"
+#include "qemu/main-loop.h"
+#include "qapi/qmp-input-visitor.h"
+#include "qapi/qmp-output-visitor.h"
+#include "qapi-visit.h"
+
+#ifndef AI_ADDRCONFIG
+# define AI_ADDRCONFIG 0
+#endif
+#ifndef AI_V4MAPPED
+# define AI_V4MAPPED 0
+#endif
+
+/* used temporarily until all users are converted to QemuOpts */
+QemuOptsList socket_optslist = {
+ .name = "socket",
+ .head = QTAILQ_HEAD_INITIALIZER(socket_optslist.head),
+ .desc = {
+ {
+ .name = "path",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "host",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "port",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "localaddr",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "localport",
+ .type = QEMU_OPT_STRING,
+ },{
+ .name = "to",
+ .type = QEMU_OPT_NUMBER,
+ },{
+ .name = "ipv4",
+ .type = QEMU_OPT_BOOL,
+ },{
+ .name = "ipv6",
+ .type = QEMU_OPT_BOOL,
+ },
+        { /* end of list */ }
+ },
+};
+
+static int inet_getport(struct addrinfo *e)
+{
+ struct sockaddr_in *i4;
+ struct sockaddr_in6 *i6;
+
+ switch (e->ai_family) {
+ case PF_INET6:
+ i6 = (void*)e->ai_addr;
+ return ntohs(i6->sin6_port);
+ case PF_INET:
+ i4 = (void*)e->ai_addr;
+ return ntohs(i4->sin_port);
+ default:
+ return 0;
+ }
+}
+
+static void inet_setport(struct addrinfo *e, int port)
+{
+ struct sockaddr_in *i4;
+ struct sockaddr_in6 *i6;
+
+ switch (e->ai_family) {
+ case PF_INET6:
+ i6 = (void*)e->ai_addr;
+ i6->sin6_port = htons(port);
+ break;
+ case PF_INET:
+ i4 = (void*)e->ai_addr;
+ i4->sin_port = htons(port);
+ break;
+ }
+}
+
+NetworkAddressFamily inet_netfamily(int family)
+{
+ switch (family) {
+ case PF_INET6: return NETWORK_ADDRESS_FAMILY_IPV6;
+ case PF_INET: return NETWORK_ADDRESS_FAMILY_IPV4;
+ case PF_UNIX: return NETWORK_ADDRESS_FAMILY_UNIX;
+ }
+ return NETWORK_ADDRESS_FAMILY_UNKNOWN;
+}
+
+int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
+{
+ struct addrinfo ai,*res,*e;
+ const char *addr;
+ char port[33];
+ char uaddr[INET6_ADDRSTRLEN+1];
+ char uport[33];
+ int slisten, rc, to, port_min, port_max, p;
+
+ memset(&ai,0, sizeof(ai));
+ ai.ai_flags = AI_PASSIVE;
+ ai.ai_family = PF_UNSPEC;
+ ai.ai_socktype = SOCK_STREAM;
+
+ if ((qemu_opt_get(opts, "host") == NULL)) {
+ error_setg(errp, "host not specified");
+ return -1;
+ }
+ if (qemu_opt_get(opts, "port") != NULL) {
+ pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
+ } else {
+ port[0] = '\0';
+ }
+ addr = qemu_opt_get(opts, "host");
+
+ to = qemu_opt_get_number(opts, "to", 0);
+ if (qemu_opt_get_bool(opts, "ipv4", 0))
+ ai.ai_family = PF_INET;
+ if (qemu_opt_get_bool(opts, "ipv6", 0))
+ ai.ai_family = PF_INET6;
+
+ /* lookup */
+ if (port_offset) {
+ unsigned long long baseport;
+ if (strlen(port) == 0) {
+ error_setg(errp, "port not specified");
+ return -1;
+ }
+ if (parse_uint_full(port, &baseport, 10) < 0) {
+ error_setg(errp, "can't convert to a number: %s", port);
+ return -1;
+ }
+ if (baseport > 65535 ||
+ baseport + port_offset > 65535) {
+ error_setg(errp, "port %s out of range", port);
+ return -1;
+ }
+ snprintf(port, sizeof(port), "%d", (int)baseport + port_offset);
+ }
+ rc = getaddrinfo(strlen(addr) ? addr : NULL,
+ strlen(port) ? port : NULL, &ai, &res);
+ if (rc != 0) {
+ error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+ gai_strerror(rc));
+ return -1;
+ }
+
+ /* create socket + bind */
+ for (e = res; e != NULL; e = e->ai_next) {
+ getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
+ uaddr,INET6_ADDRSTRLEN,uport,32,
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
+ if (slisten < 0) {
+ if (!e->ai_next) {
+ error_setg_errno(errp, errno, "Failed to create socket");
+ }
+ continue;
+ }
+
+ socket_set_fast_reuse(slisten);
+#ifdef IPV6_V6ONLY
+ if (e->ai_family == PF_INET6) {
+ /* listen on both ipv4 and ipv6 */
+ const int off = 0;
+ qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &off,
+ sizeof(off));
+ }
+#endif
+
+ port_min = inet_getport(e);
+ port_max = to ? to + port_offset : port_min;
+ for (p = port_min; p <= port_max; p++) {
+ inet_setport(e, p);
+ if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
+ goto listen;
+ }
+ if (p == port_max) {
+ if (!e->ai_next) {
+ error_setg_errno(errp, errno, "Failed to bind socket");
+ }
+ }
+ }
+ closesocket(slisten);
+ }
+ freeaddrinfo(res);
+ return -1;
+
+listen:
+ if (listen(slisten,1) != 0) {
+ error_setg_errno(errp, errno, "Failed to listen on socket");
+ closesocket(slisten);
+ freeaddrinfo(res);
+ return -1;
+ }
+ qemu_opt_set(opts, "host", uaddr, &error_abort);
+ qemu_opt_set_number(opts, "port", inet_getport(e) - port_offset,
+ &error_abort);
+ qemu_opt_set_bool(opts, "ipv6", e->ai_family == PF_INET6,
+ &error_abort);
+ qemu_opt_set_bool(opts, "ipv4", e->ai_family != PF_INET6,
+ &error_abort);
+ freeaddrinfo(res);
+ return slisten;
+}
+
+#ifdef _WIN32
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+ ((rc) == -EINPROGRESS || (rc) == -EWOULDBLOCK || (rc) == -WSAEALREADY)
+#else
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+ ((rc) == -EINPROGRESS)
+#endif
+
+/* Struct to store connect state for a non-blocking connect */
+typedef struct ConnectState {
+ int fd;
+ struct addrinfo *addr_list;
+ struct addrinfo *current_addr;
+ NonBlockingConnectHandler *callback;
+ void *opaque;
+} ConnectState;
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+ ConnectState *connect_state, Error **errp);
+
+static void wait_for_connect(void *opaque)
+{
+ ConnectState *s = opaque;
+ int val = 0, rc = 0;
+ socklen_t valsize = sizeof(val);
+ bool in_progress;
+ Error *err = NULL;
+
+ qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+
+ do {
+ rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize);
+ } while (rc == -1 && socket_error() == EINTR);
+
+ /* update rc to contain error */
+ if (!rc && val) {
+ rc = -1;
+ errno = val;
+ }
+
+ /* connect error */
+ if (rc < 0) {
+ error_setg_errno(&err, errno, "Error connecting to socket");
+ closesocket(s->fd);
+ s->fd = rc;
+ }
+
+ /* try to connect to the next address on the list */
+ if (s->current_addr) {
+ while (s->current_addr->ai_next != NULL && s->fd < 0) {
+ s->current_addr = s->current_addr->ai_next;
+ s->fd = inet_connect_addr(s->current_addr, &in_progress, s, NULL);
+ if (s->fd < 0) {
+ error_free(err);
+ err = NULL;
+ error_setg_errno(&err, errno, "Unable to start socket connect");
+ }
+ /* connect in progress */
+ if (in_progress) {
+ goto out;
+ }
+ }
+
+ freeaddrinfo(s->addr_list);
+ }
+
+ if (s->callback) {
+ s->callback(s->fd, err, s->opaque);
+ }
+ g_free(s);
+out:
+ error_free(err);
+}
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+ ConnectState *connect_state, Error **errp)
+{
+ int sock, rc;
+
+ *in_progress = false;
+
+ sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+ if (sock < 0) {
+ error_setg_errno(errp, errno, "Failed to create socket");
+ return -1;
+ }
+ socket_set_fast_reuse(sock);
+ if (connect_state != NULL) {
+ qemu_set_nonblock(sock);
+ }
+ /* connect to peer */
+ do {
+ rc = 0;
+ if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) {
+ rc = -socket_error();
+ }
+ } while (rc == -EINTR);
+
+ if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+ connect_state->fd = sock;
+ qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state);
+ *in_progress = true;
+ } else if (rc < 0) {
+ error_setg_errno(errp, errno, "Failed to connect socket");
+ closesocket(sock);
+ return -1;
+ }
+ return sock;
+}
+
+static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
+{
+ struct addrinfo ai, *res;
+ int rc;
+ const char *addr;
+ const char *port;
+
+ memset(&ai, 0, sizeof(ai));
+
+ ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
+ ai.ai_family = PF_UNSPEC;
+ ai.ai_socktype = SOCK_STREAM;
+
+ addr = qemu_opt_get(opts, "host");
+ port = qemu_opt_get(opts, "port");
+ if (addr == NULL || port == NULL) {
+ error_setg(errp, "host and/or port not specified");
+ return NULL;
+ }
+
+ if (qemu_opt_get_bool(opts, "ipv4", 0)) {
+ ai.ai_family = PF_INET;
+ }
+ if (qemu_opt_get_bool(opts, "ipv6", 0)) {
+ ai.ai_family = PF_INET6;
+ }
+
+ /* lookup */
+ rc = getaddrinfo(addr, port, &ai, &res);
+ if (rc != 0) {
+ error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+ gai_strerror(rc));
+ return NULL;
+ }
+ return res;
+}
+
+/**
+ * Create a socket and connect it to an address.
+ *
+ * @opts: QEMU options, recognized parameters strings "host" and "port",
+ * bools "ipv4" and "ipv6".
+ * @errp: set on error
+ * @callback: callback function for non-blocking connect
+ * @opaque: opaque for callback function
+ *
+ * Returns: -1 on error, file descriptor on success.
+ *
+ * If @callback is non-null, the connect is non-blocking. If this
+ * function succeeds, callback will be called when the connection
+ * completes, with the file descriptor on success, or -1 on error.
+ */
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque)
+{
+ Error *local_err = NULL;
+ struct addrinfo *res, *e;
+ int sock = -1;
+ bool in_progress;
+ ConnectState *connect_state = NULL;
+
+ res = inet_parse_connect_opts(opts, errp);
+ if (!res) {
+ return -1;
+ }
+
+ if (callback != NULL) {
+ connect_state = g_malloc0(sizeof(*connect_state));
+ connect_state->addr_list = res;
+ connect_state->callback = callback;
+ connect_state->opaque = opaque;
+ }
+
+ for (e = res; e != NULL; e = e->ai_next) {
+ error_free(local_err);
+ local_err = NULL;
+ if (connect_state != NULL) {
+ connect_state->current_addr = e;
+ }
+ sock = inet_connect_addr(e, &in_progress, connect_state, &local_err);
+ if (sock >= 0) {
+ break;
+ }
+ }
+
+ if (sock < 0) {
+ error_propagate(errp, local_err);
+ } else if (in_progress) {
+ /* wait_for_connect() will do the rest */
+ return sock;
+ } else {
+ if (callback) {
+ callback(sock, NULL, opaque);
+ }
+ }
+ g_free(connect_state);
+ freeaddrinfo(res);
+ return sock;
+}
+
+int inet_dgram_opts(QemuOpts *opts, Error **errp)
+{
+ struct addrinfo ai, *peer = NULL, *local = NULL;
+ const char *addr;
+ const char *port;
+ int sock = -1, rc;
+
+ /* lookup peer addr */
+ memset(&ai,0, sizeof(ai));
+ ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
+ ai.ai_family = PF_UNSPEC;
+ ai.ai_socktype = SOCK_DGRAM;
+
+ addr = qemu_opt_get(opts, "host");
+ port = qemu_opt_get(opts, "port");
+ if (addr == NULL || strlen(addr) == 0) {
+ addr = "localhost";
+ }
+ if (port == NULL || strlen(port) == 0) {
+ error_setg(errp, "remote port not specified");
+ return -1;
+ }
+
+ if (qemu_opt_get_bool(opts, "ipv4", 0))
+ ai.ai_family = PF_INET;
+ if (qemu_opt_get_bool(opts, "ipv6", 0))
+ ai.ai_family = PF_INET6;
+
+ if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) {
+ error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+ gai_strerror(rc));
+ return -1;
+ }
+
+ /* lookup local addr */
+ memset(&ai,0, sizeof(ai));
+ ai.ai_flags = AI_PASSIVE;
+ ai.ai_family = peer->ai_family;
+ ai.ai_socktype = SOCK_DGRAM;
+
+ addr = qemu_opt_get(opts, "localaddr");
+ port = qemu_opt_get(opts, "localport");
+ if (addr == NULL || strlen(addr) == 0) {
+ addr = NULL;
+ }
+ if (!port || strlen(port) == 0)
+ port = "0";
+
+ if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) {
+ error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+ gai_strerror(rc));
+ goto err;
+ }
+
+ /* create socket */
+ sock = qemu_socket(peer->ai_family, peer->ai_socktype, peer->ai_protocol);
+ if (sock < 0) {
+ error_setg_errno(errp, errno, "Failed to create socket");
+ goto err;
+ }
+ socket_set_fast_reuse(sock);
+
+ /* bind socket */
+ if (bind(sock, local->ai_addr, local->ai_addrlen) < 0) {
+ error_setg_errno(errp, errno, "Failed to bind socket");
+ goto err;
+ }
+
+ /* connect to peer */
+ if (connect(sock,peer->ai_addr,peer->ai_addrlen) < 0) {
+ error_setg_errno(errp, errno, "Failed to connect socket");
+ goto err;
+ }
+
+ freeaddrinfo(local);
+ freeaddrinfo(peer);
+ return sock;
+
+err:
+ if (-1 != sock)
+ closesocket(sock);
+ if (local)
+ freeaddrinfo(local);
+ if (peer)
+ freeaddrinfo(peer);
+ return -1;
+}
+
+/* compatibility wrapper */
+InetSocketAddress *inet_parse(const char *str, Error **errp)
+{
+ InetSocketAddress *addr;
+ const char *optstr, *h;
+ char host[65];
+ char port[33];
+ int to;
+ int pos;
+
+ addr = g_new0(InetSocketAddress, 1);
+
+ /* parse address */
+ if (str[0] == ':') {
+ /* no host given */
+ host[0] = '\0';
+ if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) {
+ error_setg(errp, "error parsing port in address '%s'", str);
+ goto fail;
+ }
+ } else if (str[0] == '[') {
+ /* IPv6 addr */
+ if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) {
+ error_setg(errp, "error parsing IPv6 address '%s'", str);
+ goto fail;
+ }
+ addr->ipv6 = addr->has_ipv6 = true;
+ } else {
+ /* hostname or IPv4 addr */
+ if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) {
+ error_setg(errp, "error parsing address '%s'", str);
+ goto fail;
+ }
+ if (host[strspn(host, "0123456789.")] == '\0') {
+ addr->ipv4 = addr->has_ipv4 = true;
+ }
+ }
+
+ addr->host = g_strdup(host);
+ addr->port = g_strdup(port);
+
+ /* parse options */
+ optstr = str + pos;
+ h = strstr(optstr, ",to=");
+ if (h) {
+ h += 4;
+ if (sscanf(h, "%d%n", &to, &pos) != 1 ||
+ (h[pos] != '\0' && h[pos] != ',')) {
+ error_setg(errp, "error parsing to= argument");
+ goto fail;
+ }
+ addr->has_to = true;
+ addr->to = to;
+ }
+ if (strstr(optstr, ",ipv4")) {
+ addr->ipv4 = addr->has_ipv4 = true;
+ }
+ if (strstr(optstr, ",ipv6")) {
+ addr->ipv6 = addr->has_ipv6 = true;
+ }
+ return addr;
+
+fail:
+ qapi_free_InetSocketAddress(addr);
+ return NULL;
+}
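+
+/*
+ * A sketch of the address strings accepted above; the concrete values are
+ * assumptions, not part of this file:
+ *
+ *   ":4444"                 host "",        port "4444"
+ *   "[::1]:4444,ipv6"       host "::1",     port "4444", ipv6 flag set
+ *   "0.0.0.0:5900,to=5910"  host "0.0.0.0", port "5900", range end 5910
+ */
+static InetSocketAddress *example_parse_inet(Error **errp)
+{
+    return inet_parse("[::1]:4444,to=4449,ipv6", errp);
+}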
+
+static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr)
+{
+ bool ipv4 = addr->has_ipv4 && addr->ipv4;
+ bool ipv6 = addr->has_ipv6 && addr->ipv6;
+
+ if (ipv4 || ipv6) {
+ qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort);
+ qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort);
+ } else if (addr->has_ipv4 || addr->has_ipv6) {
+ qemu_opt_set_bool(opts, "ipv4", !addr->has_ipv4, &error_abort);
+ qemu_opt_set_bool(opts, "ipv6", !addr->has_ipv6, &error_abort);
+ }
+ if (addr->has_to) {
+ qemu_opt_set_number(opts, "to", addr->to, &error_abort);
+ }
+ qemu_opt_set(opts, "host", addr->host, &error_abort);
+ qemu_opt_set(opts, "port", addr->port, &error_abort);
+}
+
+int inet_listen(const char *str, char *ostr, int olen,
+ int socktype, int port_offset, Error **errp)
+{
+ QemuOpts *opts;
+ char *optstr;
+ int sock = -1;
+ InetSocketAddress *addr;
+
+ addr = inet_parse(str, errp);
+ if (addr != NULL) {
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ inet_addr_to_opts(opts, addr);
+ qapi_free_InetSocketAddress(addr);
+ sock = inet_listen_opts(opts, port_offset, errp);
+ if (sock != -1 && ostr) {
+ optstr = strchr(str, ',');
+ if (qemu_opt_get_bool(opts, "ipv6", 0)) {
+ snprintf(ostr, olen, "[%s]:%s%s",
+ qemu_opt_get(opts, "host"),
+ qemu_opt_get(opts, "port"),
+ optstr ? optstr : "");
+ } else {
+ snprintf(ostr, olen, "%s:%s%s",
+ qemu_opt_get(opts, "host"),
+ qemu_opt_get(opts, "port"),
+ optstr ? optstr : "");
+ }
+ }
+ qemu_opts_del(opts);
+ }
+ return sock;
+}
+
+/**
+ * Create a blocking socket and connect it to an address.
+ *
+ * @str: address string
+ * @errp: set in case of an error
+ *
+ * Returns -1 in case of error, file descriptor on success
+ **/
+int inet_connect(const char *str, Error **errp)
+{
+ QemuOpts *opts;
+ int sock = -1;
+ InetSocketAddress *addr;
+
+ addr = inet_parse(str, errp);
+ if (addr != NULL) {
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ inet_addr_to_opts(opts, addr);
+ qapi_free_InetSocketAddress(addr);
+ sock = inet_connect_opts(opts, errp, NULL, NULL);
+ qemu_opts_del(opts);
+ }
+ return sock;
+}
+
+/**
+ * Create a non-blocking socket and connect it to an address.
+ * Calls the callback function with fd in case of success or -1 in case of
+ * error.
+ *
+ * @str: address string
+ * @callback: callback function that is called when connect completes,
+ * cannot be NULL.
+ * @opaque: opaque for callback function
+ * @errp: set in case of an error
+ *
+ * Returns: -1 on immediate error, file descriptor on success.
+ **/
+int inet_nonblocking_connect(const char *str,
+ NonBlockingConnectHandler *callback,
+ void *opaque, Error **errp)
+{
+ QemuOpts *opts;
+ int sock = -1;
+ InetSocketAddress *addr;
+
+ g_assert(callback != NULL);
+
+ addr = inet_parse(str, errp);
+ if (addr != NULL) {
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ inet_addr_to_opts(opts, addr);
+ qapi_free_InetSocketAddress(addr);
+ sock = inet_connect_opts(opts, errp, callback, opaque);
+ qemu_opts_del(opts);
+ }
+ return sock;
+}
+
+#ifndef _WIN32
+
+int unix_listen_opts(QemuOpts *opts, Error **errp)
+{
+ struct sockaddr_un un;
+ const char *path = qemu_opt_get(opts, "path");
+ int sock, fd;
+
+ sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ error_setg_errno(errp, errno, "Failed to create Unix socket");
+ return -1;
+ }
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ if (path && strlen(path)) {
+ snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+ } else {
+ const char *tmpdir = getenv("TMPDIR");
+ tmpdir = tmpdir ? tmpdir : "/tmp";
+ if (snprintf(un.sun_path, sizeof(un.sun_path), "%s/qemu-socket-XXXXXX",
+ tmpdir) >= sizeof(un.sun_path)) {
+ error_setg_errno(errp, errno,
+ "TMPDIR environment variable (%s) too large", tmpdir);
+ goto err;
+ }
+
+ /*
+         * This dummy fd usage silences the mktemp() "insecure" warning.
+         * Using mkstemp() doesn't make things more secure here though:
+         * bind() complains about existing files, so we have to unlink
+         * first and thus re-open the race window.  The worst possible
+         * case is bind() failing, i.e. a DoS attack.
+ */
+ fd = mkstemp(un.sun_path);
+ if (fd < 0) {
+ error_setg_errno(errp, errno,
+ "Failed to make a temporary socket name in %s", tmpdir);
+ goto err;
+ }
+ close(fd);
+ qemu_opt_set(opts, "path", un.sun_path, &error_abort);
+ }
+
+ if (unlink(un.sun_path) < 0 && errno != ENOENT) {
+ error_setg_errno(errp, errno,
+ "Failed to unlink socket %s", un.sun_path);
+ goto err;
+ }
+ if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) {
+ error_setg_errno(errp, errno, "Failed to bind socket to %s", un.sun_path);
+ goto err;
+ }
+ if (listen(sock, 1) < 0) {
+ error_setg_errno(errp, errno, "Failed to listen on socket");
+ goto err;
+ }
+
+ return sock;
+
+err:
+ closesocket(sock);
+ return -1;
+}
+
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque)
+{
+ struct sockaddr_un un;
+ const char *path = qemu_opt_get(opts, "path");
+ ConnectState *connect_state = NULL;
+ int sock, rc;
+
+ if (path == NULL) {
+ error_setg(errp, "unix connect: no path specified");
+ return -1;
+ }
+
+ sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
+ if (sock < 0) {
+ error_setg_errno(errp, errno, "Failed to create socket");
+ return -1;
+ }
+ if (callback != NULL) {
+ connect_state = g_malloc0(sizeof(*connect_state));
+ connect_state->callback = callback;
+ connect_state->opaque = opaque;
+ qemu_set_nonblock(sock);
+ }
+
+ memset(&un, 0, sizeof(un));
+ un.sun_family = AF_UNIX;
+ snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+
+ /* connect to peer */
+ do {
+ rc = 0;
+ if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) {
+ rc = -socket_error();
+ }
+ } while (rc == -EINTR);
+
+ if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+ connect_state->fd = sock;
+ qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state);
+ return sock;
+ } else if (rc >= 0) {
+        /* non-blocking socket connected immediately, call the callback */
+ if (callback != NULL) {
+ callback(sock, NULL, opaque);
+ }
+ }
+
+ if (rc < 0) {
+ error_setg_errno(errp, -rc, "Failed to connect socket");
+ close(sock);
+ sock = -1;
+ }
+
+ g_free(connect_state);
+ return sock;
+}
+
+#else
+
+int unix_listen_opts(QemuOpts *opts, Error **errp)
+{
+ error_setg(errp, "unix sockets are not available on windows");
+ errno = ENOTSUP;
+ return -1;
+}
+
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque)
+{
+ error_setg(errp, "unix sockets are not available on windows");
+ errno = ENOTSUP;
+ return -1;
+}
+#endif
+
+/* compatibility wrapper */
+int unix_listen(const char *str, char *ostr, int olen, Error **errp)
+{
+ QemuOpts *opts;
+ char *path, *optstr;
+ int sock, len;
+
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+
+ optstr = strchr(str, ',');
+ if (optstr) {
+ len = optstr - str;
+ if (len) {
+ path = g_malloc(len+1);
+ snprintf(path, len+1, "%.*s", len, str);
+ qemu_opt_set(opts, "path", path, &error_abort);
+ g_free(path);
+ }
+ } else {
+ qemu_opt_set(opts, "path", str, &error_abort);
+ }
+
+ sock = unix_listen_opts(opts, errp);
+
+    if (sock != -1 && ostr) {
+        snprintf(ostr, olen, "%s%s",
+                 qemu_opt_get(opts, "path"), optstr ? optstr : "");
+    }
+ qemu_opts_del(opts);
+ return sock;
+}
+
+int unix_connect(const char *path, Error **errp)
+{
+ QemuOpts *opts;
+ int sock;
+
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ qemu_opt_set(opts, "path", path, &error_abort);
+ sock = unix_connect_opts(opts, errp, NULL, NULL);
+ qemu_opts_del(opts);
+ return sock;
+}
+
+
+int unix_nonblocking_connect(const char *path,
+ NonBlockingConnectHandler *callback,
+ void *opaque, Error **errp)
+{
+ QemuOpts *opts;
+ int sock = -1;
+
+ g_assert(callback != NULL);
+
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ qemu_opt_set(opts, "path", path, &error_abort);
+ sock = unix_connect_opts(opts, errp, callback, opaque);
+ qemu_opts_del(opts);
+ return sock;
+}
+
+SocketAddress *socket_parse(const char *str, Error **errp)
+{
+ SocketAddress *addr;
+
+ addr = g_new0(SocketAddress, 1);
+ if (strstart(str, "unix:", NULL)) {
+ if (str[5] == '\0') {
+ error_setg(errp, "invalid Unix socket address");
+ goto fail;
+ } else {
+ addr->type = SOCKET_ADDRESS_KIND_UNIX;
+ addr->u.q_unix = g_new(UnixSocketAddress, 1);
+ addr->u.q_unix->path = g_strdup(str + 5);
+ }
+ } else if (strstart(str, "fd:", NULL)) {
+ if (str[3] == '\0') {
+ error_setg(errp, "invalid file descriptor address");
+ goto fail;
+ } else {
+ addr->type = SOCKET_ADDRESS_KIND_FD;
+ addr->u.fd = g_new(String, 1);
+ addr->u.fd->str = g_strdup(str + 3);
+ }
+ } else {
+ addr->type = SOCKET_ADDRESS_KIND_INET;
+ addr->u.inet = inet_parse(str, errp);
+ if (addr->u.inet == NULL) {
+ goto fail;
+ }
+ }
+ return addr;
+
+fail:
+ qapi_free_SocketAddress(addr);
+ return NULL;
+}
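+
+/*
+ * A sketch of the syntaxes understood above; the concrete strings are
+ * assumptions, not part of this file:
+ *
+ *   "unix:/tmp/example.sock"  ->  SOCKET_ADDRESS_KIND_UNIX
+ *   "fd:myfd"                 ->  SOCKET_ADDRESS_KIND_FD
+ *   "localhost:4444"          ->  SOCKET_ADDRESS_KIND_INET (via inet_parse)
+ */
+static SocketAddress *example_parse_unix(Error **errp)
+{
+    return socket_parse("unix:/tmp/example.sock", errp);
+}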
+
+int socket_connect(SocketAddress *addr, Error **errp,
+ NonBlockingConnectHandler *callback, void *opaque)
+{
+ QemuOpts *opts;
+ int fd;
+
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ switch (addr->type) {
+ case SOCKET_ADDRESS_KIND_INET:
+ inet_addr_to_opts(opts, addr->u.inet);
+ fd = inet_connect_opts(opts, errp, callback, opaque);
+ break;
+
+ case SOCKET_ADDRESS_KIND_UNIX:
+ qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
+ fd = unix_connect_opts(opts, errp, callback, opaque);
+ break;
+
+ case SOCKET_ADDRESS_KIND_FD:
+ fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
+ if (fd >= 0 && callback) {
+ qemu_set_nonblock(fd);
+ callback(fd, NULL, opaque);
+ }
+ break;
+
+ default:
+ abort();
+ }
+ qemu_opts_del(opts);
+ return fd;
+}
+
+int socket_listen(SocketAddress *addr, Error **errp)
+{
+ QemuOpts *opts;
+ int fd;
+
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ switch (addr->type) {
+ case SOCKET_ADDRESS_KIND_INET:
+ inet_addr_to_opts(opts, addr->u.inet);
+ fd = inet_listen_opts(opts, 0, errp);
+ break;
+
+ case SOCKET_ADDRESS_KIND_UNIX:
+ qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
+ fd = unix_listen_opts(opts, errp);
+ break;
+
+ case SOCKET_ADDRESS_KIND_FD:
+ fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
+ break;
+
+ default:
+ abort();
+ }
+ qemu_opts_del(opts);
+ return fd;
+}
+
+int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
+{
+ QemuOpts *opts;
+ int fd;
+
+ opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+ switch (remote->type) {
+ case SOCKET_ADDRESS_KIND_INET:
+ inet_addr_to_opts(opts, remote->u.inet);
+ if (local) {
+ qemu_opt_set(opts, "localaddr", local->u.inet->host, &error_abort);
+ qemu_opt_set(opts, "localport", local->u.inet->port, &error_abort);
+ }
+ fd = inet_dgram_opts(opts, errp);
+ break;
+
+ default:
+ error_setg(errp, "socket type unsupported for datagram");
+ fd = -1;
+ }
+ qemu_opts_del(opts);
+ return fd;
+}
+
+
+static SocketAddress *
+socket_sockaddr_to_address_inet(struct sockaddr_storage *sa,
+ socklen_t salen,
+ Error **errp)
+{
+ char host[NI_MAXHOST];
+ char serv[NI_MAXSERV];
+ SocketAddress *addr;
+ int ret;
+
+ ret = getnameinfo((struct sockaddr *)sa, salen,
+ host, sizeof(host),
+ serv, sizeof(serv),
+ NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0) {
+ error_setg(errp, "Cannot format numeric socket address: %s",
+ gai_strerror(ret));
+ return NULL;
+ }
+
+ addr = g_new0(SocketAddress, 1);
+ addr->type = SOCKET_ADDRESS_KIND_INET;
+ addr->u.inet = g_new0(InetSocketAddress, 1);
+ addr->u.inet->host = g_strdup(host);
+ addr->u.inet->port = g_strdup(serv);
+ if (sa->ss_family == AF_INET) {
+ addr->u.inet->has_ipv4 = addr->u.inet->ipv4 = true;
+ } else {
+ addr->u.inet->has_ipv6 = addr->u.inet->ipv6 = true;
+ }
+
+ return addr;
+}
+
+
+#ifndef WIN32
+static SocketAddress *
+socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
+ socklen_t salen,
+ Error **errp)
+{
+ SocketAddress *addr;
+ struct sockaddr_un *su = (struct sockaddr_un *)sa;
+
+ addr = g_new0(SocketAddress, 1);
+ addr->type = SOCKET_ADDRESS_KIND_UNIX;
+ addr->u.q_unix = g_new0(UnixSocketAddress, 1);
+ if (su->sun_path[0]) {
+ addr->u.q_unix->path = g_strndup(su->sun_path,
+ sizeof(su->sun_path));
+ }
+
+ return addr;
+}
+#endif /* WIN32 */
+
+static SocketAddress *
+socket_sockaddr_to_address(struct sockaddr_storage *sa,
+ socklen_t salen,
+ Error **errp)
+{
+ switch (sa->ss_family) {
+ case AF_INET:
+ case AF_INET6:
+ return socket_sockaddr_to_address_inet(sa, salen, errp);
+
+#ifndef WIN32
+ case AF_UNIX:
+ return socket_sockaddr_to_address_unix(sa, salen, errp);
+#endif /* WIN32 */
+
+ default:
+ error_setg(errp, "socket family %d unsupported",
+ sa->ss_family);
+ return NULL;
+ }
+ return 0;
+}
+
+
+SocketAddress *socket_local_address(int fd, Error **errp)
+{
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+
+ if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+ error_setg_errno(errp, socket_error(), "%s",
+ "Unable to query local socket address");
+ return NULL;
+ }
+
+ return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+SocketAddress *socket_remote_address(int fd, Error **errp)
+{
+ struct sockaddr_storage ss;
+ socklen_t sslen = sizeof(ss);
+
+ if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+ error_setg_errno(errp, socket_error(), "%s",
+ "Unable to query remote socket address");
+ return NULL;
+ }
+
+ return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+void qapi_copy_SocketAddress(SocketAddress **p_dest,
+ SocketAddress *src)
+{
+ QmpOutputVisitor *qov;
+ QmpInputVisitor *qiv;
+ Visitor *ov, *iv;
+ QObject *obj;
+
+ *p_dest = NULL;
+
+ qov = qmp_output_visitor_new();
+ ov = qmp_output_get_visitor(qov);
+ visit_type_SocketAddress(ov, &src, NULL, &error_abort);
+ obj = qmp_output_get_qobject(qov);
+ qmp_output_visitor_cleanup(qov);
+ if (!obj) {
+ return;
+ }
+
+ qiv = qmp_input_visitor_new(obj);
+ iv = qmp_input_get_visitor(qiv);
+ visit_type_SocketAddress(iv, p_dest, NULL, &error_abort);
+ qmp_input_visitor_cleanup(qiv);
+ qobject_decref(obj);
+}
diff --git a/src/util/qemu-thread-posix.c b/src/util/qemu-thread-posix.c
new file mode 100644
index 0000000..dbd8094
--- /dev/null
+++ b/src/util/qemu-thread-posix.c
@@ -0,0 +1,518 @@
+/*
+ * Wrappers around mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2009
+ *
+ * Author:
+ * Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/time.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#include <linux/futex.h>
+#endif
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/notify.h"
+
+static bool name_threads;
+
+void qemu_thread_naming(bool enable)
+{
+ name_threads = enable;
+
+#ifndef CONFIG_THREAD_SETNAME_BYTHREAD
+ /* This is a debugging option, not fatal */
+ if (enable) {
+ fprintf(stderr, "qemu: thread naming not supported on this host\n");
+ }
+#endif
+}
+
+static void error_exit(int err, const char *msg)
+{
+ fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err));
+ abort();
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_init(&mutex->lock, NULL);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_destroy(&mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_mutex_lock(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_lock(&mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+int qemu_mutex_trylock(QemuMutex *mutex)
+{
+ return pthread_mutex_trylock(&mutex->lock);
+}
+
+void qemu_mutex_unlock(QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_mutex_unlock(&mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_init(&cond->cond, NULL);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_destroy(&cond->cond);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_signal(&cond->cond);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+ int err;
+
+ err = pthread_cond_broadcast(&cond->cond);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
+{
+ int err;
+
+ err = pthread_cond_wait(&cond->cond, &mutex->lock);
+ if (err)
+ error_exit(err, __func__);
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ rc = pthread_mutex_init(&sem->lock, NULL);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ rc = pthread_cond_init(&sem->cond, NULL);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ if (init < 0) {
+ error_exit(EINVAL, __func__);
+ }
+ sem->count = init;
+#else
+ rc = sem_init(&sem->sem, 0, init);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ rc = pthread_cond_destroy(&sem->cond);
+ if (rc < 0) {
+ error_exit(rc, __func__);
+ }
+ rc = pthread_mutex_destroy(&sem->lock);
+ if (rc < 0) {
+ error_exit(rc, __func__);
+ }
+#else
+ rc = sem_destroy(&sem->sem);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_lock(&sem->lock);
+ if (sem->count == UINT_MAX) {
+ rc = EINVAL;
+ } else {
+ sem->count++;
+ rc = pthread_cond_signal(&sem->cond);
+ }
+ pthread_mutex_unlock(&sem->lock);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+#else
+ rc = sem_post(&sem->sem);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+static void compute_abs_deadline(struct timespec *ts, int ms)
+{
+ struct timeval tv;
+ gettimeofday(&tv, NULL);
+ ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
+ ts->tv_sec = tv.tv_sec + ms / 1000;
+ if (ts->tv_nsec >= 1000000000) {
+ ts->tv_sec++;
+ ts->tv_nsec -= 1000000000;
+ }
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+ int rc;
+ struct timespec ts;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ rc = 0;
+ compute_abs_deadline(&ts, ms);
+ pthread_mutex_lock(&sem->lock);
+ while (sem->count == 0) {
+ rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
+ if (rc == ETIMEDOUT) {
+ break;
+ }
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ }
+ if (rc != ETIMEDOUT) {
+ --sem->count;
+ }
+ pthread_mutex_unlock(&sem->lock);
+ return (rc == ETIMEDOUT ? -1 : 0);
+#else
+ if (ms <= 0) {
+ /* This is cheaper than sem_timedwait. */
+ do {
+ rc = sem_trywait(&sem->sem);
+ } while (rc == -1 && errno == EINTR);
+ if (rc == -1 && errno == EAGAIN) {
+ return -1;
+ }
+ } else {
+ compute_abs_deadline(&ts, ms);
+ do {
+ rc = sem_timedwait(&sem->sem, &ts);
+ } while (rc == -1 && errno == EINTR);
+ if (rc == -1 && errno == ETIMEDOUT) {
+ return -1;
+ }
+ }
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+ return 0;
+#endif
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+ int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+ pthread_mutex_lock(&sem->lock);
+ while (sem->count == 0) {
+ rc = pthread_cond_wait(&sem->cond, &sem->lock);
+ if (rc != 0) {
+ error_exit(rc, __func__);
+ }
+ }
+ --sem->count;
+ pthread_mutex_unlock(&sem->lock);
+#else
+ do {
+ rc = sem_wait(&sem->sem);
+ } while (rc == -1 && errno == EINTR);
+ if (rc < 0) {
+ error_exit(errno, __func__);
+ }
+#endif
+}
+
+#ifdef __linux__
+#define futex(...) syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+ futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+ while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+ switch (errno) {
+ case EWOULDBLOCK:
+ return;
+ case EINTR:
+ break; /* get out of switch and retry */
+ default:
+ abort();
+ }
+ }
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+ pthread_mutex_lock(&ev->lock);
+ if (n == 1) {
+ pthread_cond_signal(&ev->cond);
+ } else {
+ pthread_cond_broadcast(&ev->cond);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+ pthread_mutex_lock(&ev->lock);
+ if (ev->value == val) {
+ pthread_cond_wait(&ev->cond, &ev->lock);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+ pthread_mutex_init(&ev->lock, NULL);
+ pthread_cond_init(&ev->cond, NULL);
+#endif
+
+ ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+#ifndef __linux__
+ pthread_mutex_destroy(&ev->lock);
+ pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+ if (atomic_mb_read(&ev->value) != EV_SET) {
+ if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ /* There were waiters, wake them up. */
+ futex_wake(ev, INT_MAX);
+ }
+ }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+ if (atomic_mb_read(&ev->value) == EV_SET) {
+ /*
+ * If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ atomic_or(&ev->value, EV_FREE);
+ }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+ unsigned value;
+
+ value = atomic_mb_read(&ev->value);
+ if (value != EV_SET) {
+ if (value == EV_FREE) {
+ /*
+ * Leave the event reset and tell qemu_event_set that there
+ * are waiters. No need to retry, because there cannot be
+ * a concurrent busy->free transition. After the CAS, the
+ * event will be either set or busy.
+ */
+ if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ return;
+ }
+ }
+ futex_wait(ev, EV_BUSY);
+ }
+}
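+
+/*
+ * A minimal usage sketch for QemuEvent, assuming one signalling thread and
+ * any number of waiters; the names below are assumptions, not part of this
+ * file.  The event is initialized once, e.g. qemu_event_init(&ev, false).
+ */
+static QemuEvent example_done_ev;
+
+static void example_wait_for_done(void)
+{
+    qemu_event_wait(&example_done_ev);   /* free->busy, block until set  */
+    qemu_event_reset(&example_done_ev);  /* set->free so it can be reused */
+}
+
+static void example_signal_done(void)
+{
+    qemu_event_set(&example_done_ev);    /* busy->set, wakes all waiters */
+}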
+
+static pthread_key_t exit_key;
+
+union NotifierThreadData {
+ void *ptr;
+ NotifierList list;
+};
+QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+ union NotifierThreadData ntd;
+ ntd.ptr = pthread_getspecific(exit_key);
+ notifier_list_add(&ntd.list, notifier);
+ pthread_setspecific(exit_key, ntd.ptr);
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+ union NotifierThreadData ntd;
+ ntd.ptr = pthread_getspecific(exit_key);
+ notifier_remove(notifier);
+ pthread_setspecific(exit_key, ntd.ptr);
+}
+
+static void qemu_thread_atexit_run(void *arg)
+{
+ union NotifierThreadData ntd = { .ptr = arg };
+ notifier_list_notify(&ntd.list, NULL);
+}
+
+static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+{
+ pthread_key_create(&exit_key, qemu_thread_atexit_run);
+}
+
+
+/* Attempt to set the thread's name; note that this is for debugging only,
+ * so we do not fail if the name cannot be set.
+ */
+static void qemu_thread_set_name(QemuThread *thread, const char *name)
+{
+#ifdef CONFIG_PTHREAD_SETNAME_NP
+ pthread_setname_np(thread->thread, name);
+#endif
+}
+
+void qemu_thread_create(QemuThread *thread, const char *name,
+ void *(*start_routine)(void*),
+ void *arg, int mode)
+{
+ sigset_t set, oldset;
+ int err;
+ pthread_attr_t attr;
+
+ err = pthread_attr_init(&attr);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ if (mode == QEMU_THREAD_DETACHED) {
+ err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ }
+
+ /* Leave signal handling to the iothread. */
+ sigfillset(&set);
+ pthread_sigmask(SIG_SETMASK, &set, &oldset);
+ err = pthread_create(&thread->thread, &attr, start_routine, arg);
+ if (err)
+ error_exit(err, __func__);
+
+ if (name_threads) {
+ qemu_thread_set_name(thread, name);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+
+ pthread_attr_destroy(&attr);
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+ thread->thread = pthread_self();
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+ return pthread_equal(pthread_self(), thread->thread);
+}
+
+void qemu_thread_exit(void *retval)
+{
+ pthread_exit(retval);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+ int err;
+ void *ret;
+
+ err = pthread_join(thread->thread, &ret);
+ if (err) {
+ error_exit(err, __func__);
+ }
+ return ret;
+}
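
For illustration, here is a minimal sketch of the reset-check-wait pattern that the free/busy/set state machine above is designed for. The worker/producer pair and the work_available flag are assumptions for the example, not part of the imported code; the QemuEvent API used is the one defined above.

#include "qemu/thread.h"
#include "qemu/atomic.h"

static QemuEvent work_event;        /* initialized with qemu_event_init(&work_event, false) */
static int work_available;          /* hypothetical flag owned by producers */

static void *worker(void *opaque)
{
    for (;;) {
        qemu_event_reset(&work_event);        /* set -> free (no-op if already free) */
        if (atomic_read(&work_available)) {   /* re-check the condition after reset  */
            atomic_set(&work_available, 0);
            /* ... consume the work ... */
            continue;
        }
        qemu_event_wait(&work_event);         /* free -> busy, then sleep             */
    }
    return NULL;
}

static void producer(void)
{
    atomic_set(&work_available, 1);
    qemu_event_set(&work_event);              /* free/busy -> set, wakes the worker   */
}
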
diff --git a/src/util/qemu-thread-win32.c b/src/util/qemu-thread-win32.c
new file mode 100644
index 0000000..6cdd553
--- /dev/null
+++ b/src/util/qemu-thread-win32.c
@@ -0,0 +1,479 @@
+/*
+ * Win32 implementation for mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Author:
+ * Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/notify.h"
+#include <process.h>
+#include <assert.h>
+#include <limits.h>
+
+static bool name_threads;
+
+void qemu_thread_naming(bool enable)
+{
+ /* But note we don't actually name them on Windows yet */
+ name_threads = enable;
+
+ fprintf(stderr, "qemu: thread naming not supported on this host\n");
+}
+
+static void error_exit(int err, const char *msg)
+{
+ char *pstr;
+
+ FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+ NULL, err, 0, (LPTSTR)&pstr, 2, NULL);
+ fprintf(stderr, "qemu: %s: %s\n", msg, pstr);
+ LocalFree(pstr);
+ abort();
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+ mutex->owner = 0;
+ InitializeCriticalSection(&mutex->lock);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+ assert(mutex->owner == 0);
+ DeleteCriticalSection(&mutex->lock);
+}
+
+void qemu_mutex_lock(QemuMutex *mutex)
+{
+ EnterCriticalSection(&mutex->lock);
+
+ /* Win32 CRITICAL_SECTIONs are recursive. Assert that we're not
+ * using them as such.
+ */
+ assert(mutex->owner == 0);
+ mutex->owner = GetCurrentThreadId();
+}
+
+int qemu_mutex_trylock(QemuMutex *mutex)
+{
+ int owned;
+
+ owned = TryEnterCriticalSection(&mutex->lock);
+ if (owned) {
+ assert(mutex->owner == 0);
+ mutex->owner = GetCurrentThreadId();
+ }
+ return !owned;
+}
+
+void qemu_mutex_unlock(QemuMutex *mutex)
+{
+ assert(mutex->owner == GetCurrentThreadId());
+ mutex->owner = 0;
+ LeaveCriticalSection(&mutex->lock);
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+ memset(cond, 0, sizeof(*cond));
+
+ cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
+ if (!cond->sema) {
+ error_exit(GetLastError(), __func__);
+ }
+ cond->continue_event = CreateEvent(NULL, /* security */
+ FALSE, /* auto-reset */
+ FALSE, /* not signaled */
+ NULL); /* name */
+ if (!cond->continue_event) {
+ error_exit(GetLastError(), __func__);
+ }
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+ BOOL result;
+ result = CloseHandle(cond->continue_event);
+ if (!result) {
+ error_exit(GetLastError(), __func__);
+ }
+ cond->continue_event = 0;
+ result = CloseHandle(cond->sema);
+ if (!result) {
+ error_exit(GetLastError(), __func__);
+ }
+ cond->sema = 0;
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+ DWORD result;
+
+ /*
+ * Signal only when there are waiters. cond->waiters is
+ * incremented by pthread_cond_wait under the external lock,
+ * so we are safe about that.
+ */
+ if (cond->waiters == 0) {
+ return;
+ }
+
+ /*
+ * Waiting threads decrement it outside the external lock, but
+ * only if another thread is executing pthread_cond_broadcast and
+ * has the mutex. So, it also cannot be decremented concurrently
+ * with this particular access.
+ */
+ cond->target = cond->waiters - 1;
+ result = SignalObjectAndWait(cond->sema, cond->continue_event,
+ INFINITE, FALSE);
+ if (result == WAIT_ABANDONED || result == WAIT_FAILED) {
+ error_exit(GetLastError(), __func__);
+ }
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+ BOOLEAN result;
+ /*
+ * As in pthread_cond_signal, access to cond->waiters and
+ * cond->target is locked via the external mutex.
+ */
+ if (cond->waiters == 0) {
+ return;
+ }
+
+ cond->target = 0;
+ result = ReleaseSemaphore(cond->sema, cond->waiters, NULL);
+ if (!result) {
+ error_exit(GetLastError(), __func__);
+ }
+
+ /*
+ * At this point all waiters continue. Each one takes its
+ * slice of the semaphore. Now it's our turn to wait: Since
+ * the external mutex is held, no thread can leave cond_wait,
+ * yet. For this reason, we can be sure that no thread gets
+ * a chance to eat *more* than one slice. OTOH, it means
+ * that the last waiter must send us a wake-up.
+ */
+ WaitForSingleObject(cond->continue_event, INFINITE);
+}
+
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
+{
+ /*
+ * This access is protected under the mutex.
+ */
+ cond->waiters++;
+
+ /*
+ * Unlock external mutex and wait for signal.
+ * NOTE: we've held mutex locked long enough to increment
+ * waiters count above, so there's no problem with
+ * leaving mutex unlocked before we wait on semaphore.
+ */
+ qemu_mutex_unlock(mutex);
+ WaitForSingleObject(cond->sema, INFINITE);
+
+ /* Now waiters must rendez-vous with the signaling thread and
+ * let it continue. For cond_broadcast this has heavy contention
+ * and triggers thundering herd. So goes life.
+ *
+ * Decrease waiters count. The mutex is not taken, so we have
+ * to do this atomically.
+ *
+ * All waiters contend for the mutex at the end of this function
+ * until the signaling thread relinquishes it. To ensure
+ * each waiter consumes exactly one slice of the semaphore,
+ * the signaling thread stops until it is told by the last
+ * waiter that it can go on.
+ */
+ if (InterlockedDecrement(&cond->waiters) == cond->target) {
+ SetEvent(cond->continue_event);
+ }
+
+ qemu_mutex_lock(mutex);
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    /* Counting semaphore; the initial count is 'init'. */
+ sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL);
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+ CloseHandle(sem->sema);
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+ ReleaseSemaphore(sem->sema, 1, NULL);
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+ int rc = WaitForSingleObject(sem->sema, ms);
+ if (rc == WAIT_OBJECT_0) {
+ return 0;
+ }
+ if (rc != WAIT_TIMEOUT) {
+ error_exit(GetLastError(), __func__);
+ }
+ return -1;
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+ if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) {
+ error_exit(GetLastError(), __func__);
+ }
+}
+
+/* Wrap a Win32 manual-reset event with a fast userspace path. The idea
+ * is to reset the Win32 event lazily, as part of a test-reset-test-wait
+ * sequence. Such a sequence is, indeed, how QemuEvents are used by
+ * RCU and other subsystems!
+ *
+ * Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy (and is faster than cmpxchg).
+ */
+
+#define EV_SET 0
+#define EV_FREE 1
+#define EV_BUSY -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+ /* Manual reset. */
+ ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
+ ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+ CloseHandle(ev->event);
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+ if (atomic_mb_read(&ev->value) != EV_SET) {
+ if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+ /* There were waiters, wake them up. */
+ SetEvent(ev->event);
+ }
+ }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+ if (atomic_mb_read(&ev->value) == EV_SET) {
+ /* If there was a concurrent reset (or even reset+wait),
+ * do nothing. Otherwise change EV_SET->EV_FREE.
+ */
+ atomic_or(&ev->value, EV_FREE);
+ }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+ unsigned value;
+
+ value = atomic_mb_read(&ev->value);
+ if (value != EV_SET) {
+ if (value == EV_FREE) {
+ /* qemu_event_set is not yet going to call SetEvent, but we are
+ * going to do another check for EV_SET below when setting EV_BUSY.
+ * At that point it is safe to call WaitForSingleObject.
+ */
+ ResetEvent(ev->event);
+
+ /* Tell qemu_event_set that there are waiters. No need to retry
+     * because there cannot be a concurrent busy->free transition.
+ * After the CAS, the event will be either set or busy.
+ */
+ if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+ value = EV_SET;
+ } else {
+ value = EV_BUSY;
+ }
+ }
+ if (value == EV_BUSY) {
+ WaitForSingleObject(ev->event, INFINITE);
+ }
+ }
+}
+
+struct QemuThreadData {
+ /* Passed to win32_start_routine. */
+ void *(*start_routine)(void *);
+ void *arg;
+ short mode;
+ NotifierList exit;
+
+ /* Only used for joinable threads. */
+ bool exited;
+ void *ret;
+ CRITICAL_SECTION cs;
+};
+
+static bool atexit_registered;
+static NotifierList main_thread_exit;
+
+static __thread QemuThreadData *qemu_thread_data;
+
+static void run_main_thread_exit(void)
+{
+ notifier_list_notify(&main_thread_exit, NULL);
+}
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+ if (!qemu_thread_data) {
+ if (!atexit_registered) {
+ atexit_registered = true;
+ atexit(run_main_thread_exit);
+ }
+ notifier_list_add(&main_thread_exit, notifier);
+ } else {
+ notifier_list_add(&qemu_thread_data->exit, notifier);
+ }
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+ notifier_remove(notifier);
+}
+
+static unsigned __stdcall win32_start_routine(void *arg)
+{
+ QemuThreadData *data = (QemuThreadData *) arg;
+ void *(*start_routine)(void *) = data->start_routine;
+ void *thread_arg = data->arg;
+
+ qemu_thread_data = data;
+ qemu_thread_exit(start_routine(thread_arg));
+ abort();
+}
+
+void qemu_thread_exit(void *arg)
+{
+ QemuThreadData *data = qemu_thread_data;
+
+ notifier_list_notify(&data->exit, NULL);
+ if (data->mode == QEMU_THREAD_JOINABLE) {
+ data->ret = arg;
+ EnterCriticalSection(&data->cs);
+ data->exited = true;
+ LeaveCriticalSection(&data->cs);
+ } else {
+ g_free(data);
+ }
+ _endthreadex(0);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+ QemuThreadData *data;
+ void *ret;
+ HANDLE handle;
+
+ data = thread->data;
+ if (data->mode == QEMU_THREAD_DETACHED) {
+ return NULL;
+ }
+
+ /*
+ * Because multiple copies of the QemuThread can exist via
+ * qemu_thread_get_self, we need to store a value that cannot
+ * leak there. The simplest, non racy way is to store the TID,
+ * discard the handle that _beginthreadex gives back, and
+ * get another copy of the handle here.
+ */
+ handle = qemu_thread_get_handle(thread);
+ if (handle) {
+ WaitForSingleObject(handle, INFINITE);
+ CloseHandle(handle);
+ }
+ ret = data->ret;
+ DeleteCriticalSection(&data->cs);
+ g_free(data);
+ return ret;
+}
+
+void qemu_thread_create(QemuThread *thread, const char *name,
+ void *(*start_routine)(void *),
+ void *arg, int mode)
+{
+ HANDLE hThread;
+ struct QemuThreadData *data;
+
+ data = g_malloc(sizeof *data);
+ data->start_routine = start_routine;
+ data->arg = arg;
+ data->mode = mode;
+ data->exited = false;
+ notifier_list_init(&data->exit);
+
+ if (data->mode != QEMU_THREAD_DETACHED) {
+ InitializeCriticalSection(&data->cs);
+ }
+
+ hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
+ data, 0, &thread->tid);
+ if (!hThread) {
+ error_exit(GetLastError(), __func__);
+ }
+ CloseHandle(hThread);
+ thread->data = data;
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+ thread->data = qemu_thread_data;
+ thread->tid = GetCurrentThreadId();
+}
+
+HANDLE qemu_thread_get_handle(QemuThread *thread)
+{
+ QemuThreadData *data;
+ HANDLE handle;
+
+ data = thread->data;
+ if (data->mode == QEMU_THREAD_DETACHED) {
+ return NULL;
+ }
+
+ EnterCriticalSection(&data->cs);
+ if (!data->exited) {
+ handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
+ thread->tid);
+ } else {
+ handle = NULL;
+ }
+ LeaveCriticalSection(&data->cs);
+ return handle;
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+ return GetCurrentThreadId() == thread->tid;
+}
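
A small sketch of the portable thread API implemented by the POSIX and Win32 files above; the payload function is hypothetical, while qemu_thread_create(), QEMU_THREAD_JOINABLE and qemu_thread_join() are the entry points defined in this import.

#include "qemu/thread.h"

static void *payload(void *arg)
{
    /* ... do some work ... */
    return arg;
}

static void *run_and_join(void *arg)
{
    QemuThread t;

    qemu_thread_create(&t, "payload", payload, arg, QEMU_THREAD_JOINABLE);
    /* On Win32 this reopens a handle from the stored TID; on POSIX it is a
     * plain pthread_join().
     */
    return qemu_thread_join(&t);
}
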
diff --git a/src/util/qemu-timer-common.c b/src/util/qemu-timer-common.c
new file mode 100644
index 0000000..95e0847
--- /dev/null
+++ b/src/util/qemu-timer-common.c
@@ -0,0 +1,61 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/timer.h"
+
+/***********************************************************/
+/* real time host monotonic timer */
+
+#ifdef _WIN32
+
+int64_t clock_freq;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+ LARGE_INTEGER freq;
+ int ret;
+ ret = QueryPerformanceFrequency(&freq);
+ if (ret == 0) {
+ fprintf(stderr, "Could not calibrate ticks\n");
+ exit(1);
+ }
+ clock_freq = freq.QuadPart;
+}
+
+#else
+
+int use_rt_clock;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+ use_rt_clock = 0;
+#ifdef CLOCK_MONOTONIC
+ {
+ struct timespec ts;
+ if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+ use_rt_clock = 1;
+ }
+ }
+#endif
+}
+#endif
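
The consumers of clock_freq and use_rt_clock live in qemu/timer.h and are not part of this diff. The following is only a sketch, under that assumption, of how those calibrated globals can be turned into a nanosecond clock; it is not the in-tree get_clock().

#include <stdint.h>
#ifdef _WIN32
#include <windows.h>
#else
#include <time.h>
#include <sys/time.h>
#endif

#ifdef _WIN32
static int64_t sketch_clock_ns(void)
{
    LARGE_INTEGER ti;

    QueryPerformanceCounter(&ti);
    /* scale performance-counter ticks to nanoseconds */
    return (int64_t)((double)ti.QuadPart * 1e9 / clock_freq);
}
#else
static int64_t sketch_clock_ns(void)
{
    if (use_rt_clock) {
        struct timespec ts;

        clock_gettime(CLOCK_MONOTONIC, &ts);
        return ts.tv_sec * 1000000000LL + ts.tv_nsec;
    } else {
        struct timeval tv;

        gettimeofday(&tv, NULL);
        return tv.tv_sec * 1000000000LL + tv.tv_usec * 1000LL;
    }
}
#endif
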
diff --git a/src/util/rcu.c b/src/util/rcu.c
new file mode 100644
index 0000000..8ba304d
--- /dev/null
+++ b/src/util/rcu.c
@@ -0,0 +1,352 @@
+/*
+ * urcu-mb.c
+ *
+ * Userspace RCU library with explicit memory barriers
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Ported to QEMU by Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include "qemu-common.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include "qemu/rcu.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+
+/*
+ * Global grace period counter. Bit 0 is always one in rcu_gp_ctr.
+ * Bits 1 and above are defined in synchronize_rcu.
+ */
+#define RCU_GP_LOCKED (1UL << 0)
+#define RCU_GP_CTR (1UL << 1)
+
+unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
+
+QemuEvent rcu_gp_event;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
+
+/*
+ * Check whether a quiescent state was crossed between the beginning of
+ * update_counter_and_wait and now.
+ */
+static inline int rcu_gp_ongoing(unsigned long *ctr)
+{
+ unsigned long v;
+
+ v = atomic_read(ctr);
+ return v && (v != rcu_gp_ctr);
+}
+
+/* Written to only by each individual reader. Read by both the reader and the
+ * writers.
+ */
+__thread struct rcu_reader_data rcu_reader;
+
+/* Protected by rcu_registry_lock. */
+typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
+static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
+
+/* Wait for previous parity/grace period to be empty of readers. */
+static void wait_for_readers(void)
+{
+ ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
+ struct rcu_reader_data *index, *tmp;
+
+ for (;;) {
+ /* We want to be notified of changes made to rcu_gp_ongoing
+ * while we walk the list.
+ */
+ qemu_event_reset(&rcu_gp_event);
+
+ /* Instead of using atomic_mb_set for index->waiting, and
+ * atomic_mb_read for index->ctr, memory barriers are placed
+ * manually since writes to different threads are independent.
+ * atomic_mb_set has a smp_wmb before...
+ */
+ smp_wmb();
+ QLIST_FOREACH(index, &registry, node) {
+ atomic_set(&index->waiting, true);
+ }
+
+ /* ... and a smp_mb after. */
+ smp_mb();
+
+ QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
+ if (!rcu_gp_ongoing(&index->ctr)) {
+ QLIST_REMOVE(index, node);
+ QLIST_INSERT_HEAD(&qsreaders, index, node);
+
+ /* No need for mb_set here, worst of all we
+ * get some extra futex wakeups.
+ */
+ atomic_set(&index->waiting, false);
+ }
+ }
+
+ /* atomic_mb_read has smp_rmb after. */
+ smp_rmb();
+
+ if (QLIST_EMPTY(&registry)) {
+ break;
+ }
+
+ /* Wait for one thread to report a quiescent state and try again.
+ * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+ * wait too much time.
+ *
+ * rcu_register_thread() may add nodes to &registry; it will not
+ * wake up synchronize_rcu, but that is okay because at least another
+ * thread must exit its RCU read-side critical section before
+ * synchronize_rcu is done. The next iteration of the loop will
+ * move the new thread's rcu_reader from &registry to &qsreaders,
+ * because rcu_gp_ongoing() will return false.
+ *
+ * rcu_unregister_thread() may remove nodes from &qsreaders instead
+ * of &registry if it runs during qemu_event_wait. That's okay;
+ * the node then will not be added back to &registry by QLIST_SWAP
+ * below. The invariant is that the node is part of one list when
+ * rcu_registry_lock is released.
+ */
+ qemu_mutex_unlock(&rcu_registry_lock);
+ qemu_event_wait(&rcu_gp_event);
+ qemu_mutex_lock(&rcu_registry_lock);
+ }
+
+ /* put back the reader list in the registry */
+ QLIST_SWAP(&registry, &qsreaders, node);
+}
+
+void synchronize_rcu(void)
+{
+ qemu_mutex_lock(&rcu_sync_lock);
+ qemu_mutex_lock(&rcu_registry_lock);
+
+ if (!QLIST_EMPTY(&registry)) {
+ /* In either case, the atomic_mb_set below blocks stores that free
+ * old RCU-protected pointers.
+ */
+ if (sizeof(rcu_gp_ctr) < 8) {
+ /* For architectures with 32-bit longs, a two-subphases algorithm
+ * ensures we do not encounter overflow bugs.
+ *
+ * Switch parity: 0 -> 1, 1 -> 0.
+ */
+ atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+ wait_for_readers();
+ atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+ } else {
+ /* Increment current grace period. */
+ atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+ }
+
+ wait_for_readers();
+ }
+
+ qemu_mutex_unlock(&rcu_registry_lock);
+ qemu_mutex_unlock(&rcu_sync_lock);
+}
+
+
+#define RCU_CALL_MIN_SIZE 30
+
+/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
+ * from liburcu. Note that head is only used by the consumer.
+ */
+static struct rcu_head dummy;
+static struct rcu_head *head = &dummy, **tail = &dummy.next;
+static int rcu_call_count;
+static QemuEvent rcu_call_ready_event;
+
+static void enqueue(struct rcu_head *node)
+{
+ struct rcu_head **old_tail;
+
+ node->next = NULL;
+ old_tail = atomic_xchg(&tail, &node->next);
+ atomic_mb_set(old_tail, node);
+}
+
+static struct rcu_head *try_dequeue(void)
+{
+ struct rcu_head *node, *next;
+
+retry:
+ /* Test for an empty list, which we do not expect. Note that for
+ * the consumer head and tail are always consistent. The head
+ * is consistent because only the consumer reads/writes it.
+     * The tail is consistent because updating it is the first step of the
+     * enqueue operation.
+ * It is only the next pointers that might be inconsistent.
+ */
+ if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) {
+ abort();
+ }
+
+ /* If the head node has NULL in its next pointer, the value is
+ * wrong and we need to wait until its enqueuer finishes the update.
+ */
+ node = head;
+ next = atomic_mb_read(&head->next);
+ if (!next) {
+ return NULL;
+ }
+
+ /* Since we are the sole consumer, and we excluded the empty case
+ * above, the queue will always have at least two nodes: the
+ * dummy node, and the one being removed. So we do not need to update
+ * the tail pointer.
+ */
+ head = next;
+
+ /* If we dequeued the dummy node, add it back at the end and retry. */
+ if (node == &dummy) {
+ enqueue(node);
+ goto retry;
+ }
+
+ return node;
+}
+
+static void *call_rcu_thread(void *opaque)
+{
+ struct rcu_head *node;
+
+ rcu_register_thread();
+
+ for (;;) {
+ int tries = 0;
+ int n = atomic_read(&rcu_call_count);
+
+ /* Heuristically wait for a decent number of callbacks to pile up.
+         * Fetch rcu_call_count now; we must only process elements that were
+         * added before synchronize_rcu() starts.
+ */
+ while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
+ g_usleep(10000);
+ if (n == 0) {
+ qemu_event_reset(&rcu_call_ready_event);
+ n = atomic_read(&rcu_call_count);
+ if (n == 0) {
+ qemu_event_wait(&rcu_call_ready_event);
+ }
+ }
+ n = atomic_read(&rcu_call_count);
+ }
+
+ atomic_sub(&rcu_call_count, n);
+ synchronize_rcu();
+ qemu_mutex_lock_iothread();
+ while (n > 0) {
+ node = try_dequeue();
+ while (!node) {
+ qemu_mutex_unlock_iothread();
+ qemu_event_reset(&rcu_call_ready_event);
+ node = try_dequeue();
+ if (!node) {
+ qemu_event_wait(&rcu_call_ready_event);
+ node = try_dequeue();
+ }
+ qemu_mutex_lock_iothread();
+ }
+
+ n--;
+ node->func(node);
+ }
+ qemu_mutex_unlock_iothread();
+ }
+ abort();
+}
+
+void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
+{
+ node->func = func;
+ enqueue(node);
+ atomic_inc(&rcu_call_count);
+ qemu_event_set(&rcu_call_ready_event);
+}
+
+void rcu_register_thread(void)
+{
+ assert(rcu_reader.ctr == 0);
+ qemu_mutex_lock(&rcu_registry_lock);
+ QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+ qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_unregister_thread(void)
+{
+ qemu_mutex_lock(&rcu_registry_lock);
+ QLIST_REMOVE(&rcu_reader, node);
+ qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+static void rcu_init_complete(void)
+{
+ QemuThread thread;
+
+ qemu_mutex_init(&rcu_registry_lock);
+ qemu_mutex_init(&rcu_sync_lock);
+ qemu_event_init(&rcu_gp_event, true);
+
+ qemu_event_init(&rcu_call_ready_event, false);
+
+ /* The caller is assumed to have iothread lock, so the call_rcu thread
+ * must have been quiescent even after forking, just recreate it.
+ */
+ qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
+ NULL, QEMU_THREAD_DETACHED);
+
+ rcu_register_thread();
+}
+
+#ifdef CONFIG_POSIX
+static void rcu_init_lock(void)
+{
+ qemu_mutex_lock(&rcu_sync_lock);
+ qemu_mutex_lock(&rcu_registry_lock);
+}
+
+static void rcu_init_unlock(void)
+{
+ qemu_mutex_unlock(&rcu_registry_lock);
+ qemu_mutex_unlock(&rcu_sync_lock);
+}
+#endif
+
+void rcu_after_fork(void)
+{
+ memset(&registry, 0, sizeof(registry));
+ rcu_init_complete();
+}
+
+static void __attribute__((__constructor__)) rcu_init(void)
+{
+#ifdef CONFIG_POSIX
+ pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_unlock);
+#endif
+ rcu_init_complete();
+}
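
For illustration, a hedged sketch of deferred reclamation with call_rcu1() as defined above. The MyData structure, the helper functions and the use of atomic_rcu_read()/atomic_rcu_set() (declared in qemu/atomic.h, not shown in this diff) are assumptions for the example; the reader side is assumed to bracket accesses with the rcu_read_lock()/rcu_read_unlock() helpers from qemu/rcu.h.

#include "qemu-common.h"
#include "qemu/rcu.h"
#include "qemu/atomic.h"

typedef struct MyData {
    struct rcu_head rcu;
    int payload;
} MyData;

static MyData *current_data;

static void mydata_reclaim(struct rcu_head *head)
{
    MyData *d = container_of(head, MyData, rcu);

    g_free(d);
}

static void mydata_replace(MyData *new_data)
{
    MyData *old = atomic_rcu_read(&current_data);

    atomic_rcu_set(&current_data, new_data);
    if (old) {
        /* old is freed only after every pre-existing reader has left its
         * read-side critical section
         */
        call_rcu1(&old->rcu, mydata_reclaim);
    }
}
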
diff --git a/src/util/readline.c b/src/util/readline.c
new file mode 100644
index 0000000..cc1302a
--- /dev/null
+++ b/src/util/readline.c
@@ -0,0 +1,515 @@
+/*
+ * QEMU readline utility
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/readline.h"
+
+#define IS_NORM 0
+#define IS_ESC 1
+#define IS_CSI 2
+#define IS_SS3 3
+
+void readline_show_prompt(ReadLineState *rs)
+{
+ rs->printf_func(rs->opaque, "%s", rs->prompt);
+ rs->flush_func(rs->opaque);
+ rs->last_cmd_buf_index = 0;
+ rs->last_cmd_buf_size = 0;
+ rs->esc_state = IS_NORM;
+}
+
+/* update the displayed command line */
+static void readline_update(ReadLineState *rs)
+{
+ int i, delta, len;
+
+ if (rs->cmd_buf_size != rs->last_cmd_buf_size ||
+ memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) {
+ for(i = 0; i < rs->last_cmd_buf_index; i++) {
+ rs->printf_func(rs->opaque, "\033[D");
+ }
+ rs->cmd_buf[rs->cmd_buf_size] = '\0';
+ if (rs->read_password) {
+ len = strlen(rs->cmd_buf);
+ for(i = 0; i < len; i++)
+ rs->printf_func(rs->opaque, "*");
+ } else {
+ rs->printf_func(rs->opaque, "%s", rs->cmd_buf);
+ }
+ rs->printf_func(rs->opaque, "\033[K");
+ memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size);
+ rs->last_cmd_buf_size = rs->cmd_buf_size;
+ rs->last_cmd_buf_index = rs->cmd_buf_size;
+ }
+ if (rs->cmd_buf_index != rs->last_cmd_buf_index) {
+ delta = rs->cmd_buf_index - rs->last_cmd_buf_index;
+ if (delta > 0) {
+ for(i = 0;i < delta; i++) {
+ rs->printf_func(rs->opaque, "\033[C");
+ }
+ } else {
+ delta = -delta;
+ for(i = 0;i < delta; i++) {
+ rs->printf_func(rs->opaque, "\033[D");
+ }
+ }
+ rs->last_cmd_buf_index = rs->cmd_buf_index;
+ }
+ rs->flush_func(rs->opaque);
+}
+
+static void readline_insert_char(ReadLineState *rs, int ch)
+{
+ if (rs->cmd_buf_index < READLINE_CMD_BUF_SIZE) {
+ memmove(rs->cmd_buf + rs->cmd_buf_index + 1,
+ rs->cmd_buf + rs->cmd_buf_index,
+ rs->cmd_buf_size - rs->cmd_buf_index);
+ rs->cmd_buf[rs->cmd_buf_index] = ch;
+ rs->cmd_buf_size++;
+ rs->cmd_buf_index++;
+ }
+}
+
+static void readline_backward_char(ReadLineState *rs)
+{
+ if (rs->cmd_buf_index > 0) {
+ rs->cmd_buf_index--;
+ }
+}
+
+static void readline_forward_char(ReadLineState *rs)
+{
+ if (rs->cmd_buf_index < rs->cmd_buf_size) {
+ rs->cmd_buf_index++;
+ }
+}
+
+static void readline_delete_char(ReadLineState *rs)
+{
+ if (rs->cmd_buf_index < rs->cmd_buf_size) {
+ memmove(rs->cmd_buf + rs->cmd_buf_index,
+ rs->cmd_buf + rs->cmd_buf_index + 1,
+ rs->cmd_buf_size - rs->cmd_buf_index - 1);
+ rs->cmd_buf_size--;
+ }
+}
+
+static void readline_backspace(ReadLineState *rs)
+{
+ if (rs->cmd_buf_index > 0) {
+ readline_backward_char(rs);
+ readline_delete_char(rs);
+ }
+}
+
+static void readline_backword(ReadLineState *rs)
+{
+ int start;
+
+ if (rs->cmd_buf_index == 0 || rs->cmd_buf_index > rs->cmd_buf_size) {
+ return;
+ }
+
+ start = rs->cmd_buf_index - 1;
+
+ /* find first word (backwards) */
+ while (start > 0) {
+ if (!qemu_isspace(rs->cmd_buf[start])) {
+ break;
+ }
+
+ --start;
+ }
+
+ /* find first space (backwards) */
+ while (start > 0) {
+ if (qemu_isspace(rs->cmd_buf[start])) {
+ ++start;
+ break;
+ }
+
+ --start;
+ }
+
+ /* remove word */
+ if (start < rs->cmd_buf_index) {
+ memmove(rs->cmd_buf + start,
+ rs->cmd_buf + rs->cmd_buf_index,
+ rs->cmd_buf_size - rs->cmd_buf_index);
+ rs->cmd_buf_size -= rs->cmd_buf_index - start;
+ rs->cmd_buf_index = start;
+ }
+}
+
+static void readline_bol(ReadLineState *rs)
+{
+ rs->cmd_buf_index = 0;
+}
+
+static void readline_eol(ReadLineState *rs)
+{
+ rs->cmd_buf_index = rs->cmd_buf_size;
+}
+
+static void readline_up_char(ReadLineState *rs)
+{
+ int idx;
+
+ if (rs->hist_entry == 0)
+ return;
+ if (rs->hist_entry == -1) {
+ /* Find latest entry */
+ for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+ if (rs->history[idx] == NULL)
+ break;
+ }
+ rs->hist_entry = idx;
+ }
+ rs->hist_entry--;
+ if (rs->hist_entry >= 0) {
+ pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+ rs->history[rs->hist_entry]);
+ rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+ }
+}
+
+static void readline_down_char(ReadLineState *rs)
+{
+ if (rs->hist_entry == -1)
+ return;
+ if (rs->hist_entry < READLINE_MAX_CMDS - 1 &&
+ rs->history[++rs->hist_entry] != NULL) {
+ pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+ rs->history[rs->hist_entry]);
+ } else {
+ rs->cmd_buf[0] = 0;
+ rs->hist_entry = -1;
+ }
+ rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+}
+
+static void readline_hist_add(ReadLineState *rs, const char *cmdline)
+{
+ char *hist_entry, *new_entry;
+ int idx;
+
+ if (cmdline[0] == '\0')
+ return;
+ new_entry = NULL;
+ if (rs->hist_entry != -1) {
+ /* We were editing an existing history entry: replace it */
+ hist_entry = rs->history[rs->hist_entry];
+ idx = rs->hist_entry;
+ if (strcmp(hist_entry, cmdline) == 0) {
+ goto same_entry;
+ }
+ }
+ /* Search cmdline in history buffers */
+ for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+ hist_entry = rs->history[idx];
+ if (hist_entry == NULL)
+ break;
+ if (strcmp(hist_entry, cmdline) == 0) {
+ same_entry:
+ new_entry = hist_entry;
+ /* Put this entry at the end of history */
+ memmove(&rs->history[idx], &rs->history[idx + 1],
+ (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *));
+ rs->history[READLINE_MAX_CMDS - 1] = NULL;
+ for (; idx < READLINE_MAX_CMDS; idx++) {
+ if (rs->history[idx] == NULL)
+ break;
+ }
+ break;
+ }
+ }
+ if (idx == READLINE_MAX_CMDS) {
+ /* Need to get one free slot */
+ g_free(rs->history[0]);
+ memmove(rs->history, &rs->history[1],
+ (READLINE_MAX_CMDS - 1) * sizeof(char *));
+ rs->history[READLINE_MAX_CMDS - 1] = NULL;
+ idx = READLINE_MAX_CMDS - 1;
+ }
+ if (new_entry == NULL)
+ new_entry = g_strdup(cmdline);
+ rs->history[idx] = new_entry;
+ rs->hist_entry = -1;
+}
+
+/* completion support */
+
+void readline_add_completion(ReadLineState *rs, const char *str)
+{
+ if (rs->nb_completions < READLINE_MAX_COMPLETIONS) {
+ int i;
+ for (i = 0; i < rs->nb_completions; i++) {
+ if (!strcmp(rs->completions[i], str)) {
+ return;
+ }
+ }
+ rs->completions[rs->nb_completions++] = g_strdup(str);
+ }
+}
+
+void readline_set_completion_index(ReadLineState *rs, int index)
+{
+ rs->completion_index = index;
+}
+
+static int completion_comp(const void *a, const void *b)
+{
+ return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void readline_completion(ReadLineState *rs)
+{
+ int len, i, j, max_width, nb_cols, max_prefix;
+ char *cmdline;
+
+ rs->nb_completions = 0;
+
+ cmdline = g_strndup(rs->cmd_buf, rs->cmd_buf_index);
+ rs->completion_finder(rs->opaque, cmdline);
+ g_free(cmdline);
+
+ /* no completion found */
+ if (rs->nb_completions <= 0)
+ return;
+ if (rs->nb_completions == 1) {
+ len = strlen(rs->completions[0]);
+ for(i = rs->completion_index; i < len; i++) {
+ readline_insert_char(rs, rs->completions[0][i]);
+ }
+ /* extra space for next argument. XXX: make it more generic */
+ if (len > 0 && rs->completions[0][len - 1] != '/')
+ readline_insert_char(rs, ' ');
+ } else {
+ qsort(rs->completions, rs->nb_completions, sizeof(char *),
+ completion_comp);
+ rs->printf_func(rs->opaque, "\n");
+ max_width = 0;
+ max_prefix = 0;
+ for(i = 0; i < rs->nb_completions; i++) {
+ len = strlen(rs->completions[i]);
+ if (i==0) {
+ max_prefix = len;
+ } else {
+ if (len < max_prefix)
+ max_prefix = len;
+ for(j=0; j<max_prefix; j++) {
+ if (rs->completions[i][j] != rs->completions[0][j])
+ max_prefix = j;
+ }
+ }
+ if (len > max_width)
+ max_width = len;
+ }
+ if (max_prefix > 0)
+ for(i = rs->completion_index; i < max_prefix; i++) {
+ readline_insert_char(rs, rs->completions[0][i]);
+ }
+ max_width += 2;
+ if (max_width < 10)
+ max_width = 10;
+ else if (max_width > 80)
+ max_width = 80;
+ nb_cols = 80 / max_width;
+ j = 0;
+ for(i = 0; i < rs->nb_completions; i++) {
+ rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]);
+ if (++j == nb_cols || i == (rs->nb_completions - 1)) {
+ rs->printf_func(rs->opaque, "\n");
+ j = 0;
+ }
+ }
+ readline_show_prompt(rs);
+ }
+ for (i = 0; i < rs->nb_completions; i++) {
+ g_free(rs->completions[i]);
+ }
+}
+
+static void readline_clear_screen(ReadLineState *rs)
+{
+ rs->printf_func(rs->opaque, "\033[2J\033[1;1H");
+ readline_show_prompt(rs);
+}
+
+/* process one input byte and update the command-line state */
+void readline_handle_byte(ReadLineState *rs, int ch)
+{
+ switch(rs->esc_state) {
+ case IS_NORM:
+ switch(ch) {
+ case 1:
+ readline_bol(rs);
+ break;
+ case 4:
+ readline_delete_char(rs);
+ break;
+ case 5:
+ readline_eol(rs);
+ break;
+ case 9:
+ readline_completion(rs);
+ break;
+ case 12:
+ readline_clear_screen(rs);
+ break;
+ case 10:
+ case 13:
+ rs->cmd_buf[rs->cmd_buf_size] = '\0';
+ if (!rs->read_password)
+ readline_hist_add(rs, rs->cmd_buf);
+ rs->printf_func(rs->opaque, "\n");
+ rs->cmd_buf_index = 0;
+ rs->cmd_buf_size = 0;
+ rs->last_cmd_buf_index = 0;
+ rs->last_cmd_buf_size = 0;
+ rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque);
+ break;
+ case 23:
+ /* ^W */
+ readline_backword(rs);
+ break;
+ case 27:
+ rs->esc_state = IS_ESC;
+ break;
+ case 127:
+ case 8:
+ readline_backspace(rs);
+ break;
+ case 155:
+ rs->esc_state = IS_CSI;
+ break;
+ default:
+ if (ch >= 32) {
+ readline_insert_char(rs, ch);
+ }
+ break;
+ }
+ break;
+ case IS_ESC:
+ if (ch == '[') {
+ rs->esc_state = IS_CSI;
+ rs->esc_param = 0;
+ } else if (ch == 'O') {
+ rs->esc_state = IS_SS3;
+ rs->esc_param = 0;
+ } else {
+ rs->esc_state = IS_NORM;
+ }
+ break;
+ case IS_CSI:
+ switch(ch) {
+ case 'A':
+ case 'F':
+ readline_up_char(rs);
+ break;
+ case 'B':
+ case 'E':
+ readline_down_char(rs);
+ break;
+ case 'D':
+ readline_backward_char(rs);
+ break;
+ case 'C':
+ readline_forward_char(rs);
+ break;
+ case '0' ... '9':
+ rs->esc_param = rs->esc_param * 10 + (ch - '0');
+ goto the_end;
+ case '~':
+ switch(rs->esc_param) {
+ case 1:
+ readline_bol(rs);
+ break;
+ case 3:
+ readline_delete_char(rs);
+ break;
+ case 4:
+ readline_eol(rs);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ rs->esc_state = IS_NORM;
+ the_end:
+ break;
+ case IS_SS3:
+ switch(ch) {
+ case 'F':
+ readline_eol(rs);
+ break;
+ case 'H':
+ readline_bol(rs);
+ break;
+ }
+ rs->esc_state = IS_NORM;
+ break;
+ }
+ readline_update(rs);
+}
+
+void readline_start(ReadLineState *rs, const char *prompt, int read_password,
+ ReadLineFunc *readline_func, void *opaque)
+{
+ pstrcpy(rs->prompt, sizeof(rs->prompt), prompt);
+ rs->readline_func = readline_func;
+ rs->readline_opaque = opaque;
+ rs->read_password = read_password;
+ readline_restart(rs);
+}
+
+void readline_restart(ReadLineState *rs)
+{
+ rs->cmd_buf_index = 0;
+ rs->cmd_buf_size = 0;
+}
+
+const char *readline_get_history(ReadLineState *rs, unsigned int index)
+{
+ if (index >= READLINE_MAX_CMDS)
+ return NULL;
+ return rs->history[index];
+}
+
+ReadLineState *readline_init(ReadLinePrintfFunc *printf_func,
+ ReadLineFlushFunc *flush_func,
+ void *opaque,
+ ReadLineCompletionFunc *completion_finder)
+{
+ ReadLineState *rs = g_malloc0(sizeof(*rs));
+
+ rs->hist_entry = -1;
+ rs->opaque = opaque;
+ rs->printf_func = printf_func;
+ rs->flush_func = flush_func;
+ rs->completion_finder = completion_finder;
+
+ return rs;
+}
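
A sketch of how the ReadLineState machinery above can be wired up. The monitor normally provides these callbacks; the stdio-based ones here are illustrative assumptions, matching the callback shapes used by readline_init() and readline_start() in this file.

#include <stdio.h>
#include <stdarg.h>
#include "qemu-common.h"
#include "qemu/readline.h"

static ReadLineState *rl;

static void rl_printf(void *opaque, const char *fmt, ...)
{
    va_list ap;

    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);
}

static void rl_flush(void *opaque)
{
    fflush(stdout);
}

static void rl_complete(void *opaque, const char *cmdline)
{
    /* no completions in this sketch */
}

static void handle_line(void *opaque, const char *line, void *readline_opaque)
{
    printf("got: %s\n", line);
    readline_start(rl, "sketch> ", 0, handle_line, NULL);
    readline_show_prompt(rl);
}

static void repl(void)
{
    int ch;

    rl = readline_init(rl_printf, rl_flush, NULL, rl_complete);
    readline_start(rl, "sketch> ", 0, handle_line, NULL);
    readline_show_prompt(rl);
    /* assumes the terminal delivers unbuffered bytes one at a time */
    while ((ch = getchar()) != EOF) {
        readline_handle_byte(rl, ch);
    }
}
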
diff --git a/src/util/rfifolock.c b/src/util/rfifolock.c
new file mode 100644
index 0000000..afbf748
--- /dev/null
+++ b/src/util/rfifolock.c
@@ -0,0 +1,78 @@
+/*
+ * Recursive FIFO lock
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Authors:
+ * Stefan Hajnoczi <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <assert.h>
+#include "qemu/rfifolock.h"
+
+void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque)
+{
+ qemu_mutex_init(&r->lock);
+ r->head = 0;
+ r->tail = 0;
+ qemu_cond_init(&r->cond);
+ r->nesting = 0;
+ r->cb = cb;
+ r->cb_opaque = opaque;
+}
+
+void rfifolock_destroy(RFifoLock *r)
+{
+ qemu_cond_destroy(&r->cond);
+ qemu_mutex_destroy(&r->lock);
+}
+
+/*
+ * Theory of operation:
+ *
+ * In order to ensure FIFO ordering, implement a ticketlock. Threads acquiring
+ * the lock enqueue themselves by incrementing the tail index. When the lock
+ * is unlocked, the head is incremented and waiting threads are notified.
+ *
+ * Recursive locking does not take a ticket since the head is only incremented
+ * when the outermost recursive caller unlocks.
+ */
+void rfifolock_lock(RFifoLock *r)
+{
+ qemu_mutex_lock(&r->lock);
+
+ /* Take a ticket */
+ unsigned int ticket = r->tail++;
+
+ if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) {
+ r->tail--; /* put ticket back, we're nesting */
+ } else {
+ while (ticket != r->head) {
+ /* Invoke optional contention callback */
+ if (r->cb) {
+ r->cb(r->cb_opaque);
+ }
+ qemu_cond_wait(&r->cond, &r->lock);
+ }
+ }
+
+ qemu_thread_get_self(&r->owner_thread);
+ r->nesting++;
+ qemu_mutex_unlock(&r->lock);
+}
+
+void rfifolock_unlock(RFifoLock *r)
+{
+ qemu_mutex_lock(&r->lock);
+ assert(r->nesting > 0);
+ assert(qemu_thread_is_self(&r->owner_thread));
+ if (--r->nesting == 0) {
+ r->head++;
+ qemu_cond_broadcast(&r->cond);
+ }
+ qemu_mutex_unlock(&r->lock);
+}
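
A minimal usage sketch of the recursive FIFO lock above; the guarded context and the contention callback body are assumptions, while the rfifolock_* calls are the API defined in this file.

#include "qemu/rfifolock.h"

static RFifoLock ctx_lock;

static void on_contention(void *opaque)
{
    /* invoked while a waiter queues up behind the current owner,
     * e.g. to kick the owner's event loop
     */
}

static void ctx_lock_setup(void)
{
    rfifolock_init(&ctx_lock, on_contention, NULL);
}

static void with_ctx(void (*fn)(void))
{
    rfifolock_lock(&ctx_lock);     /* FIFO ticket order; recursion allowed */
    fn();
    rfifolock_unlock(&ctx_lock);
}
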
diff --git a/src/util/throttle.c b/src/util/throttle.c
new file mode 100644
index 0000000..1113671
--- /dev/null
+++ b/src/util/throttle.c
@@ -0,0 +1,445 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, EURL. 2013-2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ * Benoît Canet <benoit.canet@nodalink.com>
+ * Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/throttle.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+/* This function makes a bucket leak
+ *
+ * @bkt: the bucket to make leak
+ * @delta_ns: the time delta
+ */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
+{
+ double leak;
+
+ /* compute how much to leak */
+ leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+
+ /* make the bucket leak */
+ bkt->level = MAX(bkt->level - leak, 0);
+}
+
+/* Calculate the time delta since the last leak and make proportional leaks
+ *
+ * @now: the current timestamp in ns
+ */
+static void throttle_do_leak(ThrottleState *ts, int64_t now)
+{
+ /* compute the time elapsed since the last leak */
+ int64_t delta_ns = now - ts->previous_leak;
+ int i;
+
+ ts->previous_leak = now;
+
+ if (delta_ns <= 0) {
+ return;
+ }
+
+ /* make each bucket leak */
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
+ }
+}
+
+/* do the real job of computing the time to wait
+ *
+ * @limit: the throttling limit
+ * @extra: the number of operations to delay
+ * @ret: the time to wait in ns
+ */
+static int64_t throttle_do_compute_wait(double limit, double extra)
+{
+ double wait = extra * NANOSECONDS_PER_SECOND;
+ wait /= limit;
+ return wait;
+}
+
+/* This function computes the wait time in ns that a leaky bucket should trigger
+ *
+ * @bkt: the leaky bucket we operate on
+ * @ret: the resulting wait time in ns or 0 if the operation can go through
+ */
+int64_t throttle_compute_wait(LeakyBucket *bkt)
+{
+ double extra; /* the number of extra units blocking the io */
+
+ if (!bkt->avg) {
+ return 0;
+ }
+
+ extra = bkt->level - bkt->max;
+
+ if (extra <= 0) {
+ return 0;
+ }
+
+ return throttle_do_compute_wait(bkt->avg, extra);
+}
+
+/* This function computes how long this I/O must wait
+ *
+ * @is_write: true if the current IO is a write, false if it's a read
+ * @ret: time to wait
+ */
+static int64_t throttle_compute_wait_for(ThrottleState *ts,
+ bool is_write)
+{
+ BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
+ THROTTLE_OPS_TOTAL,
+ THROTTLE_BPS_READ,
+ THROTTLE_OPS_READ},
+ {THROTTLE_BPS_TOTAL,
+ THROTTLE_OPS_TOTAL,
+ THROTTLE_BPS_WRITE,
+ THROTTLE_OPS_WRITE}, };
+ int64_t wait, max_wait = 0;
+ int i;
+
+ for (i = 0; i < 4; i++) {
+ BucketType index = to_check[is_write][i];
+ wait = throttle_compute_wait(&ts->cfg.buckets[index]);
+ if (wait > max_wait) {
+ max_wait = wait;
+ }
+ }
+
+ return max_wait;
+}
+
+/* compute the timer for this type of operation
+ *
+ * @is_write: the type of operation
+ * @now: the current clock timestamp
+ * @next_timestamp: the resulting timer
+ * @ret: true if a timer must be set
+ */
+bool throttle_compute_timer(ThrottleState *ts,
+ bool is_write,
+ int64_t now,
+ int64_t *next_timestamp)
+{
+ int64_t wait;
+
+ /* leak proportionally to the time elapsed */
+ throttle_do_leak(ts, now);
+
+ /* compute the wait time if any */
+ wait = throttle_compute_wait_for(ts, is_write);
+
+ /* if the code must wait compute when the next timer should fire */
+ if (wait) {
+ *next_timestamp = now + wait;
+ return true;
+ }
+
+ /* else no need to wait at all */
+ *next_timestamp = now;
+ return false;
+}
+
+/* Add timers to event loop */
+void throttle_timers_attach_aio_context(ThrottleTimers *tt,
+ AioContext *new_context)
+{
+ tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+ tt->read_timer_cb, tt->timer_opaque);
+ tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+ tt->write_timer_cb, tt->timer_opaque);
+}
+
+/* To be called first on the ThrottleState */
+void throttle_init(ThrottleState *ts)
+{
+ memset(ts, 0, sizeof(ThrottleState));
+}
+
+/* To be called first on the ThrottleTimers */
+void throttle_timers_init(ThrottleTimers *tt,
+ AioContext *aio_context,
+ QEMUClockType clock_type,
+ QEMUTimerCB *read_timer_cb,
+ QEMUTimerCB *write_timer_cb,
+ void *timer_opaque)
+{
+ memset(tt, 0, sizeof(ThrottleTimers));
+
+ tt->clock_type = clock_type;
+ tt->read_timer_cb = read_timer_cb;
+ tt->write_timer_cb = write_timer_cb;
+ tt->timer_opaque = timer_opaque;
+ throttle_timers_attach_aio_context(tt, aio_context);
+}
+
+/* destroy a timer */
+static void throttle_timer_destroy(QEMUTimer **timer)
+{
+ assert(*timer != NULL);
+
+ timer_del(*timer);
+ timer_free(*timer);
+ *timer = NULL;
+}
+
+/* Remove timers from event loop */
+void throttle_timers_detach_aio_context(ThrottleTimers *tt)
+{
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ throttle_timer_destroy(&tt->timers[i]);
+ }
+}
+
+/* To be called last on the ThrottleTimers */
+void throttle_timers_destroy(ThrottleTimers *tt)
+{
+ throttle_timers_detach_aio_context(tt);
+}
+
+/* is any throttling timer configured */
+bool throttle_timers_are_initialized(ThrottleTimers *tt)
+{
+ if (tt->timers[0]) {
+ return true;
+ }
+
+ return false;
+}
+
+/* Does any throttling need to be done
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if throttling must be done else false
+ */
+bool throttle_enabled(ThrottleConfig *cfg)
+{
+ int i;
+
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ if (cfg->buckets[i].avg > 0) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* return true if any two throttling parameters conflict
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if any conflict detected else false
+ */
+bool throttle_conflicting(ThrottleConfig *cfg)
+{
+ bool bps_flag, ops_flag;
+ bool bps_max_flag, ops_max_flag;
+
+ bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
+ (cfg->buckets[THROTTLE_BPS_READ].avg ||
+ cfg->buckets[THROTTLE_BPS_WRITE].avg);
+
+ ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
+ (cfg->buckets[THROTTLE_OPS_READ].avg ||
+ cfg->buckets[THROTTLE_OPS_WRITE].avg);
+
+ bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
+ (cfg->buckets[THROTTLE_BPS_READ].max ||
+ cfg->buckets[THROTTLE_BPS_WRITE].max);
+
+ ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
+ (cfg->buckets[THROTTLE_OPS_READ].max ||
+ cfg->buckets[THROTTLE_OPS_WRITE].max);
+
+ return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
+}
+
+/* check if a throttling configuration is valid
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if valid else false
+ */
+bool throttle_is_valid(ThrottleConfig *cfg)
+{
+ bool invalid = false;
+ int i;
+
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ if (cfg->buckets[i].avg < 0) {
+ invalid = true;
+ }
+ }
+
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ if (cfg->buckets[i].max < 0) {
+ invalid = true;
+ }
+ }
+
+ return !invalid;
+}
+
+/* check if bps_max/iops_max is used without bps/iops
+ * @cfg: the throttling configuration to inspect
+ */
+bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
+{
+ int i;
+
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
+ return true;
+ }
+ }
+ return false;
+}
+
+/* fix bucket parameters */
+static void throttle_fix_bucket(LeakyBucket *bkt)
+{
+ double min;
+
+ /* zero bucket level */
+ bkt->level = 0;
+
+    /* The following is done to cope with the Linux CFQ block scheduler,
+     * which regroups reads and writes in blocks of 100ms in the guest.
+     * When there are two processes, one doing reads and one doing writes,
+     * CFQ produces a pattern looking like the following:
+     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
+     * Having a max burst value of 100ms of the average helps smooth the
+     * throttling.
+ */
+ min = bkt->avg / 10;
+ if (bkt->avg && !bkt->max) {
+ bkt->max = min;
+ }
+}
+
+/* take care of canceling a timer */
+static void throttle_cancel_timer(QEMUTimer *timer)
+{
+ assert(timer != NULL);
+
+ timer_del(timer);
+}
+
+/* Used to configure the throttle
+ *
+ * @ts: the throttle state we are working on
+ * @tt: the throttle timers we use in this aio context
+ * @cfg: the config to set
+ */
+void throttle_config(ThrottleState *ts,
+ ThrottleTimers *tt,
+ ThrottleConfig *cfg)
+{
+ int i;
+
+ ts->cfg = *cfg;
+
+ for (i = 0; i < BUCKETS_COUNT; i++) {
+ throttle_fix_bucket(&ts->cfg.buckets[i]);
+ }
+
+ ts->previous_leak = qemu_clock_get_ns(tt->clock_type);
+
+ for (i = 0; i < 2; i++) {
+ throttle_cancel_timer(tt->timers[i]);
+ }
+}
+
+/* used to get config
+ *
+ * @ts: the throttle state we are working on
+ * @cfg: the config to write
+ */
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+ *cfg = ts->cfg;
+}
+
+
+/* Schedule the read or write timer if needed
+ *
+ * NOTE: this function is not unit tested because of its use of timer_mod
+ *
+ * @tt: the timers structure
+ * @is_write: the type of operation (read/write)
+ * @ret: true if the timer has been scheduled else false
+ */
+bool throttle_schedule_timer(ThrottleState *ts,
+ ThrottleTimers *tt,
+ bool is_write)
+{
+ int64_t now = qemu_clock_get_ns(tt->clock_type);
+ int64_t next_timestamp;
+ bool must_wait;
+
+ must_wait = throttle_compute_timer(ts,
+ is_write,
+ now,
+ &next_timestamp);
+
+ /* request not throttled */
+ if (!must_wait) {
+ return false;
+ }
+
+ /* request throttled and timer pending -> do nothing */
+ if (timer_pending(tt->timers[is_write])) {
+ return true;
+ }
+
+ /* request throttled and timer not pending -> arm timer */
+ timer_mod(tt->timers[is_write], next_timestamp);
+ return true;
+}
+
+/* do the accounting for this operation
+ *
+ * @is_write: the type of operation (read/write)
+ * @size: the size of the operation
+ */
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+{
+ double units = 1.0;
+
+ /* if cfg.op_size is defined and smaller than size we compute unit count */
+ if (ts->cfg.op_size && size > ts->cfg.op_size) {
+ units = (double) size / ts->cfg.op_size;
+ }
+
+ ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
+ ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
+
+ if (is_write) {
+ ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
+ ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
+ } else {
+ ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
+ ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
+ }
+}
+
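For illustration, a sketch of building and applying a throttling configuration with the helpers above. The 10 MB/s average and 20 MB burst values are arbitrary, and ts/tt are assumed to have been set up with throttle_init() and throttle_timers_init() beforehand.

#include <string.h>
#include <stdbool.h>
#include "qemu/throttle.h"

static bool setup_throttle(ThrottleState *ts, ThrottleTimers *tt)
{
    ThrottleConfig cfg;

    memset(&cfg, 0, sizeof(cfg));
    cfg.buckets[THROTTLE_BPS_TOTAL].avg = 10 * 1024 * 1024;   /* bytes/s */
    cfg.buckets[THROTTLE_BPS_TOTAL].max = 20 * 1024 * 1024;   /* burst   */

    if (!throttle_is_valid(&cfg) ||
        throttle_conflicting(&cfg) ||
        throttle_max_is_missing_limit(&cfg)) {
        return false;
    }

    /* zeroes the bucket levels and cancels any pending timers */
    throttle_config(ts, tt, &cfg);
    return throttle_enabled(&cfg);
}
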
diff --git a/src/util/timed-average.c b/src/util/timed-average.c
new file mode 100644
index 0000000..a2dfb48
--- /dev/null
+++ b/src/util/timed-average.c
@@ -0,0 +1,231 @@
+/*
+ * QEMU timed average computation
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ * Benoît Canet <benoit.canet@nodalink.com>
+ * Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 2 of the License, or
+ * (at your option) version 3 or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+#include "qemu/timed-average.h"
+
+/* This module computes an average of a set of values within a time
+ * window.
+ *
+ * Algorithm:
+ *
+ * - Create two windows with a certain expiration period, and
+ * offset by period / 2.
+ * - Each time you want to account a new value, do it in both windows.
+ * - The minimum / maximum / average values are always returned from
+ * the oldest window.
+ *
+ * Example:
+ *
+ * t=0 |t=0.5 |t=1 |t=1.5 |t=2
+ * wnd0: [0,0.5)|wnd0: [0.5,1.5) | |wnd0: [1.5,2.5) |
+ * wnd1: [0,1) | |wnd1: [1,2) | |
+ *
+ * Values are returned from:
+ *
+ * wnd0---------|wnd1------------|wnd0---------|wnd1-------------|
+ */
+
+/* Update the expiration of a time window
+ *
+ * @w: the window used
+ * @now: the current time in nanoseconds
+ * @period: the expiration period in nanoseconds
+ */
+static void update_expiration(TimedAverageWindow *w, int64_t now,
+ int64_t period)
+{
+ /* time elapsed since the last theoretical expiration */
+ int64_t elapsed = (now - w->expiration) % period;
+    /* time remaining until the next expiration */
+ int64_t remaining = period - elapsed;
+ /* compute expiration */
+ w->expiration = now + remaining;
+}
+
+/* Reset a window
+ *
+ * @w: the window to reset
+ */
+static void window_reset(TimedAverageWindow *w)
+{
+ w->min = UINT64_MAX;
+ w->max = 0;
+ w->sum = 0;
+ w->count = 0;
+}
+
+/* Get the current window (that is, the one with the earliest
+ * expiration time).
+ *
+ * @ta: the TimedAverage structure
+ * @ret: a pointer to the current window
+ */
+static TimedAverageWindow *current_window(TimedAverage *ta)
+{
+ return &ta->windows[ta->current];
+}
+
+/* Initialize a TimedAverage structure
+ *
+ * @ta: the TimedAverage structure
+ * @clock_type: the type of clock to use
+ * @period: the time window period in nanoseconds
+ */
+void timed_average_init(TimedAverage *ta, QEMUClockType clock_type,
+ uint64_t period)
+{
+ int64_t now = qemu_clock_get_ns(clock_type);
+
+ /* Returned values are from the oldest window, so they belong to
+ * the interval [ta->period/2,ta->period). By adjusting the
+ * requested period by 4/3, we guarantee that they're in the
+ * interval [2/3 period,4/3 period), closer to the requested
+ * period on average */
+ ta->period = (uint64_t) period * 4 / 3;
+ ta->clock_type = clock_type;
+ ta->current = 0;
+
+ window_reset(&ta->windows[0]);
+ window_reset(&ta->windows[1]);
+
+    /* Both windows are offset by half a period */
+ ta->windows[0].expiration = now + ta->period / 2;
+ ta->windows[1].expiration = now + ta->period;
+}
+
+/* Check if the time windows have expired, updating their counters and
+ * expiration time if that's the case.
+ *
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) within the current
+ * window will be stored here
+ */
+static void check_expirations(TimedAverage *ta, uint64_t *elapsed)
+{
+ int64_t now = qemu_clock_get_ns(ta->clock_type);
+ int i;
+
+ assert(ta->period != 0);
+
+ /* Check if the windows have expired */
+ for (i = 0; i < 2; i++) {
+ TimedAverageWindow *w = &ta->windows[i];
+ if (w->expiration <= now) {
+ window_reset(w);
+ update_expiration(w, now, ta->period);
+ }
+ }
+
+ /* Make ta->current point to the oldest window */
+ if (ta->windows[0].expiration < ta->windows[1].expiration) {
+ ta->current = 0;
+ } else {
+ ta->current = 1;
+ }
+
+ /* Calculate the elapsed time within the current window */
+ if (elapsed) {
+ int64_t remaining = ta->windows[ta->current].expiration - now;
+ *elapsed = ta->period - remaining;
+ }
+}
+
+/* Account a value
+ *
+ * @ta: the TimedAverage structure
+ * @value: the value to account
+ */
+void timed_average_account(TimedAverage *ta, uint64_t value)
+{
+ int i;
+ check_expirations(ta, NULL);
+
+ /* Do the accounting in both windows at the same time */
+ for (i = 0; i < 2; i++) {
+ TimedAverageWindow *w = &ta->windows[i];
+
+ w->sum += value;
+ w->count++;
+
+ if (value < w->min) {
+ w->min = value;
+ }
+
+ if (value > w->max) {
+ w->max = value;
+ }
+ }
+}
+
+/* Get the minimum value
+ *
+ * @ta: the TimedAverage structure
+ * @ret: the minimum value
+ */
+uint64_t timed_average_min(TimedAverage *ta)
+{
+ TimedAverageWindow *w;
+ check_expirations(ta, NULL);
+ w = current_window(ta);
+ return w->min < UINT64_MAX ? w->min : 0;
+}
+
+/* Get the average value
+ *
+ * @ta: the TimedAverage structure
+ * @ret: the average value
+ */
+uint64_t timed_average_avg(TimedAverage *ta)
+{
+ TimedAverageWindow *w;
+ check_expirations(ta, NULL);
+ w = current_window(ta);
+ return w->count > 0 ? w->sum / w->count : 0;
+}
+
+/* Get the maximum value
+ *
+ * @ta: the TimedAverage structure
+ * @ret: the maximum value
+ */
+uint64_t timed_average_max(TimedAverage *ta)
+{
+ check_expirations(ta, NULL);
+ return current_window(ta)->max;
+}
+
+/* Get the sum of all accounted values
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here
+ * @ret: the sum of all accounted values
+ */
+uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
+{
+ TimedAverageWindow *w;
+ check_expirations(ta, elapsed);
+ w = current_window(ta);
+ return w->sum;
+}
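
A small sketch of the two-window averaging API above, tracking request latencies over a one-second window; the latency_* wrappers are assumptions, while the timed_average_* calls and QEMU_CLOCK_REALTIME are the interfaces used by this file.

#include <stdio.h>
#include <inttypes.h>
#include "qemu/timed-average.h"
#include "qemu/timer.h"

static TimedAverage latency;

static void latency_init(void)
{
    timed_average_init(&latency, QEMU_CLOCK_REALTIME, NANOSECONDS_PER_SECOND);
}

static void latency_record(uint64_t ns)
{
    timed_average_account(&latency, ns);
}

static void latency_report(void)
{
    printf("min %" PRIu64 " avg %" PRIu64 " max %" PRIu64 " ns\n",
           timed_average_min(&latency),
           timed_average_avg(&latency),
           timed_average_max(&latency));
}
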
diff --git a/src/util/unicode.c b/src/util/unicode.c
new file mode 100644
index 0000000..d1c8658
--- /dev/null
+++ b/src/util/unicode.c
@@ -0,0 +1,100 @@
+/*
+ * Dealing with Unicode
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Authors:
+ * Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later. See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+/**
+ * mod_utf8_codepoint:
+ * @s: string encoded in modified UTF-8
+ * @n: maximum number of bytes to read from @s, if less than 6
+ * @end: set to end of sequence on return
+ *
+ * Convert the modified UTF-8 sequence at the start of @s. Modified
+ * UTF-8 is exactly like UTF-8, except U+0000 is encoded as
+ * "\xC0\x80".
+ *
+ * If @n is zero or @s points to a zero byte, the sequence is invalid,
+ * and @end is set to @s.
+ *
+ * If @s points to an impossible byte (0xFE or 0xFF) or a continuation
+ * byte, the sequence is invalid, and @end is set to @s + 1
+ *
+ * Else, the first byte determines how many continuation bytes are
+ * expected. If there are fewer, the sequence is invalid, and @end is
+ * set to @s + 1 + actual number of continuation bytes. Else, the
+ * sequence is well-formed, and @end is set to @s + 1 + expected
+ * number of continuation bytes.
+ *
+ * A well-formed sequence is valid unless it encodes a codepoint
+ * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66
+ * noncharacters, a surrogate codepoint, or is overlong. Except the
+ * overlong sequence "\xC0\x80" is valid.
+ *
+ * Conversion succeeds if and only if the sequence is valid.
+ *
+ * Returns: the Unicode codepoint on success, -1 on failure.
+ */
+int mod_utf8_codepoint(const char *s, size_t n, char **end)
+{
+ static int min_cp[5] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+ const unsigned char *p;
+ unsigned byte, mask, len, i;
+ int cp;
+
+ if (n == 0 || *s == 0) {
+ /* empty sequence */
+ *end = (char *)s;
+ return -1;
+ }
+
+ p = (const unsigned char *)s;
+ byte = *p++;
+ if (byte < 0x80) {
+ cp = byte; /* one byte sequence */
+ } else if (byte >= 0xFE) {
+ cp = -1; /* impossible bytes 0xFE, 0xFF */
+ } else if ((byte & 0x40) == 0) {
+ cp = -1; /* unexpected continuation byte */
+ } else {
+ /* multi-byte sequence */
+ len = 0;
+ for (mask = 0x80; byte & mask; mask >>= 1) {
+ len++;
+ }
+ assert(len > 1 && len < 7);
+ cp = byte & (mask - 1);
+ for (i = 1; i < len; i++) {
+ byte = i < n ? *p : 0;
+ if ((byte & 0xC0) != 0x80) {
+ cp = -1; /* continuation byte missing */
+ goto out;
+ }
+ p++;
+ cp <<= 6;
+ cp |= byte & 0x3F;
+ }
+ if (cp > 0x10FFFF) {
+ cp = -1; /* beyond Unicode range */
+ } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
+ || (cp & 0xFFFE) == 0xFFFE) {
+ cp = -1; /* noncharacter */
+ } else if (cp >= 0xD800 && cp <= 0xDFFF) {
+ cp = -1; /* surrogate code point */
+ } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
+ cp = -1; /* overlong, not \xC0\x80 */
+ }
+ }
+
+out:
+ *end = (char *)p;
+ return cp;
+}
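+
+/*
+ * Usage sketch (illustrative): decoding a few sequences with the routine
+ * above.
+ *
+ *   char *end;
+ *   int cp;
+ *
+ *   cp = mod_utf8_codepoint("\xC3\xA9", 2, &end);
+ *       => cp == 0xE9 (U+00E9), end points past the two-byte sequence
+ *
+ *   cp = mod_utf8_codepoint("\xC0\x80", 2, &end);
+ *       => cp == 0, the modified UTF-8 encoding of U+0000 is accepted
+ *
+ *   cp = mod_utf8_codepoint("\x80", 1, &end);
+ *       => cp == -1, a lone continuation byte is invalid
+ */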
diff --git a/src/util/uri.c b/src/util/uri.c
new file mode 100644
index 0000000..550b984
--- /dev/null
+++ b/src/util/uri.c
@@ -0,0 +1,2204 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel@veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Authors:
+ * Richard W.M. Jones <rjones@redhat.com>
+ *
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "qemu/uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ * "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ * "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \
+ ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \
+ ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "`"
+ */
+
+#define IS_UNWISE(p) \
+ (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \
+ ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \
+ ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ * "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
+ ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
+ ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
+ ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%')? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ * authority = server | reg_name
+ * reg_name = 1*( unreserved | escaped | "$" | "," |
+ * ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path = [ abs_path | opaque_part ]
+ */
+
+
+/************************************************************************
+ * *
+ * RFC 3986 parser *
+ * *
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \
+ ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p) \
+ (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \
+ ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ * sub-delims = "!" / "$" / "&" / "'" / "(" / ")"
+ * / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p) \
+ (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \
+ ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \
+ ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \
+ ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p) \
+ (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \
+ ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \
+ ((*(p) == '@')))
+
+/*
+ * reserved = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p) \
+ ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \
+ ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ * pct-encoded = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p) \
+ ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ * pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p) \
+ (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \
+ ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri: pointer to an URI structure
+ * @str: pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_scheme(URI *uri, const char **str) {
+ const char *cur;
+
+ if (str == NULL)
+ return(-1);
+
+ cur = *str;
+ if (!ISA_ALPHA(cur))
+ return(2);
+ cur++;
+ while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
+ (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
+ if (uri != NULL) {
+ g_free(uri->scheme);
+ uri->scheme = g_strndup(*str, cur - *str);
+ }
+ *str = cur;
+ return(0);
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri: pointer to an URI structure
+ * @str: pointer to the string to analyze
+ *
+ * Parse the fragment part of an URI
+ *
+ * fragment = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ * in the fragment identifier but this is used very broadly for
+ * xpointer scheme selection, so we are allowing it here to not break
+ * for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_fragment(URI *uri, const char **str)
+{
+ const char *cur;
+
+ if (str == NULL)
+ return (-1);
+
+ cur = *str;
+
+ while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+ (*cur == '[') || (*cur == ']') ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+ NEXT(cur);
+ if (uri != NULL) {
+ g_free(uri->fragment);
+ if (uri->cleanup & 2)
+ uri->fragment = g_strndup(*str, cur - *str);
+ else
+ uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri: pointer to an URI structure
+ * @str: pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *uric
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_query(URI *uri, const char **str)
+{
+ const char *cur;
+
+ if (str == NULL)
+ return (-1);
+
+ cur = *str;
+
+ while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+ ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+ NEXT(cur);
+ if (uri != NULL) {
+ g_free(uri->query);
+ uri->query = g_strndup (*str, cur - *str);
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a port part and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * port = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_port(URI *uri, const char **str)
+{
+ const char *cur = *str;
+ int port = 0;
+
+ if (ISA_DIGIT(cur)) {
+ while (ISA_DIGIT(cur)) {
+ port = port * 10 + (*cur - '0');
+ if (port > 65535) {
+ return 1;
+ }
+ cur++;
+ }
+ if (uri) {
+ uri->port = port;
+ }
+ *str = cur;
+ return 0;
+ }
+ return 1;
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a user information part and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * userinfo = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_user_info(URI *uri, const char **str)
+{
+ const char *cur;
+
+ cur = *str;
+ while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
+ ISA_SUB_DELIM(cur) || (*cur == ':'))
+ NEXT(cur);
+ if (*cur == '@') {
+ if (uri != NULL) {
+ g_free(uri->user);
+ if (uri->cleanup & 2)
+ uri->user = g_strndup(*str, cur - *str);
+ else
+ uri->user = uri_string_unescape(*str, cur - *str, NULL);
+ }
+ *str = cur;
+ return(0);
+ }
+ return(1);
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str: the string to analyze
+ *
+ * dec-octet = DIGIT ; 0-9
+ * / %x31-39 DIGIT ; 10-99
+ * / "1" 2DIGIT ; 100-199
+ * / "2" %x30-34 DIGIT ; 200-249
+ * / "25" %x30-35 ; 250-255
+ *
+ * Skip a dec-octet.
+ *
+ * Returns 0 if found and skipped, 1 otherwise
+ */
+static int
+rfc3986_parse_dec_octet(const char **str) {
+ const char *cur = *str;
+
+ if (!(ISA_DIGIT(cur)))
+ return(1);
+ if (!ISA_DIGIT(cur+1))
+ cur++;
+ else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
+ cur += 2;
+ else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
+ cur += 3;
+ else if ((*cur == '2') && (*(cur + 1) >= '0') &&
+ (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
+ cur += 3;
+ else if ((*cur == '2') && (*(cur + 1) == '5') &&
+ (*(cur + 2) >= '0') && (*(cur + 2) <= '5'))
+ cur += 3;
+ else
+ return(1);
+ *str = cur;
+ return(0);
+}
+/**
+ * rfc3986_parse_host:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a host part and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * host = IP-literal / IPv4address / reg-name
+ * IP-literal = "[" ( IPv6address / IPvFuture ) "]"
+ * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_host(URI *uri, const char **str)
+{
+ const char *cur = *str;
+ const char *host;
+
+ host = cur;
+ /*
+ * IPv6 and future addressing scheme are enclosed between brackets
+ */
+ if (*cur == '[') {
+ cur++;
+ while ((*cur != ']') && (*cur != 0))
+ cur++;
+ if (*cur != ']')
+ return(1);
+ cur++;
+ goto found;
+ }
+ /*
+ * try to parse an IPv4
+ */
+ if (ISA_DIGIT(cur)) {
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ if (*cur != '.')
+ goto not_ipv4;
+ cur++;
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ if (*cur != '.')
+ goto not_ipv4;
+ cur++;
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ if (*cur != '.')
+ goto not_ipv4;
+ cur++;
+ if (rfc3986_parse_dec_octet(&cur) != 0)
+ goto not_ipv4;
+ goto found;
+not_ipv4:
+ cur = *str;
+ }
+ /*
+ * then this should be a hostname which can be empty
+ */
+ while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
+ NEXT(cur);
+found:
+ if (uri != NULL) {
+ g_free(uri->authority);
+ uri->authority = NULL;
+ g_free(uri->server);
+ if (cur != host) {
+ if (uri->cleanup & 2)
+ uri->server = g_strndup(host, cur - host);
+ else
+ uri->server = uri_string_unescape(host, cur - host, NULL);
+ } else
+ uri->server = NULL;
+ }
+ *str = cur;
+ return(0);
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an authority part and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * authority = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_authority(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+ /*
+ * try to parse a userinfo and check for the trailing @
+ */
+ ret = rfc3986_parse_user_info(uri, &cur);
+ if ((ret != 0) || (*cur != '@'))
+ cur = *str;
+ else
+ cur++;
+ ret = rfc3986_parse_host(uri, &cur);
+ if (ret != 0) return(ret);
+ if (*cur == ':') {
+ cur++;
+ ret = rfc3986_parse_port(uri, &cur);
+ if (ret != 0) return(ret);
+ }
+ *str = cur;
+ return(0);
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str: the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Parse a segment and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * segment = *pchar
+ * segment-nz = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ * ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+ const char *cur;
+
+ cur = *str;
+ if (!ISA_PCHAR(cur)) {
+ if (empty)
+ return(0);
+ return(1);
+ }
+ while (ISA_PCHAR(cur) && (*cur != forbid))
+ NEXT(cur);
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an absolute or empty path and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ if (uri != NULL) {
+ g_free(uri->path);
+ if (*str != cur) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an absolute path and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ if (*cur != '/')
+ return(1);
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 0);
+ if (ret == 0) {
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ }
+ if (uri != NULL) {
+ g_free(uri->path);
+ if (cur != *str) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a rootless path and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ ret = rfc3986_parse_segment(&cur, 0, 0);
+ if (ret != 0) return(ret);
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ if (uri != NULL) {
+ g_free(uri->path);
+ if (cur != *str) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a path with no leading scheme and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ ret = rfc3986_parse_segment(&cur, ':', 0);
+ if (ret != 0) return(ret);
+ while (*cur == '/') {
+ cur++;
+ ret = rfc3986_parse_segment(&cur, 0, 1);
+ if (ret != 0) return(ret);
+ }
+ if (uri != NULL) {
+ g_free(uri->path);
+ if (cur != *str) {
+ if (uri->cleanup & 2)
+ uri->path = g_strndup(*str, cur - *str);
+ else
+ uri->path = uri_string_unescape(*str, cur - *str, NULL);
+ } else {
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse a hierarchical part and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part = "//" authority path-abempty
+ * / path-absolute
+ * / path-rootless
+ * / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+ const char *cur;
+ int ret;
+
+ cur = *str;
+
+ if ((*cur == '/') && (*(cur + 1) == '/')) {
+ cur += 2;
+ ret = rfc3986_parse_authority(uri, &cur);
+ if (ret != 0) return(ret);
+ ret = rfc3986_parse_path_ab_empty(uri, &cur);
+ if (ret != 0) return(ret);
+ *str = cur;
+ return(0);
+ } else if (*cur == '/') {
+ ret = rfc3986_parse_path_absolute(uri, &cur);
+ if (ret != 0) return(ret);
+ } else if (ISA_PCHAR(cur)) {
+ ret = rfc3986_parse_path_rootless(uri, &cur);
+ if (ret != 0) return(ret);
+ } else {
+ /* path-empty is effectively empty */
+ if (uri != NULL) {
+ g_free(uri->path);
+ uri->path = NULL;
+ }
+ }
+ *str = cur;
+ return (0);
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI string and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ * / path-absolute
+ * / path-noscheme
+ * / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_relative_ref(URI *uri, const char *str) {
+ int ret;
+
+ if ((*str == '/') && (*(str + 1) == '/')) {
+ str += 2;
+ ret = rfc3986_parse_authority(uri, &str);
+ if (ret != 0) return(ret);
+ ret = rfc3986_parse_path_ab_empty(uri, &str);
+ if (ret != 0) return(ret);
+ } else if (*str == '/') {
+ ret = rfc3986_parse_path_absolute(uri, &str);
+ if (ret != 0) return(ret);
+ } else if (ISA_PCHAR(str)) {
+ ret = rfc3986_parse_path_no_scheme(uri, &str);
+ if (ret != 0) return(ret);
+ } else {
+ /* path-empty is effectively empty */
+ if (uri != NULL) {
+ g_free(uri->path);
+ uri->path = NULL;
+ }
+ }
+
+ if (*str == '?') {
+ str++;
+ ret = rfc3986_parse_query(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str == '#') {
+ str++;
+ ret = rfc3986_parse_fragment(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str != 0) {
+ uri_clean(uri);
+ return(1);
+ }
+ return(0);
+}
+
+
+/**
+ * rfc3986_parse:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI string and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse(URI *uri, const char *str) {
+ int ret;
+
+ ret = rfc3986_parse_scheme(uri, &str);
+ if (ret != 0) return(ret);
+ if (*str != ':') {
+ return(1);
+ }
+ str++;
+ ret = rfc3986_parse_hier_part(uri, &str);
+ if (ret != 0) return(ret);
+ if (*str == '?') {
+ str++;
+ ret = rfc3986_parse_query(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str == '#') {
+ str++;
+ ret = rfc3986_parse_fragment(uri, &str);
+ if (ret != 0) return(ret);
+ }
+ if (*str != 0) {
+ uri_clean(uri);
+ return(1);
+ }
+ return(0);
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI reference string and fill in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_uri_reference(URI *uri, const char *str) {
+ int ret;
+
+ if (str == NULL)
+ return(-1);
+ uri_clean(uri);
+
+ /*
+ * Try first to parse absolute refs, then fallback to relative if
+ * it fails.
+ */
+ ret = rfc3986_parse(uri, str);
+ if (ret != 0) {
+ uri_clean(uri);
+ ret = rfc3986_parse_relative_ref(uri, str);
+ if (ret != 0) {
+ uri_clean(uri);
+ return(ret);
+ }
+ }
+ return(0);
+}
+
+/**
+ * uri_parse:
+ * @str: the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse(const char *str) {
+ URI *uri;
+ int ret;
+
+ if (str == NULL)
+ return(NULL);
+ uri = uri_new();
+ ret = rfc3986_parse_uri_reference(uri, str);
+ if (ret) {
+ uri_free(uri);
+ return(NULL);
+ }
+ return(uri);
+}
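+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   URI *u = uri_parse("http://user@example.org:8080/a/b?x=1#top");
+ *       => u->scheme "http", u->user "user", u->server "example.org",
+ *          u->port 8080, u->path "/a/b", u->query "x=1", u->fragment "top"
+ *   uri_free(u);
+ *
+ * A NULL return means the string could not be parsed as an URI reference.
+ */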
+
+/**
+ * uri_parse_into:
+ * @uri: pointer to an URI structure
+ * @str: the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fill in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int
+uri_parse_into(URI *uri, const char *str) {
+ return(rfc3986_parse_uri_reference(uri, str));
+}
+
+/**
+ * uri_parse_raw:
+ * @str: the URI string to analyze
+ * @raw: if 1, unescaping of URI pieces is disabled
+ *
+ * Parse an URI but allow the original fragments to be kept intact.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse_raw(const char *str, int raw) {
+ URI *uri;
+ int ret;
+
+ if (str == NULL)
+ return(NULL);
+ uri = uri_new();
+ if (raw) {
+ uri->cleanup |= 2;
+ }
+ ret = uri_parse_into(uri, str);
+ if (ret) {
+ uri_free(uri);
+ return(NULL);
+ }
+ return(uri);
+}
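+
+/*
+ * Illustrative difference from uri_parse: with @raw set, percent-encoded
+ * pieces are kept verbatim instead of being unescaped.
+ *
+ *   URI *a = uri_parse("scheme://host/a%20b");          => a->path "/a b"
+ *   URI *b = uri_parse_raw("scheme://host/a%20b", 1);   => b->path "/a%20b"
+ */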
+
+/************************************************************************
+ * *
+ * Generic URI structure functions *
+ * *
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *
+uri_new(void) {
+ URI *ret;
+
+ ret = g_new0(URI, 1);
+ return(ret);
+}
+
+/**
+ * realloc2n:
+ *
+ * Function to properly handle a reallocation when saving an URI
+ * Also imposes some limit on the length of an URI string output
+ */
+static char *
+realloc2n(char *ret, int *max) {
+ char *temp;
+ int tmp;
+
+ tmp = *max * 2;
+ temp = g_realloc(ret, (tmp + 1));
+ *max = tmp;
+ return(temp);
+}
+
+/**
+ * uri_to_string:
+ * @uri: pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *
+uri_to_string(URI *uri) {
+ char *ret = NULL;
+ char *temp;
+ const char *p;
+ int len;
+ int max;
+
+ if (uri == NULL) return(NULL);
+
+
+ max = 80;
+ ret = g_malloc(max + 1);
+ len = 0;
+
+ if (uri->scheme != NULL) {
+ p = uri->scheme;
+ while (*p != 0) {
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ }
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = ':';
+ }
+ if (uri->opaque != NULL) {
+ p = uri->opaque;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ } else {
+ if (uri->server != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = '/';
+ ret[len++] = '/';
+ if (uri->user != NULL) {
+ p = uri->user;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) ||
+ ((*(p) == ';')) || ((*(p) == ':')) ||
+ ((*(p) == '&')) || ((*(p) == '=')) ||
+ ((*(p) == '+')) || ((*(p) == '$')) ||
+ ((*(p) == ',')))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = '@';
+ }
+ p = uri->server;
+ while (*p != 0) {
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ }
+ if (uri->port > 0) {
+ if (len + 10 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ len += snprintf(&ret[len], max - len, ":%d", uri->port);
+ }
+ } else if (uri->authority != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = '/';
+ ret[len++] = '/';
+ p = uri->authority;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) ||
+ ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
+ ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+ ((*(p) == '=')) || ((*(p) == '+')))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ } else if (uri->scheme != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = '/';
+ ret[len++] = '/';
+ }
+ if (uri->path != NULL) {
+ p = uri->path;
+ /*
+ * the colon in file:///d: should not be escaped or
+ * Windows accesses fail later.
+ */
+ if ((uri->scheme != NULL) &&
+ (p[0] == '/') &&
+ (((p[1] >= 'a') && (p[1] <= 'z')) ||
+ ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+ (p[2] == ':') &&
+ (!strcmp(uri->scheme, "file"))) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ ret[len++] = *p++;
+ ret[len++] = *p++;
+ }
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+ ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+ ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+ ((*(p) == ',')))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ }
+ if (uri->query != NULL) {
+ if (len + 1 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = '?';
+ p = uri->query;
+ while (*p != 0) {
+ if (len + 1 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = *p++;
+ }
+ }
+ }
+ if (uri->fragment != NULL) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len++] = '#';
+ p = uri->fragment;
+ while (*p != 0) {
+ if (len + 3 >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
+ ret[len++] = *p++;
+ else {
+ int val = *(unsigned char *)p++;
+ int hi = val / 0x10, lo = val % 0x10;
+ ret[len++] = '%';
+ ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+ ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+ }
+ }
+ }
+ if (len >= max) {
+ temp = realloc2n(ret, &max);
+ ret = temp;
+ }
+ ret[len] = 0;
+ return(ret);
+}
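+
+/*
+ * Round-trip sketch (illustrative): fields that were unescaped by uri_parse
+ * are re-escaped on output.
+ *
+ *   URI *u = uri_parse("http://example.org/a%20b?q=1");   => u->path "/a b"
+ *   char *s = uri_to_string(u);   => "http://example.org/a%20b?q=1"
+ *   g_free(s);
+ *   uri_free(u);
+ */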
+
+/**
+ * uri_clean:
+ * @uri: pointer to an URI
+ *
+ * Make sure the URI struct is free of content
+ */
+static void
+uri_clean(URI *uri) {
+ if (uri == NULL) return;
+
+ g_free(uri->scheme);
+ uri->scheme = NULL;
+ g_free(uri->server);
+ uri->server = NULL;
+ g_free(uri->user);
+ uri->user = NULL;
+ g_free(uri->path);
+ uri->path = NULL;
+ g_free(uri->fragment);
+ uri->fragment = NULL;
+ g_free(uri->opaque);
+ uri->opaque = NULL;
+ g_free(uri->authority);
+ uri->authority = NULL;
+ g_free(uri->query);
+ uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri: pointer to an URI
+ *
+ * Free up the URI struct
+ */
+void
+uri_free(URI *uri) {
+ uri_clean(uri);
+ g_free(uri);
+}
+
+/************************************************************************
+ * *
+ * Helper functions *
+ * *
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path: pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+static int
+normalize_uri_path(char *path) {
+ char *cur, *out;
+
+ if (path == NULL)
+ return(-1);
+
+ /* Skip all initial "/" chars. We want to get to the beginning of the
+ * first non-empty segment.
+ */
+ cur = path;
+ while (cur[0] == '/')
+ ++cur;
+ if (cur[0] == '\0')
+ return(0);
+
+ /* Keep everything we've seen so far. */
+ out = cur;
+
+ /*
+ * Analyze each segment in sequence for cases (c) and (d).
+ */
+ while (cur[0] != '\0') {
+ /*
+ * c) All occurrences of "./", where "." is a complete path segment,
+ * are removed from the buffer string.
+ */
+ if ((cur[0] == '.') && (cur[1] == '/')) {
+ cur += 2;
+ /* '//' normalization should be done at this point too */
+ while (cur[0] == '/')
+ cur++;
+ continue;
+ }
+
+ /*
+ * d) If the buffer string ends with "." as a complete path segment,
+ * that "." is removed.
+ */
+ if ((cur[0] == '.') && (cur[1] == '\0'))
+ break;
+
+ /* Otherwise keep the segment. */
+ while (cur[0] != '/') {
+ if (cur[0] == '\0')
+ goto done_cd;
+ (out++)[0] = (cur++)[0];
+ }
+ /* normalize // */
+ while ((cur[0] == '/') && (cur[1] == '/'))
+ cur++;
+
+ (out++)[0] = (cur++)[0];
+ }
+ done_cd:
+ out[0] = '\0';
+
+ /* Reset to the beginning of the first segment for the next sequence. */
+ cur = path;
+ while (cur[0] == '/')
+ ++cur;
+ if (cur[0] == '\0')
+ return(0);
+
+ /*
+ * Analyze each segment in sequence for cases (e) and (f).
+ *
+ * e) All occurrences of "<segment>/../", where <segment> is a
+ * complete path segment not equal to "..", are removed from the
+ * buffer string. Removal of these path segments is performed
+ * iteratively, removing the leftmost matching pattern on each
+ * iteration, until no matching pattern remains.
+ *
+ * f) If the buffer string ends with "<segment>/..", where <segment>
+ * is a complete path segment not equal to "..", that
+ * "<segment>/.." is removed.
+ *
+ * To satisfy the "iterative" clause in (e), we need to collapse the
+ * string every time we find something that needs to be removed. Thus,
+ * we don't need to keep two pointers into the string: we only need a
+ * "current position" pointer.
+ */
+ while (1) {
+ char *segp, *tmp;
+
+ /* At the beginning of each iteration of this loop, "cur" points to
+ * the first character of the segment we want to examine.
+ */
+
+ /* Find the end of the current segment. */
+ segp = cur;
+ while ((segp[0] != '/') && (segp[0] != '\0'))
+ ++segp;
+
+ /* If this is the last segment, we're done (we need at least two
+ * segments to meet the criteria for the (e) and (f) cases).
+ */
+ if (segp[0] == '\0')
+ break;
+
+ /* If the first segment is "..", or if the next segment _isn't_ "..",
+ * keep this segment and try the next one.
+ */
+ ++segp;
+ if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+ || ((segp[0] != '.') || (segp[1] != '.')
+ || ((segp[2] != '/') && (segp[2] != '\0')))) {
+ cur = segp;
+ continue;
+ }
+
+ /* If we get here, remove this segment and the next one and back up
+ * to the previous segment (if there is one), to implement the
+ * "iteratively" clause. It's pretty much impossible to back up
+ * while maintaining two pointers into the buffer, so just compact
+ * the whole buffer now.
+ */
+
+ /* If this is the end of the buffer, we're done. */
+ if (segp[2] == '\0') {
+ cur[0] = '\0';
+ break;
+ }
+ /* Valgrind complained, strcpy(cur, segp + 3); */
+ /* string will overlap, do not use strcpy */
+ tmp = cur;
+ segp += 3;
+ while ((*tmp++ = *segp++) != 0)
+ ;
+
+ /* If there are no previous segments, then keep going from here. */
+ segp = cur;
+ while ((segp > path) && ((--segp)[0] == '/'))
+ ;
+ if (segp == path)
+ continue;
+
+ /* "segp" is pointing to the end of a previous segment; find it's
+ * start. We need to back up to the previous segment and start
+ * over with that to handle things like "foo/bar/../..". If we
+ * don't do this, then on the first pass we'll remove the "bar/..",
+ * but be pointing at the second ".." so we won't realize we can also
+ * remove the "foo/..".
+ */
+ cur = segp;
+ while ((cur > path) && (cur[-1] != '/'))
+ --cur;
+ }
+ out[0] = '\0';
+
+ /*
+ * g) If the resulting buffer string still begins with one or more
+ * complete path segments of "..", then the reference is
+ * considered to be in error. Implementations may handle this
+ * error by retaining these components in the resolved path (i.e.,
+ * treating them as part of the final URI), by removing them from
+ * the resolved path (i.e., discarding relative levels above the
+ * root), or by avoiding traversal of the reference.
+ *
+ * We discard them from the final path.
+ */
+ if (path[0] == '/') {
+ cur = path;
+ while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
+ && ((cur[3] == '/') || (cur[3] == '\0')))
+ cur += 3;
+
+ if (cur != path) {
+ out = path;
+ while (cur[0] != '\0')
+ (out++)[0] = (cur++)[0];
+ out[0] = 0;
+ }
+ }
+
+ return(0);
+}
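+
+/*
+ * Illustrative effect of the normalization above (done in place):
+ *
+ *   "/a/./b/../c"   becomes "/a/c"
+ *   "a/b/../c"      becomes "a/c"
+ *   "/../a"         becomes "/a"    (leading "/.." segments are discarded)
+ */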
+
+static int is_hex(char c) {
+ if (((c >= '0') && (c <= '9')) ||
+ ((c >= 'a') && (c <= 'f')) ||
+ ((c >= 'A') && (c <= 'F')))
+ return(1);
+ return(0);
+}
+
+
+/**
+ * uri_string_unescape:
+ * @str: the string to unescape
+ * @len: the length in bytes to unescape (or <= 0 to indicate full string)
+ * @target: optional destination buffer
+ *
+ * Unescaping routine, but does not check that the string is an URI. The
+ * output is a direct unsigned char translation of %XX values (no encoding)
+ * Note that the length of the result can only be smaller or same size as
+ * the input string.
+ *
+ * Returns a copy of the string, but unescaped; will return NULL only in case
+ * of error
+ */
+char *
+uri_string_unescape(const char *str, int len, char *target) {
+ char *ret, *out;
+ const char *in;
+
+ if (str == NULL)
+ return(NULL);
+ if (len <= 0) len = strlen(str);
+ if (len < 0) return(NULL);
+
+ if (target == NULL) {
+ ret = g_malloc(len + 1);
+ } else
+ ret = target;
+ in = str;
+ out = ret;
+ while(len > 0) {
+ if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
+ in++;
+ if ((*in >= '0') && (*in <= '9'))
+ *out = (*in - '0');
+ else if ((*in >= 'a') && (*in <= 'f'))
+ *out = (*in - 'a') + 10;
+ else if ((*in >= 'A') && (*in <= 'F'))
+ *out = (*in - 'A') + 10;
+ in++;
+ if ((*in >= '0') && (*in <= '9'))
+ *out = *out * 16 + (*in - '0');
+ else if ((*in >= 'a') && (*in <= 'f'))
+ *out = *out * 16 + (*in - 'a') + 10;
+ else if ((*in >= 'A') && (*in <= 'F'))
+ *out = *out * 16 + (*in - 'A') + 10;
+ in++;
+ len -= 3;
+ out++;
+ } else {
+ *out++ = *in++;
+ len--;
+ }
+ }
+ *out = 0;
+ return(ret);
+}
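+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   char *s = uri_string_unescape("a%20b%2Fc", -1, NULL);
+ *       => s is the newly allocated string "a b/c"; free it with g_free()
+ *   g_free(s);
+ */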
+
+/**
+ * uri_string_escape:
+ * @str: string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, leaving unreserved characters
+ * and the characters in the exception list unescaped.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+char *
+uri_string_escape(const char *str, const char *list) {
+ char *ret, ch;
+ char *temp;
+ const char *in;
+ int len, out;
+
+ if (str == NULL)
+ return(NULL);
+ if (str[0] == 0)
+ return(g_strdup(str));
+ len = strlen(str);
+ if (!(len > 0)) return(NULL);
+
+ len += 20;
+ ret = g_malloc(len);
+ in = str;
+ out = 0;
+ while(*in != 0) {
+ if (len - out <= 3) {
+ temp = realloc2n(ret, &len);
+ ret = temp;
+ }
+
+ ch = *in;
+
+ if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
+ unsigned char val;
+ ret[out++] = '%';
+ val = ch >> 4;
+ if (val <= 9)
+ ret[out++] = '0' + val;
+ else
+ ret[out++] = 'A' + val - 0xA;
+ val = ch & 0xF;
+ if (val <= 9)
+ ret[out++] = '0' + val;
+ else
+ ret[out++] = 'A' + val - 0xA;
+ in++;
+ } else {
+ ret[out++] = *in++;
+ }
+
+ }
+ ret[out] = 0;
+ return(ret);
+}
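+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   char *s = uri_string_escape("a b/c", "/");
+ *       => s is "a%20b/c"; the space is escaped, '/' is kept because it is
+ *          in the exception list
+ *   g_free(s);
+ */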
+
+/************************************************************************
+ * *
+ * Public functions *
+ * *
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @URI: the URI instance found in the document
+ * @base: the base value
+ *
+ * Computes the final URI of the reference by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * of error.
+ */
+char *
+uri_resolve(const char *uri, const char *base) {
+ char *val = NULL;
+ int ret, len, indx, cur, out;
+ URI *ref = NULL;
+ URI *bas = NULL;
+ URI *res = NULL;
+
+ /*
+ * 1) The URI reference is parsed into the potential four components and
+ * fragment identifier, as described in Section 4.3.
+ *
+ * NOTE that a completely empty URI is treated by modern browsers
+ * as a reference to "." rather than as a synonym for the current
+ * URI. Should we do that here?
+ */
+ if (uri == NULL)
+ ret = -1;
+ else {
+ if (*uri) {
+ ref = uri_new();
+ ret = uri_parse_into(ref, uri);
+ }
+ else
+ ret = 0;
+ }
+ if (ret != 0)
+ goto done;
+ if ((ref != NULL) && (ref->scheme != NULL)) {
+ /*
+ * The URI is absolute don't modify.
+ */
+ val = g_strdup(uri);
+ goto done;
+ }
+ if (base == NULL)
+ ret = -1;
+ else {
+ bas = uri_new();
+ ret = uri_parse_into(bas, base);
+ }
+ if (ret != 0) {
+ if (ref)
+ val = uri_to_string(ref);
+ goto done;
+ }
+ if (ref == NULL) {
+ /*
+ * the base fragment must be ignored
+ */
+ g_free(bas->fragment);
+ bas->fragment = NULL;
+ val = uri_to_string(bas);
+ goto done;
+ }
+
+ /*
+ * 2) If the path component is empty and the scheme, authority, and
+ * query components are undefined, then it is a reference to the
+ * current document and we are done. Otherwise, the reference URI's
+ * query and fragment components are defined as found (or not found)
+ * within the URI reference and not inherited from the base URI.
+ *
+ * NOTE that in modern browsers, the parsing differs from the above
+ * in the following aspect: the query component is allowed to be
+ * defined while still treating this as a reference to the current
+ * document.
+ */
+ res = uri_new();
+ if ((ref->scheme == NULL) && (ref->path == NULL) &&
+ ((ref->authority == NULL) && (ref->server == NULL))) {
+ res->scheme = g_strdup(bas->scheme);
+ if (bas->authority != NULL)
+ res->authority = g_strdup(bas->authority);
+ else if (bas->server != NULL) {
+ res->server = g_strdup(bas->server);
+ res->user = g_strdup(bas->user);
+ res->port = bas->port;
+ }
+ res->path = g_strdup(bas->path);
+ if (ref->query != NULL) {
+ res->query = g_strdup (ref->query);
+ } else {
+ res->query = g_strdup(bas->query);
+ }
+ res->fragment = g_strdup(ref->fragment);
+ goto step_7;
+ }
+
+ /*
+ * 3) If the scheme component is defined, indicating that the reference
+ * starts with a scheme name, then the reference is interpreted as an
+ * absolute URI and we are done. Otherwise, the reference URI's
+ * scheme is inherited from the base URI's scheme component.
+ */
+ if (ref->scheme != NULL) {
+ val = uri_to_string(ref);
+ goto done;
+ }
+ res->scheme = g_strdup(bas->scheme);
+
+ res->query = g_strdup(ref->query);
+ res->fragment = g_strdup(ref->fragment);
+
+ /*
+ * 4) If the authority component is defined, then the reference is a
+ * network-path and we skip to step 7. Otherwise, the reference
+ * URI's authority is inherited from the base URI's authority
+ * component, which will also be undefined if the URI scheme does not
+ * use an authority component.
+ */
+ if ((ref->authority != NULL) || (ref->server != NULL)) {
+ if (ref->authority != NULL)
+ res->authority = g_strdup(ref->authority);
+ else {
+ res->server = g_strdup(ref->server);
+ res->user = g_strdup(ref->user);
+ res->port = ref->port;
+ }
+ res->path = g_strdup(ref->path);
+ goto step_7;
+ }
+ if (bas->authority != NULL)
+ res->authority = g_strdup(bas->authority);
+ else if (bas->server != NULL) {
+ res->server = g_strdup(bas->server);
+ res->user = g_strdup(bas->user);
+ res->port = bas->port;
+ }
+
+ /*
+ * 5) If the path component begins with a slash character ("/"), then
+ * the reference is an absolute-path and we skip to step 7.
+ */
+ if ((ref->path != NULL) && (ref->path[0] == '/')) {
+ res->path = g_strdup(ref->path);
+ goto step_7;
+ }
+
+
+ /*
+ * 6) If this step is reached, then we are resolving a relative-path
+ * reference. The relative path needs to be merged with the base
+ * URI's path. Although there are many ways to do this, we will
+ * describe a simple method using a separate string buffer.
+ *
+ * Allocate a buffer large enough for the result string.
+ */
+ len = 2; /* extra / and 0 */
+ if (ref->path != NULL)
+ len += strlen(ref->path);
+ if (bas->path != NULL)
+ len += strlen(bas->path);
+ res->path = g_malloc(len);
+ res->path[0] = 0;
+
+ /*
+ * a) All but the last segment of the base URI's path component is
+ * copied to the buffer. In other words, any characters after the
+ * last (right-most) slash character, if any, are excluded.
+ */
+ cur = 0;
+ out = 0;
+ if (bas->path != NULL) {
+ while (bas->path[cur] != 0) {
+ while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
+ cur++;
+ if (bas->path[cur] == 0)
+ break;
+
+ cur++;
+ while (out < cur) {
+ res->path[out] = bas->path[out];
+ out++;
+ }
+ }
+ }
+ res->path[out] = 0;
+
+ /*
+ * b) The reference's path component is appended to the buffer
+ * string.
+ */
+ if (ref->path != NULL && ref->path[0] != 0) {
+ indx = 0;
+ /*
+ * Ensure the path includes a '/'
+ */
+ if ((out == 0) && (bas->server != NULL))
+ res->path[out++] = '/';
+ while (ref->path[indx] != 0) {
+ res->path[out++] = ref->path[indx++];
+ }
+ }
+ res->path[out] = 0;
+
+ /*
+ * Steps c) to h) are really path normalization steps
+ */
+ normalize_uri_path(res->path);
+
+step_7:
+
+ /*
+ * 7) The resulting URI components, including any inherited from the
+ * base URI, are recombined to give the absolute form of the URI
+ * reference.
+ */
+ val = uri_to_string(res);
+
+done:
+ if (ref != NULL)
+ uri_free(ref);
+ if (bas != NULL)
+ uri_free(bas);
+ if (res != NULL)
+ uri_free(res);
+ return(val);
+}
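+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   char *s = uri_resolve("../img/pic1.gif",
+ *                         "http://example.org/docs/book1.html");
+ *       => s is "http://example.org/img/pic1.gif"
+ *   g_free(s);
+ */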
+
+/**
+ * uri_resolve_relative:
+ * @URI: the URI reference under consideration
+ * @base: the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base. Some examples of this operation include:
+ * base = "http://site1.com/docs/book1.html"
+ * URI input                        URI returned
+ * docs/pic1.gif                    pic1.gif
+ * docs/img/pic1.gif                img/pic1.gif
+ * img/pic1.gif                     ../img/pic1.gif
+ * http://site1.com/docs/pic1.gif   pic1.gif
+ * http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
+ *
+ * base = "docs/book1.html"
+ * URI input                        URI returned
+ * docs/pic1.gif                    pic1.gif
+ * docs/img/pic1.gif                img/pic1.gif
+ * img/pic1.gif                     ../img/pic1.gif
+ * http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really weird or complicated, it may be
+ * worthwhile to first convert it into a "nice" one by calling
+ * uri_resolve (using 'base') before calling this routine,
+ * since this routine (for reasonable efficiency) assumes URI has
+ * already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * error.
+ */
+char *
+uri_resolve_relative (const char *uri, const char * base)
+{
+ char *val = NULL;
+ int ret;
+ int ix;
+ int pos = 0;
+ int nbslash = 0;
+ int len;
+ URI *ref = NULL;
+ URI *bas = NULL;
+ char *bptr, *uptr, *vptr;
+ int remove_path = 0;
+
+ if ((uri == NULL) || (*uri == 0))
+ return NULL;
+
+ /*
+ * First parse URI into a standard form
+ */
+ ref = uri_new ();
+ /* If URI not already in "relative" form */
+ if (uri[0] != '.') {
+ ret = uri_parse_into (ref, uri);
+ if (ret != 0)
+ goto done; /* Error in URI, return NULL */
+ } else
+ ref->path = g_strdup(uri);
+
+ /*
+ * Next parse base into the same standard form
+ */
+ if ((base == NULL) || (*base == 0)) {
+ val = g_strdup (uri);
+ goto done;
+ }
+ bas = uri_new ();
+ if (base[0] != '.') {
+ ret = uri_parse_into (bas, base);
+ if (ret != 0)
+ goto done; /* Error in base, return NULL */
+ } else
+ bas->path = g_strdup(base);
+
+ /*
+ * If the scheme / server on the URI differs from the base,
+ * just return the URI
+ */
+ if ((ref->scheme != NULL) &&
+ ((bas->scheme == NULL) ||
+ (strcmp (bas->scheme, ref->scheme)) ||
+ (strcmp (bas->server, ref->server)))) {
+ val = g_strdup (uri);
+ goto done;
+ }
+ if (bas->path == ref->path ||
+ (bas->path && ref->path && !strcmp(bas->path, ref->path))) {
+ val = g_strdup("");
+ goto done;
+ }
+ if (bas->path == NULL) {
+ val = g_strdup(ref->path);
+ goto done;
+ }
+ if (ref->path == NULL) {
+ ref->path = (char *) "/";
+ remove_path = 1;
+ }
+
+ /*
+ * At this point (at last!) we can compare the two paths
+ *
+ * First we take care of the special case where either of the
+ * two path components may be missing (bug 316224)
+ */
+ if (bas->path == NULL) {
+ if (ref->path != NULL) {
+ uptr = ref->path;
+ if (*uptr == '/')
+ uptr++;
+ /* exception characters from uri_to_string */
+ val = uri_string_escape(uptr, "/;&=+$,");
+ }
+ goto done;
+ }
+ bptr = bas->path;
+ if (ref->path == NULL) {
+ for (ix = 0; bptr[ix] != 0; ix++) {
+ if (bptr[ix] == '/')
+ nbslash++;
+ }
+ uptr = NULL;
+ len = 1; /* this is for a string terminator only */
+ } else {
+ /*
+ * Next we compare the two strings and find where they first differ
+ */
+ if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
+ pos += 2;
+ if ((*bptr == '.') && (bptr[1] == '/'))
+ bptr += 2;
+ else if ((*bptr == '/') && (ref->path[pos] != '/'))
+ bptr++;
+ while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
+ pos++;
+
+ if (bptr[pos] == ref->path[pos]) {
+ val = g_strdup("");
+ goto done; /* (I can't imagine why anyone would do this) */
+ }
+
+ /*
+ * In URI, "back up" to the last '/' encountered. This will be the
+ * beginning of the "unique" suffix of URI
+ */
+ ix = pos;
+ if ((ref->path[ix] == '/') && (ix > 0))
+ ix--;
+ else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
+ ix -= 2;
+ for (; ix > 0; ix--) {
+ if (ref->path[ix] == '/')
+ break;
+ }
+ if (ix == 0) {
+ uptr = ref->path;
+ } else {
+ ix++;
+ uptr = &ref->path[ix];
+ }
+
+ /*
+ * In base, count the number of '/' from the differing point
+ */
+ if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
+ for (; bptr[ix] != 0; ix++) {
+ if (bptr[ix] == '/')
+ nbslash++;
+ }
+ }
+ len = strlen (uptr) + 1;
+ }
+
+ if (nbslash == 0) {
+ if (uptr != NULL)
+ /* exception characters from uri_to_string */
+ val = uri_string_escape(uptr, "/;&=+$,");
+ goto done;
+ }
+
+ /*
+ * Allocate just enough space for the returned string -
+ * length of the remainder of the URI, plus enough space
+ * for the "../" groups, plus one for the terminator
+ */
+ val = g_malloc (len + 3 * nbslash);
+ vptr = val;
+ /*
+ * Put in as many "../" as needed
+ */
+ for (; nbslash>0; nbslash--) {
+ *vptr++ = '.';
+ *vptr++ = '.';
+ *vptr++ = '/';
+ }
+ /*
+ * Finish up with the end of the URI
+ */
+ if (uptr != NULL) {
+ if ((vptr > val) && (len > 0) &&
+ (uptr[0] == '/') && (vptr[-1] == '/')) {
+ memcpy (vptr, uptr + 1, len - 1);
+ vptr[len - 2] = 0;
+ } else {
+ memcpy (vptr, uptr, len);
+ vptr[len - 1] = 0;
+ }
+ } else {
+ vptr[len - 1] = 0;
+ }
+
+ /* escape the freshly-built path */
+ vptr = val;
+ /* exception characters from uri_to_string */
+ val = uri_string_escape(vptr, "/;&=+$,");
+ g_free(vptr);
+
+done:
+ /*
+ * Free the working variables
+ */
+ if (remove_path != 0)
+ ref->path = NULL;
+ if (ref != NULL)
+ uri_free (ref);
+ if (bas != NULL)
+ uri_free (bas);
+
+ return val;
+}
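+
+/*
+ * Usage sketch (illustrative), mirroring the examples in the comment above:
+ *
+ *   char *rel = uri_resolve_relative("http://example.org/docs/img/pic1.gif",
+ *                                    "http://example.org/docs/book1.html");
+ *       => rel is "img/pic1.gif"
+ *   g_free(rel);
+ */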
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *
+query_params_new (int init_alloc)
+{
+ struct QueryParams *ps;
+
+ if (init_alloc <= 0) init_alloc = 1;
+
+ ps = g_new(QueryParams, 1);
+ ps->n = 0;
+ ps->alloc = init_alloc;
+ ps->p = g_new(QueryParam, ps->alloc);
+
+ return ps;
+}
+
+/* Ensure there is space to store at least one more parameter
+ * at the end of the set.
+ */
+static int
+query_params_append (struct QueryParams *ps,
+ const char *name, const char *value)
+{
+ if (ps->n >= ps->alloc) {
+ ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
+ ps->alloc *= 2;
+ }
+
+ ps->p[ps->n].name = g_strdup(name);
+ ps->p[ps->n].value = g_strdup(value);
+ ps->p[ps->n].ignore = 0;
+ ps->n++;
+
+ return 0;
+}
+
+void
+query_params_free (struct QueryParams *ps)
+{
+ int i;
+
+ for (i = 0; i < ps->n; ++i) {
+ g_free (ps->p[i].name);
+ g_free (ps->p[i].value);
+ }
+ g_free (ps->p);
+ g_free (ps);
+}
+
+struct QueryParams *
+query_params_parse (const char *query)
+{
+ struct QueryParams *ps;
+ const char *end, *eq;
+
+ ps = query_params_new (0);
+ if (!query || query[0] == '\0') return ps;
+
+ while (*query) {
+ char *name = NULL, *value = NULL;
+
+ /* Find the next separator, or end of the string. */
+ end = strchr (query, '&');
+ if (!end)
+ end = strchr (query, ';');
+ if (!end)
+ end = query + strlen (query);
+
+ /* Find the first '=' character between here and end. */
+ eq = strchr (query, '=');
+ if (eq && eq >= end) eq = NULL;
+
+ /* Empty section (eg. "&&"). */
+ if (end == query)
+ goto next;
+
+ /* If there is no '=' character, then we have just "name"
+ * and consistent with CGI.pm we assume value is "".
+ */
+ else if (!eq) {
+ name = uri_string_unescape (query, end - query, NULL);
+ value = NULL;
+ }
+ /* Or if we have "name=" here (works around annoying
+ * problem when calling uri_string_unescape with len = 0).
+ */
+ else if (eq+1 == end) {
+ name = uri_string_unescape (query, eq - query, NULL);
+ value = g_new0(char, 1);
+ }
+ /* If the '=' character is at the beginning then we have
+ * "=value" and consistent with CGI.pm we _ignore_ this.
+ */
+ else if (query == eq)
+ goto next;
+
+ /* Otherwise it's "name=value". */
+ else {
+ name = uri_string_unescape (query, eq - query, NULL);
+ value = uri_string_unescape (eq+1, end - (eq+1), NULL);
+ }
+
+ /* Append to the parameter set. */
+ query_params_append (ps, name, value);
+ g_free(name);
+ g_free(value);
+
+ next:
+ query = end;
+ if (*query) query ++; /* skip '&' separator */
+ }
+
+ return ps;
+}
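+
+/*
+ * Usage sketch (illustrative):
+ *
+ *   QueryParams *qp = query_params_parse("driver=qcow2&size=10G&readonly");
+ *   int i;
+ *   for (i = 0; i < qp->n; i++) {
+ *       printf("%s = %s\n", qp->p[i].name,
+ *              qp->p[i].value ? qp->p[i].value : "");
+ *   }
+ *       => prints "driver = qcow2", "size = 10G", "readonly = "
+ *          (a name with no '=' gets a NULL value)
+ *   query_params_free(qp);
+ */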