51 files changed, 15957 insertions, 0 deletions
diff --git a/src/util/Makefile.objs b/src/util/Makefile.objs
new file mode 100644
index 0000000..89dd80e
--- /dev/null
+++ b/src/util/Makefile.objs
@@ -0,0 +1,32 @@
+util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o
+util-obj-$(CONFIG_POSIX) += compatfd.o
+util-obj-$(CONFIG_POSIX) += event_notifier-posix.o
+util-obj-$(CONFIG_POSIX) += mmap-alloc.o
+util-obj-$(CONFIG_POSIX) += oslib-posix.o
+util-obj-$(CONFIG_POSIX) += qemu-openpty.o
+util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o
+util-obj-$(CONFIG_WIN32) += event_notifier-win32.o
+util-obj-$(CONFIG_POSIX) += memfd.o
+util-obj-$(CONFIG_WIN32) += oslib-win32.o
+util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o
+util-obj-y += envlist.o path.o module.o
+util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o
+util-obj-y += bitmap.o bitops.o hbitmap.o
+util-obj-y += fifo8.o
+util-obj-y += acl.o
+util-obj-y += error.o qemu-error.o
+util-obj-y += id.o
+util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o
+util-obj-y += qemu-option.o qemu-progress.o
+util-obj-y += hexdump.o
+util-obj-y += crc32c.o
+util-obj-y += throttle.o
+util-obj-y += getauxval.o
+util-obj-y += readline.o
+util-obj-y += rfifolock.o
+util-obj-y += rcu.o
+util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o
+util-obj-y += qemu-coroutine-sleep.o
+util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o
+util-obj-y += buffer.o
+util-obj-y += timed-average.o
diff --git a/src/util/acl.c b/src/util/acl.c
new file mode 100644
index 0000000..571d686
--- /dev/null
+++ b/src/util/acl.c
@@ -0,0 +1,187 @@
+/*
+ * QEMU access control list management
+ *
+ * Copyright (C) 2009 Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+
+#include "qemu-common.h"
+#include "qemu/acl.h"
+
+#ifdef CONFIG_FNMATCH
+#include <fnmatch.h>
+#endif
+
+
+static unsigned int nacls = 0;
+static qemu_acl **acls = NULL;
+
+
+
+qemu_acl *qemu_acl_find(const char *aclname)
+{
+    int i;
+    for (i = 0 ; i < nacls ; i++) {
+        if (strcmp(acls[i]->aclname, aclname) == 0)
+            return acls[i];
+    }
+
+    return NULL;
+}
+
+qemu_acl *qemu_acl_init(const char *aclname)
+{
+    qemu_acl *acl;
+
+    acl = qemu_acl_find(aclname);
+    if (acl)
+        return acl;
+
+    acl = g_malloc(sizeof(*acl));
+    acl->aclname = g_strdup(aclname);
+    /* Deny by default, so there is no window of "open
+     * access" between QEMU starting, and the user setting
+     * up ACLs in the monitor */
+    acl->defaultDeny = 1;
+
+    acl->nentries = 0;
+    QTAILQ_INIT(&acl->entries);
+
+    acls = g_realloc(acls, sizeof(*acls) * (nacls +1));
+    acls[nacls] = acl;
+    nacls++;
+
+    return acl;
+}
+
+int qemu_acl_party_is_allowed(qemu_acl *acl,
+                              const char *party)
+{
+    qemu_acl_entry *entry;
+
+    QTAILQ_FOREACH(entry, &acl->entries, next) {
+#ifdef CONFIG_FNMATCH
+        if (fnmatch(entry->match, party, 0) == 0)
+            return entry->deny ? 0 : 1;
+#else
+        /* No fnmatch, so fallback to exact string matching
+         * instead of allowing wildcards */
+        if (strcmp(entry->match, party) == 0)
+            return entry->deny ? 0 : 1;
+#endif
+    }
+
+    return acl->defaultDeny ? 0 : 1;
+}
+
+
+void qemu_acl_reset(qemu_acl *acl)
+{
+    qemu_acl_entry *entry, *next_entry;
+
+    /* Put back to deny by default, so there is no window
+     * of "open access" while the user re-initializes the
+     * access control list */
+    acl->defaultDeny = 1;
+    QTAILQ_FOREACH_SAFE(entry, &acl->entries, next, next_entry) {
+        QTAILQ_REMOVE(&acl->entries, entry, next);
+        g_free(entry->match);
+        g_free(entry);
+    }
+    acl->nentries = 0;
+}
+
+
+int qemu_acl_append(qemu_acl *acl,
+                    int deny,
+                    const char *match)
+{
+    qemu_acl_entry *entry;
+
+    entry = g_malloc(sizeof(*entry));
+    entry->match = g_strdup(match);
+    entry->deny = deny;
+
+    QTAILQ_INSERT_TAIL(&acl->entries, entry, next);
+    acl->nentries++;
+
+    return acl->nentries;
+}
+
+
+int qemu_acl_insert(qemu_acl *acl,
+                    int deny,
+                    const char *match,
+                    int index)
+{
+    qemu_acl_entry *tmp;
+    int i = 0;
+
+    if (index <= 0)
+        return -1;
+    if (index > acl->nentries) {
+        return qemu_acl_append(acl, deny, match);
+    }
+
+    QTAILQ_FOREACH(tmp, &acl->entries, next) {
+        i++;
+        if (i == index) {
+            qemu_acl_entry *entry;
+            entry = g_malloc(sizeof(*entry));
+            entry->match = g_strdup(match);
+            entry->deny = deny;
+
+            QTAILQ_INSERT_BEFORE(tmp, entry, next);
+            acl->nentries++;
+            break;
+        }
+    }
+
+    return i;
+}
+
+int qemu_acl_remove(qemu_acl *acl,
+                    const char *match)
+{
+    qemu_acl_entry *entry;
+    int i = 0;
+
+    QTAILQ_FOREACH(entry, &acl->entries, next) {
+        i++;
+        if (strcmp(entry->match, match) == 0) {
+            QTAILQ_REMOVE(&acl->entries, entry, next);
+            acl->nentries--;
+            g_free(entry->match);
+            g_free(entry);
+            return i;
+        }
+    }
+    return -1;
+}
+
+
+/*
+ * Local variables:
+ *  c-indent-level: 4
+ *  c-basic-offset: 4
+ *  tab-width: 8
+ * End:
+ */
diff --git a/src/util/bitmap.c b/src/util/bitmap.c
new file mode 100644
index 0000000..44f0f48
--- /dev/null
+++ b/src/util/bitmap.c
@@ -0,0 +1,339 @@
+/*
+ * Bitmap Module
+ *
+ * Stolen from linux/src/lib/bitmap.c
+ *
+ * Copyright (C) 2010 Corentin Chary
+ *
+ * This source code is licensed under the GNU General Public License,
+ * Version 2.
+ */
+
+#include "qemu/bitops.h"
+#include "qemu/bitmap.h"
+#include "qemu/atomic.h"
+
+/*
+ * bitmaps provide an array of bits, implemented using an
+ * array of unsigned longs.  The number of valid bits in a
+ * given bitmap does _not_ need to be an exact multiple of
+ * BITS_PER_LONG.
+ *
+ * The possible unused bits in the last, partially used word
+ * of a bitmap are 'don't care'.  The implementation makes
+ * no particular effort to keep them zero.  It ensures that
+ * their value will not affect the results of any operation.
+ * The bitmap operations that return Boolean (bitmap_empty,
+ * for example) or scalar (bitmap_weight, for example) results
+ * carefully filter out these unused bits from impacting their
+ * results.
+ *
+ * These operations actually hold to a slightly stronger rule:
+ * if you don't input any bitmaps to these ops that have some
+ * unused bits set, then they won't output any set unused bits
+ * in output bitmaps.
+ *
+ * The byte ordering of bitmaps is more natural on little
+ * endian architectures.
+ */
+
+int slow_bitmap_empty(const unsigned long *bitmap, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap[k]) {
+            return 0;
+        }
+    }
+    if (bits % BITS_PER_LONG) {
+        if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int slow_bitmap_full(const unsigned long *bitmap, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (~bitmap[k]) {
+            return 0;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+int slow_bitmap_equal(const unsigned long *bitmap1,
+                      const unsigned long *bitmap2, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap1[k] != bitmap2[k]) {
+            return 0;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+            return 0;
+        }
+    }
+
+    return 1;
+}
+
+void slow_bitmap_complement(unsigned long *dst, const unsigned long *src,
+                            long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        dst[k] = ~src[k];
+    }
+
+    if (bits % BITS_PER_LONG) {
+        dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits);
+    }
+}
+
+int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+    unsigned long result = 0;
+
+    for (k = 0; k < nr; k++) {
+        result |= (dst[k] = bitmap1[k] & bitmap2[k]);
+    }
+    return result != 0;
+}
+
+void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+                    const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+
+    for (k = 0; k < nr; k++) {
+        dst[k] = bitmap1[k] | bitmap2[k];
+    }
+}
+
+void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+                     const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+
+    for (k = 0; k < nr; k++) {
+        dst[k] = bitmap1[k] ^ bitmap2[k];
+    }
+}
+
+int slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+                       const unsigned long *bitmap2, long bits)
+{
+    long k;
+    long nr = BITS_TO_LONGS(bits);
+    unsigned long result = 0;
+
+    for (k = 0; k < nr; k++) {
+        result |= (dst[k] = bitmap1[k] & ~bitmap2[k]);
+    }
+    return result != 0;
+}
+
+#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG))
+
+void bitmap_set(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+    while (nr - bits_to_set >= 0) {
+        *p |= mask_to_set;
+        nr -= bits_to_set;
+        bits_to_set = BITS_PER_LONG;
+        mask_to_set = ~0UL;
+        p++;
+    }
+    if (nr) {
+        mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+        *p |= mask_to_set;
+    }
+}
+
+void bitmap_set_atomic(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
+
+    /* First word */
+    if (nr - bits_to_set > 0) {
+        atomic_or(p, mask_to_set);
+        nr -= bits_to_set;
+        bits_to_set = BITS_PER_LONG;
+        mask_to_set = ~0UL;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_set == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            *p = ~0UL;
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_set &= BITMAP_LAST_WORD_MASK(size);
+        atomic_or(p, mask_to_set);
+    } else {
+        /* If we avoided the full barrier in atomic_or(), issue a
+         * barrier to account for the assignments in the while loop.
+         */
+        smp_mb();
+    }
+}
+
+void bitmap_clear(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+
+    while (nr - bits_to_clear >= 0) {
+        *p &= ~mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        mask_to_clear = ~0UL;
+        p++;
+    }
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        *p &= ~mask_to_clear;
+    }
+}
+
+bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr)
+{
+    unsigned long *p = map + BIT_WORD(start);
+    const long size = start + nr;
+    int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
+    unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
+    unsigned long dirty = 0;
+    unsigned long old_bits;
+
+    /* First word */
+    if (nr - bits_to_clear > 0) {
+        old_bits = atomic_fetch_and(p, ~mask_to_clear);
+        dirty |= old_bits & mask_to_clear;
+        nr -= bits_to_clear;
+        bits_to_clear = BITS_PER_LONG;
+        mask_to_clear = ~0UL;
+        p++;
+    }
+
+    /* Full words */
+    if (bits_to_clear == BITS_PER_LONG) {
+        while (nr >= BITS_PER_LONG) {
+            if (*p) {
+                old_bits = atomic_xchg(p, 0);
+                dirty |= old_bits;
+            }
+            nr -= BITS_PER_LONG;
+            p++;
+        }
+    }
+
+    /* Last word */
+    if (nr) {
+        mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
+        old_bits = atomic_fetch_and(p, ~mask_to_clear);
+        dirty |= old_bits & mask_to_clear;
+    } else {
+        if (!dirty) {
+            smp_mb();
+        }
+    }
+
+    return dirty != 0;
+}
+
+#define ALIGN_MASK(x,mask)      (((x)+(mask))&~(mask))
+
+/**
+ * bitmap_find_next_zero_area - find a contiguous aligned zero area
+ * @map: The address to base the search on
+ * @size: The bitmap size in bits
+ * @start: The bitnumber to start searching at
+ * @nr: The number of zeroed bits we're looking for
+ * @align_mask: Alignment mask for zero area
+ *
+ * The @align_mask should be one less than a power of 2; the effect is that
+ * the bit offset of all zero areas this function finds is multiples of that
+ * power of 2. A @align_mask of 0 means no alignment is required.
+ */
+unsigned long bitmap_find_next_zero_area(unsigned long *map,
+                                         unsigned long size,
+                                         unsigned long start,
+                                         unsigned long nr,
+                                         unsigned long align_mask)
+{
+    unsigned long index, end, i;
+again:
+    index = find_next_zero_bit(map, size, start);
+
+    /* Align allocation */
+    index = ALIGN_MASK(index, align_mask);
+
+    end = index + nr;
+    if (end > size) {
+        return end;
+    }
+    i = find_next_bit(map, end, index);
+    if (i < end) {
+        start = i + 1;
+        goto again;
+    }
+    return index;
+}
+
+int slow_bitmap_intersects(const unsigned long *bitmap1,
+                           const unsigned long *bitmap2, long bits)
+{
+    long k, lim = bits/BITS_PER_LONG;
+
+    for (k = 0; k < lim; ++k) {
+        if (bitmap1[k] & bitmap2[k]) {
+            return 1;
+        }
+    }
+
+    if (bits % BITS_PER_LONG) {
+        if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) {
+            return 1;
+        }
+    }
+    return 0;
+}
diff --git a/src/util/bitops.c b/src/util/bitops.c
new file mode 100644
index 0000000..227c38b
--- /dev/null
+++ b/src/util/bitops.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ * Copyright (C) 2008 IBM Corporation
+ * Written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "qemu/bitops.h"
+
+#define BITOP_WORD(nr)		((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+			    unsigned long offset)
+{
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size) {
+        return size;
+    }
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp &= (~0UL << offset);
+        if (size < BITS_PER_LONG) {
+            goto found_first;
+        }
+        if (tmp) {
+            goto found_middle;
+        }
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size >= 4*BITS_PER_LONG) {
+        unsigned long d1, d2, d3;
+        tmp = *p;
+        d1 = *(p+1);
+        d2 = *(p+2);
+        d3 = *(p+3);
+        if (tmp) {
+            goto found_middle;
+        }
+        if (d1 | d2 | d3) {
+            break;
+        }
+        p += 4;
+        result += 4*BITS_PER_LONG;
+        size -= 4*BITS_PER_LONG;
+    }
+    while (size >= BITS_PER_LONG) {
+        if ((tmp = *(p++))) {
+            goto found_middle;
+        }
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size) {
+        return result;
+    }
+    tmp = *p;
+
+found_first:
+    tmp &= (~0UL >> (BITS_PER_LONG - size));
+    if (tmp == 0UL) {		/* Are any bits set? */
+        return result + size;	/* Nope. */
+    }
+found_middle:
+    return result + ctzl(tmp);
+}
+
+/*
+ * This implementation of find_{first,next}_zero_bit was stolen from
+ * Linus' asm-alpha/bitops.h.
+ */
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+				 unsigned long offset)
+{
+    const unsigned long *p = addr + BITOP_WORD(offset);
+    unsigned long result = offset & ~(BITS_PER_LONG-1);
+    unsigned long tmp;
+
+    if (offset >= size) {
+        return size;
+    }
+    size -= result;
+    offset %= BITS_PER_LONG;
+    if (offset) {
+        tmp = *(p++);
+        tmp |= ~0UL >> (BITS_PER_LONG - offset);
+        if (size < BITS_PER_LONG) {
+            goto found_first;
+        }
+        if (~tmp) {
+            goto found_middle;
+        }
+        size -= BITS_PER_LONG;
+        result += BITS_PER_LONG;
+    }
+    while (size & ~(BITS_PER_LONG-1)) {
+        if (~(tmp = *(p++))) {
+            goto found_middle;
+        }
+        result += BITS_PER_LONG;
+        size -= BITS_PER_LONG;
+    }
+    if (!size) {
+        return result;
+    }
+    tmp = *p;
+
+found_first:
+    tmp |= ~0UL << size;
+    if (tmp == ~0UL) {	/* Are any bits zero? */
+        return result + size;	/* Nope. */
+    }
+found_middle:
+    return result + ctzl(~tmp);
+}
+
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+    unsigned long words;
+    unsigned long tmp;
+
+    /* Start at final word. */
+    words = size / BITS_PER_LONG;
+
+    /* Partial final word? */
+    if (size & (BITS_PER_LONG-1)) {
+        tmp = (addr[words] & (~0UL >> (BITS_PER_LONG
+                                       - (size & (BITS_PER_LONG-1)))));
+        if (tmp) {
+            goto found;
+        }
+    }
+
+    while (words) {
+        tmp = addr[--words];
+        if (tmp) {
+        found:
+            return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp);
+        }
+    }
+
+    /* Not found */
+    return size;
+}
diff --git a/src/util/buffer.c b/src/util/buffer.c
new file mode 100644
index 0000000..8b27c08
--- /dev/null
+++ b/src/util/buffer.c
@@ -0,0 +1,171 @@
+/*
+ * QEMU generic buffers
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ *
+ */
+
+#include "qemu/buffer.h"
+#include "trace.h"
+
+#define BUFFER_MIN_INIT_SIZE     4096
+#define BUFFER_MIN_SHRINK_SIZE  65536
+
+/* define the factor alpha for the expentional smoothing
+ * that is used in the average size calculation. a shift
+ * of 7 results in an alpha of 1/2^7. */
+#define BUFFER_AVG_SIZE_SHIFT       7
+
+static size_t buffer_req_size(Buffer *buffer, size_t len)
+{
+    return MAX(BUFFER_MIN_INIT_SIZE,
+               pow2ceil(buffer->offset + len));
+}
+
+static void buffer_adj_size(Buffer *buffer, size_t len)
+{
+    size_t old = buffer->capacity;
+    buffer->capacity = buffer_req_size(buffer, len);
+    buffer->buffer = g_realloc(buffer->buffer, buffer->capacity);
+    trace_buffer_resize(buffer->name ?: "unnamed",
+                        old, buffer->capacity);
+
+    /* make it even harder for the buffer to shrink, reset average size
+     * to currenty capacity if it is larger than the average. */
+    buffer->avg_size = MAX(buffer->avg_size,
+                           buffer->capacity << BUFFER_AVG_SIZE_SHIFT);
+}
+
+void buffer_init(Buffer *buffer, const char *name, ...)
+{
+    va_list ap;
+
+    va_start(ap, name);
+    buffer->name = g_strdup_vprintf(name, ap);
+    va_end(ap);
+}
+
+static uint64_t buffer_get_avg_size(Buffer *buffer)
+{
+    return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT;
+}
+
+void buffer_shrink(Buffer *buffer)
+{
+    size_t new;
+
+    /* Calculate the average size of the buffer as
+     * avg_size = avg_size * ( 1 - a ) + required_size * a
+     * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT. */
+    buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1;
+    buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT;
+    buffer->avg_size += buffer_req_size(buffer, 0);
+
+    /* And then only shrink if the average size of the buffer is much
+     * too big, to avoid bumping up & down the buffers all the time.
+     * realloc() isn't exactly cheap ...  */
+    new = buffer_req_size(buffer, buffer_get_avg_size(buffer));
+    if (new < buffer->capacity >> 3 &&
+        new >= BUFFER_MIN_SHRINK_SIZE) {
+        buffer_adj_size(buffer, buffer_get_avg_size(buffer));
+    }
+
+    buffer_adj_size(buffer, 0);
+}
+
+void buffer_reserve(Buffer *buffer, size_t len)
+{
+    if ((buffer->capacity - buffer->offset) < len) {
+        buffer_adj_size(buffer, len);
+    }
+}
+
+gboolean buffer_empty(Buffer *buffer)
+{
+    return buffer->offset == 0;
+}
+
+uint8_t *buffer_end(Buffer *buffer)
+{
+    return buffer->buffer + buffer->offset;
+}
+
+void buffer_reset(Buffer *buffer)
+{
+    buffer->offset = 0;
+    buffer_shrink(buffer);
+}
+
+void buffer_free(Buffer *buffer)
+{
+    trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity);
+    g_free(buffer->buffer);
+    g_free(buffer->name);
+    buffer->offset = 0;
+    buffer->capacity = 0;
+    buffer->buffer = NULL;
+    buffer->name = NULL;
+}
+
+void buffer_append(Buffer *buffer, const void *data, size_t len)
+{
+    memcpy(buffer->buffer + buffer->offset, data, len);
+    buffer->offset += len;
+}
+
+void buffer_advance(Buffer *buffer, size_t len)
+{
+    memmove(buffer->buffer, buffer->buffer + len,
+            (buffer->offset - len));
+    buffer->offset -= len;
+    buffer_shrink(buffer);
+}
+
+void buffer_move_empty(Buffer *to, Buffer *from)
+{
+    trace_buffer_move_empty(to->name ?: "unnamed",
+                            from->offset,
+                            from->name ?: "unnamed");
+    assert(to->offset == 0);
+
+    g_free(to->buffer);
+    to->offset = from->offset;
+    to->capacity = from->capacity;
+    to->buffer = from->buffer;
+
+    from->offset = 0;
+    from->capacity = 0;
+    from->buffer = NULL;
+}
+
+void buffer_move(Buffer *to, Buffer *from)
+{
+    if (to->offset == 0) {
+        buffer_move_empty(to, from);
+        return;
+    }
+
+    trace_buffer_move(to->name ?: "unnamed",
+                      from->offset,
+                      from->name ?: "unnamed");
+    buffer_reserve(to, from->offset);
+    buffer_append(to, from->buffer, from->offset);
+
+    g_free(from->buffer);
+    from->offset = 0;
+    from->capacity = 0;
+    from->buffer = NULL;
+}
diff --git a/src/util/compatfd.c b/src/util/compatfd.c
new file mode 100644
index 0000000..e857150
--- /dev/null
+++ b/src/util/compatfd.c
@@ -0,0 +1,110 @@
+/*
+ * signalfd/eventfd compatibility
+ *
+ * Copyright IBM, Corp. 2008
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "qemu/compatfd.h"
+#include "qemu/thread.h"
+
+#include <sys/syscall.h>
+
+struct sigfd_compat_info
+{
+    sigset_t mask;
+    int fd;
+};
+
+static void *sigwait_compat(void *opaque)
+{
+    struct sigfd_compat_info *info = opaque;
+
+    while (1) {
+        int sig;
+        int err;
+
+        err = sigwait(&info->mask, &sig);
+        if (err != 0) {
+            if (errno == EINTR) {
+                continue;
+            } else {
+                return NULL;
+            }
+        } else {
+            struct qemu_signalfd_siginfo buffer;
+            size_t offset = 0;
+
+            memset(&buffer, 0, sizeof(buffer));
+            buffer.ssi_signo = sig;
+
+            while (offset < sizeof(buffer)) {
+                ssize_t len;
+
+                len = write(info->fd, (char *)&buffer + offset,
+                            sizeof(buffer) - offset);
+                if (len == -1 && errno == EINTR)
+                    continue;
+
+                if (len <= 0) {
+                    return NULL;
+                }
+
+                offset += len;
+            }
+        }
+    }
+}
+
+static int qemu_signalfd_compat(const sigset_t *mask)
+{
+    struct sigfd_compat_info *info;
+    QemuThread thread;
+    int fds[2];
+
+    info = malloc(sizeof(*info));
+    if (info == NULL) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    if (pipe(fds) == -1) {
+        free(info);
+        return -1;
+    }
+
+    qemu_set_cloexec(fds[0]);
+    qemu_set_cloexec(fds[1]);
+
+    memcpy(&info->mask, mask, sizeof(*mask));
+    info->fd = fds[1];
+
+    qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info,
+                       QEMU_THREAD_DETACHED);
+
+    return fds[0];
+}
+
+int qemu_signalfd(const sigset_t *mask)
+{
+#if defined(CONFIG_SIGNALFD)
+    int ret;
+
+    ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8);
+    if (ret != -1) {
+        qemu_set_cloexec(ret);
+        return ret;
+    }
+#endif
+
+    return qemu_signalfd_compat(mask);
+}
diff --git a/src/util/coroutine-gthread.c b/src/util/coroutine-gthread.c
new file mode 100644
index 0000000..0bcd778
--- /dev/null
+++ b/src/util/coroutine-gthread.c
@@ -0,0 +1,198 @@
+/*
+ * GThread coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <glib.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+    Coroutine base;
+    GThread *thread;
+    bool runnable;
+    bool free_on_thread_exit;
+    CoroutineAction action;
+} CoroutineGThread;
+
+static CompatGMutex coroutine_lock;
+static CompatGCond coroutine_cond;
+
+/* GLib 2.31 and beyond deprecated various parts of the thread API,
+ * but the new interfaces are not available in older GLib versions
+ * so we have to cope with both.
+ */
+#if GLIB_CHECK_VERSION(2, 31, 0)
+/* Awkwardly, the GPrivate API doesn't provide a way to update the
+ * GDestroyNotify handler for the coroutine key dynamically. So instead
+ * we track whether or not the CoroutineGThread should be freed on
+ * thread exit / coroutine key update using the free_on_thread_exit
+ * field.
+ */
+static void coroutine_destroy_notify(gpointer data)
+{
+    CoroutineGThread *co = data;
+    if (co && co->free_on_thread_exit) {
+        g_free(co);
+    }
+}
+
+static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify);
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+    return g_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+                                     bool free_on_thread_exit)
+{
+    /* Unlike g_static_private_set() this does not call the GDestroyNotify
+     * if the previous value of the key was NULL. Fortunately we only need
+     * the GDestroyNotify in the non-NULL key case.
+     */
+    co->free_on_thread_exit = free_on_thread_exit;
+    g_private_replace(&coroutine_key, co);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+    return g_thread_new("coroutine", func, data);
+}
+
+#else
+
+/* Handle older GLib versions */
+
+static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT;
+
+static inline CoroutineGThread *get_coroutine_key(void)
+{
+    return g_static_private_get(&coroutine_key);
+}
+
+static inline void set_coroutine_key(CoroutineGThread *co,
+                                     bool free_on_thread_exit)
+{
+    g_static_private_set(&coroutine_key, co,
+                         free_on_thread_exit ? (GDestroyNotify)g_free : NULL);
+}
+
+static inline GThread *create_thread(GThreadFunc func, gpointer data)
+{
+    return g_thread_create_full(func, data, 0, TRUE, TRUE,
+                                G_THREAD_PRIORITY_NORMAL, NULL);
+}
+
+#endif
+
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+    if (!g_thread_supported()) {
+        g_thread_init(NULL);
+    }
+#endif
+}
+
+static void coroutine_wait_runnable_locked(CoroutineGThread *co)
+{
+    while (!co->runnable) {
+        g_cond_wait(&coroutine_cond, &coroutine_lock);
+    }
+}
+
+static void coroutine_wait_runnable(CoroutineGThread *co)
+{
+    g_mutex_lock(&coroutine_lock);
+    coroutine_wait_runnable_locked(co);
+    g_mutex_unlock(&coroutine_lock);
+}
+
+static gpointer coroutine_thread(gpointer opaque)
+{
+    CoroutineGThread *co = opaque;
+
+    set_coroutine_key(co, false);
+    coroutine_wait_runnable(co);
+    co->base.entry(co->base.entry_arg);
+    qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE);
+    return NULL;
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    CoroutineGThread *co;
+
+    co = g_malloc0(sizeof(*co));
+    co->thread = create_thread(coroutine_thread, co);
+    if (!co->thread) {
+        g_free(co);
+        return NULL;
+    }
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_);
+
+    g_thread_join(co->thread);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_,
+                                      Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_);
+    CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_);
+
+    g_mutex_lock(&coroutine_lock);
+    from->runnable = false;
+    from->action = action;
+    to->runnable = true;
+    to->action = action;
+    g_cond_broadcast(&coroutine_cond);
+
+    if (action != COROUTINE_TERMINATE) {
+        coroutine_wait_runnable_locked(from);
+    }
+    g_mutex_unlock(&coroutine_lock);
+    return from->action;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineGThread *co = get_coroutine_key();
+    if (!co) {
+        co = g_malloc0(sizeof(*co));
+        co->runnable = true;
+        set_coroutine_key(co, true);
+    }
+
+    return &co->base;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineGThread *co = get_coroutine_key();
+
+    return co && co->base.caller;
+}
diff --git a/src/util/coroutine-sigaltstack.c b/src/util/coroutine-sigaltstack.c
new file mode 100644
index 0000000..39842a4
--- /dev/null
+++ b/src/util/coroutine-sigaltstack.c
@@ -0,0 +1,293 @@
+/*
+ * sigaltstack coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ * Copyright (C) 2012  Alex Barcelo <abarcelo@ac.upc.edu>
+** This file is partly based on pth_mctx.c, from the GNU Portable Threads
+**  Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <pthread.h>
+#include <signal.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    sigjmp_buf env;
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+typedef struct {
+    /** Currently executing coroutine */
+    Coroutine *current;
+
+    /** The default coroutine */
+    CoroutineUContext leader;
+
+    /** Information for the signal handler (trampoline) */
+    sigjmp_buf tr_reenter;
+    volatile sig_atomic_t tr_called;
+    void *tr_handler;
+} CoroutineThreadState;
+
+static pthread_key_t thread_state_key;
+
+static CoroutineThreadState *coroutine_get_thread_state(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    if (!s) {
+        s = g_malloc0(sizeof(*s));
+        s->current = &s->leader.base;
+        pthread_setspecific(thread_state_key, s);
+    }
+    return s;
+}
+
+static void qemu_coroutine_thread_cleanup(void *opaque)
+{
+    CoroutineThreadState *s = opaque;
+
+    g_free(s);
+}
+
+static void __attribute__((constructor)) coroutine_init(void)
+{
+    int ret;
+
+    ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup);
+    if (ret != 0) {
+        fprintf(stderr, "unable to create leader key: %s\n", strerror(errno));
+        abort();
+    }
+}
+
+/* "boot" function
+ * This is what starts the coroutine, is called from the trampoline
+ * (from the signal handler when it is not signal handling, read ahead
+ * for more information).
+ */
+static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
+{
+    /* Initialize longjmp environment and switch back the caller */
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+/*
+ * This is used as the signal handler. This is called with the brand new stack
+ * (thanks to sigaltstack). We have to return, given that this is a signal
+ * handler and the sigmask and some other things are changed.
+ */
+static void coroutine_trampoline(int signal)
+{
+    CoroutineUContext *self;
+    Coroutine *co;
+    CoroutineThreadState *coTS;
+
+    /* Get the thread specific information */
+    coTS = coroutine_get_thread_state();
+    self = coTS->tr_handler;
+    coTS->tr_called = 1;
+    co = &self->base;
+
+    /*
+     * Here we have to do a bit of a ping pong between the caller, given that
+     * this is a signal handler and we have to do a return "soon". Then the
+     * caller can reestablish everything and do a siglongjmp here again.
+     */
+    if (!sigsetjmp(coTS->tr_reenter, 0)) {
+        return;
+    }
+
+    /*
+     * Ok, the caller has siglongjmp'ed back to us, so now prepare
+     * us for the real machine state switching. We have to jump
+     * into another function here to get a new stack context for
+     * the auto variables (which have to be auto-variables
+     * because the start of the thread happens later). Else with
+     * PIC (i.e. Position Independent Code which is used when PTH
+     * is built as a shared library) most platforms would
+     * horrible core dump as experience showed.
+     */
+    coroutine_bootstrap(self, co);
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    CoroutineThreadState *coTS;
+    struct sigaction sa;
+    struct sigaction osa;
+    stack_t ss;
+    stack_t oss;
+    sigset_t sigs;
+    sigset_t osigs;
+    sigjmp_buf old_env;
+
+    /* The way to manipulate stack is with the sigaltstack function. We
+     * prepare a stack, with it delivering a signal to ourselves and then
+     * put sigsetjmp/siglongjmp where needed.
+     * This has been done keeping coroutine-ucontext as a model and with the
+     * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics
+     * of the coroutines and see pth_mctx.c (from the pth project) for the
+     * sigaltstack way of manipulating stacks.
+     */
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    coTS = coroutine_get_thread_state();
+    coTS->tr_handler = co;
+
+    /*
+     * Preserve the SIGUSR2 signal state, block SIGUSR2,
+     * and establish our signal handler. The signal will
+     * later transfer control onto the signal stack.
+     */
+    sigemptyset(&sigs);
+    sigaddset(&sigs, SIGUSR2);
+    pthread_sigmask(SIG_BLOCK, &sigs, &osigs);
+    sa.sa_handler = coroutine_trampoline;
+    sigfillset(&sa.sa_mask);
+    sa.sa_flags = SA_ONSTACK;
+    if (sigaction(SIGUSR2, &sa, &osa) != 0) {
+        abort();
+    }
+
+    /*
+     * Set the new stack.
+     */
+    ss.ss_sp = co->stack;
+    ss.ss_size = stack_size;
+    ss.ss_flags = 0;
+    if (sigaltstack(&ss, &oss) < 0) {
+        abort();
+    }
+
+    /*
+     * Now transfer control onto the signal stack and set it up.
+     * It will return immediately via "return" after the sigsetjmp()
+     * was performed. Be careful here with race conditions.  The
+     * signal can be delivered the first time sigsuspend() is
+     * called.
+     */
+    coTS->tr_called = 0;
+    pthread_kill(pthread_self(), SIGUSR2);
+    sigfillset(&sigs);
+    sigdelset(&sigs, SIGUSR2);
+    while (!coTS->tr_called) {
+        sigsuspend(&sigs);
+    }
+
+    /*
+     * Inform the system that we are back off the signal stack by
+     * removing the alternative signal stack. Be careful here: It
+     * first has to be disabled, before it can be removed.
+     */
+    sigaltstack(NULL, &ss);
+    ss.ss_flags = SS_DISABLE;
+    if (sigaltstack(&ss, NULL) < 0) {
+        abort();
+    }
+    sigaltstack(NULL, &ss);
+    if (!(oss.ss_flags & SS_DISABLE)) {
+        sigaltstack(&oss, NULL);
+    }
+
+    /*
+     * Restore the old SIGUSR2 signal handler and mask
+     */
+    sigaction(SIGUSR2, &osa, NULL);
+    pthread_sigmask(SIG_SETMASK, &osigs, NULL);
+
+    /*
+     * Now enter the trampoline again, but this time not as a signal
+     * handler. Instead we jump into it directly. The functionally
+     * redundant ping-pong pointer arithmetic is necessary to avoid
+     * type-conversion warnings related to the `volatile' qualifier and
+     * the fact that `jmp_buf' usually is an array type.
+     */
+    if (!sigsetjmp(old_env, 0)) {
+        siglongjmp(coTS->tr_reenter, 1);
+    }
+
+    /*
+     * Ok, we returned again, so now we're finished
+     */
+
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    CoroutineThreadState *s = coroutine_get_thread_state();
+    int ret;
+
+    s->current = to_;
+
+    ret = sigsetjmp(from->env, 0);
+    if (ret == 0) {
+        siglongjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    CoroutineThreadState *s = coroutine_get_thread_state();
+
+    return s->current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    CoroutineThreadState *s = pthread_getspecific(thread_state_key);
+
+    return s && s->current->caller;
+}
+
diff --git a/src/util/coroutine-ucontext.c b/src/util/coroutine-ucontext.c
new file mode 100644
index 0000000..26cbebb
--- /dev/null
+++ b/src/util/coroutine-ucontext.c
@@ -0,0 +1,194 @@
+/*
+ * ucontext coroutine initialization code
+ *
+ * Copyright (C) 2006  Anthony Liguori <anthony@codemonkey.ws>
+ * Copyright (C) 2011  Kevin Wolf <kwolf@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.0 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */
+#ifdef _FORTIFY_SOURCE
+#undef _FORTIFY_SOURCE
+#endif
+#include <stdlib.h>
+#include <setjmp.h>
+#include <stdint.h>
+#include <ucontext.h>
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+#ifdef CONFIG_VALGRIND_H
+#include <valgrind/valgrind.h>
+#endif
+
+typedef struct {
+    Coroutine base;
+    void *stack;
+    sigjmp_buf env;
+
+#ifdef CONFIG_VALGRIND_H
+    unsigned int valgrind_stack_id;
+#endif
+
+} CoroutineUContext;
+
+/**
+ * Per-thread coroutine bookkeeping
+ */
+static __thread CoroutineUContext leader;
+static __thread Coroutine *current;
+
+/*
+ * va_args to makecontext() must be type 'int', so passing
+ * the pointer we need may require several int args. This
+ * union is a quick hack to let us do that
+ */
+union cc_arg {
+    void *p;
+    int i[2];
+};
+
+static void coroutine_trampoline(int i0, int i1)
+{
+    union cc_arg arg;
+    CoroutineUContext *self;
+    Coroutine *co;
+
+    arg.i[0] = i0;
+    arg.i[1] = i1;
+    self = arg.p;
+    co = &self->base;
+
+    /* Initialize longjmp environment and switch back the caller */
+    if (!sigsetjmp(self->env, 0)) {
+        siglongjmp(*(sigjmp_buf *)co->entry_arg, 1);
+    }
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineUContext *co;
+    ucontext_t old_uc, uc;
+    sigjmp_buf old_env;
+    union cc_arg arg = {0};
+
+    /* The ucontext functions preserve signal masks which incurs a
+     * system call overhead.  sigsetjmp(buf, 0)/siglongjmp() does not
+     * preserve signal masks but only works on the current stack.
+     * Since we need a way to create and switch to a new stack, use
+     * the ucontext functions for that but sigsetjmp()/siglongjmp() for
+     * everything else.
+     */
+
+    if (getcontext(&uc) == -1) {
+        abort();
+    }
+
+    co = g_malloc0(sizeof(*co));
+    co->stack = g_malloc(stack_size);
+    co->base.entry_arg = &old_env; /* stash away our jmp_buf */
+
+    uc.uc_link = &old_uc;
+    uc.uc_stack.ss_sp = co->stack;
+    uc.uc_stack.ss_size = stack_size;
+    uc.uc_stack.ss_flags = 0;
+
+#ifdef CONFIG_VALGRIND_H
+    co->valgrind_stack_id =
+        VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
+#endif
+
+    arg.p = co;
+
+    makecontext(&uc, (void (*)(void))coroutine_trampoline,
+                2, arg.i[0], arg.i[1]);
+
+    /* swapcontext() in, siglongjmp() back out */
+    if (!sigsetjmp(old_env, 0)) {
+        swapcontext(&old_uc, &uc);
+    }
+    return &co->base;
+}
+
+#ifdef CONFIG_VALGRIND_H
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+/* Work around an unused variable in the valgrind.h macro... */
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#endif
+static inline void valgrind_stack_deregister(CoroutineUContext *co)
+{
+    VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id);
+}
+#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE
+#pragma GCC diagnostic pop
+#endif
+#endif
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
+
+#ifdef CONFIG_VALGRIND_H
+    valgrind_stack_deregister(co);
+#endif
+
+    g_free(co->stack);
+    g_free(co);
+}
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the sigsetjmp() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
+    CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
+    int ret;
+
+    current = to_;
+
+    ret = sigsetjmp(from->env, 0);
+    if (ret == 0) {
+        siglongjmp(to->env, action);
+    }
+    return ret;
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/src/util/coroutine-win32.c b/src/util/coroutine-win32.c
new file mode 100644
index 0000000..4f922c5
--- /dev/null
+++ b/src/util/coroutine-win32.c
@@ -0,0 +1,101 @@
+/*
+ * Win32 coroutine initialization code
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine_int.h"
+
+typedef struct
+{
+    Coroutine base;
+
+    LPVOID fiber;
+    CoroutineAction action;
+} CoroutineWin32;
+
+static __thread CoroutineWin32 leader;
+static __thread Coroutine *current;
+
+/* This function is marked noinline to prevent GCC from inlining it
+ * into coroutine_trampoline(). If we allow it to do that then it
+ * hoists the code to get the address of the TLS variable "current"
+ * out of the while() loop. This is an invalid transformation because
+ * the SwitchToFiber() call may be called when running thread A but
+ * return in thread B, and so we might be in a different thread
+ * context each time round the loop.
+ */
+CoroutineAction __attribute__((noinline))
+qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
+                      CoroutineAction action)
+{
+    CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_);
+    CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_);
+
+    current = to_;
+
+    to->action = action;
+    SwitchToFiber(to->fiber);
+    return from->action;
+}
+
+static void CALLBACK coroutine_trampoline(void *co_)
+{
+    Coroutine *co = co_;
+
+    while (true) {
+        co->entry(co->entry_arg);
+        qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE);
+    }
+}
+
+Coroutine *qemu_coroutine_new(void)
+{
+    const size_t stack_size = 1 << 20;
+    CoroutineWin32 *co;
+
+    co = g_malloc0(sizeof(*co));
+    co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base);
+    return &co->base;
+}
+
+void qemu_coroutine_delete(Coroutine *co_)
+{
+    CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_);
+
+    DeleteFiber(co->fiber);
+    g_free(co);
+}
+
+Coroutine *qemu_coroutine_self(void)
+{
+    if (!current) {
+        current = &leader.base;
+        leader.fiber = ConvertThreadToFiber(NULL);
+    }
+    return current;
+}
+
+bool qemu_in_coroutine(void)
+{
+    return current && current->caller;
+}
diff --git a/src/util/crc32c.c b/src/util/crc32c.c
new file mode 100644
index 0000000..8866327
--- /dev/null
+++ b/src/util/crc32c.c
@@ -0,0 +1,115 @@
+/*
+ *  Castagnoli CRC32C Checksum Algorithm
+ *
+ *  Polynomial: 0x11EDC6F41
+ *
+ *  Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman
+ *               "Optimization of Cyclic Redundancy-Check Codes with 24
+ *                 and 32 Parity Bits",IEEE Transactions on Communication,
+ *                Volume 41, Number 6, June 1993
+ *
+ *  Copyright (c) 2013 Red Hat, Inc.,
+ *
+ *  Authors:
+ *   Jeff Cody <jcody@redhat.com>
+ *
+ *  Based on the Linux kernel cryptographic crc32c module,
+ *
+ *  Copyright (c) 2004 Cisco Systems, Inc.
+ *  Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ */
+
+#include "qemu-common.h"
+#include "qemu/crc32c.h"
+
+/*
+ * This is the CRC-32C table
+ * Generated with:
+ * width = 32 bits
+ * poly = 0x1EDC6F41
+ * reflect input bytes = true
+ * reflect output bytes = true
+ */
+
+static const uint32_t crc32c_table[256] = {
+    0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L,
+    0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL,
+    0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL,
+    0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L,
+    0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL,
+    0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L,
+    0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L,
+    0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL,
+    0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL,
+    0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L,
+    0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L,
+    0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL,
+    0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L,
+    0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL,
+    0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL,
+    0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L,
+    0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L,
+    0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L,
+    0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L,
+    0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L,
+    0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L,
+    0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L,
+    0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L,
+    0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L,
+    0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L,
+    0xA65C047DL, 0x5437877EL, 0x4767748AL, 0xB50CF789L,
+    0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L,
+    0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L,
+    0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L,
+    0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L,
+    0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L,
+    0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L,
+    0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL,
+    0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L,
+    0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L,
+    0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL,
+    0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L,
+    0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL,
+    0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL,
+    0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L,
+    0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L,
+    0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL,
+    0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL,
+    0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L,
+    0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL,
+    0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L,
+    0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L,
+    0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL,
+    0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L,
+    0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL,
+    0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL,
+    0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L,
+    0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL,
+    0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L,
+    0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L,
+    0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL,
+    0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL,
+    0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L,
+    0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L,
+    0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL,
+    0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L,
+    0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL,
+    0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 0x6AE7C44EL,
+    0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L
+};
+
+
+uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length)
+{
+    while (length--) {
+        crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8);
+    }
+    return crc^0xffffffff;
+}
+
diff --git a/src/util/cutils.c b/src/util/cutils.c
new file mode 100644
index 0000000..cfeb848
--- /dev/null
+++ b/src/util/cutils.c
@@ -0,0 +1,691 @@
+/*
+ * Simple C functions to supplement the C library
+ *
+ * Copyright (c) 2006 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/host-utils.h"
+#include <math.h>
+#include <limits.h>
+#include <errno.h>
+
+#include "qemu/sockets.h"
+#include "qemu/iov.h"
+#include "net/net.h"
+
+void strpadcpy(char *buf, int buf_size, const char *str, char pad)
+{
+    int len = qemu_strnlen(str, buf_size);
+    memcpy(buf, str, len);
+    memset(buf + len, pad, buf_size - len);
+}
+
+void pstrcpy(char *buf, int buf_size, const char *str)
+{
+    int c;
+    char *q = buf;
+
+    if (buf_size <= 0)
+        return;
+
+    for(;;) {
+        c = *str++;
+        if (c == 0 || q >= buf + buf_size - 1)
+            break;
+        *q++ = c;
+    }
+    *q = '\0';
+}
+
+/* strcat and truncate. */
+char *pstrcat(char *buf, int buf_size, const char *s)
+{
+    int len;
+    len = strlen(buf);
+    if (len < buf_size)
+        pstrcpy(buf + len, buf_size - len, s);
+    return buf;
+}
+
+int strstart(const char *str, const char *val, const char **ptr)
+{
+    const char *p, *q;
+    p = str;
+    q = val;
+    while (*q != '\0') {
+        if (*p != *q)
+            return 0;
+        p++;
+        q++;
+    }
+    if (ptr)
+        *ptr = p;
+    return 1;
+}
+
+int stristart(const char *str, const char *val, const char **ptr)
+{
+    const char *p, *q;
+    p = str;
+    q = val;
+    while (*q != '\0') {
+        if (qemu_toupper(*p) != qemu_toupper(*q))
+            return 0;
+        p++;
+        q++;
+    }
+    if (ptr)
+        *ptr = p;
+    return 1;
+}
+
+/* XXX: use host strnlen if available ? */
+int qemu_strnlen(const char *s, int max_len)
+{
+    int i;
+
+    for(i = 0; i < max_len; i++) {
+        if (s[i] == '\0') {
+            break;
+        }
+    }
+    return i;
+}
+
+char *qemu_strsep(char **input, const char *delim)
+{
+    char *result = *input;
+    if (result != NULL) {
+        char *p;
+
+        for (p = result; *p != '\0'; p++) {
+            if (strchr(delim, *p)) {
+                break;
+            }
+        }
+        if (*p == '\0') {
+            *input = NULL;
+        } else {
+            *p = '\0';
+            *input = p + 1;
+        }
+    }
+    return result;
+}
+
+time_t mktimegm(struct tm *tm)
+{
+    time_t t;
+    int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday;
+    if (m < 3) {
+        m += 12;
+        y--;
+    }
+    t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + 
+                 y / 400 - 719469);
+    t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec;
+    return t;
+}
+
+/*
+ * Make sure data goes on disk, but if possible do not bother to
+ * write out the inode just for timestamp updates.
+ *
+ * Unfortunately even in 2009 many operating systems do not support
+ * fdatasync and have to fall back to fsync.
+ */
+int qemu_fdatasync(int fd)
+{
+#ifdef CONFIG_FDATASYNC
+    return fdatasync(fd);
+#else
+    return fsync(fd);
+#endif
+}
+
+/*
+ * Searches for an area with non-zero content in a buffer
+ *
+ * Attention! The len must be a multiple of
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * and addr must be a multiple of sizeof(VECTYPE) due to
+ * restriction of optimizations in this function.
+ *
+ * can_use_buffer_find_nonzero_offset() can be used to check
+ * these requirements.
+ *
+ * The return value is the offset of the non-zero area rounded
+ * down to a multiple of sizeof(VECTYPE) for the first
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to
+ * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE)
+ * afterwards.
+ *
+ * If the buffer is all zero the return value is equal to len.
+ */
+
+size_t buffer_find_nonzero_offset(const void *buf, size_t len)
+{
+    const VECTYPE *p = buf;
+    const VECTYPE zero = (VECTYPE){0};
+    size_t i;
+
+    assert(can_use_buffer_find_nonzero_offset(buf, len));
+
+    if (!len) {
+        return 0;
+    }
+
+    for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) {
+        if (!ALL_EQ(p[i], zero)) {
+            return i * sizeof(VECTYPE);
+        }
+    }
+
+    for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR;
+         i < len / sizeof(VECTYPE);
+         i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) {
+        VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]);
+        VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]);
+        VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]);
+        VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]);
+        VECTYPE tmp01 = VEC_OR(tmp0, tmp1);
+        VECTYPE tmp23 = VEC_OR(tmp2, tmp3);
+        if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) {
+            break;
+        }
+    }
+
+    return i * sizeof(VECTYPE);
+}
+
+/*
+ * Checks if a buffer is all zeroes
+ *
+ * Attention! The len must be a multiple of 4 * sizeof(long) due to
+ * restriction of optimizations in this function.
+ */
+bool buffer_is_zero(const void *buf, size_t len)
+{
+    /*
+     * Use long as the biggest available internal data type that fits into the
+     * CPU register and unroll the loop to smooth out the effect of memory
+     * latency.
+     */
+
+    size_t i;
+    long d0, d1, d2, d3;
+    const long * const data = buf;
+
+    /* use vector optimized zero check if possible */
+    if (can_use_buffer_find_nonzero_offset(buf, len)) {
+        return buffer_find_nonzero_offset(buf, len) == len;
+    }
+
+    assert(len % (4 * sizeof(long)) == 0);
+    len /= sizeof(long);
+
+    for (i = 0; i < len; i += 4) {
+        d0 = data[i + 0];
+        d1 = data[i + 1];
+        d2 = data[i + 2];
+        d3 = data[i + 3];
+
+        if (d0 || d1 || d2 || d3) {
+            return false;
+        }
+    }
+
+    return true;
+}
+
+#ifndef _WIN32
+/* Sets a specific flag */
+int fcntl_setfl(int fd, int flag)
+{
+    int flags;
+
+    flags = fcntl(fd, F_GETFL);
+    if (flags == -1)
+        return -errno;
+
+    if (fcntl(fd, F_SETFL, flags | flag) == -1)
+        return -errno;
+
+    return 0;
+}
+#endif
+
+static int64_t suffix_mul(char suffix, int64_t unit)
+{
+    switch (qemu_toupper(suffix)) {
+    case QEMU_STRTOSZ_DEFSUFFIX_B:
+        return 1;
+    case QEMU_STRTOSZ_DEFSUFFIX_KB:
+        return unit;
+    case QEMU_STRTOSZ_DEFSUFFIX_MB:
+        return unit * unit;
+    case QEMU_STRTOSZ_DEFSUFFIX_GB:
+        return unit * unit * unit;
+    case QEMU_STRTOSZ_DEFSUFFIX_TB:
+        return unit * unit * unit * unit;
+    case QEMU_STRTOSZ_DEFSUFFIX_PB:
+        return unit * unit * unit * unit * unit;
+    case QEMU_STRTOSZ_DEFSUFFIX_EB:
+        return unit * unit * unit * unit * unit * unit;
+    }
+    return -1;
+}
+
+/*
+ * Convert string to bytes, allowing either B/b for bytes, K/k for KB,
+ * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned
+ * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on
+ * other error.
+ */
+int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end,
+                            const char default_suffix, int64_t unit)
+{
+    int64_t retval = -EINVAL;
+    char *endptr;
+    unsigned char c;
+    int mul_required = 0;
+    double val, mul, integral, fraction;
+
+    errno = 0;
+    val = strtod(nptr, &endptr);
+    if (isnan(val) || endptr == nptr || errno != 0) {
+        goto fail;
+    }
+    fraction = modf(val, &integral);
+    if (fraction != 0) {
+        mul_required = 1;
+    }
+    c = *endptr;
+    mul = suffix_mul(c, unit);
+    if (mul >= 0) {
+        endptr++;
+    } else {
+        mul = suffix_mul(default_suffix, unit);
+        assert(mul >= 0);
+    }
+    if (mul == 1 && mul_required) {
+        goto fail;
+    }
+    if ((val * mul >= INT64_MAX) || val < 0) {
+        retval = -ERANGE;
+        goto fail;
+    }
+    retval = val * mul;
+
+fail:
+    if (end) {
+        *end = endptr;
+    }
+
+    return retval;
+}
+
+int64_t qemu_strtosz_suffix(const char *nptr, char **end,
+                            const char default_suffix)
+{
+    return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024);
+}
+
+int64_t qemu_strtosz(const char *nptr, char **end)
+{
+    return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB);
+}
+
+/**
+ * Helper function for qemu_strto*l() functions.
+ */
+static int check_strtox_error(const char *p, char *endptr, const char **next,
+                              int err)
+{
+    /* If no conversion was performed, prefer BSD behavior over glibc
+     * behavior.
+     */
+    if (err == 0 && endptr == p) {
+        err = EINVAL;
+    }
+    if (!next && *endptr) {
+        return -EINVAL;
+    }
+    if (next) {
+        *next = endptr;
+    }
+    return -err;
+}
+
+/**
+ * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions.
+ *
+ * Convert ASCII string @nptr to a long integer value
+ * from the given @base. Parameters @nptr, @endptr, @base
+ * follows same semantics as strtol() C function.
+ *
+ * Unlike from strtol() function, if @endptr is not NULL, this
+ * function will return -EINVAL whenever it cannot fully convert
+ * the string in @nptr with given @base to a long. This function returns
+ * the result of the conversion only through the @result parameter.
+ *
+ * If NULL is passed in @endptr, then the whole string in @ntpr
+ * is a number otherwise it returns -EINVAL.
+ *
+ * RETURN VALUE
+ * Unlike from strtol() function, this wrapper returns either
+ * -EINVAL or the errno set by strtol() function (e.g -ERANGE).
+ * If the conversion overflows, -ERANGE is returned, and @result
+ * is set to the max value of the desired type
+ * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case
+ * of underflow, -ERANGE is returned, and @result is set to the min
+ * value of the desired type. For strtol(), strtoll(), @result is set to
+ * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it
+ * is set to 0.
+ */
+int qemu_strtol(const char *nptr, const char **endptr, int base,
+                long *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtol(nptr, &p, base);
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long integer.
+ *
+ * If string contains a negative number, value will be converted to
+ * the unsigned representation of the signed value, unless the original
+ * (nonnegated) value would overflow, in this case, it will set @result
+ * to ULONG_MAX, and return ERANGE.
+ *
+ * The same behavior holds, for qemu_strtoull() but sets @result to
+ * ULLONG_MAX instead of ULONG_MAX.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoul(const char *nptr, const char **endptr, int base,
+                 unsigned long *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoul(nptr, &p, base);
+        /* Windows returns 1 for negative out-of-range values.  */
+        if (errno == ERANGE) {
+            *result = -1;
+        }
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to a long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoll(const char *nptr, const char **endptr, int base,
+                 int64_t *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoll(nptr, &p, base);
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * Converts ASCII string to an unsigned long long integer.
+ *
+ * See qemu_strtol() documentation for more info.
+ */
+int qemu_strtoull(const char *nptr, const char **endptr, int base,
+                  uint64_t *result)
+{
+    char *p;
+    int err = 0;
+    if (!nptr) {
+        if (endptr) {
+            *endptr = nptr;
+        }
+        err = -EINVAL;
+    } else {
+        errno = 0;
+        *result = strtoull(nptr, &p, base);
+        /* Windows returns 1 for negative out-of-range values.  */
+        if (errno == ERANGE) {
+            *result = -1;
+        }
+        err = check_strtox_error(nptr, p, endptr, errno);
+    }
+    return err;
+}
+
+/**
+ * parse_uint:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @endptr: Destination for pointer to first character not consumed
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer
+ *
+ * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional
+ * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits.
+ *
+ * If @s is null, or @base is invalid, or @s doesn't start with an
+ * integer in the syntax above, set *@value to 0, *@endptr to @s, and
+ * return -EINVAL.
+ *
+ * Set *@endptr to point right beyond the parsed integer (even if the integer
+ * overflows or is negative, all digits will be parsed and *@endptr will
+ * point right beyond them).
+ *
+ * If the integer is negative, set *@value to 0, and return -ERANGE.
+ *
+ * If the integer overflows unsigned long long, set *@value to
+ * ULLONG_MAX, and return -ERANGE.
+ *
+ * Else, set *@value to the parsed integer, and return 0.
+ */
+int parse_uint(const char *s, unsigned long long *value, char **endptr,
+               int base)
+{
+    int r = 0;
+    char *endp = (char *)s;
+    unsigned long long val = 0;
+
+    if (!s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    errno = 0;
+    val = strtoull(s, &endp, base);
+    if (errno) {
+        r = -errno;
+        goto out;
+    }
+
+    if (endp == s) {
+        r = -EINVAL;
+        goto out;
+    }
+
+    /* make sure we reject negative numbers: */
+    while (isspace((unsigned char)*s)) {
+        s++;
+    }
+    if (*s == '-') {
+        val = 0;
+        r = -ERANGE;
+        goto out;
+    }
+
+out:
+    *value = val;
+    *endptr = endp;
+    return r;
+}
+
+/**
+ * parse_uint_full:
+ *
+ * @s: String to parse
+ * @value: Destination for parsed integer value
+ * @base: integer base, between 2 and 36 inclusive, or 0
+ *
+ * Parse unsigned integer from entire string
+ *
+ * Have the same behavior of parse_uint(), but with an additional check
+ * for additional data after the parsed number. If extra characters are present
+ * after the parsed number, the function will return -EINVAL, and *@v will
+ * be set to 0.
+ */
+int parse_uint_full(const char *s, unsigned long long *value, int base)
+{
+    char *endp;
+    int r;
+
+    r = parse_uint(s, value, &endp, base);
+    if (r < 0) {
+        return r;
+    }
+    if (*endp) {
+        *value = 0;
+        return -EINVAL;
+    }
+
+    return 0;
+}
+
+int qemu_parse_fd(const char *param)
+{
+    long fd;
+    char *endptr;
+
+    errno = 0;
+    fd = strtol(param, &endptr, 10);
+    if (param == endptr /* no conversion performed */                    ||
+        errno != 0      /* not representable as long; possibly others */ ||
+        *endptr != '\0' /* final string not empty */                     ||
+        fd < 0          /* invalid as file descriptor */                 ||
+        fd > INT_MAX    /* not representable as int */) {
+        return -1;
+    }
+    return fd;
+}
+
+/*
+ * Implementation of  ULEB128 (http://en.wikipedia.org/wiki/LEB128)
+ * Input is limited to 14-bit numbers
+ */
+int uleb128_encode_small(uint8_t *out, uint32_t n)
+{
+    g_assert(n <= 0x3fff);
+    if (n < 0x80) {
+        *out++ = n;
+        return 1;
+    } else {
+        *out++ = (n & 0x7f) | 0x80;
+        *out++ = n >> 7;
+        return 2;
+    }
+}
+
+int uleb128_decode_small(const uint8_t *in, uint32_t *n)
+{
+    if (!(*in & 0x80)) {
+        *n = *in++;
+        return 1;
+    } else {
+        *n = *in++ & 0x7f;
+        /* we exceed 14 bit number */
+        if (*in & 0x80) {
+            return -1;
+        }
+        *n |= *in++ << 7;
+        return 2;
+    }
+}
+
+/*
+ * helper to parse debug environment variables
+ */
+int parse_debug_env(const char *name, int max, int initial)
+{
+    char *debug_env = getenv(name);
+    char *inv = NULL;
+    long debug;
+
+    if (!debug_env) {
+        return initial;
+    }
+    errno = 0;
+    debug = strtol(debug_env, &inv, 10);
+    if (inv == debug_env) {
+        return initial;
+    }
+    if (debug < 0 || debug > max || errno != 0) {
+        fprintf(stderr, "warning: %s not in [0, %d]", name, max);
+        return initial;
+    }
+    return debug;
+}
+
+/*
+ * Helper to print ethernet mac address
+ */
+const char *qemu_ether_ntoa(const MACAddr *mac)
+{
+    static char ret[18];
+
+    snprintf(ret, sizeof(ret), "%02x:%02x:%02x:%02x:%02x:%02x",
+             mac->a[0], mac->a[1], mac->a[2], mac->a[3], mac->a[4], mac->a[5]);
+
+    return ret;
+}
diff --git a/src/util/envlist.c b/src/util/envlist.c
new file mode 100644
index 0000000..099a544
--- /dev/null
+++ b/src/util/envlist.c
@@ -0,0 +1,241 @@
+#include "qemu-common.h"
+#include "qemu/queue.h"
+#include "qemu/envlist.h"
+
+struct envlist_entry {
+	const char *ev_var;			/* actual env value */
+	QLIST_ENTRY(envlist_entry) ev_link;
+};
+
+struct envlist {
+	QLIST_HEAD(, envlist_entry) el_entries;	/* actual entries */
+	size_t el_count;			/* number of entries */
+};
+
+static int envlist_parse(envlist_t *envlist,
+    const char *env, int (*)(envlist_t *, const char *));
+
+/*
+ * Allocates new envlist and returns pointer to that or
+ * NULL in case of error.
+ */
+envlist_t *
+envlist_create(void)
+{
+	envlist_t *envlist;
+
+	if ((envlist = malloc(sizeof (*envlist))) == NULL)
+		return (NULL);
+
+	QLIST_INIT(&envlist->el_entries);
+	envlist->el_count = 0;
+
+	return (envlist);
+}
+
+/*
+ * Releases given envlist and its entries.
+ */
+void
+envlist_free(envlist_t *envlist)
+{
+	struct envlist_entry *entry;
+
+	assert(envlist != NULL);
+
+	while (envlist->el_entries.lh_first != NULL) {
+		entry = envlist->el_entries.lh_first;
+		QLIST_REMOVE(entry, ev_link);
+
+		free((char *)entry->ev_var);
+		free(entry);
+	}
+	free(envlist);
+}
+
+/*
+ * Parses comma separated list of set/modify environment
+ * variable entries and updates given enlist accordingly.
+ *
+ * For example:
+ *     envlist_parse(el, "HOME=foo,SHELL=/bin/sh");
+ *
+ * inserts/sets environment variables HOME and SHELL.
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_parse_set(envlist_t *envlist, const char *env)
+{
+	return (envlist_parse(envlist, env, &envlist_setenv));
+}
+
+/*
+ * Parses comma separated list of unset environment variable
+ * entries and removes given variables from given envlist.
+ *
+ * Returns 0 on success, errno otherwise.
+ */
+int
+envlist_parse_unset(envlist_t *envlist, const char *env)
+{
+	return (envlist_parse(envlist, env, &envlist_unsetenv));
+}
+
+/*
+ * Parses comma separated list of set, modify or unset entries
+ * and calls given callback for each entry.
+ *
+ * Returns 0 in case of success, errno otherwise.
+ */
+static int
+envlist_parse(envlist_t *envlist, const char *env,
+    int (*callback)(envlist_t *, const char *))
+{
+	char *tmpenv, *envvar;
+	char *envsave = NULL;
+    int ret = 0;
+    assert(callback != NULL);
+
+	if ((envlist == NULL) || (env == NULL))
+		return (EINVAL);
+
+	if ((tmpenv = strdup(env)) == NULL)
+		return (errno);
+    envsave = tmpenv;
+
+    do {
+        envvar = strchr(tmpenv, ',');
+        if (envvar != NULL) {
+            *envvar = '\0';
+        }
+        if ((*callback)(envlist, tmpenv) != 0) {
+            ret = errno;
+            break;
+		}
+        tmpenv = envvar + 1;
+    } while (envvar != NULL);
+
+    free(envsave);
+    return ret;
+}
+
+/*
+ * Sets environment value to envlist in similar manner
+ * than putenv(3).
+ *
+ * Returns 0 in success, errno otherwise.
+ */
+int
+envlist_setenv(envlist_t *envlist, const char *env)
+{
+	struct envlist_entry *entry = NULL;
+	const char *eq_sign;
+	size_t envname_len;
+
+	if ((envlist == NULL) || (env == NULL))
+		return (EINVAL);
+
+	/* find out first equals sign in given env */
+	if ((eq_sign = strchr(env, '=')) == NULL)
+		return (EINVAL);
+	envname_len = eq_sign - env + 1;
+
+	/*
+	 * If there already exists variable with given name
+	 * we remove and release it before allocating a whole
+	 * new entry.
+	 */
+	for (entry = envlist->el_entries.lh_first; entry != NULL;
+	    entry = entry->ev_link.le_next) {
+		if (strncmp(entry->ev_var, env, envname_len) == 0)
+			break;
+	}
+
+	if (entry != NULL) {
+		QLIST_REMOVE(entry, ev_link);
+		free((char *)entry->ev_var);
+		free(entry);
+	} else {
+		envlist->el_count++;
+	}
+
+	if ((entry = malloc(sizeof (*entry))) == NULL)
+		return (errno);
+	if ((entry->ev_var = strdup(env)) == NULL) {
+		free(entry);
+		return (errno);
+	}
+	QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link);
+
+	return (0);
+}
+
+/*
+ * Removes given env value from envlist in similar manner
+ * than unsetenv(3).  Returns 0 in success, errno otherwise.
+ */
+int
+envlist_unsetenv(envlist_t *envlist, const char *env)
+{
+	struct envlist_entry *entry;
+	size_t envname_len;
+
+	if ((envlist == NULL) || (env == NULL))
+		return (EINVAL);
+
+	/* env is not allowed to contain '=' */
+	if (strchr(env, '=') != NULL)
+		return (EINVAL);
+
+	/*
+	 * Find out the requested entry and remove
+	 * it from the list.
+	 */
+	envname_len = strlen(env);
+	for (entry = envlist->el_entries.lh_first; entry != NULL;
+	    entry = entry->ev_link.le_next) {
+		if (strncmp(entry->ev_var, env, envname_len) == 0)
+			break;
+	}
+	if (entry != NULL) {
+		QLIST_REMOVE(entry, ev_link);
+		free((char *)entry->ev_var);
+		free(entry);
+
+		envlist->el_count--;
+	}
+	return (0);
+}
+
+/*
+ * Returns given envlist as array of strings (in same form that
+ * global variable environ is).  Caller must free returned memory
+ * by calling free(3) for each element and for the array.  Returned
+ * array and given envlist are not related (no common references).
+ *
+ * If caller provides count pointer, number of items in array is
+ * stored there.  In case of error, NULL is returned and no memory
+ * is allocated.
+ */
+char **
+envlist_to_environ(const envlist_t *envlist, size_t *count)
+{
+	struct envlist_entry *entry;
+	char **env, **penv;
+
+	penv = env = malloc((envlist->el_count + 1) * sizeof (char *));
+	if (env == NULL)
+		return (NULL);
+
+	for (entry = envlist->el_entries.lh_first; entry != NULL;
+	    entry = entry->ev_link.le_next) {
+		*(penv++) = strdup(entry->ev_var);
+	}
+	*penv = NULL; /* NULL terminate the list */
+
+	if (count != NULL)
+		*count = envlist->el_count;
+
+	return (env);
+}
diff --git a/src/util/error.c b/src/util/error.c
new file mode 100644
index 0000000..80c89a2
--- /dev/null
+++ b/src/util/error.c
@@ -0,0 +1,241 @@
+/*
+ * QEMU Error Objects
+ *
+ * Copyright IBM, Corp. 2011
+ * Copyright (C) 2011-2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.  See
+ * the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qapi/error.h"
+#include "qemu/error-report.h"
+
+struct Error
+{
+    char *msg;
+    ErrorClass err_class;
+    const char *src, *func;
+    int line;
+    GString *hint;
+};
+
+Error *error_abort;
+Error *error_fatal;
+
+static void error_handle_fatal(Error **errp, Error *err)
+{
+    if (errp == &error_abort) {
+        fprintf(stderr, "Unexpected error in %s() at %s:%d:\n",
+                err->func, err->src, err->line);
+        error_report_err(err);
+        abort();
+    }
+    if (errp == &error_fatal) {
+        error_report_err(err);
+        exit(1);
+    }
+}
+
+static void error_setv(Error **errp,
+                       const char *src, int line, const char *func,
+                       ErrorClass err_class, const char *fmt, va_list ap)
+{
+    Error *err;
+    int saved_errno = errno;
+
+    if (errp == NULL) {
+        return;
+    }
+    assert(*errp == NULL);
+
+    err = g_malloc0(sizeof(*err));
+    err->msg = g_strdup_vprintf(fmt, ap);
+    err->err_class = err_class;
+    err->src = src;
+    err->line = line;
+    err->func = func;
+
+    error_handle_fatal(errp, err);
+    *errp = err;
+
+    errno = saved_errno;
+}
+
+void error_set_internal(Error **errp,
+                        const char *src, int line, const char *func,
+                        ErrorClass err_class, const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, err_class, fmt, ap);
+    va_end(ap);
+}
+
+void error_setg_internal(Error **errp,
+                         const char *src, int line, const char *func,
+                         const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+    va_end(ap);
+}
+
+void error_setg_errno_internal(Error **errp,
+                               const char *src, int line, const char *func,
+                               int os_errno, const char *fmt, ...)
+{
+    va_list ap;
+    char *msg;
+    int saved_errno = errno;
+
+    if (errp == NULL) {
+        return;
+    }
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+    va_end(ap);
+
+    if (os_errno != 0) {
+        msg = (*errp)->msg;
+        (*errp)->msg = g_strdup_printf("%s: %s", msg, strerror(os_errno));
+        g_free(msg);
+    }
+
+    errno = saved_errno;
+}
+
+void error_setg_file_open_internal(Error **errp,
+                                   const char *src, int line, const char *func,
+                                   int os_errno, const char *filename)
+{
+    error_setg_errno_internal(errp, src, line, func, os_errno,
+                              "Could not open '%s'", filename);
+}
+
+void error_append_hint(Error **errp, const char *fmt, ...)
+{
+    va_list ap;
+    int saved_errno = errno;
+    Error *err;
+
+    if (!errp) {
+        return;
+    }
+    err = *errp;
+    assert(err && errp != &error_abort);
+
+    if (!err->hint) {
+        err->hint = g_string_new(NULL);
+    }
+    va_start(ap, fmt);
+    g_string_append_vprintf(err->hint, fmt, ap);
+    va_end(ap);
+
+    errno = saved_errno;
+}
+
+#ifdef _WIN32
+
+void error_setg_win32_internal(Error **errp,
+                               const char *src, int line, const char *func,
+                               int win32_err, const char *fmt, ...)
+{
+    va_list ap;
+    char *msg1, *msg2;
+
+    if (errp == NULL) {
+        return;
+    }
+
+    va_start(ap, fmt);
+    error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap);
+    va_end(ap);
+
+    if (win32_err != 0) {
+        msg1 = (*errp)->msg;
+        msg2 = g_win32_error_message(win32_err);
+        (*errp)->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2,
+                                       (unsigned)win32_err);
+        g_free(msg2);
+        g_free(msg1);
+    }
+}
+
+#endif
+
+Error *error_copy(const Error *err)
+{
+    Error *err_new;
+
+    err_new = g_malloc0(sizeof(*err));
+    err_new->msg = g_strdup(err->msg);
+    err_new->err_class = err->err_class;
+    err_new->src = err->src;
+    err_new->line = err->line;
+    err_new->func = err->func;
+    if (err->hint) {
+        err_new->hint = g_string_new(err->hint->str);
+    }
+
+    return err_new;
+}
+
+ErrorClass error_get_class(const Error *err)
+{
+    return err->err_class;
+}
+
+const char *error_get_pretty(Error *err)
+{
+    return err->msg;
+}
+
+void error_report_err(Error *err)
+{
+    error_report("%s", error_get_pretty(err));
+    if (err->hint) {
+        error_printf_unless_qmp("%s\n", err->hint->str);
+    }
+    error_free(err);
+}
+
+void error_free(Error *err)
+{
+    if (err) {
+        g_free(err->msg);
+        if (err->hint) {
+            g_string_free(err->hint, true);
+        }
+        g_free(err);
+    }
+}
+
+void error_free_or_abort(Error **errp)
+{
+    assert(errp && *errp);
+    error_free(*errp);
+    *errp = NULL;
+}
+
+void error_propagate(Error **dst_errp, Error *local_err)
+{
+    if (!local_err) {
+        return;
+    }
+    error_handle_fatal(dst_errp, local_err);
+    if (dst_errp && !*dst_errp) {
+        *dst_errp = local_err;
+    } else {
+        error_free(local_err);
+    }
+}
diff --git a/src/util/event_notifier-posix.c b/src/util/event_notifier-posix.c
new file mode 100644
index 0000000..d4a0c63
--- /dev/null
+++ b/src/util/event_notifier-posix.c
@@ -0,0 +1,122 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu/event_notifier.h"
+#include "sysemu/char.h"
+#include "qemu/main-loop.h"
+
+#ifdef CONFIG_EVENTFD
+#include <sys/eventfd.h>
+#endif
+
+void event_notifier_init_fd(EventNotifier *e, int fd)
+{
+    e->rfd = fd;
+    e->wfd = fd;
+}
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    int fds[2];
+    int ret;
+
+#ifdef CONFIG_EVENTFD
+    ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+#else
+    ret = -1;
+    errno = ENOSYS;
+#endif
+    if (ret >= 0) {
+        e->rfd = e->wfd = ret;
+    } else {
+        if (errno != ENOSYS) {
+            return -errno;
+        }
+        if (qemu_pipe(fds) < 0) {
+            return -errno;
+        }
+        ret = fcntl_setfl(fds[0], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        ret = fcntl_setfl(fds[1], O_NONBLOCK);
+        if (ret < 0) {
+            ret = -errno;
+            goto fail;
+        }
+        e->rfd = fds[0];
+        e->wfd = fds[1];
+    }
+    if (active) {
+        event_notifier_set(e);
+    }
+    return 0;
+
+fail:
+    close(fds[0]);
+    close(fds[1]);
+    return ret;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    if (e->rfd != e->wfd) {
+        close(e->rfd);
+    }
+    close(e->wfd);
+}
+
+int event_notifier_get_fd(const EventNotifier *e)
+{
+    return e->rfd;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
+{
+    qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e);
+    return 0;
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    static const uint64_t value = 1;
+    ssize_t ret;
+
+    do {
+        ret = write(e->wfd, &value, sizeof(value));
+    } while (ret < 0 && errno == EINTR);
+
+    /* EAGAIN is fine, a read must be pending.  */
+    if (ret < 0 && errno != EAGAIN) {
+        return -errno;
+    }
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int value;
+    ssize_t len;
+    char buffer[512];
+
+    /* Drain the notify pipe.  For eventfd, only 8 bytes will be read.  */
+    value = 0;
+    do {
+        len = read(e->rfd, buffer, sizeof(buffer));
+        value |= (len > 0);
+    } while ((len == -1 && errno == EINTR) || len == sizeof(buffer));
+
+    return value;
+}
diff --git a/src/util/event_notifier-win32.c b/src/util/event_notifier-win32.c
new file mode 100644
index 0000000..6dbb530
--- /dev/null
+++ b/src/util/event_notifier-win32.c
@@ -0,0 +1,59 @@
+/*
+ * event notifier support
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+#include "qemu/event_notifier.h"
+#include "qemu/main-loop.h"
+
+int event_notifier_init(EventNotifier *e, int active)
+{
+    e->event = CreateEvent(NULL, TRUE, FALSE, NULL);
+    assert(e->event);
+    return 0;
+}
+
+void event_notifier_cleanup(EventNotifier *e)
+{
+    CloseHandle(e->event);
+}
+
+HANDLE event_notifier_get_handle(EventNotifier *e)
+{
+    return e->event;
+}
+
+int event_notifier_set_handler(EventNotifier *e,
+                               EventNotifierHandler *handler)
+{
+    if (handler) {
+        return qemu_add_wait_object(e->event, (IOHandler *)handler, e);
+    } else {
+        qemu_del_wait_object(e->event, (IOHandler *)handler, e);
+        return 0;
+    }
+}
+
+int event_notifier_set(EventNotifier *e)
+{
+    SetEvent(e->event);
+    return 0;
+}
+
+int event_notifier_test_and_clear(EventNotifier *e)
+{
+    int ret = WaitForSingleObject(e->event, 0);
+    if (ret == WAIT_OBJECT_0) {
+        ResetEvent(e->event);
+        return true;
+    }
+    return false;
+}
diff --git a/src/util/fifo8.c b/src/util/fifo8.c
new file mode 100644
index 0000000..0ea5ad9
--- /dev/null
+++ b/src/util/fifo8.c
@@ -0,0 +1,125 @@
+/*
+ * Generic FIFO component, implemented as a circular buffer.
+ *
+ * Copyright (c) 2012 Peter A. G. Crosthwaite
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu-common.h"
+#include "qemu/fifo8.h"
+
+void fifo8_create(Fifo8 *fifo, uint32_t capacity)
+{
+    fifo->data = g_new(uint8_t, capacity);
+    fifo->capacity = capacity;
+    fifo->head = 0;
+    fifo->num = 0;
+}
+
+void fifo8_destroy(Fifo8 *fifo)
+{
+    g_free(fifo->data);
+}
+
+void fifo8_push(Fifo8 *fifo, uint8_t data)
+{
+    if (fifo->num == fifo->capacity) {
+        abort();
+    }
+    fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data;
+    fifo->num++;
+}
+
+void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num)
+{
+    uint32_t start, avail;
+
+    if (fifo->num + num > fifo->capacity) {
+        abort();
+    }
+
+    start = (fifo->head + fifo->num) % fifo->capacity;
+
+    if (start + num <= fifo->capacity) {
+        memcpy(&fifo->data[start], data, num);
+    } else {
+        avail = fifo->capacity - start;
+        memcpy(&fifo->data[start], data, avail);
+        memcpy(&fifo->data[0], &data[avail], num - avail);
+    }
+
+    fifo->num += num;
+}
+
+uint8_t fifo8_pop(Fifo8 *fifo)
+{
+    uint8_t ret;
+
+    if (fifo->num == 0) {
+        abort();
+    }
+    ret = fifo->data[fifo->head++];
+    fifo->head %= fifo->capacity;
+    fifo->num--;
+    return ret;
+}
+
+const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num)
+{
+    uint8_t *ret;
+
+    if (max == 0 || max > fifo->num) {
+        abort();
+    }
+    *num = MIN(fifo->capacity - fifo->head, max);
+    ret = &fifo->data[fifo->head];
+    fifo->head += *num;
+    fifo->head %= fifo->capacity;
+    fifo->num -= *num;
+    return ret;
+}
+
+void fifo8_reset(Fifo8 *fifo)
+{
+    fifo->num = 0;
+    fifo->head = 0;
+}
+
+bool fifo8_is_empty(Fifo8 *fifo)
+{
+    return (fifo->num == 0);
+}
+
+bool fifo8_is_full(Fifo8 *fifo)
+{
+    return (fifo->num == fifo->capacity);
+}
+
+uint32_t fifo8_num_free(Fifo8 *fifo)
+{
+    return fifo->capacity - fifo->num;
+}
+
+uint32_t fifo8_num_used(Fifo8 *fifo)
+{
+    return fifo->num;
+}
+
+const VMStateDescription vmstate_fifo8 = {
+    .name = "Fifo8",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity),
+        VMSTATE_UINT32(head, Fifo8),
+        VMSTATE_UINT32(num, Fifo8),
+        VMSTATE_END_OF_LIST()
+    }
+};
diff --git a/src/util/getauxval.c b/src/util/getauxval.c
new file mode 100644
index 0000000..1732ace
--- /dev/null
+++ b/src/util/getauxval.c
@@ -0,0 +1,109 @@
+/*
+ * QEMU access to the auxiliary vector
+ *
+ * Copyright (C) 2013 Red Hat, Inc
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/osdep.h"
+
+#ifdef CONFIG_GETAUXVAL
+/* Don't inline this in qemu/osdep.h, because pulling in <sys/auxv.h> for
+   the system declaration of getauxval pulls in the system <elf.h>, which
+   conflicts with qemu's version.  */
+
+#include <sys/auxv.h>
+
+unsigned long qemu_getauxval(unsigned long key)
+{
+    return getauxval(key);
+}
+#elif defined(__linux__)
+#include "elf.h"
+
+/* Our elf.h doesn't contain Elf32_auxv_t and Elf64_auxv_t, which is ok because
+   that just makes it easier to define it properly for the host here.  */
+typedef struct {
+    unsigned long a_type;
+    unsigned long a_val;
+} ElfW_auxv_t;
+
+static const ElfW_auxv_t *auxv;
+
+static const ElfW_auxv_t *qemu_init_auxval(void)
+{
+    ElfW_auxv_t *a;
+    ssize_t size = 512, r, ofs;
+    int fd;
+
+    /* Allocate some initial storage.  Make sure the first entry is set
+       to end-of-list, so that we've got a valid list in case of error.  */
+    auxv = a = g_malloc(size);
+    a[0].a_type = 0;
+    a[0].a_val = 0;
+
+    fd = open("/proc/self/auxv", O_RDONLY);
+    if (fd < 0) {
+        return a;
+    }
+
+    /* Read the first SIZE bytes.  Hopefully, this covers everything.  */
+    r = read(fd, a, size);
+
+    if (r == size) {
+        /* Continue to expand until we do get a partial read.  */
+        do {
+            ofs = size;
+            size *= 2;
+            auxv = a = g_realloc(a, size);
+            r = read(fd, (char *)a + ofs, ofs);
+        } while (r == ofs);
+    }
+
+    close(fd);
+    return a;
+}
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+    const ElfW_auxv_t *a = auxv;
+
+    if (unlikely(a == NULL)) {
+        a = qemu_init_auxval();
+    }
+
+    for (; a->a_type != 0; a++) {
+        if (a->a_type == type) {
+            return a->a_val;
+        }
+    }
+
+    return 0;
+}
+
+#else
+
+unsigned long qemu_getauxval(unsigned long type)
+{
+    return 0;
+}
+
+#endif
diff --git a/src/util/hbitmap.c b/src/util/hbitmap.c
new file mode 100644
index 0000000..50b888f
--- /dev/null
+++ b/src/util/hbitmap.c
@@ -0,0 +1,495 @@
+/*
+ * Hierarchical Bitmap Data Type
+ *
+ * Copyright Red Hat, Inc., 2012
+ *
+ * Author: Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include <string.h>
+#include <glib.h>
+#include <assert.h>
+#include "qemu/osdep.h"
+#include "qemu/hbitmap.h"
+#include "qemu/host-utils.h"
+#include "trace.h"
+
+/* HBitmaps provides an array of bits.  The bits are stored as usual in an
+ * array of unsigned longs, but HBitmap is also optimized to provide fast
+ * iteration over set bits; going from one bit to the next is O(logB n)
+ * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough
+ * that the number of levels is in fact fixed.
+ *
+ * In order to do this, it stacks multiple bitmaps with progressively coarser
+ * granularity; in all levels except the last, bit N is set iff the N-th
+ * unsigned long is nonzero in the immediately next level.  When iteration
+ * completes on the last level it can examine the 2nd-last level to quickly
+ * skip entire words, and even do so recursively to skip blocks of 64 words or
+ * powers thereof (32 on 32-bit machines).
+ *
+ * Given an index in the bitmap, it can be split in group of bits like
+ * this (for the 64-bit case):
+ *
+ *   bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
+ *   bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
+ *   bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
+ *
+ * So it is easy to move up simply by shifting the index right by
+ * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
+ * similarly, and add the word index within the group.  Iteration uses
+ * ffs (find first set bit) to find the next word to examine; this
+ * operation can be done in constant time in most current architectures.
+ *
+ * Setting or clearing a range of m bits on all levels, the work to perform
+ * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
+ *
+ * When iterating on a bitmap, each bit (on any level) is only visited
+ * once.  Hence, The total cost of visiting a bitmap with m bits in it is
+ * the number of bits that are set in all bitmaps.  Unless the bitmap is
+ * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
+ * cost of advancing from one bit to the next is usually constant (worst case
+ * O(logB n) as in the non-amortized complexity).
+ */
+
+struct HBitmap {
+    /* Number of total bits in the bottom level.  */
+    uint64_t size;
+
+    /* Number of set bits in the bottom level.  */
+    uint64_t count;
+
+    /* A scaling factor.  Given a granularity of G, each bit in the bitmap will
+     * will actually represent a group of 2^G elements.  Each operation on a
+     * range of bits first rounds the bits to determine which group they land
+     * in, and then affect the entire page; iteration will only visit the first
+     * bit of each group.  Here is an example of operations in a size-16,
+     * granularity-1 HBitmap:
+     *
+     *    initial state            00000000
+     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
+     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
+     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
+     *    reset(start=5, count=5)  00000000
+     *
+     * From an implementation point of view, when setting or resetting bits,
+     * the bitmap will scale bit numbers right by this amount of bits.  When
+     * iterating, the bitmap will scale bit numbers left by this amount of
+     * bits.
+     */
+    int granularity;
+
+    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
+     * coarsest).  Each bit in level N represents a word in level N+1 that
+     * has a set bit, except the last level where each bit represents the
+     * actual bitmap.
+     *
+     * Note that all bitmaps have the same number of levels.  Even a 1-bit
+     * bitmap will still allocate HBITMAP_LEVELS arrays.
+     */
+    unsigned long *levels[HBITMAP_LEVELS];
+
+    /* The length of each levels[] array. */
+    uint64_t sizes[HBITMAP_LEVELS];
+};
+
+/* Advance hbi to the next nonzero word and return it.  hbi->pos
+ * is updated.  Returns zero if we reach the end of the bitmap.
+ */
+unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
+{
+    size_t pos = hbi->pos;
+    const HBitmap *hb = hbi->hb;
+    unsigned i = HBITMAP_LEVELS - 1;
+
+    unsigned long cur;
+    do {
+        cur = hbi->cur[--i];
+        pos >>= BITS_PER_LEVEL;
+    } while (cur == 0);
+
+    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
+     * bits in the level 0 bitmap; thus we can repurpose the most significant
+     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures
+     * that the above loop ends even without an explicit check on i.
+     */
+
+    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
+        return 0;
+    }
+    for (; i < HBITMAP_LEVELS - 1; i++) {
+        /* Shift back pos to the left, matching the right shifts above.
+         * The index of this word's least significant set bit provides
+         * the low-order bits.
+         */
+        assert(cur);
+        pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
+        hbi->cur[i] = cur & (cur - 1);
+
+        /* Set up next level for iteration.  */
+        cur = hb->levels[i + 1][pos];
+    }
+
+    hbi->pos = pos;
+    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);
+
+    assert(cur);
+    return cur;
+}
+
+void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
+{
+    unsigned i, bit;
+    uint64_t pos;
+
+    hbi->hb = hb;
+    pos = first >> hb->granularity;
+    assert(pos < hb->size);
+    hbi->pos = pos >> BITS_PER_LEVEL;
+    hbi->granularity = hb->granularity;
+
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        bit = pos & (BITS_PER_LONG - 1);
+        pos >>= BITS_PER_LEVEL;
+
+        /* Drop bits representing items before first.  */
+        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);
+
+        /* We have already added level i+1, so the lowest set bit has
+         * been processed.  Clear it.
+         */
+        if (i != HBITMAP_LEVELS - 1) {
+            hbi->cur[i] &= ~(1UL << bit);
+        }
+    }
+}
+
+bool hbitmap_empty(const HBitmap *hb)
+{
+    return hb->count == 0;
+}
+
+int hbitmap_granularity(const HBitmap *hb)
+{
+    return hb->granularity;
+}
+
+uint64_t hbitmap_count(const HBitmap *hb)
+{
+    return hb->count << hb->granularity;
+}
+
+/* Count the number of set bits between start and end, not accounting for
+ * the granularity.  Also an example of how to use hbitmap_iter_next_word.
+ */
+static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
+{
+    HBitmapIter hbi;
+    uint64_t count = 0;
+    uint64_t end = last + 1;
+    unsigned long cur;
+    size_t pos;
+
+    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
+    for (;;) {
+        pos = hbitmap_iter_next_word(&hbi, &cur);
+        if (pos >= (end >> BITS_PER_LEVEL)) {
+            break;
+        }
+        count += ctpopl(cur);
+    }
+
+    if (pos == (end >> BITS_PER_LEVEL)) {
+        /* Drop bits representing the END-th and subsequent items.  */
+        int bit = end & (BITS_PER_LONG - 1);
+        cur &= (1UL << bit) - 1;
+        count += ctpopl(cur);
+    }
+
+    return count;
+}
+
+/* Setting starts at the last layer and propagates up if an element
+ * changes from zero to non-zero.
+ */
+static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long mask;
+    bool changed;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    mask = 2UL << (last & (BITS_PER_LONG - 1));
+    mask -= 1UL << (start & (BITS_PER_LONG - 1));
+    changed = (*elem == 0);
+    *elem |= mask;
+    return changed;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;
+    size_t lastpos = last >> BITS_PER_LEVEL;
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            changed |= (hb->levels[level][i] == 0);
+            hb->levels[level][i] = ~0UL;
+        }
+    }
+    changed |= hb_set_elem(&hb->levels[level][i], start, last);
+
+    /* If there was any change in this layer, we may have to update
+     * the one above.
+     */
+    if (level > 0 && changed) {
+        hb_set_between(hb, level - 1, pos, lastpos);
+    }
+}
+
+void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer.  */
+    uint64_t last = start + count - 1;
+
+    trace_hbitmap_set(hb, start, count,
+                      start >> hb->granularity, last >> hb->granularity);
+
+    start >>= hb->granularity;
+    last >>= hb->granularity;
+    count = last - start + 1;
+
+    hb->count += count - hb_count_between(hb, start, last);
+    hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+/* Resetting works the other way round: propagate up if the new
+ * value is zero.
+ */
+static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last)
+{
+    unsigned long mask;
+    bool blanked;
+
+    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
+    assert(start <= last);
+
+    mask = 2UL << (last & (BITS_PER_LONG - 1));
+    mask -= 1UL << (start & (BITS_PER_LONG - 1));
+    blanked = *elem != 0 && ((*elem & ~mask) == 0);
+    *elem &= ~mask;
+    return blanked;
+}
+
+/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */
+static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
+{
+    size_t pos = start >> BITS_PER_LEVEL;
+    size_t lastpos = last >> BITS_PER_LEVEL;
+    bool changed = false;
+    size_t i;
+
+    i = pos;
+    if (i < lastpos) {
+        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
+
+        /* Here we need a more complex test than when setting bits.  Even if
+         * something was changed, we must not blank bits in the upper level
+         * unless the lower-level word became entirely zero.  So, remove pos
+         * from the upper-level range if bits remain set.
+         */
+        if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) {
+            changed = true;
+        } else {
+            pos++;
+        }
+
+        for (;;) {
+            start = next;
+            next += BITS_PER_LONG;
+            if (++i == lastpos) {
+                break;
+            }
+            changed |= (hb->levels[level][i] != 0);
+            hb->levels[level][i] = 0UL;
+        }
+    }
+
+    /* Same as above, this time for lastpos.  */
+    if (hb_reset_elem(&hb->levels[level][i], start, last)) {
+        changed = true;
+    } else {
+        lastpos--;
+    }
+
+    if (level > 0 && changed) {
+        hb_reset_between(hb, level - 1, pos, lastpos);
+    }
+}
+
+void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count)
+{
+    /* Compute range in the last layer.  */
+    uint64_t last = start + count - 1;
+
+    trace_hbitmap_reset(hb, start, count,
+                        start >> hb->granularity, last >> hb->granularity);
+
+    start >>= hb->granularity;
+    last >>= hb->granularity;
+
+    hb->count -= hb_count_between(hb, start, last);
+    hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last);
+}
+
+void hbitmap_reset_all(HBitmap *hb)
+{
+    unsigned int i;
+
+    /* Same as hbitmap_alloc() except for memset() instead of malloc() */
+    for (i = HBITMAP_LEVELS; --i >= 1; ) {
+        memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long));
+    }
+
+    hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1);
+    hb->count = 0;
+}
+
+bool hbitmap_get(const HBitmap *hb, uint64_t item)
+{
+    /* Compute position and bit in the last layer.  */
+    uint64_t pos = item >> hb->granularity;
+    unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1));
+
+    return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0;
+}
+
+void hbitmap_free(HBitmap *hb)
+{
+    unsigned i;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        g_free(hb->levels[i]);
+    }
+    g_free(hb);
+}
+
+HBitmap *hbitmap_alloc(uint64_t size, int granularity)
+{
+    HBitmap *hb = g_new0(struct HBitmap, 1);
+    unsigned i;
+
+    assert(granularity >= 0 && granularity < 64);
+    size = (size + (1ULL << granularity) - 1) >> granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+
+    hb->size = size;
+    hb->granularity = granularity;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1);
+        hb->sizes[i] = size;
+        hb->levels[i] = g_new0(unsigned long, size);
+    }
+
+    /* We necessarily have free bits in level 0 due to the definition
+     * of HBITMAP_LEVELS, so use one for a sentinel.  This speeds up
+     * hbitmap_iter_skip_words.
+     */
+    assert(size == 1);
+    hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1);
+    return hb;
+}
+
+void hbitmap_truncate(HBitmap *hb, uint64_t size)
+{
+    bool shrink;
+    unsigned i;
+    uint64_t num_elements = size;
+    uint64_t old;
+
+    /* Size comes in as logical elements, adjust for granularity. */
+    size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity;
+    assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE));
+    shrink = size < hb->size;
+
+    /* bit sizes are identical; nothing to do. */
+    if (size == hb->size) {
+        return;
+    }
+
+    /* If we're losing bits, let's clear those bits before we invalidate all of
+     * our invariants. This helps keep the bitcount consistent, and will prevent
+     * us from carrying around garbage bits beyond the end of the map.
+     */
+    if (shrink) {
+        /* Don't clear partial granularity groups;
+         * start at the first full one. */
+        uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity);
+        uint64_t fix_count = (hb->size << hb->granularity) - start;
+
+        assert(fix_count);
+        hbitmap_reset(hb, start, fix_count);
+    }
+
+    hb->size = size;
+    for (i = HBITMAP_LEVELS; i-- > 0; ) {
+        size = MAX(BITS_TO_LONGS(size), 1);
+        if (hb->sizes[i] == size) {
+            break;
+        }
+        old = hb->sizes[i];
+        hb->sizes[i] = size;
+        hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long));
+        if (!shrink) {
+            memset(&hb->levels[i][old], 0x00,
+                   (size - old) * sizeof(*hb->levels[i]));
+        }
+    }
+}
+
+
+/**
+ * Given HBitmaps A and B, let A := A (BITOR) B.
+ * Bitmap B will not be modified.
+ *
+ * @return true if the merge was successful,
+ *         false if it was not attempted.
+ */
+bool hbitmap_merge(HBitmap *a, const HBitmap *b)
+{
+    int i;
+    uint64_t j;
+
+    if ((a->size != b->size) || (a->granularity != b->granularity)) {
+        return false;
+    }
+
+    if (hbitmap_count(b) == 0) {
+        return true;
+    }
+
+    /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant.
+     * It may be possible to improve running times for sparsely populated maps
+     * by using hbitmap_iter_next, but this is suboptimal for dense maps.
+     */
+    for (i = HBITMAP_LEVELS - 1; i >= 0; i--) {
+        for (j = 0; j < a->sizes[i]; j++) {
+            a->levels[i][j] |= b->levels[i][j];
+        }
+    }
+
+    return true;
+}
diff --git a/src/util/hexdump.c b/src/util/hexdump.c
new file mode 100644
index 0000000..969b340
--- /dev/null
+++ b/src/util/hexdump.c
@@ -0,0 +1,37 @@
+/*
+ * Helper to hexdump a buffer
+ *
+ * Copyright (c) 2013 Red Hat, Inc.
+ * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com>
+ * Copyright (c) 2013 Peter Crosthwaite <peter.crosthwaite@xilinx.com>
+ * Copyright (c) 2013 Xilinx, Inc
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+
+void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size)
+{
+    unsigned int b;
+
+    for (b = 0; b < size; b++) {
+        if ((b % 16) == 0) {
+            fprintf(fp, "%s: %04x:", prefix, b);
+        }
+        if ((b % 4) == 0) {
+            fprintf(fp, " ");
+        }
+        fprintf(fp, " %02x", (unsigned char)buf[b]);
+        if ((b % 16) == 15) {
+            fprintf(fp, "\n");
+        }
+    }
+    if ((b % 16) != 0) {
+        fprintf(fp, "\n");
+    }
+}
diff --git a/src/util/host-utils.c b/src/util/host-utils.c
new file mode 100644
index 0000000..102e5bf
--- /dev/null
+++ b/src/util/host-utils.c
@@ -0,0 +1,162 @@
+/*
+ * Utility compute operations used by translated code.
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ * Copyright (c) 2007 Aurelien Jarno
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include "qemu/host-utils.h"
+
+/* Long integer helpers */
+static inline void mul64(uint64_t *plow, uint64_t *phigh,
+                         uint64_t a, uint64_t b)
+{
+    typedef union {
+        uint64_t ll;
+        struct {
+#ifdef HOST_WORDS_BIGENDIAN
+            uint32_t high, low;
+#else
+            uint32_t low, high;
+#endif
+        } l;
+    } LL;
+    LL rl, rm, rn, rh, a0, b0;
+    uint64_t c;
+
+    a0.ll = a;
+    b0.ll = b;
+
+    rl.ll = (uint64_t)a0.l.low * b0.l.low;
+    rm.ll = (uint64_t)a0.l.low * b0.l.high;
+    rn.ll = (uint64_t)a0.l.high * b0.l.low;
+    rh.ll = (uint64_t)a0.l.high * b0.l.high;
+
+    c = (uint64_t)rl.l.high + rm.l.low + rn.l.low;
+    rl.l.high = c;
+    c >>= 32;
+    c = c + rm.l.high + rn.l.high + rh.l.low;
+    rh.l.low = c;
+    rh.l.high += (uint32_t)(c >> 32);
+
+    *plow = rl.ll;
+    *phigh = rh.ll;
+}
+
+/* Unsigned 64x64 -> 128 multiplication */
+void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b)
+{
+    mul64(plow, phigh, a, b);
+}
+
+/* Signed 64x64 -> 128 multiplication */
+void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b)
+{
+    uint64_t rh;
+
+    mul64(plow, &rh, a, b);
+
+    /* Adjust for signs.  */
+    if (b < 0) {
+        rh -= a;
+    }
+    if (a < 0) {
+        rh -= b;
+    }
+    *phigh = rh;
+}
+
+/* Unsigned 128x64 division.  Returns 1 if overflow (divide by zero or */
+/* quotient exceeds 64 bits).  Otherwise returns quotient via plow and */
+/* remainder via phigh. */
+int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
+{
+    uint64_t dhi = *phigh;
+    uint64_t dlo = *plow;
+    unsigned i;
+    uint64_t carry = 0;
+
+    if (divisor == 0) {
+        return 1;
+    } else if (dhi == 0) {
+        *plow  = dlo / divisor;
+        *phigh = dlo % divisor;
+        return 0;
+    } else if (dhi > divisor) {
+        return 1;
+    } else {
+
+        for (i = 0; i < 64; i++) {
+            carry = dhi >> 63;
+            dhi = (dhi << 1) | (dlo >> 63);
+            if (carry || (dhi >= divisor)) {
+                dhi -= divisor;
+                carry = 1;
+            } else {
+                carry = 0;
+            }
+            dlo = (dlo << 1) | carry;
+        }
+
+        *plow = dlo;
+        *phigh = dhi;
+        return 0;
+    }
+}
+
+int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
+{
+    int sgn_dvdnd = *phigh < 0;
+    int sgn_divsr = divisor < 0;
+    int overflow = 0;
+
+    if (sgn_dvdnd) {
+        *plow = ~(*plow);
+        *phigh = ~(*phigh);
+        if (*plow == (int64_t)-1) {
+            *plow = 0;
+            (*phigh)++;
+         } else {
+            (*plow)++;
+         }
+    }
+
+    if (sgn_divsr) {
+        divisor = 0 - divisor;
+    }
+
+    overflow = divu128((uint64_t *)plow, (uint64_t *)phigh, (uint64_t)divisor);
+
+    if (sgn_dvdnd  ^ sgn_divsr) {
+        *plow = 0 - *plow;
+    }
+
+    if (!overflow) {
+        if ((*plow < 0) ^ (sgn_dvdnd ^ sgn_divsr)) {
+            overflow = 1;
+        }
+    }
+
+    return overflow;
+}
+
diff --git a/src/util/id.c b/src/util/id.c
new file mode 100644
index 0000000..7883fbe
--- /dev/null
+++ b/src/util/id.c
@@ -0,0 +1,65 @@
+/*
+ * Dealing with identifiers
+ *
+ * Copyright (C) 2014 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2.1
+ * or later.  See the COPYING.LIB file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+bool id_wellformed(const char *id)
+{
+    int i;
+
+    if (!qemu_isalpha(id[0])) {
+        return false;
+    }
+    for (i = 1; id[i]; i++) {
+        if (!qemu_isalnum(id[i]) && !strchr("-._", id[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
+#define ID_SPECIAL_CHAR '#'
+
+static const char *const id_subsys_str[ID_MAX] = {
+    [ID_QDEV]  = "qdev",
+    [ID_BLOCK] = "block",
+};
+
+/*
+ *  Generates an ID of the form PREFIX SUBSYSTEM NUMBER
+ *  where:
+ *
+ *  - PREFIX is the reserved character '#'
+ *  - SUBSYSTEM identifies the subsystem creating the ID
+ *  - NUMBER is a decimal number unique within SUBSYSTEM.
+ *
+ *    Example: "#block146"
+ *
+ * Note that these IDs do not satisfy id_wellformed().
+ *
+ * The caller is responsible for freeing the returned string with g_free()
+ */
+char *id_generate(IdSubSystems id)
+{
+    static uint64_t id_counters[ID_MAX];
+    uint32_t rnd;
+
+    assert(id < ARRAY_SIZE(id_subsys_str));
+    assert(id_subsys_str[id]);
+
+    rnd = g_random_int_range(0, 100);
+
+    return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR,
+                                                        id_subsys_str[id],
+                                                        id_counters[id]++,
+                                                        rnd);
+}
diff --git a/src/util/iov.c b/src/util/iov.c
new file mode 100644
index 0000000..a0d5934
--- /dev/null
+++ b/src/util/iov.c
@@ -0,0 +1,577 @@
+/*
+ * Helpers for getting linearized buffers from iov / filling buffers into iovs
+ *
+ * Copyright IBM, Corp. 2007, 2008
+ * Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Author(s):
+ *  Anthony Liguori <aliguori@us.ibm.com>
+ *  Amit Shah <amit.shah@redhat.com>
+ *  Michael Tokarev <mjt@tls.msk.ru>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu/iov.h"
+#include "qemu/sockets.h"
+
+size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt,
+                    size_t offset, const void *buf, size_t bytes)
+{
+    size_t done;
+    unsigned int i;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memcpy(iov[i].iov_base + offset, buf + done, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
+        }
+    }
+    assert(offset == 0);
+    return done;
+}
+
+size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, void *buf, size_t bytes)
+{
+    size_t done;
+    unsigned int i;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memcpy(buf + done, iov[i].iov_base + offset, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
+        }
+    }
+    assert(offset == 0);
+    return done;
+}
+
+size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt,
+                  size_t offset, int fillc, size_t bytes)
+{
+    size_t done;
+    unsigned int i;
+    for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) {
+        if (offset < iov[i].iov_len) {
+            size_t len = MIN(iov[i].iov_len - offset, bytes - done);
+            memset(iov[i].iov_base + offset, fillc, len);
+            done += len;
+            offset = 0;
+        } else {
+            offset -= iov[i].iov_len;
+        }
+    }
+    assert(offset == 0);
+    return done;
+}
+
+size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt)
+{
+    size_t len;
+    unsigned int i;
+
+    len = 0;
+    for (i = 0; i < iov_cnt; i++) {
+        len += iov[i].iov_len;
+    }
+    return len;
+}
+
+/* helper function for iov_send_recv() */
+static ssize_t
+do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send)
+{
+#ifdef CONFIG_POSIX
+    ssize_t ret;
+    struct msghdr msg;
+    memset(&msg, 0, sizeof(msg));
+    msg.msg_iov = iov;
+    msg.msg_iovlen = iov_cnt;
+    do {
+        ret = do_send
+            ? sendmsg(sockfd, &msg, 0)
+            : recvmsg(sockfd, &msg, 0);
+    } while (ret < 0 && errno == EINTR);
+    return ret;
+#else
+    /* else send piece-by-piece */
+    /*XXX Note: windows has WSASend() and WSARecv() */
+    unsigned i = 0;
+    ssize_t ret = 0;
+    while (i < iov_cnt) {
+        ssize_t r = do_send
+            ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0)
+            : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0);
+        if (r > 0) {
+            ret += r;
+        } else if (!r) {
+            break;
+        } else if (errno == EINTR) {
+            continue;
+        } else {
+            /* else it is some "other" error,
+             * only return if there was no data processed. */
+            if (ret == 0) {
+                ret = -1;
+            }
+            break;
+        }
+        i++;
+    }
+    return ret;
+#endif
+}
+
+ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt,
+                      size_t offset, size_t bytes,
+                      bool do_send)
+{
+    ssize_t total = 0;
+    ssize_t ret;
+    size_t orig_len, tail;
+    unsigned niov;
+    struct iovec *local_iov, *iov;
+
+    if (bytes <= 0) {
+        return 0;
+    }
+
+    local_iov = g_new0(struct iovec, iov_cnt);
+    iov_copy(local_iov, iov_cnt, _iov, iov_cnt, offset, bytes);
+    offset = 0;
+    iov = local_iov;
+
+    while (bytes > 0) {
+        /* Find the start position, skipping `offset' bytes:
+         * first, skip all full-sized vector elements, */
+        for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) {
+            offset -= iov[niov].iov_len;
+        }
+
+        /* niov == iov_cnt would only be valid if bytes == 0, which
+         * we already ruled out in the loop condition.  */
+        assert(niov < iov_cnt);
+        iov += niov;
+        iov_cnt -= niov;
+
+        if (offset) {
+            /* second, skip `offset' bytes from the (now) first element,
+             * undo it on exit */
+            iov[0].iov_base += offset;
+            iov[0].iov_len -= offset;
+        }
+        /* Find the end position skipping `bytes' bytes: */
+        /* first, skip all full-sized elements */
+        tail = bytes;
+        for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) {
+            tail -= iov[niov].iov_len;
+        }
+        if (tail) {
+            /* second, fixup the last element, and remember the original
+             * length */
+            assert(niov < iov_cnt);
+            assert(iov[niov].iov_len > tail);
+            orig_len = iov[niov].iov_len;
+            iov[niov++].iov_len = tail;
+            ret = do_send_recv(sockfd, iov, niov, do_send);
+            /* Undo the changes above before checking for errors */
+            iov[niov-1].iov_len = orig_len;
+        } else {
+            ret = do_send_recv(sockfd, iov, niov, do_send);
+        }
+        if (offset) {
+            iov[0].iov_base -= offset;
+            iov[0].iov_len += offset;
+        }
+
+        if (ret < 0) {
+            assert(errno != EINTR);
+            g_free(local_iov);
+            if (errno == EAGAIN && total > 0) {
+                return total;
+            }
+            return -1;
+        }
+
+        if (ret == 0 && !do_send) {
+            /* recv returns 0 when the peer has performed an orderly
+             * shutdown. */
+            break;
+        }
+
+        /* Prepare for the next iteration */
+        offset += ret;
+        total += ret;
+        bytes -= ret;
+    }
+
+    g_free(local_iov);
+    return total;
+}
+
+
+void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt,
+                 FILE *fp, const char *prefix, size_t limit)
+{
+    int v;
+    size_t size = 0;
+    char *buf;
+
+    for (v = 0; v < iov_cnt; v++) {
+        size += iov[v].iov_len;
+    }
+    size = size > limit ? limit : size;
+    buf = g_malloc(size);
+    iov_to_buf(iov, iov_cnt, 0, buf, size);
+    qemu_hexdump(buf, fp, prefix, size);
+    g_free(buf);
+}
+
+unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt,
+                 const struct iovec *iov, unsigned int iov_cnt,
+                 size_t offset, size_t bytes)
+{
+    size_t len;
+    unsigned int i, j;
+    for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) {
+        if (offset >= iov[i].iov_len) {
+            offset -= iov[i].iov_len;
+            continue;
+        }
+        len = MIN(bytes, iov[i].iov_len - offset);
+
+        dst_iov[j].iov_base = iov[i].iov_base + offset;
+        dst_iov[j].iov_len = len;
+        j++;
+        bytes -= len;
+        offset = 0;
+    }
+    assert(offset == 0);
+    return j;
+}
+
+/* io vectors */
+
+void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint)
+{
+    qiov->iov = g_new(struct iovec, alloc_hint);
+    qiov->niov = 0;
+    qiov->nalloc = alloc_hint;
+    qiov->size = 0;
+}
+
+void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov)
+{
+    int i;
+
+    qiov->iov = iov;
+    qiov->niov = niov;
+    qiov->nalloc = -1;
+    qiov->size = 0;
+    for (i = 0; i < niov; i++)
+        qiov->size += iov[i].iov_len;
+}
+
+void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len)
+{
+    assert(qiov->nalloc != -1);
+
+    if (qiov->niov == qiov->nalloc) {
+        qiov->nalloc = 2 * qiov->nalloc + 1;
+        qiov->iov = g_renew(struct iovec, qiov->iov, qiov->nalloc);
+    }
+    qiov->iov[qiov->niov].iov_base = base;
+    qiov->iov[qiov->niov].iov_len = len;
+    qiov->size += len;
+    ++qiov->niov;
+}
+
+/*
+ * Concatenates (partial) iovecs from src_iov to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src_iov if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+size_t qemu_iovec_concat_iov(QEMUIOVector *dst,
+                             struct iovec *src_iov, unsigned int src_cnt,
+                             size_t soffset, size_t sbytes)
+{
+    int i;
+    size_t done;
+
+    if (!sbytes) {
+        return 0;
+    }
+    assert(dst->nalloc != -1);
+    for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) {
+        if (soffset < src_iov[i].iov_len) {
+            size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done);
+            qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len);
+            done += len;
+            soffset = 0;
+        } else {
+            soffset -= src_iov[i].iov_len;
+        }
+    }
+    assert(soffset == 0); /* offset beyond end of src */
+
+    return done;
+}
+
+/*
+ * Concatenates (partial) iovecs from src to the end of dst.
+ * It starts copying after skipping `soffset' bytes at the
+ * beginning of src and adds individual vectors from src to
+ * dst copies up to `sbytes' bytes total, or up to the end
+ * of src if it comes first.  This way, it is okay to specify
+ * very large value for `sbytes' to indicate "up to the end
+ * of src".
+ * Only vector pointers are processed, not the actual data buffers.
+ */
+void qemu_iovec_concat(QEMUIOVector *dst,
+                       QEMUIOVector *src, size_t soffset, size_t sbytes)
+{
+    qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes);
+}
+
+/*
+ * Check if the contents of the iovecs are all zero
+ */
+bool qemu_iovec_is_zero(QEMUIOVector *qiov)
+{
+    int i;
+    for (i = 0; i < qiov->niov; i++) {
+        size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long));
+        uint8_t *ptr = qiov->iov[i].iov_base;
+        if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) {
+            return false;
+        }
+        for (; offs < qiov->iov[i].iov_len; offs++) {
+            if (ptr[offs]) {
+                return false;
+            }
+        }
+    }
+    return true;
+}
+
+void qemu_iovec_destroy(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qemu_iovec_reset(qiov);
+    g_free(qiov->iov);
+    qiov->nalloc = 0;
+    qiov->iov = NULL;
+}
+
+void qemu_iovec_reset(QEMUIOVector *qiov)
+{
+    assert(qiov->nalloc != -1);
+
+    qiov->niov = 0;
+    qiov->size = 0;
+}
+
+size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset,
+                         void *buf, size_t bytes)
+{
+    return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset,
+                           const void *buf, size_t bytes)
+{
+    return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes);
+}
+
+size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset,
+                         int fillc, size_t bytes)
+{
+    return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes);
+}
+
+/**
+ * Check that I/O vector contents are identical
+ *
+ * The IO vectors must have the same structure (same length of all parts).
+ * A typical usage is to compare vectors created with qemu_iovec_clone().
+ *
+ * @a:          I/O vector
+ * @b:          I/O vector
+ * @ret:        Offset to first mismatching byte or -1 if match
+ */
+ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b)
+{
+    int i;
+    ssize_t offset = 0;
+
+    assert(a->niov == b->niov);
+    for (i = 0; i < a->niov; i++) {
+        size_t len = 0;
+        uint8_t *p = (uint8_t *)a->iov[i].iov_base;
+        uint8_t *q = (uint8_t *)b->iov[i].iov_base;
+
+        assert(a->iov[i].iov_len == b->iov[i].iov_len);
+        while (len < a->iov[i].iov_len && *p++ == *q++) {
+            len++;
+        }
+
+        offset += len;
+
+        if (len != a->iov[i].iov_len) {
+            return offset;
+        }
+    }
+    return -1;
+}
+
+typedef struct {
+    int src_index;
+    struct iovec *src_iov;
+    void *dest_base;
+} IOVectorSortElem;
+
+static int sortelem_cmp_src_base(const void *a, const void *b)
+{
+    const IOVectorSortElem *elem_a = a;
+    const IOVectorSortElem *elem_b = b;
+
+    /* Don't overflow */
+    if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) {
+        return -1;
+    } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) {
+        return 1;
+    } else {
+        return 0;
+    }
+}
+
+static int sortelem_cmp_src_index(const void *a, const void *b)
+{
+    const IOVectorSortElem *elem_a = a;
+    const IOVectorSortElem *elem_b = b;
+
+    return elem_a->src_index - elem_b->src_index;
+}
+
+/**
+ * Copy contents of I/O vector
+ *
+ * The relative relationships of overlapping iovecs are preserved.  This is
+ * necessary to ensure identical semantics in the cloned I/O vector.
+ */
+void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf)
+{
+    IOVectorSortElem sortelems[src->niov];
+    void *last_end;
+    int i;
+
+    /* Sort by source iovecs by base address */
+    for (i = 0; i < src->niov; i++) {
+        sortelems[i].src_index = i;
+        sortelems[i].src_iov = &src->iov[i];
+    }
+    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base);
+
+    /* Allocate buffer space taking into account overlapping iovecs */
+    last_end = NULL;
+    for (i = 0; i < src->niov; i++) {
+        struct iovec *cur = sortelems[i].src_iov;
+        ptrdiff_t rewind = 0;
+
+        /* Detect overlap */
+        if (last_end && last_end > cur->iov_base) {
+            rewind = last_end - cur->iov_base;
+        }
+
+        sortelems[i].dest_base = buf - rewind;
+        buf += cur->iov_len - MIN(rewind, cur->iov_len);
+        last_end = MAX(cur->iov_base + cur->iov_len, last_end);
+    }
+
+    /* Sort by source iovec index and build destination iovec */
+    qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index);
+    for (i = 0; i < src->niov; i++) {
+        qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len);
+    }
+}
+
+size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt,
+                         size_t bytes)
+{
+    size_t total = 0;
+    struct iovec *cur;
+
+    for (cur = *iov; *iov_cnt > 0; cur++) {
+        if (cur->iov_len > bytes) {
+            cur->iov_base += bytes;
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        *iov_cnt -= 1;
+    }
+
+    *iov = cur;
+    return total;
+}
+
+size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt,
+                        size_t bytes)
+{
+    size_t total = 0;
+    struct iovec *cur;
+
+    if (*iov_cnt == 0) {
+        return 0;
+    }
+
+    cur = iov + (*iov_cnt - 1);
+
+    while (*iov_cnt > 0) {
+        if (cur->iov_len > bytes) {
+            cur->iov_len -= bytes;
+            total += bytes;
+            break;
+        }
+
+        bytes -= cur->iov_len;
+        total += cur->iov_len;
+        cur--;
+        *iov_cnt -= 1;
+    }
+
+    return total;
+}
+
+void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes)
+{
+    size_t total;
+    unsigned int niov = qiov->niov;
+
+    assert(qiov->size >= bytes);
+    total = iov_discard_back(qiov->iov, &niov, bytes);
+    assert(total == bytes);
+
+    qiov->niov = niov;
+    qiov->size -= bytes;
+}
diff --git a/src/util/memfd.c b/src/util/memfd.c
new file mode 100644
index 0000000..587ef5a
--- /dev/null
+++ b/src/util/memfd.c
@@ -0,0 +1,162 @@
+/*
+ * memfd.c
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+
+#include <glib.h>
+#include <glib/gprintf.h>
+
+#include <sys/mman.h>
+
+#include "qemu/memfd.h"
+
+#ifdef CONFIG_MEMFD
+#include <sys/memfd.h>
+#elif defined CONFIG_LINUX
+#include <sys/syscall.h>
+#include <asm/unistd.h>
+
+static int qemu_memfd_create(const char *name, unsigned int flags)
+{
+#ifdef __NR_memfd_create
+    return syscall(__NR_memfd_create, name, flags);
+#else
+    return -1;
+#endif
+}
+#endif
+
+#ifndef MFD_CLOEXEC
+#define MFD_CLOEXEC 0x0001U
+#endif
+
+#ifndef MFD_ALLOW_SEALING
+#define MFD_ALLOW_SEALING 0x0002U
+#endif
+
+/*
+ * This is a best-effort helper for shared memory allocation, with
+ * optional sealing. The helper will do his best to allocate using
+ * memfd with sealing, but may fallback on other methods without
+ * sealing.
+ */
+void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals,
+                       int *fd)
+{
+    void *ptr;
+    int mfd = -1;
+
+    *fd = -1;
+
+#ifdef CONFIG_LINUX
+    if (seals) {
+        mfd = qemu_memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC);
+    }
+
+    if (mfd == -1) {
+        /* some systems have memfd without sealing */
+        mfd = qemu_memfd_create(name, MFD_CLOEXEC);
+        seals = 0;
+    }
+#endif
+
+    if (mfd != -1) {
+        if (ftruncate(mfd, size) == -1) {
+            perror("ftruncate");
+            close(mfd);
+            return NULL;
+        }
+
+        if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) {
+            perror("fcntl");
+            close(mfd);
+            return NULL;
+        }
+    } else {
+        const char *tmpdir = g_get_tmp_dir();
+        gchar *fname;
+
+        fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir);
+        mfd = mkstemp(fname);
+        unlink(fname);
+        g_free(fname);
+
+        if (mfd == -1) {
+            perror("mkstemp");
+            return NULL;
+        }
+
+        if (ftruncate(mfd, size) == -1) {
+            perror("ftruncate");
+            close(mfd);
+            return NULL;
+        }
+    }
+
+    ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0);
+    if (ptr == MAP_FAILED) {
+        perror("mmap");
+        close(mfd);
+        return NULL;
+    }
+
+    *fd = mfd;
+    return ptr;
+}
+
+void qemu_memfd_free(void *ptr, size_t size, int fd)
+{
+    if (ptr) {
+        munmap(ptr, size);
+    }
+
+    if (fd != -1) {
+        close(fd);
+    }
+}
+
+enum {
+    MEMFD_KO,
+    MEMFD_OK,
+    MEMFD_TODO
+};
+
+bool qemu_memfd_check(void)
+{
+    static int memfd_check = MEMFD_TODO;
+
+    if (memfd_check == MEMFD_TODO) {
+        int fd;
+        void *ptr;
+
+        ptr = qemu_memfd_alloc("test", 4096, 0, &fd);
+        memfd_check = ptr ? MEMFD_OK : MEMFD_KO;
+        qemu_memfd_free(ptr, 4096, fd);
+    }
+
+    return memfd_check == MEMFD_OK;
+}
diff --git a/src/util/mmap-alloc.c b/src/util/mmap-alloc.c
new file mode 100644
index 0000000..54793a5
--- /dev/null
+++ b/src/util/mmap-alloc.c
@@ -0,0 +1,110 @@
+/*
+ * Support for RAM backed by mmaped host memory.
+ *
+ * Copyright (c) 2015 Red Hat, Inc.
+ *
+ * Authors:
+ *  Michael S. Tsirkin <mst@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+#include <qemu/mmap-alloc.h>
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <assert.h>
+
+#define HUGETLBFS_MAGIC       0x958458f6
+
+#ifdef CONFIG_LINUX
+#include <sys/vfs.h>
+#endif
+
+size_t qemu_fd_getpagesize(int fd)
+{
+#ifdef CONFIG_LINUX
+    struct statfs fs;
+    int ret;
+
+    if (fd != -1) {
+        do {
+            ret = fstatfs(fd, &fs);
+        } while (ret != 0 && errno == EINTR);
+
+        if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) {
+            return fs.f_bsize;
+        }
+    }
+#endif
+
+    return getpagesize();
+}
+
+void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared)
+{
+    /*
+     * Note: this always allocates at least one extra page of virtual address
+     * space, even if size is already aligned.
+     */
+    size_t total = size + align;
+#if defined(__powerpc64__) && defined(__linux__)
+    /* On ppc64 mappings in the same segment (aka slice) must share the same
+     * page size. Since we will be re-allocating part of this segment
+     * from the supplied fd, we should make sure to use the same page size,
+     * unless we are using the system page size, in which case anonymous memory
+     * is OK. Use align as a hint for the page size.
+     * In this case, set MAP_NORESERVE to avoid allocating backing store memory.
+     */
+    int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd;
+    int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE;
+    void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0);
+#else
+    void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+#endif
+    size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr;
+    void *ptr1;
+
+    if (ptr == MAP_FAILED) {
+        return MAP_FAILED;
+    }
+
+    /* Make sure align is a power of 2 */
+    assert(!(align & (align - 1)));
+    /* Always align to host page size */
+    assert(align >= getpagesize());
+
+    ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE,
+                MAP_FIXED |
+                (fd == -1 ? MAP_ANONYMOUS : 0) |
+                (shared ? MAP_SHARED : MAP_PRIVATE),
+                fd, 0);
+    if (ptr1 == MAP_FAILED) {
+        munmap(ptr, total);
+        return MAP_FAILED;
+    }
+
+    ptr += offset;
+    total -= offset;
+
+    if (offset > 0) {
+        munmap(ptr - offset, offset);
+    }
+
+    /*
+     * Leave a single PROT_NONE page allocated after the RAM block, to serve as
+     * a guard page guarding against potential buffer overflows.
+     */
+    if (total > size + getpagesize()) {
+        munmap(ptr + size + getpagesize(), total - size - getpagesize());
+    }
+
+    return ptr;
+}
+
+void qemu_ram_munmap(void *ptr, size_t size)
+{
+    if (ptr) {
+        /* Unmap both the RAM block and the guard page */
+        munmap(ptr, size + getpagesize());
+    }
+}
diff --git a/src/util/module.c b/src/util/module.c
new file mode 100644
index 0000000..4bd4a94
--- /dev/null
+++ b/src/util/module.c
@@ -0,0 +1,219 @@
+/*
+ * QEMU Module Infrastructure
+ *
+ * Copyright IBM, Corp. 2009
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include <stdlib.h>
+#include "qemu-common.h"
+#ifdef CONFIG_MODULES
+#include <gmodule.h>
+#endif
+#include "qemu/queue.h"
+#include "qemu/module.h"
+
+typedef struct ModuleEntry
+{
+    void (*init)(void);
+    QTAILQ_ENTRY(ModuleEntry) node;
+    module_init_type type;
+} ModuleEntry;
+
+typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList;
+
+static ModuleTypeList init_type_list[MODULE_INIT_MAX];
+
+static ModuleTypeList dso_init_list;
+
+static void init_lists(void)
+{
+    static int inited;
+    int i;
+
+    if (inited) {
+        return;
+    }
+
+    for (i = 0; i < MODULE_INIT_MAX; i++) {
+        QTAILQ_INIT(&init_type_list[i]);
+    }
+
+    QTAILQ_INIT(&dso_init_list);
+
+    inited = 1;
+}
+
+
+static ModuleTypeList *find_type(module_init_type type)
+{
+    ModuleTypeList *l;
+
+    init_lists();
+
+    l = &init_type_list[type];
+
+    return l;
+}
+
+void register_module_init(void (*fn)(void), module_init_type type)
+{
+    ModuleEntry *e;
+    ModuleTypeList *l;
+
+    e = g_malloc0(sizeof(*e));
+    e->init = fn;
+    e->type = type;
+
+    l = find_type(type);
+
+    QTAILQ_INSERT_TAIL(l, e, node);
+}
+
+void register_dso_module_init(void (*fn)(void), module_init_type type)
+{
+    ModuleEntry *e;
+
+    init_lists();
+
+    e = g_malloc0(sizeof(*e));
+    e->init = fn;
+    e->type = type;
+
+    QTAILQ_INSERT_TAIL(&dso_init_list, e, node);
+}
+
+static void module_load(module_init_type type);
+
+void module_call_init(module_init_type type)
+{
+    ModuleTypeList *l;
+    ModuleEntry *e;
+
+    module_load(type);
+    l = find_type(type);
+
+    QTAILQ_FOREACH(e, l, node) {
+        e->init();
+    }
+}
+
+#ifdef CONFIG_MODULES
+static int module_load_file(const char *fname)
+{
+    GModule *g_module;
+    void (*sym)(void);
+    const char *dsosuf = HOST_DSOSUF;
+    int len = strlen(fname);
+    int suf_len = strlen(dsosuf);
+    ModuleEntry *e, *next;
+    int ret;
+
+    if (len <= suf_len || strcmp(&fname[len - suf_len], dsosuf)) {
+        /* wrong suffix */
+        ret = -EINVAL;
+        goto out;
+    }
+    if (access(fname, F_OK)) {
+        ret = -ENOENT;
+        goto out;
+    }
+
+    assert(QTAILQ_EMPTY(&dso_init_list));
+
+    g_module = g_module_open(fname, G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL);
+    if (!g_module) {
+        fprintf(stderr, "Failed to open module: %s\n",
+                g_module_error());
+        ret = -EINVAL;
+        goto out;
+    }
+    if (!g_module_symbol(g_module, DSO_STAMP_FUN_STR, (gpointer *)&sym)) {
+        fprintf(stderr, "Failed to initialize module: %s\n",
+                fname);
+        /* Print some info if this is a QEMU module (but from different build),
+         * this will make debugging user problems easier. */
+        if (g_module_symbol(g_module, "qemu_module_dummy", (gpointer *)&sym)) {
+            fprintf(stderr,
+                    "Note: only modules from the same build can be loaded.\n");
+        }
+        g_module_close(g_module);
+        ret = -EINVAL;
+    } else {
+        QTAILQ_FOREACH(e, &dso_init_list, node) {
+            register_module_init(e->init, e->type);
+        }
+        ret = 0;
+    }
+
+    QTAILQ_FOREACH_SAFE(e, &dso_init_list, node, next) {
+        QTAILQ_REMOVE(&dso_init_list, e, node);
+        g_free(e);
+    }
+out:
+    return ret;
+}
+#endif
+
+static void module_load(module_init_type type)
+{
+#ifdef CONFIG_MODULES
+    char *fname = NULL;
+    const char **mp;
+    static const char *block_modules[] = {
+        CONFIG_BLOCK_MODULES
+    };
+    char *exec_dir;
+    char *dirs[3];
+    int i = 0;
+    int ret;
+
+    if (!g_module_supported()) {
+        fprintf(stderr, "Module is not supported by system.\n");
+        return;
+    }
+
+    switch (type) {
+    case MODULE_INIT_BLOCK:
+        mp = block_modules;
+        break;
+    default:
+        /* no other types have dynamic modules for now*/
+        return;
+    }
+
+    exec_dir = qemu_get_exec_dir();
+    dirs[i++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR);
+    dirs[i++] = g_strdup_printf("%s/..", exec_dir ? : "");
+    dirs[i++] = g_strdup_printf("%s", exec_dir ? : "");
+    assert(i == ARRAY_SIZE(dirs));
+    g_free(exec_dir);
+    exec_dir = NULL;
+
+    for ( ; *mp; mp++) {
+        for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+            fname = g_strdup_printf("%s/%s%s", dirs[i], *mp, HOST_DSOSUF);
+            ret = module_load_file(fname);
+            g_free(fname);
+            fname = NULL;
+            /* Try loading until loaded a module file */
+            if (!ret) {
+                break;
+            }
+        }
+    }
+
+    for (i = 0; i < ARRAY_SIZE(dirs); i++) {
+        g_free(dirs[i]);
+    }
+
+#endif
+}
diff --git a/src/util/notify.c b/src/util/notify.c
new file mode 100644
index 0000000..f215dfc
--- /dev/null
+++ b/src/util/notify.c
@@ -0,0 +1,71 @@
+/*
+ * Notifier lists
+ *
+ * Copyright IBM, Corp. 2010
+ *
+ * Authors:
+ *  Anthony Liguori   <aliguori@us.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+
+#include "qemu-common.h"
+#include "qemu/notify.h"
+
+void notifier_list_init(NotifierList *list)
+{
+    QLIST_INIT(&list->notifiers);
+}
+
+void notifier_list_add(NotifierList *list, Notifier *notifier)
+{
+    QLIST_INSERT_HEAD(&list->notifiers, notifier, node);
+}
+
+void notifier_remove(Notifier *notifier)
+{
+    QLIST_REMOVE(notifier, node);
+}
+
+void notifier_list_notify(NotifierList *list, void *data)
+{
+    Notifier *notifier, *next;
+
+    QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
+        notifier->notify(notifier, data);
+    }
+}
+
+void notifier_with_return_list_init(NotifierWithReturnList *list)
+{
+    QLIST_INIT(&list->notifiers);
+}
+
+void notifier_with_return_list_add(NotifierWithReturnList *list,
+                                   NotifierWithReturn *notifier)
+{
+    QLIST_INSERT_HEAD(&list->notifiers, notifier, node);
+}
+
+void notifier_with_return_remove(NotifierWithReturn *notifier)
+{
+    QLIST_REMOVE(notifier, node);
+}
+
+int notifier_with_return_list_notify(NotifierWithReturnList *list, void *data)
+{
+    NotifierWithReturn *notifier, *next;
+    int ret = 0;
+
+    QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) {
+        ret = notifier->notify(notifier, data);
+        if (ret != 0) {
+            break;
+        }
+    }
+    return ret;
+}
diff --git a/src/util/osdep.c b/src/util/osdep.c
new file mode 100644
index 0000000..534b511
--- /dev/null
+++ b/src/util/osdep.c
@@ -0,0 +1,437 @@
+/*
+ * QEMU low level functions
+ *
+ * Copyright (c) 2003 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+/* Needed early for CONFIG_BSD etc. */
+#include "config-host.h"
+
+#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE)
+#include <sys/mman.h>
+#endif
+
+#ifdef CONFIG_SOLARIS
+#include <sys/types.h>
+#include <sys/statvfs.h>
+/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for
+   discussion about Solaris header problems */
+extern int madvise(caddr_t, size_t, int);
+#endif
+
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/error-report.h"
+#include "monitor/monitor.h"
+
+static bool fips_enabled = false;
+
+/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default
+ * instead of QEMU_VERSION, so setting hw_version on MachineClass
+ * is no longer mandatory.
+ *
+ * Do NOT change this string, or it will break compatibility on all
+ * machine classes that don't set hw_version.
+ */
+static const char *hw_version = "2.5+";
+
+int socket_set_cork(int fd, int v)
+{
+#if defined(SOL_TCP) && defined(TCP_CORK)
+    return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v));
+#else
+    return 0;
+#endif
+}
+
+int socket_set_nodelay(int fd)
+{
+    int v = 1;
+    return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v));
+}
+
+int qemu_madvise(void *addr, size_t len, int advice)
+{
+    if (advice == QEMU_MADV_INVALID) {
+        errno = EINVAL;
+        return -1;
+    }
+#if defined(CONFIG_MADVISE)
+    return madvise(addr, len, advice);
+#elif defined(CONFIG_POSIX_MADVISE)
+    return posix_madvise(addr, len, advice);
+#else
+    errno = EINVAL;
+    return -1;
+#endif
+}
+
+#ifndef _WIN32
+/*
+ * Dups an fd and sets the flags
+ */
+static int qemu_dup_flags(int fd, int flags)
+{
+    int ret;
+    int serrno;
+    int dup_flags;
+
+#ifdef F_DUPFD_CLOEXEC
+    ret = fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+    ret = dup(fd);
+    if (ret != -1) {
+        qemu_set_cloexec(ret);
+    }
+#endif
+    if (ret == -1) {
+        goto fail;
+    }
+
+    dup_flags = fcntl(ret, F_GETFL);
+    if (dup_flags == -1) {
+        goto fail;
+    }
+
+    if ((flags & O_SYNC) != (dup_flags & O_SYNC)) {
+        errno = EINVAL;
+        goto fail;
+    }
+
+    /* Set/unset flags that we can with fcntl */
+    if (fcntl(ret, F_SETFL, flags) == -1) {
+        goto fail;
+    }
+
+    /* Truncate the file in the cases that open() would truncate it */
+    if (flags & O_TRUNC ||
+            ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) {
+        if (ftruncate(ret, 0) == -1) {
+            goto fail;
+        }
+    }
+
+    return ret;
+
+fail:
+    serrno = errno;
+    if (ret != -1) {
+        close(ret);
+    }
+    errno = serrno;
+    return -1;
+}
+
+static int qemu_parse_fdset(const char *param)
+{
+    return qemu_parse_fd(param);
+}
+#endif
+
+/*
+ * Opens a file with FD_CLOEXEC set
+ */
+int qemu_open(const char *name, int flags, ...)
+{
+    int ret;
+    int mode = 0;
+
+#ifndef _WIN32
+    const char *fdset_id_str;
+
+    /* Attempt dup of fd from fd set */
+    if (strstart(name, "/dev/fdset/", &fdset_id_str)) {
+        int64_t fdset_id;
+        int fd, dupfd;
+
+        fdset_id = qemu_parse_fdset(fdset_id_str);
+        if (fdset_id == -1) {
+            errno = EINVAL;
+            return -1;
+        }
+
+        fd = monitor_fdset_get_fd(fdset_id, flags);
+        if (fd == -1) {
+            return -1;
+        }
+
+        dupfd = qemu_dup_flags(fd, flags);
+        if (dupfd == -1) {
+            return -1;
+        }
+
+        ret = monitor_fdset_dup_fd_add(fdset_id, dupfd);
+        if (ret == -1) {
+            close(dupfd);
+            errno = EINVAL;
+            return -1;
+        }
+
+        return dupfd;
+    }
+#endif
+
+    if (flags & O_CREAT) {
+        va_list ap;
+
+        va_start(ap, flags);
+        mode = va_arg(ap, int);
+        va_end(ap);
+    }
+
+#ifdef O_CLOEXEC
+    ret = open(name, flags | O_CLOEXEC, mode);
+#else
+    ret = open(name, flags, mode);
+    if (ret >= 0) {
+        qemu_set_cloexec(ret);
+    }
+#endif
+
+#ifdef O_DIRECT
+    if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) {
+        error_report("file system may not support O_DIRECT");
+        errno = EINVAL; /* in case it was clobbered */
+    }
+#endif /* O_DIRECT */
+
+    return ret;
+}
+
+int qemu_close(int fd)
+{
+    int64_t fdset_id;
+
+    /* Close fd that was dup'd from an fdset */
+    fdset_id = monitor_fdset_dup_fd_find(fd);
+    if (fdset_id != -1) {
+        int ret;
+
+        ret = close(fd);
+        if (ret == 0) {
+            monitor_fdset_dup_fd_remove(fd);
+        }
+
+        return ret;
+    }
+
+    return close(fd);
+}
+
+/*
+ * A variant of write(2) which handles partial write.
+ *
+ * Return the number of bytes transferred.
+ * Set errno if fewer than `count' bytes are written.
+ *
+ * This function don't work with non-blocking fd's.
+ * Any of the possibilities with non-bloking fd's is bad:
+ *   - return a short write (then name is wrong)
+ *   - busy wait adding (errno == EAGAIN) to the loop
+ */
+ssize_t qemu_write_full(int fd, const void *buf, size_t count)
+{
+    ssize_t ret = 0;
+    ssize_t total = 0;
+
+    while (count) {
+        ret = write(fd, buf, count);
+        if (ret < 0) {
+            if (errno == EINTR)
+                continue;
+            break;
+        }
+
+        count -= ret;
+        buf += ret;
+        total += ret;
+    }
+
+    return total;
+}
+
+/*
+ * Opens a socket with FD_CLOEXEC set
+ */
+int qemu_socket(int domain, int type, int protocol)
+{
+    int ret;
+
+#ifdef SOCK_CLOEXEC
+    ret = socket(domain, type | SOCK_CLOEXEC, protocol);
+    if (ret != -1 || errno != EINVAL) {
+        return ret;
+    }
+#endif
+    ret = socket(domain, type, protocol);
+    if (ret >= 0) {
+        qemu_set_cloexec(ret);
+    }
+
+    return ret;
+}
+
+/*
+ * Accept a connection and set FD_CLOEXEC
+ */
+int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen)
+{
+    int ret;
+
+#ifdef CONFIG_ACCEPT4
+    ret = accept4(s, addr, addrlen, SOCK_CLOEXEC);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    ret = accept(s, addr, addrlen);
+    if (ret >= 0) {
+        qemu_set_cloexec(ret);
+    }
+
+    return ret;
+}
+
+void qemu_set_hw_version(const char *version)
+{
+    hw_version = version;
+}
+
+const char *qemu_hw_version(void)
+{
+    return hw_version;
+}
+
+void fips_set_state(bool requested)
+{
+#ifdef __linux__
+    if (requested) {
+        FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r");
+        if (fds != NULL) {
+            fips_enabled = (fgetc(fds) == '1');
+            fclose(fds);
+        }
+    }
+#else
+    fips_enabled = false;
+#endif /* __linux__ */
+
+#ifdef _FIPS_DEBUG
+    fprintf(stderr, "FIPS mode %s (requested %s)\n",
+	    (fips_enabled ? "enabled" : "disabled"),
+	    (requested ? "enabled" : "disabled"));
+#endif
+}
+
+bool fips_get_state(void)
+{
+    return fips_enabled;
+}
+
+#ifdef _WIN32
+static void socket_cleanup(void)
+{
+    WSACleanup();
+}
+#endif
+
+int socket_init(void)
+{
+#ifdef _WIN32
+    WSADATA Data;
+    int ret, err;
+
+    ret = WSAStartup(MAKEWORD(2, 2), &Data);
+    if (ret != 0) {
+        err = WSAGetLastError();
+        fprintf(stderr, "WSAStartup: %d\n", err);
+        return -1;
+    }
+    atexit(socket_cleanup);
+#endif
+    return 0;
+}
+
+#if !GLIB_CHECK_VERSION(2, 31, 0)
+/* Ensure that glib is running in multi-threaded mode
+ * Old versions of glib require explicit initialization.  Failure to do
+ * this results in the single-threaded code paths being taken inside
+ * glib.  For example, the g_slice allocator will not be thread-safe
+ * and cause crashes.
+ */
+static void __attribute__((constructor)) thread_init(void)
+{
+    if (!g_thread_supported()) {
+       g_thread_init(NULL);
+    }
+}
+#endif
+
+#ifndef CONFIG_IOVEC
+/* helper function for iov_send_recv() */
+static ssize_t
+readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write)
+{
+    unsigned i = 0;
+    ssize_t ret = 0;
+    while (i < iov_cnt) {
+        ssize_t r = do_write
+            ? write(fd, iov[i].iov_base, iov[i].iov_len)
+            : read(fd, iov[i].iov_base, iov[i].iov_len);
+        if (r > 0) {
+            ret += r;
+        } else if (!r) {
+            break;
+        } else if (errno == EINTR) {
+            continue;
+        } else {
+            /* else it is some "other" error,
+             * only return if there was no data processed. */
+            if (ret == 0) {
+                ret = -1;
+            }
+            break;
+        }
+        i++;
+    }
+    return ret;
+}
+
+ssize_t
+readv(int fd, const struct iovec *iov, int iov_cnt)
+{
+    return readv_writev(fd, iov, iov_cnt, false);
+}
+
+ssize_t
+writev(int fd, const struct iovec *iov, int iov_cnt)
+{
+    return readv_writev(fd, iov, iov_cnt, true);
+}
+#endif
diff --git a/src/util/oslib-posix.c b/src/util/oslib-posix.c
new file mode 100644
index 0000000..d25f671
--- /dev/null
+++ b/src/util/oslib-posix.c
@@ -0,0 +1,521 @@
+/*
+ * os-posix-lib.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions on POSIX which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/* The following block of code temporarily renames the daemon() function so the
+   compiler does not see the warning associated with it in stdlib.h on OSX */
+#ifdef __APPLE__
+#define daemon qemu_fake_daemon_function
+#include <stdlib.h>
+#undef daemon
+extern int daemon(int, int);
+#endif
+
+#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__))
+   /* Use 2 MiB alignment so transparent hugepages can be used by KVM.
+      Valgrind does not support alignments larger than 1 MiB,
+      therefore we need special code which handles running on Valgrind. */
+#  define QEMU_VMALLOC_ALIGN (512 * 4096)
+#elif defined(__linux__) && defined(__s390x__)
+   /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */
+#  define QEMU_VMALLOC_ALIGN (256 * 4096)
+#else
+#  define QEMU_VMALLOC_ALIGN getpagesize()
+#endif
+
+#include <termios.h>
+#include <unistd.h>
+#include <termios.h>
+
+#include <glib/gprintf.h>
+
+#include "config-host.h"
+#include "sysemu/sysemu.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+#include <sys/mman.h>
+#include <libgen.h>
+#include <setjmp.h>
+#include <sys/signal.h>
+
+#ifdef CONFIG_LINUX
+#include <sys/syscall.h>
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#endif
+
+#include <qemu/mmap-alloc.h>
+
+int qemu_get_thread_id(void)
+{
+#if defined(__linux__)
+    return syscall(SYS_gettid);
+#else
+    return getpid();
+#endif
+}
+
+int qemu_daemon(int nochdir, int noclose)
+{
+    return daemon(nochdir, noclose);
+}
+
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr == NULL) {
+        fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno));
+        abort();
+    }
+    return ptr;
+}
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+
+    if (alignment < sizeof(void*)) {
+        alignment = sizeof(void*);
+    }
+
+#if defined(_POSIX_C_SOURCE) && !defined(__sun__)
+    int ret;
+    ret = posix_memalign(&ptr, alignment, size);
+    if (ret != 0) {
+        errno = ret;
+        ptr = NULL;
+    }
+#elif defined(CONFIG_BSD)
+    ptr = valloc(size);
+#else
+    ptr = memalign(alignment, size);
+#endif
+    trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
+/* alloc shared memory pages */
+void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment)
+{
+    size_t align = QEMU_VMALLOC_ALIGN;
+    void *ptr = qemu_ram_mmap(-1, size, align, false);
+
+    if (ptr == MAP_FAILED) {
+        return NULL;
+    }
+
+    if (alignment) {
+        *alignment = align;
+    }
+
+    trace_qemu_anon_ram_alloc(size, ptr);
+    return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+    free(ptr);
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+    trace_qemu_anon_ram_free(ptr, size);
+    qemu_ram_munmap(ptr, size);
+}
+
+void qemu_set_block(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFL);
+    fcntl(fd, F_SETFL, f & ~O_NONBLOCK);
+}
+
+void qemu_set_nonblock(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFL);
+    fcntl(fd, F_SETFL, f | O_NONBLOCK);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+    int val = 1, ret;
+
+    ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR,
+                     (const char *)&val, sizeof(val));
+
+    assert(ret == 0);
+
+    return ret;
+}
+
+void qemu_set_cloexec(int fd)
+{
+    int f;
+    f = fcntl(fd, F_GETFD);
+    fcntl(fd, F_SETFD, f | FD_CLOEXEC);
+}
+
+/*
+ * Creates a pipe with FD_CLOEXEC set on both file descriptors
+ */
+int qemu_pipe(int pipefd[2])
+{
+    int ret;
+
+#ifdef CONFIG_PIPE2
+    ret = pipe2(pipefd, O_CLOEXEC);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    ret = pipe(pipefd);
+    if (ret == 0) {
+        qemu_set_cloexec(pipefd[0]);
+        qemu_set_cloexec(pipefd[1]);
+    }
+
+    return ret;
+}
+
+int qemu_utimens(const char *path, const struct timespec *times)
+{
+    struct timeval tv[2], tv_now;
+    struct stat st;
+    int i;
+#ifdef CONFIG_UTIMENSAT
+    int ret;
+
+    ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW);
+    if (ret != -1 || errno != ENOSYS) {
+        return ret;
+    }
+#endif
+    /* Fallback: use utimes() instead of utimensat() */
+
+    /* happy if special cases */
+    if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) {
+        return 0;
+    }
+    if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) {
+        return utimes(path, NULL);
+    }
+
+    /* prepare for hard cases */
+    if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) {
+        gettimeofday(&tv_now, NULL);
+    }
+    if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) {
+        stat(path, &st);
+    }
+
+    for (i = 0; i < 2; i++) {
+        if (times[i].tv_nsec == UTIME_NOW) {
+            tv[i].tv_sec = tv_now.tv_sec;
+            tv[i].tv_usec = tv_now.tv_usec;
+        } else if (times[i].tv_nsec == UTIME_OMIT) {
+            tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime;
+            tv[i].tv_usec = 0;
+        } else {
+            tv[i].tv_sec = times[i].tv_sec;
+            tv[i].tv_usec = times[i].tv_nsec / 1000;
+        }
+    }
+
+    return utimes(path, &tv[0]);
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+    return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR,
+                           relative_pathname);
+}
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+    struct termios tty;
+
+    tcgetattr(fd, &tty);
+
+    if (echo) {
+        tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN;
+    } else {
+        tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN);
+    }
+
+    tcsetattr(fd, TCSANOW, &tty);
+}
+
+static char exec_dir[PATH_MAX];
+
+void qemu_init_exec_dir(const char *argv0)
+{
+    char *dir;
+    char *p = NULL;
+    char buf[PATH_MAX];
+
+    assert(!exec_dir[0]);
+
+#if defined(__linux__)
+    {
+        int len;
+        len = readlink("/proc/self/exe", buf, sizeof(buf) - 1);
+        if (len > 0) {
+            buf[len] = 0;
+            p = buf;
+        }
+    }
+#elif defined(__FreeBSD__)
+    {
+        static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
+        size_t len = sizeof(buf) - 1;
+
+        *buf = '\0';
+        if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) &&
+            *buf) {
+            buf[sizeof(buf) - 1] = '\0';
+            p = buf;
+        }
+    }
+#endif
+    /* If we don't have any way of figuring out the actual executable
+       location then try argv[0].  */
+    if (!p) {
+        if (!argv0) {
+            return;
+        }
+        p = realpath(argv0, buf);
+        if (!p) {
+            return;
+        }
+    }
+    dir = dirname(p);
+
+    pstrcpy(exec_dir, sizeof(exec_dir), dir);
+}
+
+char *qemu_get_exec_dir(void)
+{
+    return g_strdup(exec_dir);
+}
+
+static sigjmp_buf sigjump;
+
+static void sigbus_handler(int signal)
+{
+    siglongjmp(sigjump, 1);
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory)
+{
+    int ret;
+    struct sigaction act, oldact;
+    sigset_t set, oldset;
+
+    memset(&act, 0, sizeof(act));
+    act.sa_handler = &sigbus_handler;
+    act.sa_flags = 0;
+
+    ret = sigaction(SIGBUS, &act, &oldact);
+    if (ret) {
+        perror("os_mem_prealloc: failed to install signal handler");
+        exit(1);
+    }
+
+    /* unblock SIGBUS */
+    sigemptyset(&set);
+    sigaddset(&set, SIGBUS);
+    pthread_sigmask(SIG_UNBLOCK, &set, &oldset);
+
+    if (sigsetjmp(sigjump, 1)) {
+        fprintf(stderr, "os_mem_prealloc: Insufficient free host memory "
+                        "pages available to allocate guest RAM\n");
+        exit(1);
+    } else {
+        int i;
+        size_t hpagesize = qemu_fd_getpagesize(fd);
+        size_t numpages = DIV_ROUND_UP(memory, hpagesize);
+
+        /* MAP_POPULATE silently ignores failures */
+        for (i = 0; i < numpages; i++) {
+            memset(area + (hpagesize * i), 0, 1);
+        }
+
+        ret = sigaction(SIGBUS, &oldact, NULL);
+        if (ret) {
+            perror("os_mem_prealloc: failed to reinstall signal handler");
+            exit(1);
+        }
+
+        pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+    }
+}
+
+
+static struct termios oldtty;
+
+static void term_exit(void)
+{
+    tcsetattr(0, TCSANOW, &oldtty);
+}
+
+static void term_init(void)
+{
+    struct termios tty;
+
+    tcgetattr(0, &tty);
+    oldtty = tty;
+
+    tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP
+                          |INLCR|IGNCR|ICRNL|IXON);
+    tty.c_oflag |= OPOST;
+    tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN);
+    tty.c_cflag &= ~(CSIZE|PARENB);
+    tty.c_cflag |= CS8;
+    tty.c_cc[VMIN] = 1;
+    tty.c_cc[VTIME] = 0;
+
+    tcsetattr(0, TCSANOW, &tty);
+
+    atexit(term_exit);
+}
+
+int qemu_read_password(char *buf, int buf_size)
+{
+    uint8_t ch;
+    int i, ret;
+
+    printf("password: ");
+    fflush(stdout);
+    term_init();
+    i = 0;
+    for (;;) {
+        ret = read(0, &ch, 1);
+        if (ret == -1) {
+            if (errno == EAGAIN || errno == EINTR) {
+                continue;
+            } else {
+                break;
+            }
+        } else if (ret == 0) {
+            ret = -1;
+            break;
+        } else {
+            if (ch == '\r' ||
+                ch == '\n') {
+                ret = 0;
+                break;
+            }
+            if (i < (buf_size - 1)) {
+                buf[i++] = ch;
+            }
+        }
+    }
+    term_exit();
+    buf[i] = '\0';
+    printf("\n");
+    return ret;
+}
+
+
+pid_t qemu_fork(Error **errp)
+{
+    sigset_t oldmask, newmask;
+    struct sigaction sig_action;
+    int saved_errno;
+    pid_t pid;
+
+    /*
+     * Need to block signals now, so that child process can safely
+     * kill off caller's signal handlers without a race.
+     */
+    sigfillset(&newmask);
+    if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) {
+        error_setg_errno(errp, errno,
+                         "cannot block signals");
+        return -1;
+    }
+
+    pid = fork();
+    saved_errno = errno;
+
+    if (pid < 0) {
+        /* attempt to restore signal mask, but ignore failure, to
+         * avoid obscuring the fork failure */
+        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+        error_setg_errno(errp, saved_errno,
+                         "cannot fork child process");
+        errno = saved_errno;
+        return -1;
+    } else if (pid) {
+        /* parent process */
+
+        /* Restore our original signal mask now that the child is
+         * safely running. Only documented failures are EFAULT (not
+         * possible, since we are using just-grabbed mask) or EINVAL
+         * (not possible, since we are using correct arguments).  */
+        (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL);
+    } else {
+        /* child process */
+        size_t i;
+
+        /* Clear out all signal handlers from parent so nothing
+         * unexpected can happen in our child once we unblock
+         * signals */
+        sig_action.sa_handler = SIG_DFL;
+        sig_action.sa_flags = 0;
+        sigemptyset(&sig_action.sa_mask);
+
+        for (i = 1; i < NSIG; i++) {
+            /* Only possible errors are EFAULT or EINVAL The former
+             * won't happen, the latter we expect, so no need to check
+             * return value */
+            (void)sigaction(i, &sig_action, NULL);
+        }
+
+        /* Unmask all signals in child, since we've no idea what the
+         * caller's done with their signal mask and don't want to
+         * propagate that to children */
+        sigemptyset(&newmask);
+        if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) {
+            Error *local_err = NULL;
+            error_setg_errno(&local_err, errno,
+                             "cannot unblock signals");
+            error_report_err(local_err);
+            _exit(1);
+        }
+    }
+    return pid;
+}
diff --git a/src/util/oslib-win32.c b/src/util/oslib-win32.c
new file mode 100644
index 0000000..6a47019
--- /dev/null
+++ b/src/util/oslib-win32.c
@@ -0,0 +1,507 @@
+/*
+ * os-win32.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * QEMU library functions for win32 which are shared between QEMU and
+ * the QEMU tools.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ * The implementation of g_poll (functions poll_rest, g_poll) at the end of
+ * this file are based on code from GNOME glib-2 and use a different license,
+ * see the license comment there.
+ */
+#include <windows.h>
+#include <glib.h>
+#include <stdlib.h>
+#include "config-host.h"
+#include "sysemu/sysemu.h"
+#include "qemu/main-loop.h"
+#include "trace.h"
+#include "qemu/sockets.h"
+
+/* this must come after including "trace.h" */
+#include <shlobj.h>
+
+void *qemu_oom_check(void *ptr)
+{
+    if (ptr == NULL) {
+        fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError());
+        abort();
+    }
+    return ptr;
+}
+
+void *qemu_try_memalign(size_t alignment, size_t size)
+{
+    void *ptr;
+
+    if (!size) {
+        abort();
+    }
+    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+    trace_qemu_memalign(alignment, size, ptr);
+    return ptr;
+}
+
+void *qemu_memalign(size_t alignment, size_t size)
+{
+    return qemu_oom_check(qemu_try_memalign(alignment, size));
+}
+
+void *qemu_anon_ram_alloc(size_t size, uint64_t *align)
+{
+    void *ptr;
+
+    /* FIXME: this is not exactly optimal solution since VirtualAlloc
+       has 64Kb granularity, but at least it guarantees us that the
+       memory is page aligned. */
+    ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE);
+    trace_qemu_anon_ram_alloc(size, ptr);
+    return ptr;
+}
+
+void qemu_vfree(void *ptr)
+{
+    trace_qemu_vfree(ptr);
+    if (ptr) {
+        VirtualFree(ptr, 0, MEM_RELEASE);
+    }
+}
+
+void qemu_anon_ram_free(void *ptr, size_t size)
+{
+    trace_qemu_anon_ram_free(ptr, size);
+    if (ptr) {
+        VirtualFree(ptr, 0, MEM_RELEASE);
+    }
+}
+
+#ifndef CONFIG_LOCALTIME_R
+/* FIXME: add proper locking */
+struct tm *gmtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = gmtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+
+/* FIXME: add proper locking */
+struct tm *localtime_r(const time_t *timep, struct tm *result)
+{
+    struct tm *p = localtime(timep);
+    memset(result, 0, sizeof(*result));
+    if (p) {
+        *result = *p;
+        p = result;
+    }
+    return p;
+}
+#endif /* CONFIG_LOCALTIME_R */
+
+void qemu_set_block(int fd)
+{
+    unsigned long opt = 0;
+    WSAEventSelect(fd, NULL, 0);
+    ioctlsocket(fd, FIONBIO, &opt);
+}
+
+void qemu_set_nonblock(int fd)
+{
+    unsigned long opt = 1;
+    ioctlsocket(fd, FIONBIO, &opt);
+    qemu_fd_register(fd);
+}
+
+int socket_set_fast_reuse(int fd)
+{
+    /* Enabling the reuse of an endpoint that was used by a socket still in
+     * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows
+     * fast reuse is the default and SO_REUSEADDR does strange things. So we
+     * don't have to do anything here. More info can be found at:
+     * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */
+    return 0;
+}
+
+int inet_aton(const char *cp, struct in_addr *ia)
+{
+    uint32_t addr = inet_addr(cp);
+    if (addr == 0xffffffff) {
+        return 0;
+    }
+    ia->s_addr = addr;
+    return 1;
+}
+
+void qemu_set_cloexec(int fd)
+{
+}
+
+/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */
+#define _W32_FT_OFFSET (116444736000000000ULL)
+
+int qemu_gettimeofday(qemu_timeval *tp)
+{
+  union {
+    unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */
+    FILETIME ft;
+  }  _now;
+
+  if(tp) {
+      GetSystemTimeAsFileTime (&_now.ft);
+      tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL );
+      tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL);
+  }
+  /* Always return 0 as per Open Group Base Specifications Issue 6.
+     Do not set errno on error.  */
+  return 0;
+}
+
+int qemu_get_thread_id(void)
+{
+    return GetCurrentThreadId();
+}
+
+char *
+qemu_get_local_state_pathname(const char *relative_pathname)
+{
+    HRESULT result;
+    char base_path[MAX_PATH+1] = "";
+
+    result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL,
+                             /* SHGFP_TYPE_CURRENT */ 0, base_path);
+    if (result != S_OK) {
+        /* misconfigured environment */
+        g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result);
+        abort();
+    }
+    return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path,
+                           relative_pathname);
+}
+
+void qemu_set_tty_echo(int fd, bool echo)
+{
+    HANDLE handle = (HANDLE)_get_osfhandle(fd);
+    DWORD dwMode = 0;
+
+    if (handle == INVALID_HANDLE_VALUE) {
+        return;
+    }
+
+    GetConsoleMode(handle, &dwMode);
+
+    if (echo) {
+        SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT);
+    } else {
+        SetConsoleMode(handle,
+                       dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT));
+    }
+}
+
+static char exec_dir[PATH_MAX];
+
+void qemu_init_exec_dir(const char *argv0)
+{
+
+    char *p;
+    char buf[MAX_PATH];
+    DWORD len;
+
+    len = GetModuleFileName(NULL, buf, sizeof(buf) - 1);
+    if (len == 0) {
+        return;
+    }
+
+    buf[len] = 0;
+    p = buf + len - 1;
+    while (p != buf && *p != '\\') {
+        p--;
+    }
+    *p = 0;
+    if (access(buf, R_OK) == 0) {
+        pstrcpy(exec_dir, sizeof(exec_dir), buf);
+    }
+}
+
+char *qemu_get_exec_dir(void)
+{
+    return g_strdup(exec_dir);
+}
+
+/*
+ * The original implementation of g_poll from glib has a problem on Windows
+ * when using timeouts < 10 ms.
+ *
+ * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead
+ * of wait. This causes significant performance degradation of QEMU.
+ *
+ * The following code is a copy of the original code from glib/gpoll.c
+ * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19).
+ * Some debug code was removed and the code was reformatted.
+ * All other code modifications are marked with 'QEMU'.
+ */
+
+/*
+ * gpoll.c: poll(2) abstraction
+ * Copyright 1998 Owen Taylor
+ * Copyright 2008 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles,
+                     GPollFD *fds, guint nfds, gint timeout)
+{
+    DWORD ready;
+    GPollFD *f;
+    int recursed_result;
+
+    if (poll_msgs) {
+        /* Wait for either messages or handles
+         * -> Use MsgWaitForMultipleObjectsEx
+         */
+        ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout,
+                                            QS_ALLINPUT, MWMO_ALERTABLE);
+
+        if (ready == WAIT_FAILED) {
+            gchar *emsg = g_win32_error_message(GetLastError());
+            g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg);
+            g_free(emsg);
+        }
+    } else if (nhandles == 0) {
+        /* No handles to wait for, just the timeout */
+        if (timeout == INFINITE) {
+            ready = WAIT_FAILED;
+        } else {
+            SleepEx(timeout, TRUE);
+            ready = WAIT_TIMEOUT;
+        }
+    } else {
+        /* Wait for just handles
+         * -> Use WaitForMultipleObjectsEx
+         */
+        ready =
+            WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE);
+        if (ready == WAIT_FAILED) {
+            gchar *emsg = g_win32_error_message(GetLastError());
+            g_warning("WaitForMultipleObjectsEx failed: %s", emsg);
+            g_free(emsg);
+        }
+    }
+
+    if (ready == WAIT_FAILED) {
+        return -1;
+    } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) {
+        return 0;
+    } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) {
+        for (f = fds; f < &fds[nfds]; ++f) {
+            if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) {
+                f->revents |= G_IO_IN;
+            }
+        }
+
+        /* If we have a timeout, or no handles to poll, be satisfied
+         * with just noticing we have messages waiting.
+         */
+        if (timeout != 0 || nhandles == 0) {
+            return 1;
+        }
+
+        /* If no timeout and handles to poll, recurse to poll them,
+         * too.
+         */
+        recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
+        return (recursed_result == -1) ? -1 : 1 + recursed_result;
+    } else if (/* QEMU: removed the following unneeded statement which causes
+                * a compiler warning: ready >= WAIT_OBJECT_0 && */
+               ready < WAIT_OBJECT_0 + nhandles) {
+        for (f = fds; f < &fds[nfds]; ++f) {
+            if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) {
+                f->revents = f->events;
+            }
+        }
+
+        /* If no timeout and polling several handles, recurse to poll
+         * the rest of them.
+         */
+        if (timeout == 0 && nhandles > 1) {
+            /* Remove the handle that fired */
+            int i;
+            if (ready < nhandles - 1) {
+                for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) {
+                    handles[i-1] = handles[i];
+                }
+            }
+            nhandles--;
+            recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0);
+            return (recursed_result == -1) ? -1 : 1 + recursed_result;
+        }
+        return 1;
+    }
+
+    return 0;
+}
+
+gint g_poll(GPollFD *fds, guint nfds, gint timeout)
+{
+    HANDLE handles[MAXIMUM_WAIT_OBJECTS];
+    gboolean poll_msgs = FALSE;
+    GPollFD *f;
+    gint nhandles = 0;
+    int retval;
+
+    for (f = fds; f < &fds[nfds]; ++f) {
+        if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) {
+            poll_msgs = TRUE;
+        } else if (f->fd > 0) {
+            /* Don't add the same handle several times into the array, as
+             * docs say that is not allowed, even if it actually does seem
+             * to work.
+             */
+            gint i;
+
+            for (i = 0; i < nhandles; i++) {
+                if (handles[i] == (HANDLE) f->fd) {
+                    break;
+                }
+            }
+
+            if (i == nhandles) {
+                if (nhandles == MAXIMUM_WAIT_OBJECTS) {
+                    g_warning("Too many handles to wait for!\n");
+                    break;
+                } else {
+                    handles[nhandles++] = (HANDLE) f->fd;
+                }
+            }
+        }
+    }
+
+    for (f = fds; f < &fds[nfds]; ++f) {
+        f->revents = 0;
+    }
+
+    if (timeout == -1) {
+        timeout = INFINITE;
+    }
+
+    /* Polling for several things? */
+    if (nhandles > 1 || (nhandles > 0 && poll_msgs)) {
+        /* First check if one or several of them are immediately
+         * available
+         */
+        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0);
+
+        /* If not, and we have a significant timeout, poll again with
+         * timeout then. Note that this will return indication for only
+         * one event, or only for messages. We ignore timeouts less than
+         * ten milliseconds as they are mostly pointless on Windows, the
+         * MsgWaitForMultipleObjectsEx() call will timeout right away
+         * anyway.
+         *
+         * Modification for QEMU: replaced timeout >= 10 by timeout > 0.
+         */
+        if (retval == 0 && (timeout == INFINITE || timeout > 0)) {
+            retval = poll_rest(poll_msgs, handles, nhandles,
+                               fds, nfds, timeout);
+        }
+    } else {
+        /* Just polling for one thing, so no need to check first if
+         * available immediately
+         */
+        retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout);
+    }
+
+    if (retval == -1) {
+        for (f = fds; f < &fds[nfds]; ++f) {
+            f->revents = 0;
+        }
+    }
+
+    return retval;
+}
+
+int getpagesize(void)
+{
+    SYSTEM_INFO system_info;
+
+    GetSystemInfo(&system_info);
+    return system_info.dwPageSize;
+}
+
+void os_mem_prealloc(int fd, char *area, size_t memory)
+{
+    int i;
+    size_t pagesize = getpagesize();
+
+    memory = (memory + pagesize - 1) & -pagesize;
+    for (i = 0; i < memory / pagesize; i++) {
+        memset(area + pagesize * i, 0, 1);
+    }
+}
+
+
+/* XXX: put correct support for win32 */
+int qemu_read_password(char *buf, int buf_size)
+{
+    int c, i;
+
+    printf("Password: ");
+    fflush(stdout);
+    i = 0;
+    for (;;) {
+        c = getchar();
+        if (c < 0) {
+            buf[i] = '\0';
+            return -1;
+        } else if (c == '\n') {
+            break;
+        } else if (i < (buf_size - 1)) {
+            buf[i++] = c;
+        }
+    }
+    buf[i] = '\0';
+    return 0;
+}
+
+
+pid_t qemu_fork(Error **errp)
+{
+    errno = ENOSYS;
+    error_setg_errno(errp, errno,
+                     "cannot fork child process");
+    return -1;
+}
diff --git a/src/util/path.c b/src/util/path.c
new file mode 100644
index 0000000..4e4877e
--- /dev/null
+++ b/src/util/path.c
@@ -0,0 +1,181 @@
+/* Code to mangle pathnames into those matching a given prefix.
+   eg. open("/lib/foo.so") => open("/usr/gnemul/i386-linux/lib/foo.so");
+
+   The assumption is that this area does not change.
+*/
+#include <sys/types.h>
+#include <sys/param.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+#include "qemu-common.h"
+
+struct pathelem
+{
+    /* Name of this, eg. lib */
+    char *name;
+    /* Full path name, eg. /usr/gnemul/x86-linux/lib. */
+    char *pathname;
+    struct pathelem *parent;
+    /* Children */
+    unsigned int num_entries;
+    struct pathelem *entries[0];
+};
+
+static struct pathelem *base;
+
+/* First N chars of S1 match S2, and S2 is N chars long. */
+static int strneq(const char *s1, unsigned int n, const char *s2)
+{
+    unsigned int i;
+
+    for (i = 0; i < n; i++)
+        if (s1[i] != s2[i])
+            return 0;
+    return s2[i] == 0;
+}
+
+static struct pathelem *add_entry(struct pathelem *root, const char *name,
+                                  unsigned type);
+
+static struct pathelem *new_entry(const char *root,
+                                  struct pathelem *parent,
+                                  const char *name)
+{
+    struct pathelem *new = g_malloc(sizeof(*new));
+    new->name = g_strdup(name);
+    new->pathname = g_strdup_printf("%s/%s", root, name);
+    new->num_entries = 0;
+    return new;
+}
+
+#define streq(a,b) (strcmp((a), (b)) == 0)
+
+/* Not all systems provide this feature */
+#if defined(DT_DIR) && defined(DT_UNKNOWN) && defined(DT_LNK)
+# define dirent_type(dirent) ((dirent)->d_type)
+# define is_dir_maybe(type) \
+    ((type) == DT_DIR || (type) == DT_UNKNOWN || (type) == DT_LNK)
+#else
+# define dirent_type(dirent) (1)
+# define is_dir_maybe(type)  (type)
+#endif
+
+static struct pathelem *add_dir_maybe(struct pathelem *path)
+{
+    DIR *dir;
+
+    if ((dir = opendir(path->pathname)) != NULL) {
+        struct dirent *dirent;
+
+        while ((dirent = readdir(dir)) != NULL) {
+            if (!streq(dirent->d_name,".") && !streq(dirent->d_name,"..")){
+                path = add_entry(path, dirent->d_name, dirent_type(dirent));
+            }
+        }
+        closedir(dir);
+    }
+    return path;
+}
+
+static struct pathelem *add_entry(struct pathelem *root, const char *name,
+                                  unsigned type)
+{
+    struct pathelem **e;
+
+    root->num_entries++;
+
+    root = g_realloc(root, sizeof(*root)
+                   + sizeof(root->entries[0])*root->num_entries);
+    e = &root->entries[root->num_entries-1];
+
+    *e = new_entry(root->pathname, root, name);
+    if (is_dir_maybe(type)) {
+        *e = add_dir_maybe(*e);
+    }
+
+    return root;
+}
+
+/* This needs to be done after tree is stabilized (ie. no more reallocs!). */
+static void set_parents(struct pathelem *child, struct pathelem *parent)
+{
+    unsigned int i;
+
+    child->parent = parent;
+    for (i = 0; i < child->num_entries; i++)
+        set_parents(child->entries[i], child);
+}
+
+/* FIXME: Doesn't handle DIR/.. where DIR is not in emulated dir. */
+static const char *
+follow_path(const struct pathelem *cursor, const char *name)
+{
+    unsigned int i, namelen;
+
+    name += strspn(name, "/");
+    namelen = strcspn(name, "/");
+
+    if (namelen == 0)
+        return cursor->pathname;
+
+    if (strneq(name, namelen, ".."))
+        return follow_path(cursor->parent, name + namelen);
+
+    if (strneq(name, namelen, "."))
+        return follow_path(cursor, name + namelen);
+
+    for (i = 0; i < cursor->num_entries; i++)
+        if (strneq(name, namelen, cursor->entries[i]->name))
+            return follow_path(cursor->entries[i], name + namelen);
+
+    /* Not found */
+    return NULL;
+}
+
+void init_paths(const char *prefix)
+{
+    char pref_buf[PATH_MAX];
+
+    if (prefix[0] == '\0' ||
+        !strcmp(prefix, "/"))
+        return;
+
+    if (prefix[0] != '/') {
+        char *cwd = getcwd(NULL, 0);
+        size_t pref_buf_len = sizeof(pref_buf);
+
+        if (!cwd)
+            abort();
+        pstrcpy(pref_buf, sizeof(pref_buf), cwd);
+        pstrcat(pref_buf, pref_buf_len, "/");
+        pstrcat(pref_buf, pref_buf_len, prefix);
+        free(cwd);
+    } else
+        pstrcpy(pref_buf, sizeof(pref_buf), prefix + 1);
+
+    base = new_entry("", NULL, pref_buf);
+    base = add_dir_maybe(base);
+    if (base->num_entries == 0) {
+        g_free(base->pathname);
+        g_free(base->name);
+        g_free(base);
+        base = NULL;
+    } else {
+        set_parents(base, base);
+    }
+}
+
+/* Look for path in emulation dir, otherwise return name. */
+const char *path(const char *name)
+{
+    /* Only do absolute paths: quick and dirty, but should mostly be OK.
+       Could do relative by tracking cwd. */
+    if (!base || !name || name[0] != '/')
+        return name;
+
+    return follow_path(base, name) ?: name;
+}
diff --git a/src/util/qemu-config.c b/src/util/qemu-config.c
new file mode 100644
index 0000000..687fd34
--- /dev/null
+++ b/src/util/qemu-config.c
@@ -0,0 +1,573 @@
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qemu/option.h"
+#include "qemu/config-file.h"
+#include "qapi/error.h"
+#include "qmp-commands.h"
+
+static QemuOptsList *vm_config_groups[48];
+static QemuOptsList *drive_config_groups[4];
+
+static QemuOptsList *find_list(QemuOptsList **lists, const char *group,
+                               Error **errp)
+{
+    int i;
+
+    for (i = 0; lists[i] != NULL; i++) {
+        if (strcmp(lists[i]->name, group) == 0)
+            break;
+    }
+    if (lists[i] == NULL) {
+        error_setg(errp, "There is no option group '%s'", group);
+    }
+    return lists[i];
+}
+
+QemuOptsList *qemu_find_opts(const char *group)
+{
+    QemuOptsList *ret;
+    Error *local_err = NULL;
+
+    ret = find_list(vm_config_groups, group, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+    }
+
+    return ret;
+}
+
+QemuOpts *qemu_find_opts_singleton(const char *group)
+{
+    QemuOptsList *list;
+    QemuOpts *opts;
+
+    list = qemu_find_opts(group);
+    assert(list);
+    opts = qemu_opts_find(list, NULL);
+    if (!opts) {
+        opts = qemu_opts_create(list, NULL, 0, &error_abort);
+    }
+    return opts;
+}
+
+static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc)
+{
+    CommandLineParameterInfoList *param_list = NULL, *entry;
+    CommandLineParameterInfo *info;
+    int i;
+
+    for (i = 0; desc[i].name != NULL; i++) {
+        info = g_malloc0(sizeof(*info));
+        info->name = g_strdup(desc[i].name);
+
+        switch (desc[i].type) {
+        case QEMU_OPT_STRING:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_STRING;
+            break;
+        case QEMU_OPT_BOOL:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN;
+            break;
+        case QEMU_OPT_NUMBER:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER;
+            break;
+        case QEMU_OPT_SIZE:
+            info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE;
+            break;
+        }
+
+        if (desc[i].help) {
+            info->has_help = true;
+            info->help = g_strdup(desc[i].help);
+        }
+        if (desc[i].def_value_str) {
+            info->has_q_default = true;
+            info->q_default = g_strdup(desc[i].def_value_str);
+        }
+
+        entry = g_malloc0(sizeof(*entry));
+        entry->value = info;
+        entry->next = param_list;
+        param_list = entry;
+    }
+
+    return param_list;
+}
+
+/* remove repeated entry from the info list */
+static void cleanup_infolist(CommandLineParameterInfoList *head)
+{
+    CommandLineParameterInfoList *pre_entry, *cur, *del_entry;
+
+    cur = head;
+    while (cur->next) {
+        pre_entry = head;
+        while (pre_entry != cur->next) {
+            if (!strcmp(pre_entry->value->name, cur->next->value->name)) {
+                del_entry = cur->next;
+                cur->next = cur->next->next;
+                g_free(del_entry);
+                break;
+            }
+            pre_entry = pre_entry->next;
+        }
+        cur = cur->next;
+    }
+}
+
+/* merge the description items of two parameter infolists */
+static void connect_infolist(CommandLineParameterInfoList *head,
+                             CommandLineParameterInfoList *new)
+{
+    CommandLineParameterInfoList *cur;
+
+    cur = head;
+    while (cur->next) {
+        cur = cur->next;
+    }
+    cur->next = new;
+}
+
+/* access all the local QemuOptsLists for drive option */
+static CommandLineParameterInfoList *get_drive_infolist(void)
+{
+    CommandLineParameterInfoList *head = NULL, *cur;
+    int i;
+
+    for (i = 0; drive_config_groups[i] != NULL; i++) {
+        if (!head) {
+            head = query_option_descs(drive_config_groups[i]->desc);
+        } else {
+            cur = query_option_descs(drive_config_groups[i]->desc);
+            connect_infolist(head, cur);
+        }
+    }
+    cleanup_infolist(head);
+
+    return head;
+}
+
+/* restore machine options that are now machine's properties */
+static QemuOptsList machine_opts = {
+    .merge_lists = true,
+    .head = QTAILQ_HEAD_INITIALIZER(machine_opts.head),
+    .desc = {
+        {
+            .name = "type",
+            .type = QEMU_OPT_STRING,
+            .help = "emulated machine"
+        },{
+            .name = "accel",
+            .type = QEMU_OPT_STRING,
+            .help = "accelerator list",
+        },{
+            .name = "kernel_irqchip",
+            .type = QEMU_OPT_BOOL,
+            .help = "use KVM in-kernel irqchip",
+        },{
+            .name = "kvm_shadow_mem",
+            .type = QEMU_OPT_SIZE,
+            .help = "KVM shadow MMU size",
+        },{
+            .name = "kernel",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux kernel image file",
+        },{
+            .name = "initrd",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux initial ramdisk file",
+        },{
+            .name = "append",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux kernel command line",
+        },{
+            .name = "dtb",
+            .type = QEMU_OPT_STRING,
+            .help = "Linux kernel device tree file",
+        },{
+            .name = "dumpdtb",
+            .type = QEMU_OPT_STRING,
+            .help = "Dump current dtb to a file and quit",
+        },{
+            .name = "phandle_start",
+            .type = QEMU_OPT_NUMBER,
+            .help = "The first phandle ID we may generate dynamically",
+        },{
+            .name = "dt_compatible",
+            .type = QEMU_OPT_STRING,
+            .help = "Overrides the \"compatible\" property of the dt root node",
+        },{
+            .name = "dump-guest-core",
+            .type = QEMU_OPT_BOOL,
+            .help = "Include guest memory in  a core dump",
+        },{
+            .name = "mem-merge",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable memory merge support",
+        },{
+            .name = "usb",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on/off to enable/disable usb",
+        },{
+            .name = "firmware",
+            .type = QEMU_OPT_STRING,
+            .help = "firmware image",
+        },{
+            .name = "iommu",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on/off to enable/disable Intel IOMMU (VT-d)",
+        },{
+            .name = "suppress-vmdesc",
+            .type = QEMU_OPT_BOOL,
+            .help = "Set on to disable self-describing migration",
+        },{
+            .name = "aes-key-wrap",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable AES key wrapping using the CPACF wrapping key",
+        },{
+            .name = "dea-key-wrap",
+            .type = QEMU_OPT_BOOL,
+            .help = "enable/disable DEA key wrapping using the CPACF wrapping key",
+        },
+        { /* End of list */ }
+    }
+};
+
+CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option,
+                                                          const char *option,
+                                                          Error **errp)
+{
+    CommandLineOptionInfoList *conf_list = NULL, *entry;
+    CommandLineOptionInfo *info;
+    int i;
+
+    for (i = 0; vm_config_groups[i] != NULL; i++) {
+        if (!has_option || !strcmp(option, vm_config_groups[i]->name)) {
+            info = g_malloc0(sizeof(*info));
+            info->option = g_strdup(vm_config_groups[i]->name);
+            if (!strcmp("drive", vm_config_groups[i]->name)) {
+                info->parameters = get_drive_infolist();
+            } else if (!strcmp("machine", vm_config_groups[i]->name)) {
+                info->parameters = query_option_descs(machine_opts.desc);
+            } else {
+                info->parameters =
+                    query_option_descs(vm_config_groups[i]->desc);
+            }
+            entry = g_malloc0(sizeof(*entry));
+            entry->value = info;
+            entry->next = conf_list;
+            conf_list = entry;
+        }
+    }
+
+    if (conf_list == NULL) {
+        error_setg(errp, "invalid option name: %s", option);
+    }
+
+    return conf_list;
+}
+
+QemuOptsList *qemu_find_opts_err(const char *group, Error **errp)
+{
+    return find_list(vm_config_groups, group, errp);
+}
+
+void qemu_add_drive_opts(QemuOptsList *list)
+{
+    int entries, i;
+
+    entries = ARRAY_SIZE(drive_config_groups);
+    entries--; /* keep list NULL terminated */
+    for (i = 0; i < entries; i++) {
+        if (drive_config_groups[i] == NULL) {
+            drive_config_groups[i] = list;
+            return;
+        }
+    }
+    fprintf(stderr, "ran out of space in drive_config_groups");
+    abort();
+}
+
+void qemu_add_opts(QemuOptsList *list)
+{
+    int entries, i;
+
+    entries = ARRAY_SIZE(vm_config_groups);
+    entries--; /* keep list NULL terminated */
+    for (i = 0; i < entries; i++) {
+        if (vm_config_groups[i] == NULL) {
+            vm_config_groups[i] = list;
+            return;
+        }
+    }
+    fprintf(stderr, "ran out of space in vm_config_groups");
+    abort();
+}
+
+int qemu_set_option(const char *str)
+{
+    Error *local_err = NULL;
+    char group[64], id[64], arg[64];
+    QemuOptsList *list;
+    QemuOpts *opts;
+    int rc, offset;
+
+    rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset);
+    if (rc < 3 || str[offset] != '=') {
+        error_report("can't parse: \"%s\"", str);
+        return -1;
+    }
+
+    list = qemu_find_opts(group);
+    if (list == NULL) {
+        return -1;
+    }
+
+    opts = qemu_opts_find(list, id);
+    if (!opts) {
+        error_report("there is no %s \"%s\" defined",
+                     list->name, id);
+        return -1;
+    }
+
+    qemu_opt_set(opts, arg, str + offset + 1, &local_err);
+    if (local_err) {
+        error_report_err(local_err);
+        return -1;
+    }
+    return 0;
+}
+
+struct ConfigWriteData {
+    QemuOptsList *list;
+    FILE *fp;
+};
+
+static int config_write_opt(void *opaque, const char *name, const char *value,
+                            Error **errp)
+{
+    struct ConfigWriteData *data = opaque;
+
+    fprintf(data->fp, "  %s = \"%s\"\n", name, value);
+    return 0;
+}
+
+static int config_write_opts(void *opaque, QemuOpts *opts, Error **errp)
+{
+    struct ConfigWriteData *data = opaque;
+    const char *id = qemu_opts_id(opts);
+
+    if (id) {
+        fprintf(data->fp, "[%s \"%s\"]\n", data->list->name, id);
+    } else {
+        fprintf(data->fp, "[%s]\n", data->list->name);
+    }
+    qemu_opt_foreach(opts, config_write_opt, data, NULL);
+    fprintf(data->fp, "\n");
+    return 0;
+}
+
+void qemu_config_write(FILE *fp)
+{
+    struct ConfigWriteData data = { .fp = fp };
+    QemuOptsList **lists = vm_config_groups;
+    int i;
+
+    fprintf(fp, "# qemu config file\n\n");
+    for (i = 0; lists[i] != NULL; i++) {
+        data.list = lists[i];
+        qemu_opts_foreach(data.list, config_write_opts, &data, NULL);
+    }
+}
+
+int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname)
+{
+    char line[1024], group[64], id[64], arg[64], value[1024];
+    Location loc;
+    QemuOptsList *list = NULL;
+    Error *local_err = NULL;
+    QemuOpts *opts = NULL;
+    int res = -1, lno = 0;
+
+    loc_push_none(&loc);
+    while (fgets(line, sizeof(line), fp) != NULL) {
+        loc_set_file(fname, ++lno);
+        if (line[0] == '\n') {
+            /* skip empty lines */
+            continue;
+        }
+        if (line[0] == '#') {
+            /* comment */
+            continue;
+        }
+        if (sscanf(line, "[%63s \"%63[^\"]\"]", group, id) == 2) {
+            /* group with id */
+            list = find_list(lists, group, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                goto out;
+            }
+            opts = qemu_opts_create(list, id, 1, NULL);
+            continue;
+        }
+        if (sscanf(line, "[%63[^]]]", group) == 1) {
+            /* group without id */
+            list = find_list(lists, group, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                goto out;
+            }
+            opts = qemu_opts_create(list, NULL, 0, &error_abort);
+            continue;
+        }
+        value[0] = '\0';
+        if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 ||
+            sscanf(line, " %63s = \"\"", arg) == 1) {
+            /* arg = value */
+            if (opts == NULL) {
+                error_report("no group defined");
+                goto out;
+            }
+            qemu_opt_set(opts, arg, value, &local_err);
+            if (local_err) {
+                error_report_err(local_err);
+                goto out;
+            }
+            continue;
+        }
+        error_report("parse error");
+        goto out;
+    }
+    if (ferror(fp)) {
+        error_report("error reading file");
+        goto out;
+    }
+    res = 0;
+out:
+    loc_pop(&loc);
+    return res;
+}
+
+int qemu_read_config_file(const char *filename)
+{
+    FILE *f = fopen(filename, "r");
+    int ret;
+
+    if (f == NULL) {
+        return -errno;
+    }
+
+    ret = qemu_config_parse(f, vm_config_groups, filename);
+    fclose(f);
+
+    if (ret == 0) {
+        return 0;
+    } else {
+        return -EINVAL;
+    }
+}
+
+static void config_parse_qdict_section(QDict *options, QemuOptsList *opts,
+                                       Error **errp)
+{
+    QemuOpts *subopts;
+    QDict *subqdict;
+    QList *list = NULL;
+    Error *local_err = NULL;
+    size_t orig_size, enum_size;
+    char *prefix;
+
+    prefix = g_strdup_printf("%s.", opts->name);
+    qdict_extract_subqdict(options, &subqdict, prefix);
+    g_free(prefix);
+    orig_size = qdict_size(subqdict);
+    if (!orig_size) {
+        goto out;
+    }
+
+    subopts = qemu_opts_create(opts, NULL, 0, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
+
+    qemu_opts_absorb_qdict(subopts, subqdict, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        goto out;
+    }
+
+    enum_size = qdict_size(subqdict);
+    if (enum_size < orig_size && enum_size) {
+        error_setg(errp, "Unknown option '%s' for [%s]",
+                   qdict_first(subqdict)->key, opts->name);
+        goto out;
+    }
+
+    if (enum_size) {
+        /* Multiple, enumerated sections */
+        QListEntry *list_entry;
+        unsigned i = 0;
+
+        /* Not required anymore */
+        qemu_opts_del(subopts);
+
+        qdict_array_split(subqdict, &list);
+        if (qdict_size(subqdict)) {
+            error_setg(errp, "Unused option '%s' for [%s]",
+                       qdict_first(subqdict)->key, opts->name);
+            goto out;
+        }
+
+        QLIST_FOREACH_ENTRY(list, list_entry) {
+            QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry));
+            char *opt_name;
+
+            if (!section) {
+                error_setg(errp, "[%s] section (index %u) does not consist of "
+                           "keys", opts->name, i);
+                goto out;
+            }
+
+            opt_name = g_strdup_printf("%s.%u", opts->name, i++);
+            subopts = qemu_opts_create(opts, opt_name, 1, &local_err);
+            g_free(opt_name);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                goto out;
+            }
+
+            qemu_opts_absorb_qdict(subopts, section, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                qemu_opts_del(subopts);
+                goto out;
+            }
+
+            if (qdict_size(section)) {
+                error_setg(errp, "[%s] section doesn't support the option '%s'",
+                           opts->name, qdict_first(section)->key);
+                qemu_opts_del(subopts);
+                goto out;
+            }
+        }
+    }
+
+out:
+    QDECREF(subqdict);
+    QDECREF(list);
+}
+
+void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists,
+                             Error **errp)
+{
+    int i;
+    Error *local_err = NULL;
+
+    for (i = 0; lists[i]; i++) {
+        config_parse_qdict_section(options, lists[i], &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
diff --git a/src/util/qemu-coroutine-io.c b/src/util/qemu-coroutine-io.c
new file mode 100644
index 0000000..e1eae73
--- /dev/null
+++ b/src/util/qemu-coroutine-io.c
@@ -0,0 +1,91 @@
+/*
+ * Coroutine-aware I/O functions
+ *
+ * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation.
+ * Copyright (c) 2011, Red Hat, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu-common.h"
+#include "qemu/sockets.h"
+#include "qemu/coroutine.h"
+#include "qemu/iov.h"
+#include "qemu/main-loop.h"
+
+ssize_t coroutine_fn
+qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt,
+                    size_t offset, size_t bytes, bool do_send)
+{
+    size_t done = 0;
+    ssize_t ret;
+    int err;
+    while (done < bytes) {
+        ret = iov_send_recv(sockfd, iov, iov_cnt,
+                            offset + done, bytes - done, do_send);
+        if (ret > 0) {
+            done += ret;
+        } else if (ret < 0) {
+            err = socket_error();
+            if (err == EAGAIN || err == EWOULDBLOCK) {
+                qemu_coroutine_yield();
+            } else if (done == 0) {
+                return -err;
+            } else {
+                break;
+            }
+        } else if (ret == 0 && !do_send) {
+            /* write (send) should never return 0.
+             * read (recv) returns 0 for end-of-file (-data).
+             * In both cases there's little point retrying,
+             * but we do for write anyway, just in case */
+            break;
+        }
+    }
+    return done;
+}
+
+ssize_t coroutine_fn
+qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send)
+{
+    struct iovec iov = { .iov_base = buf, .iov_len = bytes };
+    return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send);
+}
+
+typedef struct {
+    Coroutine *co;
+    int fd;
+} FDYieldUntilData;
+
+static void fd_coroutine_enter(void *opaque)
+{
+    FDYieldUntilData *data = opaque;
+    qemu_set_fd_handler(data->fd, NULL, NULL, NULL);
+    qemu_coroutine_enter(data->co, NULL);
+}
+
+void coroutine_fn yield_until_fd_readable(int fd)
+{
+    FDYieldUntilData data;
+
+    assert(qemu_in_coroutine());
+    data.co = qemu_coroutine_self();
+    data.fd = fd;
+    qemu_set_fd_handler(fd, fd_coroutine_enter, NULL, &data);
+    qemu_coroutine_yield();
+}
diff --git a/src/util/qemu-coroutine-lock.c b/src/util/qemu-coroutine-lock.c
new file mode 100644
index 0000000..130ee19
--- /dev/null
+++ b/src/util/qemu-coroutine-lock.c
@@ -0,0 +1,186 @@
+/*
+ * coroutine queues and locks
+ *
+ * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+#include "qemu/queue.h"
+#include "trace.h"
+
+void qemu_co_queue_init(CoQueue *queue)
+{
+    QTAILQ_INIT(&queue->entries);
+}
+
+void coroutine_fn qemu_co_queue_wait(CoQueue *queue)
+{
+    Coroutine *self = qemu_coroutine_self();
+    QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next);
+    qemu_coroutine_yield();
+    assert(qemu_in_coroutine());
+}
+
+/**
+ * qemu_co_queue_run_restart:
+ *
+ * Enter each coroutine that was previously marked for restart by
+ * qemu_co_queue_next() or qemu_co_queue_restart_all().  This function is
+ * invoked by the core coroutine code when the current coroutine yields or
+ * terminates.
+ */
+void qemu_co_queue_run_restart(Coroutine *co)
+{
+    Coroutine *next;
+
+    trace_qemu_co_queue_run_restart(co);
+    while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) {
+        QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next);
+        qemu_coroutine_enter(next, NULL);
+    }
+}
+
+static bool qemu_co_queue_do_restart(CoQueue *queue, bool single)
+{
+    Coroutine *self = qemu_coroutine_self();
+    Coroutine *next;
+
+    if (QTAILQ_EMPTY(&queue->entries)) {
+        return false;
+    }
+
+    while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) {
+        QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+        QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next);
+        trace_qemu_co_queue_next(next);
+        if (single) {
+            break;
+        }
+    }
+    return true;
+}
+
+bool coroutine_fn qemu_co_queue_next(CoQueue *queue)
+{
+    assert(qemu_in_coroutine());
+    return qemu_co_queue_do_restart(queue, true);
+}
+
+void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue)
+{
+    assert(qemu_in_coroutine());
+    qemu_co_queue_do_restart(queue, false);
+}
+
+bool qemu_co_enter_next(CoQueue *queue)
+{
+    Coroutine *next;
+
+    next = QTAILQ_FIRST(&queue->entries);
+    if (!next) {
+        return false;
+    }
+
+    QTAILQ_REMOVE(&queue->entries, next, co_queue_next);
+    qemu_coroutine_enter(next, NULL);
+    return true;
+}
+
+bool qemu_co_queue_empty(CoQueue *queue)
+{
+    return QTAILQ_FIRST(&queue->entries) == NULL;
+}
+
+void qemu_co_mutex_init(CoMutex *mutex)
+{
+    memset(mutex, 0, sizeof(*mutex));
+    qemu_co_queue_init(&mutex->queue);
+}
+
+void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_lock_entry(mutex, self);
+
+    while (mutex->locked) {
+        qemu_co_queue_wait(&mutex->queue);
+    }
+
+    mutex->locked = true;
+
+    trace_qemu_co_mutex_lock_return(mutex, self);
+}
+
+void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex)
+{
+    Coroutine *self = qemu_coroutine_self();
+
+    trace_qemu_co_mutex_unlock_entry(mutex, self);
+
+    assert(mutex->locked == true);
+    assert(qemu_in_coroutine());
+
+    mutex->locked = false;
+    qemu_co_queue_next(&mutex->queue);
+
+    trace_qemu_co_mutex_unlock_return(mutex, self);
+}
+
+void qemu_co_rwlock_init(CoRwlock *lock)
+{
+    memset(lock, 0, sizeof(*lock));
+    qemu_co_queue_init(&lock->queue);
+}
+
+void qemu_co_rwlock_rdlock(CoRwlock *lock)
+{
+    while (lock->writer) {
+        qemu_co_queue_wait(&lock->queue);
+    }
+    lock->reader++;
+}
+
+void qemu_co_rwlock_unlock(CoRwlock *lock)
+{
+    assert(qemu_in_coroutine());
+    if (lock->writer) {
+        lock->writer = false;
+        qemu_co_queue_restart_all(&lock->queue);
+    } else {
+        lock->reader--;
+        assert(lock->reader >= 0);
+        /* Wakeup only one waiting writer */
+        if (!lock->reader) {
+            qemu_co_queue_next(&lock->queue);
+        }
+    }
+}
+
+void qemu_co_rwlock_wrlock(CoRwlock *lock)
+{
+    while (lock->writer || lock->reader) {
+        qemu_co_queue_wait(&lock->queue);
+    }
+    lock->writer = true;
+}
diff --git a/src/util/qemu-coroutine-sleep.c b/src/util/qemu-coroutine-sleep.c
new file mode 100644
index 0000000..b35db56
--- /dev/null
+++ b/src/util/qemu-coroutine-sleep.c
@@ -0,0 +1,41 @@
+/*
+ * QEMU coroutine sleep
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "qemu/coroutine.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+typedef struct CoSleepCB {
+    QEMUTimer *ts;
+    Coroutine *co;
+} CoSleepCB;
+
+static void co_sleep_cb(void *opaque)
+{
+    CoSleepCB *sleep_cb = opaque;
+
+    qemu_coroutine_enter(sleep_cb->co, NULL);
+}
+
+void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type,
+                                  int64_t ns)
+{
+    CoSleepCB sleep_cb = {
+        .co = qemu_coroutine_self(),
+    };
+    sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb);
+    timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns);
+    qemu_coroutine_yield();
+    timer_del(sleep_cb.ts);
+    timer_free(sleep_cb.ts);
+}
diff --git a/src/util/qemu-coroutine.c b/src/util/qemu-coroutine.c
new file mode 100644
index 0000000..8953560
--- /dev/null
+++ b/src/util/qemu-coroutine.c
@@ -0,0 +1,146 @@
+/*
+ * QEMU coroutines
+ *
+ * Copyright IBM, Corp. 2011
+ *
+ * Authors:
+ *  Stefan Hajnoczi    <stefanha@linux.vnet.ibm.com>
+ *  Kevin Wolf         <kwolf@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include "trace.h"
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/coroutine.h"
+#include "qemu/coroutine_int.h"
+
+enum {
+    POOL_BATCH_SIZE = 64,
+};
+
+/** Free list to speed up creation */
+static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool);
+static unsigned int release_pool_size;
+static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool);
+static __thread unsigned int alloc_pool_size;
+static __thread Notifier coroutine_pool_cleanup_notifier;
+
+static void coroutine_pool_cleanup(Notifier *n, void *value)
+{
+    Coroutine *co;
+    Coroutine *tmp;
+
+    QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) {
+        QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+        qemu_coroutine_delete(co);
+    }
+}
+
+Coroutine *qemu_coroutine_create(CoroutineEntry *entry)
+{
+    Coroutine *co = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        co = QSLIST_FIRST(&alloc_pool);
+        if (!co) {
+            if (release_pool_size > POOL_BATCH_SIZE) {
+                /* Slow path; a good place to register the destructor, too.  */
+                if (!coroutine_pool_cleanup_notifier.notify) {
+                    coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup;
+                    qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier);
+                }
+
+                /* This is not exact; there could be a little skew between
+                 * release_pool_size and the actual size of release_pool.  But
+                 * it is just a heuristic, it does not need to be perfect.
+                 */
+                alloc_pool_size = atomic_xchg(&release_pool_size, 0);
+                QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool);
+                co = QSLIST_FIRST(&alloc_pool);
+            }
+        }
+        if (co) {
+            QSLIST_REMOVE_HEAD(&alloc_pool, pool_next);
+            alloc_pool_size--;
+        }
+    }
+
+    if (!co) {
+        co = qemu_coroutine_new();
+    }
+
+    co->entry = entry;
+    QTAILQ_INIT(&co->co_queue_wakeup);
+    return co;
+}
+
+static void coroutine_delete(Coroutine *co)
+{
+    co->caller = NULL;
+
+    if (CONFIG_COROUTINE_POOL) {
+        if (release_pool_size < POOL_BATCH_SIZE * 2) {
+            QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next);
+            atomic_inc(&release_pool_size);
+            return;
+        }
+        if (alloc_pool_size < POOL_BATCH_SIZE) {
+            QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next);
+            alloc_pool_size++;
+            return;
+        }
+    }
+
+    qemu_coroutine_delete(co);
+}
+
+void qemu_coroutine_enter(Coroutine *co, void *opaque)
+{
+    Coroutine *self = qemu_coroutine_self();
+    CoroutineAction ret;
+
+    trace_qemu_coroutine_enter(self, co, opaque);
+
+    if (co->caller) {
+        fprintf(stderr, "Co-routine re-entered recursively\n");
+        abort();
+    }
+
+    co->caller = self;
+    co->entry_arg = opaque;
+    ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER);
+
+    qemu_co_queue_run_restart(co);
+
+    switch (ret) {
+    case COROUTINE_YIELD:
+        return;
+    case COROUTINE_TERMINATE:
+        trace_qemu_coroutine_terminate(co);
+        coroutine_delete(co);
+        return;
+    default:
+        abort();
+    }
+}
+
+void coroutine_fn qemu_coroutine_yield(void)
+{
+    Coroutine *self = qemu_coroutine_self();
+    Coroutine *to = self->caller;
+
+    trace_qemu_coroutine_yield(self, to);
+
+    if (!to) {
+        fprintf(stderr, "Co-routine is yielding to no one\n");
+        abort();
+    }
+
+    self->caller = NULL;
+    qemu_coroutine_switch(self, to, COROUTINE_YIELD);
+}
diff --git a/src/util/qemu-error.c b/src/util/qemu-error.c
new file mode 100644
index 0000000..c1574bb
--- /dev/null
+++ b/src/util/qemu-error.c
@@ -0,0 +1,239 @@
+/*
+ * Error reporting
+ *
+ * Copyright (C) 2010 Red Hat Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>,
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdio.h>
+#include "monitor/monitor.h"
+#include "qemu/error-report.h"
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO should return int, so callers can calculate width, but that
+ * requires surgery to monitor_vprintf().  Left for another day.
+ */
+void error_vprintf(const char *fmt, va_list ap)
+{
+    if (cur_mon && !monitor_cur_is_qmp()) {
+        monitor_vprintf(cur_mon, fmt, ap);
+    } else {
+        vfprintf(stderr, fmt, ap);
+    }
+}
+
+/*
+ * Print to current monitor if we have one, else to stderr.
+ * TODO just like error_vprintf()
+ */
+void error_printf(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vprintf(fmt, ap);
+    va_end(ap);
+}
+
+void error_printf_unless_qmp(const char *fmt, ...)
+{
+    va_list ap;
+
+    if (!monitor_cur_is_qmp()) {
+        va_start(ap, fmt);
+        error_vprintf(fmt, ap);
+        va_end(ap);
+    }
+}
+
+static Location std_loc = {
+    .kind = LOC_NONE
+};
+static Location *cur_loc = &std_loc;
+
+/*
+ * Push location saved in LOC onto the location stack, return it.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ */
+Location *loc_push_restore(Location *loc)
+{
+    assert(!loc->prev);
+    loc->prev = cur_loc;
+    cur_loc = loc;
+    return loc;
+}
+
+/*
+ * Initialize *LOC to "nowhere", push it onto the location stack.
+ * The top of that stack is the current location.
+ * Needs a matching loc_pop().
+ * Return LOC.
+ */
+Location *loc_push_none(Location *loc)
+{
+    loc->kind = LOC_NONE;
+    loc->prev = NULL;
+    return loc_push_restore(loc);
+}
+
+/*
+ * Pop the location stack.
+ * LOC must be the current location, i.e. the top of the stack.
+ */
+Location *loc_pop(Location *loc)
+{
+    assert(cur_loc == loc && loc->prev);
+    cur_loc = loc->prev;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Save the current location in LOC, return LOC.
+ */
+Location *loc_save(Location *loc)
+{
+    *loc = *cur_loc;
+    loc->prev = NULL;
+    return loc;
+}
+
+/*
+ * Change the current location to the one saved in LOC.
+ */
+void loc_restore(Location *loc)
+{
+    Location *prev = cur_loc->prev;
+    assert(!loc->prev);
+    *cur_loc = *loc;
+    cur_loc->prev = prev;
+}
+
+/*
+ * Change the current location to "nowhere in particular".
+ */
+void loc_set_none(void)
+{
+    cur_loc->kind = LOC_NONE;
+}
+
+/*
+ * Change the current location to argument ARGV[IDX..IDX+CNT-1].
+ */
+void loc_set_cmdline(char **argv, int idx, int cnt)
+{
+    cur_loc->kind = LOC_CMDLINE;
+    cur_loc->num = cnt;
+    cur_loc->ptr = argv + idx;
+}
+
+/*
+ * Change the current location to file FNAME, line LNO.
+ */
+void loc_set_file(const char *fname, int lno)
+{
+    assert (fname || cur_loc->kind == LOC_FILE);
+    cur_loc->kind = LOC_FILE;
+    cur_loc->num = lno;
+    if (fname) {
+        cur_loc->ptr = fname;
+    }
+}
+
+static const char *progname;
+
+/*
+ * Set the program name for error_print_loc().
+ */
+void error_set_progname(const char *argv0)
+{
+    const char *p = strrchr(argv0, '/');
+    progname = p ? p + 1 : argv0;
+}
+
+const char *error_get_progname(void)
+{
+    return progname;
+}
+
+/*
+ * Print current location to current monitor if we have one, else to stderr.
+ */
+static void error_print_loc(void)
+{
+    const char *sep = "";
+    int i;
+    const char *const *argp;
+
+    if (!cur_mon && progname) {
+        fprintf(stderr, "%s:", progname);
+        sep = " ";
+    }
+    switch (cur_loc->kind) {
+    case LOC_CMDLINE:
+        argp = cur_loc->ptr;
+        for (i = 0; i < cur_loc->num; i++) {
+            error_printf("%s%s", sep, argp[i]);
+            sep = " ";
+        }
+        error_printf(": ");
+        break;
+    case LOC_FILE:
+        error_printf("%s:", (const char *)cur_loc->ptr);
+        if (cur_loc->num) {
+            error_printf("%d:", cur_loc->num);
+        }
+        error_printf(" ");
+        break;
+    default:
+        error_printf("%s", sep);
+    }
+}
+
+bool enable_timestamp_msg;
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like vsprintf().  The result should not contain
+ * newlines.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor.  Use error_setg() there.
+ */
+void error_vreport(const char *fmt, va_list ap)
+{
+    GTimeVal tv;
+    gchar *timestr;
+
+    if (enable_timestamp_msg && !cur_mon) {
+        g_get_current_time(&tv);
+        timestr = g_time_val_to_iso8601(&tv);
+        error_printf("%s ", timestr);
+        g_free(timestr);
+    }
+
+    error_print_loc();
+    error_vprintf(fmt, ap);
+    error_printf("\n");
+}
+
+/*
+ * Print an error message to current monitor if we have one, else to stderr.
+ * Format arguments like sprintf().  The result should not contain
+ * newlines.
+ * Prepend the current location and append a newline.
+ * It's wrong to call this in a QMP monitor.  Use error_setg() there.
+ */
+void error_report(const char *fmt, ...)
+{
+    va_list ap;
+
+    va_start(ap, fmt);
+    error_vreport(fmt, ap);
+    va_end(ap);
+}
diff --git a/src/util/qemu-openpty.c b/src/util/qemu-openpty.c
new file mode 100644
index 0000000..4c53211
--- /dev/null
+++ b/src/util/qemu-openpty.c
@@ -0,0 +1,137 @@
+/*
+ * qemu-openpty.c
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2010 Red Hat, Inc.
+ *
+ * Wrapper function qemu_openpty() implementation.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+/*
+ * This is not part of oslib-posix.c because this function
+ * uses openpty() which often in -lutil, and if we add this
+ * dependency to oslib-posix.o, every app will have to be
+ * linked with -lutil.
+ */
+
+#include "config-host.h"
+#include "qemu-common.h"
+
+#if defined(__GLIBC__)
+# include <pty.h>
+#elif defined CONFIG_BSD
+# include <termios.h>
+# if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
+#  include <libutil.h>
+# else
+#  include <util.h>
+# endif
+#elif defined CONFIG_SOLARIS
+# include <termios.h>
+# include <stropts.h>
+#else
+# include <termios.h>
+#endif
+
+#ifdef __sun__
+/* Once Solaris has openpty(), this is going to be removed. */
+static int openpty(int *amaster, int *aslave, char *name,
+                   struct termios *termp, struct winsize *winp)
+{
+        const char *slave;
+        int mfd = -1, sfd = -1;
+
+        *amaster = *aslave = -1;
+
+        mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY);
+        if (mfd < 0)
+                goto err;
+
+        if (grantpt(mfd) == -1 || unlockpt(mfd) == -1)
+                goto err;
+
+        if ((slave = ptsname(mfd)) == NULL)
+                goto err;
+
+        if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1)
+                goto err;
+
+        if (ioctl(sfd, I_PUSH, "ptem") == -1 ||
+            (termp != NULL && tcgetattr(sfd, termp) < 0))
+                goto err;
+
+        if (amaster)
+                *amaster = mfd;
+        if (aslave)
+                *aslave = sfd;
+        if (winp)
+                ioctl(sfd, TIOCSWINSZ, winp);
+
+        return 0;
+
+err:
+        if (sfd != -1)
+                close(sfd);
+        close(mfd);
+        return -1;
+}
+
+static void cfmakeraw (struct termios *termios_p)
+{
+        termios_p->c_iflag &=
+                ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON);
+        termios_p->c_oflag &= ~OPOST;
+        termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN);
+        termios_p->c_cflag &= ~(CSIZE|PARENB);
+        termios_p->c_cflag |= CS8;
+
+        termios_p->c_cc[VMIN] = 0;
+        termios_p->c_cc[VTIME] = 0;
+}
+#endif
+
+int qemu_openpty_raw(int *aslave, char *pty_name)
+{
+    int amaster;
+    struct termios tty;
+#if defined(__OpenBSD__) || defined(__DragonFly__)
+    char pty_buf[PATH_MAX];
+#define q_ptsname(x) pty_buf
+#else
+    char *pty_buf = NULL;
+#define q_ptsname(x) ptsname(x)
+#endif
+
+    if (openpty(&amaster, aslave, pty_buf, NULL, NULL) < 0) {
+        return -1;
+    }
+
+    /* Set raw attributes on the pty. */
+    tcgetattr(*aslave, &tty);
+    cfmakeraw(&tty);
+    tcsetattr(*aslave, TCSAFLUSH, &tty);
+
+    if (pty_name) {
+        strcpy(pty_name, q_ptsname(amaster));
+    }
+
+    return amaster;
+}
diff --git a/src/util/qemu-option.c b/src/util/qemu-option.c
new file mode 100644
index 0000000..a50ecea
--- /dev/null
+++ b/src/util/qemu-option.c
@@ -0,0 +1,1205 @@
+/*
+ * Commandline option parsing functions
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "qemu-common.h"
+#include "qemu/error-report.h"
+#include "qapi/qmp/types.h"
+#include "qapi/error.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/option_int.h"
+
+/*
+ * Extracts the name of an option from the parameter string (p points at the
+ * first byte of the option name)
+ *
+ * The option name is delimited by delim (usually , or =) or the string end
+ * and is copied into buf. If the option name is longer than buf_size, it is
+ * truncated. buf is always zero terminated.
+ *
+ * The return value is the position of the delimiter/zero byte after the option
+ * name in p.
+ */
+const char *get_opt_name(char *buf, int buf_size, const char *p, char delim)
+{
+    char *q;
+
+    q = buf;
+    while (*p != '\0' && *p != delim) {
+        if (q && (q - buf) < buf_size - 1)
+            *q++ = *p;
+        p++;
+    }
+    if (q)
+        *q = '\0';
+
+    return p;
+}
+
+/*
+ * Extracts the value of an option from the parameter string p (p points at the
+ * first byte of the option value)
+ *
+ * This function is comparable to get_opt_name with the difference that the
+ * delimiter is fixed to be comma which starts a new option. To specify an
+ * option value that contains commas, double each comma.
+ */
+const char *get_opt_value(char *buf, int buf_size, const char *p)
+{
+    char *q;
+
+    q = buf;
+    while (*p != '\0') {
+        if (*p == ',') {
+            if (*(p + 1) != ',')
+                break;
+            p++;
+        }
+        if (q && (q - buf) < buf_size - 1)
+            *q++ = *p;
+        p++;
+    }
+    if (q)
+        *q = '\0';
+
+    return p;
+}
+
+int get_next_param_value(char *buf, int buf_size,
+                         const char *tag, const char **pstr)
+{
+    const char *p;
+    char option[128];
+
+    p = *pstr;
+    for(;;) {
+        p = get_opt_name(option, sizeof(option), p, '=');
+        if (*p != '=')
+            break;
+        p++;
+        if (!strcmp(tag, option)) {
+            *pstr = get_opt_value(buf, buf_size, p);
+            if (**pstr == ',') {
+                (*pstr)++;
+            }
+            return strlen(buf);
+        } else {
+            p = get_opt_value(NULL, 0, p);
+        }
+        if (*p != ',')
+            break;
+        p++;
+    }
+    return 0;
+}
+
+int get_param_value(char *buf, int buf_size,
+                    const char *tag, const char *str)
+{
+    return get_next_param_value(buf, buf_size, tag, &str);
+}
+
+static void parse_option_bool(const char *name, const char *value, bool *ret,
+                              Error **errp)
+{
+    if (value != NULL) {
+        if (!strcmp(value, "on")) {
+            *ret = 1;
+        } else if (!strcmp(value, "off")) {
+            *ret = 0;
+        } else {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE,
+                       name, "'on' or 'off'");
+        }
+    } else {
+        *ret = 1;
+    }
+}
+
+static void parse_option_number(const char *name, const char *value,
+                                uint64_t *ret, Error **errp)
+{
+    char *postfix;
+    uint64_t number;
+
+    if (value != NULL) {
+        number = strtoull(value, &postfix, 0);
+        if (*postfix != '\0') {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+            return;
+        }
+        *ret = number;
+    } else {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number");
+    }
+}
+
+static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc,
+                                            const char *name)
+{
+    int i;
+
+    for (i = 0; desc[i].name != NULL; i++) {
+        if (strcmp(desc[i].name, name) == 0) {
+            return &desc[i];
+        }
+    }
+
+    return NULL;
+}
+
+void parse_option_size(const char *name, const char *value,
+                       uint64_t *ret, Error **errp)
+{
+    char *postfix;
+    double sizef;
+
+    if (value != NULL) {
+        sizef = strtod(value, &postfix);
+        if (sizef < 0 || sizef > UINT64_MAX) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name,
+                             "a non-negative number below 2^64");
+            return;
+        }
+        switch (*postfix) {
+        case 'T':
+            sizef *= 1024;
+            /* fall through */
+        case 'G':
+            sizef *= 1024;
+            /* fall through */
+        case 'M':
+            sizef *= 1024;
+            /* fall through */
+        case 'K':
+        case 'k':
+            sizef *= 1024;
+            /* fall through */
+        case 'b':
+        case '\0':
+            *ret = (uint64_t) sizef;
+            break;
+        default:
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+            error_append_hint(errp, "You may use k, M, G or T suffixes for "
+                    "kilobytes, megabytes, gigabytes and terabytes.");
+            return;
+        }
+    } else {
+        error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size");
+    }
+}
+
+bool has_help_option(const char *param)
+{
+    size_t buflen = strlen(param) + 1;
+    char *buf = g_malloc(buflen);
+    const char *p = param;
+    bool result = false;
+
+    while (*p) {
+        p = get_opt_value(buf, buflen, p);
+        if (*p) {
+            p++;
+        }
+
+        if (is_help_option(buf)) {
+            result = true;
+            goto out;
+        }
+    }
+
+out:
+    g_free(buf);
+    return result;
+}
+
+bool is_valid_option_list(const char *param)
+{
+    size_t buflen = strlen(param) + 1;
+    char *buf = g_malloc(buflen);
+    const char *p = param;
+    bool result = true;
+
+    while (*p) {
+        p = get_opt_value(buf, buflen, p);
+        if (*p && !*++p) {
+            result = false;
+            goto out;
+        }
+
+        if (!*buf || *buf == ',') {
+            result = false;
+            goto out;
+        }
+    }
+
+out:
+    g_free(buf);
+    return result;
+}
+
+void qemu_opts_print_help(QemuOptsList *list)
+{
+    QemuOptDesc *desc;
+
+    assert(list);
+    desc = list->desc;
+    printf("Supported options:\n");
+    while (desc && desc->name) {
+        printf("%-16s %s\n", desc->name,
+               desc->help ? desc->help : "No description available");
+        desc++;
+    }
+}
+/* ------------------------------------------------------------------ */
+
+QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt;
+
+    QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) {
+        if (strcmp(opt->name, name) != 0)
+            continue;
+        return opt;
+    }
+    return NULL;
+}
+
+static void qemu_opt_del(QemuOpt *opt)
+{
+    QTAILQ_REMOVE(&opt->opts->head, opt, next);
+    g_free(opt->name);
+    g_free(opt->str);
+    g_free(opt);
+}
+
+/* qemu_opt_set allows many settings for the same option.
+ * This function deletes all settings for an option.
+ */
+static void qemu_opt_del_all(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt, *next_opt;
+
+    QTAILQ_FOREACH_SAFE(opt, &opts->head, next, next_opt) {
+        if (!strcmp(opt->name, name)) {
+            qemu_opt_del(opt);
+        }
+    }
+}
+
+const char *qemu_opt_get(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt;
+
+    if (opts == NULL) {
+        return NULL;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (!opt) {
+        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+        if (desc && desc->def_value_str) {
+            return desc->def_value_str;
+        }
+    }
+    return opt ? opt->str : NULL;
+}
+
+/* Get a known option (or its default) and remove it from the list
+ * all in one action. Return a malloced string of the option value.
+ * Result must be freed by caller with g_free().
+ */
+char *qemu_opt_get_del(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc;
+    char *str = NULL;
+
+    if (opts == NULL) {
+        return NULL;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (!opt) {
+        desc = find_desc_by_name(opts->list->desc, name);
+        if (desc && desc->def_value_str) {
+            str = g_strdup(desc->def_value_str);
+        }
+        return str;
+    }
+    str = opt->str;
+    opt->str = NULL;
+    qemu_opt_del_all(opts, name);
+    return str;
+}
+
+bool qemu_opt_has_help_opt(QemuOpts *opts)
+{
+    QemuOpt *opt;
+
+    QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) {
+        if (is_help_option(opt->name)) {
+            return true;
+        }
+    }
+    return false;
+}
+
+static bool qemu_opt_get_bool_helper(QemuOpts *opts, const char *name,
+                                     bool defval, bool del)
+{
+    QemuOpt *opt;
+    bool ret = defval;
+
+    if (opts == NULL) {
+        return ret;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (opt == NULL) {
+        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+        if (desc && desc->def_value_str) {
+            parse_option_bool(name, desc->def_value_str, &ret, &error_abort);
+        }
+        return ret;
+    }
+    assert(opt->desc && opt->desc->type == QEMU_OPT_BOOL);
+    ret = opt->value.boolean;
+    if (del) {
+        qemu_opt_del_all(opts, name);
+    }
+    return ret;
+}
+
+bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval)
+{
+    return qemu_opt_get_bool_helper(opts, name, defval, false);
+}
+
+bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval)
+{
+    return qemu_opt_get_bool_helper(opts, name, defval, true);
+}
+
+static uint64_t qemu_opt_get_number_helper(QemuOpts *opts, const char *name,
+                                           uint64_t defval, bool del)
+{
+    QemuOpt *opt;
+    uint64_t ret = defval;
+
+    if (opts == NULL) {
+        return ret;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (opt == NULL) {
+        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+        if (desc && desc->def_value_str) {
+            parse_option_number(name, desc->def_value_str, &ret, &error_abort);
+        }
+        return ret;
+    }
+    assert(opt->desc && opt->desc->type == QEMU_OPT_NUMBER);
+    ret = opt->value.uint;
+    if (del) {
+        qemu_opt_del_all(opts, name);
+    }
+    return ret;
+}
+
+uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval)
+{
+    return qemu_opt_get_number_helper(opts, name, defval, false);
+}
+
+uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name,
+                                 uint64_t defval)
+{
+    return qemu_opt_get_number_helper(opts, name, defval, true);
+}
+
+static uint64_t qemu_opt_get_size_helper(QemuOpts *opts, const char *name,
+                                         uint64_t defval, bool del)
+{
+    QemuOpt *opt;
+    uint64_t ret = defval;
+
+    if (opts == NULL) {
+        return ret;
+    }
+
+    opt = qemu_opt_find(opts, name);
+    if (opt == NULL) {
+        const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name);
+        if (desc && desc->def_value_str) {
+            parse_option_size(name, desc->def_value_str, &ret, &error_abort);
+        }
+        return ret;
+    }
+    assert(opt->desc && opt->desc->type == QEMU_OPT_SIZE);
+    ret = opt->value.uint;
+    if (del) {
+        qemu_opt_del_all(opts, name);
+    }
+    return ret;
+}
+
+uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval)
+{
+    return qemu_opt_get_size_helper(opts, name, defval, false);
+}
+
+uint64_t qemu_opt_get_size_del(QemuOpts *opts, const char *name,
+                               uint64_t defval)
+{
+    return qemu_opt_get_size_helper(opts, name, defval, true);
+}
+
+static void qemu_opt_parse(QemuOpt *opt, Error **errp)
+{
+    if (opt->desc == NULL)
+        return;
+
+    switch (opt->desc->type) {
+    case QEMU_OPT_STRING:
+        /* nothing */
+        return;
+    case QEMU_OPT_BOOL:
+        parse_option_bool(opt->name, opt->str, &opt->value.boolean, errp);
+        break;
+    case QEMU_OPT_NUMBER:
+        parse_option_number(opt->name, opt->str, &opt->value.uint, errp);
+        break;
+    case QEMU_OPT_SIZE:
+        parse_option_size(opt->name, opt->str, &opt->value.uint, errp);
+        break;
+    default:
+        abort();
+    }
+}
+
+static bool opts_accepts_any(const QemuOpts *opts)
+{
+    return opts->list->desc[0].name == NULL;
+}
+
+int qemu_opt_unset(QemuOpts *opts, const char *name)
+{
+    QemuOpt *opt = qemu_opt_find(opts, name);
+
+    assert(opts_accepts_any(opts));
+
+    if (opt == NULL) {
+        return -1;
+    } else {
+        qemu_opt_del(opt);
+        return 0;
+    }
+}
+
+static void opt_set(QemuOpts *opts, const char *name, const char *value,
+                    bool prepend, Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc;
+    Error *local_err = NULL;
+
+    desc = find_desc_by_name(opts->list->desc, name);
+    if (!desc && !opts_accepts_any(opts)) {
+        error_setg(errp, QERR_INVALID_PARAMETER, name);
+        return;
+    }
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    if (prepend) {
+        QTAILQ_INSERT_HEAD(&opts->head, opt, next);
+    } else {
+        QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+    }
+    opt->desc = desc;
+    opt->str = g_strdup(value);
+    qemu_opt_parse(opt, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        qemu_opt_del(opt);
+    }
+}
+
+void qemu_opt_set(QemuOpts *opts, const char *name, const char *value,
+                  Error **errp)
+{
+    opt_set(opts, name, value, false, errp);
+}
+
+void qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val,
+                       Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc = opts->list->desc;
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->desc = find_desc_by_name(desc, name);
+    if (!opt->desc && !opts_accepts_any(opts)) {
+        error_setg(errp, QERR_INVALID_PARAMETER, name);
+        g_free(opt);
+        return;
+    }
+
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    opt->value.boolean = !!val;
+    opt->str = g_strdup(val ? "on" : "off");
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+}
+
+void qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val,
+                         Error **errp)
+{
+    QemuOpt *opt;
+    const QemuOptDesc *desc = opts->list->desc;
+
+    opt = g_malloc0(sizeof(*opt));
+    opt->desc = find_desc_by_name(desc, name);
+    if (!opt->desc && !opts_accepts_any(opts)) {
+        error_setg(errp, QERR_INVALID_PARAMETER, name);
+        g_free(opt);
+        return;
+    }
+
+    opt->name = g_strdup(name);
+    opt->opts = opts;
+    opt->value.uint = val;
+    opt->str = g_strdup_printf("%" PRId64, val);
+    QTAILQ_INSERT_TAIL(&opts->head, opt, next);
+}
+
+/**
+ * For each member of @opts, call @func(@opaque, name, value, @errp).
+ * @func() may store an Error through @errp, but must return non-zero then.
+ * When @func() returns non-zero, break the loop and return that value.
+ * Return zero when the loop completes.
+ */
+int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque,
+                     Error **errp)
+{
+    QemuOpt *opt;
+    int rc;
+
+    QTAILQ_FOREACH(opt, &opts->head, next) {
+        rc = func(opaque, opt->name, opt->str, errp);
+        if (rc) {
+            return rc;
+        }
+        assert(!errp || !*errp);
+    }
+    return 0;
+}
+
+QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id)
+{
+    QemuOpts *opts;
+
+    QTAILQ_FOREACH(opts, &list->head, next) {
+        if (!opts->id && !id) {
+            return opts;
+        }
+        if (opts->id && id && !strcmp(opts->id, id)) {
+            return opts;
+        }
+    }
+    return NULL;
+}
+
+QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id,
+                           int fail_if_exists, Error **errp)
+{
+    QemuOpts *opts = NULL;
+
+    if (id) {
+        if (!id_wellformed(id)) {
+            error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id",
+                       "an identifier");
+            error_append_hint(errp, "Identifiers consist of letters, digits, "
+                              "'-', '.', '_', starting with a letter.");
+            return NULL;
+        }
+        opts = qemu_opts_find(list, id);
+        if (opts != NULL) {
+            if (fail_if_exists && !list->merge_lists) {
+                error_setg(errp, "Duplicate ID '%s' for %s", id, list->name);
+                return NULL;
+            } else {
+                return opts;
+            }
+        }
+    } else if (list->merge_lists) {
+        opts = qemu_opts_find(list, NULL);
+        if (opts) {
+            return opts;
+        }
+    }
+    opts = g_malloc0(sizeof(*opts));
+    opts->id = g_strdup(id);
+    opts->list = list;
+    loc_save(&opts->loc);
+    QTAILQ_INIT(&opts->head);
+    QTAILQ_INSERT_TAIL(&list->head, opts, next);
+    return opts;
+}
+
+void qemu_opts_reset(QemuOptsList *list)
+{
+    QemuOpts *opts, *next_opts;
+
+    QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) {
+        qemu_opts_del(opts);
+    }
+}
+
+void qemu_opts_loc_restore(QemuOpts *opts)
+{
+    loc_restore(&opts->loc);
+}
+
+void qemu_opts_set(QemuOptsList *list, const char *id,
+                   const char *name, const char *value, Error **errp)
+{
+    QemuOpts *opts;
+    Error *local_err = NULL;
+
+    opts = qemu_opts_create(list, id, 1, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    qemu_opt_set(opts, name, value, errp);
+}
+
+const char *qemu_opts_id(QemuOpts *opts)
+{
+    return opts->id;
+}
+
+/* The id string will be g_free()d by qemu_opts_del */
+void qemu_opts_set_id(QemuOpts *opts, char *id)
+{
+    opts->id = id;
+}
+
+void qemu_opts_del(QemuOpts *opts)
+{
+    QemuOpt *opt;
+
+    if (opts == NULL) {
+        return;
+    }
+
+    for (;;) {
+        opt = QTAILQ_FIRST(&opts->head);
+        if (opt == NULL)
+            break;
+        qemu_opt_del(opt);
+    }
+    QTAILQ_REMOVE(&opts->list->head, opts, next);
+    g_free(opts->id);
+    g_free(opts);
+}
+
+/* print value, escaping any commas in value */
+static void escaped_print(const char *value)
+{
+    const char *ptr;
+
+    for (ptr = value; *ptr; ++ptr) {
+        if (*ptr == ',') {
+            putchar(',');
+        }
+        putchar(*ptr);
+    }
+}
+
+void qemu_opts_print(QemuOpts *opts, const char *separator)
+{
+    QemuOpt *opt;
+    QemuOptDesc *desc = opts->list->desc;
+    const char *sep = "";
+
+    if (opts->id) {
+        printf("id=%s", opts->id); /* passed id_wellformed -> no commas */
+        sep = separator;
+    }
+
+    if (desc[0].name == NULL) {
+        QTAILQ_FOREACH(opt, &opts->head, next) {
+            printf("%s%s=", sep, opt->name);
+            escaped_print(opt->str);
+            sep = separator;
+        }
+        return;
+    }
+    for (; desc && desc->name; desc++) {
+        const char *value;
+        QemuOpt *opt = qemu_opt_find(opts, desc->name);
+
+        value = opt ? opt->str : desc->def_value_str;
+        if (!value) {
+            continue;
+        }
+        if (desc->type == QEMU_OPT_STRING) {
+            printf("%s%s=", sep, desc->name);
+            escaped_print(value);
+        } else if ((desc->type == QEMU_OPT_SIZE ||
+                    desc->type == QEMU_OPT_NUMBER) && opt) {
+            printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint);
+        } else {
+            printf("%s%s=%s", sep, desc->name, value);
+        }
+        sep = separator;
+    }
+}
+
+static void opts_do_parse(QemuOpts *opts, const char *params,
+                          const char *firstname, bool prepend, Error **errp)
+{
+    char option[128], value[1024];
+    const char *p,*pe,*pc;
+    Error *local_err = NULL;
+
+    for (p = params; *p != '\0'; p++) {
+        pe = strchr(p, '=');
+        pc = strchr(p, ',');
+        if (!pe || (pc && pc < pe)) {
+            /* found "foo,more" */
+            if (p == params && firstname) {
+                /* implicitly named first option */
+                pstrcpy(option, sizeof(option), firstname);
+                p = get_opt_value(value, sizeof(value), p);
+            } else {
+                /* option without value, probably a flag */
+                p = get_opt_name(option, sizeof(option), p, ',');
+                if (strncmp(option, "no", 2) == 0) {
+                    memmove(option, option+2, strlen(option+2)+1);
+                    pstrcpy(value, sizeof(value), "off");
+                } else {
+                    pstrcpy(value, sizeof(value), "on");
+                }
+            }
+        } else {
+            /* found "foo=bar,more" */
+            p = get_opt_name(option, sizeof(option), p, '=');
+            if (*p != '=') {
+                break;
+            }
+            p++;
+            p = get_opt_value(value, sizeof(value), p);
+        }
+        if (strcmp(option, "id") != 0) {
+            /* store and parse */
+            opt_set(opts, option, value, prepend, &local_err);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            }
+        }
+        if (*p != ',') {
+            break;
+        }
+    }
+}
+
+/**
+ * Store options parsed from @params into @opts.
+ * If @firstname is non-null, the first key=value in @params may omit
+ * key=, and is treated as if key was @firstname.
+ * On error, store an error object through @errp if non-null.
+ */
+void qemu_opts_do_parse(QemuOpts *opts, const char *params,
+                       const char *firstname, Error **errp)
+{
+    opts_do_parse(opts, params, firstname, false, errp);
+}
+
+static QemuOpts *opts_parse(QemuOptsList *list, const char *params,
+                            bool permit_abbrev, bool defaults, Error **errp)
+{
+    const char *firstname;
+    char value[1024], *id = NULL;
+    const char *p;
+    QemuOpts *opts;
+    Error *local_err = NULL;
+
+    assert(!permit_abbrev || list->implied_opt_name);
+    firstname = permit_abbrev ? list->implied_opt_name : NULL;
+
+    if (strncmp(params, "id=", 3) == 0) {
+        get_opt_value(value, sizeof(value), params+3);
+        id = value;
+    } else if ((p = strstr(params, ",id=")) != NULL) {
+        get_opt_value(value, sizeof(value), p+4);
+        id = value;
+    }
+
+    /*
+     * This code doesn't work for defaults && !list->merge_lists: when
+     * params has no id=, and list has an element with !opts->id, it
+     * appends a new element instead of returning the existing opts.
+     * However, we got no use for this case.  Guard against possible
+     * (if unlikely) future misuse:
+     */
+    assert(!defaults || list->merge_lists);
+    opts = qemu_opts_create(list, id, !defaults, &local_err);
+    if (opts == NULL) {
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+
+    opts_do_parse(opts, params, firstname, defaults, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
+        return NULL;
+    }
+
+    return opts;
+}
+
+/**
+ * Create a QemuOpts in @list and with options parsed from @params.
+ * If @permit_abbrev, the first key=value in @params may omit key=,
+ * and is treated as if key was @list->implied_opt_name.
+ * On error, store an error object through @errp if non-null.
+ * Return the new QemuOpts on success, null pointer on error.
+ */
+QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params,
+                          bool permit_abbrev, Error **errp)
+{
+    return opts_parse(list, params, permit_abbrev, false, errp);
+}
+
+/**
+ * Create a QemuOpts in @list and with options parsed from @params.
+ * If @permit_abbrev, the first key=value in @params may omit key=,
+ * and is treated as if key was @list->implied_opt_name.
+ * Report errors with error_report_err().  This is inappropriate in
+ * QMP context.  Do not use this function there!
+ * Return the new QemuOpts on success, null pointer on error.
+ */
+QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params,
+                                  bool permit_abbrev)
+{
+    Error *err = NULL;
+    QemuOpts *opts;
+
+    opts = opts_parse(list, params, permit_abbrev, false, &err);
+    if (err) {
+        error_report_err(err);
+    }
+    return opts;
+}
+
+void qemu_opts_set_defaults(QemuOptsList *list, const char *params,
+                            int permit_abbrev)
+{
+    QemuOpts *opts;
+
+    opts = opts_parse(list, params, permit_abbrev, true, NULL);
+    assert(opts);
+}
+
+typedef struct OptsFromQDictState {
+    QemuOpts *opts;
+    Error **errp;
+} OptsFromQDictState;
+
+static void qemu_opts_from_qdict_1(const char *key, QObject *obj, void *opaque)
+{
+    OptsFromQDictState *state = opaque;
+    char buf[32];
+    const char *value;
+    int n;
+
+    if (!strcmp(key, "id") || *state->errp) {
+        return;
+    }
+
+    switch (qobject_type(obj)) {
+    case QTYPE_QSTRING:
+        value = qstring_get_str(qobject_to_qstring(obj));
+        break;
+    case QTYPE_QINT:
+        n = snprintf(buf, sizeof(buf), "%" PRId64,
+                     qint_get_int(qobject_to_qint(obj)));
+        assert(n < sizeof(buf));
+        value = buf;
+        break;
+    case QTYPE_QFLOAT:
+        n = snprintf(buf, sizeof(buf), "%.17g",
+                     qfloat_get_double(qobject_to_qfloat(obj)));
+        assert(n < sizeof(buf));
+        value = buf;
+        break;
+    case QTYPE_QBOOL:
+        pstrcpy(buf, sizeof(buf),
+                qbool_get_bool(qobject_to_qbool(obj)) ? "on" : "off");
+        value = buf;
+        break;
+    default:
+        return;
+    }
+
+    qemu_opt_set(state->opts, key, value, state->errp);
+}
+
+/*
+ * Create QemuOpts from a QDict.
+ * Use value of key "id" as ID if it exists and is a QString.
+ * Only QStrings, QInts, QFloats and QBools are copied.  Entries with
+ * other types are silently ignored.
+ */
+QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict,
+                               Error **errp)
+{
+    OptsFromQDictState state;
+    Error *local_err = NULL;
+    QemuOpts *opts;
+
+    opts = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1,
+                            &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+
+    assert(opts != NULL);
+
+    state.errp = &local_err;
+    state.opts = opts;
+    qdict_iter(qdict, qemu_opts_from_qdict_1, &state);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        qemu_opts_del(opts);
+        return NULL;
+    }
+
+    return opts;
+}
+
+/*
+ * Adds all QDict entries to the QemuOpts that can be added and removes them
+ * from the QDict. When this function returns, the QDict contains only those
+ * entries that couldn't be added to the QemuOpts.
+ */
+void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp)
+{
+    const QDictEntry *entry, *next;
+
+    entry = qdict_first(qdict);
+
+    while (entry != NULL) {
+        Error *local_err = NULL;
+        OptsFromQDictState state = {
+            .errp = &local_err,
+            .opts = opts,
+        };
+
+        next = qdict_next(qdict, entry);
+
+        if (find_desc_by_name(opts->list->desc, entry->key)) {
+            qemu_opts_from_qdict_1(entry->key, entry->value, &state);
+            if (local_err) {
+                error_propagate(errp, local_err);
+                return;
+            } else {
+                qdict_del(qdict, entry->key);
+            }
+        }
+
+        entry = next;
+    }
+}
+
+/*
+ * Convert from QemuOpts to QDict.
+ * The QDict values are of type QString.
+ * TODO We'll want to use types appropriate for opt->desc->type, but
+ * this is enough for now.
+ */
+QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict)
+{
+    QemuOpt *opt;
+    QObject *val;
+
+    if (!qdict) {
+        qdict = qdict_new();
+    }
+    if (opts->id) {
+        qdict_put(qdict, "id", qstring_from_str(opts->id));
+    }
+    QTAILQ_FOREACH(opt, &opts->head, next) {
+        val = QOBJECT(qstring_from_str(opt->str));
+        qdict_put_obj(qdict, opt->name, val);
+    }
+    return qdict;
+}
+
+/* Validate parsed opts against descriptions where no
+ * descriptions were provided in the QemuOptsList.
+ */
+void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp)
+{
+    QemuOpt *opt;
+    Error *local_err = NULL;
+
+    assert(opts_accepts_any(opts));
+
+    QTAILQ_FOREACH(opt, &opts->head, next) {
+        opt->desc = find_desc_by_name(desc, opt->name);
+        if (!opt->desc) {
+            error_setg(errp, QERR_INVALID_PARAMETER, opt->name);
+            return;
+        }
+
+        qemu_opt_parse(opt, &local_err);
+        if (local_err) {
+            error_propagate(errp, local_err);
+            return;
+        }
+    }
+}
+
+/**
+ * For each member of @list, call @func(@opaque, member, @errp).
+ * Call it with the current location temporarily set to the member's.
+ * @func() may store an Error through @errp, but must return non-zero then.
+ * When @func() returns non-zero, break the loop and return that value.
+ * Return zero when the loop completes.
+ */
+int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func,
+                      void *opaque, Error **errp)
+{
+    Location loc;
+    QemuOpts *opts;
+    int rc;
+
+    loc_push_none(&loc);
+    QTAILQ_FOREACH(opts, &list->head, next) {
+        loc_restore(&opts->loc);
+        rc = func(opaque, opts, errp);
+        if (rc) {
+            return rc;
+        }
+        assert(!errp || !*errp);
+    }
+    loc_pop(&loc);
+    return 0;
+}
+
+static size_t count_opts_list(QemuOptsList *list)
+{
+    QemuOptDesc *desc = NULL;
+    size_t num_opts = 0;
+
+    if (!list) {
+        return 0;
+    }
+
+    desc = list->desc;
+    while (desc && desc->name) {
+        num_opts++;
+        desc++;
+    }
+
+    return num_opts;
+}
+
+void qemu_opts_free(QemuOptsList *list)
+{
+    g_free(list);
+}
+
+/* Realloc dst option list and append options from an option list (list)
+ * to it. dst could be NULL or a malloced list.
+ * The lifetime of dst must be shorter than the input list because the
+ * QemuOptDesc->name, ->help, and ->def_value_str strings are shared.
+ */
+QemuOptsList *qemu_opts_append(QemuOptsList *dst,
+                               QemuOptsList *list)
+{
+    size_t num_opts, num_dst_opts;
+    QemuOptDesc *desc;
+    bool need_init = false;
+    bool need_head_update;
+
+    if (!list) {
+        return dst;
+    }
+
+    /* If dst is NULL, after realloc, some area of dst should be initialized
+     * before adding options to it.
+     */
+    if (!dst) {
+        need_init = true;
+        need_head_update = true;
+    } else {
+        /* Moreover, even if dst is not NULL, the realloc may move it to a
+         * different address in which case we may get a stale tail pointer
+         * in dst->head. */
+        need_head_update = QTAILQ_EMPTY(&dst->head);
+    }
+
+    num_opts = count_opts_list(dst);
+    num_dst_opts = num_opts;
+    num_opts += count_opts_list(list);
+    dst = g_realloc(dst, sizeof(QemuOptsList) +
+                    (num_opts + 1) * sizeof(QemuOptDesc));
+    if (need_init) {
+        dst->name = NULL;
+        dst->implied_opt_name = NULL;
+        dst->merge_lists = false;
+    }
+    if (need_head_update) {
+        QTAILQ_INIT(&dst->head);
+    }
+    dst->desc[num_dst_opts].name = NULL;
+
+    /* append list->desc to dst->desc */
+    if (list) {
+        desc = list->desc;
+        while (desc && desc->name) {
+            if (find_desc_by_name(dst->desc, desc->name) == NULL) {
+                dst->desc[num_dst_opts++] = *desc;
+                dst->desc[num_dst_opts].name = NULL;
+            }
+            desc++;
+        }
+    }
+
+    return dst;
+}
diff --git a/src/util/qemu-progress.c b/src/util/qemu-progress.c
new file mode 100644
index 0000000..4ee5cd0
--- /dev/null
+++ b/src/util/qemu-progress.c
@@ -0,0 +1,159 @@
+/*
+ * QEMU progress printing utility functions
+ *
+ * Copyright (C) 2011 Jes Sorensen <Jes.Sorensen@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/osdep.h"
+#include <stdio.h>
+
+struct progress_state {
+    float current;
+    float last_print;
+    float min_skip;
+    void (*print)(void);
+    void (*end)(void);
+};
+
+static struct progress_state state;
+static volatile sig_atomic_t print_pending;
+
+/*
+ * Simple progress print function.
+ * @percent relative percent of current operation
+ * @max percent of total operation
+ */
+static void progress_simple_print(void)
+{
+    printf("    (%3.2f/100%%)\r", state.current);
+    fflush(stdout);
+}
+
+static void progress_simple_end(void)
+{
+    printf("\n");
+}
+
+static void progress_simple_init(void)
+{
+    state.print = progress_simple_print;
+    state.end = progress_simple_end;
+}
+
+#ifdef CONFIG_POSIX
+static void sigusr_print(int signal)
+{
+    print_pending = 1;
+}
+#endif
+
+static void progress_dummy_print(void)
+{
+    if (print_pending) {
+        fprintf(stderr, "    (%3.2f/100%%)\n", state.current);
+        print_pending = 0;
+    }
+}
+
+static void progress_dummy_end(void)
+{
+}
+
+static void progress_dummy_init(void)
+{
+#ifdef CONFIG_POSIX
+    struct sigaction action;
+    sigset_t set;
+
+    memset(&action, 0, sizeof(action));
+    sigfillset(&action.sa_mask);
+    action.sa_handler = sigusr_print;
+    action.sa_flags = 0;
+    sigaction(SIGUSR1, &action, NULL);
+
+    /*
+     * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the
+     * tools that use the progress report SIGUSR1 isn't used in this meaning
+     * and instead should print the progress, so reenable it.
+     */
+    sigemptyset(&set);
+    sigaddset(&set, SIGUSR1);
+    pthread_sigmask(SIG_UNBLOCK, &set, NULL);
+#endif
+
+    state.print = progress_dummy_print;
+    state.end = progress_dummy_end;
+}
+
+/*
+ * Initialize progress reporting.
+ * If @enabled is false, actual reporting is suppressed.  The user can
+ * still trigger a report by sending a SIGUSR1.
+ * Reports are also suppressed unless we've had at least @min_skip
+ * percent progress since the last report.
+ */
+void qemu_progress_init(int enabled, float min_skip)
+{
+    state.min_skip = min_skip;
+    if (enabled) {
+        progress_simple_init();
+    } else {
+        progress_dummy_init();
+    }
+}
+
+void qemu_progress_end(void)
+{
+    state.end();
+}
+
+/*
+ * Report progress.
+ * @delta is how much progress we made.
+ * If @max is zero, @delta is an absolut value of the total job done.
+ * Else, @delta is a progress delta since the last call, as a fraction
+ * of @max.  I.e. the delta is @delta * @max / 100. This allows
+ * relative accounting of functions which may be a different fraction of
+ * the full job, depending on the context they are called in. I.e.
+ * a function might be considered 40% of the full job if used from
+ * bdrv_img_create() but only 20% if called from img_convert().
+ */
+void qemu_progress_print(float delta, int max)
+{
+    float current;
+
+    if (max == 0) {
+        current = delta;
+    } else {
+        current = state.current + delta / 100 * max;
+    }
+    if (current > 100) {
+        current = 100;
+    }
+    state.current = current;
+
+    if (current > (state.last_print + state.min_skip) ||
+        (current == 100) || (current == 0)) {
+        state.last_print = state.current;
+        state.print();
+    }
+}
diff --git a/src/util/qemu-sockets.c b/src/util/qemu-sockets.c
new file mode 100644
index 0000000..5a31d16
--- /dev/null
+++ b/src/util/qemu-sockets.c
@@ -0,0 +1,1167 @@
+/*
+ *  inet and unix socket functions for qemu
+ *
+ *  (c) 2008 Gerd Hoffmann <kraxel@redhat.com>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; under version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ * Contributions after 2012-01-13 are licensed under the terms of the
+ * GNU GPL, version 2 or (at your option) any later version.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include "monitor/monitor.h"
+#include "qemu/sockets.h"
+#include "qemu/main-loop.h"
+#include "qapi/qmp-input-visitor.h"
+#include "qapi/qmp-output-visitor.h"
+#include "qapi-visit.h"
+
+#ifndef AI_ADDRCONFIG
+# define AI_ADDRCONFIG 0
+#endif
+#ifndef AI_V4MAPPED
+# define AI_V4MAPPED 0
+#endif
+
+/* used temporarily until all users are converted to QemuOpts */
+QemuOptsList socket_optslist = {
+    .name = "socket",
+    .head = QTAILQ_HEAD_INITIALIZER(socket_optslist.head),
+    .desc = {
+        {
+            .name = "path",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "host",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "port",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "localaddr",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "localport",
+            .type = QEMU_OPT_STRING,
+        },{
+            .name = "to",
+            .type = QEMU_OPT_NUMBER,
+        },{
+            .name = "ipv4",
+            .type = QEMU_OPT_BOOL,
+        },{
+            .name = "ipv6",
+            .type = QEMU_OPT_BOOL,
+        },
+        { /* end if list */ }
+    },
+};
+
+static int inet_getport(struct addrinfo *e)
+{
+    struct sockaddr_in *i4;
+    struct sockaddr_in6 *i6;
+
+    switch (e->ai_family) {
+    case PF_INET6:
+        i6 = (void*)e->ai_addr;
+        return ntohs(i6->sin6_port);
+    case PF_INET:
+        i4 = (void*)e->ai_addr;
+        return ntohs(i4->sin_port);
+    default:
+        return 0;
+    }
+}
+
+static void inet_setport(struct addrinfo *e, int port)
+{
+    struct sockaddr_in *i4;
+    struct sockaddr_in6 *i6;
+
+    switch (e->ai_family) {
+    case PF_INET6:
+        i6 = (void*)e->ai_addr;
+        i6->sin6_port = htons(port);
+        break;
+    case PF_INET:
+        i4 = (void*)e->ai_addr;
+        i4->sin_port = htons(port);
+        break;
+    }
+}
+
+NetworkAddressFamily inet_netfamily(int family)
+{
+    switch (family) {
+    case PF_INET6: return NETWORK_ADDRESS_FAMILY_IPV6;
+    case PF_INET:  return NETWORK_ADDRESS_FAMILY_IPV4;
+    case PF_UNIX:  return NETWORK_ADDRESS_FAMILY_UNIX;
+    }
+    return NETWORK_ADDRESS_FAMILY_UNKNOWN;
+}
+
+int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp)
+{
+    struct addrinfo ai,*res,*e;
+    const char *addr;
+    char port[33];
+    char uaddr[INET6_ADDRSTRLEN+1];
+    char uport[33];
+    int slisten, rc, to, port_min, port_max, p;
+
+    memset(&ai,0, sizeof(ai));
+    ai.ai_flags = AI_PASSIVE;
+    ai.ai_family = PF_UNSPEC;
+    ai.ai_socktype = SOCK_STREAM;
+
+    if ((qemu_opt_get(opts, "host") == NULL)) {
+        error_setg(errp, "host not specified");
+        return -1;
+    }
+    if (qemu_opt_get(opts, "port") != NULL) {
+        pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port"));
+    } else {
+        port[0] = '\0';
+    }
+    addr = qemu_opt_get(opts, "host");
+
+    to = qemu_opt_get_number(opts, "to", 0);
+    if (qemu_opt_get_bool(opts, "ipv4", 0))
+        ai.ai_family = PF_INET;
+    if (qemu_opt_get_bool(opts, "ipv6", 0))
+        ai.ai_family = PF_INET6;
+
+    /* lookup */
+    if (port_offset) {
+        unsigned long long baseport;
+        if (strlen(port) == 0) {
+            error_setg(errp, "port not specified");
+            return -1;
+        }
+        if (parse_uint_full(port, &baseport, 10) < 0) {
+            error_setg(errp, "can't convert to a number: %s", port);
+            return -1;
+        }
+        if (baseport > 65535 ||
+            baseport + port_offset > 65535) {
+            error_setg(errp, "port %s out of range", port);
+            return -1;
+        }
+        snprintf(port, sizeof(port), "%d", (int)baseport + port_offset);
+    }
+    rc = getaddrinfo(strlen(addr) ? addr : NULL,
+                     strlen(port) ? port : NULL, &ai, &res);
+    if (rc != 0) {
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
+        return -1;
+    }
+
+    /* create socket + bind */
+    for (e = res; e != NULL; e = e->ai_next) {
+        getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen,
+		        uaddr,INET6_ADDRSTRLEN,uport,32,
+		        NI_NUMERICHOST | NI_NUMERICSERV);
+        slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol);
+        if (slisten < 0) {
+            if (!e->ai_next) {
+                error_setg_errno(errp, errno, "Failed to create socket");
+            }
+            continue;
+        }
+
+        socket_set_fast_reuse(slisten);
+#ifdef IPV6_V6ONLY
+        if (e->ai_family == PF_INET6) {
+            /* listen on both ipv4 and ipv6 */
+            const int off = 0;
+            qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &off,
+                            sizeof(off));
+        }
+#endif
+
+        port_min = inet_getport(e);
+        port_max = to ? to + port_offset : port_min;
+        for (p = port_min; p <= port_max; p++) {
+            inet_setport(e, p);
+            if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) {
+                goto listen;
+            }
+            if (p == port_max) {
+                if (!e->ai_next) {
+                    error_setg_errno(errp, errno, "Failed to bind socket");
+                }
+            }
+        }
+        closesocket(slisten);
+    }
+    freeaddrinfo(res);
+    return -1;
+
+listen:
+    if (listen(slisten,1) != 0) {
+        error_setg_errno(errp, errno, "Failed to listen on socket");
+        closesocket(slisten);
+        freeaddrinfo(res);
+        return -1;
+    }
+    qemu_opt_set(opts, "host", uaddr, &error_abort);
+    qemu_opt_set_number(opts, "port", inet_getport(e) - port_offset,
+                        &error_abort);
+    qemu_opt_set_bool(opts, "ipv6", e->ai_family == PF_INET6,
+                      &error_abort);
+    qemu_opt_set_bool(opts, "ipv4", e->ai_family != PF_INET6,
+                      &error_abort);
+    freeaddrinfo(res);
+    return slisten;
+}
+
+#ifdef _WIN32
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS || (rc) == -EWOULDBLOCK || (rc) == -WSAEALREADY)
+#else
+#define QEMU_SOCKET_RC_INPROGRESS(rc) \
+    ((rc) == -EINPROGRESS)
+#endif
+
+/* Struct to store connect state for non blocking connect */
+typedef struct ConnectState {
+    int fd;
+    struct addrinfo *addr_list;
+    struct addrinfo *current_addr;
+    NonBlockingConnectHandler *callback;
+    void *opaque;
+} ConnectState;
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+                             ConnectState *connect_state, Error **errp);
+
+static void wait_for_connect(void *opaque)
+{
+    ConnectState *s = opaque;
+    int val = 0, rc = 0;
+    socklen_t valsize = sizeof(val);
+    bool in_progress;
+    Error *err = NULL;
+
+    qemu_set_fd_handler(s->fd, NULL, NULL, NULL);
+
+    do {
+        rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize);
+    } while (rc == -1 && socket_error() == EINTR);
+
+    /* update rc to contain error */
+    if (!rc && val) {
+        rc = -1;
+        errno = val;
+    }
+
+    /* connect error */
+    if (rc < 0) {
+        error_setg_errno(&err, errno, "Error connecting to socket");
+        closesocket(s->fd);
+        s->fd = rc;
+    }
+
+    /* try to connect to the next address on the list */
+    if (s->current_addr) {
+        while (s->current_addr->ai_next != NULL && s->fd < 0) {
+            s->current_addr = s->current_addr->ai_next;
+            s->fd = inet_connect_addr(s->current_addr, &in_progress, s, NULL);
+            if (s->fd < 0) {
+                error_free(err);
+                err = NULL;
+                error_setg_errno(&err, errno, "Unable to start socket connect");
+            }
+            /* connect in progress */
+            if (in_progress) {
+                goto out;
+            }
+        }
+
+        freeaddrinfo(s->addr_list);
+    }
+
+    if (s->callback) {
+        s->callback(s->fd, err, s->opaque);
+    }
+    g_free(s);
+out:
+    error_free(err);
+}
+
+static int inet_connect_addr(struct addrinfo *addr, bool *in_progress,
+                             ConnectState *connect_state, Error **errp)
+{
+    int sock, rc;
+
+    *in_progress = false;
+
+    sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        return -1;
+    }
+    socket_set_fast_reuse(sock);
+    if (connect_state != NULL) {
+        qemu_set_nonblock(sock);
+    }
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) {
+            rc = -socket_error();
+        }
+    } while (rc == -EINTR);
+
+    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+        connect_state->fd = sock;
+        qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state);
+        *in_progress = true;
+    } else if (rc < 0) {
+        error_setg_errno(errp, errno, "Failed to connect socket");
+        closesocket(sock);
+        return -1;
+    }
+    return sock;
+}
+
+static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp)
+{
+    struct addrinfo ai, *res;
+    int rc;
+    const char *addr;
+    const char *port;
+
+    memset(&ai, 0, sizeof(ai));
+
+    ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
+    ai.ai_family = PF_UNSPEC;
+    ai.ai_socktype = SOCK_STREAM;
+
+    addr = qemu_opt_get(opts, "host");
+    port = qemu_opt_get(opts, "port");
+    if (addr == NULL || port == NULL) {
+        error_setg(errp, "host and/or port not specified");
+        return NULL;
+    }
+
+    if (qemu_opt_get_bool(opts, "ipv4", 0)) {
+        ai.ai_family = PF_INET;
+    }
+    if (qemu_opt_get_bool(opts, "ipv6", 0)) {
+        ai.ai_family = PF_INET6;
+    }
+
+    /* lookup */
+    rc = getaddrinfo(addr, port, &ai, &res);
+    if (rc != 0) {
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
+        return NULL;
+    }
+    return res;
+}
+
+/**
+ * Create a socket and connect it to an address.
+ *
+ * @opts: QEMU options, recognized parameters strings "host" and "port",
+ *        bools "ipv4" and "ipv6".
+ * @errp: set on error
+ * @callback: callback function for non-blocking connect
+ * @opaque: opaque for callback function
+ *
+ * Returns: -1 on error, file descriptor on success.
+ *
+ * If @callback is non-null, the connect is non-blocking.  If this
+ * function succeeds, callback will be called when the connection
+ * completes, with the file descriptor on success, or -1 on error.
+ */
+int inet_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
+{
+    Error *local_err = NULL;
+    struct addrinfo *res, *e;
+    int sock = -1;
+    bool in_progress;
+    ConnectState *connect_state = NULL;
+
+    res = inet_parse_connect_opts(opts, errp);
+    if (!res) {
+        return -1;
+    }
+
+    if (callback != NULL) {
+        connect_state = g_malloc0(sizeof(*connect_state));
+        connect_state->addr_list = res;
+        connect_state->callback = callback;
+        connect_state->opaque = opaque;
+    }
+
+    for (e = res; e != NULL; e = e->ai_next) {
+        error_free(local_err);
+        local_err = NULL;
+        if (connect_state != NULL) {
+            connect_state->current_addr = e;
+        }
+        sock = inet_connect_addr(e, &in_progress, connect_state, &local_err);
+        if (sock >= 0) {
+            break;
+        }
+    }
+
+    if (sock < 0) {
+        error_propagate(errp, local_err);
+    } else if (in_progress) {
+        /* wait_for_connect() will do the rest */
+        return sock;
+    } else {
+        if (callback) {
+            callback(sock, NULL, opaque);
+        }
+    }
+    g_free(connect_state);
+    freeaddrinfo(res);
+    return sock;
+}
+
+int inet_dgram_opts(QemuOpts *opts, Error **errp)
+{
+    struct addrinfo ai, *peer = NULL, *local = NULL;
+    const char *addr;
+    const char *port;
+    int sock = -1, rc;
+
+    /* lookup peer addr */
+    memset(&ai,0, sizeof(ai));
+    ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG;
+    ai.ai_family = PF_UNSPEC;
+    ai.ai_socktype = SOCK_DGRAM;
+
+    addr = qemu_opt_get(opts, "host");
+    port = qemu_opt_get(opts, "port");
+    if (addr == NULL || strlen(addr) == 0) {
+        addr = "localhost";
+    }
+    if (port == NULL || strlen(port) == 0) {
+        error_setg(errp, "remote port not specified");
+        return -1;
+    }
+
+    if (qemu_opt_get_bool(opts, "ipv4", 0))
+        ai.ai_family = PF_INET;
+    if (qemu_opt_get_bool(opts, "ipv6", 0))
+        ai.ai_family = PF_INET6;
+
+    if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) {
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
+	return -1;
+    }
+
+    /* lookup local addr */
+    memset(&ai,0, sizeof(ai));
+    ai.ai_flags = AI_PASSIVE;
+    ai.ai_family = peer->ai_family;
+    ai.ai_socktype = SOCK_DGRAM;
+
+    addr = qemu_opt_get(opts, "localaddr");
+    port = qemu_opt_get(opts, "localport");
+    if (addr == NULL || strlen(addr) == 0) {
+        addr = NULL;
+    }
+    if (!port || strlen(port) == 0)
+        port = "0";
+
+    if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) {
+        error_setg(errp, "address resolution failed for %s:%s: %s", addr, port,
+                   gai_strerror(rc));
+        goto err;
+    }
+
+    /* create socket */
+    sock = qemu_socket(peer->ai_family, peer->ai_socktype, peer->ai_protocol);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        goto err;
+    }
+    socket_set_fast_reuse(sock);
+
+    /* bind socket */
+    if (bind(sock, local->ai_addr, local->ai_addrlen) < 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket");
+        goto err;
+    }
+
+    /* connect to peer */
+    if (connect(sock,peer->ai_addr,peer->ai_addrlen) < 0) {
+        error_setg_errno(errp, errno, "Failed to connect socket");
+        goto err;
+    }
+
+    freeaddrinfo(local);
+    freeaddrinfo(peer);
+    return sock;
+
+err:
+    if (-1 != sock)
+        closesocket(sock);
+    if (local)
+        freeaddrinfo(local);
+    if (peer)
+        freeaddrinfo(peer);
+    return -1;
+}
+
+/* compatibility wrapper */
+InetSocketAddress *inet_parse(const char *str, Error **errp)
+{
+    InetSocketAddress *addr;
+    const char *optstr, *h;
+    char host[65];
+    char port[33];
+    int to;
+    int pos;
+
+    addr = g_new0(InetSocketAddress, 1);
+
+    /* parse address */
+    if (str[0] == ':') {
+        /* no host given */
+        host[0] = '\0';
+        if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) {
+            error_setg(errp, "error parsing port in address '%s'", str);
+            goto fail;
+        }
+    } else if (str[0] == '[') {
+        /* IPv6 addr */
+        if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) {
+            error_setg(errp, "error parsing IPv6 address '%s'", str);
+            goto fail;
+        }
+        addr->ipv6 = addr->has_ipv6 = true;
+    } else {
+        /* hostname or IPv4 addr */
+        if (2 != sscanf(str, "%64[^:]:%32[^,]%n", host, port, &pos)) {
+            error_setg(errp, "error parsing address '%s'", str);
+            goto fail;
+        }
+        if (host[strspn(host, "0123456789.")] == '\0') {
+            addr->ipv4 = addr->has_ipv4 = true;
+        }
+    }
+
+    addr->host = g_strdup(host);
+    addr->port = g_strdup(port);
+
+    /* parse options */
+    optstr = str + pos;
+    h = strstr(optstr, ",to=");
+    if (h) {
+        h += 4;
+        if (sscanf(h, "%d%n", &to, &pos) != 1 ||
+            (h[pos] != '\0' && h[pos] != ',')) {
+            error_setg(errp, "error parsing to= argument");
+            goto fail;
+        }
+        addr->has_to = true;
+        addr->to = to;
+    }
+    if (strstr(optstr, ",ipv4")) {
+        addr->ipv4 = addr->has_ipv4 = true;
+    }
+    if (strstr(optstr, ",ipv6")) {
+        addr->ipv6 = addr->has_ipv6 = true;
+    }
+    return addr;
+
+fail:
+    qapi_free_InetSocketAddress(addr);
+    return NULL;
+}
+
+static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr)
+{
+    bool ipv4 = addr->has_ipv4 && addr->ipv4;
+    bool ipv6 = addr->has_ipv6 && addr->ipv6;
+
+    if (ipv4 || ipv6) {
+        qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort);
+        qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort);
+    } else if (addr->has_ipv4 || addr->has_ipv6) {
+        qemu_opt_set_bool(opts, "ipv4", !addr->has_ipv4, &error_abort);
+        qemu_opt_set_bool(opts, "ipv6", !addr->has_ipv6, &error_abort);
+    }
+    if (addr->has_to) {
+        qemu_opt_set_number(opts, "to", addr->to, &error_abort);
+    }
+    qemu_opt_set(opts, "host", addr->host, &error_abort);
+    qemu_opt_set(opts, "port", addr->port, &error_abort);
+}
+
+int inet_listen(const char *str, char *ostr, int olen,
+                int socktype, int port_offset, Error **errp)
+{
+    QemuOpts *opts;
+    char *optstr;
+    int sock = -1;
+    InetSocketAddress *addr;
+
+    addr = inet_parse(str, errp);
+    if (addr != NULL) {
+        opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+        inet_addr_to_opts(opts, addr);
+        qapi_free_InetSocketAddress(addr);
+        sock = inet_listen_opts(opts, port_offset, errp);
+        if (sock != -1 && ostr) {
+            optstr = strchr(str, ',');
+            if (qemu_opt_get_bool(opts, "ipv6", 0)) {
+                snprintf(ostr, olen, "[%s]:%s%s",
+                         qemu_opt_get(opts, "host"),
+                         qemu_opt_get(opts, "port"),
+                         optstr ? optstr : "");
+            } else {
+                snprintf(ostr, olen, "%s:%s%s",
+                         qemu_opt_get(opts, "host"),
+                         qemu_opt_get(opts, "port"),
+                         optstr ? optstr : "");
+            }
+        }
+        qemu_opts_del(opts);
+    }
+    return sock;
+}
+
+/**
+ * Create a blocking socket and connect it to an address.
+ *
+ * @str: address string
+ * @errp: set in case of an error
+ *
+ * Returns -1 in case of error, file descriptor on success
+ **/
+int inet_connect(const char *str, Error **errp)
+{
+    QemuOpts *opts;
+    int sock = -1;
+    InetSocketAddress *addr;
+
+    addr = inet_parse(str, errp);
+    if (addr != NULL) {
+        opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+        inet_addr_to_opts(opts, addr);
+        qapi_free_InetSocketAddress(addr);
+        sock = inet_connect_opts(opts, errp, NULL, NULL);
+        qemu_opts_del(opts);
+    }
+    return sock;
+}
+
+/**
+ * Create a non-blocking socket and connect it to an address.
+ * Calls the callback function with fd in case of success or -1 in case of
+ * error.
+ *
+ * @str: address string
+ * @callback: callback function that is called when connect completes,
+ *            cannot be NULL.
+ * @opaque: opaque for callback function
+ * @errp: set in case of an error
+ *
+ * Returns: -1 on immediate error, file descriptor on success.
+ **/
+int inet_nonblocking_connect(const char *str,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp)
+{
+    QemuOpts *opts;
+    int sock = -1;
+    InetSocketAddress *addr;
+
+    g_assert(callback != NULL);
+
+    addr = inet_parse(str, errp);
+    if (addr != NULL) {
+        opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+        inet_addr_to_opts(opts, addr);
+        qapi_free_InetSocketAddress(addr);
+        sock = inet_connect_opts(opts, errp, callback, opaque);
+        qemu_opts_del(opts);
+    }
+    return sock;
+}
+
+#ifndef _WIN32
+
+int unix_listen_opts(QemuOpts *opts, Error **errp)
+{
+    struct sockaddr_un un;
+    const char *path = qemu_opt_get(opts, "path");
+    int sock, fd;
+
+    sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create Unix socket");
+        return -1;
+    }
+
+    memset(&un, 0, sizeof(un));
+    un.sun_family = AF_UNIX;
+    if (path && strlen(path)) {
+        snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+    } else {
+        const char *tmpdir = getenv("TMPDIR");
+        tmpdir = tmpdir ? tmpdir : "/tmp";
+        if (snprintf(un.sun_path, sizeof(un.sun_path), "%s/qemu-socket-XXXXXX",
+                     tmpdir) >= sizeof(un.sun_path)) {
+            error_setg_errno(errp, errno,
+                             "TMPDIR environment variable (%s) too large", tmpdir);
+            goto err;
+        }
+
+        /*
+         * This dummy fd usage silences the mktemp() unsecure warning.
+         * Using mkstemp() doesn't make things more secure here
+         * though.  bind() complains about existing files, so we have
+         * to unlink first and thus re-open the race window.  The
+         * worst case possible is bind() failing, i.e. a DoS attack.
+         */
+        fd = mkstemp(un.sun_path);
+        if (fd < 0) {
+            error_setg_errno(errp, errno,
+                             "Failed to make a temporary socket name in %s", tmpdir);
+            goto err;
+        }
+        close(fd);
+        qemu_opt_set(opts, "path", un.sun_path, &error_abort);
+    }
+
+    if (unlink(un.sun_path) < 0 && errno != ENOENT) {
+        error_setg_errno(errp, errno,
+                         "Failed to unlink socket %s", un.sun_path);
+        goto err;
+    }
+    if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) {
+        error_setg_errno(errp, errno, "Failed to bind socket to %s", un.sun_path);
+        goto err;
+    }
+    if (listen(sock, 1) < 0) {
+        error_setg_errno(errp, errno, "Failed to listen on socket");
+        goto err;
+    }
+
+    return sock;
+
+err:
+    closesocket(sock);
+    return -1;
+}
+
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
+{
+    struct sockaddr_un un;
+    const char *path = qemu_opt_get(opts, "path");
+    ConnectState *connect_state = NULL;
+    int sock, rc;
+
+    if (path == NULL) {
+        error_setg(errp, "unix connect: no path specified");
+        return -1;
+    }
+
+    sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0);
+    if (sock < 0) {
+        error_setg_errno(errp, errno, "Failed to create socket");
+        return -1;
+    }
+    if (callback != NULL) {
+        connect_state = g_malloc0(sizeof(*connect_state));
+        connect_state->callback = callback;
+        connect_state->opaque = opaque;
+        qemu_set_nonblock(sock);
+    }
+
+    memset(&un, 0, sizeof(un));
+    un.sun_family = AF_UNIX;
+    snprintf(un.sun_path, sizeof(un.sun_path), "%s", path);
+
+    /* connect to peer */
+    do {
+        rc = 0;
+        if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) {
+            rc = -socket_error();
+        }
+    } while (rc == -EINTR);
+
+    if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) {
+        connect_state->fd = sock;
+        qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state);
+        return sock;
+    } else if (rc >= 0) {
+        /* non blocking socket immediate success, call callback */
+        if (callback != NULL) {
+            callback(sock, NULL, opaque);
+        }
+    }
+
+    if (rc < 0) {
+        error_setg_errno(errp, -rc, "Failed to connect socket");
+        close(sock);
+        sock = -1;
+    }
+
+    g_free(connect_state);
+    return sock;
+}
+
+#else
+
+int unix_listen_opts(QemuOpts *opts, Error **errp)
+{
+    error_setg(errp, "unix sockets are not available on windows");
+    errno = ENOTSUP;
+    return -1;
+}
+
+int unix_connect_opts(QemuOpts *opts, Error **errp,
+                      NonBlockingConnectHandler *callback, void *opaque)
+{
+    error_setg(errp, "unix sockets are not available on windows");
+    errno = ENOTSUP;
+    return -1;
+}
+#endif
+
+/* compatibility wrapper */
+int unix_listen(const char *str, char *ostr, int olen, Error **errp)
+{
+    QemuOpts *opts;
+    char *path, *optstr;
+    int sock, len;
+
+    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+
+    optstr = strchr(str, ',');
+    if (optstr) {
+        len = optstr - str;
+        if (len) {
+            path = g_malloc(len+1);
+            snprintf(path, len+1, "%.*s", len, str);
+            qemu_opt_set(opts, "path", path, &error_abort);
+            g_free(path);
+        }
+    } else {
+        qemu_opt_set(opts, "path", str, &error_abort);
+    }
+
+    sock = unix_listen_opts(opts, errp);
+
+    if (sock != -1 && ostr)
+        snprintf(ostr, olen, "%s%s", qemu_opt_get(opts, "path"), optstr ? optstr : "");
+    qemu_opts_del(opts);
+    return sock;
+}
+
+int unix_connect(const char *path, Error **errp)
+{
+    QemuOpts *opts;
+    int sock;
+
+    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+    qemu_opt_set(opts, "path", path, &error_abort);
+    sock = unix_connect_opts(opts, errp, NULL, NULL);
+    qemu_opts_del(opts);
+    return sock;
+}
+
+
+int unix_nonblocking_connect(const char *path,
+                             NonBlockingConnectHandler *callback,
+                             void *opaque, Error **errp)
+{
+    QemuOpts *opts;
+    int sock = -1;
+
+    g_assert(callback != NULL);
+
+    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+    qemu_opt_set(opts, "path", path, &error_abort);
+    sock = unix_connect_opts(opts, errp, callback, opaque);
+    qemu_opts_del(opts);
+    return sock;
+}
+
+SocketAddress *socket_parse(const char *str, Error **errp)
+{
+    SocketAddress *addr;
+
+    addr = g_new0(SocketAddress, 1);
+    if (strstart(str, "unix:", NULL)) {
+        if (str[5] == '\0') {
+            error_setg(errp, "invalid Unix socket address");
+            goto fail;
+        } else {
+            addr->type = SOCKET_ADDRESS_KIND_UNIX;
+            addr->u.q_unix = g_new(UnixSocketAddress, 1);
+            addr->u.q_unix->path = g_strdup(str + 5);
+        }
+    } else if (strstart(str, "fd:", NULL)) {
+        if (str[3] == '\0') {
+            error_setg(errp, "invalid file descriptor address");
+            goto fail;
+        } else {
+            addr->type = SOCKET_ADDRESS_KIND_FD;
+            addr->u.fd = g_new(String, 1);
+            addr->u.fd->str = g_strdup(str + 3);
+        }
+    } else {
+        addr->type = SOCKET_ADDRESS_KIND_INET;
+        addr->u.inet = inet_parse(str, errp);
+        if (addr->u.inet == NULL) {
+            goto fail;
+        }
+    }
+    return addr;
+
+fail:
+    qapi_free_SocketAddress(addr);
+    return NULL;
+}
+
+int socket_connect(SocketAddress *addr, Error **errp,
+                   NonBlockingConnectHandler *callback, void *opaque)
+{
+    QemuOpts *opts;
+    int fd;
+
+    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+    switch (addr->type) {
+    case SOCKET_ADDRESS_KIND_INET:
+        inet_addr_to_opts(opts, addr->u.inet);
+        fd = inet_connect_opts(opts, errp, callback, opaque);
+        break;
+
+    case SOCKET_ADDRESS_KIND_UNIX:
+        qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
+        fd = unix_connect_opts(opts, errp, callback, opaque);
+        break;
+
+    case SOCKET_ADDRESS_KIND_FD:
+        fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
+        if (fd >= 0 && callback) {
+            qemu_set_nonblock(fd);
+            callback(fd, NULL, opaque);
+        }
+        break;
+
+    default:
+        abort();
+    }
+    qemu_opts_del(opts);
+    return fd;
+}
+
+int socket_listen(SocketAddress *addr, Error **errp)
+{
+    QemuOpts *opts;
+    int fd;
+
+    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+    switch (addr->type) {
+    case SOCKET_ADDRESS_KIND_INET:
+        inet_addr_to_opts(opts, addr->u.inet);
+        fd = inet_listen_opts(opts, 0, errp);
+        break;
+
+    case SOCKET_ADDRESS_KIND_UNIX:
+        qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort);
+        fd = unix_listen_opts(opts, errp);
+        break;
+
+    case SOCKET_ADDRESS_KIND_FD:
+        fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp);
+        break;
+
+    default:
+        abort();
+    }
+    qemu_opts_del(opts);
+    return fd;
+}
+
+int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp)
+{
+    QemuOpts *opts;
+    int fd;
+
+    opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort);
+    switch (remote->type) {
+    case SOCKET_ADDRESS_KIND_INET:
+        inet_addr_to_opts(opts, remote->u.inet);
+        if (local) {
+            qemu_opt_set(opts, "localaddr", local->u.inet->host, &error_abort);
+            qemu_opt_set(opts, "localport", local->u.inet->port, &error_abort);
+        }
+        fd = inet_dgram_opts(opts, errp);
+        break;
+
+    default:
+        error_setg(errp, "socket type unsupported for datagram");
+        fd = -1;
+    }
+    qemu_opts_del(opts);
+    return fd;
+}
+
+
+static SocketAddress *
+socket_sockaddr_to_address_inet(struct sockaddr_storage *sa,
+                                socklen_t salen,
+                                Error **errp)
+{
+    char host[NI_MAXHOST];
+    char serv[NI_MAXSERV];
+    SocketAddress *addr;
+    int ret;
+
+    ret = getnameinfo((struct sockaddr *)sa, salen,
+                      host, sizeof(host),
+                      serv, sizeof(serv),
+                      NI_NUMERICHOST | NI_NUMERICSERV);
+    if (ret != 0) {
+        error_setg(errp, "Cannot format numeric socket address: %s",
+                   gai_strerror(ret));
+        return NULL;
+    }
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_KIND_INET;
+    addr->u.inet = g_new0(InetSocketAddress, 1);
+    addr->u.inet->host = g_strdup(host);
+    addr->u.inet->port = g_strdup(serv);
+    if (sa->ss_family == AF_INET) {
+        addr->u.inet->has_ipv4 = addr->u.inet->ipv4 = true;
+    } else {
+        addr->u.inet->has_ipv6 = addr->u.inet->ipv6 = true;
+    }
+
+    return addr;
+}
+
+
+#ifndef WIN32
+static SocketAddress *
+socket_sockaddr_to_address_unix(struct sockaddr_storage *sa,
+                                socklen_t salen,
+                                Error **errp)
+{
+    SocketAddress *addr;
+    struct sockaddr_un *su = (struct sockaddr_un *)sa;
+
+    addr = g_new0(SocketAddress, 1);
+    addr->type = SOCKET_ADDRESS_KIND_UNIX;
+    addr->u.q_unix = g_new0(UnixSocketAddress, 1);
+    if (su->sun_path[0]) {
+        addr->u.q_unix->path = g_strndup(su->sun_path,
+                                         sizeof(su->sun_path));
+    }
+
+    return addr;
+}
+#endif /* WIN32 */
+
+static SocketAddress *
+socket_sockaddr_to_address(struct sockaddr_storage *sa,
+                           socklen_t salen,
+                           Error **errp)
+{
+    switch (sa->ss_family) {
+    case AF_INET:
+    case AF_INET6:
+        return socket_sockaddr_to_address_inet(sa, salen, errp);
+
+#ifndef WIN32
+    case AF_UNIX:
+        return socket_sockaddr_to_address_unix(sa, salen, errp);
+#endif /* WIN32 */
+
+    default:
+        error_setg(errp, "socket family %d unsupported",
+                   sa->ss_family);
+        return NULL;
+    }
+    return 0;
+}
+
+
+SocketAddress *socket_local_address(int fd, Error **errp)
+{
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+
+    if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         "Unable to query local socket address");
+        return NULL;
+    }
+
+    return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+SocketAddress *socket_remote_address(int fd, Error **errp)
+{
+    struct sockaddr_storage ss;
+    socklen_t sslen = sizeof(ss);
+
+    if (getpeername(fd, (struct sockaddr *)&ss, &sslen) < 0) {
+        error_setg_errno(errp, socket_error(), "%s",
+                         "Unable to query remote socket address");
+        return NULL;
+    }
+
+    return socket_sockaddr_to_address(&ss, sslen, errp);
+}
+
+
+void qapi_copy_SocketAddress(SocketAddress **p_dest,
+                             SocketAddress *src)
+{
+    QmpOutputVisitor *qov;
+    QmpInputVisitor *qiv;
+    Visitor *ov, *iv;
+    QObject *obj;
+
+    *p_dest = NULL;
+
+    qov = qmp_output_visitor_new();
+    ov = qmp_output_get_visitor(qov);
+    visit_type_SocketAddress(ov, &src, NULL, &error_abort);
+    obj = qmp_output_get_qobject(qov);
+    qmp_output_visitor_cleanup(qov);
+    if (!obj) {
+        return;
+    }
+
+    qiv = qmp_input_visitor_new(obj);
+    iv = qmp_input_get_visitor(qiv);
+    visit_type_SocketAddress(iv, p_dest, NULL, &error_abort);
+    qmp_input_visitor_cleanup(qiv);
+    qobject_decref(obj);
+}
diff --git a/src/util/qemu-thread-posix.c b/src/util/qemu-thread-posix.c
new file mode 100644
index 0000000..dbd8094
--- /dev/null
+++ b/src/util/qemu-thread-posix.c
@@ -0,0 +1,518 @@
+/*
+ * Wrappers around mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2009
+ *
+ * Author:
+ *  Marcelo Tosatti <mtosatti@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <signal.h>
+#include <stdint.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <sys/time.h>
+#ifdef __linux__
+#include <sys/syscall.h>
+#include <linux/futex.h>
+#endif
+#include "qemu/thread.h"
+#include "qemu/atomic.h"
+#include "qemu/notify.h"
+
+static bool name_threads;
+
+void qemu_thread_naming(bool enable)
+{
+    name_threads = enable;
+
+#ifndef CONFIG_THREAD_SETNAME_BYTHREAD
+    /* This is a debugging option, not fatal */
+    if (enable) {
+        fprintf(stderr, "qemu: thread naming not supported on this host\n");
+    }
+#endif
+}
+
+static void error_exit(int err, const char *msg)
+{
+    fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err));
+    abort();
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+    int err;
+
+    err = pthread_mutex_init(&mutex->lock, NULL);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+    int err;
+
+    err = pthread_mutex_destroy(&mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_mutex_lock(QemuMutex *mutex)
+{
+    int err;
+
+    err = pthread_mutex_lock(&mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+}
+
+int qemu_mutex_trylock(QemuMutex *mutex)
+{
+    return pthread_mutex_trylock(&mutex->lock);
+}
+
+void qemu_mutex_unlock(QemuMutex *mutex)
+{
+    int err;
+
+    err = pthread_mutex_unlock(&mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+    int err;
+
+    err = pthread_cond_init(&cond->cond, NULL);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+    int err;
+
+    err = pthread_cond_destroy(&cond->cond);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+    int err;
+
+    err = pthread_cond_signal(&cond->cond);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+    int err;
+
+    err = pthread_cond_broadcast(&cond->cond);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
+{
+    int err;
+
+    err = pthread_cond_wait(&cond->cond, &mutex->lock);
+    if (err)
+        error_exit(err, __func__);
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    rc = pthread_mutex_init(&sem->lock, NULL);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+    rc = pthread_cond_init(&sem->cond, NULL);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+    if (init < 0) {
+        error_exit(EINVAL, __func__);
+    }
+    sem->count = init;
+#else
+    rc = sem_init(&sem->sem, 0, init);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    rc = pthread_cond_destroy(&sem->cond);
+    if (rc < 0) {
+        error_exit(rc, __func__);
+    }
+    rc = pthread_mutex_destroy(&sem->lock);
+    if (rc < 0) {
+        error_exit(rc, __func__);
+    }
+#else
+    rc = sem_destroy(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+    int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    pthread_mutex_lock(&sem->lock);
+    if (sem->count == UINT_MAX) {
+        rc = EINVAL;
+    } else {
+        sem->count++;
+        rc = pthread_cond_signal(&sem->cond);
+    }
+    pthread_mutex_unlock(&sem->lock);
+    if (rc != 0) {
+        error_exit(rc, __func__);
+    }
+#else
+    rc = sem_post(&sem->sem);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+static void compute_abs_deadline(struct timespec *ts, int ms)
+{
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000;
+    ts->tv_sec = tv.tv_sec + ms / 1000;
+    if (ts->tv_nsec >= 1000000000) {
+        ts->tv_sec++;
+        ts->tv_nsec -= 1000000000;
+    }
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    int rc;
+    struct timespec ts;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    rc = 0;
+    compute_abs_deadline(&ts, ms);
+    pthread_mutex_lock(&sem->lock);
+    while (sem->count == 0) {
+        rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts);
+        if (rc == ETIMEDOUT) {
+            break;
+        }
+        if (rc != 0) {
+            error_exit(rc, __func__);
+        }
+    }
+    if (rc != ETIMEDOUT) {
+        --sem->count;
+    }
+    pthread_mutex_unlock(&sem->lock);
+    return (rc == ETIMEDOUT ? -1 : 0);
+#else
+    if (ms <= 0) {
+        /* This is cheaper than sem_timedwait.  */
+        do {
+            rc = sem_trywait(&sem->sem);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == EAGAIN) {
+            return -1;
+        }
+    } else {
+        compute_abs_deadline(&ts, ms);
+        do {
+            rc = sem_timedwait(&sem->sem, &ts);
+        } while (rc == -1 && errno == EINTR);
+        if (rc == -1 && errno == ETIMEDOUT) {
+            return -1;
+        }
+    }
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+    return 0;
+#endif
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+    int rc;
+
+#if defined(__APPLE__) || defined(__NetBSD__)
+    pthread_mutex_lock(&sem->lock);
+    while (sem->count == 0) {
+        rc = pthread_cond_wait(&sem->cond, &sem->lock);
+        if (rc != 0) {
+            error_exit(rc, __func__);
+        }
+    }
+    --sem->count;
+    pthread_mutex_unlock(&sem->lock);
+#else
+    do {
+        rc = sem_wait(&sem->sem);
+    } while (rc == -1 && errno == EINTR);
+    if (rc < 0) {
+        error_exit(errno, __func__);
+    }
+#endif
+}
+
+#ifdef __linux__
+#define futex(...)              syscall(__NR_futex, __VA_ARGS__)
+
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+    futex(ev, FUTEX_WAKE, n, NULL, NULL, 0);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+    while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) {
+        switch (errno) {
+        case EWOULDBLOCK:
+            return;
+        case EINTR:
+            break; /* get out of switch and retry */
+        default:
+            abort();
+        }
+    }
+}
+#else
+static inline void futex_wake(QemuEvent *ev, int n)
+{
+    pthread_mutex_lock(&ev->lock);
+    if (n == 1) {
+        pthread_cond_signal(&ev->cond);
+    } else {
+        pthread_cond_broadcast(&ev->cond);
+    }
+    pthread_mutex_unlock(&ev->lock);
+}
+
+static inline void futex_wait(QemuEvent *ev, unsigned val)
+{
+    pthread_mutex_lock(&ev->lock);
+    if (ev->value == val) {
+        pthread_cond_wait(&ev->cond, &ev->lock);
+    }
+    pthread_mutex_unlock(&ev->lock);
+}
+#endif
+
+/* Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy.
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+#ifndef __linux__
+    pthread_mutex_init(&ev->lock, NULL);
+    pthread_cond_init(&ev->cond, NULL);
+#endif
+
+    ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+#ifndef __linux__
+    pthread_mutex_destroy(&ev->lock);
+    pthread_cond_destroy(&ev->cond);
+#endif
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) != EV_SET) {
+        if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            futex_wake(ev, INT_MAX);
+        }
+    }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) == EV_SET) {
+        /*
+         * If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        atomic_or(&ev->value, EV_FREE);
+    }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    unsigned value;
+
+    value = atomic_mb_read(&ev->value);
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /*
+             * Leave the event reset and tell qemu_event_set that there
+             * are waiters.  No need to retry, because there cannot be
+             * a concurrent busy->free transition.  After the CAS, the
+             * event will be either set or busy.
+             */
+            if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                return;
+            }
+        }
+        futex_wait(ev, EV_BUSY);
+    }
+}
+
+static pthread_key_t exit_key;
+
+union NotifierThreadData {
+    void *ptr;
+    NotifierList list;
+};
+QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *));
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    union NotifierThreadData ntd;
+    ntd.ptr = pthread_getspecific(exit_key);
+    notifier_list_add(&ntd.list, notifier);
+    pthread_setspecific(exit_key, ntd.ptr);
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    union NotifierThreadData ntd;
+    ntd.ptr = pthread_getspecific(exit_key);
+    notifier_remove(notifier);
+    pthread_setspecific(exit_key, ntd.ptr);
+}
+
+static void qemu_thread_atexit_run(void *arg)
+{
+    union NotifierThreadData ntd = { .ptr = arg };
+    notifier_list_notify(&ntd.list, NULL);
+}
+
+static void __attribute__((constructor)) qemu_thread_atexit_init(void)
+{
+    pthread_key_create(&exit_key, qemu_thread_atexit_run);
+}
+
+
+/* Attempt to set the threads name; note that this is for debug, so
+ * we're not going to fail if we can't set it.
+ */
+static void qemu_thread_set_name(QemuThread *thread, const char *name)
+{
+#ifdef CONFIG_PTHREAD_SETNAME_NP
+    pthread_setname_np(thread->thread, name);
+#endif
+}
+
+void qemu_thread_create(QemuThread *thread, const char *name,
+                       void *(*start_routine)(void*),
+                       void *arg, int mode)
+{
+    sigset_t set, oldset;
+    int err;
+    pthread_attr_t attr;
+
+    err = pthread_attr_init(&attr);
+    if (err) {
+        error_exit(err, __func__);
+    }
+    if (mode == QEMU_THREAD_DETACHED) {
+        err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+        if (err) {
+            error_exit(err, __func__);
+        }
+    }
+
+    /* Leave signal handling to the iothread.  */
+    sigfillset(&set);
+    pthread_sigmask(SIG_SETMASK, &set, &oldset);
+    err = pthread_create(&thread->thread, &attr, start_routine, arg);
+    if (err)
+        error_exit(err, __func__);
+
+    if (name_threads) {
+        qemu_thread_set_name(thread, name);
+    }
+
+    pthread_sigmask(SIG_SETMASK, &oldset, NULL);
+
+    pthread_attr_destroy(&attr);
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+    thread->thread = pthread_self();
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+   return pthread_equal(pthread_self(), thread->thread);
+}
+
+void qemu_thread_exit(void *retval)
+{
+    pthread_exit(retval);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+    int err;
+    void *ret;
+
+    err = pthread_join(thread->thread, &ret);
+    if (err) {
+        error_exit(err, __func__);
+    }
+    return ret;
+}
diff --git a/src/util/qemu-thread-win32.c b/src/util/qemu-thread-win32.c
new file mode 100644
index 0000000..6cdd553
--- /dev/null
+++ b/src/util/qemu-thread-win32.c
@@ -0,0 +1,479 @@
+/*
+ * Win32 implementation for mutex/cond/thread functions
+ *
+ * Copyright Red Hat, Inc. 2010
+ *
+ * Author:
+ *  Paolo Bonzini <pbonzini@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ *
+ */
+#include "qemu-common.h"
+#include "qemu/thread.h"
+#include "qemu/notify.h"
+#include <process.h>
+#include <assert.h>
+#include <limits.h>
+
+static bool name_threads;
+
+void qemu_thread_naming(bool enable)
+{
+    /* But note we don't actually name them on Windows yet */
+    name_threads = enable;
+
+    fprintf(stderr, "qemu: thread naming not supported on this host\n");
+}
+
+static void error_exit(int err, const char *msg)
+{
+    char *pstr;
+
+    FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER,
+                  NULL, err, 0, (LPTSTR)&pstr, 2, NULL);
+    fprintf(stderr, "qemu: %s: %s\n", msg, pstr);
+    LocalFree(pstr);
+    abort();
+}
+
+void qemu_mutex_init(QemuMutex *mutex)
+{
+    mutex->owner = 0;
+    InitializeCriticalSection(&mutex->lock);
+}
+
+void qemu_mutex_destroy(QemuMutex *mutex)
+{
+    assert(mutex->owner == 0);
+    DeleteCriticalSection(&mutex->lock);
+}
+
+void qemu_mutex_lock(QemuMutex *mutex)
+{
+    EnterCriticalSection(&mutex->lock);
+
+    /* Win32 CRITICAL_SECTIONs are recursive.  Assert that we're not
+     * using them as such.
+     */
+    assert(mutex->owner == 0);
+    mutex->owner = GetCurrentThreadId();
+}
+
+int qemu_mutex_trylock(QemuMutex *mutex)
+{
+    int owned;
+
+    owned = TryEnterCriticalSection(&mutex->lock);
+    if (owned) {
+        assert(mutex->owner == 0);
+        mutex->owner = GetCurrentThreadId();
+    }
+    return !owned;
+}
+
+void qemu_mutex_unlock(QemuMutex *mutex)
+{
+    assert(mutex->owner == GetCurrentThreadId());
+    mutex->owner = 0;
+    LeaveCriticalSection(&mutex->lock);
+}
+
+void qemu_cond_init(QemuCond *cond)
+{
+    memset(cond, 0, sizeof(*cond));
+
+    cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL);
+    if (!cond->sema) {
+        error_exit(GetLastError(), __func__);
+    }
+    cond->continue_event = CreateEvent(NULL,    /* security */
+                                       FALSE,   /* auto-reset */
+                                       FALSE,   /* not signaled */
+                                       NULL);   /* name */
+    if (!cond->continue_event) {
+        error_exit(GetLastError(), __func__);
+    }
+}
+
+void qemu_cond_destroy(QemuCond *cond)
+{
+    BOOL result;
+    result = CloseHandle(cond->continue_event);
+    if (!result) {
+        error_exit(GetLastError(), __func__);
+    }
+    cond->continue_event = 0;
+    result = CloseHandle(cond->sema);
+    if (!result) {
+        error_exit(GetLastError(), __func__);
+    }
+    cond->sema = 0;
+}
+
+void qemu_cond_signal(QemuCond *cond)
+{
+    DWORD result;
+
+    /*
+     * Signal only when there are waiters.  cond->waiters is
+     * incremented by pthread_cond_wait under the external lock,
+     * so we are safe about that.
+     */
+    if (cond->waiters == 0) {
+        return;
+    }
+
+    /*
+     * Waiting threads decrement it outside the external lock, but
+     * only if another thread is executing pthread_cond_broadcast and
+     * has the mutex.  So, it also cannot be decremented concurrently
+     * with this particular access.
+     */
+    cond->target = cond->waiters - 1;
+    result = SignalObjectAndWait(cond->sema, cond->continue_event,
+                                 INFINITE, FALSE);
+    if (result == WAIT_ABANDONED || result == WAIT_FAILED) {
+        error_exit(GetLastError(), __func__);
+    }
+}
+
+void qemu_cond_broadcast(QemuCond *cond)
+{
+    BOOLEAN result;
+    /*
+     * As in pthread_cond_signal, access to cond->waiters and
+     * cond->target is locked via the external mutex.
+     */
+    if (cond->waiters == 0) {
+        return;
+    }
+
+    cond->target = 0;
+    result = ReleaseSemaphore(cond->sema, cond->waiters, NULL);
+    if (!result) {
+        error_exit(GetLastError(), __func__);
+    }
+
+    /*
+     * At this point all waiters continue. Each one takes its
+     * slice of the semaphore. Now it's our turn to wait: Since
+     * the external mutex is held, no thread can leave cond_wait,
+     * yet. For this reason, we can be sure that no thread gets
+     * a chance to eat *more* than one slice. OTOH, it means
+     * that the last waiter must send us a wake-up.
+     */
+    WaitForSingleObject(cond->continue_event, INFINITE);
+}
+
+void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex)
+{
+    /*
+     * This access is protected under the mutex.
+     */
+    cond->waiters++;
+
+    /*
+     * Unlock external mutex and wait for signal.
+     * NOTE: we've held mutex locked long enough to increment
+     * waiters count above, so there's no problem with
+     * leaving mutex unlocked before we wait on semaphore.
+     */
+    qemu_mutex_unlock(mutex);
+    WaitForSingleObject(cond->sema, INFINITE);
+
+    /* Now waiters must rendez-vous with the signaling thread and
+     * let it continue.  For cond_broadcast this has heavy contention
+     * and triggers thundering herd.  So goes life.
+     *
+     * Decrease waiters count.  The mutex is not taken, so we have
+     * to do this atomically.
+     *
+     * All waiters contend for the mutex at the end of this function
+     * until the signaling thread relinquishes it.  To ensure
+     * each waiter consumes exactly one slice of the semaphore,
+     * the signaling thread stops until it is told by the last
+     * waiter that it can go on.
+     */
+    if (InterlockedDecrement(&cond->waiters) == cond->target) {
+        SetEvent(cond->continue_event);
+    }
+
+    qemu_mutex_lock(mutex);
+}
+
+void qemu_sem_init(QemuSemaphore *sem, int init)
+{
+    /* Manual reset.  */
+    sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL);
+}
+
+void qemu_sem_destroy(QemuSemaphore *sem)
+{
+    CloseHandle(sem->sema);
+}
+
+void qemu_sem_post(QemuSemaphore *sem)
+{
+    ReleaseSemaphore(sem->sema, 1, NULL);
+}
+
+int qemu_sem_timedwait(QemuSemaphore *sem, int ms)
+{
+    int rc = WaitForSingleObject(sem->sema, ms);
+    if (rc == WAIT_OBJECT_0) {
+        return 0;
+    }
+    if (rc != WAIT_TIMEOUT) {
+        error_exit(GetLastError(), __func__);
+    }
+    return -1;
+}
+
+void qemu_sem_wait(QemuSemaphore *sem)
+{
+    if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) {
+        error_exit(GetLastError(), __func__);
+    }
+}
+
+/* Wrap a Win32 manual-reset event with a fast userspace path.  The idea
+ * is to reset the Win32 event lazily, as part of a test-reset-test-wait
+ * sequence.  Such a sequence is, indeed, how QemuEvents are used by
+ * RCU and other subsystems!
+ *
+ * Valid transitions:
+ * - free->set, when setting the event
+ * - busy->set, when setting the event, followed by futex_wake
+ * - set->free, when resetting the event
+ * - free->busy, when waiting
+ *
+ * set->busy does not happen (it can be observed from the outside but
+ * it really is set->free->busy).
+ *
+ * busy->free provably cannot happen; to enforce it, the set->free transition
+ * is done with an OR, which becomes a no-op if the event has concurrently
+ * transitioned to free or busy (and is faster than cmpxchg).
+ */
+
+#define EV_SET         0
+#define EV_FREE        1
+#define EV_BUSY       -1
+
+void qemu_event_init(QemuEvent *ev, bool init)
+{
+    /* Manual reset.  */
+    ev->event = CreateEvent(NULL, TRUE, TRUE, NULL);
+    ev->value = (init ? EV_SET : EV_FREE);
+}
+
+void qemu_event_destroy(QemuEvent *ev)
+{
+    CloseHandle(ev->event);
+}
+
+void qemu_event_set(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) != EV_SET) {
+        if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) {
+            /* There were waiters, wake them up.  */
+            SetEvent(ev->event);
+        }
+    }
+}
+
+void qemu_event_reset(QemuEvent *ev)
+{
+    if (atomic_mb_read(&ev->value) == EV_SET) {
+        /* If there was a concurrent reset (or even reset+wait),
+         * do nothing.  Otherwise change EV_SET->EV_FREE.
+         */
+        atomic_or(&ev->value, EV_FREE);
+    }
+}
+
+void qemu_event_wait(QemuEvent *ev)
+{
+    unsigned value;
+
+    value = atomic_mb_read(&ev->value);
+    if (value != EV_SET) {
+        if (value == EV_FREE) {
+            /* qemu_event_set is not yet going to call SetEvent, but we are
+             * going to do another check for EV_SET below when setting EV_BUSY.
+             * At that point it is safe to call WaitForSingleObject.
+             */
+            ResetEvent(ev->event);
+
+            /* Tell qemu_event_set that there are waiters.  No need to retry
+             * because there cannot be a concurent busy->free transition.
+             * After the CAS, the event will be either set or busy.
+             */
+            if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) {
+                value = EV_SET;
+            } else {
+                value = EV_BUSY;
+            }
+        }
+        if (value == EV_BUSY) {
+            WaitForSingleObject(ev->event, INFINITE);
+        }
+    }
+}
+
+struct QemuThreadData {
+    /* Passed to win32_start_routine.  */
+    void             *(*start_routine)(void *);
+    void             *arg;
+    short             mode;
+    NotifierList      exit;
+
+    /* Only used for joinable threads. */
+    bool              exited;
+    void             *ret;
+    CRITICAL_SECTION  cs;
+};
+
+static bool atexit_registered;
+static NotifierList main_thread_exit;
+
+static __thread QemuThreadData *qemu_thread_data;
+
+static void run_main_thread_exit(void)
+{
+    notifier_list_notify(&main_thread_exit, NULL);
+}
+
+void qemu_thread_atexit_add(Notifier *notifier)
+{
+    if (!qemu_thread_data) {
+        if (!atexit_registered) {
+            atexit_registered = true;
+            atexit(run_main_thread_exit);
+        }
+        notifier_list_add(&main_thread_exit, notifier);
+    } else {
+        notifier_list_add(&qemu_thread_data->exit, notifier);
+    }
+}
+
+void qemu_thread_atexit_remove(Notifier *notifier)
+{
+    notifier_remove(notifier);
+}
+
+static unsigned __stdcall win32_start_routine(void *arg)
+{
+    QemuThreadData *data = (QemuThreadData *) arg;
+    void *(*start_routine)(void *) = data->start_routine;
+    void *thread_arg = data->arg;
+
+    qemu_thread_data = data;
+    qemu_thread_exit(start_routine(thread_arg));
+    abort();
+}
+
+void qemu_thread_exit(void *arg)
+{
+    QemuThreadData *data = qemu_thread_data;
+
+    notifier_list_notify(&data->exit, NULL);
+    if (data->mode == QEMU_THREAD_JOINABLE) {
+        data->ret = arg;
+        EnterCriticalSection(&data->cs);
+        data->exited = true;
+        LeaveCriticalSection(&data->cs);
+    } else {
+        g_free(data);
+    }
+    _endthreadex(0);
+}
+
+void *qemu_thread_join(QemuThread *thread)
+{
+    QemuThreadData *data;
+    void *ret;
+    HANDLE handle;
+
+    data = thread->data;
+    if (data->mode == QEMU_THREAD_DETACHED) {
+        return NULL;
+    }
+
+    /*
+     * Because multiple copies of the QemuThread can exist via
+     * qemu_thread_get_self, we need to store a value that cannot
+     * leak there.  The simplest, non racy way is to store the TID,
+     * discard the handle that _beginthreadex gives back, and
+     * get another copy of the handle here.
+     */
+    handle = qemu_thread_get_handle(thread);
+    if (handle) {
+        WaitForSingleObject(handle, INFINITE);
+        CloseHandle(handle);
+    }
+    ret = data->ret;
+    DeleteCriticalSection(&data->cs);
+    g_free(data);
+    return ret;
+}
+
+void qemu_thread_create(QemuThread *thread, const char *name,
+                       void *(*start_routine)(void *),
+                       void *arg, int mode)
+{
+    HANDLE hThread;
+    struct QemuThreadData *data;
+
+    data = g_malloc(sizeof *data);
+    data->start_routine = start_routine;
+    data->arg = arg;
+    data->mode = mode;
+    data->exited = false;
+    notifier_list_init(&data->exit);
+
+    if (data->mode != QEMU_THREAD_DETACHED) {
+        InitializeCriticalSection(&data->cs);
+    }
+
+    hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine,
+                                      data, 0, &thread->tid);
+    if (!hThread) {
+        error_exit(GetLastError(), __func__);
+    }
+    CloseHandle(hThread);
+    thread->data = data;
+}
+
+void qemu_thread_get_self(QemuThread *thread)
+{
+    thread->data = qemu_thread_data;
+    thread->tid = GetCurrentThreadId();
+}
+
+HANDLE qemu_thread_get_handle(QemuThread *thread)
+{
+    QemuThreadData *data;
+    HANDLE handle;
+
+    data = thread->data;
+    if (data->mode == QEMU_THREAD_DETACHED) {
+        return NULL;
+    }
+
+    EnterCriticalSection(&data->cs);
+    if (!data->exited) {
+        handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE,
+                            thread->tid);
+    } else {
+        handle = NULL;
+    }
+    LeaveCriticalSection(&data->cs);
+    return handle;
+}
+
+bool qemu_thread_is_self(QemuThread *thread)
+{
+    return GetCurrentThreadId() == thread->tid;
+}
diff --git a/src/util/qemu-timer-common.c b/src/util/qemu-timer-common.c
new file mode 100644
index 0000000..95e0847
--- /dev/null
+++ b/src/util/qemu-timer-common.c
@@ -0,0 +1,61 @@
+/*
+ * QEMU System Emulator
+ *
+ * Copyright (c) 2003-2008 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "qemu/timer.h"
+
+/***********************************************************/
+/* real time host monotonic timer */
+
+#ifdef _WIN32
+
+int64_t clock_freq;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+    LARGE_INTEGER freq;
+    int ret;
+    ret = QueryPerformanceFrequency(&freq);
+    if (ret == 0) {
+        fprintf(stderr, "Could not calibrate ticks\n");
+        exit(1);
+    }
+    clock_freq = freq.QuadPart;
+}
+
+#else
+
+int use_rt_clock;
+
+static void __attribute__((constructor)) init_get_clock(void)
+{
+    use_rt_clock = 0;
+#ifdef CLOCK_MONOTONIC
+    {
+        struct timespec ts;
+        if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) {
+            use_rt_clock = 1;
+        }
+    }
+#endif
+}
+#endif
diff --git a/src/util/rcu.c b/src/util/rcu.c
new file mode 100644
index 0000000..8ba304d
--- /dev/null
+++ b/src/util/rcu.c
@@ -0,0 +1,352 @@
+/*
+ * urcu-mb.c
+ *
+ * Userspace RCU library with explicit memory barriers
+ *
+ * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright (c) 2009 Paul E. McKenney, IBM Corporation.
+ * Copyright 2015 Red Hat, Inc.
+ *
+ * Ported to QEMU by Paolo Bonzini  <pbonzini@redhat.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ * IBM's contributions to this file may be relicensed under LGPLv2 or later.
+ */
+
+#include "qemu-common.h"
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <errno.h>
+#include "qemu/rcu.h"
+#include "qemu/atomic.h"
+#include "qemu/thread.h"
+#include "qemu/main-loop.h"
+
+/*
+ * Global grace period counter.  Bit 0 is always one in rcu_gp_ctr.
+ * Bits 1 and above are defined in synchronize_rcu.
+ */
+#define RCU_GP_LOCKED           (1UL << 0)
+#define RCU_GP_CTR              (1UL << 1)
+
+unsigned long rcu_gp_ctr = RCU_GP_LOCKED;
+
+QemuEvent rcu_gp_event;
+static QemuMutex rcu_registry_lock;
+static QemuMutex rcu_sync_lock;
+
+/*
+ * Check whether a quiescent state was crossed between the beginning of
+ * update_counter_and_wait and now.
+ */
+static inline int rcu_gp_ongoing(unsigned long *ctr)
+{
+    unsigned long v;
+
+    v = atomic_read(ctr);
+    return v && (v != rcu_gp_ctr);
+}
+
+/* Written to only by each individual reader. Read by both the reader and the
+ * writers.
+ */
+__thread struct rcu_reader_data rcu_reader;
+
+/* Protected by rcu_registry_lock.  */
+typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
+static ThreadList registry = QLIST_HEAD_INITIALIZER(registry);
+
+/* Wait for previous parity/grace period to be empty of readers.  */
+static void wait_for_readers(void)
+{
+    ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders);
+    struct rcu_reader_data *index, *tmp;
+
+    for (;;) {
+        /* We want to be notified of changes made to rcu_gp_ongoing
+         * while we walk the list.
+         */
+        qemu_event_reset(&rcu_gp_event);
+
+        /* Instead of using atomic_mb_set for index->waiting, and
+         * atomic_mb_read for index->ctr, memory barriers are placed
+         * manually since writes to different threads are independent.
+         * atomic_mb_set has a smp_wmb before...
+         */
+        smp_wmb();
+        QLIST_FOREACH(index, &registry, node) {
+            atomic_set(&index->waiting, true);
+        }
+
+        /* ... and a smp_mb after.  */
+        smp_mb();
+
+        QLIST_FOREACH_SAFE(index, &registry, node, tmp) {
+            if (!rcu_gp_ongoing(&index->ctr)) {
+                QLIST_REMOVE(index, node);
+                QLIST_INSERT_HEAD(&qsreaders, index, node);
+
+                /* No need for mb_set here, worst of all we
+                 * get some extra futex wakeups.
+                 */
+                atomic_set(&index->waiting, false);
+            }
+        }
+
+        /* atomic_mb_read has smp_rmb after.  */
+        smp_rmb();
+
+        if (QLIST_EMPTY(&registry)) {
+            break;
+        }
+
+        /* Wait for one thread to report a quiescent state and try again.
+         * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't
+         * wait too much time.
+         *
+         * rcu_register_thread() may add nodes to &registry; it will not
+         * wake up synchronize_rcu, but that is okay because at least another
+         * thread must exit its RCU read-side critical section before
+         * synchronize_rcu is done.  The next iteration of the loop will
+         * move the new thread's rcu_reader from &registry to &qsreaders,
+         * because rcu_gp_ongoing() will return false.
+         *
+         * rcu_unregister_thread() may remove nodes from &qsreaders instead
+         * of &registry if it runs during qemu_event_wait.  That's okay;
+         * the node then will not be added back to &registry by QLIST_SWAP
+         * below.  The invariant is that the node is part of one list when
+         * rcu_registry_lock is released.
+         */
+        qemu_mutex_unlock(&rcu_registry_lock);
+        qemu_event_wait(&rcu_gp_event);
+        qemu_mutex_lock(&rcu_registry_lock);
+    }
+
+    /* put back the reader list in the registry */
+    QLIST_SWAP(&registry, &qsreaders, node);
+}
+
+void synchronize_rcu(void)
+{
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
+
+    if (!QLIST_EMPTY(&registry)) {
+        /* In either case, the atomic_mb_set below blocks stores that free
+         * old RCU-protected pointers.
+         */
+        if (sizeof(rcu_gp_ctr) < 8) {
+            /* For architectures with 32-bit longs, a two-subphases algorithm
+             * ensures we do not encounter overflow bugs.
+             *
+             * Switch parity: 0 -> 1, 1 -> 0.
+             */
+            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+            wait_for_readers();
+            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR);
+        } else {
+            /* Increment current grace period.  */
+            atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR);
+        }
+
+        wait_for_readers();
+    }
+
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
+}
+
+
+#define RCU_CALL_MIN_SIZE        30
+
+/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h
+ * from liburcu.  Note that head is only used by the consumer.
+ */
+static struct rcu_head dummy;
+static struct rcu_head *head = &dummy, **tail = &dummy.next;
+static int rcu_call_count;
+static QemuEvent rcu_call_ready_event;
+
+static void enqueue(struct rcu_head *node)
+{
+    struct rcu_head **old_tail;
+
+    node->next = NULL;
+    old_tail = atomic_xchg(&tail, &node->next);
+    atomic_mb_set(old_tail, node);
+}
+
+static struct rcu_head *try_dequeue(void)
+{
+    struct rcu_head *node, *next;
+
+retry:
+    /* Test for an empty list, which we do not expect.  Note that for
+     * the consumer head and tail are always consistent.  The head
+     * is consistent because only the consumer reads/writes it.
+     * The tail, because it is the first step in the enqueuing.
+     * It is only the next pointers that might be inconsistent.
+     */
+    if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) {
+        abort();
+    }
+
+    /* If the head node has NULL in its next pointer, the value is
+     * wrong and we need to wait until its enqueuer finishes the update.
+     */
+    node = head;
+    next = atomic_mb_read(&head->next);
+    if (!next) {
+        return NULL;
+    }
+
+    /* Since we are the sole consumer, and we excluded the empty case
+     * above, the queue will always have at least two nodes: the
+     * dummy node, and the one being removed.  So we do not need to update
+     * the tail pointer.
+     */
+    head = next;
+
+    /* If we dequeued the dummy node, add it back at the end and retry.  */
+    if (node == &dummy) {
+        enqueue(node);
+        goto retry;
+    }
+
+    return node;
+}
+
+static void *call_rcu_thread(void *opaque)
+{
+    struct rcu_head *node;
+
+    rcu_register_thread();
+
+    for (;;) {
+        int tries = 0;
+        int n = atomic_read(&rcu_call_count);
+
+        /* Heuristically wait for a decent number of callbacks to pile up.
+         * Fetch rcu_call_count now, we only must process elements that were
+         * added before synchronize_rcu() starts.
+         */
+        while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) {
+            g_usleep(10000);
+            if (n == 0) {
+                qemu_event_reset(&rcu_call_ready_event);
+                n = atomic_read(&rcu_call_count);
+                if (n == 0) {
+                    qemu_event_wait(&rcu_call_ready_event);
+                }
+            }
+            n = atomic_read(&rcu_call_count);
+        }
+
+        atomic_sub(&rcu_call_count, n);
+        synchronize_rcu();
+        qemu_mutex_lock_iothread();
+        while (n > 0) {
+            node = try_dequeue();
+            while (!node) {
+                qemu_mutex_unlock_iothread();
+                qemu_event_reset(&rcu_call_ready_event);
+                node = try_dequeue();
+                if (!node) {
+                    qemu_event_wait(&rcu_call_ready_event);
+                    node = try_dequeue();
+                }
+                qemu_mutex_lock_iothread();
+            }
+
+            n--;
+            node->func(node);
+        }
+        qemu_mutex_unlock_iothread();
+    }
+    abort();
+}
+
+void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node))
+{
+    node->func = func;
+    enqueue(node);
+    atomic_inc(&rcu_call_count);
+    qemu_event_set(&rcu_call_ready_event);
+}
+
+void rcu_register_thread(void)
+{
+    assert(rcu_reader.ctr == 0);
+    qemu_mutex_lock(&rcu_registry_lock);
+    QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+void rcu_unregister_thread(void)
+{
+    qemu_mutex_lock(&rcu_registry_lock);
+    QLIST_REMOVE(&rcu_reader, node);
+    qemu_mutex_unlock(&rcu_registry_lock);
+}
+
+static void rcu_init_complete(void)
+{
+    QemuThread thread;
+
+    qemu_mutex_init(&rcu_registry_lock);
+    qemu_mutex_init(&rcu_sync_lock);
+    qemu_event_init(&rcu_gp_event, true);
+
+    qemu_event_init(&rcu_call_ready_event, false);
+
+    /* The caller is assumed to have iothread lock, so the call_rcu thread
+     * must have been quiescent even after forking, just recreate it.
+     */
+    qemu_thread_create(&thread, "call_rcu", call_rcu_thread,
+                       NULL, QEMU_THREAD_DETACHED);
+
+    rcu_register_thread();
+}
+
+#ifdef CONFIG_POSIX
+static void rcu_init_lock(void)
+{
+    qemu_mutex_lock(&rcu_sync_lock);
+    qemu_mutex_lock(&rcu_registry_lock);
+}
+
+static void rcu_init_unlock(void)
+{
+    qemu_mutex_unlock(&rcu_registry_lock);
+    qemu_mutex_unlock(&rcu_sync_lock);
+}
+#endif
+
+void rcu_after_fork(void)
+{
+    memset(&registry, 0, sizeof(registry));
+    rcu_init_complete();
+}
+
+static void __attribute__((__constructor__)) rcu_init(void)
+{
+#ifdef CONFIG_POSIX
+    pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_unlock);
+#endif
+    rcu_init_complete();
+}
diff --git a/src/util/readline.c b/src/util/readline.c
new file mode 100644
index 0000000..cc1302a
--- /dev/null
+++ b/src/util/readline.c
@@ -0,0 +1,515 @@
+/*
+ * QEMU readline utility
+ *
+ * Copyright (c) 2003-2004 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "qemu/readline.h"
+
+#define IS_NORM 0
+#define IS_ESC  1
+#define IS_CSI  2
+#define IS_SS3  3
+
+void readline_show_prompt(ReadLineState *rs)
+{
+    rs->printf_func(rs->opaque, "%s", rs->prompt);
+    rs->flush_func(rs->opaque);
+    rs->last_cmd_buf_index = 0;
+    rs->last_cmd_buf_size = 0;
+    rs->esc_state = IS_NORM;
+}
+
+/* update the displayed command line */
+static void readline_update(ReadLineState *rs)
+{
+    int i, delta, len;
+
+    if (rs->cmd_buf_size != rs->last_cmd_buf_size ||
+        memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) {
+        for(i = 0; i < rs->last_cmd_buf_index; i++) {
+            rs->printf_func(rs->opaque, "\033[D");
+        }
+        rs->cmd_buf[rs->cmd_buf_size] = '\0';
+        if (rs->read_password) {
+            len = strlen(rs->cmd_buf);
+            for(i = 0; i < len; i++)
+                rs->printf_func(rs->opaque, "*");
+        } else {
+            rs->printf_func(rs->opaque, "%s", rs->cmd_buf);
+        }
+        rs->printf_func(rs->opaque, "\033[K");
+        memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size);
+        rs->last_cmd_buf_size = rs->cmd_buf_size;
+        rs->last_cmd_buf_index = rs->cmd_buf_size;
+    }
+    if (rs->cmd_buf_index != rs->last_cmd_buf_index) {
+        delta = rs->cmd_buf_index - rs->last_cmd_buf_index;
+        if (delta > 0) {
+            for(i = 0;i < delta; i++) {
+                rs->printf_func(rs->opaque, "\033[C");
+            }
+        } else {
+            delta = -delta;
+            for(i = 0;i < delta; i++) {
+                rs->printf_func(rs->opaque, "\033[D");
+            }
+        }
+        rs->last_cmd_buf_index = rs->cmd_buf_index;
+    }
+    rs->flush_func(rs->opaque);
+}
+
+static void readline_insert_char(ReadLineState *rs, int ch)
+{
+    if (rs->cmd_buf_index < READLINE_CMD_BUF_SIZE) {
+        memmove(rs->cmd_buf + rs->cmd_buf_index + 1,
+                rs->cmd_buf + rs->cmd_buf_index,
+                rs->cmd_buf_size - rs->cmd_buf_index);
+        rs->cmd_buf[rs->cmd_buf_index] = ch;
+        rs->cmd_buf_size++;
+        rs->cmd_buf_index++;
+    }
+}
+
+static void readline_backward_char(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index > 0) {
+        rs->cmd_buf_index--;
+    }
+}
+
+static void readline_forward_char(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index < rs->cmd_buf_size) {
+        rs->cmd_buf_index++;
+    }
+}
+
+static void readline_delete_char(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index < rs->cmd_buf_size) {
+        memmove(rs->cmd_buf + rs->cmd_buf_index,
+                rs->cmd_buf + rs->cmd_buf_index + 1,
+                rs->cmd_buf_size - rs->cmd_buf_index - 1);
+        rs->cmd_buf_size--;
+    }
+}
+
+static void readline_backspace(ReadLineState *rs)
+{
+    if (rs->cmd_buf_index > 0) {
+        readline_backward_char(rs);
+        readline_delete_char(rs);
+    }
+}
+
+static void readline_backword(ReadLineState *rs)
+{
+    int start;
+
+    if (rs->cmd_buf_index == 0 || rs->cmd_buf_index > rs->cmd_buf_size) {
+        return;
+    }
+
+    start = rs->cmd_buf_index - 1;
+
+    /* find first word (backwards) */
+    while (start > 0) {
+        if (!qemu_isspace(rs->cmd_buf[start])) {
+            break;
+        }
+
+        --start;
+    }
+
+    /* find first space (backwards) */
+    while (start > 0) {
+        if (qemu_isspace(rs->cmd_buf[start])) {
+            ++start;
+            break;
+        }
+
+        --start;
+    }
+
+    /* remove word */
+    if (start < rs->cmd_buf_index) {
+        memmove(rs->cmd_buf + start,
+                rs->cmd_buf + rs->cmd_buf_index,
+                rs->cmd_buf_size - rs->cmd_buf_index);
+        rs->cmd_buf_size -= rs->cmd_buf_index - start;
+        rs->cmd_buf_index = start;
+    }
+}
+
+static void readline_bol(ReadLineState *rs)
+{
+    rs->cmd_buf_index = 0;
+}
+
+static void readline_eol(ReadLineState *rs)
+{
+    rs->cmd_buf_index = rs->cmd_buf_size;
+}
+
+static void readline_up_char(ReadLineState *rs)
+{
+    int idx;
+
+    if (rs->hist_entry == 0)
+	return;
+    if (rs->hist_entry == -1) {
+	/* Find latest entry */
+	for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+	    if (rs->history[idx] == NULL)
+		break;
+	}
+	rs->hist_entry = idx;
+    }
+    rs->hist_entry--;
+    if (rs->hist_entry >= 0) {
+	pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+                rs->history[rs->hist_entry]);
+	rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+    }
+}
+
+static void readline_down_char(ReadLineState *rs)
+{
+    if (rs->hist_entry == -1)
+        return;
+    if (rs->hist_entry < READLINE_MAX_CMDS - 1 &&
+        rs->history[++rs->hist_entry] != NULL) {
+	pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf),
+                rs->history[rs->hist_entry]);
+    } else {
+        rs->cmd_buf[0] = 0;
+	rs->hist_entry = -1;
+    }
+    rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf);
+}
+
+static void readline_hist_add(ReadLineState *rs, const char *cmdline)
+{
+    char *hist_entry, *new_entry;
+    int idx;
+
+    if (cmdline[0] == '\0')
+	return;
+    new_entry = NULL;
+    if (rs->hist_entry != -1) {
+	/* We were editing an existing history entry: replace it */
+	hist_entry = rs->history[rs->hist_entry];
+	idx = rs->hist_entry;
+	if (strcmp(hist_entry, cmdline) == 0) {
+	    goto same_entry;
+	}
+    }
+    /* Search cmdline in history buffers */
+    for (idx = 0; idx < READLINE_MAX_CMDS; idx++) {
+	hist_entry = rs->history[idx];
+	if (hist_entry == NULL)
+	    break;
+	if (strcmp(hist_entry, cmdline) == 0) {
+	same_entry:
+	    new_entry = hist_entry;
+	    /* Put this entry at the end of history */
+	    memmove(&rs->history[idx], &rs->history[idx + 1],
+		    (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *));
+	    rs->history[READLINE_MAX_CMDS - 1] = NULL;
+	    for (; idx < READLINE_MAX_CMDS; idx++) {
+		if (rs->history[idx] == NULL)
+		    break;
+	    }
+	    break;
+	}
+    }
+    if (idx == READLINE_MAX_CMDS) {
+	/* Need to get one free slot */
+        g_free(rs->history[0]);
+	memmove(rs->history, &rs->history[1],
+	        (READLINE_MAX_CMDS - 1) * sizeof(char *));
+	rs->history[READLINE_MAX_CMDS - 1] = NULL;
+	idx = READLINE_MAX_CMDS - 1;
+    }
+    if (new_entry == NULL)
+        new_entry = g_strdup(cmdline);
+    rs->history[idx] = new_entry;
+    rs->hist_entry = -1;
+}
+
+/* completion support */
+
+void readline_add_completion(ReadLineState *rs, const char *str)
+{
+    if (rs->nb_completions < READLINE_MAX_COMPLETIONS) {
+        int i;
+        for (i = 0; i < rs->nb_completions; i++) {
+            if (!strcmp(rs->completions[i], str)) {
+                return;
+            }
+        }
+        rs->completions[rs->nb_completions++] = g_strdup(str);
+    }
+}
+
+void readline_set_completion_index(ReadLineState *rs, int index)
+{
+    rs->completion_index = index;
+}
+
+static int completion_comp(const void *a, const void *b)
+{
+    return strcmp(*(const char **) a, *(const char **) b);
+}
+
+static void readline_completion(ReadLineState *rs)
+{
+    int len, i, j, max_width, nb_cols, max_prefix;
+    char *cmdline;
+
+    rs->nb_completions = 0;
+
+    cmdline = g_strndup(rs->cmd_buf, rs->cmd_buf_index);
+    rs->completion_finder(rs->opaque, cmdline);
+    g_free(cmdline);
+
+    /* no completion found */
+    if (rs->nb_completions <= 0)
+        return;
+    if (rs->nb_completions == 1) {
+        len = strlen(rs->completions[0]);
+        for(i = rs->completion_index; i < len; i++) {
+            readline_insert_char(rs, rs->completions[0][i]);
+        }
+        /* extra space for next argument. XXX: make it more generic */
+        if (len > 0 && rs->completions[0][len - 1] != '/')
+            readline_insert_char(rs, ' ');
+    } else {
+        qsort(rs->completions, rs->nb_completions, sizeof(char *),
+              completion_comp);
+        rs->printf_func(rs->opaque, "\n");
+        max_width = 0;
+        max_prefix = 0;	
+        for(i = 0; i < rs->nb_completions; i++) {
+            len = strlen(rs->completions[i]);
+            if (i==0) {
+                max_prefix = len;
+            } else {
+                if (len < max_prefix)
+                    max_prefix = len;
+                for(j=0; j<max_prefix; j++) {
+                    if (rs->completions[i][j] != rs->completions[0][j])
+                        max_prefix = j;
+                }
+            }
+            if (len > max_width)
+                max_width = len;
+        }
+        if (max_prefix > 0) 
+            for(i = rs->completion_index; i < max_prefix; i++) {
+                readline_insert_char(rs, rs->completions[0][i]);
+            }
+        max_width += 2;
+        if (max_width < 10)
+            max_width = 10;
+        else if (max_width > 80)
+            max_width = 80;
+        nb_cols = 80 / max_width;
+        j = 0;
+        for(i = 0; i < rs->nb_completions; i++) {
+            rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]);
+            if (++j == nb_cols || i == (rs->nb_completions - 1)) {
+                rs->printf_func(rs->opaque, "\n");
+                j = 0;
+            }
+        }
+        readline_show_prompt(rs);
+    }
+    for (i = 0; i < rs->nb_completions; i++) {
+        g_free(rs->completions[i]);
+    }
+}
+
+static void readline_clear_screen(ReadLineState *rs)
+{
+    rs->printf_func(rs->opaque, "\033[2J\033[1;1H");
+    readline_show_prompt(rs);
+}
+
+/* return true if command handled */
+void readline_handle_byte(ReadLineState *rs, int ch)
+{
+    switch(rs->esc_state) {
+    case IS_NORM:
+        switch(ch) {
+        case 1:
+            readline_bol(rs);
+            break;
+        case 4:
+            readline_delete_char(rs);
+            break;
+        case 5:
+            readline_eol(rs);
+            break;
+        case 9:
+            readline_completion(rs);
+            break;
+        case 12:
+            readline_clear_screen(rs);
+            break;
+        case 10:
+        case 13:
+            rs->cmd_buf[rs->cmd_buf_size] = '\0';
+            if (!rs->read_password)
+                readline_hist_add(rs, rs->cmd_buf);
+            rs->printf_func(rs->opaque, "\n");
+            rs->cmd_buf_index = 0;
+            rs->cmd_buf_size = 0;
+            rs->last_cmd_buf_index = 0;
+            rs->last_cmd_buf_size = 0;
+            rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque);
+            break;
+        case 23:
+            /* ^W */
+            readline_backword(rs);
+            break;
+        case 27:
+            rs->esc_state = IS_ESC;
+            break;
+        case 127:
+        case 8:
+            readline_backspace(rs);
+            break;
+	case 155:
+            rs->esc_state = IS_CSI;
+	    break;
+        default:
+            if (ch >= 32) {
+                readline_insert_char(rs, ch);
+            }
+            break;
+        }
+        break;
+    case IS_ESC:
+        if (ch == '[') {
+            rs->esc_state = IS_CSI;
+            rs->esc_param = 0;
+        } else if (ch == 'O') {
+            rs->esc_state = IS_SS3;
+            rs->esc_param = 0;
+        } else {
+            rs->esc_state = IS_NORM;
+        }
+        break;
+    case IS_CSI:
+        switch(ch) {
+	case 'A':
+	case 'F':
+	    readline_up_char(rs);
+	    break;
+	case 'B':
+	case 'E':
+	    readline_down_char(rs);
+	    break;
+        case 'D':
+            readline_backward_char(rs);
+            break;
+        case 'C':
+            readline_forward_char(rs);
+            break;
+        case '0' ... '9':
+            rs->esc_param = rs->esc_param * 10 + (ch - '0');
+            goto the_end;
+        case '~':
+            switch(rs->esc_param) {
+            case 1:
+                readline_bol(rs);
+                break;
+            case 3:
+                readline_delete_char(rs);
+                break;
+            case 4:
+                readline_eol(rs);
+                break;
+            }
+            break;
+        default:
+            break;
+        }
+        rs->esc_state = IS_NORM;
+    the_end:
+        break;
+    case IS_SS3:
+        switch(ch) {
+        case 'F':
+            readline_eol(rs);
+            break;
+        case 'H':
+            readline_bol(rs);
+            break;
+        }
+        rs->esc_state = IS_NORM;
+        break;
+    }
+    readline_update(rs);
+}
+
+void readline_start(ReadLineState *rs, const char *prompt, int read_password,
+                    ReadLineFunc *readline_func, void *opaque)
+{
+    pstrcpy(rs->prompt, sizeof(rs->prompt), prompt);
+    rs->readline_func = readline_func;
+    rs->readline_opaque = opaque;
+    rs->read_password = read_password;
+    readline_restart(rs);
+}
+
+void readline_restart(ReadLineState *rs)
+{
+    rs->cmd_buf_index = 0;
+    rs->cmd_buf_size = 0;
+}
+
+const char *readline_get_history(ReadLineState *rs, unsigned int index)
+{
+    if (index >= READLINE_MAX_CMDS)
+        return NULL;
+    return rs->history[index];
+}
+
+ReadLineState *readline_init(ReadLinePrintfFunc *printf_func,
+                             ReadLineFlushFunc *flush_func,
+                             void *opaque,
+                             ReadLineCompletionFunc *completion_finder)
+{
+    ReadLineState *rs = g_malloc0(sizeof(*rs));
+
+    rs->hist_entry = -1;
+    rs->opaque = opaque;
+    rs->printf_func = printf_func;
+    rs->flush_func = flush_func;
+    rs->completion_finder = completion_finder;
+
+    return rs;
+}
diff --git a/src/util/rfifolock.c b/src/util/rfifolock.c
new file mode 100644
index 0000000..afbf748
--- /dev/null
+++ b/src/util/rfifolock.c
@@ -0,0 +1,78 @@
+/*
+ * Recursive FIFO lock
+ *
+ * Copyright Red Hat, Inc. 2013
+ *
+ * Authors:
+ *  Stefan Hajnoczi   <stefanha@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU LGPL, version 2 or later.
+ * See the COPYING.LIB file in the top-level directory.
+ *
+ */
+
+#include <assert.h>
+#include "qemu/rfifolock.h"
+
+void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque)
+{
+    qemu_mutex_init(&r->lock);
+    r->head = 0;
+    r->tail = 0;
+    qemu_cond_init(&r->cond);
+    r->nesting = 0;
+    r->cb = cb;
+    r->cb_opaque = opaque;
+}
+
+void rfifolock_destroy(RFifoLock *r)
+{
+    qemu_cond_destroy(&r->cond);
+    qemu_mutex_destroy(&r->lock);
+}
+
+/*
+ * Theory of operation:
+ *
+ * In order to ensure FIFO ordering, implement a ticketlock.  Threads acquiring
+ * the lock enqueue themselves by incrementing the tail index.  When the lock
+ * is unlocked, the head is incremented and waiting threads are notified.
+ *
+ * Recursive locking does not take a ticket since the head is only incremented
+ * when the outermost recursive caller unlocks.
+ */
+void rfifolock_lock(RFifoLock *r)
+{
+    qemu_mutex_lock(&r->lock);
+
+    /* Take a ticket */
+    unsigned int ticket = r->tail++;
+
+    if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) {
+        r->tail--; /* put ticket back, we're nesting */
+    } else {
+        while (ticket != r->head) {
+            /* Invoke optional contention callback */
+            if (r->cb) {
+                r->cb(r->cb_opaque);
+            }
+            qemu_cond_wait(&r->cond, &r->lock);
+        }
+    }
+
+    qemu_thread_get_self(&r->owner_thread);
+    r->nesting++;
+    qemu_mutex_unlock(&r->lock);
+}
+
+void rfifolock_unlock(RFifoLock *r)
+{
+    qemu_mutex_lock(&r->lock);
+    assert(r->nesting > 0);
+    assert(qemu_thread_is_self(&r->owner_thread));
+    if (--r->nesting == 0) {
+        r->head++;
+        qemu_cond_broadcast(&r->cond);
+    }
+    qemu_mutex_unlock(&r->lock);
+}
diff --git a/src/util/throttle.c b/src/util/throttle.c
new file mode 100644
index 0000000..1113671
--- /dev/null
+++ b/src/util/throttle.c
@@ -0,0 +1,445 @@
+/*
+ * QEMU throttling infrastructure
+ *
+ * Copyright (C) Nodalink, EURL. 2013-2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ *   Benoît Canet <benoit.canet@nodalink.com>
+ *   Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 or
+ * (at your option) version 3 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/throttle.h"
+#include "qemu/timer.h"
+#include "block/aio.h"
+
+/* This function make a bucket leak
+ *
+ * @bkt:   the bucket to make leak
+ * @delta_ns: the time delta
+ */
+void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns)
+{
+    double leak;
+
+    /* compute how much to leak */
+    leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND;
+
+    /* make the bucket leak */
+    bkt->level = MAX(bkt->level - leak, 0);
+}
+
+/* Calculate the time delta since last leak and make proportionals leaks
+ *
+ * @now:      the current timestamp in ns
+ */
+static void throttle_do_leak(ThrottleState *ts, int64_t now)
+{
+    /* compute the time elapsed since the last leak */
+    int64_t delta_ns = now - ts->previous_leak;
+    int i;
+
+    ts->previous_leak = now;
+
+    if (delta_ns <= 0) {
+        return;
+    }
+
+    /* make each bucket leak */
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns);
+    }
+}
+
+/* do the real job of computing the time to wait
+ *
+ * @limit: the throttling limit
+ * @extra: the number of operation to delay
+ * @ret:   the time to wait in ns
+ */
+static int64_t throttle_do_compute_wait(double limit, double extra)
+{
+    double wait = extra * NANOSECONDS_PER_SECOND;
+    wait /= limit;
+    return wait;
+}
+
+/* This function compute the wait time in ns that a leaky bucket should trigger
+ *
+ * @bkt: the leaky bucket we operate on
+ * @ret: the resulting wait time in ns or 0 if the operation can go through
+ */
+int64_t throttle_compute_wait(LeakyBucket *bkt)
+{
+    double extra; /* the number of extra units blocking the io */
+
+    if (!bkt->avg) {
+        return 0;
+    }
+
+    extra = bkt->level - bkt->max;
+
+    if (extra <= 0) {
+        return 0;
+    }
+
+    return throttle_do_compute_wait(bkt->avg, extra);
+}
+
+/* This function compute the time that must be waited while this IO
+ *
+ * @is_write:   true if the current IO is a write, false if it's a read
+ * @ret:        time to wait
+ */
+static int64_t throttle_compute_wait_for(ThrottleState *ts,
+                                         bool is_write)
+{
+    BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL,
+                                   THROTTLE_OPS_TOTAL,
+                                   THROTTLE_BPS_READ,
+                                   THROTTLE_OPS_READ},
+                                  {THROTTLE_BPS_TOTAL,
+                                   THROTTLE_OPS_TOTAL,
+                                   THROTTLE_BPS_WRITE,
+                                   THROTTLE_OPS_WRITE}, };
+    int64_t wait, max_wait = 0;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        BucketType index = to_check[is_write][i];
+        wait = throttle_compute_wait(&ts->cfg.buckets[index]);
+        if (wait > max_wait) {
+            max_wait = wait;
+        }
+    }
+
+    return max_wait;
+}
+
+/* compute the timer for this type of operation
+ *
+ * @is_write:   the type of operation
+ * @now:        the current clock timestamp
+ * @next_timestamp: the resulting timer
+ * @ret:        true if a timer must be set
+ */
+bool throttle_compute_timer(ThrottleState *ts,
+                            bool is_write,
+                            int64_t now,
+                            int64_t *next_timestamp)
+{
+    int64_t wait;
+
+    /* leak proportionally to the time elapsed */
+    throttle_do_leak(ts, now);
+
+    /* compute the wait time if any */
+    wait = throttle_compute_wait_for(ts, is_write);
+
+    /* if the code must wait compute when the next timer should fire */
+    if (wait) {
+        *next_timestamp = now + wait;
+        return true;
+    }
+
+    /* else no need to wait at all */
+    *next_timestamp = now;
+    return false;
+}
+
+/* Add timers to event loop */
+void throttle_timers_attach_aio_context(ThrottleTimers *tt,
+                                        AioContext *new_context)
+{
+    tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+                                  tt->read_timer_cb, tt->timer_opaque);
+    tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS,
+                                  tt->write_timer_cb, tt->timer_opaque);
+}
+
+/* To be called first on the ThrottleState */
+void throttle_init(ThrottleState *ts)
+{
+    memset(ts, 0, sizeof(ThrottleState));
+}
+
+/* To be called first on the ThrottleTimers */
+void throttle_timers_init(ThrottleTimers *tt,
+                          AioContext *aio_context,
+                          QEMUClockType clock_type,
+                          QEMUTimerCB *read_timer_cb,
+                          QEMUTimerCB *write_timer_cb,
+                          void *timer_opaque)
+{
+    memset(tt, 0, sizeof(ThrottleTimers));
+
+    tt->clock_type = clock_type;
+    tt->read_timer_cb = read_timer_cb;
+    tt->write_timer_cb = write_timer_cb;
+    tt->timer_opaque = timer_opaque;
+    throttle_timers_attach_aio_context(tt, aio_context);
+}
+
+/* destroy a timer */
+static void throttle_timer_destroy(QEMUTimer **timer)
+{
+    assert(*timer != NULL);
+
+    timer_del(*timer);
+    timer_free(*timer);
+    *timer = NULL;
+}
+
+/* Remove timers from event loop */
+void throttle_timers_detach_aio_context(ThrottleTimers *tt)
+{
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        throttle_timer_destroy(&tt->timers[i]);
+    }
+}
+
+/* To be called last on the ThrottleTimers */
+void throttle_timers_destroy(ThrottleTimers *tt)
+{
+    throttle_timers_detach_aio_context(tt);
+}
+
+/* is any throttling timer configured */
+bool throttle_timers_are_initialized(ThrottleTimers *tt)
+{
+    if (tt->timers[0]) {
+        return true;
+    }
+
+    return false;
+}
+
+/* Does any throttling must be done
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if throttling must be done else false
+ */
+bool throttle_enabled(ThrottleConfig *cfg)
+{
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].avg > 0) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+/* return true if any two throttling parameters conflicts
+ *
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if any conflict detected else false
+ */
+bool throttle_conflicting(ThrottleConfig *cfg)
+{
+    bool bps_flag, ops_flag;
+    bool bps_max_flag, ops_max_flag;
+
+    bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg &&
+               (cfg->buckets[THROTTLE_BPS_READ].avg ||
+                cfg->buckets[THROTTLE_BPS_WRITE].avg);
+
+    ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg &&
+               (cfg->buckets[THROTTLE_OPS_READ].avg ||
+                cfg->buckets[THROTTLE_OPS_WRITE].avg);
+
+    bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max &&
+                  (cfg->buckets[THROTTLE_BPS_READ].max  ||
+                   cfg->buckets[THROTTLE_BPS_WRITE].max);
+
+    ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max &&
+                   (cfg->buckets[THROTTLE_OPS_READ].max ||
+                   cfg->buckets[THROTTLE_OPS_WRITE].max);
+
+    return bps_flag || ops_flag || bps_max_flag || ops_max_flag;
+}
+
+/* check if a throttling configuration is valid
+ * @cfg: the throttling configuration to inspect
+ * @ret: true if valid else false
+ */
+bool throttle_is_valid(ThrottleConfig *cfg)
+{
+    bool invalid = false;
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].avg < 0) {
+            invalid = true;
+        }
+    }
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].max < 0) {
+            invalid = true;
+        }
+    }
+
+    return !invalid;
+}
+
+/* check if bps_max/iops_max is used without bps/iops
+ * @cfg: the throttling configuration to inspect
+ */
+bool throttle_max_is_missing_limit(ThrottleConfig *cfg)
+{
+    int i;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        if (cfg->buckets[i].max && !cfg->buckets[i].avg) {
+            return true;
+        }
+    }
+    return false;
+}
+
+/* fix bucket parameters */
+static void throttle_fix_bucket(LeakyBucket *bkt)
+{
+    double min;
+
+    /* zero bucket level */
+    bkt->level = 0;
+
+    /* The following is done to cope with the Linux CFQ block scheduler
+     * which regroup reads and writes by block of 100ms in the guest.
+     * When they are two process one making reads and one making writes cfq
+     * make a pattern looking like the following:
+     * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR
+     * Having a max burst value of 100ms of the average will help smooth the
+     * throttling
+     */
+    min = bkt->avg / 10;
+    if (bkt->avg && !bkt->max) {
+        bkt->max = min;
+    }
+}
+
+/* take care of canceling a timer */
+static void throttle_cancel_timer(QEMUTimer *timer)
+{
+    assert(timer != NULL);
+
+    timer_del(timer);
+}
+
+/* Used to configure the throttle
+ *
+ * @ts: the throttle state we are working on
+ * @tt: the throttle timers we use in this aio context
+ * @cfg: the config to set
+ */
+void throttle_config(ThrottleState *ts,
+                     ThrottleTimers *tt,
+                     ThrottleConfig *cfg)
+{
+    int i;
+
+    ts->cfg = *cfg;
+
+    for (i = 0; i < BUCKETS_COUNT; i++) {
+        throttle_fix_bucket(&ts->cfg.buckets[i]);
+    }
+
+    ts->previous_leak = qemu_clock_get_ns(tt->clock_type);
+
+    for (i = 0; i < 2; i++) {
+        throttle_cancel_timer(tt->timers[i]);
+    }
+}
+
+/* used to get config
+ *
+ * @ts:  the throttle state we are working on
+ * @cfg: the config to write
+ */
+void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg)
+{
+    *cfg = ts->cfg;
+}
+
+
+/* Schedule the read or write timer if needed
+ *
+ * NOTE: this function is not unit tested due to it's usage of timer_mod
+ *
+ * @tt:       the timers structure
+ * @is_write: the type of operation (read/write)
+ * @ret:      true if the timer has been scheduled else false
+ */
+bool throttle_schedule_timer(ThrottleState *ts,
+                             ThrottleTimers *tt,
+                             bool is_write)
+{
+    int64_t now = qemu_clock_get_ns(tt->clock_type);
+    int64_t next_timestamp;
+    bool must_wait;
+
+    must_wait = throttle_compute_timer(ts,
+                                       is_write,
+                                       now,
+                                       &next_timestamp);
+
+    /* request not throttled */
+    if (!must_wait) {
+        return false;
+    }
+
+    /* request throttled and timer pending -> do nothing */
+    if (timer_pending(tt->timers[is_write])) {
+        return true;
+    }
+
+    /* request throttled and timer not pending -> arm timer */
+    timer_mod(tt->timers[is_write], next_timestamp);
+    return true;
+}
+
+/* do the accounting for this operation
+ *
+ * @is_write: the type of operation (read/write)
+ * @size:     the size of the operation
+ */
+void throttle_account(ThrottleState *ts, bool is_write, uint64_t size)
+{
+    double units = 1.0;
+
+    /* if cfg.op_size is defined and smaller than size we compute unit count */
+    if (ts->cfg.op_size && size > ts->cfg.op_size) {
+        units = (double) size / ts->cfg.op_size;
+    }
+
+    ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size;
+    ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units;
+
+    if (is_write) {
+        ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size;
+        ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units;
+    } else {
+        ts->cfg.buckets[THROTTLE_BPS_READ].level += size;
+        ts->cfg.buckets[THROTTLE_OPS_READ].level += units;
+    }
+}
+
diff --git a/src/util/timed-average.c b/src/util/timed-average.c
new file mode 100644
index 0000000..a2dfb48
--- /dev/null
+++ b/src/util/timed-average.c
@@ -0,0 +1,231 @@
+/*
+ * QEMU timed average computation
+ *
+ * Copyright (C) Nodalink, EURL. 2014
+ * Copyright (C) Igalia, S.L. 2015
+ *
+ * Authors:
+ *   Benoît Canet <benoit.canet@nodalink.com>
+ *   Alberto Garcia <berto@igalia.com>
+ *
+ * This program is free sofware: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Sofware Foundation, either version 2 of the License, or
+ * (at your option) version 3 or any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+
+#include "qemu/timed-average.h"
+
+/* This module computes an average of a set of values within a time
+ * window.
+ *
+ * Algorithm:
+ *
+ * - Create two windows with a certain expiration period, and
+ *   offsetted by period / 2.
+ * - Each time you want to account a new value, do it in both windows.
+ * - The minimum / maximum / average values are always returned from
+ *   the oldest window.
+ *
+ * Example:
+ *
+ *        t=0          |t=0.5           |t=1          |t=1.5            |t=2
+ *        wnd0: [0,0.5)|wnd0: [0.5,1.5) |             |wnd0: [1.5,2.5)  |
+ *        wnd1: [0,1)  |                |wnd1: [1,2)  |                 |
+ *
+ * Values are returned from:
+ *
+ *        wnd0---------|wnd1------------|wnd0---------|wnd1-------------|
+ */
+
+/* Update the expiration of a time window
+ *
+ * @w:      the window used
+ * @now:    the current time in nanoseconds
+ * @period: the expiration period in nanoseconds
+ */
+static void update_expiration(TimedAverageWindow *w, int64_t now,
+                              int64_t period)
+{
+    /* time elapsed since the last theoretical expiration */
+    int64_t elapsed = (now - w->expiration) % period;
+    /* time remaininging until the next expiration */
+    int64_t remaining = period - elapsed;
+    /* compute expiration */
+    w->expiration = now + remaining;
+}
+
+/* Reset a window
+ *
+ * @w: the window to reset
+ */
+static void window_reset(TimedAverageWindow *w)
+{
+    w->min = UINT64_MAX;
+    w->max = 0;
+    w->sum = 0;
+    w->count = 0;
+}
+
+/* Get the current window (that is, the one with the earliest
+ * expiration time).
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: a pointer to the current window
+ */
+static TimedAverageWindow *current_window(TimedAverage *ta)
+{
+     return &ta->windows[ta->current];
+}
+
+/* Initialize a TimedAverage structure
+ *
+ * @ta:         the TimedAverage structure
+ * @clock_type: the type of clock to use
+ * @period:     the time window period in nanoseconds
+ */
+void timed_average_init(TimedAverage *ta, QEMUClockType clock_type,
+                        uint64_t period)
+{
+    int64_t now = qemu_clock_get_ns(clock_type);
+
+    /* Returned values are from the oldest window, so they belong to
+     * the interval [ta->period/2,ta->period). By adjusting the
+     * requested period by 4/3, we guarantee that they're in the
+     * interval [2/3 period,4/3 period), closer to the requested
+     * period on average */
+    ta->period = (uint64_t) period * 4 / 3;
+    ta->clock_type = clock_type;
+    ta->current = 0;
+
+    window_reset(&ta->windows[0]);
+    window_reset(&ta->windows[1]);
+
+    /* Both windows are offsetted by half a period */
+    ta->windows[0].expiration = now + ta->period / 2;
+    ta->windows[1].expiration = now + ta->period;
+}
+
+/* Check if the time windows have expired, updating their counters and
+ * expiration time if that's the case.
+ *
+ * @ta: the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) within the current
+ *           window will be stored here
+ */
+static void check_expirations(TimedAverage *ta, uint64_t *elapsed)
+{
+    int64_t now = qemu_clock_get_ns(ta->clock_type);
+    int i;
+
+    assert(ta->period != 0);
+
+    /* Check if the windows have expired */
+    for (i = 0; i < 2; i++) {
+        TimedAverageWindow *w = &ta->windows[i];
+        if (w->expiration <= now) {
+            window_reset(w);
+            update_expiration(w, now, ta->period);
+        }
+    }
+
+    /* Make ta->current point to the oldest window */
+    if (ta->windows[0].expiration < ta->windows[1].expiration) {
+        ta->current = 0;
+    } else {
+        ta->current = 1;
+    }
+
+    /* Calculate the elapsed time within the current window */
+    if (elapsed) {
+        int64_t remaining = ta->windows[ta->current].expiration - now;
+        *elapsed = ta->period - remaining;
+    }
+}
+
+/* Account a value
+ *
+ * @ta:    the TimedAverage structure
+ * @value: the value to account
+ */
+void timed_average_account(TimedAverage *ta, uint64_t value)
+{
+    int i;
+    check_expirations(ta, NULL);
+
+    /* Do the accounting in both windows at the same time */
+    for (i = 0; i < 2; i++) {
+        TimedAverageWindow *w = &ta->windows[i];
+
+        w->sum += value;
+        w->count++;
+
+        if (value < w->min) {
+            w->min = value;
+        }
+
+        if (value > w->max) {
+            w->max = value;
+        }
+    }
+}
+
+/* Get the minimum value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the minimum value
+ */
+uint64_t timed_average_min(TimedAverage *ta)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, NULL);
+    w = current_window(ta);
+    return w->min < UINT64_MAX ? w->min : 0;
+}
+
+/* Get the average value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the average value
+ */
+uint64_t timed_average_avg(TimedAverage *ta)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, NULL);
+    w = current_window(ta);
+    return w->count > 0 ? w->sum / w->count : 0;
+}
+
+/* Get the maximum value
+ *
+ * @ta:  the TimedAverage structure
+ * @ret: the maximum value
+ */
+uint64_t timed_average_max(TimedAverage *ta)
+{
+    check_expirations(ta, NULL);
+    return current_window(ta)->max;
+}
+
+/* Get the sum of all accounted values
+ * @ta:      the TimedAverage structure
+ * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here
+ * @ret:     the sum of all accounted values
+ */
+uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed)
+{
+    TimedAverageWindow *w;
+    check_expirations(ta, elapsed);
+    w = current_window(ta);
+    return w->sum;
+}
diff --git a/src/util/unicode.c b/src/util/unicode.c
new file mode 100644
index 0000000..d1c8658
--- /dev/null
+++ b/src/util/unicode.c
@@ -0,0 +1,100 @@
+/*
+ * Dealing with Unicode
+ *
+ * Copyright (C) 2013 Red Hat, Inc.
+ *
+ * Authors:
+ *  Markus Armbruster <armbru@redhat.com>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or
+ * later.  See the COPYING file in the top-level directory.
+ */
+
+#include "qemu-common.h"
+
+/**
+ * mod_utf8_codepoint:
+ * @s: string encoded in modified UTF-8
+ * @n: maximum number of bytes to read from @s, if less than 6
+ * @end: set to end of sequence on return
+ *
+ * Convert the modified UTF-8 sequence at the start of @s.  Modified
+ * UTF-8 is exactly like UTF-8, except U+0000 is encoded as
+ * "\xC0\x80".
+ *
+ * If @n is zero or @s points to a zero byte, the sequence is invalid,
+ * and @end is set to @s.
+ *
+ * If @s points to an impossible byte (0xFE or 0xFF) or a continuation
+ * byte, the sequence is invalid, and @end is set to @s + 1
+ *
+ * Else, the first byte determines how many continuation bytes are
+ * expected.  If there are fewer, the sequence is invalid, and @end is
+ * set to @s + 1 + actual number of continuation bytes.  Else, the
+ * sequence is well-formed, and @end is set to @s + 1 + expected
+ * number of continuation bytes.
+ *
+ * A well-formed sequence is valid unless it encodes a codepoint
+ * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66
+ * noncharacters, a surrogate codepoint, or is overlong.  Except the
+ * overlong sequence "\xC0\x80" is valid.
+ *
+ * Conversion succeeds if and only if the sequence is valid.
+ *
+ * Returns: the Unicode codepoint on success, -1 on failure.
+ */
+int mod_utf8_codepoint(const char *s, size_t n, char **end)
+{
+    static int min_cp[5] = { 0x80, 0x800, 0x10000, 0x200000, 0x4000000 };
+    const unsigned char *p;
+    unsigned byte, mask, len, i;
+    int cp;
+
+    if (n == 0 || *s == 0) {
+        /* empty sequence */
+        *end = (char *)s;
+        return -1;
+    }
+
+    p = (const unsigned char *)s;
+    byte = *p++;
+    if (byte < 0x80) {
+        cp = byte;              /* one byte sequence */
+    } else if (byte >= 0xFE) {
+        cp = -1;                /* impossible bytes 0xFE, 0xFF */
+    } else if ((byte & 0x40) == 0) {
+        cp = -1;                /* unexpected continuation byte */
+    } else {
+        /* multi-byte sequence */
+        len = 0;
+        for (mask = 0x80; byte & mask; mask >>= 1) {
+            len++;
+        }
+        assert(len > 1 && len < 7);
+        cp = byte & (mask - 1);
+        for (i = 1; i < len; i++) {
+            byte = i < n ? *p : 0;
+            if ((byte & 0xC0) != 0x80) {
+                cp = -1;        /* continuation byte missing */
+                goto out;
+            }
+            p++;
+            cp <<= 6;
+            cp |= byte & 0x3F;
+        }
+        if (cp > 0x10FFFF) {
+            cp = -1;            /* beyond Unicode range */
+        } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
+                   || (cp & 0xFFFE) == 0xFFFE) {
+            cp = -1;            /* noncharacter */
+        } else if (cp >= 0xD800 && cp <= 0xDFFF) {
+            cp = -1;            /* surrogate code point */
+        } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
+            cp = -1;            /* overlong, not \xC0\x80 */
+        }
+    }
+
+out:
+    *end = (char *)p;
+    return cp;
+}
diff --git a/src/util/uri.c b/src/util/uri.c
new file mode 100644
index 0000000..550b984
--- /dev/null
+++ b/src/util/uri.c
@@ -0,0 +1,2204 @@
+/**
+ * uri.c: set of generic URI related routines
+ *
+ * Reference: RFCs 3986, 2732 and 2373
+ *
+ * Copyright (C) 1998-2003 Daniel Veillard.  All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Except as contained in this notice, the name of Daniel Veillard shall not
+ * be used in advertising or otherwise to promote the sale, use or other
+ * dealings in this Software without prior written authorization from him.
+ *
+ * daniel@veillard.com
+ *
+ **
+ *
+ * Copyright (C) 2007, 2009-2010 Red Hat, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
+ *
+ * Authors:
+ *    Richard W.M. Jones <rjones@redhat.com>
+ *
+ */
+
+#include <glib.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "qemu/uri.h"
+
+static void uri_clean(URI *uri);
+
+/*
+ * Old rule from 2396 used in legacy handling code
+ * alpha    = lowalpha | upalpha
+ */
+#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x))
+
+
+/*
+ * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" |
+ *            "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" |
+ *            "u" | "v" | "w" | "x" | "y" | "z"
+ */
+
+#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z'))
+
+/*
+ * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" |
+ *           "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" |
+ *           "U" | "V" | "W" | "X" | "Y" | "Z"
+ */
+#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z'))
+
+#ifdef IS_DIGIT
+#undef IS_DIGIT
+#endif
+/*
+ * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9"
+ */
+#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9'))
+
+/*
+ * alphanum = alpha | digit
+ */
+
+#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x))
+
+/*
+ * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
+ */
+
+#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') ||     \
+    ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') ||    \
+    ((x) == '(') || ((x) == ')'))
+
+/*
+ * unwise = "{" | "}" | "|" | "\" | "^" | "`"
+ */
+
+#define IS_UNWISE(p)                                                    \
+      (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) ||         \
+       ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) ||        \
+       ((*(p) == ']')) || ((*(p) == '`')))
+/*
+ * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," |
+ *            "[" | "]"
+ */
+
+#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \
+        ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \
+        ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \
+        ((x) == ']'))
+
+/*
+ * unreserved = alphanum | mark
+ */
+
+#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x))
+
+/*
+ * Skip to next pointer char, handle escaped sequences
+ */
+
+#define NEXT(p) ((*p == '%')? p += 3 : p++)
+
+/*
+ * Productions from the spec.
+ *
+ *    authority     = server | reg_name
+ *    reg_name      = 1*( unreserved | escaped | "$" | "," |
+ *                        ";" | ":" | "@" | "&" | "=" | "+" )
+ *
+ * path          = [ abs_path | opaque_part ]
+ */
+
+
+/************************************************************************
+ *									*
+ *                         RFC 3986 parser				*
+ *									*
+ ************************************************************************/
+
+#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9'))
+#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) ||		\
+                      ((*(p) >= 'A') && (*(p) <= 'Z')))
+#define ISA_HEXDIG(p)							\
+       (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) ||		\
+        ((*(p) >= 'A') && (*(p) <= 'F')))
+
+/*
+ *    sub-delims    = "!" / "$" / "&" / "'" / "(" / ")"
+ *                     / "*" / "+" / "," / ";" / "="
+ */
+#define ISA_SUB_DELIM(p)						\
+      (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) ||		\
+       ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) ||		\
+       ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) ||		\
+       ((*(p) == '=')) || ((*(p) == '\'')))
+
+/*
+ *    gen-delims    = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+ */
+#define ISA_GEN_DELIM(p)						\
+      (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) ||         \
+       ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) ||         \
+       ((*(p) == '@')))
+
+/*
+ *    reserved      = gen-delims / sub-delims
+ */
+#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p)))
+
+/*
+ *    unreserved    = ALPHA / DIGIT / "-" / "." / "_" / "~"
+ */
+#define ISA_UNRESERVED(p)						\
+      ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) ||		\
+       ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~')))
+
+/*
+ *    pct-encoded   = "%" HEXDIG HEXDIG
+ */
+#define ISA_PCT_ENCODED(p)						\
+     ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2)))
+
+/*
+ *    pchar         = unreserved / pct-encoded / sub-delims / ":" / "@"
+ */
+#define ISA_PCHAR(p)							\
+     (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) ||	\
+      ((*(p) == ':')) || ((*(p) == '@')))
+
+/**
+ * rfc3986_parse_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse an URI scheme
+ *
+ * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_scheme(URI *uri, const char **str) {
+    const char *cur;
+
+    if (str == NULL)
+	return(-1);
+
+    cur = *str;
+    if (!ISA_ALPHA(cur))
+	return(2);
+    cur++;
+    while (ISA_ALPHA(cur) || ISA_DIGIT(cur) ||
+           (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++;
+    if (uri != NULL) {
+        g_free(uri->scheme);
+	uri->scheme = g_strndup(*str, cur - *str);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_fragment:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * fragment      = *( pchar / "/" / "?" )
+ * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']'
+ *       in the fragment identifier but this is used very broadly for
+ *       xpointer scheme selection, so we are allowing it here to not break
+ *       for example all the DocBook processing chains.
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_fragment(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           (*cur == '[') || (*cur == ']') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+        g_free(uri->fragment);
+	if (uri->cleanup & 2)
+	    uri->fragment = g_strndup(*str, cur - *str);
+	else
+	    uri->fragment = uri_string_unescape(*str, cur - *str, NULL);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_query:
+ * @uri:  pointer to an URI structure
+ * @str:  pointer to the string to analyze
+ *
+ * Parse the query part of an URI
+ *
+ * query = *uric
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_query(URI *uri, const char **str)
+{
+    const char *cur;
+
+    if (str == NULL)
+        return (-1);
+
+    cur = *str;
+
+    while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') ||
+           ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur))))
+        NEXT(cur);
+    if (uri != NULL) {
+        g_free(uri->query);
+	uri->query = g_strndup (*str, cur - *str);
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_port:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse a port  part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * port          = *DIGIT
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_port(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    int port = 0;
+
+    if (ISA_DIGIT(cur)) {
+        while (ISA_DIGIT(cur)) {
+            port = port * 10 + (*cur - '0');
+            if (port > 65535) {
+                return 1;
+            }
+            cur++;
+        }
+        if (uri) {
+            uri->port = port;
+        }
+        *str = cur;
+        return 0;
+    }
+    return 1;
+}
+
+/**
+ * rfc3986_parse_user_info:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an user informations part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * userinfo      = *( unreserved / pct-encoded / sub-delims / ":" )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_user_info(URI *uri, const char **str)
+{
+    const char *cur;
+
+    cur = *str;
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) ||
+           ISA_SUB_DELIM(cur) || (*cur == ':'))
+	NEXT(cur);
+    if (*cur == '@') {
+	if (uri != NULL) {
+            g_free(uri->user);
+	    if (uri->cleanup & 2)
+		uri->user = g_strndup(*str, cur - *str);
+	    else
+		uri->user = uri_string_unescape(*str, cur - *str, NULL);
+	}
+	*str = cur;
+	return(0);
+    }
+    return(1);
+}
+
+/**
+ * rfc3986_parse_dec_octet:
+ * @str:  the string to analyze
+ *
+ *    dec-octet     = DIGIT                 ; 0-9
+ *                  / %x31-39 DIGIT         ; 10-99
+ *                  / "1" 2DIGIT            ; 100-199
+ *                  / "2" %x30-34 DIGIT     ; 200-249
+ *                  / "25" %x30-35          ; 250-255
+ *
+ * Skip a dec-octet.
+ *
+ * Returns 0 if found and skipped, 1 otherwise
+ */
+static int
+rfc3986_parse_dec_octet(const char **str) {
+    const char *cur = *str;
+
+    if (!(ISA_DIGIT(cur)))
+        return(1);
+    if (!ISA_DIGIT(cur+1))
+	cur++;
+    else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2)))
+	cur += 2;
+    else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) >= '0') &&
+	     (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2)))
+	cur += 3;
+    else if ((*cur == '2') && (*(cur + 1) == '5') &&
+	     (*(cur + 2) >= '0') && (*(cur + 1) <= '5'))
+	cur += 3;
+    else
+        return(1);
+    *str = cur;
+    return(0);
+}
+/**
+ * rfc3986_parse_host:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an host part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * host          = IP-literal / IPv4address / reg-name
+ * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
+ * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
+ * reg-name      = *( unreserved / pct-encoded / sub-delims )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_host(URI *uri, const char **str)
+{
+    const char *cur = *str;
+    const char *host;
+
+    host = cur;
+    /*
+     * IPv6 and future addressing scheme are enclosed between brackets
+     */
+    if (*cur == '[') {
+        cur++;
+	while ((*cur != ']') && (*cur != 0))
+	    cur++;
+	if (*cur != ']')
+	    return(1);
+	cur++;
+	goto found;
+    }
+    /*
+     * try to parse an IPv4
+     */
+    if (ISA_DIGIT(cur)) {
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+	cur++;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	if (*cur != '.')
+	    goto not_ipv4;
+        if (rfc3986_parse_dec_octet(&cur) != 0)
+	    goto not_ipv4;
+	goto found;
+not_ipv4:
+        cur = *str;
+    }
+    /*
+     * then this should be a hostname which can be empty
+     */
+    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
+        NEXT(cur);
+found:
+    if (uri != NULL) {
+        g_free(uri->authority);
+	uri->authority = NULL;
+        g_free(uri->server);
+	if (cur != host) {
+	    if (uri->cleanup & 2)
+		uri->server = g_strndup(host, cur - host);
+	    else
+		uri->server = uri_string_unescape(host, cur - host, NULL);
+	} else
+	    uri->server = NULL;
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_authority:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an authority part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * authority     = [ userinfo "@" ] host [ ":" port ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_authority(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+    /*
+     * try to parse an userinfo and check for the trailing @
+     */
+    ret = rfc3986_parse_user_info(uri, &cur);
+    if ((ret != 0) || (*cur != '@'))
+        cur = *str;
+    else
+        cur++;
+    ret = rfc3986_parse_host(uri, &cur);
+    if (ret != 0) return(ret);
+    if (*cur == ':') {
+        cur++;
+        ret = rfc3986_parse_port(uri, &cur);
+	if (ret != 0) return(ret);
+    }
+    *str = cur;
+    return(0);
+}
+
+/**
+ * rfc3986_parse_segment:
+ * @str:  the string to analyze
+ * @forbid: an optional forbidden character
+ * @empty: allow an empty segment
+ *
+ * Parse a segment and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * segment       = *pchar
+ * segment-nz    = 1*pchar
+ * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
+ *               ; non-zero-length segment without any colon ":"
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_segment(const char **str, char forbid, int empty)
+{
+    const char *cur;
+
+    cur = *str;
+    if (!ISA_PCHAR(cur)) {
+        if (empty)
+	    return(0);
+	return(1);
+    }
+    while (ISA_PCHAR(cur) && (*cur != forbid))
+        NEXT(cur);
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_ab_empty:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute or empty and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-abempty  = *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_ab_empty(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (*str != cur) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_absolute:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path absolute and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-absolute = "/" [ segment-nz *( "/" segment ) ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_absolute(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if (*cur != '/')
+        return(1);
+    cur++;
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret == 0) {
+	while (*cur == '/') {
+	    cur++;
+	    ret = rfc3986_parse_segment(&cur, 0, 1);
+	    if (ret != 0) return(ret);
+	}
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_rootless:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path without root and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-rootless = segment-nz *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_rootless(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, 0, 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_path_no_scheme:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an path which is not a scheme and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * path-noscheme = segment-nz-nc *( "/" segment )
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_path_no_scheme(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    ret = rfc3986_parse_segment(&cur, ':', 0);
+    if (ret != 0) return(ret);
+    while (*cur == '/') {
+        cur++;
+	ret = rfc3986_parse_segment(&cur, 0, 1);
+	if (ret != 0) return(ret);
+    }
+    if (uri != NULL) {
+        g_free(uri->path);
+        if (cur != *str) {
+            if (uri->cleanup & 2)
+                uri->path = g_strndup(*str, cur - *str);
+            else
+                uri->path = uri_string_unescape(*str, cur - *str, NULL);
+        } else {
+            uri->path = NULL;
+        }
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_hier_part:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an hierarchical part and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * hier-part     = "//" authority path-abempty
+ *                / path-absolute
+ *                / path-rootless
+ *                / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_hier_part(URI *uri, const char **str)
+{
+    const char *cur;
+    int ret;
+
+    cur = *str;
+
+    if ((*cur == '/') && (*(cur + 1) == '/')) {
+        cur += 2;
+	ret = rfc3986_parse_authority(uri, &cur);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &cur);
+	if (ret != 0) return(ret);
+	*str = cur;
+	return(0);
+    } else if (*cur == '/') {
+        ret = rfc3986_parse_path_absolute(uri, &cur);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(cur)) {
+        ret = rfc3986_parse_path_rootless(uri, &cur);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+            g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+    *str = cur;
+    return (0);
+}
+
+/**
+ * rfc3986_parse_relative_ref:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * relative-ref  = relative-part [ "?" query ] [ "#" fragment ]
+ * relative-part = "//" authority path-abempty
+ *               / path-absolute
+ *               / path-noscheme
+ *               / path-empty
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_relative_ref(URI *uri, const char *str) {
+    int ret;
+
+    if ((*str == '/') && (*(str + 1) == '/')) {
+        str += 2;
+	ret = rfc3986_parse_authority(uri, &str);
+	if (ret != 0) return(ret);
+	ret = rfc3986_parse_path_ab_empty(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (*str == '/') {
+	ret = rfc3986_parse_path_absolute(uri, &str);
+	if (ret != 0) return(ret);
+    } else if (ISA_PCHAR(str)) {
+        ret = rfc3986_parse_path_no_scheme(uri, &str);
+	if (ret != 0) return(ret);
+    } else {
+	/* path-empty is effectively empty */
+	if (uri != NULL) {
+            g_free(uri->path);
+	    uri->path = NULL;
+	}
+    }
+
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+
+/**
+ * rfc3986_parse:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * scheme ":" hier-part [ "?" query ] [ "#" fragment ]
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse(URI *uri, const char *str) {
+    int ret;
+
+    ret = rfc3986_parse_scheme(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str != ':') {
+	return(1);
+    }
+    str++;
+    ret = rfc3986_parse_hier_part(uri, &str);
+    if (ret != 0) return(ret);
+    if (*str == '?') {
+	str++;
+	ret = rfc3986_parse_query(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str == '#') {
+	str++;
+	ret = rfc3986_parse_fragment(uri, &str);
+	if (ret != 0) return(ret);
+    }
+    if (*str != 0) {
+	uri_clean(uri);
+	return(1);
+    }
+    return(0);
+}
+
+/**
+ * rfc3986_parse_uri_reference:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string and fills in the appropriate fields
+ * of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+static int
+rfc3986_parse_uri_reference(URI *uri, const char *str) {
+    int ret;
+
+    if (str == NULL)
+	return(-1);
+    uri_clean(uri);
+
+    /*
+     * Try first to parse absolute refs, then fallback to relative if
+     * it fails.
+     */
+    ret = rfc3986_parse(uri, str);
+    if (ret != 0) {
+	uri_clean(uri);
+        ret = rfc3986_parse_relative_ref(uri, str);
+	if (ret != 0) {
+	    uri_clean(uri);
+	    return(ret);
+	}
+    }
+    return(0);
+}
+
+/**
+ * uri_parse:
+ * @str:  the URI string to analyze
+ *
+ * Parse an URI based on RFC 3986
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse(const char *str) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    ret = rfc3986_parse_uri_reference(uri, str);
+    if (ret) {
+        uri_free(uri);
+        return(NULL);
+    }
+    return(uri);
+}
+
+/**
+ * uri_parse_into:
+ * @uri:  pointer to an URI structure
+ * @str:  the string to analyze
+ *
+ * Parse an URI reference string based on RFC 3986 and fills in the
+ * appropriate fields of the @uri structure
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns 0 or the error code
+ */
+int
+uri_parse_into(URI *uri, const char *str) {
+    return(rfc3986_parse_uri_reference(uri, str));
+}
+
+/**
+ * uri_parse_raw:
+ * @str:  the URI string to analyze
+ * @raw:  if 1 unescaping of URI pieces are disabled
+ *
+ * Parse an URI but allows to keep intact the original fragments.
+ *
+ * URI-reference = URI / relative-ref
+ *
+ * Returns a newly built URI or NULL in case of error
+ */
+URI *
+uri_parse_raw(const char *str, int raw) {
+    URI *uri;
+    int ret;
+
+    if (str == NULL)
+	return(NULL);
+    uri = uri_new();
+    if (raw) {
+        uri->cleanup |= 2;
+    }
+    ret = uri_parse_into(uri, str);
+    if (ret) {
+        uri_free(uri);
+        return(NULL);
+    }
+    return(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Generic URI structure functions			*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_new:
+ *
+ * Simply creates an empty URI
+ *
+ * Returns the new structure or NULL in case of error
+ */
+URI *
+uri_new(void) {
+    URI *ret;
+
+    ret = g_new0(URI, 1);
+    return(ret);
+}
+
+/**
+ * realloc2n:
+ *
+ * Function to handle properly a reallocation when saving an URI
+ * Also imposes some limit on the length of an URI string output
+ */
+static char *
+realloc2n(char *ret, int *max) {
+    char *temp;
+    int tmp;
+
+    tmp = *max * 2;
+    temp = g_realloc(ret, (tmp + 1));
+    *max = tmp;
+    return(temp);
+}
+
+/**
+ * uri_to_string:
+ * @uri:  pointer to an URI
+ *
+ * Save the URI as an escaped string
+ *
+ * Returns a new string (to be deallocated by caller)
+ */
+char *
+uri_to_string(URI *uri) {
+    char *ret = NULL;
+    char *temp;
+    const char *p;
+    int len;
+    int max;
+
+    if (uri == NULL) return(NULL);
+
+
+    max = 80;
+    ret = g_malloc(max + 1);
+    len = 0;
+
+    if (uri->scheme != NULL) {
+	p = uri->scheme;
+	while (*p != 0) {
+	    if (len >= max) {
+                temp = realloc2n(ret, &max);
+		ret = temp;
+	    }
+	    ret[len++] = *p++;
+	}
+	if (len >= max) {
+            temp = realloc2n(ret, &max);
+            ret = temp;
+	}
+	ret[len++] = ':';
+    }
+    if (uri->opaque != NULL) {
+	p = uri->opaque;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+	    }
+	    if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    } else {
+	if (uri->server != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    if (uri->user != NULL) {
+		p = uri->user;
+		while (*p != 0) {
+		    if (len + 3 >= max) {
+                        temp = realloc2n(ret, &max);
+                        ret = temp;
+		    }
+		    if ((IS_UNRESERVED(*(p))) ||
+			((*(p) == ';')) || ((*(p) == ':')) ||
+			((*(p) == '&')) || ((*(p) == '=')) ||
+			((*(p) == '+')) || ((*(p) == '$')) ||
+			((*(p) == ',')))
+			ret[len++] = *p++;
+		    else {
+			int val = *(unsigned char *)p++;
+			int hi = val / 0x10, lo = val % 0x10;
+			ret[len++] = '%';
+			ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+			ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		    }
+		}
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		ret[len++] = '@';
+	    }
+	    p = uri->server;
+	    while (*p != 0) {
+		if (len >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	    if (uri->port > 0) {
+		if (len + 10 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		len += snprintf(&ret[len], max - len, ":%d", uri->port);
+	    }
+	} else if (uri->authority != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	    p = uri->authority;
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) ||
+                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
+                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+                    ((*(p) == '=')) || ((*(p) == '+')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	} else if (uri->scheme != NULL) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+	    }
+	    ret[len++] = '/';
+	    ret[len++] = '/';
+	}
+	if (uri->path != NULL) {
+	    p = uri->path;
+	    /*
+	     * the colon in file:///d: should not be escaped or
+	     * Windows accesses fail later.
+	     */
+	    if ((uri->scheme != NULL) &&
+		(p[0] == '/') &&
+		(((p[1] >= 'a') && (p[1] <= 'z')) ||
+		 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
+		(p[2] == ':') &&
+	        (!strcmp(uri->scheme, "file"))) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+		ret[len++] = *p++;
+	    }
+	    while (*p != 0) {
+		if (len + 3 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
+                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
+	            ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
+	            ((*(p) == ',')))
+		    ret[len++] = *p++;
+		else {
+		    int val = *(unsigned char *)p++;
+		    int hi = val / 0x10, lo = val % 0x10;
+		    ret[len++] = '%';
+		    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+		}
+	    }
+	}
+	if (uri->query != NULL) {
+	    if (len + 1 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+	    }
+	    ret[len++] = '?';
+	    p = uri->query;
+	    while (*p != 0) {
+		if (len + 1 >= max) {
+                    temp = realloc2n(ret, &max);
+                    ret = temp;
+		}
+		ret[len++] = *p++;
+	    }
+	}
+    }
+    if (uri->fragment != NULL) {
+	if (len + 3 >= max) {
+            temp = realloc2n(ret, &max);
+            ret = temp;
+	}
+	ret[len++] = '#';
+	p = uri->fragment;
+	while (*p != 0) {
+	    if (len + 3 >= max) {
+                temp = realloc2n(ret, &max);
+                ret = temp;
+	    }
+	    if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
+		ret[len++] = *p++;
+	    else {
+		int val = *(unsigned char *)p++;
+		int hi = val / 0x10, lo = val % 0x10;
+		ret[len++] = '%';
+		ret[len++] = hi + (hi > 9? 'A'-10 : '0');
+		ret[len++] = lo + (lo > 9? 'A'-10 : '0');
+	    }
+	}
+    }
+    if (len >= max) {
+        temp = realloc2n(ret, &max);
+        ret = temp;
+    }
+    ret[len] = 0;
+    return(ret);
+}
+
+/**
+ * uri_clean:
+ * @uri:  pointer to an URI
+ *
+ * Make sure the URI struct is free of content
+ */
+static void
+uri_clean(URI *uri) {
+    if (uri == NULL) return;
+
+    g_free(uri->scheme);
+    uri->scheme = NULL;
+    g_free(uri->server);
+    uri->server = NULL;
+    g_free(uri->user);
+    uri->user = NULL;
+    g_free(uri->path);
+    uri->path = NULL;
+    g_free(uri->fragment);
+    uri->fragment = NULL;
+    g_free(uri->opaque);
+    uri->opaque = NULL;
+    g_free(uri->authority);
+    uri->authority = NULL;
+    g_free(uri->query);
+    uri->query = NULL;
+}
+
+/**
+ * uri_free:
+ * @uri:  pointer to an URI
+ *
+ * Free up the URI struct
+ */
+void
+uri_free(URI *uri) {
+    uri_clean(uri);
+    g_free(uri);
+}
+
+/************************************************************************
+ *									*
+ *			Helper functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * normalize_uri_path:
+ * @path:  pointer to the path string
+ *
+ * Applies the 5 normalization steps to a path string--that is, RFC 2396
+ * Section 5.2, steps 6.c through 6.g.
+ *
+ * Normalization occurs directly on the string, no new allocation is done
+ *
+ * Returns 0 or an error code
+ */
+static int
+normalize_uri_path(char *path) {
+    char *cur, *out;
+
+    if (path == NULL)
+	return(-1);
+
+    /* Skip all initial "/" chars.  We want to get to the beginning of the
+     * first non-empty segment.
+     */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+      return(0);
+
+    /* Keep everything we've seen so far.  */
+    out = cur;
+
+    /*
+     * Analyze each segment in sequence for cases (c) and (d).
+     */
+    while (cur[0] != '\0') {
+	/*
+	 * c) All occurrences of "./", where "." is a complete path segment,
+	 *    are removed from the buffer string.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '/')) {
+	    cur += 2;
+	    /* '//' normalization should be done at this point too */
+	    while (cur[0] == '/')
+		cur++;
+	    continue;
+	}
+
+	/*
+	 * d) If the buffer string ends with "." as a complete path segment,
+	 *    that "." is removed.
+	 */
+	if ((cur[0] == '.') && (cur[1] == '\0'))
+	    break;
+
+	/* Otherwise keep the segment.  */
+	while (cur[0] != '/') {
+            if (cur[0] == '\0')
+              goto done_cd;
+	    (out++)[0] = (cur++)[0];
+	}
+	/* nomalize // */
+	while ((cur[0] == '/') && (cur[1] == '/'))
+	    cur++;
+
+        (out++)[0] = (cur++)[0];
+    }
+ done_cd:
+    out[0] = '\0';
+
+    /* Reset to the beginning of the first segment for the next sequence.  */
+    cur = path;
+    while (cur[0] == '/')
+      ++cur;
+    if (cur[0] == '\0')
+	return(0);
+
+    /*
+     * Analyze each segment in sequence for cases (e) and (f).
+     *
+     * e) All occurrences of "<segment>/../", where <segment> is a
+     *    complete path segment not equal to "..", are removed from the
+     *    buffer string.  Removal of these path segments is performed
+     *    iteratively, removing the leftmost matching pattern on each
+     *    iteration, until no matching pattern remains.
+     *
+     * f) If the buffer string ends with "<segment>/..", where <segment>
+     *    is a complete path segment not equal to "..", that
+     *    "<segment>/.." is removed.
+     *
+     * To satisfy the "iterative" clause in (e), we need to collapse the
+     * string every time we find something that needs to be removed.  Thus,
+     * we don't need to keep two pointers into the string: we only need a
+     * "current position" pointer.
+     */
+    while (1) {
+        char *segp, *tmp;
+
+        /* At the beginning of each iteration of this loop, "cur" points to
+         * the first character of the segment we want to examine.
+         */
+
+        /* Find the end of the current segment.  */
+        segp = cur;
+        while ((segp[0] != '/') && (segp[0] != '\0'))
+          ++segp;
+
+        /* If this is the last segment, we're done (we need at least two
+         * segments to meet the criteria for the (e) and (f) cases).
+         */
+        if (segp[0] == '\0')
+          break;
+
+        /* If the first segment is "..", or if the next segment _isn't_ "..",
+         * keep this segment and try the next one.
+         */
+        ++segp;
+        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
+            || ((segp[0] != '.') || (segp[1] != '.')
+                || ((segp[2] != '/') && (segp[2] != '\0')))) {
+          cur = segp;
+          continue;
+        }
+
+        /* If we get here, remove this segment and the next one and back up
+         * to the previous segment (if there is one), to implement the
+         * "iteratively" clause.  It's pretty much impossible to back up
+         * while maintaining two pointers into the buffer, so just compact
+         * the whole buffer now.
+         */
+
+        /* If this is the end of the buffer, we're done.  */
+        if (segp[2] == '\0') {
+          cur[0] = '\0';
+          break;
+        }
+        /* Valgrind complained, strcpy(cur, segp + 3); */
+        /* string will overlap, do not use strcpy */
+        tmp = cur;
+        segp += 3;
+        while ((*tmp++ = *segp++) != 0)
+          ;
+
+        /* If there are no previous segments, then keep going from here.  */
+        segp = cur;
+        while ((segp > path) && ((--segp)[0] == '/'))
+          ;
+        if (segp == path)
+          continue;
+
+        /* "segp" is pointing to the end of a previous segment; find it's
+         * start.  We need to back up to the previous segment and start
+         * over with that to handle things like "foo/bar/../..".  If we
+         * don't do this, then on the first pass we'll remove the "bar/..",
+         * but be pointing at the second ".." so we won't realize we can also
+         * remove the "foo/..".
+         */
+        cur = segp;
+        while ((cur > path) && (cur[-1] != '/'))
+          --cur;
+    }
+    out[0] = '\0';
+
+    /*
+     * g) If the resulting buffer string still begins with one or more
+     *    complete path segments of "..", then the reference is
+     *    considered to be in error. Implementations may handle this
+     *    error by retaining these components in the resolved path (i.e.,
+     *    treating them as part of the final URI), by removing them from
+     *    the resolved path (i.e., discarding relative levels above the
+     *    root), or by avoiding traversal of the reference.
+     *
+     * We discard them from the final path.
+     */
+    if (path[0] == '/') {
+      cur = path;
+      while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
+             && ((cur[3] == '/') || (cur[3] == '\0')))
+	cur += 3;
+
+      if (cur != path) {
+	out = path;
+	while (cur[0] != '\0')
+          (out++)[0] = (cur++)[0];
+	out[0] = 0;
+      }
+    }
+
+    return(0);
+}
+
+static int is_hex(char c) {
+    if (((c >= '0') && (c <= '9')) ||
+        ((c >= 'a') && (c <= 'f')) ||
+        ((c >= 'A') && (c <= 'F')))
+	return(1);
+    return(0);
+}
+
+
+/**
+ * uri_string_unescape:
+ * @str:  the string to unescape
+ * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
+ * @target:  optional destination buffer
+ *
+ * Unescaping routine, but does not check that the string is an URI. The
+ * output is a direct unsigned char translation of %XX values (no encoding)
+ * Note that the length of the result can only be smaller or same size as
+ * the input string.
+ *
+ * Returns a copy of the string, but unescaped, will return NULL only in case
+ * of error
+ */
+char *
+uri_string_unescape(const char *str, int len, char *target) {
+    char *ret, *out;
+    const char *in;
+
+    if (str == NULL)
+	return(NULL);
+    if (len <= 0) len = strlen(str);
+    if (len < 0) return(NULL);
+
+    if (target == NULL) {
+	ret = g_malloc(len + 1);
+    } else
+	ret = target;
+    in = str;
+    out = ret;
+    while(len > 0) {
+	if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) {
+	    in++;
+	    if ((*in >= '0') && (*in <= '9'))
+	        *out = (*in - '0');
+	    else if ((*in >= 'a') && (*in <= 'f'))
+	        *out = (*in - 'a') + 10;
+	    else if ((*in >= 'A') && (*in <= 'F'))
+	        *out = (*in - 'A') + 10;
+	    in++;
+	    if ((*in >= '0') && (*in <= '9'))
+	        *out = *out * 16 + (*in - '0');
+	    else if ((*in >= 'a') && (*in <= 'f'))
+	        *out = *out * 16 + (*in - 'a') + 10;
+	    else if ((*in >= 'A') && (*in <= 'F'))
+	        *out = *out * 16 + (*in - 'A') + 10;
+	    in++;
+	    len -= 3;
+	    out++;
+	} else {
+	    *out++ = *in++;
+	    len--;
+	}
+    }
+    *out = 0;
+    return(ret);
+}
+
+/**
+ * uri_string_escape:
+ * @str:  string to escape
+ * @list: exception list string of chars not to escape
+ *
+ * This routine escapes a string to hex, ignoring reserved characters (a-z)
+ * and the characters in the exception list.
+ *
+ * Returns a new escaped string or NULL in case of error.
+ */
+char *
+uri_string_escape(const char *str, const char *list) {
+    char *ret, ch;
+    char *temp;
+    const char *in;
+    int len, out;
+
+    if (str == NULL)
+	return(NULL);
+    if (str[0] == 0)
+	return(g_strdup(str));
+    len = strlen(str);
+    if (!(len > 0)) return(NULL);
+
+    len += 20;
+    ret = g_malloc(len);
+    in = str;
+    out = 0;
+    while(*in != 0) {
+	if (len - out <= 3) {
+            temp = realloc2n(ret, &len);
+	    ret = temp;
+	}
+
+	ch = *in;
+
+	if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) {
+	    unsigned char val;
+	    ret[out++] = '%';
+	    val = ch >> 4;
+	    if (val <= 9)
+		ret[out++] = '0' + val;
+	    else
+		ret[out++] = 'A' + val - 0xA;
+	    val = ch & 0xF;
+	    if (val <= 9)
+		ret[out++] = '0' + val;
+	    else
+		ret[out++] = 'A' + val - 0xA;
+	    in++;
+	} else {
+	    ret[out++] = *in++;
+	}
+
+    }
+    ret[out] = 0;
+    return(ret);
+}
+
+/************************************************************************
+ *									*
+ *			Public functions				*
+ *									*
+ ************************************************************************/
+
+/**
+ * uri_resolve:
+ * @URI:  the URI instance found in the document
+ * @base:  the base value
+ *
+ * Computes he final URI of the reference done by checking that
+ * the given URI is valid, and building the final URI using the
+ * base URI. This is processed according to section 5.2 of the
+ * RFC 2396
+ *
+ * 5.2. Resolving Relative References to Absolute Form
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ *         of error.
+ */
+char *
+uri_resolve(const char *uri, const char *base) {
+    char *val = NULL;
+    int ret, len, indx, cur, out;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    URI *res = NULL;
+
+    /*
+     * 1) The URI reference is parsed into the potential four components and
+     *    fragment identifier, as described in Section 4.3.
+     *
+     *    NOTE that a completely empty URI is treated by modern browsers
+     *    as a reference to "." rather than as a synonym for the current
+     *    URI.  Should we do that here?
+     */
+    if (uri == NULL)
+	ret = -1;
+    else {
+	if (*uri) {
+	    ref = uri_new();
+	    ret = uri_parse_into(ref, uri);
+	}
+	else
+	    ret = 0;
+    }
+    if (ret != 0)
+	goto done;
+    if ((ref != NULL) && (ref->scheme != NULL)) {
+	/*
+	 * The URI is absolute don't modify.
+	 */
+	val = g_strdup(uri);
+	goto done;
+    }
+    if (base == NULL)
+	ret = -1;
+    else {
+	bas = uri_new();
+	ret = uri_parse_into(bas, base);
+    }
+    if (ret != 0) {
+	if (ref)
+	    val = uri_to_string(ref);
+	goto done;
+    }
+    if (ref == NULL) {
+	/*
+	 * the base fragment must be ignored
+	 */
+        g_free(bas->fragment);
+        bas->fragment = NULL;
+	val = uri_to_string(bas);
+	goto done;
+    }
+
+    /*
+     * 2) If the path component is empty and the scheme, authority, and
+     *    query components are undefined, then it is a reference to the
+     *    current document and we are done.  Otherwise, the reference URI's
+     *    query and fragment components are defined as found (or not found)
+     *    within the URI reference and not inherited from the base URI.
+     *
+     *    NOTE that in modern browsers, the parsing differs from the above
+     *    in the following aspect:  the query component is allowed to be
+     *    defined while still treating this as a reference to the current
+     *    document.
+     */
+    res = uri_new();
+    if ((ref->scheme == NULL) && (ref->path == NULL) &&
+	((ref->authority == NULL) && (ref->server == NULL))) {
+        res->scheme = g_strdup(bas->scheme);
+	if (bas->authority != NULL)
+	    res->authority = g_strdup(bas->authority);
+	else if (bas->server != NULL) {
+            res->server = g_strdup(bas->server);
+            res->user = g_strdup(bas->user);
+            res->port = bas->port;
+	}
+        res->path = g_strdup(bas->path);
+        if (ref->query != NULL) {
+	    res->query = g_strdup (ref->query);
+        } else {
+            res->query = g_strdup(bas->query);
+        }
+        res->fragment = g_strdup(ref->fragment);
+	goto step_7;
+    }
+
+    /*
+     * 3) If the scheme component is defined, indicating that the reference
+     *    starts with a scheme name, then the reference is interpreted as an
+     *    absolute URI and we are done.  Otherwise, the reference URI's
+     *    scheme is inherited from the base URI's scheme component.
+     */
+    if (ref->scheme != NULL) {
+	val = uri_to_string(ref);
+	goto done;
+    }
+    res->scheme = g_strdup(bas->scheme);
+
+    res->query = g_strdup(ref->query);
+    res->fragment = g_strdup(ref->fragment);
+
+    /*
+     * 4) If the authority component is defined, then the reference is a
+     *    network-path and we skip to step 7.  Otherwise, the reference
+     *    URI's authority is inherited from the base URI's authority
+     *    component, which will also be undefined if the URI scheme does not
+     *    use an authority component.
+     */
+    if ((ref->authority != NULL) || (ref->server != NULL)) {
+	if (ref->authority != NULL)
+	    res->authority = g_strdup(ref->authority);
+	else {
+	    res->server = g_strdup(ref->server);
+            res->user = g_strdup(ref->user);
+            res->port = ref->port;
+	}
+        res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+    if (bas->authority != NULL)
+	res->authority = g_strdup(bas->authority);
+    else if (bas->server != NULL) {
+        res->server = g_strdup(bas->server);
+        res->user = g_strdup(bas->user);
+	res->port = bas->port;
+    }
+
+    /*
+     * 5) If the path component begins with a slash character ("/"), then
+     *    the reference is an absolute-path and we skip to step 7.
+     */
+    if ((ref->path != NULL) && (ref->path[0] == '/')) {
+	res->path = g_strdup(ref->path);
+	goto step_7;
+    }
+
+
+    /*
+     * 6) If this step is reached, then we are resolving a relative-path
+     *    reference.  The relative path needs to be merged with the base
+     *    URI's path.  Although there are many ways to do this, we will
+     *    describe a simple method using a separate string buffer.
+     *
+     * Allocate a buffer large enough for the result string.
+     */
+    len = 2; /* extra / and 0 */
+    if (ref->path != NULL)
+	len += strlen(ref->path);
+    if (bas->path != NULL)
+	len += strlen(bas->path);
+    res->path = g_malloc(len);
+    res->path[0] = 0;
+
+    /*
+     * a) All but the last segment of the base URI's path component is
+     *    copied to the buffer.  In other words, any characters after the
+     *    last (right-most) slash character, if any, are excluded.
+     */
+    cur = 0;
+    out = 0;
+    if (bas->path != NULL) {
+	while (bas->path[cur] != 0) {
+	    while ((bas->path[cur] != 0) && (bas->path[cur] != '/'))
+		cur++;
+	    if (bas->path[cur] == 0)
+		break;
+
+	    cur++;
+	    while (out < cur) {
+		res->path[out] = bas->path[out];
+		out++;
+	    }
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * b) The reference's path component is appended to the buffer
+     *    string.
+     */
+    if (ref->path != NULL && ref->path[0] != 0) {
+	indx = 0;
+	/*
+	 * Ensure the path includes a '/'
+	 */
+	if ((out == 0) && (bas->server != NULL))
+	    res->path[out++] = '/';
+	while (ref->path[indx] != 0) {
+	    res->path[out++] = ref->path[indx++];
+	}
+    }
+    res->path[out] = 0;
+
+    /*
+     * Steps c) to h) are really path normalization steps
+     */
+    normalize_uri_path(res->path);
+
+step_7:
+
+    /*
+     * 7) The resulting URI components, including any inherited from the
+     *    base URI, are recombined to give the absolute form of the URI
+     *    reference.
+     */
+    val = uri_to_string(res);
+
+done:
+    if (ref != NULL)
+	uri_free(ref);
+    if (bas != NULL)
+	uri_free(bas);
+    if (res != NULL)
+	uri_free(res);
+    return(val);
+}
+
+/**
+ * uri_resolve_relative:
+ * @URI:  the URI reference under consideration
+ * @base:  the base value
+ *
+ * Expresses the URI of the reference in terms relative to the
+ * base.  Some examples of this operation include:
+ *     base = "http://site1.com/docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   pic1.gif
+ *     http://site2.com/docs/pic1.gif   http://site2.com/docs/pic1.gif
+ *
+ *     base = "docs/book1.html"
+ *        URI input                        URI returned
+ *     docs/pic1.gif                    pic1.gif
+ *     docs/img/pic1.gif                img/pic1.gif
+ *     img/pic1.gif                     ../img/pic1.gif
+ *     http://site1.com/docs/pic1.gif   http://site1.com/docs/pic1.gif
+ *
+ *
+ * Note: if the URI reference is really weird or complicated, it may be
+ *       worthwhile to first convert it into a "nice" one by calling
+ *       uri_resolve (using 'base') before calling this routine,
+ *       since this routine (for reasonable efficiency) assumes URI has
+ *       already been through some validation.
+ *
+ * Returns a new URI string (to be freed by the caller) or NULL in case
+ * error.
+ */
+char *
+uri_resolve_relative (const char *uri, const char * base)
+{
+    char *val = NULL;
+    int ret;
+    int ix;
+    int pos = 0;
+    int nbslash = 0;
+    int len;
+    URI *ref = NULL;
+    URI *bas = NULL;
+    char *bptr, *uptr, *vptr;
+    int remove_path = 0;
+
+    if ((uri == NULL) || (*uri == 0))
+	return NULL;
+
+    /*
+     * First parse URI into a standard form
+     */
+    ref = uri_new ();
+    /* If URI not already in "relative" form */
+    if (uri[0] != '.') {
+	ret = uri_parse_into (ref, uri);
+	if (ret != 0)
+	    goto done;		/* Error in URI, return NULL */
+    } else
+	ref->path = g_strdup(uri);
+
+    /*
+     * Next parse base into the same standard form
+     */
+    if ((base == NULL) || (*base == 0)) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    bas = uri_new ();
+    if (base[0] != '.') {
+	ret = uri_parse_into (bas, base);
+	if (ret != 0)
+	    goto done;		/* Error in base, return NULL */
+    } else
+	bas->path = g_strdup(base);
+
+    /*
+     * If the scheme / server on the URI differs from the base,
+     * just return the URI
+     */
+    if ((ref->scheme != NULL) &&
+	((bas->scheme == NULL) ||
+	 (strcmp (bas->scheme, ref->scheme)) ||
+	 (strcmp (bas->server, ref->server)))) {
+	val = g_strdup (uri);
+	goto done;
+    }
+    if (bas->path == ref->path ||
+        (bas->path && ref->path && !strcmp(bas->path, ref->path))) {
+	val = g_strdup("");
+	goto done;
+    }
+    if (bas->path == NULL) {
+	val = g_strdup(ref->path);
+	goto done;
+    }
+    if (ref->path == NULL) {
+        ref->path = (char *) "/";
+	remove_path = 1;
+    }
+
+    /*
+     * At this point (at last!) we can compare the two paths
+     *
+     * First we take care of the special case where either of the
+     * two path components may be missing (bug 316224)
+     */
+    if (bas->path == NULL) {
+	if (ref->path != NULL) {
+	    uptr = ref->path;
+	    if (*uptr == '/')
+		uptr++;
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	}
+	goto done;
+    }
+    bptr = bas->path;
+    if (ref->path == NULL) {
+	for (ix = 0; bptr[ix] != 0; ix++) {
+	    if (bptr[ix] == '/')
+		nbslash++;
+	}
+	uptr = NULL;
+	len = 1;	/* this is for a string terminator only */
+    } else {
+    /*
+     * Next we compare the two strings and find where they first differ
+     */
+	if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/'))
+            pos += 2;
+	if ((*bptr == '.') && (bptr[1] == '/'))
+            bptr += 2;
+	else if ((*bptr == '/') && (ref->path[pos] != '/'))
+	    bptr++;
+	while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0))
+	    pos++;
+
+	if (bptr[pos] == ref->path[pos]) {
+	    val = g_strdup("");
+	    goto done;		/* (I can't imagine why anyone would do this) */
+	}
+
+	/*
+	 * In URI, "back up" to the last '/' encountered.  This will be the
+	 * beginning of the "unique" suffix of URI
+	 */
+	ix = pos;
+	if ((ref->path[ix] == '/') && (ix > 0))
+	    ix--;
+	else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/'))
+	    ix -= 2;
+	for (; ix > 0; ix--) {
+	    if (ref->path[ix] == '/')
+		break;
+	}
+	if (ix == 0) {
+	    uptr = ref->path;
+	} else {
+	    ix++;
+	    uptr = &ref->path[ix];
+	}
+
+	/*
+	 * In base, count the number of '/' from the differing point
+	 */
+	if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */
+	    for (; bptr[ix] != 0; ix++) {
+		if (bptr[ix] == '/')
+		    nbslash++;
+	    }
+	}
+	len = strlen (uptr) + 1;
+    }
+
+    if (nbslash == 0) {
+	if (uptr != NULL)
+	    /* exception characters from uri_to_string */
+	    val = uri_string_escape(uptr, "/;&=+$,");
+	goto done;
+    }
+
+    /*
+     * Allocate just enough space for the returned string -
+     * length of the remainder of the URI, plus enough space
+     * for the "../" groups, plus one for the terminator
+     */
+    val = g_malloc (len + 3 * nbslash);
+    vptr = val;
+    /*
+     * Put in as many "../" as needed
+     */
+    for (; nbslash>0; nbslash--) {
+	*vptr++ = '.';
+	*vptr++ = '.';
+	*vptr++ = '/';
+    }
+    /*
+     * Finish up with the end of the URI
+     */
+    if (uptr != NULL) {
+        if ((vptr > val) && (len > 0) &&
+	    (uptr[0] == '/') && (vptr[-1] == '/')) {
+	    memcpy (vptr, uptr + 1, len - 1);
+	    vptr[len - 2] = 0;
+	} else {
+	    memcpy (vptr, uptr, len);
+	    vptr[len - 1] = 0;
+	}
+    } else {
+	vptr[len - 1] = 0;
+    }
+
+    /* escape the freshly-built path */
+    vptr = val;
+	/* exception characters from uri_to_string */
+    val = uri_string_escape(vptr, "/;&=+$,");
+    g_free(vptr);
+
+done:
+    /*
+     * Free the working variables
+     */
+    if (remove_path != 0)
+        ref->path = NULL;
+    if (ref != NULL)
+	uri_free (ref);
+    if (bas != NULL)
+	uri_free (bas);
+
+    return val;
+}
+
+/*
+ * Utility functions to help parse and assemble query strings.
+ */
+
+struct QueryParams *
+query_params_new (int init_alloc)
+{
+    struct QueryParams *ps;
+
+    if (init_alloc <= 0) init_alloc = 1;
+
+    ps = g_new(QueryParams, 1);
+    ps->n = 0;
+    ps->alloc = init_alloc;
+    ps->p = g_new(QueryParam, ps->alloc);
+
+    return ps;
+}
+
+/* Ensure there is space to store at least one more parameter
+ * at the end of the set.
+ */
+static int
+query_params_append (struct QueryParams *ps,
+               const char *name, const char *value)
+{
+    if (ps->n >= ps->alloc) {
+        ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2);
+        ps->alloc *= 2;
+    }
+
+    ps->p[ps->n].name = g_strdup(name);
+    ps->p[ps->n].value = g_strdup(value);
+    ps->p[ps->n].ignore = 0;
+    ps->n++;
+
+    return 0;
+}
+
+void
+query_params_free (struct QueryParams *ps)
+{
+    int i;
+
+    for (i = 0; i < ps->n; ++i) {
+        g_free (ps->p[i].name);
+        g_free (ps->p[i].value);
+    }
+    g_free (ps->p);
+    g_free (ps);
+}
+
+struct QueryParams *
+query_params_parse (const char *query)
+{
+    struct QueryParams *ps;
+    const char *end, *eq;
+
+    ps = query_params_new (0);
+    if (!query || query[0] == '\0') return ps;
+
+    while (*query) {
+        char *name = NULL, *value = NULL;
+
+        /* Find the next separator, or end of the string. */
+        end = strchr (query, '&');
+        if (!end)
+            end = strchr (query, ';');
+        if (!end)
+            end = query + strlen (query);
+
+        /* Find the first '=' character between here and end. */
+        eq = strchr (query, '=');
+        if (eq && eq >= end) eq = NULL;
+
+        /* Empty section (eg. "&&"). */
+        if (end == query)
+            goto next;
+
+        /* If there is no '=' character, then we have just "name"
+         * and consistent with CGI.pm we assume value is "".
+         */
+        else if (!eq) {
+            name = uri_string_unescape (query, end - query, NULL);
+            value = NULL;
+        }
+        /* Or if we have "name=" here (works around annoying
+         * problem when calling uri_string_unescape with len = 0).
+         */
+        else if (eq+1 == end) {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = g_new0(char, 1);
+        }
+        /* If the '=' character is at the beginning then we have
+         * "=value" and consistent with CGI.pm we _ignore_ this.
+         */
+        else if (query == eq)
+            goto next;
+
+        /* Otherwise it's "name=value". */
+        else {
+            name = uri_string_unescape (query, eq - query, NULL);
+            value = uri_string_unescape (eq+1, end - (eq+1), NULL);
+        }
+
+        /* Append to the parameter set. */
+        query_params_append (ps, name, value);
+        g_free(name);
+        g_free(value);
+
+    next:
+        query = end;
+        if (*query) query ++; /* skip '&' separator */
+    }
+
+    return ps;
+}