diff options
Diffstat (limited to 'src/util')
51 files changed, 15957 insertions, 0 deletions
diff --git a/src/util/Makefile.objs b/src/util/Makefile.objs new file mode 100644 index 0000000..89dd80e --- /dev/null +++ b/src/util/Makefile.objs @@ -0,0 +1,32 @@ +util-obj-y = osdep.o cutils.o unicode.o qemu-timer-common.o +util-obj-$(CONFIG_POSIX) += compatfd.o +util-obj-$(CONFIG_POSIX) += event_notifier-posix.o +util-obj-$(CONFIG_POSIX) += mmap-alloc.o +util-obj-$(CONFIG_POSIX) += oslib-posix.o +util-obj-$(CONFIG_POSIX) += qemu-openpty.o +util-obj-$(CONFIG_POSIX) += qemu-thread-posix.o +util-obj-$(CONFIG_WIN32) += event_notifier-win32.o +util-obj-$(CONFIG_POSIX) += memfd.o +util-obj-$(CONFIG_WIN32) += oslib-win32.o +util-obj-$(CONFIG_WIN32) += qemu-thread-win32.o +util-obj-y += envlist.o path.o module.o +util-obj-$(call lnot,$(CONFIG_INT128)) += host-utils.o +util-obj-y += bitmap.o bitops.o hbitmap.o +util-obj-y += fifo8.o +util-obj-y += acl.o +util-obj-y += error.o qemu-error.o +util-obj-y += id.o +util-obj-y += iov.o qemu-config.o qemu-sockets.o uri.o notify.o +util-obj-y += qemu-option.o qemu-progress.o +util-obj-y += hexdump.o +util-obj-y += crc32c.o +util-obj-y += throttle.o +util-obj-y += getauxval.o +util-obj-y += readline.o +util-obj-y += rfifolock.o +util-obj-y += rcu.o +util-obj-y += qemu-coroutine.o qemu-coroutine-lock.o qemu-coroutine-io.o +util-obj-y += qemu-coroutine-sleep.o +util-obj-y += coroutine-$(CONFIG_COROUTINE_BACKEND).o +util-obj-y += buffer.o +util-obj-y += timed-average.o diff --git a/src/util/acl.c b/src/util/acl.c new file mode 100644 index 0000000..571d686 --- /dev/null +++ b/src/util/acl.c @@ -0,0 +1,187 @@ +/* + * QEMU access control list management + * + * Copyright (C) 2009 Red Hat, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the 
Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + + +#include "qemu-common.h" +#include "qemu/acl.h" + +#ifdef CONFIG_FNMATCH +#include <fnmatch.h> +#endif + + +/* Registry of every ACL created by qemu_acl_init(); nacls is the number of + * valid pointers stored in acls. */ +static unsigned int nacls = 0; +static qemu_acl **acls = NULL; + + + +/* Return the registered ACL whose aclname equals @aclname, or NULL if no + * registered ACL has that name. */ +qemu_acl *qemu_acl_find(const char *aclname) +{ + unsigned int i; /* same type as nacls: no signed/unsigned comparison */ + for (i = 0 ; i < nacls ; i++) { + if (strcmp(acls[i]->aclname, aclname) == 0) + return acls[i]; + } + + return NULL; +} + +/* Return the ACL named @aclname, creating and registering a new, empty, + * deny-by-default ACL (with a private copy of the name) if none exists yet. */ +qemu_acl *qemu_acl_init(const char *aclname) +{ + qemu_acl *acl; + + acl = qemu_acl_find(aclname); + if (acl) + return acl; + + acl = g_malloc(sizeof(*acl)); + acl->aclname = g_strdup(aclname); + /* Deny by default, so there is no window of "open + * access" between QEMU starting, and the user setting + * up ACLs in the monitor */ + acl->defaultDeny = 1; + + acl->nentries = 0; + QTAILQ_INIT(&acl->entries); + + acls = g_realloc(acls, sizeof(*acls) * (nacls +1)); + acls[nacls] = acl; + nacls++; + + return acl; +} + +/* Return 1 if @party is allowed by @acl and 0 if it is denied. Entries are + * checked in list order and the first one that matches @party decides. */ +int qemu_acl_party_is_allowed(qemu_acl *acl, + const char *party) +{ + qemu_acl_entry *entry; + + QTAILQ_FOREACH(entry, &acl->entries, next) { +#ifdef CONFIG_FNMATCH + if (fnmatch(entry->match, party, 0) == 0) + return entry->deny ?
0 : 1; +#else + /* No fnmatch, so fallback to exact string matching + * instead of allowing wildcards */ + if (strcmp(entry->match, party) == 0) + return entry->deny ? 0 : 1; +#endif + } + + /* No entry matched: the ACL's default policy decides. */ + return acl->defaultDeny ? 0 : 1; +} + + +/* Remove and free every entry of @acl and switch it back to deny-by-default. */ +void qemu_acl_reset(qemu_acl *acl) +{ + qemu_acl_entry *entry, *next_entry; + + /* Put back to deny by default, so there is no window + * of "open access" while the user re-initializes the + * access control list */ + acl->defaultDeny = 1; + QTAILQ_FOREACH_SAFE(entry, &acl->entries, next, next_entry) { + QTAILQ_REMOVE(&acl->entries, entry, next); + g_free(entry->match); + g_free(entry); + } + acl->nentries = 0; +} + + +/* Add a rule at the end of @acl. A private copy of @match is stored; @deny + * is stored as given. Returns the number of entries after the insertion. */ +int qemu_acl_append(qemu_acl *acl, + int deny, + const char *match) +{ + qemu_acl_entry *entry; + + entry = g_malloc(sizeof(*entry)); + entry->match = g_strdup(match); + entry->deny = deny; + + QTAILQ_INSERT_TAIL(&acl->entries, entry, next); + acl->nentries++; + + return acl->nentries; +} + + +/* Insert a rule in front of the entry currently at 1-based position @index. + * Returns -1 if @index <= 0. If @index is greater than the current entry + * count the rule is appended and the result of qemu_acl_append() is returned; + * otherwise the position the new rule was inserted at is returned. */ +int qemu_acl_insert(qemu_acl *acl, + int deny, + const char *match, + int index) +{ + qemu_acl_entry *tmp; + int i = 0; + + if (index <= 0) + return -1; + if (index > acl->nentries) { + return qemu_acl_append(acl, deny, match); + } + + /* Walk the list counting from 1 until position @index is reached. */ + QTAILQ_FOREACH(tmp, &acl->entries, next) { + i++; + if (i == index) { + qemu_acl_entry *entry; + entry = g_malloc(sizeof(*entry)); + entry->match = g_strdup(match); + entry->deny = deny; + + QTAILQ_INSERT_BEFORE(tmp, entry, next); + acl->nentries++; + break; + } + } + + return i; +} + +/* Remove and free the first entry whose match string is exactly equal to + * @match. Returns the 1-based position it had, or -1 if no entry matched. */ +int qemu_acl_remove(qemu_acl *acl, + const char *match) +{ + qemu_acl_entry *entry; + int i = 0; + + QTAILQ_FOREACH(entry, &acl->entries, next) { + i++; + if (strcmp(entry->match, match) == 0) { + QTAILQ_REMOVE(&acl->entries, entry, next); + acl->nentries--; + g_free(entry->match); + g_free(entry); + return i; + } + } + return -1; +} + + +/* + * Local variables: + * c-indent-level: 4 + * c-basic-offset: 4 + * tab-width: 8 + * End: + */ diff --git a/src/util/bitmap.c b/src/util/bitmap.c new file mode 100644 index
0000000..44f0f48 --- /dev/null +++ b/src/util/bitmap.c @@ -0,0 +1,339 @@ +/* + * Bitmap Module + * + * Stolen from linux/src/lib/bitmap.c + * + * Copyright (C) 2010 Corentin Chary + * + * This source code is licensed under the GNU General Public License, + * Version 2. + */ + +#include "qemu/bitops.h" +#include "qemu/bitmap.h" +#include "qemu/atomic.h" + +/* + * bitmaps provide an array of bits, implemented using an + * array of unsigned longs. The number of valid bits in a + * given bitmap does _not_ need to be an exact multiple of + * BITS_PER_LONG. + * + * The possible unused bits in the last, partially used word + * of a bitmap are 'don't care'. The implementation makes + * no particular effort to keep them zero. It ensures that + * their value will not affect the results of any operation. + * The bitmap operations that return Boolean (bitmap_empty, + * for example) or scalar (bitmap_weight, for example) results + * carefully filter out these unused bits from impacting their + * results. + * + * These operations actually hold to a slightly stronger rule: + * if you don't input any bitmaps to these ops that have some + * unused bits set, then they won't output any set unused bits + * in output bitmaps. + * + * The byte ordering of bitmaps is more natural on little + * endian architectures. 
+ */ + +int slow_bitmap_empty(const unsigned long *bitmap, long bits) +{ + long k, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; ++k) { + if (bitmap[k]) { + return 0; + } + } + if (bits % BITS_PER_LONG) { + if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) { + return 0; + } + } + + return 1; +} + +int slow_bitmap_full(const unsigned long *bitmap, long bits) +{ + long k, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; ++k) { + if (~bitmap[k]) { + return 0; + } + } + + if (bits % BITS_PER_LONG) { + if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) { + return 0; + } + } + + return 1; +} + +int slow_bitmap_equal(const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits) +{ + long k, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; ++k) { + if (bitmap1[k] != bitmap2[k]) { + return 0; + } + } + + if (bits % BITS_PER_LONG) { + if ((bitmap1[k] ^ bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) { + return 0; + } + } + + return 1; +} + +void slow_bitmap_complement(unsigned long *dst, const unsigned long *src, + long bits) +{ + long k, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; ++k) { + dst[k] = ~src[k]; + } + + if (bits % BITS_PER_LONG) { + dst[k] = ~src[k] & BITMAP_LAST_WORD_MASK(bits); + } +} + +int slow_bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits) +{ + long k; + long nr = BITS_TO_LONGS(bits); + unsigned long result = 0; + + for (k = 0; k < nr; k++) { + result |= (dst[k] = bitmap1[k] & bitmap2[k]); + } + return result != 0; +} + +void slow_bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits) +{ + long k; + long nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) { + dst[k] = bitmap1[k] | bitmap2[k]; + } +} + +void slow_bitmap_xor(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits) +{ + long k; + long nr = BITS_TO_LONGS(bits); + + for (k = 0; k < nr; k++) { + dst[k] = bitmap1[k] ^ bitmap2[k]; + } +} + +int 
slow_bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits) +{ + long k; + long nr = BITS_TO_LONGS(bits); + unsigned long result = 0; + + for (k = 0; k < nr; k++) { + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + } + return result != 0; +} + +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) % BITS_PER_LONG)) + +void bitmap_set(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_set >= 0) { + *p |= mask_to_set; + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + *p |= mask_to_set; + } +} + +void bitmap_set_atomic(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + /* First word */ + if (nr - bits_to_set > 0) { + atomic_or(p, mask_to_set); + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + + /* Full words */ + if (bits_to_set == BITS_PER_LONG) { + while (nr >= BITS_PER_LONG) { + *p = ~0UL; + nr -= BITS_PER_LONG; + p++; + } + } + + /* Last word */ + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + atomic_or(p, mask_to_set); + } else { + /* If we avoided the full barrier in atomic_or(), issue a + * barrier to account for the assignments in the while loop. 
+ */ + smp_mb(); + } +} + +void bitmap_clear(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + + while (nr - bits_to_clear >= 0) { + *p &= ~mask_to_clear; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + *p &= ~mask_to_clear; + } +} + +bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + unsigned long dirty = 0; + unsigned long old_bits; + + /* First word */ + if (nr - bits_to_clear > 0) { + old_bits = atomic_fetch_and(p, ~mask_to_clear); + dirty |= old_bits & mask_to_clear; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + + /* Full words */ + if (bits_to_clear == BITS_PER_LONG) { + while (nr >= BITS_PER_LONG) { + if (*p) { + old_bits = atomic_xchg(p, 0); + dirty |= old_bits; + } + nr -= BITS_PER_LONG; + p++; + } + } + + /* Last word */ + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + old_bits = atomic_fetch_and(p, ~mask_to_clear); + dirty |= old_bits & mask_to_clear; + } else { + if (!dirty) { + smp_mb(); + } + } + + return dirty != 0; +} + +#define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) + +/** + * bitmap_find_next_zero_area - find a contiguous aligned zero area + * @map: The address to base the search on + * @size: The bitmap size in bits + * @start: The bitnumber to start searching at + * @nr: The number of zeroed bits we're looking for + * @align_mask: Alignment mask for zero area + * + * The @align_mask should be one less than a power of 2; the effect is that + * the bit offset of 
all zero areas this function finds is multiples of that + * power of 2. A @align_mask of 0 means no alignment is required. + */ +unsigned long bitmap_find_next_zero_area(unsigned long *map, + unsigned long size, + unsigned long start, + unsigned long nr, + unsigned long align_mask) +{ + unsigned long index, end, i; +again: + index = find_next_zero_bit(map, size, start); + + /* Align allocation */ + index = ALIGN_MASK(index, align_mask); + + end = index + nr; + if (end > size) { + return end; + } + i = find_next_bit(map, end, index); + if (i < end) { + start = i + 1; + goto again; + } + return index; +} + +int slow_bitmap_intersects(const unsigned long *bitmap1, + const unsigned long *bitmap2, long bits) +{ + long k, lim = bits/BITS_PER_LONG; + + for (k = 0; k < lim; ++k) { + if (bitmap1[k] & bitmap2[k]) { + return 1; + } + } + + if (bits % BITS_PER_LONG) { + if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) { + return 1; + } + } + return 0; +} diff --git a/src/util/bitops.c b/src/util/bitops.c new file mode 100644 index 0000000..227c38b --- /dev/null +++ b/src/util/bitops.c @@ -0,0 +1,158 @@ +/* + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * Copyright (C) 2008 IBM Corporation + * Written by Rusty Russell <rusty@rustcorp.com.au> + * (Inspired by David Howell's find_next_bit implementation) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include "qemu/bitops.h" + +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) + +/* + * Find the next set bit in a memory region. 
+ * Returns the index of the first set bit at or after @offset among the + * first @size bits of @addr, or @size if there is none. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) { + return size; + } + /* From here on, size counts the bits left starting at word-aligned + * position result, and offset is the bit position inside that word. */ + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + /* Unaligned start: ignore the bits below offset in the first word. */ + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) { + goto found_first; + } + if (tmp) { + goto found_middle; + } + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + /* Skip four words per iteration for as long as all four are zero. */ + while (size >= 4*BITS_PER_LONG) { + unsigned long d1, d2, d3; + tmp = *p; + d1 = *(p+1); + d2 = *(p+2); + d3 = *(p+3); + if (tmp) { + goto found_middle; + } + if (d1 | d2 | d3) { + break; + } + p += 4; + result += 4*BITS_PER_LONG; + size -= 4*BITS_PER_LONG; + } + /* Scan the remaining whole words one at a time. */ + while (size >= BITS_PER_LONG) { + if ((tmp = *(p++))) { + goto found_middle; + } + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) { + return result; + } + /* Partial last word: only its low size bits are part of the range. */ + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) { /* Are any bits set? */ + return result + size; /* Nope. */ + } +found_middle: + return result + ctzl(tmp); +} + +/* + * This implementation of find_{first,next}_zero_bit was stolen from + * Linus' asm-alpha/bitops.h.
+ * + * find_next_zero_bit() returns the index of the first clear bit at or after + * @offset among the first @size bits of @addr, or @size if there is none. + */ +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) { + return size; + } + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + /* Unaligned start: treat the bits below offset as set so that they + * are never reported as zero bits. */ + tmp = *(p++); + tmp |= ~0UL >> (BITS_PER_LONG - offset); + if (size < BITS_PER_LONG) { + goto found_first; + } + if (~tmp) { + goto found_middle; + } + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + /* Scan whole words while at least BITS_PER_LONG bits remain. */ + while (size & ~(BITS_PER_LONG-1)) { + if (~(tmp = *(p++))) { + goto found_middle; + } + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) { + return result; + } + tmp = *p; + +found_first: + /* Treat the bits at and above size in the last word as set. */ + tmp |= ~0UL << size; + if (tmp == ~0UL) { /* Are any bits zero? */ + return result + size; /* Nope. */ + } +found_middle: + return result + ctzl(~tmp); +} + +/* Return the index of the highest set bit among the first @size bits of + * @addr, or @size if none of them is set. */ +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long words; + unsigned long tmp; + + /* Start at final word. */ + words = size / BITS_PER_LONG; + + /* Partial final word? */ + if (size & (BITS_PER_LONG-1)) { + tmp = (addr[words] & (~0UL >> (BITS_PER_LONG + - (size & (BITS_PER_LONG-1))))); + if (tmp) { + goto found; + } + } + + /* Walk the whole words from the highest index down to word 0. */ + while (words) { + tmp = addr[--words]; + if (tmp) { + found: + return words * BITS_PER_LONG + BITS_PER_LONG - 1 - clzl(tmp); + } + } + + /* Not found */ + return size; +} diff --git a/src/util/buffer.c b/src/util/buffer.c new file mode 100644 index 0000000..8b27c08 --- /dev/null +++ b/src/util/buffer.c @@ -0,0 +1,171 @@ +/* + * QEMU generic buffers + * + * Copyright (c) 2015 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version.
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + * + */ + +#include "qemu/buffer.h" +#include "trace.h" + +#define BUFFER_MIN_INIT_SIZE 4096 +#define BUFFER_MIN_SHRINK_SIZE 65536 + +/* Define the factor alpha for the exponential smoothing + * that is used in the average size calculation. A shift + * of 7 results in an alpha of 1/2^7. */ +#define BUFFER_AVG_SIZE_SHIFT 7 + +/* Capacity needed to hold the current contents plus @len more bytes: + * pow2ceil() of (offset + len), but never less than BUFFER_MIN_INIT_SIZE. */ +static size_t buffer_req_size(Buffer *buffer, size_t len) +{ + return MAX(BUFFER_MIN_INIT_SIZE, + pow2ceil(buffer->offset + len)); +} + +/* Reallocate the storage to exactly the capacity buffer_req_size() asks for + * with @len extra bytes; depending on the old capacity this grows or shrinks + * the buffer. The resize is traced. */ +static void buffer_adj_size(Buffer *buffer, size_t len) +{ + size_t old = buffer->capacity; + buffer->capacity = buffer_req_size(buffer, len); + buffer->buffer = g_realloc(buffer->buffer, buffer->capacity); + trace_buffer_resize(buffer->name ?: "unnamed", + old, buffer->capacity); + + /* Make it even harder for the buffer to shrink: reset the average size + * to the current capacity if that is larger than the average. avg_size + * is kept scaled by 2^BUFFER_AVG_SIZE_SHIFT, hence the shift. */ + buffer->avg_size = MAX(buffer->avg_size, + buffer->capacity << BUFFER_AVG_SIZE_SHIFT); +} + +/* Set the buffer's name from the printf-style format @name and the arguments + * that follow it. No other field of @buffer is touched here. */ +void buffer_init(Buffer *buffer, const char *name, ...) +{ + va_list ap; + + va_start(ap, name); + buffer->name = g_strdup_vprintf(name, ap); + va_end(ap); +} + +/* Return the smoothed average size with the fixed-point scaling removed. */ +static uint64_t buffer_get_avg_size(Buffer *buffer) +{ + return buffer->avg_size >> BUFFER_AVG_SIZE_SHIFT; +} + +void buffer_shrink(Buffer *buffer) +{ + size_t new; + + /* Calculate the average size of the buffer as + * avg_size = avg_size * ( 1 - a ) + required_size * a + * where a is 1 / 2 ^ BUFFER_AVG_SIZE_SHIFT.
*/ + buffer->avg_size *= (1 << BUFFER_AVG_SIZE_SHIFT) - 1; + buffer->avg_size >>= BUFFER_AVG_SIZE_SHIFT; + buffer->avg_size += buffer_req_size(buffer, 0); + + /* And then only shrink if the average size of the buffer is much + * too big, to avoid bumping up & down the buffers all the time. + * realloc() isn't exactly cheap ... */ + new = buffer_req_size(buffer, buffer_get_avg_size(buffer)); + if (new < buffer->capacity >> 3 && + new >= BUFFER_MIN_SHRINK_SIZE) { + buffer_adj_size(buffer, buffer_get_avg_size(buffer)); + } + + /* NOTE(review): this call is unconditional, so every buffer_shrink() + * resizes the storage to the size required by the current contents, + * whatever the averaged check above decided - confirm this is intended. */ + buffer_adj_size(buffer, 0); +} + +/* Make sure at least @len bytes are available after the current offset, + * resizing the storage if they are not. */ +void buffer_reserve(Buffer *buffer, size_t len) +{ + if ((buffer->capacity - buffer->offset) < len) { + buffer_adj_size(buffer, len); + } +} + +/* Return true if the buffer holds no data (offset is 0). */ +gboolean buffer_empty(Buffer *buffer) +{ + return buffer->offset == 0; +} + +/* Return a pointer to the first byte after the stored data. */ +uint8_t *buffer_end(Buffer *buffer) +{ + return buffer->buffer + buffer->offset; +} + +/* Discard all stored data, then run the shrink logic. */ +void buffer_reset(Buffer *buffer) +{ + buffer->offset = 0; + buffer_shrink(buffer); +} + +/* Free the storage and the name and leave every field zero/NULL. */ +void buffer_free(Buffer *buffer) +{ + trace_buffer_free(buffer->name ?: "unnamed", buffer->capacity); + g_free(buffer->buffer); + g_free(buffer->name); + buffer->offset = 0; + buffer->capacity = 0; + buffer->buffer = NULL; + buffer->name = NULL; +} + +/* Copy @len bytes from @data to the end of the stored data. This function + * does not resize the buffer or check the capacity itself; presumably the + * caller reserves the space with buffer_reserve() first - verify at callers. */ +void buffer_append(Buffer *buffer, const void *data, size_t len) +{ + memcpy(buffer->buffer + buffer->offset, data, len); + buffer->offset += len; +} + +/* Drop the first @len bytes by moving the remaining data to the front, then + * run the shrink logic. @len is not checked against offset here. */ +void buffer_advance(Buffer *buffer, size_t len) +{ + memmove(buffer->buffer, buffer->buffer + len, + (buffer->offset - len)); + buffer->offset -= len; + buffer_shrink(buffer); +} + +/* Hand the storage of @from over to @to without copying. @to must hold no + * data (asserted); its old storage is freed. Afterwards @from has no storage + * and zero offset and capacity. The names of both buffers are unchanged. */ +void buffer_move_empty(Buffer *to, Buffer *from) +{ + trace_buffer_move_empty(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + assert(to->offset == 0); + + g_free(to->buffer); + to->offset = from->offset; + to->capacity = from->capacity; + to->buffer = from->buffer; + + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; +} + +void buffer_move(Buffer *to, Buffer *from) +{ + if (to->offset == 0) { +
buffer_move_empty(to, from); + return; + } + + trace_buffer_move(to->name ?: "unnamed", + from->offset, + from->name ?: "unnamed"); + buffer_reserve(to, from->offset); + buffer_append(to, from->buffer, from->offset); + + g_free(from->buffer); + from->offset = 0; + from->capacity = 0; + from->buffer = NULL; +} diff --git a/src/util/compatfd.c b/src/util/compatfd.c new file mode 100644 index 0000000..e857150 --- /dev/null +++ b/src/util/compatfd.c @@ -0,0 +1,110 @@ +/* + * signalfd/eventfd compatibility + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu-common.h" +#include "qemu/compatfd.h" +#include "qemu/thread.h" + +#include <sys/syscall.h> + +/* Argument block for the sigwait_compat() thread: the signals to wait for + * and the descriptor the signal records are written to. */ +struct sigfd_compat_info +{ + sigset_t mask; + int fd; +}; + +/* Thread body of the signalfd emulation. Waits for a signal from info->mask + * with sigwait() and writes one zero-filled qemu_signalfd_siginfo record, + * with only ssi_signo set, to info->fd for each signal received. Returns + * NULL (ending the thread) when sigwait() fails with anything other than + * EINTR or when the write fails. */ +static void *sigwait_compat(void *opaque) +{ + struct sigfd_compat_info *info = opaque; + + while (1) { + int sig; + int err; + + err = sigwait(&info->mask, &sig); + if (err != 0) { + /* sigwait() reports failure through its return value and does + * not set errno, so the return value is what must be tested. */ + if (err == EINTR) { + continue; + } else { + return NULL; + } + } else { + struct qemu_signalfd_siginfo buffer; + size_t offset = 0; + + memset(&buffer, 0, sizeof(buffer)); + buffer.ssi_signo = sig; + + /* Loop until the whole record is written: write() may be + * interrupted or may accept only part of the record. */ + while (offset < sizeof(buffer)) { + ssize_t len; + + len = write(info->fd, (char *)&buffer + offset, + sizeof(buffer) - offset); + if (len == -1 && errno == EINTR) + continue; + + if (len <= 0) { + return NULL; + } + + offset += len; + } + } + } +} + +static int qemu_signalfd_compat(const sigset_t *mask) +{ + struct sigfd_compat_info *info; + QemuThread thread; + int fds[2]; + + info = malloc(sizeof(*info)); + if (info == NULL) { + errno = ENOMEM; + return -1; + } + + if (pipe(fds) == -1) { + free(info); + return -1; + } + + qemu_set_cloexec(fds[0]); +
qemu_set_cloexec(fds[1]); + + memcpy(&info->mask, mask, sizeof(*mask)); + info->fd = fds[1]; + + /* info and the write end of the pipe are handed to a detached thread; + * this function never frees or closes them afterwards. */ + qemu_thread_create(&thread, "signalfd_compat", sigwait_compat, info, + QEMU_THREAD_DETACHED); + + /* The caller reads the signal records from the read end of the pipe. */ + return fds[0]; +} + +/* Return a file descriptor from which the signals in @mask can be read. + * When CONFIG_SIGNALFD is defined the signalfd system call is tried first + * and its descriptor is returned with close-on-exec set. If it is not + * compiled in, or the call returns -1, the thread-and-pipe emulation in + * qemu_signalfd_compat() is used and its result is returned. */ +int qemu_signalfd(const sigset_t *mask) +{ +#if defined(CONFIG_SIGNALFD) + int ret; + + ret = syscall(SYS_signalfd, -1, mask, _NSIG / 8); + if (ret != -1) { + qemu_set_cloexec(ret); + return ret; + } +#endif + + return qemu_signalfd_compat(mask); +} diff --git a/src/util/coroutine-gthread.c b/src/util/coroutine-gthread.c new file mode 100644 index 0000000..0bcd778 --- /dev/null +++ b/src/util/coroutine-gthread.c @@ -0,0 +1,198 @@ +/* + * GThread coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <glib.h> +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +typedef struct { + Coroutine base; + GThread *thread; + bool runnable; + bool free_on_thread_exit; + CoroutineAction action; +} CoroutineGThread; + +static CompatGMutex coroutine_lock; +static CompatGCond coroutine_cond; + +/* GLib 2.31 and beyond deprecated various parts of the thread API, + * but the new interfaces are not available in older GLib versions + * so we have to cope with both. + */ +#if GLIB_CHECK_VERSION(2, 31, 0) +/* Awkwardly, the GPrivate API doesn't provide a way to update the + * GDestroyNotify handler for the coroutine key dynamically. So instead + * we track whether or not the CoroutineGThread should be freed on + * thread exit / coroutine key update using the free_on_thread_exit + * field. + */ +static void coroutine_destroy_notify(gpointer data) +{ + CoroutineGThread *co = data; + if (co && co->free_on_thread_exit) { + g_free(co); + } +} + +static GPrivate coroutine_key = G_PRIVATE_INIT(coroutine_destroy_notify); + +static inline CoroutineGThread *get_coroutine_key(void) +{ + return g_private_get(&coroutine_key); +} + +static inline void set_coroutine_key(CoroutineGThread *co, + bool free_on_thread_exit) +{ + /* Unlike g_static_private_set() this does not call the GDestroyNotify + * if the previous value of the key was NULL. Fortunately we only need + * the GDestroyNotify in the non-NULL key case. 
+ */ + co->free_on_thread_exit = free_on_thread_exit; + g_private_replace(&coroutine_key, co); +} + +static inline GThread *create_thread(GThreadFunc func, gpointer data) +{ + return g_thread_new("coroutine", func, data); +} + +#else + +/* Handle older GLib versions */ + +static GStaticPrivate coroutine_key = G_STATIC_PRIVATE_INIT; + +static inline CoroutineGThread *get_coroutine_key(void) +{ + return g_static_private_get(&coroutine_key); +} + +static inline void set_coroutine_key(CoroutineGThread *co, + bool free_on_thread_exit) +{ + g_static_private_set(&coroutine_key, co, + free_on_thread_exit ? (GDestroyNotify)g_free : NULL); +} + +static inline GThread *create_thread(GThreadFunc func, gpointer data) +{ + return g_thread_create_full(func, data, 0, TRUE, TRUE, + G_THREAD_PRIORITY_NORMAL, NULL); +} + +#endif + + +static void __attribute__((constructor)) coroutine_init(void) +{ +#if !GLIB_CHECK_VERSION(2, 31, 0) + if (!g_thread_supported()) { + g_thread_init(NULL); + } +#endif +} + +static void coroutine_wait_runnable_locked(CoroutineGThread *co) +{ + while (!co->runnable) { + g_cond_wait(&coroutine_cond, &coroutine_lock); + } +} + +static void coroutine_wait_runnable(CoroutineGThread *co) +{ + g_mutex_lock(&coroutine_lock); + coroutine_wait_runnable_locked(co); + g_mutex_unlock(&coroutine_lock); +} + +static gpointer coroutine_thread(gpointer opaque) +{ + CoroutineGThread *co = opaque; + + set_coroutine_key(co, false); + coroutine_wait_runnable(co); + co->base.entry(co->base.entry_arg); + qemu_coroutine_switch(&co->base, co->base.caller, COROUTINE_TERMINATE); + return NULL; +} + +Coroutine *qemu_coroutine_new(void) +{ + CoroutineGThread *co; + + co = g_malloc0(sizeof(*co)); + co->thread = create_thread(coroutine_thread, co); + if (!co->thread) { + g_free(co); + return NULL; + } + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineGThread *co = DO_UPCAST(CoroutineGThread, base, co_); + + g_thread_join(co->thread); + g_free(co); +} + 
+CoroutineAction qemu_coroutine_switch(Coroutine *from_, + Coroutine *to_, + CoroutineAction action) +{ + CoroutineGThread *from = DO_UPCAST(CoroutineGThread, base, from_); + CoroutineGThread *to = DO_UPCAST(CoroutineGThread, base, to_); + + g_mutex_lock(&coroutine_lock); + from->runnable = false; + from->action = action; + to->runnable = true; + to->action = action; + g_cond_broadcast(&coroutine_cond); + + if (action != COROUTINE_TERMINATE) { + coroutine_wait_runnable_locked(from); + } + g_mutex_unlock(&coroutine_lock); + return from->action; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineGThread *co = get_coroutine_key(); + if (!co) { + co = g_malloc0(sizeof(*co)); + co->runnable = true; + set_coroutine_key(co, true); + } + + return &co->base; +} + +bool qemu_in_coroutine(void) +{ + CoroutineGThread *co = get_coroutine_key(); + + return co && co->base.caller; +} diff --git a/src/util/coroutine-sigaltstack.c b/src/util/coroutine-sigaltstack.c new file mode 100644 index 0000000..39842a4 --- /dev/null +++ b/src/util/coroutine-sigaltstack.c @@ -0,0 +1,293 @@ +/* + * sigaltstack coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * Copyright (C) 2012 Alex Barcelo <abarcelo@ac.upc.edu> +** This file is partly based on pth_mctx.c, from the GNU Portable Threads +** Copyright (c) 1999-2006 Ralf S. Engelschall <rse@engelschall.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include <stdlib.h> +#include <setjmp.h> +#include <stdint.h> +#include <pthread.h> +#include <signal.h> +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +typedef struct { + Coroutine base; + void *stack; + sigjmp_buf env; +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +typedef struct { + /** Currently executing coroutine */ + Coroutine *current; + + /** The default coroutine */ + CoroutineUContext leader; + + /** Information for the signal handler (trampoline) */ + sigjmp_buf tr_reenter; + volatile sig_atomic_t tr_called; + void *tr_handler; +} CoroutineThreadState; + +static pthread_key_t thread_state_key; + +static CoroutineThreadState *coroutine_get_thread_state(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + if (!s) { + s = g_malloc0(sizeof(*s)); + s->current = &s->leader.base; + pthread_setspecific(thread_state_key, s); + } + return s; +} + +static void qemu_coroutine_thread_cleanup(void *opaque) +{ + CoroutineThreadState *s = opaque; + + g_free(s); +} + +static void __attribute__((constructor)) coroutine_init(void) +{ + int ret; + + ret = pthread_key_create(&thread_state_key, qemu_coroutine_thread_cleanup); + if (ret != 0) { + fprintf(stderr, "unable to create leader key: %s\n", strerror(errno)); + abort(); + } +} + +/* "boot" function + * This is what starts the coroutine, is called from the trampoline + * (from the signal handler when it is not signal handling, read ahead + * for more information). 
+ */ +static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +{ + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +/* + * This is used as the signal handler. This is called with the brand new stack + * (thanks to sigaltstack). We have to return, given that this is a signal + * handler and the sigmask and some other things are changed. + */ +static void coroutine_trampoline(int signal) +{ + CoroutineUContext *self; + Coroutine *co; + CoroutineThreadState *coTS; + + /* Get the thread specific information */ + coTS = coroutine_get_thread_state(); + self = coTS->tr_handler; + coTS->tr_called = 1; + co = &self->base; + + /* + * Here we have to do a bit of a ping pong between the caller, given that + * this is a signal handler and we have to do a return "soon". Then the + * caller can reestablish everything and do a siglongjmp here again. + */ + if (!sigsetjmp(coTS->tr_reenter, 0)) { + return; + } + + /* + * Ok, the caller has siglongjmp'ed back to us, so now prepare + * us for the real machine state switching. We have to jump + * into another function here to get a new stack context for + * the auto variables (which have to be auto-variables + * because the start of the thread happens later). Else with + * PIC (i.e. Position Independent Code which is used when PTH + * is built as a shared library) most platforms would + * horrible core dump as experience showed. + */ + coroutine_bootstrap(self, co); +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + CoroutineThreadState *coTS; + struct sigaction sa; + struct sigaction osa; + stack_t ss; + stack_t oss; + sigset_t sigs; + sigset_t osigs; + sigjmp_buf old_env; + + /* The way to manipulate stack is with the sigaltstack function. 
We + * prepare a stack, with it delivering a signal to ourselves and then + * put sigsetjmp/siglongjmp where needed. + * This has been done keeping coroutine-ucontext as a model and with the + * pth ideas (GNU Portable Threads). See coroutine-ucontext for the basics + * of the coroutines and see pth_mctx.c (from the pth project) for the + * sigaltstack way of manipulating stacks. + */ + + co = g_malloc0(sizeof(*co)); + co->stack = g_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + coTS = coroutine_get_thread_state(); + coTS->tr_handler = co; + + /* + * Preserve the SIGUSR2 signal state, block SIGUSR2, + * and establish our signal handler. The signal will + * later transfer control onto the signal stack. + */ + sigemptyset(&sigs); + sigaddset(&sigs, SIGUSR2); + pthread_sigmask(SIG_BLOCK, &sigs, &osigs); + sa.sa_handler = coroutine_trampoline; + sigfillset(&sa.sa_mask); + sa.sa_flags = SA_ONSTACK; + if (sigaction(SIGUSR2, &sa, &osa) != 0) { + abort(); + } + + /* + * Set the new stack. + */ + ss.ss_sp = co->stack; + ss.ss_size = stack_size; + ss.ss_flags = 0; + if (sigaltstack(&ss, &oss) < 0) { + abort(); + } + + /* + * Now transfer control onto the signal stack and set it up. + * It will return immediately via "return" after the sigsetjmp() + * was performed. Be careful here with race conditions. The + * signal can be delivered the first time sigsuspend() is + * called. + */ + coTS->tr_called = 0; + pthread_kill(pthread_self(), SIGUSR2); + sigfillset(&sigs); + sigdelset(&sigs, SIGUSR2); + while (!coTS->tr_called) { + sigsuspend(&sigs); + } + + /* + * Inform the system that we are back off the signal stack by + * removing the alternative signal stack. Be careful here: It + * first has to be disabled, before it can be removed. 
+ */ + sigaltstack(NULL, &ss); + ss.ss_flags = SS_DISABLE; + if (sigaltstack(&ss, NULL) < 0) { + abort(); + } + sigaltstack(NULL, &ss); + if (!(oss.ss_flags & SS_DISABLE)) { + sigaltstack(&oss, NULL); + } + + /* + * Restore the old SIGUSR2 signal handler and mask + */ + sigaction(SIGUSR2, &osa, NULL); + pthread_sigmask(SIG_SETMASK, &osigs, NULL); + + /* + * Now enter the trampoline again, but this time not as a signal + * handler. Instead we jump into it directly. The functionally + * redundant ping-pong pointer arithmetic is necessary to avoid + * type-conversion warnings related to the `volatile' qualifier and + * the fact that `jmp_buf' usually is an array type. + */ + if (!sigsetjmp(old_env, 0)) { + siglongjmp(coTS->tr_reenter, 1); + } + + /* + * Ok, we returned again, so now we're finished + */ + + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + + g_free(co->stack); + g_free(co); +} + +CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineThreadState *s = coroutine_get_thread_state(); + int ret; + + s->current = to_; + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { + siglongjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + CoroutineThreadState *s = coroutine_get_thread_state(); + + return s->current; +} + +bool qemu_in_coroutine(void) +{ + CoroutineThreadState *s = pthread_getspecific(thread_state_key); + + return s && s->current->caller; +} + diff --git a/src/util/coroutine-ucontext.c b/src/util/coroutine-ucontext.c new file mode 100644 index 0000000..26cbebb --- /dev/null +++ b/src/util/coroutine-ucontext.c @@ -0,0 +1,194 @@ +/* + * ucontext coroutine initialization code + * + * Copyright (C) 2006 Anthony Liguori 
<anthony@codemonkey.ws> + * Copyright (C) 2011 Kevin Wolf <kwolf@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.0 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* XXX Is there a nicer way to disable glibc's stack check for longjmp? */ +#ifdef _FORTIFY_SOURCE +#undef _FORTIFY_SOURCE +#endif +#include <stdlib.h> +#include <setjmp.h> +#include <stdint.h> +#include <ucontext.h> +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +#ifdef CONFIG_VALGRIND_H +#include <valgrind/valgrind.h> +#endif + +typedef struct { + Coroutine base; + void *stack; + sigjmp_buf env; + +#ifdef CONFIG_VALGRIND_H + unsigned int valgrind_stack_id; +#endif + +} CoroutineUContext; + +/** + * Per-thread coroutine bookkeeping + */ +static __thread CoroutineUContext leader; +static __thread Coroutine *current; + +/* + * va_args to makecontext() must be type 'int', so passing + * the pointer we need may require several int args. 
This + * union is a quick hack to let us do that + */ +union cc_arg { + void *p; + int i[2]; +}; + +static void coroutine_trampoline(int i0, int i1) +{ + union cc_arg arg; + CoroutineUContext *self; + Coroutine *co; + + arg.i[0] = i0; + arg.i[1] = i1; + self = arg.p; + co = &self->base; + + /* Initialize longjmp environment and switch back the caller */ + if (!sigsetjmp(self->env, 0)) { + siglongjmp(*(sigjmp_buf *)co->entry_arg, 1); + } + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineUContext *co; + ucontext_t old_uc, uc; + sigjmp_buf old_env; + union cc_arg arg = {0}; + + /* The ucontext functions preserve signal masks which incurs a + * system call overhead. sigsetjmp(buf, 0)/siglongjmp() does not + * preserve signal masks but only works on the current stack. + * Since we need a way to create and switch to a new stack, use + * the ucontext functions for that but sigsetjmp()/siglongjmp() for + * everything else. + */ + + if (getcontext(&uc) == -1) { + abort(); + } + + co = g_malloc0(sizeof(*co)); + co->stack = g_malloc(stack_size); + co->base.entry_arg = &old_env; /* stash away our jmp_buf */ + + uc.uc_link = &old_uc; + uc.uc_stack.ss_sp = co->stack; + uc.uc_stack.ss_size = stack_size; + uc.uc_stack.ss_flags = 0; + +#ifdef CONFIG_VALGRIND_H + co->valgrind_stack_id = + VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size); +#endif + + arg.p = co; + + makecontext(&uc, (void (*)(void))coroutine_trampoline, + 2, arg.i[0], arg.i[1]); + + /* swapcontext() in, siglongjmp() back out */ + if (!sigsetjmp(old_env, 0)) { + swapcontext(&old_uc, &uc); + } + return &co->base; +} + +#ifdef CONFIG_VALGRIND_H +#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE +/* Work around an unused variable in the valgrind.h macro... 
*/ +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-but-set-variable" +#endif +static inline void valgrind_stack_deregister(CoroutineUContext *co) +{ + VALGRIND_STACK_DEREGISTER(co->valgrind_stack_id); +} +#ifdef CONFIG_PRAGMA_DIAGNOSTIC_AVAILABLE +#pragma GCC diagnostic pop +#endif +#endif + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + +#ifdef CONFIG_VALGRIND_H + valgrind_stack_deregister(co); +#endif + + g_free(co->stack); + g_free(co); +} + +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the sigsetjmp() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. + */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); + CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + int ret; + + current = to_; + + ret = sigsetjmp(from->env, 0); + if (ret == 0) { + siglongjmp(to->env, action); + } + return ret; +} + +Coroutine *qemu_coroutine_self(void) +{ + if (!current) { + current = &leader.base; + } + return current; +} + +bool qemu_in_coroutine(void) +{ + return current && current->caller; +} diff --git a/src/util/coroutine-win32.c b/src/util/coroutine-win32.c new file mode 100644 index 0000000..4f922c5 --- /dev/null +++ b/src/util/coroutine-win32.c @@ -0,0 +1,101 @@ +/* + * Win32 coroutine initialization code + * + * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files 
(the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/coroutine_int.h" + +typedef struct +{ + Coroutine base; + + LPVOID fiber; + CoroutineAction action; +} CoroutineWin32; + +static __thread CoroutineWin32 leader; +static __thread Coroutine *current; + +/* This function is marked noinline to prevent GCC from inlining it + * into coroutine_trampoline(). If we allow it to do that then it + * hoists the code to get the address of the TLS variable "current" + * out of the while() loop. This is an invalid transformation because + * the SwitchToFiber() call may be called when running thread A but + * return in thread B, and so we might be in a different thread + * context each time round the loop. 
+ */ +CoroutineAction __attribute__((noinline)) +qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, + CoroutineAction action) +{ + CoroutineWin32 *from = DO_UPCAST(CoroutineWin32, base, from_); + CoroutineWin32 *to = DO_UPCAST(CoroutineWin32, base, to_); + + current = to_; + + to->action = action; + SwitchToFiber(to->fiber); + return from->action; +} + +static void CALLBACK coroutine_trampoline(void *co_) +{ + Coroutine *co = co_; + + while (true) { + co->entry(co->entry_arg); + qemu_coroutine_switch(co, co->caller, COROUTINE_TERMINATE); + } +} + +Coroutine *qemu_coroutine_new(void) +{ + const size_t stack_size = 1 << 20; + CoroutineWin32 *co; + + co = g_malloc0(sizeof(*co)); + co->fiber = CreateFiber(stack_size, coroutine_trampoline, &co->base); + return &co->base; +} + +void qemu_coroutine_delete(Coroutine *co_) +{ + CoroutineWin32 *co = DO_UPCAST(CoroutineWin32, base, co_); + + DeleteFiber(co->fiber); + g_free(co); +} + +Coroutine *qemu_coroutine_self(void) +{ + if (!current) { + current = &leader.base; + leader.fiber = ConvertThreadToFiber(NULL); + } + return current; +} + +bool qemu_in_coroutine(void) +{ + return current && current->caller; +} diff --git a/src/util/crc32c.c b/src/util/crc32c.c new file mode 100644 index 0000000..8866327 --- /dev/null +++ b/src/util/crc32c.c @@ -0,0 +1,115 @@ +/* + * Castagnoli CRC32C Checksum Algorithm + * + * Polynomial: 0x11EDC6F41 + * + * Castagnoli93: Guy Castagnoli and Stefan Braeuer and Martin Herrman + * "Optimization of Cyclic Redundancy-Check Codes with 24 + * and 32 Parity Bits",IEEE Transactions on Communication, + * Volume 41, Number 6, June 1993 + * + * Copyright (c) 2013 Red Hat, Inc., + * + * Authors: + * Jeff Cody <jcody@redhat.com> + * + * Based on the Linux kernel cryptographic crc32c module, + * + * Copyright (c) 2004 Cisco Systems, Inc. 
+ * Copyright (c) 2008 Herbert Xu <herbert@gondor.apana.org.au> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include "qemu-common.h" +#include "qemu/crc32c.h" + +/* + * This is the CRC-32C table + * Generated with: + * width = 32 bits + * poly = 0x1EDC6F41 + * reflect input bytes = true + * reflect output bytes = true + */ + +static const uint32_t crc32c_table[256] = { + 0x00000000L, 0xF26B8303L, 0xE13B70F7L, 0x1350F3F4L, + 0xC79A971FL, 0x35F1141CL, 0x26A1E7E8L, 0xD4CA64EBL, + 0x8AD958CFL, 0x78B2DBCCL, 0x6BE22838L, 0x9989AB3BL, + 0x4D43CFD0L, 0xBF284CD3L, 0xAC78BF27L, 0x5E133C24L, + 0x105EC76FL, 0xE235446CL, 0xF165B798L, 0x030E349BL, + 0xD7C45070L, 0x25AFD373L, 0x36FF2087L, 0xC494A384L, + 0x9A879FA0L, 0x68EC1CA3L, 0x7BBCEF57L, 0x89D76C54L, + 0x5D1D08BFL, 0xAF768BBCL, 0xBC267848L, 0x4E4DFB4BL, + 0x20BD8EDEL, 0xD2D60DDDL, 0xC186FE29L, 0x33ED7D2AL, + 0xE72719C1L, 0x154C9AC2L, 0x061C6936L, 0xF477EA35L, + 0xAA64D611L, 0x580F5512L, 0x4B5FA6E6L, 0xB93425E5L, + 0x6DFE410EL, 0x9F95C20DL, 0x8CC531F9L, 0x7EAEB2FAL, + 0x30E349B1L, 0xC288CAB2L, 0xD1D83946L, 0x23B3BA45L, + 0xF779DEAEL, 0x05125DADL, 0x1642AE59L, 0xE4292D5AL, + 0xBA3A117EL, 0x4851927DL, 0x5B016189L, 0xA96AE28AL, + 0x7DA08661L, 0x8FCB0562L, 0x9C9BF696L, 0x6EF07595L, + 0x417B1DBCL, 0xB3109EBFL, 0xA0406D4BL, 0x522BEE48L, + 0x86E18AA3L, 0x748A09A0L, 0x67DAFA54L, 0x95B17957L, + 0xCBA24573L, 0x39C9C670L, 0x2A993584L, 0xD8F2B687L, + 0x0C38D26CL, 0xFE53516FL, 0xED03A29BL, 0x1F682198L, + 0x5125DAD3L, 0xA34E59D0L, 0xB01EAA24L, 0x42752927L, + 0x96BF4DCCL, 0x64D4CECFL, 0x77843D3BL, 0x85EFBE38L, + 0xDBFC821CL, 0x2997011FL, 0x3AC7F2EBL, 0xC8AC71E8L, + 0x1C661503L, 0xEE0D9600L, 0xFD5D65F4L, 0x0F36E6F7L, + 0x61C69362L, 0x93AD1061L, 0x80FDE395L, 0x72966096L, + 0xA65C047DL, 0x5437877EL, 0x4767748AL, 
0xB50CF789L, + 0xEB1FCBADL, 0x197448AEL, 0x0A24BB5AL, 0xF84F3859L, + 0x2C855CB2L, 0xDEEEDFB1L, 0xCDBE2C45L, 0x3FD5AF46L, + 0x7198540DL, 0x83F3D70EL, 0x90A324FAL, 0x62C8A7F9L, + 0xB602C312L, 0x44694011L, 0x5739B3E5L, 0xA55230E6L, + 0xFB410CC2L, 0x092A8FC1L, 0x1A7A7C35L, 0xE811FF36L, + 0x3CDB9BDDL, 0xCEB018DEL, 0xDDE0EB2AL, 0x2F8B6829L, + 0x82F63B78L, 0x709DB87BL, 0x63CD4B8FL, 0x91A6C88CL, + 0x456CAC67L, 0xB7072F64L, 0xA457DC90L, 0x563C5F93L, + 0x082F63B7L, 0xFA44E0B4L, 0xE9141340L, 0x1B7F9043L, + 0xCFB5F4A8L, 0x3DDE77ABL, 0x2E8E845FL, 0xDCE5075CL, + 0x92A8FC17L, 0x60C37F14L, 0x73938CE0L, 0x81F80FE3L, + 0x55326B08L, 0xA759E80BL, 0xB4091BFFL, 0x466298FCL, + 0x1871A4D8L, 0xEA1A27DBL, 0xF94AD42FL, 0x0B21572CL, + 0xDFEB33C7L, 0x2D80B0C4L, 0x3ED04330L, 0xCCBBC033L, + 0xA24BB5A6L, 0x502036A5L, 0x4370C551L, 0xB11B4652L, + 0x65D122B9L, 0x97BAA1BAL, 0x84EA524EL, 0x7681D14DL, + 0x2892ED69L, 0xDAF96E6AL, 0xC9A99D9EL, 0x3BC21E9DL, + 0xEF087A76L, 0x1D63F975L, 0x0E330A81L, 0xFC588982L, + 0xB21572C9L, 0x407EF1CAL, 0x532E023EL, 0xA145813DL, + 0x758FE5D6L, 0x87E466D5L, 0x94B49521L, 0x66DF1622L, + 0x38CC2A06L, 0xCAA7A905L, 0xD9F75AF1L, 0x2B9CD9F2L, + 0xFF56BD19L, 0x0D3D3E1AL, 0x1E6DCDEEL, 0xEC064EEDL, + 0xC38D26C4L, 0x31E6A5C7L, 0x22B65633L, 0xD0DDD530L, + 0x0417B1DBL, 0xF67C32D8L, 0xE52CC12CL, 0x1747422FL, + 0x49547E0BL, 0xBB3FFD08L, 0xA86F0EFCL, 0x5A048DFFL, + 0x8ECEE914L, 0x7CA56A17L, 0x6FF599E3L, 0x9D9E1AE0L, + 0xD3D3E1ABL, 0x21B862A8L, 0x32E8915CL, 0xC083125FL, + 0x144976B4L, 0xE622F5B7L, 0xF5720643L, 0x07198540L, + 0x590AB964L, 0xAB613A67L, 0xB831C993L, 0x4A5A4A90L, + 0x9E902E7BL, 0x6CFBAD78L, 0x7FAB5E8CL, 0x8DC0DD8FL, + 0xE330A81AL, 0x115B2B19L, 0x020BD8EDL, 0xF0605BEEL, + 0x24AA3F05L, 0xD6C1BC06L, 0xC5914FF2L, 0x37FACCF1L, + 0x69E9F0D5L, 0x9B8273D6L, 0x88D28022L, 0x7AB90321L, + 0xAE7367CAL, 0x5C18E4C9L, 0x4F48173DL, 0xBD23943EL, + 0xF36E6F75L, 0x0105EC76L, 0x12551F82L, 0xE03E9C81L, + 0x34F4F86AL, 0xC69F7B69L, 0xD5CF889DL, 0x27A40B9EL, + 0x79B737BAL, 0x8BDCB4B9L, 0x988C474DL, 
0x6AE7C44EL, + 0xBE2DA0A5L, 0x4C4623A6L, 0x5F16D052L, 0xAD7D5351L +}; + + +uint32_t crc32c(uint32_t crc, const uint8_t *data, unsigned int length) +{ + while (length--) { + crc = crc32c_table[(crc ^ *data++) & 0xFFL] ^ (crc >> 8); + } + return crc^0xffffffff; +} + diff --git a/src/util/cutils.c b/src/util/cutils.c new file mode 100644 index 0000000..cfeb848 --- /dev/null +++ b/src/util/cutils.c @@ -0,0 +1,691 @@ +/* + * Simple C functions to supplement the C library + * + * Copyright (c) 2006 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu-common.h" +#include "qemu/host-utils.h" +#include <math.h> +#include <limits.h> +#include <errno.h> + +#include "qemu/sockets.h" +#include "qemu/iov.h" +#include "net/net.h" + +void strpadcpy(char *buf, int buf_size, const char *str, char pad) +{ + int len = qemu_strnlen(str, buf_size); + memcpy(buf, str, len); + memset(buf + len, pad, buf_size - len); +} + +void pstrcpy(char *buf, int buf_size, const char *str) +{ + int c; + char *q = buf; + + if (buf_size <= 0) + return; + + for(;;) { + c = *str++; + if (c == 0 || q >= buf + buf_size - 1) + break; + *q++ = c; + } + *q = '\0'; +} + +/* strcat and truncate. */ +char *pstrcat(char *buf, int buf_size, const char *s) +{ + int len; + len = strlen(buf); + if (len < buf_size) + pstrcpy(buf + len, buf_size - len, s); + return buf; +} + +int strstart(const char *str, const char *val, const char **ptr) +{ + const char *p, *q; + p = str; + q = val; + while (*q != '\0') { + if (*p != *q) + return 0; + p++; + q++; + } + if (ptr) + *ptr = p; + return 1; +} + +int stristart(const char *str, const char *val, const char **ptr) +{ + const char *p, *q; + p = str; + q = val; + while (*q != '\0') { + if (qemu_toupper(*p) != qemu_toupper(*q)) + return 0; + p++; + q++; + } + if (ptr) + *ptr = p; + return 1; +} + +/* XXX: use host strnlen if available ? 
*/ +int qemu_strnlen(const char *s, int max_len) +{ + int i; + + for(i = 0; i < max_len; i++) { + if (s[i] == '\0') { + break; + } + } + return i; +} + +char *qemu_strsep(char **input, const char *delim) +{ + char *result = *input; + if (result != NULL) { + char *p; + + for (p = result; *p != '\0'; p++) { + if (strchr(delim, *p)) { + break; + } + } + if (*p == '\0') { + *input = NULL; + } else { + *p = '\0'; + *input = p + 1; + } + } + return result; +} + +time_t mktimegm(struct tm *tm) +{ + time_t t; + int y = tm->tm_year + 1900, m = tm->tm_mon + 1, d = tm->tm_mday; + if (m < 3) { + m += 12; + y--; + } + t = 86400ULL * (d + (153 * m - 457) / 5 + 365 * y + y / 4 - y / 100 + + y / 400 - 719469); + t += 3600 * tm->tm_hour + 60 * tm->tm_min + tm->tm_sec; + return t; +} + +/* + * Make sure data goes on disk, but if possible do not bother to + * write out the inode just for timestamp updates. + * + * Unfortunately even in 2009 many operating systems do not support + * fdatasync and have to fall back to fsync. + */ +int qemu_fdatasync(int fd) +{ +#ifdef CONFIG_FDATASYNC + return fdatasync(fd); +#else + return fsync(fd); +#endif +} + +/* + * Searches for an area with non-zero content in a buffer + * + * Attention! The len must be a multiple of + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * and addr must be a multiple of sizeof(VECTYPE) due to + * restriction of optimizations in this function. + * + * can_use_buffer_find_nonzero_offset() can be used to check + * these requirements. + * + * The return value is the offset of the non-zero area rounded + * down to a multiple of sizeof(VECTYPE) for the first + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR chunks and down to + * BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR * sizeof(VECTYPE) + * afterwards. + * + * If the buffer is all zero the return value is equal to len. 
+ */ + +size_t buffer_find_nonzero_offset(const void *buf, size_t len) +{ + const VECTYPE *p = buf; + const VECTYPE zero = (VECTYPE){0}; + size_t i; + + assert(can_use_buffer_find_nonzero_offset(buf, len)); + + if (!len) { + return 0; + } + + for (i = 0; i < BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; i++) { + if (!ALL_EQ(p[i], zero)) { + return i * sizeof(VECTYPE); + } + } + + for (i = BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR; + i < len / sizeof(VECTYPE); + i += BUFFER_FIND_NONZERO_OFFSET_UNROLL_FACTOR) { + VECTYPE tmp0 = VEC_OR(p[i + 0], p[i + 1]); + VECTYPE tmp1 = VEC_OR(p[i + 2], p[i + 3]); + VECTYPE tmp2 = VEC_OR(p[i + 4], p[i + 5]); + VECTYPE tmp3 = VEC_OR(p[i + 6], p[i + 7]); + VECTYPE tmp01 = VEC_OR(tmp0, tmp1); + VECTYPE tmp23 = VEC_OR(tmp2, tmp3); + if (!ALL_EQ(VEC_OR(tmp01, tmp23), zero)) { + break; + } + } + + return i * sizeof(VECTYPE); +} + +/* + * Checks if a buffer is all zeroes + * + * Attention! The len must be a multiple of 4 * sizeof(long) due to + * restriction of optimizations in this function. + */ +bool buffer_is_zero(const void *buf, size_t len) +{ + /* + * Use long as the biggest available internal data type that fits into the + * CPU register and unroll the loop to smooth out the effect of memory + * latency. 
+ */ + + size_t i; + long d0, d1, d2, d3; + const long * const data = buf; + + /* use vector optimized zero check if possible */ + if (can_use_buffer_find_nonzero_offset(buf, len)) { + return buffer_find_nonzero_offset(buf, len) == len; + } + + assert(len % (4 * sizeof(long)) == 0); + len /= sizeof(long); + + for (i = 0; i < len; i += 4) { + d0 = data[i + 0]; + d1 = data[i + 1]; + d2 = data[i + 2]; + d3 = data[i + 3]; + + if (d0 || d1 || d2 || d3) { + return false; + } + } + + return true; +} + +#ifndef _WIN32 +/* Sets a specific flag */ +int fcntl_setfl(int fd, int flag) +{ + int flags; + + flags = fcntl(fd, F_GETFL); + if (flags == -1) + return -errno; + + if (fcntl(fd, F_SETFL, flags | flag) == -1) + return -errno; + + return 0; +} +#endif + +static int64_t suffix_mul(char suffix, int64_t unit) +{ + switch (qemu_toupper(suffix)) { + case QEMU_STRTOSZ_DEFSUFFIX_B: + return 1; + case QEMU_STRTOSZ_DEFSUFFIX_KB: + return unit; + case QEMU_STRTOSZ_DEFSUFFIX_MB: + return unit * unit; + case QEMU_STRTOSZ_DEFSUFFIX_GB: + return unit * unit * unit; + case QEMU_STRTOSZ_DEFSUFFIX_TB: + return unit * unit * unit * unit; + case QEMU_STRTOSZ_DEFSUFFIX_PB: + return unit * unit * unit * unit * unit; + case QEMU_STRTOSZ_DEFSUFFIX_EB: + return unit * unit * unit * unit * unit * unit; + } + return -1; +} + +/* + * Convert string to bytes, allowing either B/b for bytes, K/k for KB, + * M/m for MB, G/g for GB or T/t for TB. End pointer will be returned + * in *end, if not NULL. Return -ERANGE on overflow, Return -EINVAL on + * other error. 
+ */ +int64_t qemu_strtosz_suffix_unit(const char *nptr, char **end, + const char default_suffix, int64_t unit) +{ + int64_t retval = -EINVAL; + char *endptr; + unsigned char c; + int mul_required = 0; + double val, mul, integral, fraction; + + errno = 0; + val = strtod(nptr, &endptr); + if (isnan(val) || endptr == nptr || errno != 0) { + goto fail; + } + fraction = modf(val, &integral); + if (fraction != 0) { + mul_required = 1; + } + c = *endptr; + mul = suffix_mul(c, unit); + if (mul >= 0) { + endptr++; + } else { + mul = suffix_mul(default_suffix, unit); + assert(mul >= 0); + } + if (mul == 1 && mul_required) { + goto fail; + } + if ((val * mul >= INT64_MAX) || val < 0) { + retval = -ERANGE; + goto fail; + } + retval = val * mul; + +fail: + if (end) { + *end = endptr; + } + + return retval; +} + +int64_t qemu_strtosz_suffix(const char *nptr, char **end, + const char default_suffix) +{ + return qemu_strtosz_suffix_unit(nptr, end, default_suffix, 1024); +} + +int64_t qemu_strtosz(const char *nptr, char **end) +{ + return qemu_strtosz_suffix(nptr, end, QEMU_STRTOSZ_DEFSUFFIX_MB); +} + +/** + * Helper function for qemu_strto*l() functions. + */ +static int check_strtox_error(const char *p, char *endptr, const char **next, + int err) +{ + /* If no conversion was performed, prefer BSD behavior over glibc + * behavior. + */ + if (err == 0 && endptr == p) { + err = EINVAL; + } + if (!next && *endptr) { + return -EINVAL; + } + if (next) { + *next = endptr; + } + return -err; +} + +/** + * QEMU wrappers for strtol(), strtoll(), strtoul(), strotull() C functions. + * + * Convert ASCII string @nptr to a long integer value + * from the given @base. Parameters @nptr, @endptr, @base + * follows same semantics as strtol() C function. + * + * Unlike from strtol() function, if @endptr is not NULL, this + * function will return -EINVAL whenever it cannot fully convert + * the string in @nptr with given @base to a long. 
This function returns + * the result of the conversion only through the @result parameter. + * + * If NULL is passed in @endptr, then the whole string in @ntpr + * is a number otherwise it returns -EINVAL. + * + * RETURN VALUE + * Unlike from strtol() function, this wrapper returns either + * -EINVAL or the errno set by strtol() function (e.g -ERANGE). + * If the conversion overflows, -ERANGE is returned, and @result + * is set to the max value of the desired type + * (e.g. LONG_MAX, LLONG_MAX, ULONG_MAX, ULLONG_MAX). If the case + * of underflow, -ERANGE is returned, and @result is set to the min + * value of the desired type. For strtol(), strtoll(), @result is set to + * LONG_MIN, LLONG_MIN, respectively, and for strtoul(), strtoull() it + * is set to 0. + */ +int qemu_strtol(const char *nptr, const char **endptr, int base, + long *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtol(nptr, &p, base); + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to an unsigned long integer. + * + * If string contains a negative number, value will be converted to + * the unsigned representation of the signed value, unless the original + * (nonnegated) value would overflow, in this case, it will set @result + * to ULONG_MAX, and return ERANGE. + * + * The same behavior holds, for qemu_strtoull() but sets @result to + * ULLONG_MAX instead of ULONG_MAX. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoul(const char *nptr, const char **endptr, int base, + unsigned long *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoul(nptr, &p, base); + /* Windows returns 1 for negative out-of-range values. 
*/ + if (errno == ERANGE) { + *result = -1; + } + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to a long long integer. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoll(const char *nptr, const char **endptr, int base, + int64_t *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoll(nptr, &p, base); + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * Converts ASCII string to an unsigned long long integer. + * + * See qemu_strtol() documentation for more info. + */ +int qemu_strtoull(const char *nptr, const char **endptr, int base, + uint64_t *result) +{ + char *p; + int err = 0; + if (!nptr) { + if (endptr) { + *endptr = nptr; + } + err = -EINVAL; + } else { + errno = 0; + *result = strtoull(nptr, &p, base); + /* Windows returns 1 for negative out-of-range values. */ + if (errno == ERANGE) { + *result = -1; + } + err = check_strtox_error(nptr, p, endptr, errno); + } + return err; +} + +/** + * parse_uint: + * + * @s: String to parse + * @value: Destination for parsed integer value + * @endptr: Destination for pointer to first character not consumed + * @base: integer base, between 2 and 36 inclusive, or 0 + * + * Parse unsigned integer + * + * Parsed syntax is like strtoull()'s: arbitrary whitespace, a single optional + * '+' or '-', an optional "0x" if @base is 0 or 16, one or more digits. + * + * If @s is null, or @base is invalid, or @s doesn't start with an + * integer in the syntax above, set *@value to 0, *@endptr to @s, and + * return -EINVAL. + * + * Set *@endptr to point right beyond the parsed integer (even if the integer + * overflows or is negative, all digits will be parsed and *@endptr will + * point right beyond them). + * + * If the integer is negative, set *@value to 0, and return -ERANGE. 
+ * + * If the integer overflows unsigned long long, set *@value to + * ULLONG_MAX, and return -ERANGE. + * + * Else, set *@value to the parsed integer, and return 0. + */ +int parse_uint(const char *s, unsigned long long *value, char **endptr, + int base) +{ + int r = 0; + char *endp = (char *)s; + unsigned long long val = 0; + + if (!s) { + r = -EINVAL; + goto out; + } + + errno = 0; + val = strtoull(s, &endp, base); + if (errno) { + r = -errno; + goto out; + } + + if (endp == s) { + r = -EINVAL; + goto out; + } + + /* make sure we reject negative numbers: */ + while (isspace((unsigned char)*s)) { + s++; + } + if (*s == '-') { + val = 0; + r = -ERANGE; + goto out; + } + +out: + *value = val; + *endptr = endp; + return r; +} + +/** + * parse_uint_full: + * + * @s: String to parse + * @value: Destination for parsed integer value + * @base: integer base, between 2 and 36 inclusive, or 0 + * + * Parse unsigned integer from entire string + * + * Have the same behavior of parse_uint(), but with an additional check + * for additional data after the parsed number. If extra characters are present + * after the parsed number, the function will return -EINVAL, and *@v will + * be set to 0. 
+ */ +int parse_uint_full(const char *s, unsigned long long *value, int base) +{ + char *endp; + int r; + + r = parse_uint(s, value, &endp, base); + if (r < 0) { + return r; + } + if (*endp) { + *value = 0; + return -EINVAL; + } + + return 0; +} + +int qemu_parse_fd(const char *param) +{ + long fd; + char *endptr; + + errno = 0; + fd = strtol(param, &endptr, 10); + if (param == endptr /* no conversion performed */ || + errno != 0 /* not representable as long; possibly others */ || + *endptr != '\0' /* final string not empty */ || + fd < 0 /* invalid as file descriptor */ || + fd > INT_MAX /* not representable as int */) { + return -1; + } + return fd; +} + +/* + * Implementation of ULEB128 (http://en.wikipedia.org/wiki/LEB128) + * Input is limited to 14-bit numbers + */ +int uleb128_encode_small(uint8_t *out, uint32_t n) +{ + g_assert(n <= 0x3fff); + if (n < 0x80) { + *out++ = n; + return 1; + } else { + *out++ = (n & 0x7f) | 0x80; + *out++ = n >> 7; + return 2; + } +} + +int uleb128_decode_small(const uint8_t *in, uint32_t *n) +{ + if (!(*in & 0x80)) { + *n = *in++; + return 1; + } else { + *n = *in++ & 0x7f; + /* we exceed 14 bit number */ + if (*in & 0x80) { + return -1; + } + *n |= *in++ << 7; + return 2; + } +} + +/* + * helper to parse debug environment variables + */ +int parse_debug_env(const char *name, int max, int initial) +{ + char *debug_env = getenv(name); + char *inv = NULL; + long debug; + + if (!debug_env) { + return initial; + } + errno = 0; + debug = strtol(debug_env, &inv, 10); + if (inv == debug_env) { + return initial; + } + if (debug < 0 || debug > max || errno != 0) { + fprintf(stderr, "warning: %s not in [0, %d]", name, max); + return initial; + } + return debug; +} + +/* + * Helper to print ethernet mac address + */ +const char *qemu_ether_ntoa(const MACAddr *mac) +{ + static char ret[18]; + + snprintf(ret, sizeof(ret), "%02x:%02x:%02x:%02x:%02x:%02x", + mac->a[0], mac->a[1], mac->a[2], mac->a[3], mac->a[4], mac->a[5]); + + return ret; +} 
diff --git a/src/util/envlist.c b/src/util/envlist.c new file mode 100644 index 0000000..099a544 --- /dev/null +++ b/src/util/envlist.c @@ -0,0 +1,241 @@ +#include "qemu-common.h" +#include "qemu/queue.h" +#include "qemu/envlist.h" + +struct envlist_entry { + const char *ev_var; /* actual env value */ + QLIST_ENTRY(envlist_entry) ev_link; +}; + +struct envlist { + QLIST_HEAD(, envlist_entry) el_entries; /* actual entries */ + size_t el_count; /* number of entries */ +}; + +static int envlist_parse(envlist_t *envlist, + const char *env, int (*)(envlist_t *, const char *)); + +/* + * Allocates new envlist and returns pointer to that or + * NULL in case of error. + */ +envlist_t * +envlist_create(void) +{ + envlist_t *envlist; + + if ((envlist = malloc(sizeof (*envlist))) == NULL) + return (NULL); + + QLIST_INIT(&envlist->el_entries); + envlist->el_count = 0; + + return (envlist); +} + +/* + * Releases given envlist and its entries. + */ +void +envlist_free(envlist_t *envlist) +{ + struct envlist_entry *entry; + + assert(envlist != NULL); + + while (envlist->el_entries.lh_first != NULL) { + entry = envlist->el_entries.lh_first; + QLIST_REMOVE(entry, ev_link); + + free((char *)entry->ev_var); + free(entry); + } + free(envlist); +} + +/* + * Parses comma separated list of set/modify environment + * variable entries and updates given enlist accordingly. + * + * For example: + * envlist_parse(el, "HOME=foo,SHELL=/bin/sh"); + * + * inserts/sets environment variables HOME and SHELL. + * + * Returns 0 on success, errno otherwise. + */ +int +envlist_parse_set(envlist_t *envlist, const char *env) +{ + return (envlist_parse(envlist, env, &envlist_setenv)); +} + +/* + * Parses comma separated list of unset environment variable + * entries and removes given variables from given envlist. + * + * Returns 0 on success, errno otherwise. 
+ */ +int +envlist_parse_unset(envlist_t *envlist, const char *env) +{ + return (envlist_parse(envlist, env, &envlist_unsetenv)); +} + +/* + * Parses comma separated list of set, modify or unset entries + * and calls given callback for each entry. + * + * Returns 0 in case of success, errno otherwise. + */ +static int +envlist_parse(envlist_t *envlist, const char *env, + int (*callback)(envlist_t *, const char *)) +{ + char *tmpenv, *envvar; + char *envsave = NULL; + int ret = 0; + assert(callback != NULL); + + if ((envlist == NULL) || (env == NULL)) + return (EINVAL); + + if ((tmpenv = strdup(env)) == NULL) + return (errno); + envsave = tmpenv; + + do { + envvar = strchr(tmpenv, ','); + if (envvar != NULL) { + *envvar = '\0'; + } + if ((*callback)(envlist, tmpenv) != 0) { + ret = errno; + break; + } + tmpenv = envvar + 1; + } while (envvar != NULL); + + free(envsave); + return ret; +} + +/* + * Sets environment value to envlist in similar manner + * than putenv(3). + * + * Returns 0 in success, errno otherwise. + */ +int +envlist_setenv(envlist_t *envlist, const char *env) +{ + struct envlist_entry *entry = NULL; + const char *eq_sign; + size_t envname_len; + + if ((envlist == NULL) || (env == NULL)) + return (EINVAL); + + /* find out first equals sign in given env */ + if ((eq_sign = strchr(env, '=')) == NULL) + return (EINVAL); + envname_len = eq_sign - env + 1; + + /* + * If there already exists variable with given name + * we remove and release it before allocating a whole + * new entry. 
+ */ + for (entry = envlist->el_entries.lh_first; entry != NULL; + entry = entry->ev_link.le_next) { + if (strncmp(entry->ev_var, env, envname_len) == 0) + break; + } + + if (entry != NULL) { + QLIST_REMOVE(entry, ev_link); + free((char *)entry->ev_var); + free(entry); + } else { + envlist->el_count++; + } + + if ((entry = malloc(sizeof (*entry))) == NULL) + return (errno); + if ((entry->ev_var = strdup(env)) == NULL) { + free(entry); + return (errno); + } + QLIST_INSERT_HEAD(&envlist->el_entries, entry, ev_link); + + return (0); +} + +/* + * Removes given env value from envlist in similar manner + * than unsetenv(3). Returns 0 in success, errno otherwise. + */ +int +envlist_unsetenv(envlist_t *envlist, const char *env) +{ + struct envlist_entry *entry; + size_t envname_len; + + if ((envlist == NULL) || (env == NULL)) + return (EINVAL); + + /* env is not allowed to contain '=' */ + if (strchr(env, '=') != NULL) + return (EINVAL); + + /* + * Find out the requested entry and remove + * it from the list. + */ + envname_len = strlen(env); + for (entry = envlist->el_entries.lh_first; entry != NULL; + entry = entry->ev_link.le_next) { + if (strncmp(entry->ev_var, env, envname_len) == 0) + break; + } + if (entry != NULL) { + QLIST_REMOVE(entry, ev_link); + free((char *)entry->ev_var); + free(entry); + + envlist->el_count--; + } + return (0); +} + +/* + * Returns given envlist as array of strings (in same form that + * global variable environ is). Caller must free returned memory + * by calling free(3) for each element and for the array. Returned + * array and given envlist are not related (no common references). + * + * If caller provides count pointer, number of items in array is + * stored there. In case of error, NULL is returned and no memory + * is allocated. 
+ */ +char ** +envlist_to_environ(const envlist_t *envlist, size_t *count) +{ + struct envlist_entry *entry; + char **env, **penv; + + penv = env = malloc((envlist->el_count + 1) * sizeof (char *)); + if (env == NULL) + return (NULL); + + for (entry = envlist->el_entries.lh_first; entry != NULL; + entry = entry->ev_link.le_next) { + *(penv++) = strdup(entry->ev_var); + } + *penv = NULL; /* NULL terminate the list */ + + if (count != NULL) + *count = envlist->el_count; + + return (env); +} diff --git a/src/util/error.c b/src/util/error.c new file mode 100644 index 0000000..80c89a2 --- /dev/null +++ b/src/util/error.c @@ -0,0 +1,241 @@ +/* + * QEMU Error Objects + * + * Copyright IBM, Corp. 2011 + * Copyright (C) 2011-2015 Red Hat, Inc. + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU LGPL, version 2. See + * the COPYING.LIB file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qapi/error.h" +#include "qemu/error-report.h" + +struct Error +{ + char *msg; + ErrorClass err_class; + const char *src, *func; + int line; + GString *hint; +}; + +Error *error_abort; +Error *error_fatal; + +static void error_handle_fatal(Error **errp, Error *err) +{ + if (errp == &error_abort) { + fprintf(stderr, "Unexpected error in %s() at %s:%d:\n", + err->func, err->src, err->line); + error_report_err(err); + abort(); + } + if (errp == &error_fatal) { + error_report_err(err); + exit(1); + } +} + +static void error_setv(Error **errp, + const char *src, int line, const char *func, + ErrorClass err_class, const char *fmt, va_list ap) +{ + Error *err; + int saved_errno = errno; + + if (errp == NULL) { + return; + } + assert(*errp == NULL); + + err = g_malloc0(sizeof(*err)); + err->msg = g_strdup_vprintf(fmt, ap); + err->err_class = err_class; + err->src = src; + err->line = line; + err->func = func; + + error_handle_fatal(errp, err); + *errp = err; + + errno = 
saved_errno; +} + +void error_set_internal(Error **errp, + const char *src, int line, const char *func, + ErrorClass err_class, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_setv(errp, src, line, func, err_class, fmt, ap); + va_end(ap); +} + +void error_setg_internal(Error **errp, + const char *src, int line, const char *func, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap); + va_end(ap); +} + +void error_setg_errno_internal(Error **errp, + const char *src, int line, const char *func, + int os_errno, const char *fmt, ...) +{ + va_list ap; + char *msg; + int saved_errno = errno; + + if (errp == NULL) { + return; + } + + va_start(ap, fmt); + error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap); + va_end(ap); + + if (os_errno != 0) { + msg = (*errp)->msg; + (*errp)->msg = g_strdup_printf("%s: %s", msg, strerror(os_errno)); + g_free(msg); + } + + errno = saved_errno; +} + +void error_setg_file_open_internal(Error **errp, + const char *src, int line, const char *func, + int os_errno, const char *filename) +{ + error_setg_errno_internal(errp, src, line, func, os_errno, + "Could not open '%s'", filename); +} + +void error_append_hint(Error **errp, const char *fmt, ...) +{ + va_list ap; + int saved_errno = errno; + Error *err; + + if (!errp) { + return; + } + err = *errp; + assert(err && errp != &error_abort); + + if (!err->hint) { + err->hint = g_string_new(NULL); + } + va_start(ap, fmt); + g_string_append_vprintf(err->hint, fmt, ap); + va_end(ap); + + errno = saved_errno; +} + +#ifdef _WIN32 + +void error_setg_win32_internal(Error **errp, + const char *src, int line, const char *func, + int win32_err, const char *fmt, ...) 
+{ + va_list ap; + char *msg1, *msg2; + + if (errp == NULL) { + return; + } + + va_start(ap, fmt); + error_setv(errp, src, line, func, ERROR_CLASS_GENERIC_ERROR, fmt, ap); + va_end(ap); + + if (win32_err != 0) { + msg1 = (*errp)->msg; + msg2 = g_win32_error_message(win32_err); + (*errp)->msg = g_strdup_printf("%s: %s (error: %x)", msg1, msg2, + (unsigned)win32_err); + g_free(msg2); + g_free(msg1); + } +} + +#endif + +Error *error_copy(const Error *err) +{ + Error *err_new; + + err_new = g_malloc0(sizeof(*err)); + err_new->msg = g_strdup(err->msg); + err_new->err_class = err->err_class; + err_new->src = err->src; + err_new->line = err->line; + err_new->func = err->func; + if (err->hint) { + err_new->hint = g_string_new(err->hint->str); + } + + return err_new; +} + +ErrorClass error_get_class(const Error *err) +{ + return err->err_class; +} + +const char *error_get_pretty(Error *err) +{ + return err->msg; +} + +void error_report_err(Error *err) +{ + error_report("%s", error_get_pretty(err)); + if (err->hint) { + error_printf_unless_qmp("%s\n", err->hint->str); + } + error_free(err); +} + +void error_free(Error *err) +{ + if (err) { + g_free(err->msg); + if (err->hint) { + g_string_free(err->hint, true); + } + g_free(err); + } +} + +void error_free_or_abort(Error **errp) +{ + assert(errp && *errp); + error_free(*errp); + *errp = NULL; +} + +void error_propagate(Error **dst_errp, Error *local_err) +{ + if (!local_err) { + return; + } + error_handle_fatal(dst_errp, local_err); + if (dst_errp && !*dst_errp) { + *dst_errp = local_err; + } else { + error_free(local_err); + } +} diff --git a/src/util/event_notifier-posix.c b/src/util/event_notifier-posix.c new file mode 100644 index 0000000..d4a0c63 --- /dev/null +++ b/src/util/event_notifier-posix.c @@ -0,0 +1,122 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. 
+ * See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qemu/event_notifier.h" +#include "sysemu/char.h" +#include "qemu/main-loop.h" + +#ifdef CONFIG_EVENTFD +#include <sys/eventfd.h> +#endif + +void event_notifier_init_fd(EventNotifier *e, int fd) +{ + e->rfd = fd; + e->wfd = fd; +} + +int event_notifier_init(EventNotifier *e, int active) +{ + int fds[2]; + int ret; + +#ifdef CONFIG_EVENTFD + ret = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); +#else + ret = -1; + errno = ENOSYS; +#endif + if (ret >= 0) { + e->rfd = e->wfd = ret; + } else { + if (errno != ENOSYS) { + return -errno; + } + if (qemu_pipe(fds) < 0) { + return -errno; + } + ret = fcntl_setfl(fds[0], O_NONBLOCK); + if (ret < 0) { + ret = -errno; + goto fail; + } + ret = fcntl_setfl(fds[1], O_NONBLOCK); + if (ret < 0) { + ret = -errno; + goto fail; + } + e->rfd = fds[0]; + e->wfd = fds[1]; + } + if (active) { + event_notifier_set(e); + } + return 0; + +fail: + close(fds[0]); + close(fds[1]); + return ret; +} + +void event_notifier_cleanup(EventNotifier *e) +{ + if (e->rfd != e->wfd) { + close(e->rfd); + } + close(e->wfd); +} + +int event_notifier_get_fd(const EventNotifier *e) +{ + return e->rfd; +} + +int event_notifier_set_handler(EventNotifier *e, + EventNotifierHandler *handler) +{ + qemu_set_fd_handler(e->rfd, (IOHandler *)handler, NULL, e); + return 0; +} + +int event_notifier_set(EventNotifier *e) +{ + static const uint64_t value = 1; + ssize_t ret; + + do { + ret = write(e->wfd, &value, sizeof(value)); + } while (ret < 0 && errno == EINTR); + + /* EAGAIN is fine, a read must be pending. */ + if (ret < 0 && errno != EAGAIN) { + return -errno; + } + return 0; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ + int value; + ssize_t len; + char buffer[512]; + + /* Drain the notify pipe. For eventfd, only 8 bytes will be read. 
*/ + value = 0; + do { + len = read(e->rfd, buffer, sizeof(buffer)); + value |= (len > 0); + } while ((len == -1 && errno == EINTR) || len == sizeof(buffer)); + + return value; +} diff --git a/src/util/event_notifier-win32.c b/src/util/event_notifier-win32.c new file mode 100644 index 0000000..6dbb530 --- /dev/null +++ b/src/util/event_notifier-win32.c @@ -0,0 +1,59 @@ +/* + * event notifier support + * + * Copyright Red Hat, Inc. 2010 + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" +#include "qemu/event_notifier.h" +#include "qemu/main-loop.h" + +int event_notifier_init(EventNotifier *e, int active) +{ + e->event = CreateEvent(NULL, TRUE, FALSE, NULL); + assert(e->event); + return 0; +} + +void event_notifier_cleanup(EventNotifier *e) +{ + CloseHandle(e->event); +} + +HANDLE event_notifier_get_handle(EventNotifier *e) +{ + return e->event; +} + +int event_notifier_set_handler(EventNotifier *e, + EventNotifierHandler *handler) +{ + if (handler) { + return qemu_add_wait_object(e->event, (IOHandler *)handler, e); + } else { + qemu_del_wait_object(e->event, (IOHandler *)handler, e); + return 0; + } +} + +int event_notifier_set(EventNotifier *e) +{ + SetEvent(e->event); + return 0; +} + +int event_notifier_test_and_clear(EventNotifier *e) +{ + int ret = WaitForSingleObject(e->event, 0); + if (ret == WAIT_OBJECT_0) { + ResetEvent(e->event); + return true; + } + return false; +} diff --git a/src/util/fifo8.c b/src/util/fifo8.c new file mode 100644 index 0000000..0ea5ad9 --- /dev/null +++ b/src/util/fifo8.c @@ -0,0 +1,125 @@ +/* + * Generic FIFO component, implemented as a circular buffer. + * + * Copyright (c) 2012 Peter A. G. 
Crosthwaite + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu-common.h" +#include "qemu/fifo8.h" + +void fifo8_create(Fifo8 *fifo, uint32_t capacity) +{ + fifo->data = g_new(uint8_t, capacity); + fifo->capacity = capacity; + fifo->head = 0; + fifo->num = 0; +} + +void fifo8_destroy(Fifo8 *fifo) +{ + g_free(fifo->data); +} + +void fifo8_push(Fifo8 *fifo, uint8_t data) +{ + if (fifo->num == fifo->capacity) { + abort(); + } + fifo->data[(fifo->head + fifo->num) % fifo->capacity] = data; + fifo->num++; +} + +void fifo8_push_all(Fifo8 *fifo, const uint8_t *data, uint32_t num) +{ + uint32_t start, avail; + + if (fifo->num + num > fifo->capacity) { + abort(); + } + + start = (fifo->head + fifo->num) % fifo->capacity; + + if (start + num <= fifo->capacity) { + memcpy(&fifo->data[start], data, num); + } else { + avail = fifo->capacity - start; + memcpy(&fifo->data[start], data, avail); + memcpy(&fifo->data[0], &data[avail], num - avail); + } + + fifo->num += num; +} + +uint8_t fifo8_pop(Fifo8 *fifo) +{ + uint8_t ret; + + if (fifo->num == 0) { + abort(); + } + ret = fifo->data[fifo->head++]; + fifo->head %= fifo->capacity; + fifo->num--; + return ret; +} + +const uint8_t *fifo8_pop_buf(Fifo8 *fifo, uint32_t max, uint32_t *num) +{ + uint8_t *ret; + + if (max == 0 || max > fifo->num) { + abort(); + } + *num = MIN(fifo->capacity - fifo->head, max); + ret = &fifo->data[fifo->head]; + fifo->head += *num; + fifo->head %= fifo->capacity; + fifo->num -= *num; + return ret; +} + +void fifo8_reset(Fifo8 *fifo) +{ + fifo->num = 0; + fifo->head = 0; +} + +bool fifo8_is_empty(Fifo8 *fifo) +{ + return (fifo->num == 
0); +} + +bool fifo8_is_full(Fifo8 *fifo) +{ + return (fifo->num == fifo->capacity); +} + +uint32_t fifo8_num_free(Fifo8 *fifo) +{ + return fifo->capacity - fifo->num; +} + +uint32_t fifo8_num_used(Fifo8 *fifo) +{ + return fifo->num; +} + +const VMStateDescription vmstate_fifo8 = { + .name = "Fifo8", + .version_id = 1, + .minimum_version_id = 1, + .fields = (VMStateField[]) { + VMSTATE_VBUFFER_UINT32(data, Fifo8, 1, NULL, 0, capacity), + VMSTATE_UINT32(head, Fifo8), + VMSTATE_UINT32(num, Fifo8), + VMSTATE_END_OF_LIST() + } +}; diff --git a/src/util/getauxval.c b/src/util/getauxval.c new file mode 100644 index 0000000..1732ace --- /dev/null +++ b/src/util/getauxval.c @@ -0,0 +1,109 @@ +/* + * QEMU access to the auxiliary vector + * + * Copyright (C) 2013 Red Hat, Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu-common.h" +#include "qemu/osdep.h" + +#ifdef CONFIG_GETAUXVAL +/* Don't inline this in qemu/osdep.h, because pulling in <sys/auxv.h> for + the system declaration of getauxval pulls in the system <elf.h>, which + conflicts with qemu's version. */ + +#include <sys/auxv.h> + +unsigned long qemu_getauxval(unsigned long key) +{ + return getauxval(key); +} +#elif defined(__linux__) +#include "elf.h" + +/* Our elf.h doesn't contain Elf32_auxv_t and Elf64_auxv_t, which is ok because + that just makes it easier to define it properly for the host here. */ +typedef struct { + unsigned long a_type; + unsigned long a_val; +} ElfW_auxv_t; + +static const ElfW_auxv_t *auxv; + +static const ElfW_auxv_t *qemu_init_auxval(void) +{ + ElfW_auxv_t *a; + ssize_t size = 512, r, ofs; + int fd; + + /* Allocate some initial storage. Make sure the first entry is set + to end-of-list, so that we've got a valid list in case of error. */ + auxv = a = g_malloc(size); + a[0].a_type = 0; + a[0].a_val = 0; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd < 0) { + return a; + } + + /* Read the first SIZE bytes. Hopefully, this covers everything. */ + r = read(fd, a, size); + + if (r == size) { + /* Continue to expand until we do get a partial read. 
*/ + do { + ofs = size; + size *= 2; + auxv = a = g_realloc(a, size); + r = read(fd, (char *)a + ofs, ofs); + } while (r == ofs); + } + + close(fd); + return a; +} + +unsigned long qemu_getauxval(unsigned long type) +{ + const ElfW_auxv_t *a = auxv; + + if (unlikely(a == NULL)) { + a = qemu_init_auxval(); + } + + for (; a->a_type != 0; a++) { + if (a->a_type == type) { + return a->a_val; + } + } + + return 0; +} + +#else + +unsigned long qemu_getauxval(unsigned long type) +{ + return 0; +} + +#endif diff --git a/src/util/hbitmap.c b/src/util/hbitmap.c new file mode 100644 index 0000000..50b888f --- /dev/null +++ b/src/util/hbitmap.c @@ -0,0 +1,495 @@ +/* + * Hierarchical Bitmap Data Type + * + * Copyright Red Hat, Inc., 2012 + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include <string.h> +#include <glib.h> +#include <assert.h> +#include "qemu/osdep.h" +#include "qemu/hbitmap.h" +#include "qemu/host-utils.h" +#include "trace.h" + +/* HBitmaps provides an array of bits. The bits are stored as usual in an + * array of unsigned longs, but HBitmap is also optimized to provide fast + * iteration over set bits; going from one bit to the next is O(logB n) + * worst case, with B = sizeof(long) * CHAR_BIT: the result is low enough + * that the number of levels is in fact fixed. + * + * In order to do this, it stacks multiple bitmaps with progressively coarser + * granularity; in all levels except the last, bit N is set iff the N-th + * unsigned long is nonzero in the immediately next level. When iteration + * completes on the last level it can examine the 2nd-last level to quickly + * skip entire words, and even do so recursively to skip blocks of 64 words or + * powers thereof (32 on 32-bit machines). 
 *
 * Given an index in the bitmap, it can be split in group of bits like
 * this (for the 64-bit case):
 *
 *     bits 0-57 => word in the last bitmap     | bits 58-63 => bit in the word
 *     bits 0-51 => word in the 2nd-last bitmap | bits 52-57 => bit in the word
 *     bits 0-45 => word in the 3rd-last bitmap | bits 46-51 => bit in the word
 *
 * So it is easy to move up simply by shifting the index right by
 * log2(BITS_PER_LONG) bits.  To move down, you shift the index left
 * similarly, and add the word index within the group.  Iteration uses
 * ffs (find first set bit) to find the next word to examine; this
 * operation can be done in constant time in most current architectures.
 *
 * Setting or clearing a range of m bits on all levels, the work to perform
 * is O(m + m/W + m/W^2 + ...), which is O(m) like on a regular bitmap.
 *
 * When iterating on a bitmap, each bit (on any level) is only visited
 * once.  Hence, the total cost of visiting a bitmap with m bits in it is
 * the number of bits that are set in all bitmaps.  Unless the bitmap is
 * extremely sparse, this is also O(m + m/W + m/W^2 + ...), so the amortized
 * cost of advancing from one bit to the next is usually constant (worst case
 * O(logB n) as in the non-amortized complexity).
 */

struct HBitmap {
    /* Number of total bits in the bottom level.  */
    uint64_t size;

    /* Number of set bits in the bottom level.  */
    uint64_t count;

    /* A scaling factor.  Given a granularity of G, each bit in the bitmap
     * will actually represent a group of 2^G elements.  Each operation on a
     * range of bits first rounds the bits to determine which group they land
     * in, and then affect the entire page; iteration will only visit the first
     * bit of each group.  Here is an example of operations in a size-16,
     * granularity-1 HBitmap:
     *
     *    initial state            00000000
     *    set(start=0, count=9)    11111000 (iter: 0, 2, 4, 6, 8)
     *    reset(start=1, count=3)  00111000 (iter: 4, 6, 8)
     *    set(start=9, count=2)    00111100 (iter: 4, 6, 8, 10)
     *    reset(start=5, count=5)  00000000
     *
     * From an implementation point of view, when setting or resetting bits,
     * the bitmap will scale bit numbers right by this amount of bits.  When
     * iterating, the bitmap will scale bit numbers left by this amount of
     * bits.
     */
    int granularity;

    /* A number of progressively less coarse bitmaps (i.e. level 0 is the
     * coarsest).  Each bit in level N represents a word in level N+1 that
     * has a set bit, except the last level where each bit represents the
     * actual bitmap.
     *
     * Note that all bitmaps have the same number of levels.  Even a 1-bit
     * bitmap will still allocate HBITMAP_LEVELS arrays.
     */
    unsigned long *levels[HBITMAP_LEVELS];

    /* The length of each levels[] array, in unsigned longs.  */
    uint64_t sizes[HBITMAP_LEVELS];
};

/* Advance hbi to the next nonzero word and return it.  hbi->pos
 * is updated.  Returns zero if we reach the end of the bitmap.
 */
unsigned long hbitmap_iter_skip_words(HBitmapIter *hbi)
{
    size_t pos = hbi->pos;
    const HBitmap *hb = hbi->hb;
    unsigned i = HBITMAP_LEVELS - 1;

    unsigned long cur;
    /* Climb towards level 0 until a level still has pending bits,
     * shifting pos right once per level climbed.
     */
    do {
        cur = hbi->cur[--i];
        pos >>= BITS_PER_LEVEL;
    } while (cur == 0);

    /* Check for end of iteration.  We always use fewer than BITS_PER_LONG
     * bits in the level 0 bitmap; thus we can repurpose the most significant
     * bit as a sentinel.  The sentinel is set in hbitmap_alloc and ensures
     * that the above loop ends even without an explicit check on i.
     */

    if (i == 0 && cur == (1UL << (BITS_PER_LONG - 1))) {
        return 0;
    }
    /* Walk back down to the last level, one level per iteration. */
    for (; i < HBITMAP_LEVELS - 1; i++) {
        /* Shift back pos to the left, matching the right shifts above.
         * The index of this word's least significant set bit provides
         * the low-order bits.
         */
        assert(cur);
        pos = (pos << BITS_PER_LEVEL) + ctzl(cur);
        /* cur & (cur - 1) clears the lowest set bit: it is consumed now. */
        hbi->cur[i] = cur & (cur - 1);

        /* Set up next level for iteration.  */
        cur = hb->levels[i + 1][pos];
    }

    hbi->pos = pos;
    trace_hbitmap_iter_skip_words(hbi->hb, hbi, pos, cur);

    assert(cur);
    return cur;
}

/* Prepare hbi for iterating over hb, starting at item first.  first is
 * given in unscaled units: it is shifted right by the granularity, and
 * the resulting position must lie inside the bitmap (asserted).
 */
void hbitmap_iter_init(HBitmapIter *hbi, const HBitmap *hb, uint64_t first)
{
    unsigned i, bit;
    uint64_t pos;

    hbi->hb = hb;
    pos = first >> hb->granularity;
    assert(pos < hb->size);
    hbi->pos = pos >> BITS_PER_LEVEL;
    hbi->granularity = hb->granularity;

    /* Fill hbi->cur[] from the last level up to level 0. */
    for (i = HBITMAP_LEVELS; i-- > 0; ) {
        bit = pos & (BITS_PER_LONG - 1);
        pos >>= BITS_PER_LEVEL;

        /* Drop bits representing items before first.  */
        hbi->cur[i] = hb->levels[i][pos] & ~((1UL << bit) - 1);

        /* We have already added level i+1, so the lowest set bit has
         * been processed.  Clear it.
         */
        if (i != HBITMAP_LEVELS - 1) {
            hbi->cur[i] &= ~(1UL << bit);
        }
    }
}

/* Return true when no bit is set in the bottom level. */
bool hbitmap_empty(const HBitmap *hb)
{
    return hb->count == 0;
}

/* Return the scaling factor (log2 of the group size) of hb. */
int hbitmap_granularity(const HBitmap *hb)
{
    return hb->granularity;
}

/* Return the number of set bottom-level bits scaled back up by the
 * granularity, i.e. count << granularity.
 */
uint64_t hbitmap_count(const HBitmap *hb)
{
    return hb->count << hb->granularity;
}

/* Count the number of set bits between start and last (both inclusive),
 * not accounting for the granularity: start and last are already scaled
 * positions in the bottom level.  Also an example of how to use
 * hbitmap_iter_next_word.
 */
static uint64_t hb_count_between(HBitmap *hb, uint64_t start, uint64_t last)
{
    HBitmapIter hbi;
    uint64_t count = 0;
    uint64_t end = last + 1;
    unsigned long cur;
    size_t pos;

    /* hbitmap_iter_init() scales its argument down again, hence the shift. */
    hbitmap_iter_init(&hbi, hb, start << hb->granularity);
    /* Count whole words that lie entirely before the word containing end. */
    for (;;) {
        pos = hbitmap_iter_next_word(&hbi, &cur);
        if (pos >= (end >> BITS_PER_LEVEL)) {
            break;
        }
        count += ctpopl(cur);
    }

    if (pos == (end >> BITS_PER_LEVEL)) {
        /* Drop bits representing the END-th and subsequent items.  */
        int bit = end & (BITS_PER_LONG - 1);
        cur &= (1UL << bit) - 1;
        count += ctpopl(cur);
    }

    return count;
}

/* Setting starts at the last layer and propagates up if an element
 * changes from zero to non-zero.
 *
 * Sets bits start..last (both inclusive; asserted to fall in the same
 * word) in *elem.  Returns true if *elem was zero before the update.
 */
static inline bool hb_set_elem(unsigned long *elem, uint64_t start, uint64_t last)
{
    unsigned long mask;
    bool changed;

    assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL));
    assert(start <= last);

    /* (2 << last) - (1 << start) has exactly bits start..last set. */
    mask = 2UL << (last & (BITS_PER_LONG - 1));
    mask -= 1UL << (start & (BITS_PER_LONG - 1));
    changed = (*elem == 0);
    *elem |= mask;
    return changed;
}

/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)...
 * Sets bits start..last (inclusive) of the given level, then recurses
 * into the level above when some word went from zero to non-zero.
 */
static void hb_set_between(HBitmap *hb, int level, uint64_t start, uint64_t last)
{
    size_t pos = start >> BITS_PER_LEVEL;
    size_t lastpos = last >> BITS_PER_LEVEL;
    bool changed = false;
    size_t i;

    i = pos;
    if (i < lastpos) {
        /* The range spans several words: finish the first one... */
        uint64_t next = (start | (BITS_PER_LONG - 1)) + 1;
        changed |= hb_set_elem(&hb->levels[level][i], start, next - 1);
        /* ...then fill every word strictly between pos and lastpos. */
        for (;;) {
            start = next;
            next += BITS_PER_LONG;
            if (++i == lastpos) {
                break;
            }
            changed |= (hb->levels[level][i] == 0);
            hb->levels[level][i] = ~0UL;
        }
    }
    /* Last (or only) word of the range. */
    changed |= hb_set_elem(&hb->levels[level][i], start, last);

    /* If there was any change in this layer, we may have to update
     * the one above.
     */
    if (level > 0 && changed) {
        hb_set_between(hb, level - 1, pos, lastpos);
    }
}

/* Set count items starting at start (both in unscaled units) and keep
 * hb->count in step.
 * NOTE(review): count == 0 would make last wrap to start - 1; presumably
 * callers never pass zero -- confirm.
 */
void hbitmap_set(HBitmap *hb, uint64_t start, uint64_t count)
{
    /* Compute range in the last layer.  */
    uint64_t last = start + count - 1;

    trace_hbitmap_set(hb, start, count,
                      start >> hb->granularity, last >> hb->granularity);

    start >>= hb->granularity;
    last >>= hb->granularity;
    count = last - start + 1;

    /* Only bits that are not already set add to the total. */
    hb->count += count - hb_count_between(hb, start, last);
    hb_set_between(hb, HBITMAP_LEVELS - 1, start, last);
}

/* Resetting works the other way round: propagate up if the new
 * value is zero.
+ */ +static inline bool hb_reset_elem(unsigned long *elem, uint64_t start, uint64_t last) +{ + unsigned long mask; + bool blanked; + + assert((last >> BITS_PER_LEVEL) == (start >> BITS_PER_LEVEL)); + assert(start <= last); + + mask = 2UL << (last & (BITS_PER_LONG - 1)); + mask -= 1UL << (start & (BITS_PER_LONG - 1)); + blanked = *elem != 0 && ((*elem & ~mask) == 0); + *elem &= ~mask; + return blanked; +} + +/* The recursive workhorse (the depth is limited to HBITMAP_LEVELS)... */ +static void hb_reset_between(HBitmap *hb, int level, uint64_t start, uint64_t last) +{ + size_t pos = start >> BITS_PER_LEVEL; + size_t lastpos = last >> BITS_PER_LEVEL; + bool changed = false; + size_t i; + + i = pos; + if (i < lastpos) { + uint64_t next = (start | (BITS_PER_LONG - 1)) + 1; + + /* Here we need a more complex test than when setting bits. Even if + * something was changed, we must not blank bits in the upper level + * unless the lower-level word became entirely zero. So, remove pos + * from the upper-level range if bits remain set. + */ + if (hb_reset_elem(&hb->levels[level][i], start, next - 1)) { + changed = true; + } else { + pos++; + } + + for (;;) { + start = next; + next += BITS_PER_LONG; + if (++i == lastpos) { + break; + } + changed |= (hb->levels[level][i] != 0); + hb->levels[level][i] = 0UL; + } + } + + /* Same as above, this time for lastpos. */ + if (hb_reset_elem(&hb->levels[level][i], start, last)) { + changed = true; + } else { + lastpos--; + } + + if (level > 0 && changed) { + hb_reset_between(hb, level - 1, pos, lastpos); + } +} + +void hbitmap_reset(HBitmap *hb, uint64_t start, uint64_t count) +{ + /* Compute range in the last layer. 
*/ + uint64_t last = start + count - 1; + + trace_hbitmap_reset(hb, start, count, + start >> hb->granularity, last >> hb->granularity); + + start >>= hb->granularity; + last >>= hb->granularity; + + hb->count -= hb_count_between(hb, start, last); + hb_reset_between(hb, HBITMAP_LEVELS - 1, start, last); +} + +void hbitmap_reset_all(HBitmap *hb) +{ + unsigned int i; + + /* Same as hbitmap_alloc() except for memset() instead of malloc() */ + for (i = HBITMAP_LEVELS; --i >= 1; ) { + memset(hb->levels[i], 0, hb->sizes[i] * sizeof(unsigned long)); + } + + hb->levels[0][0] = 1UL << (BITS_PER_LONG - 1); + hb->count = 0; +} + +bool hbitmap_get(const HBitmap *hb, uint64_t item) +{ + /* Compute position and bit in the last layer. */ + uint64_t pos = item >> hb->granularity; + unsigned long bit = 1UL << (pos & (BITS_PER_LONG - 1)); + + return (hb->levels[HBITMAP_LEVELS - 1][pos >> BITS_PER_LEVEL] & bit) != 0; +} + +void hbitmap_free(HBitmap *hb) +{ + unsigned i; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + g_free(hb->levels[i]); + } + g_free(hb); +} + +HBitmap *hbitmap_alloc(uint64_t size, int granularity) +{ + HBitmap *hb = g_new0(struct HBitmap, 1); + unsigned i; + + assert(granularity >= 0 && granularity < 64); + size = (size + (1ULL << granularity) - 1) >> granularity; + assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE)); + + hb->size = size; + hb->granularity = granularity; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + size = MAX((size + BITS_PER_LONG - 1) >> BITS_PER_LEVEL, 1); + hb->sizes[i] = size; + hb->levels[i] = g_new0(unsigned long, size); + } + + /* We necessarily have free bits in level 0 due to the definition + * of HBITMAP_LEVELS, so use one for a sentinel. This speeds up + * hbitmap_iter_skip_words. 
+ */ + assert(size == 1); + hb->levels[0][0] |= 1UL << (BITS_PER_LONG - 1); + return hb; +} + +void hbitmap_truncate(HBitmap *hb, uint64_t size) +{ + bool shrink; + unsigned i; + uint64_t num_elements = size; + uint64_t old; + + /* Size comes in as logical elements, adjust for granularity. */ + size = (size + (1ULL << hb->granularity) - 1) >> hb->granularity; + assert(size <= ((uint64_t)1 << HBITMAP_LOG_MAX_SIZE)); + shrink = size < hb->size; + + /* bit sizes are identical; nothing to do. */ + if (size == hb->size) { + return; + } + + /* If we're losing bits, let's clear those bits before we invalidate all of + * our invariants. This helps keep the bitcount consistent, and will prevent + * us from carrying around garbage bits beyond the end of the map. + */ + if (shrink) { + /* Don't clear partial granularity groups; + * start at the first full one. */ + uint64_t start = QEMU_ALIGN_UP(num_elements, 1 << hb->granularity); + uint64_t fix_count = (hb->size << hb->granularity) - start; + + assert(fix_count); + hbitmap_reset(hb, start, fix_count); + } + + hb->size = size; + for (i = HBITMAP_LEVELS; i-- > 0; ) { + size = MAX(BITS_TO_LONGS(size), 1); + if (hb->sizes[i] == size) { + break; + } + old = hb->sizes[i]; + hb->sizes[i] = size; + hb->levels[i] = g_realloc(hb->levels[i], size * sizeof(unsigned long)); + if (!shrink) { + memset(&hb->levels[i][old], 0x00, + (size - old) * sizeof(*hb->levels[i])); + } + } +} + + +/** + * Given HBitmaps A and B, let A := A (BITOR) B. + * Bitmap B will not be modified. + * + * @return true if the merge was successful, + * false if it was not attempted. + */ +bool hbitmap_merge(HBitmap *a, const HBitmap *b) +{ + int i; + uint64_t j; + + if ((a->size != b->size) || (a->granularity != b->granularity)) { + return false; + } + + if (hbitmap_count(b) == 0) { + return true; + } + + /* This merge is O(size), as BITS_PER_LONG and HBITMAP_LEVELS are constant. 
+ * It may be possible to improve running times for sparsely populated maps + * by using hbitmap_iter_next, but this is suboptimal for dense maps. + */ + for (i = HBITMAP_LEVELS - 1; i >= 0; i--) { + for (j = 0; j < a->sizes[i]; j++) { + a->levels[i][j] |= b->levels[i][j]; + } + } + + return true; +} diff --git a/src/util/hexdump.c b/src/util/hexdump.c new file mode 100644 index 0000000..969b340 --- /dev/null +++ b/src/util/hexdump.c @@ -0,0 +1,37 @@ +/* + * Helper to hexdump a buffer + * + * Copyright (c) 2013 Red Hat, Inc. + * Copyright (c) 2013 Gerd Hoffmann <kraxel@redhat.com> + * Copyright (c) 2013 Peter Crosthwaite <peter.crosthwaite@xilinx.com> + * Copyright (c) 2013 Xilinx, Inc + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu-common.h" + +void qemu_hexdump(const char *buf, FILE *fp, const char *prefix, size_t size) +{ + unsigned int b; + + for (b = 0; b < size; b++) { + if ((b % 16) == 0) { + fprintf(fp, "%s: %04x:", prefix, b); + } + if ((b % 4) == 0) { + fprintf(fp, " "); + } + fprintf(fp, " %02x", (unsigned char)buf[b]); + if ((b % 16) == 15) { + fprintf(fp, "\n"); + } + } + if ((b % 16) != 0) { + fprintf(fp, "\n"); + } +} diff --git a/src/util/host-utils.c b/src/util/host-utils.c new file mode 100644 index 0000000..102e5bf --- /dev/null +++ b/src/util/host-utils.c @@ -0,0 +1,162 @@ +/* + * Utility compute operations used by translated code. 
+ * + * Copyright (c) 2003 Fabrice Bellard + * Copyright (c) 2007 Aurelien Jarno + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <stdlib.h> +#include <stdint.h> +#include "qemu/host-utils.h" + +/* Long integer helpers */ +static inline void mul64(uint64_t *plow, uint64_t *phigh, + uint64_t a, uint64_t b) +{ + typedef union { + uint64_t ll; + struct { +#ifdef HOST_WORDS_BIGENDIAN + uint32_t high, low; +#else + uint32_t low, high; +#endif + } l; + } LL; + LL rl, rm, rn, rh, a0, b0; + uint64_t c; + + a0.ll = a; + b0.ll = b; + + rl.ll = (uint64_t)a0.l.low * b0.l.low; + rm.ll = (uint64_t)a0.l.low * b0.l.high; + rn.ll = (uint64_t)a0.l.high * b0.l.low; + rh.ll = (uint64_t)a0.l.high * b0.l.high; + + c = (uint64_t)rl.l.high + rm.l.low + rn.l.low; + rl.l.high = c; + c >>= 32; + c = c + rm.l.high + rn.l.high + rh.l.low; + rh.l.low = c; + rh.l.high += (uint32_t)(c >> 32); + + *plow = rl.ll; + *phigh = rh.ll; +} + +/* Unsigned 64x64 -> 128 multiplication */ +void mulu64 (uint64_t *plow, uint64_t *phigh, uint64_t a, uint64_t b) +{ + mul64(plow, phigh, a, b); +} + +/* Signed 64x64 -> 128 multiplication */ +void muls64 (uint64_t *plow, uint64_t *phigh, int64_t a, int64_t b) +{ + uint64_t rh; + + mul64(plow, &rh, a, b); + + /* Adjust for signs. */ + if (b < 0) { + rh -= a; + } + if (a < 0) { + rh -= b; + } + *phigh = rh; +} + +/* Unsigned 128x64 division. Returns 1 if overflow (divide by zero or */ +/* quotient exceeds 64 bits). Otherwise returns quotient via plow and */ +/* remainder via phigh. 
/* Unsigned 128x64 division.
 *
 * The dividend is passed in as *phigh:*plow.  Returns 1 on overflow (divide
 * by zero, or the quotient does not fit in 64 bits); in that case *plow and
 * *phigh are left untouched.  Otherwise returns 0 with the quotient in *plow
 * and the remainder in *phigh.
 */
int divu128(uint64_t *plow, uint64_t *phigh, uint64_t divisor)
{
    uint64_t dhi = *phigh;
    uint64_t dlo = *plow;
    unsigned i;
    uint64_t carry = 0;

    if (divisor == 0) {
        return 1;
    }

    if (dhi == 0) {
        *plow = dlo / divisor;
        *phigh = dlo % divisor;
        return 0;
    }

    /* The quotient needs more than 64 bits as soon as the high word reaches
     * the divisor: dhi == divisor already gives a quotient of at least 2^64.
     */
    if (dhi >= divisor) {
        return 1;
    }

    /* Shift-and-subtract long division, one quotient bit per iteration. */
    for (i = 0; i < 64; i++) {
        carry = dhi >> 63;
        dhi = (dhi << 1) | (dlo >> 63);
        if (carry || (dhi >= divisor)) {
            dhi -= divisor;
            carry = 1;
        } else {
            carry = 0;
        }
        dlo = (dlo << 1) | carry;
    }

    *plow = dlo;
    *phigh = dhi;
    return 0;
}

/* Signed 128x64 division built on divu128().
 *
 * The dividend is passed in as *phigh:*plow (two's complement).  Returns 1 if
 * divu128() overflows or the signed quotient does not fit in 64 bits, else 0
 * with the quotient (truncated toward zero) in *plow.  On success *phigh holds
 * the remainder of the magnitude division as produced by divu128(); it is not
 * sign-adjusted.
 */
int divs128(int64_t *plow, int64_t *phigh, int64_t divisor)
{
    int neg_dividend = *phigh < 0;
    int neg_divisor = divisor < 0;
    int neg_quot = neg_dividend ^ neg_divisor;
    /* Work on magnitudes in unsigned arithmetic so that negating the most
     * negative values is well defined.
     */
    uint64_t lo = (uint64_t)*plow;
    uint64_t hi = (uint64_t)*phigh;
    uint64_t udivisor = (uint64_t)divisor;
    int overflow;

    if (neg_dividend) {
        /* 128-bit two's complement negation. */
        lo = ~lo + 1;
        hi = ~hi + (lo == 0);
    }

    if (neg_divisor) {
        udivisor = 0 - udivisor;
    }

    overflow = divu128(&lo, &hi, udivisor);

    /* A negative quotient may have magnitude up to 2^63, a non-negative one
     * up to 2^63 - 1.  A zero quotient always fits, whatever the signs.
     */
    if (!overflow && lo > (uint64_t)INT64_MAX + (uint64_t)neg_quot) {
        overflow = 1;
    }

    if (neg_quot) {
        lo = 0 - lo;
    }

    *plow = (int64_t)lo;
    *phigh = (int64_t)hi;
    return overflow;
}

/* diff --git a/src/util/id.c b/src/util/id.c new file mode 100644 index 0000000..7883fbe --- /dev/null +++ b/src/util/id.c @@ -0,0 +1,65 @@ */
/*
 * Dealing with identifiers
 *
 * Copyright (C) 2014 Red Hat, Inc.
 *
 * Authors:
 *  Markus Armbruster <armbru@redhat.com>,
 *
 * This work is licensed under the terms of the GNU LGPL, version 2.1
 * or later.  See the COPYING.LIB file in the top-level directory.
 */
+ */ + +#include "qemu-common.h" + +bool id_wellformed(const char *id) +{ + int i; + + if (!qemu_isalpha(id[0])) { + return false; + } + for (i = 1; id[i]; i++) { + if (!qemu_isalnum(id[i]) && !strchr("-._", id[i])) { + return false; + } + } + return true; +} + +#define ID_SPECIAL_CHAR '#' + +static const char *const id_subsys_str[ID_MAX] = { + [ID_QDEV] = "qdev", + [ID_BLOCK] = "block", +}; + +/* + * Generates an ID of the form PREFIX SUBSYSTEM NUMBER + * where: + * + * - PREFIX is the reserved character '#' + * - SUBSYSTEM identifies the subsystem creating the ID + * - NUMBER is a decimal number unique within SUBSYSTEM. + * + * Example: "#block146" + * + * Note that these IDs do not satisfy id_wellformed(). + * + * The caller is responsible for freeing the returned string with g_free() + */ +char *id_generate(IdSubSystems id) +{ + static uint64_t id_counters[ID_MAX]; + uint32_t rnd; + + assert(id < ARRAY_SIZE(id_subsys_str)); + assert(id_subsys_str[id]); + + rnd = g_random_int_range(0, 100); + + return g_strdup_printf("%c%s%" PRIu64 "%02" PRId32, ID_SPECIAL_CHAR, + id_subsys_str[id], + id_counters[id]++, + rnd); +} diff --git a/src/util/iov.c b/src/util/iov.c new file mode 100644 index 0000000..a0d5934 --- /dev/null +++ b/src/util/iov.c @@ -0,0 +1,577 @@ +/* + * Helpers for getting linearized buffers from iov / filling buffers into iovs + * + * Copyright IBM, Corp. 2007, 2008 + * Copyright (C) 2010 Red Hat, Inc. + * + * Author(s): + * Anthony Liguori <aliguori@us.ibm.com> + * Amit Shah <amit.shah@redhat.com> + * Michael Tokarev <mjt@tls.msk.ru> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include "qemu/iov.h" +#include "qemu/sockets.h" + +size_t iov_from_buf(const struct iovec *iov, unsigned int iov_cnt, + size_t offset, const void *buf, size_t bytes) +{ + size_t done; + unsigned int i; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(iov[i].iov_base + offset, buf + done, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; + } + } + assert(offset == 0); + return done; +} + +size_t iov_to_buf(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, void *buf, size_t bytes) +{ + size_t done; + unsigned int i; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memcpy(buf + done, iov[i].iov_base + offset, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; + } + } + assert(offset == 0); + return done; +} + +size_t iov_memset(const struct iovec *iov, const unsigned int iov_cnt, + size_t offset, int fillc, size_t bytes) +{ + size_t done; + unsigned int i; + for (i = 0, done = 0; (offset || done < bytes) && i < iov_cnt; i++) { + if (offset < iov[i].iov_len) { + size_t len = MIN(iov[i].iov_len - offset, bytes - done); + memset(iov[i].iov_base + offset, fillc, len); + done += len; + offset = 0; + } else { + offset -= iov[i].iov_len; + } + } + assert(offset == 0); + return done; +} + +size_t iov_size(const struct iovec *iov, const unsigned int iov_cnt) +{ + size_t len; + unsigned int i; + + len = 0; + for (i = 0; i < iov_cnt; i++) { + len += iov[i].iov_len; + } + return len; +} + +/* helper function for iov_send_recv() */ +static ssize_t +do_send_recv(int sockfd, struct iovec *iov, unsigned iov_cnt, bool do_send) +{ +#ifdef CONFIG_POSIX + ssize_t ret; + struct msghdr msg; + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = iov_cnt; + do { + 
ret = do_send + ? sendmsg(sockfd, &msg, 0) + : recvmsg(sockfd, &msg, 0); + } while (ret < 0 && errno == EINTR); + return ret; +#else + /* else send piece-by-piece */ + /*XXX Note: windows has WSASend() and WSARecv() */ + unsigned i = 0; + ssize_t ret = 0; + while (i < iov_cnt) { + ssize_t r = do_send + ? send(sockfd, iov[i].iov_base, iov[i].iov_len, 0) + : recv(sockfd, iov[i].iov_base, iov[i].iov_len, 0); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. */ + if (ret == 0) { + ret = -1; + } + break; + } + i++; + } + return ret; +#endif +} + +ssize_t iov_send_recv(int sockfd, const struct iovec *_iov, unsigned iov_cnt, + size_t offset, size_t bytes, + bool do_send) +{ + ssize_t total = 0; + ssize_t ret; + size_t orig_len, tail; + unsigned niov; + struct iovec *local_iov, *iov; + + if (bytes <= 0) { + return 0; + } + + local_iov = g_new0(struct iovec, iov_cnt); + iov_copy(local_iov, iov_cnt, _iov, iov_cnt, offset, bytes); + offset = 0; + iov = local_iov; + + while (bytes > 0) { + /* Find the start position, skipping `offset' bytes: + * first, skip all full-sized vector elements, */ + for (niov = 0; niov < iov_cnt && offset >= iov[niov].iov_len; ++niov) { + offset -= iov[niov].iov_len; + } + + /* niov == iov_cnt would only be valid if bytes == 0, which + * we already ruled out in the loop condition. 
*/ + assert(niov < iov_cnt); + iov += niov; + iov_cnt -= niov; + + if (offset) { + /* second, skip `offset' bytes from the (now) first element, + * undo it on exit */ + iov[0].iov_base += offset; + iov[0].iov_len -= offset; + } + /* Find the end position skipping `bytes' bytes: */ + /* first, skip all full-sized elements */ + tail = bytes; + for (niov = 0; niov < iov_cnt && iov[niov].iov_len <= tail; ++niov) { + tail -= iov[niov].iov_len; + } + if (tail) { + /* second, fixup the last element, and remember the original + * length */ + assert(niov < iov_cnt); + assert(iov[niov].iov_len > tail); + orig_len = iov[niov].iov_len; + iov[niov++].iov_len = tail; + ret = do_send_recv(sockfd, iov, niov, do_send); + /* Undo the changes above before checking for errors */ + iov[niov-1].iov_len = orig_len; + } else { + ret = do_send_recv(sockfd, iov, niov, do_send); + } + if (offset) { + iov[0].iov_base -= offset; + iov[0].iov_len += offset; + } + + if (ret < 0) { + assert(errno != EINTR); + g_free(local_iov); + if (errno == EAGAIN && total > 0) { + return total; + } + return -1; + } + + if (ret == 0 && !do_send) { + /* recv returns 0 when the peer has performed an orderly + * shutdown. */ + break; + } + + /* Prepare for the next iteration */ + offset += ret; + total += ret; + bytes -= ret; + } + + g_free(local_iov); + return total; +} + + +void iov_hexdump(const struct iovec *iov, const unsigned int iov_cnt, + FILE *fp, const char *prefix, size_t limit) +{ + int v; + size_t size = 0; + char *buf; + + for (v = 0; v < iov_cnt; v++) { + size += iov[v].iov_len; + } + size = size > limit ? 
limit : size; + buf = g_malloc(size); + iov_to_buf(iov, iov_cnt, 0, buf, size); + qemu_hexdump(buf, fp, prefix, size); + g_free(buf); +} + +unsigned iov_copy(struct iovec *dst_iov, unsigned int dst_iov_cnt, + const struct iovec *iov, unsigned int iov_cnt, + size_t offset, size_t bytes) +{ + size_t len; + unsigned int i, j; + for (i = 0, j = 0; i < iov_cnt && j < dst_iov_cnt && bytes; i++) { + if (offset >= iov[i].iov_len) { + offset -= iov[i].iov_len; + continue; + } + len = MIN(bytes, iov[i].iov_len - offset); + + dst_iov[j].iov_base = iov[i].iov_base + offset; + dst_iov[j].iov_len = len; + j++; + bytes -= len; + offset = 0; + } + assert(offset == 0); + return j; +} + +/* io vectors */ + +void qemu_iovec_init(QEMUIOVector *qiov, int alloc_hint) +{ + qiov->iov = g_new(struct iovec, alloc_hint); + qiov->niov = 0; + qiov->nalloc = alloc_hint; + qiov->size = 0; +} + +void qemu_iovec_init_external(QEMUIOVector *qiov, struct iovec *iov, int niov) +{ + int i; + + qiov->iov = iov; + qiov->niov = niov; + qiov->nalloc = -1; + qiov->size = 0; + for (i = 0; i < niov; i++) + qiov->size += iov[i].iov_len; +} + +void qemu_iovec_add(QEMUIOVector *qiov, void *base, size_t len) +{ + assert(qiov->nalloc != -1); + + if (qiov->niov == qiov->nalloc) { + qiov->nalloc = 2 * qiov->nalloc + 1; + qiov->iov = g_renew(struct iovec, qiov->iov, qiov->nalloc); + } + qiov->iov[qiov->niov].iov_base = base; + qiov->iov[qiov->niov].iov_len = len; + qiov->size += len; + ++qiov->niov; +} + +/* + * Concatenates (partial) iovecs from src_iov to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src_iov if it comes first. This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. 
+ */ +size_t qemu_iovec_concat_iov(QEMUIOVector *dst, + struct iovec *src_iov, unsigned int src_cnt, + size_t soffset, size_t sbytes) +{ + int i; + size_t done; + + if (!sbytes) { + return 0; + } + assert(dst->nalloc != -1); + for (i = 0, done = 0; done < sbytes && i < src_cnt; i++) { + if (soffset < src_iov[i].iov_len) { + size_t len = MIN(src_iov[i].iov_len - soffset, sbytes - done); + qemu_iovec_add(dst, src_iov[i].iov_base + soffset, len); + done += len; + soffset = 0; + } else { + soffset -= src_iov[i].iov_len; + } + } + assert(soffset == 0); /* offset beyond end of src */ + + return done; +} + +/* + * Concatenates (partial) iovecs from src to the end of dst. + * It starts copying after skipping `soffset' bytes at the + * beginning of src and adds individual vectors from src to + * dst copies up to `sbytes' bytes total, or up to the end + * of src if it comes first. This way, it is okay to specify + * very large value for `sbytes' to indicate "up to the end + * of src". + * Only vector pointers are processed, not the actual data buffers. 
+ */ +void qemu_iovec_concat(QEMUIOVector *dst, + QEMUIOVector *src, size_t soffset, size_t sbytes) +{ + qemu_iovec_concat_iov(dst, src->iov, src->niov, soffset, sbytes); +} + +/* + * Check if the contents of the iovecs are all zero + */ +bool qemu_iovec_is_zero(QEMUIOVector *qiov) +{ + int i; + for (i = 0; i < qiov->niov; i++) { + size_t offs = QEMU_ALIGN_DOWN(qiov->iov[i].iov_len, 4 * sizeof(long)); + uint8_t *ptr = qiov->iov[i].iov_base; + if (offs && !buffer_is_zero(qiov->iov[i].iov_base, offs)) { + return false; + } + for (; offs < qiov->iov[i].iov_len; offs++) { + if (ptr[offs]) { + return false; + } + } + } + return true; +} + +void qemu_iovec_destroy(QEMUIOVector *qiov) +{ + assert(qiov->nalloc != -1); + + qemu_iovec_reset(qiov); + g_free(qiov->iov); + qiov->nalloc = 0; + qiov->iov = NULL; +} + +void qemu_iovec_reset(QEMUIOVector *qiov) +{ + assert(qiov->nalloc != -1); + + qiov->niov = 0; + qiov->size = 0; +} + +size_t qemu_iovec_to_buf(QEMUIOVector *qiov, size_t offset, + void *buf, size_t bytes) +{ + return iov_to_buf(qiov->iov, qiov->niov, offset, buf, bytes); +} + +size_t qemu_iovec_from_buf(QEMUIOVector *qiov, size_t offset, + const void *buf, size_t bytes) +{ + return iov_from_buf(qiov->iov, qiov->niov, offset, buf, bytes); +} + +size_t qemu_iovec_memset(QEMUIOVector *qiov, size_t offset, + int fillc, size_t bytes) +{ + return iov_memset(qiov->iov, qiov->niov, offset, fillc, bytes); +} + +/** + * Check that I/O vector contents are identical + * + * The IO vectors must have the same structure (same length of all parts). + * A typical usage is to compare vectors created with qemu_iovec_clone(). 
+ * + * @a: I/O vector + * @b: I/O vector + * @ret: Offset to first mismatching byte or -1 if match + */ +ssize_t qemu_iovec_compare(QEMUIOVector *a, QEMUIOVector *b) +{ + int i; + ssize_t offset = 0; + + assert(a->niov == b->niov); + for (i = 0; i < a->niov; i++) { + size_t len = 0; + uint8_t *p = (uint8_t *)a->iov[i].iov_base; + uint8_t *q = (uint8_t *)b->iov[i].iov_base; + + assert(a->iov[i].iov_len == b->iov[i].iov_len); + while (len < a->iov[i].iov_len && *p++ == *q++) { + len++; + } + + offset += len; + + if (len != a->iov[i].iov_len) { + return offset; + } + } + return -1; +} + +typedef struct { + int src_index; + struct iovec *src_iov; + void *dest_base; +} IOVectorSortElem; + +static int sortelem_cmp_src_base(const void *a, const void *b) +{ + const IOVectorSortElem *elem_a = a; + const IOVectorSortElem *elem_b = b; + + /* Don't overflow */ + if (elem_a->src_iov->iov_base < elem_b->src_iov->iov_base) { + return -1; + } else if (elem_a->src_iov->iov_base > elem_b->src_iov->iov_base) { + return 1; + } else { + return 0; + } +} + +static int sortelem_cmp_src_index(const void *a, const void *b) +{ + const IOVectorSortElem *elem_a = a; + const IOVectorSortElem *elem_b = b; + + return elem_a->src_index - elem_b->src_index; +} + +/** + * Copy contents of I/O vector + * + * The relative relationships of overlapping iovecs are preserved. This is + * necessary to ensure identical semantics in the cloned I/O vector. 
+ */ +void qemu_iovec_clone(QEMUIOVector *dest, const QEMUIOVector *src, void *buf) +{ + IOVectorSortElem sortelems[src->niov]; + void *last_end; + int i; + + /* Sort by source iovecs by base address */ + for (i = 0; i < src->niov; i++) { + sortelems[i].src_index = i; + sortelems[i].src_iov = &src->iov[i]; + } + qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_base); + + /* Allocate buffer space taking into account overlapping iovecs */ + last_end = NULL; + for (i = 0; i < src->niov; i++) { + struct iovec *cur = sortelems[i].src_iov; + ptrdiff_t rewind = 0; + + /* Detect overlap */ + if (last_end && last_end > cur->iov_base) { + rewind = last_end - cur->iov_base; + } + + sortelems[i].dest_base = buf - rewind; + buf += cur->iov_len - MIN(rewind, cur->iov_len); + last_end = MAX(cur->iov_base + cur->iov_len, last_end); + } + + /* Sort by source iovec index and build destination iovec */ + qsort(sortelems, src->niov, sizeof(sortelems[0]), sortelem_cmp_src_index); + for (i = 0; i < src->niov; i++) { + qemu_iovec_add(dest, sortelems[i].dest_base, src->iov[i].iov_len); + } +} + +size_t iov_discard_front(struct iovec **iov, unsigned int *iov_cnt, + size_t bytes) +{ + size_t total = 0; + struct iovec *cur; + + for (cur = *iov; *iov_cnt > 0; cur++) { + if (cur->iov_len > bytes) { + cur->iov_base += bytes; + cur->iov_len -= bytes; + total += bytes; + break; + } + + bytes -= cur->iov_len; + total += cur->iov_len; + *iov_cnt -= 1; + } + + *iov = cur; + return total; +} + +size_t iov_discard_back(struct iovec *iov, unsigned int *iov_cnt, + size_t bytes) +{ + size_t total = 0; + struct iovec *cur; + + if (*iov_cnt == 0) { + return 0; + } + + cur = iov + (*iov_cnt - 1); + + while (*iov_cnt > 0) { + if (cur->iov_len > bytes) { + cur->iov_len -= bytes; + total += bytes; + break; + } + + bytes -= cur->iov_len; + total += cur->iov_len; + cur--; + *iov_cnt -= 1; + } + + return total; +} + +void qemu_iovec_discard_back(QEMUIOVector *qiov, size_t bytes) +{ + size_t 
total; + unsigned int niov = qiov->niov; + + assert(qiov->size >= bytes); + total = iov_discard_back(qiov->iov, &niov, bytes); + assert(total == bytes); + + qiov->niov = niov; + qiov->size -= bytes; +} diff --git a/src/util/memfd.c b/src/util/memfd.c new file mode 100644 index 0000000..587ef5a --- /dev/null +++ b/src/util/memfd.c @@ -0,0 +1,162 @@ +/* + * memfd.c + * + * Copyright (c) 2015 Red Hat, Inc. + * + * QEMU library functions on POSIX which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include "qemu/osdep.h" + +#include <glib.h> +#include <glib/gprintf.h> + +#include <sys/mman.h> + +#include "qemu/memfd.h" + +#ifdef CONFIG_MEMFD +#include <sys/memfd.h> +#elif defined CONFIG_LINUX +#include <sys/syscall.h> +#include <asm/unistd.h> + +static int qemu_memfd_create(const char *name, unsigned int flags) +{ +#ifdef __NR_memfd_create + return syscall(__NR_memfd_create, name, flags); +#else + return -1; +#endif +} +#endif + +#ifndef MFD_CLOEXEC +#define MFD_CLOEXEC 0x0001U +#endif + +#ifndef MFD_ALLOW_SEALING +#define MFD_ALLOW_SEALING 0x0002U +#endif + +/* + * This is a best-effort helper for shared memory allocation, with + * optional sealing. The helper will do his best to allocate using + * memfd with sealing, but may fallback on other methods without + * sealing. + */ +void *qemu_memfd_alloc(const char *name, size_t size, unsigned int seals, + int *fd) +{ + void *ptr; + int mfd = -1; + + *fd = -1; + +#ifdef CONFIG_LINUX + if (seals) { + mfd = qemu_memfd_create(name, MFD_ALLOW_SEALING | MFD_CLOEXEC); + } + + if (mfd == -1) { + /* some systems have memfd without sealing */ + mfd = qemu_memfd_create(name, MFD_CLOEXEC); + seals = 0; + } +#endif + + if (mfd != -1) { + if (ftruncate(mfd, size) == -1) { + perror("ftruncate"); + close(mfd); + return NULL; + } + + if (seals && fcntl(mfd, F_ADD_SEALS, seals) == -1) { + perror("fcntl"); + close(mfd); + return NULL; + } + } else { + const char *tmpdir = g_get_tmp_dir(); + gchar *fname; + + fname = g_strdup_printf("%s/memfd-XXXXXX", tmpdir); + mfd = mkstemp(fname); + unlink(fname); + g_free(fname); + + if (mfd == -1) { + perror("mkstemp"); + return NULL; + } + + if (ftruncate(mfd, size) == -1) { + perror("ftruncate"); + close(mfd); + return NULL; + } + } + + ptr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, mfd, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + close(mfd); + return NULL; + } + + *fd = mfd; + return ptr; +} + +void qemu_memfd_free(void *ptr, size_t size, int fd) +{ + if (ptr) { + 
munmap(ptr, size); + } + + if (fd != -1) { + close(fd); + } +} + +enum { + MEMFD_KO, + MEMFD_OK, + MEMFD_TODO +}; + +bool qemu_memfd_check(void) +{ + static int memfd_check = MEMFD_TODO; + + if (memfd_check == MEMFD_TODO) { + int fd; + void *ptr; + + ptr = qemu_memfd_alloc("test", 4096, 0, &fd); + memfd_check = ptr ? MEMFD_OK : MEMFD_KO; + qemu_memfd_free(ptr, 4096, fd); + } + + return memfd_check == MEMFD_OK; +} diff --git a/src/util/mmap-alloc.c b/src/util/mmap-alloc.c new file mode 100644 index 0000000..54793a5 --- /dev/null +++ b/src/util/mmap-alloc.c @@ -0,0 +1,110 @@ +/* + * Support for RAM backed by mmaped host memory. + * + * Copyright (c) 2015 Red Hat, Inc. + * + * Authors: + * Michael S. Tsirkin <mst@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ +#include <qemu/mmap-alloc.h> +#include <sys/types.h> +#include <sys/mman.h> +#include <assert.h> + +#define HUGETLBFS_MAGIC 0x958458f6 + +#ifdef CONFIG_LINUX +#include <sys/vfs.h> +#endif + +size_t qemu_fd_getpagesize(int fd) +{ +#ifdef CONFIG_LINUX + struct statfs fs; + int ret; + + if (fd != -1) { + do { + ret = fstatfs(fd, &fs); + } while (ret != 0 && errno == EINTR); + + if (ret == 0 && fs.f_type == HUGETLBFS_MAGIC) { + return fs.f_bsize; + } + } +#endif + + return getpagesize(); +} + +void *qemu_ram_mmap(int fd, size_t size, size_t align, bool shared) +{ + /* + * Note: this always allocates at least one extra page of virtual address + * space, even if size is already aligned. + */ + size_t total = size + align; +#if defined(__powerpc64__) && defined(__linux__) + /* On ppc64 mappings in the same segment (aka slice) must share the same + * page size. Since we will be re-allocating part of this segment + * from the supplied fd, we should make sure to use the same page size, + * unless we are using the system page size, in which case anonymous memory + * is OK. Use align as a hint for the page size. 
+ * In this case, set MAP_NORESERVE to avoid allocating backing store memory. + */ + int anonfd = fd == -1 || qemu_fd_getpagesize(fd) == getpagesize() ? -1 : fd; + int flags = anonfd == -1 ? MAP_ANONYMOUS : MAP_NORESERVE; + void *ptr = mmap(0, total, PROT_NONE, flags | MAP_PRIVATE, anonfd, 0); +#else + void *ptr = mmap(0, total, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); +#endif + size_t offset = QEMU_ALIGN_UP((uintptr_t)ptr, align) - (uintptr_t)ptr; + void *ptr1; + + if (ptr == MAP_FAILED) { + return MAP_FAILED; + } + + /* Make sure align is a power of 2 */ + assert(!(align & (align - 1))); + /* Always align to host page size */ + assert(align >= getpagesize()); + + ptr1 = mmap(ptr + offset, size, PROT_READ | PROT_WRITE, + MAP_FIXED | + (fd == -1 ? MAP_ANONYMOUS : 0) | + (shared ? MAP_SHARED : MAP_PRIVATE), + fd, 0); + if (ptr1 == MAP_FAILED) { + munmap(ptr, total); + return MAP_FAILED; + } + + ptr += offset; + total -= offset; + + if (offset > 0) { + munmap(ptr - offset, offset); + } + + /* + * Leave a single PROT_NONE page allocated after the RAM block, to serve as + * a guard page guarding against potential buffer overflows. + */ + if (total > size + getpagesize()) { + munmap(ptr + size + getpagesize(), total - size - getpagesize()); + } + + return ptr; +} + +void qemu_ram_munmap(void *ptr, size_t size) +{ + if (ptr) { + /* Unmap both the RAM block and the guard page */ + munmap(ptr, size + getpagesize()); + } +} diff --git a/src/util/module.c b/src/util/module.c new file mode 100644 index 0000000..4bd4a94 --- /dev/null +++ b/src/util/module.c @@ -0,0 +1,219 @@ +/* + * QEMU Module Infrastructure + * + * Copyright IBM, Corp. 2009 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. 
+ */ + +#include <stdlib.h> +#include "qemu-common.h" +#ifdef CONFIG_MODULES +#include <gmodule.h> +#endif +#include "qemu/queue.h" +#include "qemu/module.h" + +typedef struct ModuleEntry +{ + void (*init)(void); + QTAILQ_ENTRY(ModuleEntry) node; + module_init_type type; +} ModuleEntry; + +typedef QTAILQ_HEAD(, ModuleEntry) ModuleTypeList; + +static ModuleTypeList init_type_list[MODULE_INIT_MAX]; + +static ModuleTypeList dso_init_list; + +static void init_lists(void) +{ + static int inited; + int i; + + if (inited) { + return; + } + + for (i = 0; i < MODULE_INIT_MAX; i++) { + QTAILQ_INIT(&init_type_list[i]); + } + + QTAILQ_INIT(&dso_init_list); + + inited = 1; +} + + +static ModuleTypeList *find_type(module_init_type type) +{ + ModuleTypeList *l; + + init_lists(); + + l = &init_type_list[type]; + + return l; +} + +void register_module_init(void (*fn)(void), module_init_type type) +{ + ModuleEntry *e; + ModuleTypeList *l; + + e = g_malloc0(sizeof(*e)); + e->init = fn; + e->type = type; + + l = find_type(type); + + QTAILQ_INSERT_TAIL(l, e, node); +} + +void register_dso_module_init(void (*fn)(void), module_init_type type) +{ + ModuleEntry *e; + + init_lists(); + + e = g_malloc0(sizeof(*e)); + e->init = fn; + e->type = type; + + QTAILQ_INSERT_TAIL(&dso_init_list, e, node); +} + +static void module_load(module_init_type type); + +void module_call_init(module_init_type type) +{ + ModuleTypeList *l; + ModuleEntry *e; + + module_load(type); + l = find_type(type); + + QTAILQ_FOREACH(e, l, node) { + e->init(); + } +} + +#ifdef CONFIG_MODULES +static int module_load_file(const char *fname) +{ + GModule *g_module; + void (*sym)(void); + const char *dsosuf = HOST_DSOSUF; + int len = strlen(fname); + int suf_len = strlen(dsosuf); + ModuleEntry *e, *next; + int ret; + + if (len <= suf_len || strcmp(&fname[len - suf_len], dsosuf)) { + /* wrong suffix */ + ret = -EINVAL; + goto out; + } + if (access(fname, F_OK)) { + ret = -ENOENT; + goto out; + } + + 
assert(QTAILQ_EMPTY(&dso_init_list)); + + g_module = g_module_open(fname, G_MODULE_BIND_LAZY | G_MODULE_BIND_LOCAL); + if (!g_module) { + fprintf(stderr, "Failed to open module: %s\n", + g_module_error()); + ret = -EINVAL; + goto out; + } + if (!g_module_symbol(g_module, DSO_STAMP_FUN_STR, (gpointer *)&sym)) { + fprintf(stderr, "Failed to initialize module: %s\n", + fname); + /* Print some info if this is a QEMU module (but from different build), + * this will make debugging user problems easier. */ + if (g_module_symbol(g_module, "qemu_module_dummy", (gpointer *)&sym)) { + fprintf(stderr, + "Note: only modules from the same build can be loaded.\n"); + } + g_module_close(g_module); + ret = -EINVAL; + } else { + QTAILQ_FOREACH(e, &dso_init_list, node) { + register_module_init(e->init, e->type); + } + ret = 0; + } + + QTAILQ_FOREACH_SAFE(e, &dso_init_list, node, next) { + QTAILQ_REMOVE(&dso_init_list, e, node); + g_free(e); + } +out: + return ret; +} +#endif + +static void module_load(module_init_type type) +{ +#ifdef CONFIG_MODULES + char *fname = NULL; + const char **mp; + static const char *block_modules[] = { + CONFIG_BLOCK_MODULES + }; + char *exec_dir; + char *dirs[3]; + int i = 0; + int ret; + + if (!g_module_supported()) { + fprintf(stderr, "Module is not supported by system.\n"); + return; + } + + switch (type) { + case MODULE_INIT_BLOCK: + mp = block_modules; + break; + default: + /* no other types have dynamic modules for now*/ + return; + } + + exec_dir = qemu_get_exec_dir(); + dirs[i++] = g_strdup_printf("%s", CONFIG_QEMU_MODDIR); + dirs[i++] = g_strdup_printf("%s/..", exec_dir ? : ""); + dirs[i++] = g_strdup_printf("%s", exec_dir ? 
: ""); + assert(i == ARRAY_SIZE(dirs)); + g_free(exec_dir); + exec_dir = NULL; + + for ( ; *mp; mp++) { + for (i = 0; i < ARRAY_SIZE(dirs); i++) { + fname = g_strdup_printf("%s/%s%s", dirs[i], *mp, HOST_DSOSUF); + ret = module_load_file(fname); + g_free(fname); + fname = NULL; + /* Try loading until loaded a module file */ + if (!ret) { + break; + } + } + } + + for (i = 0; i < ARRAY_SIZE(dirs); i++) { + g_free(dirs[i]); + } + +#endif +} diff --git a/src/util/notify.c b/src/util/notify.c new file mode 100644 index 0000000..f215dfc --- /dev/null +++ b/src/util/notify.c @@ -0,0 +1,71 @@ +/* + * Notifier lists + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ + +#include "qemu-common.h" +#include "qemu/notify.h" + +void notifier_list_init(NotifierList *list) +{ + QLIST_INIT(&list->notifiers); +} + +void notifier_list_add(NotifierList *list, Notifier *notifier) +{ + QLIST_INSERT_HEAD(&list->notifiers, notifier, node); +} + +void notifier_remove(Notifier *notifier) +{ + QLIST_REMOVE(notifier, node); +} + +void notifier_list_notify(NotifierList *list, void *data) +{ + Notifier *notifier, *next; + + QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) { + notifier->notify(notifier, data); + } +} + +void notifier_with_return_list_init(NotifierWithReturnList *list) +{ + QLIST_INIT(&list->notifiers); +} + +void notifier_with_return_list_add(NotifierWithReturnList *list, + NotifierWithReturn *notifier) +{ + QLIST_INSERT_HEAD(&list->notifiers, notifier, node); +} + +void notifier_with_return_remove(NotifierWithReturn *notifier) +{ + QLIST_REMOVE(notifier, node); +} + +int notifier_with_return_list_notify(NotifierWithReturnList *list, void *data) +{ + 
NotifierWithReturn *notifier, *next; + int ret = 0; + + QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) { + ret = notifier->notify(notifier, data); + if (ret != 0) { + break; + } + } + return ret; +} diff --git a/src/util/osdep.c b/src/util/osdep.c new file mode 100644 index 0000000..534b511 --- /dev/null +++ b/src/util/osdep.c @@ -0,0 +1,437 @@ +/* + * QEMU low level functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include <stdlib.h> +#include <stdio.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <fcntl.h> + +/* Needed early for CONFIG_BSD etc. 
*/ +#include "config-host.h" + +#if defined(CONFIG_MADVISE) || defined(CONFIG_POSIX_MADVISE) +#include <sys/mman.h> +#endif + +#ifdef CONFIG_SOLARIS +#include <sys/types.h> +#include <sys/statvfs.h> +/* See MySQL bug #7156 (http://bugs.mysql.com/bug.php?id=7156) for + discussion about Solaris header problems */ +extern int madvise(caddr_t, size_t, int); +#endif + +#include "qemu-common.h" +#include "qemu/sockets.h" +#include "qemu/error-report.h" +#include "monitor/monitor.h" + +static bool fips_enabled = false; + +/* Starting on QEMU 2.5, qemu_hw_version() returns "2.5+" by default + * instead of QEMU_VERSION, so setting hw_version on MachineClass + * is no longer mandatory. + * + * Do NOT change this string, or it will break compatibility on all + * machine classes that don't set hw_version. + */ +static const char *hw_version = "2.5+"; + +int socket_set_cork(int fd, int v) +{ +#if defined(SOL_TCP) && defined(TCP_CORK) + return qemu_setsockopt(fd, SOL_TCP, TCP_CORK, &v, sizeof(v)); +#else + return 0; +#endif +} + +int socket_set_nodelay(int fd) +{ + int v = 1; + return qemu_setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &v, sizeof(v)); +} + +int qemu_madvise(void *addr, size_t len, int advice) +{ + if (advice == QEMU_MADV_INVALID) { + errno = EINVAL; + return -1; + } +#if defined(CONFIG_MADVISE) + return madvise(addr, len, advice); +#elif defined(CONFIG_POSIX_MADVISE) + return posix_madvise(addr, len, advice); +#else + errno = EINVAL; + return -1; +#endif +} + +#ifndef _WIN32 +/* + * Dups an fd and sets the flags + */ +static int qemu_dup_flags(int fd, int flags) +{ + int ret; + int serrno; + int dup_flags; + +#ifdef F_DUPFD_CLOEXEC + ret = fcntl(fd, F_DUPFD_CLOEXEC, 0); +#else + ret = dup(fd); + if (ret != -1) { + qemu_set_cloexec(ret); + } +#endif + if (ret == -1) { + goto fail; + } + + dup_flags = fcntl(ret, F_GETFL); + if (dup_flags == -1) { + goto fail; + } + + if ((flags & O_SYNC) != (dup_flags & O_SYNC)) { + errno = EINVAL; + goto fail; + } + + /* Set/unset 
flags that we can with fcntl */ + if (fcntl(ret, F_SETFL, flags) == -1) { + goto fail; + } + + /* Truncate the file in the cases that open() would truncate it */ + if (flags & O_TRUNC || + ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))) { + if (ftruncate(ret, 0) == -1) { + goto fail; + } + } + + return ret; + +fail: + serrno = errno; + if (ret != -1) { + close(ret); + } + errno = serrno; + return -1; +} + +static int qemu_parse_fdset(const char *param) +{ + return qemu_parse_fd(param); +} +#endif + +/* + * Opens a file with FD_CLOEXEC set + */ +int qemu_open(const char *name, int flags, ...) +{ + int ret; + int mode = 0; + +#ifndef _WIN32 + const char *fdset_id_str; + + /* Attempt dup of fd from fd set */ + if (strstart(name, "/dev/fdset/", &fdset_id_str)) { + int64_t fdset_id; + int fd, dupfd; + + fdset_id = qemu_parse_fdset(fdset_id_str); + if (fdset_id == -1) { + errno = EINVAL; + return -1; + } + + fd = monitor_fdset_get_fd(fdset_id, flags); + if (fd == -1) { + return -1; + } + + dupfd = qemu_dup_flags(fd, flags); + if (dupfd == -1) { + return -1; + } + + ret = monitor_fdset_dup_fd_add(fdset_id, dupfd); + if (ret == -1) { + close(dupfd); + errno = EINVAL; + return -1; + } + + return dupfd; + } +#endif + + if (flags & O_CREAT) { + va_list ap; + + va_start(ap, flags); + mode = va_arg(ap, int); + va_end(ap); + } + +#ifdef O_CLOEXEC + ret = open(name, flags | O_CLOEXEC, mode); +#else + ret = open(name, flags, mode); + if (ret >= 0) { + qemu_set_cloexec(ret); + } +#endif + +#ifdef O_DIRECT + if (ret == -1 && errno == EINVAL && (flags & O_DIRECT)) { + error_report("file system may not support O_DIRECT"); + errno = EINVAL; /* in case it was clobbered */ + } +#endif /* O_DIRECT */ + + return ret; +} + +int qemu_close(int fd) +{ + int64_t fdset_id; + + /* Close fd that was dup'd from an fdset */ + fdset_id = monitor_fdset_dup_fd_find(fd); + if (fdset_id != -1) { + int ret; + + ret = close(fd); + if (ret == 0) { + monitor_fdset_dup_fd_remove(fd); + } + + return 
ret; + } + + return close(fd); +} + +/* + * A variant of write(2) which handles partial write. + * + * Return the number of bytes transferred. + * Set errno if fewer than `count' bytes are written. + * + * This function doesn't work with non-blocking fd's. + * Any of the possibilities with non-blocking fd's is bad: + * - return a short write (then the name is wrong) + * - busy wait adding (errno == EAGAIN) to the loop + */ +ssize_t qemu_write_full(int fd, const void *buf, size_t count) +{ + ssize_t ret = 0; + ssize_t total = 0; + + while (count) { + ret = write(fd, buf, count); + if (ret < 0) { + if (errno == EINTR) + continue; + break; + } + + count -= ret; + buf += ret; + total += ret; + } + + return total; +} + +/* + * Opens a socket with FD_CLOEXEC set + */ +int qemu_socket(int domain, int type, int protocol) +{ + int ret; + +#ifdef SOCK_CLOEXEC + ret = socket(domain, type | SOCK_CLOEXEC, protocol); + if (ret != -1 || errno != EINVAL) { + return ret; + } +#endif + ret = socket(domain, type, protocol); + if (ret >= 0) { + qemu_set_cloexec(ret); + } + + return ret; +} + +/* + * Accept a connection and set FD_CLOEXEC + */ +int qemu_accept(int s, struct sockaddr *addr, socklen_t *addrlen) +{ + int ret; + +#ifdef CONFIG_ACCEPT4 + ret = accept4(s, addr, addrlen, SOCK_CLOEXEC); + if (ret != -1 || errno != ENOSYS) { + return ret; + } +#endif + ret = accept(s, addr, addrlen); + if (ret >= 0) { + qemu_set_cloexec(ret); + } + + return ret; +} + +void qemu_set_hw_version(const char *version) +{ + hw_version = version; +} + +const char *qemu_hw_version(void) +{ + return hw_version; +} + +void fips_set_state(bool requested) +{ +#ifdef __linux__ + if (requested) { + FILE *fds = fopen("/proc/sys/crypto/fips_enabled", "r"); + if (fds != NULL) { + fips_enabled = (fgetc(fds) == '1'); + fclose(fds); + } + } +#else + fips_enabled = false; +#endif /* __linux__ */ + +#ifdef _FIPS_DEBUG + fprintf(stderr, "FIPS mode %s (requested %s)\n", + (fips_enabled ? 
"enabled" : "disabled"), + (requested ? "enabled" : "disabled")); +#endif +} + +bool fips_get_state(void) +{ + return fips_enabled; +} + +#ifdef _WIN32 +static void socket_cleanup(void) +{ + WSACleanup(); +} +#endif + +int socket_init(void) +{ +#ifdef _WIN32 + WSADATA Data; + int ret, err; + + ret = WSAStartup(MAKEWORD(2, 2), &Data); + if (ret != 0) { + err = WSAGetLastError(); + fprintf(stderr, "WSAStartup: %d\n", err); + return -1; + } + atexit(socket_cleanup); +#endif + return 0; +} + +#if !GLIB_CHECK_VERSION(2, 31, 0) +/* Ensure that glib is running in multi-threaded mode + * Old versions of glib require explicit initialization. Failure to do + * this results in the single-threaded code paths being taken inside + * glib. For example, the g_slice allocator will not be thread-safe + * and cause crashes. + */ +static void __attribute__((constructor)) thread_init(void) +{ + if (!g_thread_supported()) { + g_thread_init(NULL); + } +} +#endif + +#ifndef CONFIG_IOVEC +/* helper function for iov_send_recv() */ +static ssize_t +readv_writev(int fd, const struct iovec *iov, int iov_cnt, bool do_write) +{ + unsigned i = 0; + ssize_t ret = 0; + while (i < iov_cnt) { + ssize_t r = do_write + ? write(fd, iov[i].iov_base, iov[i].iov_len) + : read(fd, iov[i].iov_base, iov[i].iov_len); + if (r > 0) { + ret += r; + } else if (!r) { + break; + } else if (errno == EINTR) { + continue; + } else { + /* else it is some "other" error, + * only return if there was no data processed. 
*/ + if (ret == 0) { + ret = -1; + } + break; + } + i++; + } + return ret; +} + +ssize_t +readv(int fd, const struct iovec *iov, int iov_cnt) +{ + return readv_writev(fd, iov, iov_cnt, false); +} + +ssize_t +writev(int fd, const struct iovec *iov, int iov_cnt) +{ + return readv_writev(fd, iov, iov_cnt, true); +} +#endif diff --git a/src/util/oslib-posix.c b/src/util/oslib-posix.c new file mode 100644 index 0000000..d25f671 --- /dev/null +++ b/src/util/oslib-posix.c @@ -0,0 +1,521 @@ +/* + * os-posix-lib.c + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * QEMU library functions on POSIX which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +/* The following block of code temporarily renames the daemon() function so the + compiler does not see the warning associated with it in stdlib.h on OSX */ +#ifdef __APPLE__ +#define daemon qemu_fake_daemon_function +#include <stdlib.h> +#undef daemon +extern int daemon(int, int); +#endif + +#if defined(__linux__) && (defined(__x86_64__) || defined(__arm__)) + /* Use 2 MiB alignment so transparent hugepages can be used by KVM. + Valgrind does not support alignments larger than 1 MiB, + therefore we need special code which handles running on Valgrind. */ +# define QEMU_VMALLOC_ALIGN (512 * 4096) +#elif defined(__linux__) && defined(__s390x__) + /* Use 1 MiB (segment size) alignment so gmap can be used by KVM. */ +# define QEMU_VMALLOC_ALIGN (256 * 4096) +#else +# define QEMU_VMALLOC_ALIGN getpagesize() +#endif + +#include <termios.h> +#include <unistd.h> +#include <termios.h> + +#include <glib/gprintf.h> + +#include "config-host.h" +#include "sysemu/sysemu.h" +#include "trace.h" +#include "qemu/sockets.h" +#include <sys/mman.h> +#include <libgen.h> +#include <setjmp.h> +#include <sys/signal.h> + +#ifdef CONFIG_LINUX +#include <sys/syscall.h> +#endif + +#ifdef __FreeBSD__ +#include <sys/sysctl.h> +#endif + +#include <qemu/mmap-alloc.h> + +int qemu_get_thread_id(void) +{ +#if defined(__linux__) + return syscall(SYS_gettid); +#else + return getpid(); +#endif +} + +int qemu_daemon(int nochdir, int noclose) +{ + return daemon(nochdir, noclose); +} + +void *qemu_oom_check(void *ptr) +{ + if (ptr == NULL) { + fprintf(stderr, "Failed to allocate memory: %s\n", strerror(errno)); + abort(); + } + return ptr; +} + +void *qemu_try_memalign(size_t alignment, size_t size) +{ + void *ptr; + + if (alignment < sizeof(void*)) { + alignment = sizeof(void*); + } + +#if defined(_POSIX_C_SOURCE) && !defined(__sun__) + int ret; + ret = posix_memalign(&ptr, alignment, size); + if (ret != 0) { + errno = ret; + ptr = NULL; + } +#elif defined(CONFIG_BSD) + ptr = valloc(size); +#else 
+ ptr = memalign(alignment, size); +#endif + trace_qemu_memalign(alignment, size, ptr); + return ptr; +} + +void *qemu_memalign(size_t alignment, size_t size) +{ + return qemu_oom_check(qemu_try_memalign(alignment, size)); +} + +/* alloc anonymous, private (non-shared) memory pages */ +void *qemu_anon_ram_alloc(size_t size, uint64_t *alignment) +{ + size_t align = QEMU_VMALLOC_ALIGN; + void *ptr = qemu_ram_mmap(-1, size, align, false); + + if (ptr == MAP_FAILED) { + return NULL; + } + + if (alignment) { + *alignment = align; + } + + trace_qemu_anon_ram_alloc(size, ptr); + return ptr; +} + +void qemu_vfree(void *ptr) +{ + trace_qemu_vfree(ptr); + free(ptr); +} + +void qemu_anon_ram_free(void *ptr, size_t size) +{ + trace_qemu_anon_ram_free(ptr, size); + qemu_ram_munmap(ptr, size); +} + +void qemu_set_block(int fd) +{ + int f; + f = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, f & ~O_NONBLOCK); +} + +void qemu_set_nonblock(int fd) +{ + int f; + f = fcntl(fd, F_GETFL); + fcntl(fd, F_SETFL, f | O_NONBLOCK); +} + +int socket_set_fast_reuse(int fd) +{ + int val = 1, ret; + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, + (const char *)&val, sizeof(val)); + + assert(ret == 0); + + return ret; +} + +void qemu_set_cloexec(int fd) +{ + int f; + f = fcntl(fd, F_GETFD); + fcntl(fd, F_SETFD, f | FD_CLOEXEC); +} + +/* + * Creates a pipe with FD_CLOEXEC set on both file descriptors + */ +int qemu_pipe(int pipefd[2]) +{ + int ret; + +#ifdef CONFIG_PIPE2 + ret = pipe2(pipefd, O_CLOEXEC); + if (ret != -1 || errno != ENOSYS) { + return ret; + } +#endif + ret = pipe(pipefd); + if (ret == 0) { + qemu_set_cloexec(pipefd[0]); + qemu_set_cloexec(pipefd[1]); + } + + return ret; +} + +int qemu_utimens(const char *path, const struct timespec *times) +{ + struct timeval tv[2], tv_now; + struct stat st; + int i; +#ifdef CONFIG_UTIMENSAT + int ret; + + ret = utimensat(AT_FDCWD, path, times, AT_SYMLINK_NOFOLLOW); + if (ret != -1 || errno != ENOSYS) { + return ret; + } +#endif + /* Fallback: use utimes() instead of 
utimensat() */ + + /* happy if special cases */ + if (times[0].tv_nsec == UTIME_OMIT && times[1].tv_nsec == UTIME_OMIT) { + return 0; + } + if (times[0].tv_nsec == UTIME_NOW && times[1].tv_nsec == UTIME_NOW) { + return utimes(path, NULL); + } + + /* prepare for hard cases */ + if (times[0].tv_nsec == UTIME_NOW || times[1].tv_nsec == UTIME_NOW) { + gettimeofday(&tv_now, NULL); + } + if (times[0].tv_nsec == UTIME_OMIT || times[1].tv_nsec == UTIME_OMIT) { + stat(path, &st); + } + + for (i = 0; i < 2; i++) { + if (times[i].tv_nsec == UTIME_NOW) { + tv[i].tv_sec = tv_now.tv_sec; + tv[i].tv_usec = tv_now.tv_usec; + } else if (times[i].tv_nsec == UTIME_OMIT) { + tv[i].tv_sec = (i == 0) ? st.st_atime : st.st_mtime; + tv[i].tv_usec = 0; + } else { + tv[i].tv_sec = times[i].tv_sec; + tv[i].tv_usec = times[i].tv_nsec / 1000; + } + } + + return utimes(path, &tv[0]); +} + +char * +qemu_get_local_state_pathname(const char *relative_pathname) +{ + return g_strdup_printf("%s/%s", CONFIG_QEMU_LOCALSTATEDIR, + relative_pathname); +} + +void qemu_set_tty_echo(int fd, bool echo) +{ + struct termios tty; + + tcgetattr(fd, &tty); + + if (echo) { + tty.c_lflag |= ECHO | ECHONL | ICANON | IEXTEN; + } else { + tty.c_lflag &= ~(ECHO | ECHONL | ICANON | IEXTEN); + } + + tcsetattr(fd, TCSANOW, &tty); +} + +static char exec_dir[PATH_MAX]; + +void qemu_init_exec_dir(const char *argv0) +{ + char *dir; + char *p = NULL; + char buf[PATH_MAX]; + + assert(!exec_dir[0]); + +#if defined(__linux__) + { + int len; + len = readlink("/proc/self/exe", buf, sizeof(buf) - 1); + if (len > 0) { + buf[len] = 0; + p = buf; + } + } +#elif defined(__FreeBSD__) + { + static int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1}; + size_t len = sizeof(buf) - 1; + + *buf = '\0'; + if (!sysctl(mib, ARRAY_SIZE(mib), buf, &len, NULL, 0) && + *buf) { + buf[sizeof(buf) - 1] = '\0'; + p = buf; + } + } +#endif + /* If we don't have any way of figuring out the actual executable + location then try argv[0]. 
*/ + if (!p) { + if (!argv0) { + return; + } + p = realpath(argv0, buf); + if (!p) { + return; + } + } + dir = dirname(p); + + pstrcpy(exec_dir, sizeof(exec_dir), dir); +} + +char *qemu_get_exec_dir(void) +{ + return g_strdup(exec_dir); +} + +static sigjmp_buf sigjump; + +static void sigbus_handler(int signal) +{ + siglongjmp(sigjump, 1); +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ + int ret; + struct sigaction act, oldact; + sigset_t set, oldset; + + memset(&act, 0, sizeof(act)); + act.sa_handler = &sigbus_handler; + act.sa_flags = 0; + + ret = sigaction(SIGBUS, &act, &oldact); + if (ret) { + perror("os_mem_prealloc: failed to install signal handler"); + exit(1); + } + + /* unblock SIGBUS */ + sigemptyset(&set); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_UNBLOCK, &set, &oldset); + + if (sigsetjmp(sigjump, 1)) { + fprintf(stderr, "os_mem_prealloc: Insufficient free host memory " + "pages available to allocate guest RAM\n"); + exit(1); + } else { + int i; + size_t hpagesize = qemu_fd_getpagesize(fd); + size_t numpages = DIV_ROUND_UP(memory, hpagesize); + + /* MAP_POPULATE silently ignores failures */ + for (i = 0; i < numpages; i++) { + memset(area + (hpagesize * i), 0, 1); + } + + ret = sigaction(SIGBUS, &oldact, NULL); + if (ret) { + perror("os_mem_prealloc: failed to reinstall signal handler"); + exit(1); + } + + pthread_sigmask(SIG_SETMASK, &oldset, NULL); + } +} + + +static struct termios oldtty; + +static void term_exit(void) +{ + tcsetattr(0, TCSANOW, &oldtty); +} + +static void term_init(void) +{ + struct termios tty; + + tcgetattr(0, &tty); + oldtty = tty; + + tty.c_iflag &= ~(IGNBRK|BRKINT|PARMRK|ISTRIP + |INLCR|IGNCR|ICRNL|IXON); + tty.c_oflag |= OPOST; + tty.c_lflag &= ~(ECHO|ECHONL|ICANON|IEXTEN); + tty.c_cflag &= ~(CSIZE|PARENB); + tty.c_cflag |= CS8; + tty.c_cc[VMIN] = 1; + tty.c_cc[VTIME] = 0; + + tcsetattr(0, TCSANOW, &tty); + + atexit(term_exit); +} + +int qemu_read_password(char *buf, int buf_size) +{ + uint8_t ch; + int 
i, ret; + + printf("password: "); + fflush(stdout); + term_init(); + i = 0; + for (;;) { + ret = read(0, &ch, 1); + if (ret == -1) { + if (errno == EAGAIN || errno == EINTR) { + continue; + } else { + break; + } + } else if (ret == 0) { + ret = -1; + break; + } else { + if (ch == '\r' || + ch == '\n') { + ret = 0; + break; + } + if (i < (buf_size - 1)) { + buf[i++] = ch; + } + } + } + term_exit(); + buf[i] = '\0'; + printf("\n"); + return ret; +} + + +pid_t qemu_fork(Error **errp) +{ + sigset_t oldmask, newmask; + struct sigaction sig_action; + int saved_errno; + pid_t pid; + + /* + * Need to block signals now, so that child process can safely + * kill off caller's signal handlers without a race. + */ + sigfillset(&newmask); + if (pthread_sigmask(SIG_SETMASK, &newmask, &oldmask) != 0) { + error_setg_errno(errp, errno, + "cannot block signals"); + return -1; + } + + pid = fork(); + saved_errno = errno; + + if (pid < 0) { + /* attempt to restore signal mask, but ignore failure, to + * avoid obscuring the fork failure */ + (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + error_setg_errno(errp, saved_errno, + "cannot fork child process"); + errno = saved_errno; + return -1; + } else if (pid) { + /* parent process */ + + /* Restore our original signal mask now that the child is + * safely running. Only documented failures are EFAULT (not + * possible, since we are using just-grabbed mask) or EINVAL + * (not possible, since we are using correct arguments). 
*/ + (void)pthread_sigmask(SIG_SETMASK, &oldmask, NULL); + } else { + /* child process */ + size_t i; + + /* Clear out all signal handlers from parent so nothing + * unexpected can happen in our child once we unblock + * signals */ + sig_action.sa_handler = SIG_DFL; + sig_action.sa_flags = 0; + sigemptyset(&sig_action.sa_mask); + + for (i = 1; i < NSIG; i++) { + /* Only possible errors are EFAULT or EINVAL The former + * won't happen, the latter we expect, so no need to check + * return value */ + (void)sigaction(i, &sig_action, NULL); + } + + /* Unmask all signals in child, since we've no idea what the + * caller's done with their signal mask and don't want to + * propagate that to children */ + sigemptyset(&newmask); + if (pthread_sigmask(SIG_SETMASK, &newmask, NULL) != 0) { + Error *local_err = NULL; + error_setg_errno(&local_err, errno, + "cannot unblock signals"); + error_report_err(local_err); + _exit(1); + } + } + return pid; +} diff --git a/src/util/oslib-win32.c b/src/util/oslib-win32.c new file mode 100644 index 0000000..6a47019 --- /dev/null +++ b/src/util/oslib-win32.c @@ -0,0 +1,507 @@ +/* + * os-win32.c + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * QEMU library functions for win32 which are shared between QEMU and + * the QEMU tools. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + * The implementation of g_poll (functions poll_rest, g_poll) at the end of + * this file are based on code from GNOME glib-2 and use a different license, + * see the license comment there. + */ +#include <windows.h> +#include <glib.h> +#include <stdlib.h> +#include "config-host.h" +#include "sysemu/sysemu.h" +#include "qemu/main-loop.h" +#include "trace.h" +#include "qemu/sockets.h" + +/* this must come after including "trace.h" */ +#include <shlobj.h> + +void *qemu_oom_check(void *ptr) +{ + if (ptr == NULL) { + fprintf(stderr, "Failed to allocate memory: %lu\n", GetLastError()); + abort(); + } + return ptr; +} + +void *qemu_try_memalign(size_t alignment, size_t size) +{ + void *ptr; + + if (!size) { + abort(); + } + ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + trace_qemu_memalign(alignment, size, ptr); + return ptr; +} + +void *qemu_memalign(size_t alignment, size_t size) +{ + return qemu_oom_check(qemu_try_memalign(alignment, size)); +} + +void *qemu_anon_ram_alloc(size_t size, uint64_t *align) +{ + void *ptr; + + /* FIXME: this is not exactly optimal solution since VirtualAlloc + has 64Kb granularity, but at least it guarantees us that the + memory is page aligned. 
*/ + ptr = VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); + trace_qemu_anon_ram_alloc(size, ptr); + return ptr; +} + +void qemu_vfree(void *ptr) +{ + trace_qemu_vfree(ptr); + if (ptr) { + VirtualFree(ptr, 0, MEM_RELEASE); + } +} + +void qemu_anon_ram_free(void *ptr, size_t size) +{ + trace_qemu_anon_ram_free(ptr, size); + if (ptr) { + VirtualFree(ptr, 0, MEM_RELEASE); + } +} + +#ifndef CONFIG_LOCALTIME_R +/* FIXME: add proper locking */ +struct tm *gmtime_r(const time_t *timep, struct tm *result) +{ + struct tm *p = gmtime(timep); + memset(result, 0, sizeof(*result)); + if (p) { + *result = *p; + p = result; + } + return p; +} + +/* FIXME: add proper locking */ +struct tm *localtime_r(const time_t *timep, struct tm *result) +{ + struct tm *p = localtime(timep); + memset(result, 0, sizeof(*result)); + if (p) { + *result = *p; + p = result; + } + return p; +} +#endif /* CONFIG_LOCALTIME_R */ + +void qemu_set_block(int fd) +{ + unsigned long opt = 0; + WSAEventSelect(fd, NULL, 0); + ioctlsocket(fd, FIONBIO, &opt); +} + +void qemu_set_nonblock(int fd) +{ + unsigned long opt = 1; + ioctlsocket(fd, FIONBIO, &opt); + qemu_fd_register(fd); +} + +int socket_set_fast_reuse(int fd) +{ + /* Enabling the reuse of an endpoint that was used by a socket still in + * TIME_WAIT state is usually performed by setting SO_REUSEADDR. On Windows + * fast reuse is the default and SO_REUSEADDR does strange things. So we + * don't have to do anything here. 
More info can be found at: + * http://msdn.microsoft.com/en-us/library/windows/desktop/ms740621.aspx */ + return 0; +} + +int inet_aton(const char *cp, struct in_addr *ia) +{ + uint32_t addr = inet_addr(cp); + if (addr == 0xffffffff) { + return 0; + } + ia->s_addr = addr; + return 1; +} + +void qemu_set_cloexec(int fd) +{ +} + +/* Offset between 1/1/1601 and 1/1/1970 in 100 nanosec units */ +#define _W32_FT_OFFSET (116444736000000000ULL) + +int qemu_gettimeofday(qemu_timeval *tp) +{ + union { + unsigned long long ns100; /*time since 1 Jan 1601 in 100ns units */ + FILETIME ft; + } _now; + + if(tp) { + GetSystemTimeAsFileTime (&_now.ft); + tp->tv_usec=(long)((_now.ns100 / 10ULL) % 1000000ULL ); + tp->tv_sec= (long)((_now.ns100 - _W32_FT_OFFSET) / 10000000ULL); + } + /* Always return 0 as per Open Group Base Specifications Issue 6. + Do not set errno on error. */ + return 0; +} + +int qemu_get_thread_id(void) +{ + return GetCurrentThreadId(); +} + +char * +qemu_get_local_state_pathname(const char *relative_pathname) +{ + HRESULT result; + char base_path[MAX_PATH+1] = ""; + + result = SHGetFolderPath(NULL, CSIDL_COMMON_APPDATA, NULL, + /* SHGFP_TYPE_CURRENT */ 0, base_path); + if (result != S_OK) { + /* misconfigured environment */ + g_critical("CSIDL_COMMON_APPDATA unavailable: %ld", (long)result); + abort(); + } + return g_strdup_printf("%s" G_DIR_SEPARATOR_S "%s", base_path, + relative_pathname); +} + +void qemu_set_tty_echo(int fd, bool echo) +{ + HANDLE handle = (HANDLE)_get_osfhandle(fd); + DWORD dwMode = 0; + + if (handle == INVALID_HANDLE_VALUE) { + return; + } + + GetConsoleMode(handle, &dwMode); + + if (echo) { + SetConsoleMode(handle, dwMode | ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT); + } else { + SetConsoleMode(handle, + dwMode & ~(ENABLE_ECHO_INPUT | ENABLE_LINE_INPUT)); + } +} + +static char exec_dir[PATH_MAX]; + +void qemu_init_exec_dir(const char *argv0) +{ + + char *p; + char buf[MAX_PATH]; + DWORD len; + + len = GetModuleFileName(NULL, buf, sizeof(buf) 
- 1); + if (len == 0) { + return; + } + + buf[len] = 0; + p = buf + len - 1; + while (p != buf && *p != '\\') { + p--; + } + *p = 0; + if (access(buf, R_OK) == 0) { + pstrcpy(exec_dir, sizeof(exec_dir), buf); + } +} + +char *qemu_get_exec_dir(void) +{ + return g_strdup(exec_dir); +} + +/* + * The original implementation of g_poll from glib has a problem on Windows + * when using timeouts < 10 ms. + * + * Whenever g_poll is called with timeout < 10 ms, it does a quick poll instead + * of wait. This causes significant performance degradation of QEMU. + * + * The following code is a copy of the original code from glib/gpoll.c + * (glib commit 20f4d1820b8d4d0fc4447188e33efffd6d4a88d8 from 2014-02-19). + * Some debug code was removed and the code was reformatted. + * All other code modifications are marked with 'QEMU'. + */ + +/* + * gpoll.c: poll(2) abstraction + * Copyright 1998 Owen Taylor + * Copyright 2008 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +static int poll_rest(gboolean poll_msgs, HANDLE *handles, gint nhandles, + GPollFD *fds, guint nfds, gint timeout) +{ + DWORD ready; + GPollFD *f; + int recursed_result; + + if (poll_msgs) { + /* Wait for either messages or handles + * -> Use MsgWaitForMultipleObjectsEx + */ + ready = MsgWaitForMultipleObjectsEx(nhandles, handles, timeout, + QS_ALLINPUT, MWMO_ALERTABLE); + + if (ready == WAIT_FAILED) { + gchar *emsg = g_win32_error_message(GetLastError()); + g_warning("MsgWaitForMultipleObjectsEx failed: %s", emsg); + g_free(emsg); + } + } else if (nhandles == 0) { + /* No handles to wait for, just the timeout */ + if (timeout == INFINITE) { + ready = WAIT_FAILED; + } else { + SleepEx(timeout, TRUE); + ready = WAIT_TIMEOUT; + } + } else { + /* Wait for just handles + * -> Use WaitForMultipleObjectsEx + */ + ready = + WaitForMultipleObjectsEx(nhandles, handles, FALSE, timeout, TRUE); + if (ready == WAIT_FAILED) { + gchar *emsg = g_win32_error_message(GetLastError()); + g_warning("WaitForMultipleObjectsEx failed: %s", emsg); + g_free(emsg); + } + } + + if (ready == WAIT_FAILED) { + return -1; + } else if (ready == WAIT_TIMEOUT || ready == WAIT_IO_COMPLETION) { + return 0; + } else if (poll_msgs && ready == WAIT_OBJECT_0 + nhandles) { + for (f = fds; f < &fds[nfds]; ++f) { + if (f->fd == G_WIN32_MSG_HANDLE && f->events & G_IO_IN) { + f->revents |= G_IO_IN; + } + } + + /* If we have a timeout, or no handles to poll, be satisfied + * with just noticing we have messages waiting. + */ + if (timeout != 0 || nhandles == 0) { + return 1; + } + + /* If no timeout and handles to poll, recurse to poll them, + * too. + */ + recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0); + return (recursed_result == -1) ? 
-1 : 1 + recursed_result; + } else if (/* QEMU: removed the following unneeded statement which causes + * a compiler warning: ready >= WAIT_OBJECT_0 && */ + ready < WAIT_OBJECT_0 + nhandles) { + for (f = fds; f < &fds[nfds]; ++f) { + if ((HANDLE) f->fd == handles[ready - WAIT_OBJECT_0]) { + f->revents = f->events; + } + } + + /* If no timeout and polling several handles, recurse to poll + * the rest of them. + */ + if (timeout == 0 && nhandles > 1) { + /* Remove the handle that fired */ + int i; + if (ready < nhandles - 1) { + for (i = ready - WAIT_OBJECT_0 + 1; i < nhandles; i++) { + handles[i-1] = handles[i]; + } + } + nhandles--; + recursed_result = poll_rest(FALSE, handles, nhandles, fds, nfds, 0); + return (recursed_result == -1) ? -1 : 1 + recursed_result; + } + return 1; + } + + return 0; +} + +gint g_poll(GPollFD *fds, guint nfds, gint timeout) +{ + HANDLE handles[MAXIMUM_WAIT_OBJECTS]; + gboolean poll_msgs = FALSE; + GPollFD *f; + gint nhandles = 0; + int retval; + + for (f = fds; f < &fds[nfds]; ++f) { + if (f->fd == G_WIN32_MSG_HANDLE && (f->events & G_IO_IN)) { + poll_msgs = TRUE; + } else if (f->fd > 0) { + /* Don't add the same handle several times into the array, as + * docs say that is not allowed, even if it actually does seem + * to work. + */ + gint i; + + for (i = 0; i < nhandles; i++) { + if (handles[i] == (HANDLE) f->fd) { + break; + } + } + + if (i == nhandles) { + if (nhandles == MAXIMUM_WAIT_OBJECTS) { + g_warning("Too many handles to wait for!\n"); + break; + } else { + handles[nhandles++] = (HANDLE) f->fd; + } + } + } + } + + for (f = fds; f < &fds[nfds]; ++f) { + f->revents = 0; + } + + if (timeout == -1) { + timeout = INFINITE; + } + + /* Polling for several things? 
*/ + if (nhandles > 1 || (nhandles > 0 && poll_msgs)) { + /* First check if one or several of them are immediately + * available + */ + retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, 0); + + /* If not, and we have a significant timeout, poll again with + * timeout then. Note that this will return indication for only + * one event, or only for messages. We ignore timeouts less than + * ten milliseconds as they are mostly pointless on Windows, the + * MsgWaitForMultipleObjectsEx() call will timeout right away + * anyway. + * + * Modification for QEMU: replaced timeout >= 10 by timeout > 0. + */ + if (retval == 0 && (timeout == INFINITE || timeout > 0)) { + retval = poll_rest(poll_msgs, handles, nhandles, + fds, nfds, timeout); + } + } else { + /* Just polling for one thing, so no need to check first if + * available immediately + */ + retval = poll_rest(poll_msgs, handles, nhandles, fds, nfds, timeout); + } + + if (retval == -1) { + for (f = fds; f < &fds[nfds]; ++f) { + f->revents = 0; + } + } + + return retval; +} + +int getpagesize(void) +{ + SYSTEM_INFO system_info; + + GetSystemInfo(&system_info); + return system_info.dwPageSize; +} + +void os_mem_prealloc(int fd, char *area, size_t memory) +{ + int i; + size_t pagesize = getpagesize(); + + memory = (memory + pagesize - 1) & -pagesize; + for (i = 0; i < memory / pagesize; i++) { + memset(area + pagesize * i, 0, 1); + } +} + + +/* XXX: put correct support for win32 */ +int qemu_read_password(char *buf, int buf_size) +{ + int c, i; + + printf("Password: "); + fflush(stdout); + i = 0; + for (;;) { + c = getchar(); + if (c < 0) { + buf[i] = '\0'; + return -1; + } else if (c == '\n') { + break; + } else if (i < (buf_size - 1)) { + buf[i++] = c; + } + } + buf[i] = '\0'; + return 0; +} + + +pid_t qemu_fork(Error **errp) +{ + errno = ENOSYS; + error_setg_errno(errp, errno, + "cannot fork child process"); + return -1; +} diff --git a/src/util/path.c b/src/util/path.c new file mode 100644 index 
0000000..4e4877e --- /dev/null +++ b/src/util/path.c @@ -0,0 +1,181 @@ +/* Code to mangle pathnames into those matching a given prefix. + eg. open("/lib/foo.so") => open("/usr/gnemul/i386-linux/lib/foo.so"); + + The assumption is that this area does not change. +*/ +#include <sys/types.h> +#include <sys/param.h> +#include <dirent.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> +#include "qemu-common.h" + +struct pathelem +{ + /* Name of this, eg. lib */ + char *name; + /* Full path name, eg. /usr/gnemul/x86-linux/lib. */ + char *pathname; + struct pathelem *parent; + /* Children */ + unsigned int num_entries; + struct pathelem *entries[0]; +}; + +static struct pathelem *base; + +/* First N chars of S1 match S2, and S2 is N chars long. */ +static int strneq(const char *s1, unsigned int n, const char *s2) +{ + unsigned int i; + + for (i = 0; i < n; i++) + if (s1[i] != s2[i]) + return 0; + return s2[i] == 0; +} + +static struct pathelem *add_entry(struct pathelem *root, const char *name, + unsigned type); + +static struct pathelem *new_entry(const char *root, + struct pathelem *parent, + const char *name) +{ + struct pathelem *new = g_malloc(sizeof(*new)); + new->name = g_strdup(name); + new->pathname = g_strdup_printf("%s/%s", root, name); + new->num_entries = 0; + return new; +} + +#define streq(a,b) (strcmp((a), (b)) == 0) + +/* Not all systems provide this feature */ +#if defined(DT_DIR) && defined(DT_UNKNOWN) && defined(DT_LNK) +# define dirent_type(dirent) ((dirent)->d_type) +# define is_dir_maybe(type) \ + ((type) == DT_DIR || (type) == DT_UNKNOWN || (type) == DT_LNK) +#else +# define dirent_type(dirent) (1) +# define is_dir_maybe(type) (type) +#endif + +static struct pathelem *add_dir_maybe(struct pathelem *path) +{ + DIR *dir; + + if ((dir = opendir(path->pathname)) != NULL) { + struct dirent *dirent; + + while ((dirent = readdir(dir)) != NULL) { + if (!streq(dirent->d_name,".") && 
!streq(dirent->d_name,"..")){ + path = add_entry(path, dirent->d_name, dirent_type(dirent)); + } + } + closedir(dir); + } + return path; +} + +static struct pathelem *add_entry(struct pathelem *root, const char *name, + unsigned type) +{ + struct pathelem **e; + + root->num_entries++; + + root = g_realloc(root, sizeof(*root) + + sizeof(root->entries[0])*root->num_entries); + e = &root->entries[root->num_entries-1]; + + *e = new_entry(root->pathname, root, name); + if (is_dir_maybe(type)) { + *e = add_dir_maybe(*e); + } + + return root; +} + +/* This needs to be done after tree is stabilized (ie. no more reallocs!). */ +static void set_parents(struct pathelem *child, struct pathelem *parent) +{ + unsigned int i; + + child->parent = parent; + for (i = 0; i < child->num_entries; i++) + set_parents(child->entries[i], child); +} + +/* FIXME: Doesn't handle DIR/.. where DIR is not in emulated dir. */ +static const char * +follow_path(const struct pathelem *cursor, const char *name) +{ + unsigned int i, namelen; + + name += strspn(name, "/"); + namelen = strcspn(name, "/"); + + if (namelen == 0) + return cursor->pathname; + + if (strneq(name, namelen, "..")) + return follow_path(cursor->parent, name + namelen); + + if (strneq(name, namelen, ".")) + return follow_path(cursor, name + namelen); + + for (i = 0; i < cursor->num_entries; i++) + if (strneq(name, namelen, cursor->entries[i]->name)) + return follow_path(cursor->entries[i], name + namelen); + + /* Not found */ + return NULL; +} + +void init_paths(const char *prefix) +{ + char pref_buf[PATH_MAX]; + + if (prefix[0] == '\0' || + !strcmp(prefix, "/")) + return; + + if (prefix[0] != '/') { + char *cwd = getcwd(NULL, 0); + size_t pref_buf_len = sizeof(pref_buf); + + if (!cwd) + abort(); + pstrcpy(pref_buf, sizeof(pref_buf), cwd); + pstrcat(pref_buf, pref_buf_len, "/"); + pstrcat(pref_buf, pref_buf_len, prefix); + free(cwd); + } else + pstrcpy(pref_buf, sizeof(pref_buf), prefix + 1); + + base = new_entry("", NULL, 
pref_buf); + base = add_dir_maybe(base); + if (base->num_entries == 0) { + g_free(base->pathname); + g_free(base->name); + g_free(base); + base = NULL; + } else { + set_parents(base, base); + } +} + +/* Look for path in emulation dir, otherwise return name. */ +const char *path(const char *name) +{ + /* Only do absolute paths: quick and dirty, but should mostly be OK. + Could do relative by tracking cwd. */ + if (!base || !name || name[0] != '/') + return name; + + return follow_path(base, name) ?: name; +} diff --git a/src/util/qemu-config.c b/src/util/qemu-config.c new file mode 100644 index 0000000..687fd34 --- /dev/null +++ b/src/util/qemu-config.c @@ -0,0 +1,573 @@ +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qemu/option.h" +#include "qemu/config-file.h" +#include "qapi/error.h" +#include "qmp-commands.h" + +static QemuOptsList *vm_config_groups[48]; +static QemuOptsList *drive_config_groups[4]; + +static QemuOptsList *find_list(QemuOptsList **lists, const char *group, + Error **errp) +{ + int i; + + for (i = 0; lists[i] != NULL; i++) { + if (strcmp(lists[i]->name, group) == 0) + break; + } + if (lists[i] == NULL) { + error_setg(errp, "There is no option group '%s'", group); + } + return lists[i]; +} + +QemuOptsList *qemu_find_opts(const char *group) +{ + QemuOptsList *ret; + Error *local_err = NULL; + + ret = find_list(vm_config_groups, group, &local_err); + if (local_err) { + error_report_err(local_err); + } + + return ret; +} + +QemuOpts *qemu_find_opts_singleton(const char *group) +{ + QemuOptsList *list; + QemuOpts *opts; + + list = qemu_find_opts(group); + assert(list); + opts = qemu_opts_find(list, NULL); + if (!opts) { + opts = qemu_opts_create(list, NULL, 0, &error_abort); + } + return opts; +} + +static CommandLineParameterInfoList *query_option_descs(const QemuOptDesc *desc) +{ + CommandLineParameterInfoList *param_list = NULL, *entry; + CommandLineParameterInfo *info; + int i; + + for (i = 0; desc[i].name != NULL; i++) { + 
info = g_malloc0(sizeof(*info)); + info->name = g_strdup(desc[i].name); + + switch (desc[i].type) { + case QEMU_OPT_STRING: + info->type = COMMAND_LINE_PARAMETER_TYPE_STRING; + break; + case QEMU_OPT_BOOL: + info->type = COMMAND_LINE_PARAMETER_TYPE_BOOLEAN; + break; + case QEMU_OPT_NUMBER: + info->type = COMMAND_LINE_PARAMETER_TYPE_NUMBER; + break; + case QEMU_OPT_SIZE: + info->type = COMMAND_LINE_PARAMETER_TYPE_SIZE; + break; + } + + if (desc[i].help) { + info->has_help = true; + info->help = g_strdup(desc[i].help); + } + if (desc[i].def_value_str) { + info->has_q_default = true; + info->q_default = g_strdup(desc[i].def_value_str); + } + + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + entry->next = param_list; + param_list = entry; + } + + return param_list; +} + +/* remove repeated entry from the info list */ +static void cleanup_infolist(CommandLineParameterInfoList *head) +{ + CommandLineParameterInfoList *pre_entry, *cur, *del_entry; + + cur = head; + while (cur->next) { + pre_entry = head; + while (pre_entry != cur->next) { + if (!strcmp(pre_entry->value->name, cur->next->value->name)) { + del_entry = cur->next; + cur->next = cur->next->next; + g_free(del_entry); + break; + } + pre_entry = pre_entry->next; + } + cur = cur->next; + } +} + +/* merge the description items of two parameter infolists */ +static void connect_infolist(CommandLineParameterInfoList *head, + CommandLineParameterInfoList *new) +{ + CommandLineParameterInfoList *cur; + + cur = head; + while (cur->next) { + cur = cur->next; + } + cur->next = new; +} + +/* access all the local QemuOptsLists for drive option */ +static CommandLineParameterInfoList *get_drive_infolist(void) +{ + CommandLineParameterInfoList *head = NULL, *cur; + int i; + + for (i = 0; drive_config_groups[i] != NULL; i++) { + if (!head) { + head = query_option_descs(drive_config_groups[i]->desc); + } else { + cur = query_option_descs(drive_config_groups[i]->desc); + connect_infolist(head, cur); + } + } + 
cleanup_infolist(head); + + return head; +} + +/* restore machine options that are now machine's properties */ +static QemuOptsList machine_opts = { + .merge_lists = true, + .head = QTAILQ_HEAD_INITIALIZER(machine_opts.head), + .desc = { + { + .name = "type", + .type = QEMU_OPT_STRING, + .help = "emulated machine" + },{ + .name = "accel", + .type = QEMU_OPT_STRING, + .help = "accelerator list", + },{ + .name = "kernel_irqchip", + .type = QEMU_OPT_BOOL, + .help = "use KVM in-kernel irqchip", + },{ + .name = "kvm_shadow_mem", + .type = QEMU_OPT_SIZE, + .help = "KVM shadow MMU size", + },{ + .name = "kernel", + .type = QEMU_OPT_STRING, + .help = "Linux kernel image file", + },{ + .name = "initrd", + .type = QEMU_OPT_STRING, + .help = "Linux initial ramdisk file", + },{ + .name = "append", + .type = QEMU_OPT_STRING, + .help = "Linux kernel command line", + },{ + .name = "dtb", + .type = QEMU_OPT_STRING, + .help = "Linux kernel device tree file", + },{ + .name = "dumpdtb", + .type = QEMU_OPT_STRING, + .help = "Dump current dtb to a file and quit", + },{ + .name = "phandle_start", + .type = QEMU_OPT_NUMBER, + .help = "The first phandle ID we may generate dynamically", + },{ + .name = "dt_compatible", + .type = QEMU_OPT_STRING, + .help = "Overrides the \"compatible\" property of the dt root node", + },{ + .name = "dump-guest-core", + .type = QEMU_OPT_BOOL, + .help = "Include guest memory in a core dump", + },{ + .name = "mem-merge", + .type = QEMU_OPT_BOOL, + .help = "enable/disable memory merge support", + },{ + .name = "usb", + .type = QEMU_OPT_BOOL, + .help = "Set on/off to enable/disable usb", + },{ + .name = "firmware", + .type = QEMU_OPT_STRING, + .help = "firmware image", + },{ + .name = "iommu", + .type = QEMU_OPT_BOOL, + .help = "Set on/off to enable/disable Intel IOMMU (VT-d)", + },{ + .name = "suppress-vmdesc", + .type = QEMU_OPT_BOOL, + .help = "Set on to disable self-describing migration", + },{ + .name = "aes-key-wrap", + .type = QEMU_OPT_BOOL, + .help = 
"enable/disable AES key wrapping using the CPACF wrapping key", + },{ + .name = "dea-key-wrap", + .type = QEMU_OPT_BOOL, + .help = "enable/disable DEA key wrapping using the CPACF wrapping key", + }, + { /* End of list */ } + } +}; + +CommandLineOptionInfoList *qmp_query_command_line_options(bool has_option, + const char *option, + Error **errp) +{ + CommandLineOptionInfoList *conf_list = NULL, *entry; + CommandLineOptionInfo *info; + int i; + + for (i = 0; vm_config_groups[i] != NULL; i++) { + if (!has_option || !strcmp(option, vm_config_groups[i]->name)) { + info = g_malloc0(sizeof(*info)); + info->option = g_strdup(vm_config_groups[i]->name); + if (!strcmp("drive", vm_config_groups[i]->name)) { + info->parameters = get_drive_infolist(); + } else if (!strcmp("machine", vm_config_groups[i]->name)) { + info->parameters = query_option_descs(machine_opts.desc); + } else { + info->parameters = + query_option_descs(vm_config_groups[i]->desc); + } + entry = g_malloc0(sizeof(*entry)); + entry->value = info; + entry->next = conf_list; + conf_list = entry; + } + } + + if (conf_list == NULL) { + error_setg(errp, "invalid option name: %s", option); + } + + return conf_list; +} + +QemuOptsList *qemu_find_opts_err(const char *group, Error **errp) +{ + return find_list(vm_config_groups, group, errp); +} + +void qemu_add_drive_opts(QemuOptsList *list) +{ + int entries, i; + + entries = ARRAY_SIZE(drive_config_groups); + entries--; /* keep list NULL terminated */ + for (i = 0; i < entries; i++) { + if (drive_config_groups[i] == NULL) { + drive_config_groups[i] = list; + return; + } + } + fprintf(stderr, "ran out of space in drive_config_groups"); + abort(); +} + +void qemu_add_opts(QemuOptsList *list) +{ + int entries, i; + + entries = ARRAY_SIZE(vm_config_groups); + entries--; /* keep list NULL terminated */ + for (i = 0; i < entries; i++) { + if (vm_config_groups[i] == NULL) { + vm_config_groups[i] = list; + return; + } + } + fprintf(stderr, "ran out of space in 
vm_config_groups"); + abort(); +} + +int qemu_set_option(const char *str) +{ + Error *local_err = NULL; + char group[64], id[64], arg[64]; + QemuOptsList *list; + QemuOpts *opts; + int rc, offset; + + rc = sscanf(str, "%63[^.].%63[^.].%63[^=]%n", group, id, arg, &offset); + if (rc < 3 || str[offset] != '=') { + error_report("can't parse: \"%s\"", str); + return -1; + } + + list = qemu_find_opts(group); + if (list == NULL) { + return -1; + } + + opts = qemu_opts_find(list, id); + if (!opts) { + error_report("there is no %s \"%s\" defined", + list->name, id); + return -1; + } + + qemu_opt_set(opts, arg, str + offset + 1, &local_err); + if (local_err) { + error_report_err(local_err); + return -1; + } + return 0; +} + +struct ConfigWriteData { + QemuOptsList *list; + FILE *fp; +}; + +static int config_write_opt(void *opaque, const char *name, const char *value, + Error **errp) +{ + struct ConfigWriteData *data = opaque; + + fprintf(data->fp, " %s = \"%s\"\n", name, value); + return 0; +} + +static int config_write_opts(void *opaque, QemuOpts *opts, Error **errp) +{ + struct ConfigWriteData *data = opaque; + const char *id = qemu_opts_id(opts); + + if (id) { + fprintf(data->fp, "[%s \"%s\"]\n", data->list->name, id); + } else { + fprintf(data->fp, "[%s]\n", data->list->name); + } + qemu_opt_foreach(opts, config_write_opt, data, NULL); + fprintf(data->fp, "\n"); + return 0; +} + +void qemu_config_write(FILE *fp) +{ + struct ConfigWriteData data = { .fp = fp }; + QemuOptsList **lists = vm_config_groups; + int i; + + fprintf(fp, "# qemu config file\n\n"); + for (i = 0; lists[i] != NULL; i++) { + data.list = lists[i]; + qemu_opts_foreach(data.list, config_write_opts, &data, NULL); + } +} + +int qemu_config_parse(FILE *fp, QemuOptsList **lists, const char *fname) +{ + char line[1024], group[64], id[64], arg[64], value[1024]; + Location loc; + QemuOptsList *list = NULL; + Error *local_err = NULL; + QemuOpts *opts = NULL; + int res = -1, lno = 0; + + loc_push_none(&loc); + 
while (fgets(line, sizeof(line), fp) != NULL) { + loc_set_file(fname, ++lno); + if (line[0] == '\n') { + /* skip empty lines */ + continue; + } + if (line[0] == '#') { + /* comment */ + continue; + } + if (sscanf(line, "[%63s \"%63[^\"]\"]", group, id) == 2) { + /* group with id */ + list = find_list(lists, group, &local_err); + if (local_err) { + error_report_err(local_err); + goto out; + } + opts = qemu_opts_create(list, id, 1, NULL); + continue; + } + if (sscanf(line, "[%63[^]]]", group) == 1) { + /* group without id */ + list = find_list(lists, group, &local_err); + if (local_err) { + error_report_err(local_err); + goto out; + } + opts = qemu_opts_create(list, NULL, 0, &error_abort); + continue; + } + value[0] = '\0'; + if (sscanf(line, " %63s = \"%1023[^\"]\"", arg, value) == 2 || + sscanf(line, " %63s = \"\"", arg) == 1) { + /* arg = value */ + if (opts == NULL) { + error_report("no group defined"); + goto out; + } + qemu_opt_set(opts, arg, value, &local_err); + if (local_err) { + error_report_err(local_err); + goto out; + } + continue; + } + error_report("parse error"); + goto out; + } + if (ferror(fp)) { + error_report("error reading file"); + goto out; + } + res = 0; +out: + loc_pop(&loc); + return res; +} + +int qemu_read_config_file(const char *filename) +{ + FILE *f = fopen(filename, "r"); + int ret; + + if (f == NULL) { + return -errno; + } + + ret = qemu_config_parse(f, vm_config_groups, filename); + fclose(f); + + if (ret == 0) { + return 0; + } else { + return -EINVAL; + } +} + +static void config_parse_qdict_section(QDict *options, QemuOptsList *opts, + Error **errp) +{ + QemuOpts *subopts; + QDict *subqdict; + QList *list = NULL; + Error *local_err = NULL; + size_t orig_size, enum_size; + char *prefix; + + prefix = g_strdup_printf("%s.", opts->name); + qdict_extract_subqdict(options, &subqdict, prefix); + g_free(prefix); + orig_size = qdict_size(subqdict); + if (!orig_size) { + goto out; + } + + subopts = qemu_opts_create(opts, NULL, 0, 
&local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + qemu_opts_absorb_qdict(subopts, subqdict, &local_err); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + enum_size = qdict_size(subqdict); + if (enum_size < orig_size && enum_size) { + error_setg(errp, "Unknown option '%s' for [%s]", + qdict_first(subqdict)->key, opts->name); + goto out; + } + + if (enum_size) { + /* Multiple, enumerated sections */ + QListEntry *list_entry; + unsigned i = 0; + + /* Not required anymore */ + qemu_opts_del(subopts); + + qdict_array_split(subqdict, &list); + if (qdict_size(subqdict)) { + error_setg(errp, "Unused option '%s' for [%s]", + qdict_first(subqdict)->key, opts->name); + goto out; + } + + QLIST_FOREACH_ENTRY(list, list_entry) { + QDict *section = qobject_to_qdict(qlist_entry_obj(list_entry)); + char *opt_name; + + if (!section) { + error_setg(errp, "[%s] section (index %u) does not consist of " + "keys", opts->name, i); + goto out; + } + + opt_name = g_strdup_printf("%s.%u", opts->name, i++); + subopts = qemu_opts_create(opts, opt_name, 1, &local_err); + g_free(opt_name); + if (local_err) { + error_propagate(errp, local_err); + goto out; + } + + qemu_opts_absorb_qdict(subopts, section, &local_err); + if (local_err) { + error_propagate(errp, local_err); + qemu_opts_del(subopts); + goto out; + } + + if (qdict_size(section)) { + error_setg(errp, "[%s] section doesn't support the option '%s'", + opts->name, qdict_first(section)->key); + qemu_opts_del(subopts); + goto out; + } + } + } + +out: + QDECREF(subqdict); + QDECREF(list); +} + +void qemu_config_parse_qdict(QDict *options, QemuOptsList **lists, + Error **errp) +{ + int i; + Error *local_err = NULL; + + for (i = 0; lists[i]; i++) { + config_parse_qdict_section(options, lists[i], &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} diff --git a/src/util/qemu-coroutine-io.c b/src/util/qemu-coroutine-io.c new file mode 100644 index 
0000000..e1eae73 --- /dev/null +++ b/src/util/qemu-coroutine-io.c @@ -0,0 +1,91 @@ +/* + * Coroutine-aware I/O functions + * + * Copyright (C) 2009-2010 Nippon Telegraph and Telephone Corporation. + * Copyright (c) 2011, Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +#include "qemu-common.h" +#include "qemu/sockets.h" +#include "qemu/coroutine.h" +#include "qemu/iov.h" +#include "qemu/main-loop.h" + +ssize_t coroutine_fn +qemu_co_sendv_recvv(int sockfd, struct iovec *iov, unsigned iov_cnt, + size_t offset, size_t bytes, bool do_send) +{ + size_t done = 0; + ssize_t ret; + int err; + while (done < bytes) { + ret = iov_send_recv(sockfd, iov, iov_cnt, + offset + done, bytes - done, do_send); + if (ret > 0) { + done += ret; + } else if (ret < 0) { + err = socket_error(); + if (err == EAGAIN || err == EWOULDBLOCK) { + qemu_coroutine_yield(); + } else if (done == 0) { + return -err; + } else { + break; + } + } else if (ret == 0 && !do_send) { + /* write (send) should never return 0. + * read (recv) returns 0 for end-of-file (-data). + * In both cases there's little point retrying, + * but we do for write anyway, just in case */ + break; + } + } + return done; +} + +ssize_t coroutine_fn +qemu_co_send_recv(int sockfd, void *buf, size_t bytes, bool do_send) +{ + struct iovec iov = { .iov_base = buf, .iov_len = bytes }; + return qemu_co_sendv_recvv(sockfd, &iov, 1, 0, bytes, do_send); +} + +typedef struct { + Coroutine *co; + int fd; +} FDYieldUntilData; + +static void fd_coroutine_enter(void *opaque) +{ + FDYieldUntilData *data = opaque; + qemu_set_fd_handler(data->fd, NULL, NULL, NULL); + qemu_coroutine_enter(data->co, NULL); +} + +void coroutine_fn yield_until_fd_readable(int fd) +{ + FDYieldUntilData data; + + assert(qemu_in_coroutine()); + data.co = qemu_coroutine_self(); + data.fd = fd; + qemu_set_fd_handler(fd, fd_coroutine_enter, NULL, &data); + qemu_coroutine_yield(); +} diff --git a/src/util/qemu-coroutine-lock.c b/src/util/qemu-coroutine-lock.c new file mode 100644 index 0000000..130ee19 --- /dev/null +++ b/src/util/qemu-coroutine-lock.c @@ -0,0 +1,186 @@ +/* + * coroutine queues and locks + * + * Copyright (c) 2011 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person 
obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/coroutine.h" +#include "qemu/coroutine_int.h" +#include "qemu/queue.h" +#include "trace.h" + +void qemu_co_queue_init(CoQueue *queue) +{ + QTAILQ_INIT(&queue->entries); +} + +void coroutine_fn qemu_co_queue_wait(CoQueue *queue) +{ + Coroutine *self = qemu_coroutine_self(); + QTAILQ_INSERT_TAIL(&queue->entries, self, co_queue_next); + qemu_coroutine_yield(); + assert(qemu_in_coroutine()); +} + +/** + * qemu_co_queue_run_restart: + * + * Enter each coroutine that was previously marked for restart by + * qemu_co_queue_next() or qemu_co_queue_restart_all(). This function is + * invoked by the core coroutine code when the current coroutine yields or + * terminates. 
+ */ +void qemu_co_queue_run_restart(Coroutine *co) +{ + Coroutine *next; + + trace_qemu_co_queue_run_restart(co); + while ((next = QTAILQ_FIRST(&co->co_queue_wakeup))) { + QTAILQ_REMOVE(&co->co_queue_wakeup, next, co_queue_next); + qemu_coroutine_enter(next, NULL); + } +} + +static bool qemu_co_queue_do_restart(CoQueue *queue, bool single) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *next; + + if (QTAILQ_EMPTY(&queue->entries)) { + return false; + } + + while ((next = QTAILQ_FIRST(&queue->entries)) != NULL) { + QTAILQ_REMOVE(&queue->entries, next, co_queue_next); + QTAILQ_INSERT_TAIL(&self->co_queue_wakeup, next, co_queue_next); + trace_qemu_co_queue_next(next); + if (single) { + break; + } + } + return true; +} + +bool coroutine_fn qemu_co_queue_next(CoQueue *queue) +{ + assert(qemu_in_coroutine()); + return qemu_co_queue_do_restart(queue, true); +} + +void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue) +{ + assert(qemu_in_coroutine()); + qemu_co_queue_do_restart(queue, false); +} + +bool qemu_co_enter_next(CoQueue *queue) +{ + Coroutine *next; + + next = QTAILQ_FIRST(&queue->entries); + if (!next) { + return false; + } + + QTAILQ_REMOVE(&queue->entries, next, co_queue_next); + qemu_coroutine_enter(next, NULL); + return true; +} + +bool qemu_co_queue_empty(CoQueue *queue) +{ + return QTAILQ_FIRST(&queue->entries) == NULL; +} + +void qemu_co_mutex_init(CoMutex *mutex) +{ + memset(mutex, 0, sizeof(*mutex)); + qemu_co_queue_init(&mutex->queue); +} + +void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_co_mutex_lock_entry(mutex, self); + + while (mutex->locked) { + qemu_co_queue_wait(&mutex->queue); + } + + mutex->locked = true; + + trace_qemu_co_mutex_lock_return(mutex, self); +} + +void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex) +{ + Coroutine *self = qemu_coroutine_self(); + + trace_qemu_co_mutex_unlock_entry(mutex, self); + + assert(mutex->locked == true); + 
assert(qemu_in_coroutine()); + + mutex->locked = false; + qemu_co_queue_next(&mutex->queue); + + trace_qemu_co_mutex_unlock_return(mutex, self); +} + +void qemu_co_rwlock_init(CoRwlock *lock) +{ + memset(lock, 0, sizeof(*lock)); + qemu_co_queue_init(&lock->queue); +} + +void qemu_co_rwlock_rdlock(CoRwlock *lock) +{ + while (lock->writer) { + qemu_co_queue_wait(&lock->queue); + } + lock->reader++; +} + +void qemu_co_rwlock_unlock(CoRwlock *lock) +{ + assert(qemu_in_coroutine()); + if (lock->writer) { + lock->writer = false; + qemu_co_queue_restart_all(&lock->queue); + } else { + lock->reader--; + assert(lock->reader >= 0); + /* Wakeup only one waiting writer */ + if (!lock->reader) { + qemu_co_queue_next(&lock->queue); + } + } +} + +void qemu_co_rwlock_wrlock(CoRwlock *lock) +{ + while (lock->writer || lock->reader) { + qemu_co_queue_wait(&lock->queue); + } + lock->writer = true; +} diff --git a/src/util/qemu-coroutine-sleep.c b/src/util/qemu-coroutine-sleep.c new file mode 100644 index 0000000..b35db56 --- /dev/null +++ b/src/util/qemu-coroutine-sleep.c @@ -0,0 +1,41 @@ +/* + * QEMU coroutine sleep + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +#include "qemu/coroutine.h" +#include "qemu/timer.h" +#include "block/aio.h" + +typedef struct CoSleepCB { + QEMUTimer *ts; + Coroutine *co; +} CoSleepCB; + +static void co_sleep_cb(void *opaque) +{ + CoSleepCB *sleep_cb = opaque; + + qemu_coroutine_enter(sleep_cb->co, NULL); +} + +void coroutine_fn co_aio_sleep_ns(AioContext *ctx, QEMUClockType type, + int64_t ns) +{ + CoSleepCB sleep_cb = { + .co = qemu_coroutine_self(), + }; + sleep_cb.ts = aio_timer_new(ctx, type, SCALE_NS, co_sleep_cb, &sleep_cb); + timer_mod(sleep_cb.ts, qemu_clock_get_ns(type) + ns); + qemu_coroutine_yield(); + timer_del(sleep_cb.ts); + timer_free(sleep_cb.ts); +} diff --git a/src/util/qemu-coroutine.c b/src/util/qemu-coroutine.c new file mode 100644 index 0000000..8953560 --- /dev/null +++ b/src/util/qemu-coroutine.c @@ -0,0 +1,146 @@ +/* + * QEMU coroutines + * + * Copyright IBM, Corp. 2011 + * + * Authors: + * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> + * Kevin Wolf <kwolf@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +#include "trace.h" +#include "qemu-common.h" +#include "qemu/thread.h" +#include "qemu/atomic.h" +#include "qemu/coroutine.h" +#include "qemu/coroutine_int.h" + +enum { + POOL_BATCH_SIZE = 64, +}; + +/** Free list to speed up creation */ +static QSLIST_HEAD(, Coroutine) release_pool = QSLIST_HEAD_INITIALIZER(pool); +static unsigned int release_pool_size; +static __thread QSLIST_HEAD(, Coroutine) alloc_pool = QSLIST_HEAD_INITIALIZER(pool); +static __thread unsigned int alloc_pool_size; +static __thread Notifier coroutine_pool_cleanup_notifier; + +static void coroutine_pool_cleanup(Notifier *n, void *value) +{ + Coroutine *co; + Coroutine *tmp; + + QSLIST_FOREACH_SAFE(co, &alloc_pool, pool_next, tmp) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + qemu_coroutine_delete(co); + } +} + +Coroutine *qemu_coroutine_create(CoroutineEntry *entry) +{ + Coroutine *co = NULL; + + if (CONFIG_COROUTINE_POOL) { + co = QSLIST_FIRST(&alloc_pool); + if (!co) { + if (release_pool_size > POOL_BATCH_SIZE) { + /* Slow path; a good place to register the destructor, too. */ + if (!coroutine_pool_cleanup_notifier.notify) { + coroutine_pool_cleanup_notifier.notify = coroutine_pool_cleanup; + qemu_thread_atexit_add(&coroutine_pool_cleanup_notifier); + } + + /* This is not exact; there could be a little skew between + * release_pool_size and the actual size of release_pool. But + * it is just a heuristic, it does not need to be perfect. 
+ */ + alloc_pool_size = atomic_xchg(&release_pool_size, 0); + QSLIST_MOVE_ATOMIC(&alloc_pool, &release_pool); + co = QSLIST_FIRST(&alloc_pool); + } + } + if (co) { + QSLIST_REMOVE_HEAD(&alloc_pool, pool_next); + alloc_pool_size--; + } + } + + if (!co) { + co = qemu_coroutine_new(); + } + + co->entry = entry; + QTAILQ_INIT(&co->co_queue_wakeup); + return co; +} + +static void coroutine_delete(Coroutine *co) +{ + co->caller = NULL; + + if (CONFIG_COROUTINE_POOL) { + if (release_pool_size < POOL_BATCH_SIZE * 2) { + QSLIST_INSERT_HEAD_ATOMIC(&release_pool, co, pool_next); + atomic_inc(&release_pool_size); + return; + } + if (alloc_pool_size < POOL_BATCH_SIZE) { + QSLIST_INSERT_HEAD(&alloc_pool, co, pool_next); + alloc_pool_size++; + return; + } + } + + qemu_coroutine_delete(co); +} + +void qemu_coroutine_enter(Coroutine *co, void *opaque) +{ + Coroutine *self = qemu_coroutine_self(); + CoroutineAction ret; + + trace_qemu_coroutine_enter(self, co, opaque); + + if (co->caller) { + fprintf(stderr, "Co-routine re-entered recursively\n"); + abort(); + } + + co->caller = self; + co->entry_arg = opaque; + ret = qemu_coroutine_switch(self, co, COROUTINE_ENTER); + + qemu_co_queue_run_restart(co); + + switch (ret) { + case COROUTINE_YIELD: + return; + case COROUTINE_TERMINATE: + trace_qemu_coroutine_terminate(co); + coroutine_delete(co); + return; + default: + abort(); + } +} + +void coroutine_fn qemu_coroutine_yield(void) +{ + Coroutine *self = qemu_coroutine_self(); + Coroutine *to = self->caller; + + trace_qemu_coroutine_yield(self, to); + + if (!to) { + fprintf(stderr, "Co-routine is yielding to no one\n"); + abort(); + } + + self->caller = NULL; + qemu_coroutine_switch(self, to, COROUTINE_YIELD); +} diff --git a/src/util/qemu-error.c b/src/util/qemu-error.c new file mode 100644 index 0000000..c1574bb --- /dev/null +++ b/src/util/qemu-error.c @@ -0,0 +1,239 @@ +/* + * Error reporting + * + * Copyright (C) 2010 Red Hat Inc. 
+ * + * Authors: + * Markus Armbruster <armbru@redhat.com>, + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include <stdio.h> +#include "monitor/monitor.h" +#include "qemu/error-report.h" + +/* + * Print to current monitor if we have one, else to stderr. + * TODO should return int, so callers can calculate width, but that + * requires surgery to monitor_vprintf(). Left for another day. + */ +void error_vprintf(const char *fmt, va_list ap) +{ + if (cur_mon && !monitor_cur_is_qmp()) { + monitor_vprintf(cur_mon, fmt, ap); + } else { + vfprintf(stderr, fmt, ap); + } +} + +/* + * Print to current monitor if we have one, else to stderr. + * TODO just like error_vprintf() + */ +void error_printf(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_vprintf(fmt, ap); + va_end(ap); +} + +void error_printf_unless_qmp(const char *fmt, ...) +{ + va_list ap; + + if (!monitor_cur_is_qmp()) { + va_start(ap, fmt); + error_vprintf(fmt, ap); + va_end(ap); + } +} + +static Location std_loc = { + .kind = LOC_NONE +}; +static Location *cur_loc = &std_loc; + +/* + * Push location saved in LOC onto the location stack, return it. + * The top of that stack is the current location. + * Needs a matching loc_pop(). + */ +Location *loc_push_restore(Location *loc) +{ + assert(!loc->prev); + loc->prev = cur_loc; + cur_loc = loc; + return loc; +} + +/* + * Initialize *LOC to "nowhere", push it onto the location stack. + * The top of that stack is the current location. + * Needs a matching loc_pop(). + * Return LOC. + */ +Location *loc_push_none(Location *loc) +{ + loc->kind = LOC_NONE; + loc->prev = NULL; + return loc_push_restore(loc); +} + +/* + * Pop the location stack. + * LOC must be the current location, i.e. the top of the stack. 
+ */ +Location *loc_pop(Location *loc) +{ + assert(cur_loc == loc && loc->prev); + cur_loc = loc->prev; + loc->prev = NULL; + return loc; +} + +/* + * Save the current location in LOC, return LOC. + */ +Location *loc_save(Location *loc) +{ + *loc = *cur_loc; + loc->prev = NULL; + return loc; +} + +/* + * Change the current location to the one saved in LOC. + */ +void loc_restore(Location *loc) +{ + Location *prev = cur_loc->prev; + assert(!loc->prev); + *cur_loc = *loc; + cur_loc->prev = prev; +} + +/* + * Change the current location to "nowhere in particular". + */ +void loc_set_none(void) +{ + cur_loc->kind = LOC_NONE; +} + +/* + * Change the current location to argument ARGV[IDX..IDX+CNT-1]. + */ +void loc_set_cmdline(char **argv, int idx, int cnt) +{ + cur_loc->kind = LOC_CMDLINE; + cur_loc->num = cnt; + cur_loc->ptr = argv + idx; +} + +/* + * Change the current location to file FNAME, line LNO. + */ +void loc_set_file(const char *fname, int lno) +{ + assert (fname || cur_loc->kind == LOC_FILE); + cur_loc->kind = LOC_FILE; + cur_loc->num = lno; + if (fname) { + cur_loc->ptr = fname; + } +} + +static const char *progname; + +/* + * Set the program name for error_print_loc(). + */ +void error_set_progname(const char *argv0) +{ + const char *p = strrchr(argv0, '/'); + progname = p ? p + 1 : argv0; +} + +const char *error_get_progname(void) +{ + return progname; +} + +/* + * Print current location to current monitor if we have one, else to stderr. 
+ */ +static void error_print_loc(void) +{ + const char *sep = ""; + int i; + const char *const *argp; + + if (!cur_mon && progname) { + fprintf(stderr, "%s:", progname); + sep = " "; + } + switch (cur_loc->kind) { + case LOC_CMDLINE: + argp = cur_loc->ptr; + for (i = 0; i < cur_loc->num; i++) { + error_printf("%s%s", sep, argp[i]); + sep = " "; + } + error_printf(": "); + break; + case LOC_FILE: + error_printf("%s:", (const char *)cur_loc->ptr); + if (cur_loc->num) { + error_printf("%d:", cur_loc->num); + } + error_printf(" "); + break; + default: + error_printf("%s", sep); + } +} + +bool enable_timestamp_msg; +/* + * Print an error message to current monitor if we have one, else to stderr. + * Format arguments like vsprintf(). The result should not contain + * newlines. + * Prepend the current location and append a newline. + * It's wrong to call this in a QMP monitor. Use error_setg() there. + */ +void error_vreport(const char *fmt, va_list ap) +{ + GTimeVal tv; + gchar *timestr; + + if (enable_timestamp_msg && !cur_mon) { + g_get_current_time(&tv); + timestr = g_time_val_to_iso8601(&tv); + error_printf("%s ", timestr); + g_free(timestr); + } + + error_print_loc(); + error_vprintf(fmt, ap); + error_printf("\n"); +} + +/* + * Print an error message to current monitor if we have one, else to stderr. + * Format arguments like sprintf(). The result should not contain + * newlines. + * Prepend the current location and append a newline. + * It's wrong to call this in a QMP monitor. Use error_setg() there. + */ +void error_report(const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + error_vreport(fmt, ap); + va_end(ap); +} diff --git a/src/util/qemu-openpty.c b/src/util/qemu-openpty.c new file mode 100644 index 0000000..4c53211 --- /dev/null +++ b/src/util/qemu-openpty.c @@ -0,0 +1,137 @@ +/* + * qemu-openpty.c + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2010 Red Hat, Inc. + * + * Wrapper function qemu_openpty() implementation. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +/* + * This is not part of oslib-posix.c because this function + * uses openpty() which often in -lutil, and if we add this + * dependency to oslib-posix.o, every app will have to be + * linked with -lutil. + */ + +#include "config-host.h" +#include "qemu-common.h" + +#if defined(__GLIBC__) +# include <pty.h> +#elif defined CONFIG_BSD +# include <termios.h> +# if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) +# include <libutil.h> +# else +# include <util.h> +# endif +#elif defined CONFIG_SOLARIS +# include <termios.h> +# include <stropts.h> +#else +# include <termios.h> +#endif + +#ifdef __sun__ +/* Once Solaris has openpty(), this is going to be removed. 
*/ +static int openpty(int *amaster, int *aslave, char *name, + struct termios *termp, struct winsize *winp) +{ + const char *slave; + int mfd = -1, sfd = -1; + + *amaster = *aslave = -1; + + mfd = open("/dev/ptmx", O_RDWR | O_NOCTTY); + if (mfd < 0) + goto err; + + if (grantpt(mfd) == -1 || unlockpt(mfd) == -1) + goto err; + + if ((slave = ptsname(mfd)) == NULL) + goto err; + + if ((sfd = open(slave, O_RDONLY | O_NOCTTY)) == -1) + goto err; + + if (ioctl(sfd, I_PUSH, "ptem") == -1 || + (termp != NULL && tcgetattr(sfd, termp) < 0)) + goto err; + + if (amaster) + *amaster = mfd; + if (aslave) + *aslave = sfd; + if (winp) + ioctl(sfd, TIOCSWINSZ, winp); + + return 0; + +err: + if (sfd != -1) + close(sfd); + close(mfd); + return -1; +} + +static void cfmakeraw (struct termios *termios_p) +{ + termios_p->c_iflag &= + ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON); + termios_p->c_oflag &= ~OPOST; + termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN); + termios_p->c_cflag &= ~(CSIZE|PARENB); + termios_p->c_cflag |= CS8; + + termios_p->c_cc[VMIN] = 0; + termios_p->c_cc[VTIME] = 0; +} +#endif + +int qemu_openpty_raw(int *aslave, char *pty_name) +{ + int amaster; + struct termios tty; +#if defined(__OpenBSD__) || defined(__DragonFly__) + char pty_buf[PATH_MAX]; +#define q_ptsname(x) pty_buf +#else + char *pty_buf = NULL; +#define q_ptsname(x) ptsname(x) +#endif + + if (openpty(&amaster, aslave, pty_buf, NULL, NULL) < 0) { + return -1; + } + + /* Set raw attributes on the pty. 
*/ + tcgetattr(*aslave, &tty); + cfmakeraw(&tty); + tcsetattr(*aslave, TCSAFLUSH, &tty); + + if (pty_name) { + strcpy(pty_name, q_ptsname(amaster)); + } + + return amaster; +} diff --git a/src/util/qemu-option.c b/src/util/qemu-option.c new file mode 100644 index 0000000..a50ecea --- /dev/null +++ b/src/util/qemu-option.c @@ -0,0 +1,1205 @@ +/* + * Commandline option parsing functions + * + * Copyright (c) 2003-2008 Fabrice Bellard + * Copyright (c) 2009 Kevin Wolf <kwolf@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ + +#include <stdio.h> +#include <string.h> + +#include "qemu-common.h" +#include "qemu/error-report.h" +#include "qapi/qmp/types.h" +#include "qapi/error.h" +#include "qapi/qmp/qerror.h" +#include "qemu/option_int.h" + +/* + * Extracts the name of an option from the parameter string (p points at the + * first byte of the option name) + * + * The option name is delimited by delim (usually , or =) or the string end + * and is copied into buf. If the option name is longer than buf_size, it is + * truncated. buf is always zero terminated. + * + * The return value is the position of the delimiter/zero byte after the option + * name in p. + */ +const char *get_opt_name(char *buf, int buf_size, const char *p, char delim) +{ + char *q; + + q = buf; + while (*p != '\0' && *p != delim) { + if (q && (q - buf) < buf_size - 1) + *q++ = *p; + p++; + } + if (q) + *q = '\0'; + + return p; +} + +/* + * Extracts the value of an option from the parameter string p (p points at the + * first byte of the option value) + * + * This function is comparable to get_opt_name with the difference that the + * delimiter is fixed to be comma which starts a new option. To specify an + * option value that contains commas, double each comma. 
+ */ +const char *get_opt_value(char *buf, int buf_size, const char *p) +{ + char *q; + + q = buf; + while (*p != '\0') { + if (*p == ',') { + if (*(p + 1) != ',') + break; + p++; + } + if (q && (q - buf) < buf_size - 1) + *q++ = *p; + p++; + } + if (q) + *q = '\0'; + + return p; +} + +int get_next_param_value(char *buf, int buf_size, + const char *tag, const char **pstr) +{ + const char *p; + char option[128]; + + p = *pstr; + for(;;) { + p = get_opt_name(option, sizeof(option), p, '='); + if (*p != '=') + break; + p++; + if (!strcmp(tag, option)) { + *pstr = get_opt_value(buf, buf_size, p); + if (**pstr == ',') { + (*pstr)++; + } + return strlen(buf); + } else { + p = get_opt_value(NULL, 0, p); + } + if (*p != ',') + break; + p++; + } + return 0; +} + +int get_param_value(char *buf, int buf_size, + const char *tag, const char *str) +{ + return get_next_param_value(buf, buf_size, tag, &str); +} + +static void parse_option_bool(const char *name, const char *value, bool *ret, + Error **errp) +{ + if (value != NULL) { + if (!strcmp(value, "on")) { + *ret = 1; + } else if (!strcmp(value, "off")) { + *ret = 0; + } else { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, + name, "'on' or 'off'"); + } + } else { + *ret = 1; + } +} + +static void parse_option_number(const char *name, const char *value, + uint64_t *ret, Error **errp) +{ + char *postfix; + uint64_t number; + + if (value != NULL) { + number = strtoull(value, &postfix, 0); + if (*postfix != '\0') { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number"); + return; + } + *ret = number; + } else { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a number"); + } +} + +static const QemuOptDesc *find_desc_by_name(const QemuOptDesc *desc, + const char *name) +{ + int i; + + for (i = 0; desc[i].name != NULL; i++) { + if (strcmp(desc[i].name, name) == 0) { + return &desc[i]; + } + } + + return NULL; +} + +void parse_option_size(const char *name, const char *value, + uint64_t *ret, Error **errp) +{ + 
char *postfix; + double sizef; + + if (value != NULL) { + sizef = strtod(value, &postfix); + if (sizef < 0 || sizef > UINT64_MAX) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, + "a non-negative number below 2^64"); + return; + } + switch (*postfix) { + case 'T': + sizef *= 1024; + /* fall through */ + case 'G': + sizef *= 1024; + /* fall through */ + case 'M': + sizef *= 1024; + /* fall through */ + case 'K': + case 'k': + sizef *= 1024; + /* fall through */ + case 'b': + case '\0': + *ret = (uint64_t) sizef; + break; + default: + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); + error_append_hint(errp, "You may use k, M, G or T suffixes for " + "kilobytes, megabytes, gigabytes and terabytes."); + return; + } + } else { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, name, "a size"); + } +} + +bool has_help_option(const char *param) +{ + size_t buflen = strlen(param) + 1; + char *buf = g_malloc(buflen); + const char *p = param; + bool result = false; + + while (*p) { + p = get_opt_value(buf, buflen, p); + if (*p) { + p++; + } + + if (is_help_option(buf)) { + result = true; + goto out; + } + } + +out: + g_free(buf); + return result; +} + +bool is_valid_option_list(const char *param) +{ + size_t buflen = strlen(param) + 1; + char *buf = g_malloc(buflen); + const char *p = param; + bool result = true; + + while (*p) { + p = get_opt_value(buf, buflen, p); + if (*p && !*++p) { + result = false; + goto out; + } + + if (!*buf || *buf == ',') { + result = false; + goto out; + } + } + +out: + g_free(buf); + return result; +} + +void qemu_opts_print_help(QemuOptsList *list) +{ + QemuOptDesc *desc; + + assert(list); + desc = list->desc; + printf("Supported options:\n"); + while (desc && desc->name) { + printf("%-16s %s\n", desc->name, + desc->help ? 
desc->help : "No description available"); + desc++; + } +} +/* ------------------------------------------------------------------ */ + +QemuOpt *qemu_opt_find(QemuOpts *opts, const char *name) +{ + QemuOpt *opt; + + QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) { + if (strcmp(opt->name, name) != 0) + continue; + return opt; + } + return NULL; +} + +static void qemu_opt_del(QemuOpt *opt) +{ + QTAILQ_REMOVE(&opt->opts->head, opt, next); + g_free(opt->name); + g_free(opt->str); + g_free(opt); +} + +/* qemu_opt_set allows many settings for the same option. + * This function deletes all settings for an option. + */ +static void qemu_opt_del_all(QemuOpts *opts, const char *name) +{ + QemuOpt *opt, *next_opt; + + QTAILQ_FOREACH_SAFE(opt, &opts->head, next, next_opt) { + if (!strcmp(opt->name, name)) { + qemu_opt_del(opt); + } + } +} + +const char *qemu_opt_get(QemuOpts *opts, const char *name) +{ + QemuOpt *opt; + + if (opts == NULL) { + return NULL; + } + + opt = qemu_opt_find(opts, name); + if (!opt) { + const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); + if (desc && desc->def_value_str) { + return desc->def_value_str; + } + } + return opt ? opt->str : NULL; +} + +/* Get a known option (or its default) and remove it from the list + * all in one action. Return a malloced string of the option value. + * Result must be freed by caller with g_free(). 
+ */ +char *qemu_opt_get_del(QemuOpts *opts, const char *name) +{ + QemuOpt *opt; + const QemuOptDesc *desc; + char *str = NULL; + + if (opts == NULL) { + return NULL; + } + + opt = qemu_opt_find(opts, name); + if (!opt) { + desc = find_desc_by_name(opts->list->desc, name); + if (desc && desc->def_value_str) { + str = g_strdup(desc->def_value_str); + } + return str; + } + str = opt->str; + opt->str = NULL; + qemu_opt_del_all(opts, name); + return str; +} + +bool qemu_opt_has_help_opt(QemuOpts *opts) +{ + QemuOpt *opt; + + QTAILQ_FOREACH_REVERSE(opt, &opts->head, QemuOptHead, next) { + if (is_help_option(opt->name)) { + return true; + } + } + return false; +} + +static bool qemu_opt_get_bool_helper(QemuOpts *opts, const char *name, + bool defval, bool del) +{ + QemuOpt *opt; + bool ret = defval; + + if (opts == NULL) { + return ret; + } + + opt = qemu_opt_find(opts, name); + if (opt == NULL) { + const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); + if (desc && desc->def_value_str) { + parse_option_bool(name, desc->def_value_str, &ret, &error_abort); + } + return ret; + } + assert(opt->desc && opt->desc->type == QEMU_OPT_BOOL); + ret = opt->value.boolean; + if (del) { + qemu_opt_del_all(opts, name); + } + return ret; +} + +bool qemu_opt_get_bool(QemuOpts *opts, const char *name, bool defval) +{ + return qemu_opt_get_bool_helper(opts, name, defval, false); +} + +bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval) +{ + return qemu_opt_get_bool_helper(opts, name, defval, true); +} + +static uint64_t qemu_opt_get_number_helper(QemuOpts *opts, const char *name, + uint64_t defval, bool del) +{ + QemuOpt *opt; + uint64_t ret = defval; + + if (opts == NULL) { + return ret; + } + + opt = qemu_opt_find(opts, name); + if (opt == NULL) { + const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); + if (desc && desc->def_value_str) { + parse_option_number(name, desc->def_value_str, &ret, &error_abort); + } + return ret; + } + 
assert(opt->desc && opt->desc->type == QEMU_OPT_NUMBER); + ret = opt->value.uint; + if (del) { + qemu_opt_del_all(opts, name); + } + return ret; +} + +uint64_t qemu_opt_get_number(QemuOpts *opts, const char *name, uint64_t defval) +{ + return qemu_opt_get_number_helper(opts, name, defval, false); +} + +uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name, + uint64_t defval) +{ + return qemu_opt_get_number_helper(opts, name, defval, true); +} + +static uint64_t qemu_opt_get_size_helper(QemuOpts *opts, const char *name, + uint64_t defval, bool del) +{ + QemuOpt *opt; + uint64_t ret = defval; + + if (opts == NULL) { + return ret; + } + + opt = qemu_opt_find(opts, name); + if (opt == NULL) { + const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); + if (desc && desc->def_value_str) { + parse_option_size(name, desc->def_value_str, &ret, &error_abort); + } + return ret; + } + assert(opt->desc && opt->desc->type == QEMU_OPT_SIZE); + ret = opt->value.uint; + if (del) { + qemu_opt_del_all(opts, name); + } + return ret; +} + +uint64_t qemu_opt_get_size(QemuOpts *opts, const char *name, uint64_t defval) +{ + return qemu_opt_get_size_helper(opts, name, defval, false); +} + +uint64_t qemu_opt_get_size_del(QemuOpts *opts, const char *name, + uint64_t defval) +{ + return qemu_opt_get_size_helper(opts, name, defval, true); +} + +static void qemu_opt_parse(QemuOpt *opt, Error **errp) +{ + if (opt->desc == NULL) + return; + + switch (opt->desc->type) { + case QEMU_OPT_STRING: + /* nothing */ + return; + case QEMU_OPT_BOOL: + parse_option_bool(opt->name, opt->str, &opt->value.boolean, errp); + break; + case QEMU_OPT_NUMBER: + parse_option_number(opt->name, opt->str, &opt->value.uint, errp); + break; + case QEMU_OPT_SIZE: + parse_option_size(opt->name, opt->str, &opt->value.uint, errp); + break; + default: + abort(); + } +} + +static bool opts_accepts_any(const QemuOpts *opts) +{ + return opts->list->desc[0].name == NULL; +} + +int qemu_opt_unset(QemuOpts 
*opts, const char *name) +{ + QemuOpt *opt = qemu_opt_find(opts, name); + + assert(opts_accepts_any(opts)); + + if (opt == NULL) { + return -1; + } else { + qemu_opt_del(opt); + return 0; + } +} + +static void opt_set(QemuOpts *opts, const char *name, const char *value, + bool prepend, Error **errp) +{ + QemuOpt *opt; + const QemuOptDesc *desc; + Error *local_err = NULL; + + desc = find_desc_by_name(opts->list->desc, name); + if (!desc && !opts_accepts_any(opts)) { + error_setg(errp, QERR_INVALID_PARAMETER, name); + return; + } + + opt = g_malloc0(sizeof(*opt)); + opt->name = g_strdup(name); + opt->opts = opts; + if (prepend) { + QTAILQ_INSERT_HEAD(&opts->head, opt, next); + } else { + QTAILQ_INSERT_TAIL(&opts->head, opt, next); + } + opt->desc = desc; + opt->str = g_strdup(value); + qemu_opt_parse(opt, &local_err); + if (local_err) { + error_propagate(errp, local_err); + qemu_opt_del(opt); + } +} + +void qemu_opt_set(QemuOpts *opts, const char *name, const char *value, + Error **errp) +{ + opt_set(opts, name, value, false, errp); +} + +void qemu_opt_set_bool(QemuOpts *opts, const char *name, bool val, + Error **errp) +{ + QemuOpt *opt; + const QemuOptDesc *desc = opts->list->desc; + + opt = g_malloc0(sizeof(*opt)); + opt->desc = find_desc_by_name(desc, name); + if (!opt->desc && !opts_accepts_any(opts)) { + error_setg(errp, QERR_INVALID_PARAMETER, name); + g_free(opt); + return; + } + + opt->name = g_strdup(name); + opt->opts = opts; + opt->value.boolean = !!val; + opt->str = g_strdup(val ? 
"on" : "off"); + QTAILQ_INSERT_TAIL(&opts->head, opt, next); +} + +void qemu_opt_set_number(QemuOpts *opts, const char *name, int64_t val, + Error **errp) +{ + QemuOpt *opt; + const QemuOptDesc *desc = opts->list->desc; + + opt = g_malloc0(sizeof(*opt)); + opt->desc = find_desc_by_name(desc, name); + if (!opt->desc && !opts_accepts_any(opts)) { + error_setg(errp, QERR_INVALID_PARAMETER, name); + g_free(opt); + return; + } + + opt->name = g_strdup(name); + opt->opts = opts; + opt->value.uint = val; + opt->str = g_strdup_printf("%" PRId64, val); + QTAILQ_INSERT_TAIL(&opts->head, opt, next); +} + +/** + * For each member of @opts, call @func(@opaque, name, value, @errp). + * @func() may store an Error through @errp, but must return non-zero then. + * When @func() returns non-zero, break the loop and return that value. + * Return zero when the loop completes. + */ +int qemu_opt_foreach(QemuOpts *opts, qemu_opt_loopfunc func, void *opaque, + Error **errp) +{ + QemuOpt *opt; + int rc; + + QTAILQ_FOREACH(opt, &opts->head, next) { + rc = func(opaque, opt->name, opt->str, errp); + if (rc) { + return rc; + } + assert(!errp || !*errp); + } + return 0; +} + +QemuOpts *qemu_opts_find(QemuOptsList *list, const char *id) +{ + QemuOpts *opts; + + QTAILQ_FOREACH(opts, &list->head, next) { + if (!opts->id && !id) { + return opts; + } + if (opts->id && id && !strcmp(opts->id, id)) { + return opts; + } + } + return NULL; +} + +QemuOpts *qemu_opts_create(QemuOptsList *list, const char *id, + int fail_if_exists, Error **errp) +{ + QemuOpts *opts = NULL; + + if (id) { + if (!id_wellformed(id)) { + error_setg(errp, QERR_INVALID_PARAMETER_VALUE, "id", + "an identifier"); + error_append_hint(errp, "Identifiers consist of letters, digits, " + "'-', '.', '_', starting with a letter."); + return NULL; + } + opts = qemu_opts_find(list, id); + if (opts != NULL) { + if (fail_if_exists && !list->merge_lists) { + error_setg(errp, "Duplicate ID '%s' for %s", id, list->name); + return NULL; + } else 
{ + return opts; + } + } + } else if (list->merge_lists) { + opts = qemu_opts_find(list, NULL); + if (opts) { + return opts; + } + } + opts = g_malloc0(sizeof(*opts)); + opts->id = g_strdup(id); + opts->list = list; + loc_save(&opts->loc); + QTAILQ_INIT(&opts->head); + QTAILQ_INSERT_TAIL(&list->head, opts, next); + return opts; +} + +void qemu_opts_reset(QemuOptsList *list) +{ + QemuOpts *opts, *next_opts; + + QTAILQ_FOREACH_SAFE(opts, &list->head, next, next_opts) { + qemu_opts_del(opts); + } +} + +void qemu_opts_loc_restore(QemuOpts *opts) +{ + loc_restore(&opts->loc); +} + +void qemu_opts_set(QemuOptsList *list, const char *id, + const char *name, const char *value, Error **errp) +{ + QemuOpts *opts; + Error *local_err = NULL; + + opts = qemu_opts_create(list, id, 1, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + qemu_opt_set(opts, name, value, errp); +} + +const char *qemu_opts_id(QemuOpts *opts) +{ + return opts->id; +} + +/* The id string will be g_free()d by qemu_opts_del */ +void qemu_opts_set_id(QemuOpts *opts, char *id) +{ + opts->id = id; +} + +void qemu_opts_del(QemuOpts *opts) +{ + QemuOpt *opt; + + if (opts == NULL) { + return; + } + + for (;;) { + opt = QTAILQ_FIRST(&opts->head); + if (opt == NULL) + break; + qemu_opt_del(opt); + } + QTAILQ_REMOVE(&opts->list->head, opts, next); + g_free(opts->id); + g_free(opts); +} + +/* print value, escaping any commas in value */ +static void escaped_print(const char *value) +{ + const char *ptr; + + for (ptr = value; *ptr; ++ptr) { + if (*ptr == ',') { + putchar(','); + } + putchar(*ptr); + } +} + +void qemu_opts_print(QemuOpts *opts, const char *separator) +{ + QemuOpt *opt; + QemuOptDesc *desc = opts->list->desc; + const char *sep = ""; + + if (opts->id) { + printf("id=%s", opts->id); /* passed id_wellformed -> no commas */ + sep = separator; + } + + if (desc[0].name == NULL) { + QTAILQ_FOREACH(opt, &opts->head, next) { + printf("%s%s=", sep, opt->name); + 
escaped_print(opt->str); + sep = separator; + } + return; + } + for (; desc && desc->name; desc++) { + const char *value; + QemuOpt *opt = qemu_opt_find(opts, desc->name); + + value = opt ? opt->str : desc->def_value_str; + if (!value) { + continue; + } + if (desc->type == QEMU_OPT_STRING) { + printf("%s%s=", sep, desc->name); + escaped_print(value); + } else if ((desc->type == QEMU_OPT_SIZE || + desc->type == QEMU_OPT_NUMBER) && opt) { + printf("%s%s=%" PRId64, sep, desc->name, opt->value.uint); + } else { + printf("%s%s=%s", sep, desc->name, value); + } + sep = separator; + } +} + +static void opts_do_parse(QemuOpts *opts, const char *params, + const char *firstname, bool prepend, Error **errp) +{ + char option[128], value[1024]; + const char *p,*pe,*pc; + Error *local_err = NULL; + + for (p = params; *p != '\0'; p++) { + pe = strchr(p, '='); + pc = strchr(p, ','); + if (!pe || (pc && pc < pe)) { + /* found "foo,more" */ + if (p == params && firstname) { + /* implicitly named first option */ + pstrcpy(option, sizeof(option), firstname); + p = get_opt_value(value, sizeof(value), p); + } else { + /* option without value, probably a flag */ + p = get_opt_name(option, sizeof(option), p, ','); + if (strncmp(option, "no", 2) == 0) { + memmove(option, option+2, strlen(option+2)+1); + pstrcpy(value, sizeof(value), "off"); + } else { + pstrcpy(value, sizeof(value), "on"); + } + } + } else { + /* found "foo=bar,more" */ + p = get_opt_name(option, sizeof(option), p, '='); + if (*p != '=') { + break; + } + p++; + p = get_opt_value(value, sizeof(value), p); + } + if (strcmp(option, "id") != 0) { + /* store and parse */ + opt_set(opts, option, value, prepend, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } + if (*p != ',') { + break; + } + } +} + +/** + * Store options parsed from @params into @opts. + * If @firstname is non-null, the first key=value in @params may omit + * key=, and is treated as if key was @firstname. 
+ * On error, store an error object through @errp if non-null. + */ +void qemu_opts_do_parse(QemuOpts *opts, const char *params, + const char *firstname, Error **errp) +{ + opts_do_parse(opts, params, firstname, false, errp); +} + +static QemuOpts *opts_parse(QemuOptsList *list, const char *params, + bool permit_abbrev, bool defaults, Error **errp) +{ + const char *firstname; + char value[1024], *id = NULL; + const char *p; + QemuOpts *opts; + Error *local_err = NULL; + + assert(!permit_abbrev || list->implied_opt_name); + firstname = permit_abbrev ? list->implied_opt_name : NULL; + + if (strncmp(params, "id=", 3) == 0) { + get_opt_value(value, sizeof(value), params+3); + id = value; + } else if ((p = strstr(params, ",id=")) != NULL) { + get_opt_value(value, sizeof(value), p+4); + id = value; + } + + /* + * This code doesn't work for defaults && !list->merge_lists: when + * params has no id=, and list has an element with !opts->id, it + * appends a new element instead of returning the existing opts. + * However, we got no use for this case. Guard against possible + * (if unlikely) future misuse: + */ + assert(!defaults || list->merge_lists); + opts = qemu_opts_create(list, id, !defaults, &local_err); + if (opts == NULL) { + error_propagate(errp, local_err); + return NULL; + } + + opts_do_parse(opts, params, firstname, defaults, &local_err); + if (local_err) { + error_propagate(errp, local_err); + qemu_opts_del(opts); + return NULL; + } + + return opts; +} + +/** + * Create a QemuOpts in @list and with options parsed from @params. + * If @permit_abbrev, the first key=value in @params may omit key=, + * and is treated as if key was @list->implied_opt_name. + * On error, store an error object through @errp if non-null. + * Return the new QemuOpts on success, null pointer on error. 
+ */ +QemuOpts *qemu_opts_parse(QemuOptsList *list, const char *params, + bool permit_abbrev, Error **errp) +{ + return opts_parse(list, params, permit_abbrev, false, errp); +} + +/** + * Create a QemuOpts in @list and with options parsed from @params. + * If @permit_abbrev, the first key=value in @params may omit key=, + * and is treated as if key was @list->implied_opt_name. + * Report errors with error_report_err(). This is inappropriate in + * QMP context. Do not use this function there! + * Return the new QemuOpts on success, null pointer on error. + */ +QemuOpts *qemu_opts_parse_noisily(QemuOptsList *list, const char *params, + bool permit_abbrev) +{ + Error *err = NULL; + QemuOpts *opts; + + opts = opts_parse(list, params, permit_abbrev, false, &err); + if (err) { + error_report_err(err); + } + return opts; +} + +void qemu_opts_set_defaults(QemuOptsList *list, const char *params, + int permit_abbrev) +{ + QemuOpts *opts; + + opts = opts_parse(list, params, permit_abbrev, true, NULL); + assert(opts); +} + +typedef struct OptsFromQDictState { + QemuOpts *opts; + Error **errp; +} OptsFromQDictState; + +static void qemu_opts_from_qdict_1(const char *key, QObject *obj, void *opaque) +{ + OptsFromQDictState *state = opaque; + char buf[32]; + const char *value; + int n; + + if (!strcmp(key, "id") || *state->errp) { + return; + } + + switch (qobject_type(obj)) { + case QTYPE_QSTRING: + value = qstring_get_str(qobject_to_qstring(obj)); + break; + case QTYPE_QINT: + n = snprintf(buf, sizeof(buf), "%" PRId64, + qint_get_int(qobject_to_qint(obj))); + assert(n < sizeof(buf)); + value = buf; + break; + case QTYPE_QFLOAT: + n = snprintf(buf, sizeof(buf), "%.17g", + qfloat_get_double(qobject_to_qfloat(obj))); + assert(n < sizeof(buf)); + value = buf; + break; + case QTYPE_QBOOL: + pstrcpy(buf, sizeof(buf), + qbool_get_bool(qobject_to_qbool(obj)) ? 
"on" : "off"); + value = buf; + break; + default: + return; + } + + qemu_opt_set(state->opts, key, value, state->errp); +} + +/* + * Create QemuOpts from a QDict. + * Use value of key "id" as ID if it exists and is a QString. + * Only QStrings, QInts, QFloats and QBools are copied. Entries with + * other types are silently ignored. + */ +QemuOpts *qemu_opts_from_qdict(QemuOptsList *list, const QDict *qdict, + Error **errp) +{ + OptsFromQDictState state; + Error *local_err = NULL; + QemuOpts *opts; + + opts = qemu_opts_create(list, qdict_get_try_str(qdict, "id"), 1, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + return NULL; + } + + assert(opts != NULL); + + state.errp = &local_err; + state.opts = opts; + qdict_iter(qdict, qemu_opts_from_qdict_1, &state); + if (local_err) { + error_propagate(errp, local_err); + qemu_opts_del(opts); + return NULL; + } + + return opts; +} + +/* + * Adds all QDict entries to the QemuOpts that can be added and removes them + * from the QDict. When this function returns, the QDict contains only those + * entries that couldn't be added to the QemuOpts. + */ +void qemu_opts_absorb_qdict(QemuOpts *opts, QDict *qdict, Error **errp) +{ + const QDictEntry *entry, *next; + + entry = qdict_first(qdict); + + while (entry != NULL) { + Error *local_err = NULL; + OptsFromQDictState state = { + .errp = &local_err, + .opts = opts, + }; + + next = qdict_next(qdict, entry); + + if (find_desc_by_name(opts->list->desc, entry->key)) { + qemu_opts_from_qdict_1(entry->key, entry->value, &state); + if (local_err) { + error_propagate(errp, local_err); + return; + } else { + qdict_del(qdict, entry->key); + } + } + + entry = next; + } +} + +/* + * Convert from QemuOpts to QDict. + * The QDict values are of type QString. + * TODO We'll want to use types appropriate for opt->desc->type, but + * this is enough for now. 
+ */ +QDict *qemu_opts_to_qdict(QemuOpts *opts, QDict *qdict) +{ + QemuOpt *opt; + QObject *val; + + if (!qdict) { + qdict = qdict_new(); + } + if (opts->id) { + qdict_put(qdict, "id", qstring_from_str(opts->id)); + } + QTAILQ_FOREACH(opt, &opts->head, next) { + val = QOBJECT(qstring_from_str(opt->str)); + qdict_put_obj(qdict, opt->name, val); + } + return qdict; +} + +/* Validate parsed opts against descriptions where no + * descriptions were provided in the QemuOptsList. + */ +void qemu_opts_validate(QemuOpts *opts, const QemuOptDesc *desc, Error **errp) +{ + QemuOpt *opt; + Error *local_err = NULL; + + assert(opts_accepts_any(opts)); + + QTAILQ_FOREACH(opt, &opts->head, next) { + opt->desc = find_desc_by_name(desc, opt->name); + if (!opt->desc) { + error_setg(errp, QERR_INVALID_PARAMETER, opt->name); + return; + } + + qemu_opt_parse(opt, &local_err); + if (local_err) { + error_propagate(errp, local_err); + return; + } + } +} + +/** + * For each member of @list, call @func(@opaque, member, @errp). + * Call it with the current location temporarily set to the member's. + * @func() may store an Error through @errp, but must return non-zero then. + * When @func() returns non-zero, break the loop and return that value. + * Return zero when the loop completes. 
+ */ +int qemu_opts_foreach(QemuOptsList *list, qemu_opts_loopfunc func, + void *opaque, Error **errp) +{ + Location loc; + QemuOpts *opts; + int rc; + + loc_push_none(&loc); + QTAILQ_FOREACH(opts, &list->head, next) { + loc_restore(&opts->loc); + rc = func(opaque, opts, errp); + if (rc) { + return rc; + } + assert(!errp || !*errp); + } + loc_pop(&loc); + return 0; +} + +static size_t count_opts_list(QemuOptsList *list) +{ + QemuOptDesc *desc = NULL; + size_t num_opts = 0; + + if (!list) { + return 0; + } + + desc = list->desc; + while (desc && desc->name) { + num_opts++; + desc++; + } + + return num_opts; +} + +void qemu_opts_free(QemuOptsList *list) +{ + g_free(list); +} + +/* Realloc dst option list and append options from an option list (list) + * to it. dst could be NULL or a malloced list. + * The lifetime of dst must be shorter than the input list because the + * QemuOptDesc->name, ->help, and ->def_value_str strings are shared. + */ +QemuOptsList *qemu_opts_append(QemuOptsList *dst, + QemuOptsList *list) +{ + size_t num_opts, num_dst_opts; + QemuOptDesc *desc; + bool need_init = false; + bool need_head_update; + + if (!list) { + return dst; + } + + /* If dst is NULL, after realloc, some area of dst should be initialized + * before adding options to it. + */ + if (!dst) { + need_init = true; + need_head_update = true; + } else { + /* Moreover, even if dst is not NULL, the realloc may move it to a + * different address in which case we may get a stale tail pointer + * in dst->head. 
*/ + need_head_update = QTAILQ_EMPTY(&dst->head); + } + + num_opts = count_opts_list(dst); + num_dst_opts = num_opts; + num_opts += count_opts_list(list); + dst = g_realloc(dst, sizeof(QemuOptsList) + + (num_opts + 1) * sizeof(QemuOptDesc)); + if (need_init) { + dst->name = NULL; + dst->implied_opt_name = NULL; + dst->merge_lists = false; + } + if (need_head_update) { + QTAILQ_INIT(&dst->head); + } + dst->desc[num_dst_opts].name = NULL; + + /* append list->desc to dst->desc */ + if (list) { + desc = list->desc; + while (desc && desc->name) { + if (find_desc_by_name(dst->desc, desc->name) == NULL) { + dst->desc[num_dst_opts++] = *desc; + dst->desc[num_dst_opts].name = NULL; + } + desc++; + } + } + + return dst; +} diff --git a/src/util/qemu-progress.c b/src/util/qemu-progress.c new file mode 100644 index 0000000..4ee5cd0 --- /dev/null +++ b/src/util/qemu-progress.c @@ -0,0 +1,159 @@ +/* + * QEMU progress printing utility functions + * + * Copyright (C) 2011 Jes Sorensen <Jes.Sorensen@redhat.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/osdep.h" +#include <stdio.h> + +struct progress_state { + float current; + float last_print; + float min_skip; + void (*print)(void); + void (*end)(void); +}; + +static struct progress_state state; +static volatile sig_atomic_t print_pending; + +/* + * Simple progress print function. + * @percent relative percent of current operation + * @max percent of total operation + */ +static void progress_simple_print(void) +{ + printf(" (%3.2f/100%%)\r", state.current); + fflush(stdout); +} + +static void progress_simple_end(void) +{ + printf("\n"); +} + +static void progress_simple_init(void) +{ + state.print = progress_simple_print; + state.end = progress_simple_end; +} + +#ifdef CONFIG_POSIX +static void sigusr_print(int signal) +{ + print_pending = 1; +} +#endif + +static void progress_dummy_print(void) +{ + if (print_pending) { + fprintf(stderr, " (%3.2f/100%%)\n", state.current); + print_pending = 0; + } +} + +static void progress_dummy_end(void) +{ +} + +static void progress_dummy_init(void) +{ +#ifdef CONFIG_POSIX + struct sigaction action; + sigset_t set; + + memset(&action, 0, sizeof(action)); + sigfillset(&action.sa_mask); + action.sa_handler = sigusr_print; + action.sa_flags = 0; + sigaction(SIGUSR1, &action, NULL); + + /* + * SIGUSR1 is SIG_IPI and gets blocked in qemu_init_main_loop(). In the + * tools that use the progress report SIGUSR1 isn't used in this meaning + * and instead should print the progress, so reenable it. + */ + sigemptyset(&set); + sigaddset(&set, SIGUSR1); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); +#endif + + state.print = progress_dummy_print; + state.end = progress_dummy_end; +} + +/* + * Initialize progress reporting. 
+ * If @enabled is false, actual reporting is suppressed. The user can + * still trigger a report by sending a SIGUSR1. + * Reports are also suppressed unless we've had at least @min_skip + * percent progress since the last report. + */ +void qemu_progress_init(int enabled, float min_skip) +{ + state.min_skip = min_skip; + if (enabled) { + progress_simple_init(); + } else { + progress_dummy_init(); + } +} + +void qemu_progress_end(void) +{ + state.end(); +} + +/* + * Report progress. + * @delta is how much progress we made. + * If @max is zero, @delta is an absolut value of the total job done. + * Else, @delta is a progress delta since the last call, as a fraction + * of @max. I.e. the delta is @delta * @max / 100. This allows + * relative accounting of functions which may be a different fraction of + * the full job, depending on the context they are called in. I.e. + * a function might be considered 40% of the full job if used from + * bdrv_img_create() but only 20% if called from img_convert(). + */ +void qemu_progress_print(float delta, int max) +{ + float current; + + if (max == 0) { + current = delta; + } else { + current = state.current + delta / 100 * max; + } + if (current > 100) { + current = 100; + } + state.current = current; + + if (current > (state.last_print + state.min_skip) || + (current == 100) || (current == 0)) { + state.last_print = state.current; + state.print(); + } +} diff --git a/src/util/qemu-sockets.c b/src/util/qemu-sockets.c new file mode 100644 index 0000000..5a31d16 --- /dev/null +++ b/src/util/qemu-sockets.c @@ -0,0 +1,1167 @@ +/* + * inet and unix socket functions for qemu + * + * (c) 2008 Gerd Hoffmann <kraxel@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ctype.h> +#include <errno.h> +#include <unistd.h> + +#include "monitor/monitor.h" +#include "qemu/sockets.h" +#include "qemu/main-loop.h" +#include "qapi/qmp-input-visitor.h" +#include "qapi/qmp-output-visitor.h" +#include "qapi-visit.h" + +#ifndef AI_ADDRCONFIG +# define AI_ADDRCONFIG 0 +#endif +#ifndef AI_V4MAPPED +# define AI_V4MAPPED 0 +#endif + +/* used temporarily until all users are converted to QemuOpts */ +QemuOptsList socket_optslist = { + .name = "socket", + .head = QTAILQ_HEAD_INITIALIZER(socket_optslist.head), + .desc = { + { + .name = "path", + .type = QEMU_OPT_STRING, + },{ + .name = "host", + .type = QEMU_OPT_STRING, + },{ + .name = "port", + .type = QEMU_OPT_STRING, + },{ + .name = "localaddr", + .type = QEMU_OPT_STRING, + },{ + .name = "localport", + .type = QEMU_OPT_STRING, + },{ + .name = "to", + .type = QEMU_OPT_NUMBER, + },{ + .name = "ipv4", + .type = QEMU_OPT_BOOL, + },{ + .name = "ipv6", + .type = QEMU_OPT_BOOL, + }, + { /* end if list */ } + }, +}; + +static int inet_getport(struct addrinfo *e) +{ + struct sockaddr_in *i4; + struct sockaddr_in6 *i6; + + switch (e->ai_family) { + case PF_INET6: + i6 = (void*)e->ai_addr; + return ntohs(i6->sin6_port); + case PF_INET: + i4 = (void*)e->ai_addr; + return ntohs(i4->sin_port); + default: + return 0; + } +} + +static void inet_setport(struct addrinfo *e, int port) +{ + struct sockaddr_in *i4; + struct sockaddr_in6 *i6; + + switch (e->ai_family) { + case PF_INET6: + i6 = (void*)e->ai_addr; + i6->sin6_port = htons(port); + break; + case 
PF_INET: + i4 = (void*)e->ai_addr; + i4->sin_port = htons(port); + break; + } +} + +NetworkAddressFamily inet_netfamily(int family) +{ + switch (family) { + case PF_INET6: return NETWORK_ADDRESS_FAMILY_IPV6; + case PF_INET: return NETWORK_ADDRESS_FAMILY_IPV4; + case PF_UNIX: return NETWORK_ADDRESS_FAMILY_UNIX; + } + return NETWORK_ADDRESS_FAMILY_UNKNOWN; +} + +int inet_listen_opts(QemuOpts *opts, int port_offset, Error **errp) +{ + struct addrinfo ai,*res,*e; + const char *addr; + char port[33]; + char uaddr[INET6_ADDRSTRLEN+1]; + char uport[33]; + int slisten, rc, to, port_min, port_max, p; + + memset(&ai,0, sizeof(ai)); + ai.ai_flags = AI_PASSIVE; + ai.ai_family = PF_UNSPEC; + ai.ai_socktype = SOCK_STREAM; + + if ((qemu_opt_get(opts, "host") == NULL)) { + error_setg(errp, "host not specified"); + return -1; + } + if (qemu_opt_get(opts, "port") != NULL) { + pstrcpy(port, sizeof(port), qemu_opt_get(opts, "port")); + } else { + port[0] = '\0'; + } + addr = qemu_opt_get(opts, "host"); + + to = qemu_opt_get_number(opts, "to", 0); + if (qemu_opt_get_bool(opts, "ipv4", 0)) + ai.ai_family = PF_INET; + if (qemu_opt_get_bool(opts, "ipv6", 0)) + ai.ai_family = PF_INET6; + + /* lookup */ + if (port_offset) { + unsigned long long baseport; + if (strlen(port) == 0) { + error_setg(errp, "port not specified"); + return -1; + } + if (parse_uint_full(port, &baseport, 10) < 0) { + error_setg(errp, "can't convert to a number: %s", port); + return -1; + } + if (baseport > 65535 || + baseport + port_offset > 65535) { + error_setg(errp, "port %s out of range", port); + return -1; + } + snprintf(port, sizeof(port), "%d", (int)baseport + port_offset); + } + rc = getaddrinfo(strlen(addr) ? addr : NULL, + strlen(port) ? 
port : NULL, &ai, &res); + if (rc != 0) { + error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, + gai_strerror(rc)); + return -1; + } + + /* create socket + bind */ + for (e = res; e != NULL; e = e->ai_next) { + getnameinfo((struct sockaddr*)e->ai_addr,e->ai_addrlen, + uaddr,INET6_ADDRSTRLEN,uport,32, + NI_NUMERICHOST | NI_NUMERICSERV); + slisten = qemu_socket(e->ai_family, e->ai_socktype, e->ai_protocol); + if (slisten < 0) { + if (!e->ai_next) { + error_setg_errno(errp, errno, "Failed to create socket"); + } + continue; + } + + socket_set_fast_reuse(slisten); +#ifdef IPV6_V6ONLY + if (e->ai_family == PF_INET6) { + /* listen on both ipv4 and ipv6 */ + const int off = 0; + qemu_setsockopt(slisten, IPPROTO_IPV6, IPV6_V6ONLY, &off, + sizeof(off)); + } +#endif + + port_min = inet_getport(e); + port_max = to ? to + port_offset : port_min; + for (p = port_min; p <= port_max; p++) { + inet_setport(e, p); + if (bind(slisten, e->ai_addr, e->ai_addrlen) == 0) { + goto listen; + } + if (p == port_max) { + if (!e->ai_next) { + error_setg_errno(errp, errno, "Failed to bind socket"); + } + } + } + closesocket(slisten); + } + freeaddrinfo(res); + return -1; + +listen: + if (listen(slisten,1) != 0) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + closesocket(slisten); + freeaddrinfo(res); + return -1; + } + qemu_opt_set(opts, "host", uaddr, &error_abort); + qemu_opt_set_number(opts, "port", inet_getport(e) - port_offset, + &error_abort); + qemu_opt_set_bool(opts, "ipv6", e->ai_family == PF_INET6, + &error_abort); + qemu_opt_set_bool(opts, "ipv4", e->ai_family != PF_INET6, + &error_abort); + freeaddrinfo(res); + return slisten; +} + +#ifdef _WIN32 +#define QEMU_SOCKET_RC_INPROGRESS(rc) \ + ((rc) == -EINPROGRESS || (rc) == -EWOULDBLOCK || (rc) == -WSAEALREADY) +#else +#define QEMU_SOCKET_RC_INPROGRESS(rc) \ + ((rc) == -EINPROGRESS) +#endif + +/* Struct to store connect state for non blocking connect */ +typedef struct ConnectState { + int fd; + 
struct addrinfo *addr_list; + struct addrinfo *current_addr; + NonBlockingConnectHandler *callback; + void *opaque; +} ConnectState; + +static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, + ConnectState *connect_state, Error **errp); + +static void wait_for_connect(void *opaque) +{ + ConnectState *s = opaque; + int val = 0, rc = 0; + socklen_t valsize = sizeof(val); + bool in_progress; + Error *err = NULL; + + qemu_set_fd_handler(s->fd, NULL, NULL, NULL); + + do { + rc = qemu_getsockopt(s->fd, SOL_SOCKET, SO_ERROR, &val, &valsize); + } while (rc == -1 && socket_error() == EINTR); + + /* update rc to contain error */ + if (!rc && val) { + rc = -1; + errno = val; + } + + /* connect error */ + if (rc < 0) { + error_setg_errno(&err, errno, "Error connecting to socket"); + closesocket(s->fd); + s->fd = rc; + } + + /* try to connect to the next address on the list */ + if (s->current_addr) { + while (s->current_addr->ai_next != NULL && s->fd < 0) { + s->current_addr = s->current_addr->ai_next; + s->fd = inet_connect_addr(s->current_addr, &in_progress, s, NULL); + if (s->fd < 0) { + error_free(err); + err = NULL; + error_setg_errno(&err, errno, "Unable to start socket connect"); + } + /* connect in progress */ + if (in_progress) { + goto out; + } + } + + freeaddrinfo(s->addr_list); + } + + if (s->callback) { + s->callback(s->fd, err, s->opaque); + } + g_free(s); +out: + error_free(err); +} + +static int inet_connect_addr(struct addrinfo *addr, bool *in_progress, + ConnectState *connect_state, Error **errp) +{ + int sock, rc; + + *in_progress = false; + + sock = qemu_socket(addr->ai_family, addr->ai_socktype, addr->ai_protocol); + if (sock < 0) { + error_setg_errno(errp, errno, "Failed to create socket"); + return -1; + } + socket_set_fast_reuse(sock); + if (connect_state != NULL) { + qemu_set_nonblock(sock); + } + /* connect to peer */ + do { + rc = 0; + if (connect(sock, addr->ai_addr, addr->ai_addrlen) < 0) { + rc = -socket_error(); + } + } while (rc 
== -EINTR); + + if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) { + connect_state->fd = sock; + qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state); + *in_progress = true; + } else if (rc < 0) { + error_setg_errno(errp, errno, "Failed to connect socket"); + closesocket(sock); + return -1; + } + return sock; +} + +static struct addrinfo *inet_parse_connect_opts(QemuOpts *opts, Error **errp) +{ + struct addrinfo ai, *res; + int rc; + const char *addr; + const char *port; + + memset(&ai, 0, sizeof(ai)); + + ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; + ai.ai_family = PF_UNSPEC; + ai.ai_socktype = SOCK_STREAM; + + addr = qemu_opt_get(opts, "host"); + port = qemu_opt_get(opts, "port"); + if (addr == NULL || port == NULL) { + error_setg(errp, "host and/or port not specified"); + return NULL; + } + + if (qemu_opt_get_bool(opts, "ipv4", 0)) { + ai.ai_family = PF_INET; + } + if (qemu_opt_get_bool(opts, "ipv6", 0)) { + ai.ai_family = PF_INET6; + } + + /* lookup */ + rc = getaddrinfo(addr, port, &ai, &res); + if (rc != 0) { + error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, + gai_strerror(rc)); + return NULL; + } + return res; +} + +/** + * Create a socket and connect it to an address. + * + * @opts: QEMU options, recognized parameters strings "host" and "port", + * bools "ipv4" and "ipv6". + * @errp: set on error + * @callback: callback function for non-blocking connect + * @opaque: opaque for callback function + * + * Returns: -1 on error, file descriptor on success. + * + * If @callback is non-null, the connect is non-blocking. If this + * function succeeds, callback will be called when the connection + * completes, with the file descriptor on success, or -1 on error. 
+ */ +int inet_connect_opts(QemuOpts *opts, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) +{ + Error *local_err = NULL; + struct addrinfo *res, *e; + int sock = -1; + bool in_progress; + ConnectState *connect_state = NULL; + + res = inet_parse_connect_opts(opts, errp); + if (!res) { + return -1; + } + + if (callback != NULL) { + connect_state = g_malloc0(sizeof(*connect_state)); + connect_state->addr_list = res; + connect_state->callback = callback; + connect_state->opaque = opaque; + } + + for (e = res; e != NULL; e = e->ai_next) { + error_free(local_err); + local_err = NULL; + if (connect_state != NULL) { + connect_state->current_addr = e; + } + sock = inet_connect_addr(e, &in_progress, connect_state, &local_err); + if (sock >= 0) { + break; + } + } + + if (sock < 0) { + error_propagate(errp, local_err); + } else if (in_progress) { + /* wait_for_connect() will do the rest */ + return sock; + } else { + if (callback) { + callback(sock, NULL, opaque); + } + } + g_free(connect_state); + freeaddrinfo(res); + return sock; +} + +int inet_dgram_opts(QemuOpts *opts, Error **errp) +{ + struct addrinfo ai, *peer = NULL, *local = NULL; + const char *addr; + const char *port; + int sock = -1, rc; + + /* lookup peer addr */ + memset(&ai,0, sizeof(ai)); + ai.ai_flags = AI_CANONNAME | AI_V4MAPPED | AI_ADDRCONFIG; + ai.ai_family = PF_UNSPEC; + ai.ai_socktype = SOCK_DGRAM; + + addr = qemu_opt_get(opts, "host"); + port = qemu_opt_get(opts, "port"); + if (addr == NULL || strlen(addr) == 0) { + addr = "localhost"; + } + if (port == NULL || strlen(port) == 0) { + error_setg(errp, "remote port not specified"); + return -1; + } + + if (qemu_opt_get_bool(opts, "ipv4", 0)) + ai.ai_family = PF_INET; + if (qemu_opt_get_bool(opts, "ipv6", 0)) + ai.ai_family = PF_INET6; + + if (0 != (rc = getaddrinfo(addr, port, &ai, &peer))) { + error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, + gai_strerror(rc)); + return -1; + } + + /* lookup local addr */ + 
memset(&ai,0, sizeof(ai)); + ai.ai_flags = AI_PASSIVE; + ai.ai_family = peer->ai_family; + ai.ai_socktype = SOCK_DGRAM; + + addr = qemu_opt_get(opts, "localaddr"); + port = qemu_opt_get(opts, "localport"); + if (addr == NULL || strlen(addr) == 0) { + addr = NULL; + } + if (!port || strlen(port) == 0) + port = "0"; + + if (0 != (rc = getaddrinfo(addr, port, &ai, &local))) { + error_setg(errp, "address resolution failed for %s:%s: %s", addr, port, + gai_strerror(rc)); + goto err; + } + + /* create socket */ + sock = qemu_socket(peer->ai_family, peer->ai_socktype, peer->ai_protocol); + if (sock < 0) { + error_setg_errno(errp, errno, "Failed to create socket"); + goto err; + } + socket_set_fast_reuse(sock); + + /* bind socket */ + if (bind(sock, local->ai_addr, local->ai_addrlen) < 0) { + error_setg_errno(errp, errno, "Failed to bind socket"); + goto err; + } + + /* connect to peer */ + if (connect(sock,peer->ai_addr,peer->ai_addrlen) < 0) { + error_setg_errno(errp, errno, "Failed to connect socket"); + goto err; + } + + freeaddrinfo(local); + freeaddrinfo(peer); + return sock; + +err: + if (-1 != sock) + closesocket(sock); + if (local) + freeaddrinfo(local); + if (peer) + freeaddrinfo(peer); + return -1; +} + +/* compatibility wrapper */ +InetSocketAddress *inet_parse(const char *str, Error **errp) +{ + InetSocketAddress *addr; + const char *optstr, *h; + char host[65]; + char port[33]; + int to; + int pos; + + addr = g_new0(InetSocketAddress, 1); + + /* parse address */ + if (str[0] == ':') { + /* no host given */ + host[0] = '\0'; + if (1 != sscanf(str, ":%32[^,]%n", port, &pos)) { + error_setg(errp, "error parsing port in address '%s'", str); + goto fail; + } + } else if (str[0] == '[') { + /* IPv6 addr */ + if (2 != sscanf(str, "[%64[^]]]:%32[^,]%n", host, port, &pos)) { + error_setg(errp, "error parsing IPv6 address '%s'", str); + goto fail; + } + addr->ipv6 = addr->has_ipv6 = true; + } else { + /* hostname or IPv4 addr */ + if (2 != sscanf(str, 
"%64[^:]:%32[^,]%n", host, port, &pos)) { + error_setg(errp, "error parsing address '%s'", str); + goto fail; + } + if (host[strspn(host, "0123456789.")] == '\0') { + addr->ipv4 = addr->has_ipv4 = true; + } + } + + addr->host = g_strdup(host); + addr->port = g_strdup(port); + + /* parse options */ + optstr = str + pos; + h = strstr(optstr, ",to="); + if (h) { + h += 4; + if (sscanf(h, "%d%n", &to, &pos) != 1 || + (h[pos] != '\0' && h[pos] != ',')) { + error_setg(errp, "error parsing to= argument"); + goto fail; + } + addr->has_to = true; + addr->to = to; + } + if (strstr(optstr, ",ipv4")) { + addr->ipv4 = addr->has_ipv4 = true; + } + if (strstr(optstr, ",ipv6")) { + addr->ipv6 = addr->has_ipv6 = true; + } + return addr; + +fail: + qapi_free_InetSocketAddress(addr); + return NULL; +} + +static void inet_addr_to_opts(QemuOpts *opts, const InetSocketAddress *addr) +{ + bool ipv4 = addr->has_ipv4 && addr->ipv4; + bool ipv6 = addr->has_ipv6 && addr->ipv6; + + if (ipv4 || ipv6) { + qemu_opt_set_bool(opts, "ipv4", ipv4, &error_abort); + qemu_opt_set_bool(opts, "ipv6", ipv6, &error_abort); + } else if (addr->has_ipv4 || addr->has_ipv6) { + qemu_opt_set_bool(opts, "ipv4", !addr->has_ipv4, &error_abort); + qemu_opt_set_bool(opts, "ipv6", !addr->has_ipv6, &error_abort); + } + if (addr->has_to) { + qemu_opt_set_number(opts, "to", addr->to, &error_abort); + } + qemu_opt_set(opts, "host", addr->host, &error_abort); + qemu_opt_set(opts, "port", addr->port, &error_abort); +} + +int inet_listen(const char *str, char *ostr, int olen, + int socktype, int port_offset, Error **errp) +{ + QemuOpts *opts; + char *optstr; + int sock = -1; + InetSocketAddress *addr; + + addr = inet_parse(str, errp); + if (addr != NULL) { + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + inet_addr_to_opts(opts, addr); + qapi_free_InetSocketAddress(addr); + sock = inet_listen_opts(opts, port_offset, errp); + if (sock != -1 && ostr) { + optstr = strchr(str, ','); + if 
(qemu_opt_get_bool(opts, "ipv6", 0)) { + snprintf(ostr, olen, "[%s]:%s%s", + qemu_opt_get(opts, "host"), + qemu_opt_get(opts, "port"), + optstr ? optstr : ""); + } else { + snprintf(ostr, olen, "%s:%s%s", + qemu_opt_get(opts, "host"), + qemu_opt_get(opts, "port"), + optstr ? optstr : ""); + } + } + qemu_opts_del(opts); + } + return sock; +} + +/** + * Create a blocking socket and connect it to an address. + * + * @str: address string + * @errp: set in case of an error + * + * Returns -1 in case of error, file descriptor on success + **/ +int inet_connect(const char *str, Error **errp) +{ + QemuOpts *opts; + int sock = -1; + InetSocketAddress *addr; + + addr = inet_parse(str, errp); + if (addr != NULL) { + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + inet_addr_to_opts(opts, addr); + qapi_free_InetSocketAddress(addr); + sock = inet_connect_opts(opts, errp, NULL, NULL); + qemu_opts_del(opts); + } + return sock; +} + +/** + * Create a non-blocking socket and connect it to an address. + * Calls the callback function with fd in case of success or -1 in case of + * error. + * + * @str: address string + * @callback: callback function that is called when connect completes, + * cannot be NULL. + * @opaque: opaque for callback function + * @errp: set in case of an error + * + * Returns: -1 on immediate error, file descriptor on success. 
+ **/ +int inet_nonblocking_connect(const char *str, + NonBlockingConnectHandler *callback, + void *opaque, Error **errp) +{ + QemuOpts *opts; + int sock = -1; + InetSocketAddress *addr; + + g_assert(callback != NULL); + + addr = inet_parse(str, errp); + if (addr != NULL) { + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + inet_addr_to_opts(opts, addr); + qapi_free_InetSocketAddress(addr); + sock = inet_connect_opts(opts, errp, callback, opaque); + qemu_opts_del(opts); + } + return sock; +} + +#ifndef _WIN32 + +int unix_listen_opts(QemuOpts *opts, Error **errp) +{ + struct sockaddr_un un; + const char *path = qemu_opt_get(opts, "path"); + int sock, fd; + + sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + error_setg_errno(errp, errno, "Failed to create Unix socket"); + return -1; + } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + if (path && strlen(path)) { + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + } else { + const char *tmpdir = getenv("TMPDIR"); + tmpdir = tmpdir ? tmpdir : "/tmp"; + if (snprintf(un.sun_path, sizeof(un.sun_path), "%s/qemu-socket-XXXXXX", + tmpdir) >= sizeof(un.sun_path)) { + error_setg_errno(errp, errno, + "TMPDIR environment variable (%s) too large", tmpdir); + goto err; + } + + /* + * This dummy fd usage silences the mktemp() unsecure warning. + * Using mkstemp() doesn't make things more secure here + * though. bind() complains about existing files, so we have + * to unlink first and thus re-open the race window. The + * worst case possible is bind() failing, i.e. a DoS attack. 
+ */ + fd = mkstemp(un.sun_path); + if (fd < 0) { + error_setg_errno(errp, errno, + "Failed to make a temporary socket name in %s", tmpdir); + goto err; + } + close(fd); + qemu_opt_set(opts, "path", un.sun_path, &error_abort); + } + + if (unlink(un.sun_path) < 0 && errno != ENOENT) { + error_setg_errno(errp, errno, + "Failed to unlink socket %s", un.sun_path); + goto err; + } + if (bind(sock, (struct sockaddr*) &un, sizeof(un)) < 0) { + error_setg_errno(errp, errno, "Failed to bind socket to %s", un.sun_path); + goto err; + } + if (listen(sock, 1) < 0) { + error_setg_errno(errp, errno, "Failed to listen on socket"); + goto err; + } + + return sock; + +err: + closesocket(sock); + return -1; +} + +int unix_connect_opts(QemuOpts *opts, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) +{ + struct sockaddr_un un; + const char *path = qemu_opt_get(opts, "path"); + ConnectState *connect_state = NULL; + int sock, rc; + + if (path == NULL) { + error_setg(errp, "unix connect: no path specified"); + return -1; + } + + sock = qemu_socket(PF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + error_setg_errno(errp, errno, "Failed to create socket"); + return -1; + } + if (callback != NULL) { + connect_state = g_malloc0(sizeof(*connect_state)); + connect_state->callback = callback; + connect_state->opaque = opaque; + qemu_set_nonblock(sock); + } + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + + /* connect to peer */ + do { + rc = 0; + if (connect(sock, (struct sockaddr *) &un, sizeof(un)) < 0) { + rc = -socket_error(); + } + } while (rc == -EINTR); + + if (connect_state != NULL && QEMU_SOCKET_RC_INPROGRESS(rc)) { + connect_state->fd = sock; + qemu_set_fd_handler(sock, NULL, wait_for_connect, connect_state); + return sock; + } else if (rc >= 0) { + /* non blocking socket immediate success, call callback */ + if (callback != NULL) { + callback(sock, NULL, opaque); + } + } + + if (rc < 0) { + 
error_setg_errno(errp, -rc, "Failed to connect socket"); + close(sock); + sock = -1; + } + + g_free(connect_state); + return sock; +} + +#else + +int unix_listen_opts(QemuOpts *opts, Error **errp) +{ + error_setg(errp, "unix sockets are not available on windows"); + errno = ENOTSUP; + return -1; +} + +int unix_connect_opts(QemuOpts *opts, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) +{ + error_setg(errp, "unix sockets are not available on windows"); + errno = ENOTSUP; + return -1; +} +#endif + +/* compatibility wrapper */ +int unix_listen(const char *str, char *ostr, int olen, Error **errp) +{ + QemuOpts *opts; + char *path, *optstr; + int sock, len; + + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + + optstr = strchr(str, ','); + if (optstr) { + len = optstr - str; + if (len) { + path = g_malloc(len+1); + snprintf(path, len+1, "%.*s", len, str); + qemu_opt_set(opts, "path", path, &error_abort); + g_free(path); + } + } else { + qemu_opt_set(opts, "path", str, &error_abort); + } + + sock = unix_listen_opts(opts, errp); + + if (sock != -1 && ostr) + snprintf(ostr, olen, "%s%s", qemu_opt_get(opts, "path"), optstr ? 
optstr : ""); + qemu_opts_del(opts); + return sock; +} + +int unix_connect(const char *path, Error **errp) +{ + QemuOpts *opts; + int sock; + + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + qemu_opt_set(opts, "path", path, &error_abort); + sock = unix_connect_opts(opts, errp, NULL, NULL); + qemu_opts_del(opts); + return sock; +} + + +int unix_nonblocking_connect(const char *path, + NonBlockingConnectHandler *callback, + void *opaque, Error **errp) +{ + QemuOpts *opts; + int sock = -1; + + g_assert(callback != NULL); + + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + qemu_opt_set(opts, "path", path, &error_abort); + sock = unix_connect_opts(opts, errp, callback, opaque); + qemu_opts_del(opts); + return sock; +} + +SocketAddress *socket_parse(const char *str, Error **errp) +{ + SocketAddress *addr; + + addr = g_new0(SocketAddress, 1); + if (strstart(str, "unix:", NULL)) { + if (str[5] == '\0') { + error_setg(errp, "invalid Unix socket address"); + goto fail; + } else { + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix = g_new(UnixSocketAddress, 1); + addr->u.q_unix->path = g_strdup(str + 5); + } + } else if (strstart(str, "fd:", NULL)) { + if (str[3] == '\0') { + error_setg(errp, "invalid file descriptor address"); + goto fail; + } else { + addr->type = SOCKET_ADDRESS_KIND_FD; + addr->u.fd = g_new(String, 1); + addr->u.fd->str = g_strdup(str + 3); + } + } else { + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = inet_parse(str, errp); + if (addr->u.inet == NULL) { + goto fail; + } + } + return addr; + +fail: + qapi_free_SocketAddress(addr); + return NULL; +} + +int socket_connect(SocketAddress *addr, Error **errp, + NonBlockingConnectHandler *callback, void *opaque) +{ + QemuOpts *opts; + int fd; + + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + switch (addr->type) { + case SOCKET_ADDRESS_KIND_INET: + inet_addr_to_opts(opts, addr->u.inet); + fd = inet_connect_opts(opts, errp, callback, 
opaque); + break; + + case SOCKET_ADDRESS_KIND_UNIX: + qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort); + fd = unix_connect_opts(opts, errp, callback, opaque); + break; + + case SOCKET_ADDRESS_KIND_FD: + fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp); + if (fd >= 0 && callback) { + qemu_set_nonblock(fd); + callback(fd, NULL, opaque); + } + break; + + default: + abort(); + } + qemu_opts_del(opts); + return fd; +} + +int socket_listen(SocketAddress *addr, Error **errp) +{ + QemuOpts *opts; + int fd; + + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + switch (addr->type) { + case SOCKET_ADDRESS_KIND_INET: + inet_addr_to_opts(opts, addr->u.inet); + fd = inet_listen_opts(opts, 0, errp); + break; + + case SOCKET_ADDRESS_KIND_UNIX: + qemu_opt_set(opts, "path", addr->u.q_unix->path, &error_abort); + fd = unix_listen_opts(opts, errp); + break; + + case SOCKET_ADDRESS_KIND_FD: + fd = monitor_get_fd(cur_mon, addr->u.fd->str, errp); + break; + + default: + abort(); + } + qemu_opts_del(opts); + return fd; +} + +int socket_dgram(SocketAddress *remote, SocketAddress *local, Error **errp) +{ + QemuOpts *opts; + int fd; + + opts = qemu_opts_create(&socket_optslist, NULL, 0, &error_abort); + switch (remote->type) { + case SOCKET_ADDRESS_KIND_INET: + inet_addr_to_opts(opts, remote->u.inet); + if (local) { + qemu_opt_set(opts, "localaddr", local->u.inet->host, &error_abort); + qemu_opt_set(opts, "localport", local->u.inet->port, &error_abort); + } + fd = inet_dgram_opts(opts, errp); + break; + + default: + error_setg(errp, "socket type unsupported for datagram"); + fd = -1; + } + qemu_opts_del(opts); + return fd; +} + + +static SocketAddress * +socket_sockaddr_to_address_inet(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + char host[NI_MAXHOST]; + char serv[NI_MAXSERV]; + SocketAddress *addr; + int ret; + + ret = getnameinfo((struct sockaddr *)sa, salen, + host, sizeof(host), + serv, sizeof(serv), + NI_NUMERICHOST | 
NI_NUMERICSERV); + if (ret != 0) { + error_setg(errp, "Cannot format numeric socket address: %s", + gai_strerror(ret)); + return NULL; + } + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_KIND_INET; + addr->u.inet = g_new0(InetSocketAddress, 1); + addr->u.inet->host = g_strdup(host); + addr->u.inet->port = g_strdup(serv); + if (sa->ss_family == AF_INET) { + addr->u.inet->has_ipv4 = addr->u.inet->ipv4 = true; + } else { + addr->u.inet->has_ipv6 = addr->u.inet->ipv6 = true; + } + + return addr; +} + + +#ifndef WIN32 +static SocketAddress * +socket_sockaddr_to_address_unix(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + SocketAddress *addr; + struct sockaddr_un *su = (struct sockaddr_un *)sa; + + addr = g_new0(SocketAddress, 1); + addr->type = SOCKET_ADDRESS_KIND_UNIX; + addr->u.q_unix = g_new0(UnixSocketAddress, 1); + if (su->sun_path[0]) { + addr->u.q_unix->path = g_strndup(su->sun_path, + sizeof(su->sun_path)); + } + + return addr; +} +#endif /* WIN32 */ + +static SocketAddress * +socket_sockaddr_to_address(struct sockaddr_storage *sa, + socklen_t salen, + Error **errp) +{ + switch (sa->ss_family) { + case AF_INET: + case AF_INET6: + return socket_sockaddr_to_address_inet(sa, salen, errp); + +#ifndef WIN32 + case AF_UNIX: + return socket_sockaddr_to_address_unix(sa, salen, errp); +#endif /* WIN32 */ + + default: + error_setg(errp, "socket family %d unsupported", + sa->ss_family); + return NULL; + } + return 0; +} + + +SocketAddress *socket_local_address(int fd, Error **errp) +{ + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + + if (getsockname(fd, (struct sockaddr *)&ss, &sslen) < 0) { + error_setg_errno(errp, socket_error(), "%s", + "Unable to query local socket address"); + return NULL; + } + + return socket_sockaddr_to_address(&ss, sslen, errp); +} + + +SocketAddress *socket_remote_address(int fd, Error **errp) +{ + struct sockaddr_storage ss; + socklen_t sslen = sizeof(ss); + + if (getpeername(fd, (struct 
sockaddr *)&ss, &sslen) < 0) { + error_setg_errno(errp, socket_error(), "%s", + "Unable to query remote socket address"); + return NULL; + } + + return socket_sockaddr_to_address(&ss, sslen, errp); +} + + +void qapi_copy_SocketAddress(SocketAddress **p_dest, + SocketAddress *src) +{ + QmpOutputVisitor *qov; + QmpInputVisitor *qiv; + Visitor *ov, *iv; + QObject *obj; + + *p_dest = NULL; + + qov = qmp_output_visitor_new(); + ov = qmp_output_get_visitor(qov); + visit_type_SocketAddress(ov, &src, NULL, &error_abort); + obj = qmp_output_get_qobject(qov); + qmp_output_visitor_cleanup(qov); + if (!obj) { + return; + } + + qiv = qmp_input_visitor_new(obj); + iv = qmp_input_get_visitor(qiv); + visit_type_SocketAddress(iv, p_dest, NULL, &error_abort); + qmp_input_visitor_cleanup(qiv); + qobject_decref(obj); +} diff --git a/src/util/qemu-thread-posix.c b/src/util/qemu-thread-posix.c new file mode 100644 index 0000000..dbd8094 --- /dev/null +++ b/src/util/qemu-thread-posix.c @@ -0,0 +1,518 @@ +/* + * Wrappers around mutex/cond/thread functions + * + * Copyright Red Hat, Inc. 2009 + * + * Author: + * Marcelo Tosatti <mtosatti@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> +#include <time.h> +#include <signal.h> +#include <stdint.h> +#include <string.h> +#include <limits.h> +#include <unistd.h> +#include <sys/time.h> +#ifdef __linux__ +#include <sys/syscall.h> +#include <linux/futex.h> +#endif +#include "qemu/thread.h" +#include "qemu/atomic.h" +#include "qemu/notify.h" + +static bool name_threads; + +void qemu_thread_naming(bool enable) +{ + name_threads = enable; + +#ifndef CONFIG_THREAD_SETNAME_BYTHREAD + /* This is a debugging option, not fatal */ + if (enable) { + fprintf(stderr, "qemu: thread naming not supported on this host\n"); + } +#endif +} + +static void error_exit(int err, const char *msg) +{ + fprintf(stderr, "qemu: %s: %s\n", msg, strerror(err)); + abort(); +} + +void qemu_mutex_init(QemuMutex *mutex) +{ + int err; + + err = pthread_mutex_init(&mutex->lock, NULL); + if (err) + error_exit(err, __func__); +} + +void qemu_mutex_destroy(QemuMutex *mutex) +{ + int err; + + err = pthread_mutex_destroy(&mutex->lock); + if (err) + error_exit(err, __func__); +} + +void qemu_mutex_lock(QemuMutex *mutex) +{ + int err; + + err = pthread_mutex_lock(&mutex->lock); + if (err) + error_exit(err, __func__); +} + +int qemu_mutex_trylock(QemuMutex *mutex) +{ + return pthread_mutex_trylock(&mutex->lock); +} + +void qemu_mutex_unlock(QemuMutex *mutex) +{ + int err; + + err = pthread_mutex_unlock(&mutex->lock); + if (err) + error_exit(err, __func__); +} + +void qemu_cond_init(QemuCond *cond) +{ + int err; + + err = pthread_cond_init(&cond->cond, NULL); + if (err) + error_exit(err, __func__); +} + +void qemu_cond_destroy(QemuCond *cond) +{ + int err; + + err = pthread_cond_destroy(&cond->cond); + if (err) + error_exit(err, __func__); +} + +void qemu_cond_signal(QemuCond *cond) +{ + int err; + + err = pthread_cond_signal(&cond->cond); + if (err) + error_exit(err, __func__); +} + +void qemu_cond_broadcast(QemuCond *cond) +{ + int err; + + err = 
pthread_cond_broadcast(&cond->cond); + if (err) + error_exit(err, __func__); +} + +void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) +{ + int err; + + err = pthread_cond_wait(&cond->cond, &mutex->lock); + if (err) + error_exit(err, __func__); +} + +void qemu_sem_init(QemuSemaphore *sem, int init) +{ + int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) + rc = pthread_mutex_init(&sem->lock, NULL); + if (rc != 0) { + error_exit(rc, __func__); + } + rc = pthread_cond_init(&sem->cond, NULL); + if (rc != 0) { + error_exit(rc, __func__); + } + if (init < 0) { + error_exit(EINVAL, __func__); + } + sem->count = init; +#else + rc = sem_init(&sem->sem, 0, init); + if (rc < 0) { + error_exit(errno, __func__); + } +#endif +} + +void qemu_sem_destroy(QemuSemaphore *sem) +{ + int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) + rc = pthread_cond_destroy(&sem->cond); + if (rc < 0) { + error_exit(rc, __func__); + } + rc = pthread_mutex_destroy(&sem->lock); + if (rc < 0) { + error_exit(rc, __func__); + } +#else + rc = sem_destroy(&sem->sem); + if (rc < 0) { + error_exit(errno, __func__); + } +#endif +} + +void qemu_sem_post(QemuSemaphore *sem) +{ + int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) + pthread_mutex_lock(&sem->lock); + if (sem->count == UINT_MAX) { + rc = EINVAL; + } else { + sem->count++; + rc = pthread_cond_signal(&sem->cond); + } + pthread_mutex_unlock(&sem->lock); + if (rc != 0) { + error_exit(rc, __func__); + } +#else + rc = sem_post(&sem->sem); + if (rc < 0) { + error_exit(errno, __func__); + } +#endif +} + +static void compute_abs_deadline(struct timespec *ts, int ms) +{ + struct timeval tv; + gettimeofday(&tv, NULL); + ts->tv_nsec = tv.tv_usec * 1000 + (ms % 1000) * 1000000; + ts->tv_sec = tv.tv_sec + ms / 1000; + if (ts->tv_nsec >= 1000000000) { + ts->tv_sec++; + ts->tv_nsec -= 1000000000; + } +} + +int qemu_sem_timedwait(QemuSemaphore *sem, int ms) +{ + int rc; + struct timespec ts; + +#if defined(__APPLE__) || defined(__NetBSD__) + rc 
= 0; + compute_abs_deadline(&ts, ms); + pthread_mutex_lock(&sem->lock); + while (sem->count == 0) { + rc = pthread_cond_timedwait(&sem->cond, &sem->lock, &ts); + if (rc == ETIMEDOUT) { + break; + } + if (rc != 0) { + error_exit(rc, __func__); + } + } + if (rc != ETIMEDOUT) { + --sem->count; + } + pthread_mutex_unlock(&sem->lock); + return (rc == ETIMEDOUT ? -1 : 0); +#else + if (ms <= 0) { + /* This is cheaper than sem_timedwait. */ + do { + rc = sem_trywait(&sem->sem); + } while (rc == -1 && errno == EINTR); + if (rc == -1 && errno == EAGAIN) { + return -1; + } + } else { + compute_abs_deadline(&ts, ms); + do { + rc = sem_timedwait(&sem->sem, &ts); + } while (rc == -1 && errno == EINTR); + if (rc == -1 && errno == ETIMEDOUT) { + return -1; + } + } + if (rc < 0) { + error_exit(errno, __func__); + } + return 0; +#endif +} + +void qemu_sem_wait(QemuSemaphore *sem) +{ + int rc; + +#if defined(__APPLE__) || defined(__NetBSD__) + pthread_mutex_lock(&sem->lock); + while (sem->count == 0) { + rc = pthread_cond_wait(&sem->cond, &sem->lock); + if (rc != 0) { + error_exit(rc, __func__); + } + } + --sem->count; + pthread_mutex_unlock(&sem->lock); +#else + do { + rc = sem_wait(&sem->sem); + } while (rc == -1 && errno == EINTR); + if (rc < 0) { + error_exit(errno, __func__); + } +#endif +} + +#ifdef __linux__ +#define futex(...) 
syscall(__NR_futex, __VA_ARGS__) + +static inline void futex_wake(QemuEvent *ev, int n) +{ + futex(ev, FUTEX_WAKE, n, NULL, NULL, 0); +} + +static inline void futex_wait(QemuEvent *ev, unsigned val) +{ + while (futex(ev, FUTEX_WAIT, (int) val, NULL, NULL, 0)) { + switch (errno) { + case EWOULDBLOCK: + return; + case EINTR: + break; /* get out of switch and retry */ + default: + abort(); + } + } +} +#else +static inline void futex_wake(QemuEvent *ev, int n) +{ + pthread_mutex_lock(&ev->lock); + if (n == 1) { + pthread_cond_signal(&ev->cond); + } else { + pthread_cond_broadcast(&ev->cond); + } + pthread_mutex_unlock(&ev->lock); +} + +static inline void futex_wait(QemuEvent *ev, unsigned val) +{ + pthread_mutex_lock(&ev->lock); + if (ev->value == val) { + pthread_cond_wait(&ev->cond, &ev->lock); + } + pthread_mutex_unlock(&ev->lock); +} +#endif + +/* Valid transitions: + * - free->set, when setting the event + * - busy->set, when setting the event, followed by futex_wake + * - set->free, when resetting the event + * - free->busy, when waiting + * + * set->busy does not happen (it can be observed from the outside but + * it really is set->free->busy). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy. + */ + +#define EV_SET 0 +#define EV_FREE 1 +#define EV_BUSY -1 + +void qemu_event_init(QemuEvent *ev, bool init) +{ +#ifndef __linux__ + pthread_mutex_init(&ev->lock, NULL); + pthread_cond_init(&ev->cond, NULL); +#endif + + ev->value = (init ? EV_SET : EV_FREE); +} + +void qemu_event_destroy(QemuEvent *ev) +{ +#ifndef __linux__ + pthread_mutex_destroy(&ev->lock); + pthread_cond_destroy(&ev->cond); +#endif +} + +void qemu_event_set(QemuEvent *ev) +{ + if (atomic_mb_read(&ev->value) != EV_SET) { + if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { + /* There were waiters, wake them up. 
*/ + futex_wake(ev, INT_MAX); + } + } +} + +void qemu_event_reset(QemuEvent *ev) +{ + if (atomic_mb_read(&ev->value) == EV_SET) { + /* + * If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + atomic_or(&ev->value, EV_FREE); + } +} + +void qemu_event_wait(QemuEvent *ev) +{ + unsigned value; + + value = atomic_mb_read(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* + * Leave the event reset and tell qemu_event_set that there + * are waiters. No need to retry, because there cannot be + * a concurrent busy->free transition. After the CAS, the + * event will be either set or busy. + */ + if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + return; + } + } + futex_wait(ev, EV_BUSY); + } +} + +static pthread_key_t exit_key; + +union NotifierThreadData { + void *ptr; + NotifierList list; +}; +QEMU_BUILD_BUG_ON(sizeof(union NotifierThreadData) != sizeof(void *)); + +void qemu_thread_atexit_add(Notifier *notifier) +{ + union NotifierThreadData ntd; + ntd.ptr = pthread_getspecific(exit_key); + notifier_list_add(&ntd.list, notifier); + pthread_setspecific(exit_key, ntd.ptr); +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ + union NotifierThreadData ntd; + ntd.ptr = pthread_getspecific(exit_key); + notifier_remove(notifier); + pthread_setspecific(exit_key, ntd.ptr); +} + +static void qemu_thread_atexit_run(void *arg) +{ + union NotifierThreadData ntd = { .ptr = arg }; + notifier_list_notify(&ntd.list, NULL); +} + +static void __attribute__((constructor)) qemu_thread_atexit_init(void) +{ + pthread_key_create(&exit_key, qemu_thread_atexit_run); +} + + +/* Attempt to set the threads name; note that this is for debug, so + * we're not going to fail if we can't set it. 
+ */ +static void qemu_thread_set_name(QemuThread *thread, const char *name) +{ +#ifdef CONFIG_PTHREAD_SETNAME_NP + pthread_setname_np(thread->thread, name); +#endif +} + +void qemu_thread_create(QemuThread *thread, const char *name, + void *(*start_routine)(void*), + void *arg, int mode) +{ + sigset_t set, oldset; + int err; + pthread_attr_t attr; + + err = pthread_attr_init(&attr); + if (err) { + error_exit(err, __func__); + } + if (mode == QEMU_THREAD_DETACHED) { + err = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED); + if (err) { + error_exit(err, __func__); + } + } + + /* Leave signal handling to the iothread. */ + sigfillset(&set); + pthread_sigmask(SIG_SETMASK, &set, &oldset); + err = pthread_create(&thread->thread, &attr, start_routine, arg); + if (err) + error_exit(err, __func__); + + if (name_threads) { + qemu_thread_set_name(thread, name); + } + + pthread_sigmask(SIG_SETMASK, &oldset, NULL); + + pthread_attr_destroy(&attr); +} + +void qemu_thread_get_self(QemuThread *thread) +{ + thread->thread = pthread_self(); +} + +bool qemu_thread_is_self(QemuThread *thread) +{ + return pthread_equal(pthread_self(), thread->thread); +} + +void qemu_thread_exit(void *retval) +{ + pthread_exit(retval); +} + +void *qemu_thread_join(QemuThread *thread) +{ + int err; + void *ret; + + err = pthread_join(thread->thread, &ret); + if (err) { + error_exit(err, __func__); + } + return ret; +} diff --git a/src/util/qemu-thread-win32.c b/src/util/qemu-thread-win32.c new file mode 100644 index 0000000..6cdd553 --- /dev/null +++ b/src/util/qemu-thread-win32.c @@ -0,0 +1,479 @@ +/* + * Win32 implementation for mutex/cond/thread functions + * + * Copyright Red Hat, Inc. 2010 + * + * Author: + * Paolo Bonzini <pbonzini@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ +#include "qemu-common.h" +#include "qemu/thread.h" +#include "qemu/notify.h" +#include <process.h> +#include <assert.h> +#include <limits.h> + +static bool name_threads; + +void qemu_thread_naming(bool enable) +{ + /* But note we don't actually name them on Windows yet */ + name_threads = enable; + + fprintf(stderr, "qemu: thread naming not supported on this host\n"); +} + +static void error_exit(int err, const char *msg) +{ + char *pstr; + + FormatMessage(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_ALLOCATE_BUFFER, + NULL, err, 0, (LPTSTR)&pstr, 2, NULL); + fprintf(stderr, "qemu: %s: %s\n", msg, pstr); + LocalFree(pstr); + abort(); +} + +void qemu_mutex_init(QemuMutex *mutex) +{ + mutex->owner = 0; + InitializeCriticalSection(&mutex->lock); +} + +void qemu_mutex_destroy(QemuMutex *mutex) +{ + assert(mutex->owner == 0); + DeleteCriticalSection(&mutex->lock); +} + +void qemu_mutex_lock(QemuMutex *mutex) +{ + EnterCriticalSection(&mutex->lock); + + /* Win32 CRITICAL_SECTIONs are recursive. Assert that we're not + * using them as such. 
+ */ + assert(mutex->owner == 0); + mutex->owner = GetCurrentThreadId(); +} + +int qemu_mutex_trylock(QemuMutex *mutex) +{ + int owned; + + owned = TryEnterCriticalSection(&mutex->lock); + if (owned) { + assert(mutex->owner == 0); + mutex->owner = GetCurrentThreadId(); + } + return !owned; +} + +void qemu_mutex_unlock(QemuMutex *mutex) +{ + assert(mutex->owner == GetCurrentThreadId()); + mutex->owner = 0; + LeaveCriticalSection(&mutex->lock); +} + +void qemu_cond_init(QemuCond *cond) +{ + memset(cond, 0, sizeof(*cond)); + + cond->sema = CreateSemaphore(NULL, 0, LONG_MAX, NULL); + if (!cond->sema) { + error_exit(GetLastError(), __func__); + } + cond->continue_event = CreateEvent(NULL, /* security */ + FALSE, /* auto-reset */ + FALSE, /* not signaled */ + NULL); /* name */ + if (!cond->continue_event) { + error_exit(GetLastError(), __func__); + } +} + +void qemu_cond_destroy(QemuCond *cond) +{ + BOOL result; + result = CloseHandle(cond->continue_event); + if (!result) { + error_exit(GetLastError(), __func__); + } + cond->continue_event = 0; + result = CloseHandle(cond->sema); + if (!result) { + error_exit(GetLastError(), __func__); + } + cond->sema = 0; +} + +void qemu_cond_signal(QemuCond *cond) +{ + DWORD result; + + /* + * Signal only when there are waiters. cond->waiters is + * incremented by pthread_cond_wait under the external lock, + * so we are safe about that. + */ + if (cond->waiters == 0) { + return; + } + + /* + * Waiting threads decrement it outside the external lock, but + * only if another thread is executing pthread_cond_broadcast and + * has the mutex. So, it also cannot be decremented concurrently + * with this particular access. 
+ */ + cond->target = cond->waiters - 1; + result = SignalObjectAndWait(cond->sema, cond->continue_event, + INFINITE, FALSE); + if (result == WAIT_ABANDONED || result == WAIT_FAILED) { + error_exit(GetLastError(), __func__); + } +} + +void qemu_cond_broadcast(QemuCond *cond) +{ + BOOLEAN result; + /* + * As in pthread_cond_signal, access to cond->waiters and + * cond->target is locked via the external mutex. + */ + if (cond->waiters == 0) { + return; + } + + cond->target = 0; + result = ReleaseSemaphore(cond->sema, cond->waiters, NULL); + if (!result) { + error_exit(GetLastError(), __func__); + } + + /* + * At this point all waiters continue. Each one takes its + * slice of the semaphore. Now it's our turn to wait: Since + * the external mutex is held, no thread can leave cond_wait, + * yet. For this reason, we can be sure that no thread gets + * a chance to eat *more* than one slice. OTOH, it means + * that the last waiter must send us a wake-up. + */ + WaitForSingleObject(cond->continue_event, INFINITE); +} + +void qemu_cond_wait(QemuCond *cond, QemuMutex *mutex) +{ + /* + * This access is protected under the mutex. + */ + cond->waiters++; + + /* + * Unlock external mutex and wait for signal. + * NOTE: we've held mutex locked long enough to increment + * waiters count above, so there's no problem with + * leaving mutex unlocked before we wait on semaphore. + */ + qemu_mutex_unlock(mutex); + WaitForSingleObject(cond->sema, INFINITE); + + /* Now waiters must rendez-vous with the signaling thread and + * let it continue. For cond_broadcast this has heavy contention + * and triggers thundering herd. So goes life. + * + * Decrease waiters count. The mutex is not taken, so we have + * to do this atomically. + * + * All waiters contend for the mutex at the end of this function + * until the signaling thread relinquishes it. 
To ensure + * each waiter consumes exactly one slice of the semaphore, + * the signaling thread stops until it is told by the last + * waiter that it can go on. + */ + if (InterlockedDecrement(&cond->waiters) == cond->target) { + SetEvent(cond->continue_event); + } + + qemu_mutex_lock(mutex); +} + +void qemu_sem_init(QemuSemaphore *sem, int init) +{ + /* Manual reset. */ + sem->sema = CreateSemaphore(NULL, init, LONG_MAX, NULL); +} + +void qemu_sem_destroy(QemuSemaphore *sem) +{ + CloseHandle(sem->sema); +} + +void qemu_sem_post(QemuSemaphore *sem) +{ + ReleaseSemaphore(sem->sema, 1, NULL); +} + +int qemu_sem_timedwait(QemuSemaphore *sem, int ms) +{ + int rc = WaitForSingleObject(sem->sema, ms); + if (rc == WAIT_OBJECT_0) { + return 0; + } + if (rc != WAIT_TIMEOUT) { + error_exit(GetLastError(), __func__); + } + return -1; +} + +void qemu_sem_wait(QemuSemaphore *sem) +{ + if (WaitForSingleObject(sem->sema, INFINITE) != WAIT_OBJECT_0) { + error_exit(GetLastError(), __func__); + } +} + +/* Wrap a Win32 manual-reset event with a fast userspace path. The idea + * is to reset the Win32 event lazily, as part of a test-reset-test-wait + * sequence. Such a sequence is, indeed, how QemuEvents are used by + * RCU and other subsystems! + * + * Valid transitions: + * - free->set, when setting the event + * - busy->set, when setting the event, followed by futex_wake + * - set->free, when resetting the event + * - free->busy, when waiting + * + * set->busy does not happen (it can be observed from the outside but + * it really is set->free->busy). + * + * busy->free provably cannot happen; to enforce it, the set->free transition + * is done with an OR, which becomes a no-op if the event has concurrently + * transitioned to free or busy (and is faster than cmpxchg). + */ + +#define EV_SET 0 +#define EV_FREE 1 +#define EV_BUSY -1 + +void qemu_event_init(QemuEvent *ev, bool init) +{ + /* Manual reset. */ + ev->event = CreateEvent(NULL, TRUE, TRUE, NULL); + ev->value = (init ? 
EV_SET : EV_FREE); +} + +void qemu_event_destroy(QemuEvent *ev) +{ + CloseHandle(ev->event); +} + +void qemu_event_set(QemuEvent *ev) +{ + if (atomic_mb_read(&ev->value) != EV_SET) { + if (atomic_xchg(&ev->value, EV_SET) == EV_BUSY) { + /* There were waiters, wake them up. */ + SetEvent(ev->event); + } + } +} + +void qemu_event_reset(QemuEvent *ev) +{ + if (atomic_mb_read(&ev->value) == EV_SET) { + /* If there was a concurrent reset (or even reset+wait), + * do nothing. Otherwise change EV_SET->EV_FREE. + */ + atomic_or(&ev->value, EV_FREE); + } +} + +void qemu_event_wait(QemuEvent *ev) +{ + unsigned value; + + value = atomic_mb_read(&ev->value); + if (value != EV_SET) { + if (value == EV_FREE) { + /* qemu_event_set is not yet going to call SetEvent, but we are + * going to do another check for EV_SET below when setting EV_BUSY. + * At that point it is safe to call WaitForSingleObject. + */ + ResetEvent(ev->event); + + /* Tell qemu_event_set that there are waiters. No need to retry + * because there cannot be a concurent busy->free transition. + * After the CAS, the event will be either set or busy. + */ + if (atomic_cmpxchg(&ev->value, EV_FREE, EV_BUSY) == EV_SET) { + value = EV_SET; + } else { + value = EV_BUSY; + } + } + if (value == EV_BUSY) { + WaitForSingleObject(ev->event, INFINITE); + } + } +} + +struct QemuThreadData { + /* Passed to win32_start_routine. */ + void *(*start_routine)(void *); + void *arg; + short mode; + NotifierList exit; + + /* Only used for joinable threads. 
*/ + bool exited; + void *ret; + CRITICAL_SECTION cs; +}; + +static bool atexit_registered; +static NotifierList main_thread_exit; + +static __thread QemuThreadData *qemu_thread_data; + +static void run_main_thread_exit(void) +{ + notifier_list_notify(&main_thread_exit, NULL); +} + +void qemu_thread_atexit_add(Notifier *notifier) +{ + if (!qemu_thread_data) { + if (!atexit_registered) { + atexit_registered = true; + atexit(run_main_thread_exit); + } + notifier_list_add(&main_thread_exit, notifier); + } else { + notifier_list_add(&qemu_thread_data->exit, notifier); + } +} + +void qemu_thread_atexit_remove(Notifier *notifier) +{ + notifier_remove(notifier); +} + +static unsigned __stdcall win32_start_routine(void *arg) +{ + QemuThreadData *data = (QemuThreadData *) arg; + void *(*start_routine)(void *) = data->start_routine; + void *thread_arg = data->arg; + + qemu_thread_data = data; + qemu_thread_exit(start_routine(thread_arg)); + abort(); +} + +void qemu_thread_exit(void *arg) +{ + QemuThreadData *data = qemu_thread_data; + + notifier_list_notify(&data->exit, NULL); + if (data->mode == QEMU_THREAD_JOINABLE) { + data->ret = arg; + EnterCriticalSection(&data->cs); + data->exited = true; + LeaveCriticalSection(&data->cs); + } else { + g_free(data); + } + _endthreadex(0); +} + +void *qemu_thread_join(QemuThread *thread) +{ + QemuThreadData *data; + void *ret; + HANDLE handle; + + data = thread->data; + if (data->mode == QEMU_THREAD_DETACHED) { + return NULL; + } + + /* + * Because multiple copies of the QemuThread can exist via + * qemu_thread_get_self, we need to store a value that cannot + * leak there. The simplest, non racy way is to store the TID, + * discard the handle that _beginthreadex gives back, and + * get another copy of the handle here. 
+ */ + handle = qemu_thread_get_handle(thread); + if (handle) { + WaitForSingleObject(handle, INFINITE); + CloseHandle(handle); + } + ret = data->ret; + DeleteCriticalSection(&data->cs); + g_free(data); + return ret; +} + +/* Allocate the per-thread data, start win32_start_routine through _beginthreadex and store the data pointer and thread id in @thread. @name is not used by this implementation. The critical section is initialised only for non-detached threads. */ void qemu_thread_create(QemuThread *thread, const char *name, + void *(*start_routine)(void *), + void *arg, int mode) +{ + HANDLE hThread; + struct QemuThreadData *data; + + data = g_malloc(sizeof *data); + data->start_routine = start_routine; + data->arg = arg; + data->mode = mode; + data->exited = false; + notifier_list_init(&data->exit); + + if (data->mode != QEMU_THREAD_DETACHED) { + InitializeCriticalSection(&data->cs); + } + + hThread = (HANDLE) _beginthreadex(NULL, 0, win32_start_routine, + data, 0, &thread->tid); + if (!hThread) { + error_exit(GetLastError(), __func__); + } + /* The handle is dropped on purpose; qemu_thread_get_handle reopens one from the tid. */ CloseHandle(hThread); + thread->data = data; +} + +/* Fill @thread with the calling thread's data pointer and thread id. */ void qemu_thread_get_self(QemuThread *thread) +{ + thread->data = qemu_thread_data; + thread->tid = GetCurrentThreadId(); +} + +/* Open a new handle for @thread from its tid. Returns NULL for detached threads and for threads that have already set data->exited. */ HANDLE qemu_thread_get_handle(QemuThread *thread) +{ + QemuThreadData *data; + HANDLE handle; + + data = thread->data; + if (data->mode == QEMU_THREAD_DETACHED) { + return NULL; + } + + EnterCriticalSection(&data->cs); + if (!data->exited) { + handle = OpenThread(SYNCHRONIZE | THREAD_SUSPEND_RESUME, FALSE, + thread->tid); + } else { + handle = NULL; + } + LeaveCriticalSection(&data->cs); + return handle; +} + +/* True when @thread's tid equals the calling thread's id. */ bool qemu_thread_is_self(QemuThread *thread) +{ + return GetCurrentThreadId() == thread->tid; +} diff --git a/src/util/qemu-timer-common.c b/src/util/qemu-timer-common.c new file mode 100644 index 0000000..95e0847 --- /dev/null +++ b/src/util/qemu-timer-common.c @@ -0,0 +1,61 @@ +/* + * QEMU System Emulator + * + * Copyright (c) 2003-2008 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without 
limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#include "qemu/timer.h" + +/***********************************************************/ +/* real time host monotonic timer */ + +#ifdef _WIN32 + +/* Performance-counter frequency, filled in by init_get_clock below. */ int64_t clock_freq; + +/* Constructor (Win32 build): cache the QueryPerformanceFrequency result in clock_freq; prints a message and exits with status 1 if the query fails. */ static void __attribute__((constructor)) init_get_clock(void) +{ + LARGE_INTEGER freq; + int ret; + ret = QueryPerformanceFrequency(&freq); + if (ret == 0) { + fprintf(stderr, "Could not calibrate ticks\n"); + exit(1); + } + clock_freq = freq.QuadPart; +} + +#else + +/* 1 when clock_gettime(CLOCK_MONOTONIC) worked at start-up, else 0. */ int use_rt_clock; + +/* Constructor (non-Win32 build): probe CLOCK_MONOTONIC once, when the macro is defined, and record the outcome in use_rt_clock. */ static void __attribute__((constructor)) init_get_clock(void) +{ + use_rt_clock = 0; +#ifdef CLOCK_MONOTONIC + { + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0) { + use_rt_clock = 1; + } + } +#endif +} +#endif diff --git a/src/util/rcu.c b/src/util/rcu.c new file mode 100644 index 0000000..8ba304d --- /dev/null +++ b/src/util/rcu.c @@ -0,0 +1,352 @@ +/* + * urcu-mb.c + * + * Userspace RCU library with explicit memory barriers + * + * Copyright (c) 2009 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> + * Copyright (c) 2009 Paul E. McKenney, IBM Corporation. + * Copyright 2015 Red Hat, Inc. 
+ * + * Ported to QEMU by Paolo Bonzini <pbonzini@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + * + * IBM's contributions to this file may be relicensed under LGPLv2 or later. + */ + +#include "qemu-common.h" +#include <stdio.h> +#include <assert.h> +#include <stdlib.h> +#include <stdint.h> +#include <errno.h> +#include "qemu/rcu.h" +#include "qemu/atomic.h" +#include "qemu/thread.h" +#include "qemu/main-loop.h" + +/* + * Global grace period counter. Bit 0 is always one in rcu_gp_ctr. + * Bits 1 and above are defined in synchronize_rcu. + */ +#define RCU_GP_LOCKED (1UL << 0) +#define RCU_GP_CTR (1UL << 1) + +unsigned long rcu_gp_ctr = RCU_GP_LOCKED; + +/* Event that wait_for_readers resets and waits on between scans of the registry. */ QemuEvent rcu_gp_event; +/* Guards the reader registry. */ static QemuMutex rcu_registry_lock; +/* Held for the whole of synchronize_rcu, so calls to it run one at a time. */ static QemuMutex rcu_sync_lock; + +/* + * Check whether a quiescent state was crossed between the beginning of + * update_counter_and_wait and now. + * + * Returns nonzero when *ctr is nonzero and differs from rcu_gp_ctr. + * NOTE(review): update_counter_and_wait is not defined in this file; + * presumably it is the liburcu name for what synchronize_rcu does here -- + * confirm. + */ +static inline int rcu_gp_ongoing(unsigned long *ctr) +{ + unsigned long v; + + v = atomic_read(ctr); + return v && (v != rcu_gp_ctr); +} + +/* Written to only by each individual reader. Read by both the reader and the + * writers. + */ +__thread struct rcu_reader_data rcu_reader; + +/* Protected by rcu_registry_lock. 
*/ +typedef QLIST_HEAD(, rcu_reader_data) ThreadList; +static ThreadList registry = QLIST_HEAD_INITIALIZER(registry); + +/* Wait for previous parity/grace period to be empty of readers. */ +static void wait_for_readers(void) +{ + ThreadList qsreaders = QLIST_HEAD_INITIALIZER(qsreaders); + struct rcu_reader_data *index, *tmp; + + for (;;) { + /* We want to be notified of changes made to rcu_gp_ongoing + * while we walk the list. + */ + qemu_event_reset(&rcu_gp_event); + + /* Instead of using atomic_mb_set for index->waiting, and + * atomic_mb_read for index->ctr, memory barriers are placed + * manually since writes to different threads are independent. + * atomic_mb_set has a smp_wmb before... + */ + smp_wmb(); + QLIST_FOREACH(index, ®istry, node) { + atomic_set(&index->waiting, true); + } + + /* ... and a smp_mb after. */ + smp_mb(); + + QLIST_FOREACH_SAFE(index, ®istry, node, tmp) { + if (!rcu_gp_ongoing(&index->ctr)) { + QLIST_REMOVE(index, node); + QLIST_INSERT_HEAD(&qsreaders, index, node); + + /* No need for mb_set here, worst of all we + * get some extra futex wakeups. + */ + atomic_set(&index->waiting, false); + } + } + + /* atomic_mb_read has smp_rmb after. */ + smp_rmb(); + + if (QLIST_EMPTY(®istry)) { + break; + } + + /* Wait for one thread to report a quiescent state and try again. + * Release rcu_registry_lock, so rcu_(un)register_thread() doesn't + * wait too much time. + * + * rcu_register_thread() may add nodes to ®istry; it will not + * wake up synchronize_rcu, but that is okay because at least another + * thread must exit its RCU read-side critical section before + * synchronize_rcu is done. The next iteration of the loop will + * move the new thread's rcu_reader from ®istry to &qsreaders, + * because rcu_gp_ongoing() will return false. + * + * rcu_unregister_thread() may remove nodes from &qsreaders instead + * of ®istry if it runs during qemu_event_wait. That's okay; + * the node then will not be added back to ®istry by QLIST_SWAP + * below. 
The invariant is that the node is part of one list when + * rcu_registry_lock is released. + */ + qemu_mutex_unlock(&rcu_registry_lock); + qemu_event_wait(&rcu_gp_event); + qemu_mutex_lock(&rcu_registry_lock); + } + + /* put back the reader list in the registry */ + QLIST_SWAP(&registry, &qsreaders, node); +} + +/* Advance the grace period counter and wait until wait_for_readers() reports + * that no registered reader is still in the previous period. Calls are + * serialised by rcu_sync_lock; with an empty registry only the locks are + * taken and released. + */ +void synchronize_rcu(void) +{ + qemu_mutex_lock(&rcu_sync_lock); + qemu_mutex_lock(&rcu_registry_lock); + + if (!QLIST_EMPTY(&registry)) { + /* In either case, the atomic_mb_set below blocks stores that free + * old RCU-protected pointers. + */ + if (sizeof(rcu_gp_ctr) < 8) { + /* For architectures with 32-bit longs, a two-subphases algorithm + * ensures we do not encounter overflow bugs. + * + * Switch parity: 0 -> 1, 1 -> 0. + */ + atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + wait_for_readers(); + atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr ^ RCU_GP_CTR); + } else { + /* Increment current grace period. */ + atomic_mb_set(&rcu_gp_ctr, rcu_gp_ctr + RCU_GP_CTR); + } + + wait_for_readers(); + } + + qemu_mutex_unlock(&rcu_registry_lock); + qemu_mutex_unlock(&rcu_sync_lock); +} + + +#define RCU_CALL_MIN_SIZE 30 + +/* Multi-producer, single-consumer queue based on urcu/static/wfqueue.h + * from liburcu. Note that head is only used by the consumer. + */ +static struct rcu_head dummy; +static struct rcu_head *head = &dummy, **tail = &dummy.next; +static int rcu_call_count; +static QemuEvent rcu_call_ready_event; + +/* Append @node: swap the tail pointer first, then link the previous tail + * to @node. + */ +static void enqueue(struct rcu_head *node) +{ + struct rcu_head **old_tail; + + node->next = NULL; + old_tail = atomic_xchg(&tail, &node->next); + atomic_mb_set(old_tail, node); +} + +/* Consumer side: return the next queued node, or NULL when the head's next + * pointer has not been published yet. Aborts if the queue is empty. + */ +static struct rcu_head *try_dequeue(void) +{ + struct rcu_head *node, *next; + +retry: + /* Test for an empty list, which we do not expect. Note that for + * the consumer head and tail are always consistent. The head + * is consistent because only the consumer reads/writes it. + * The tail, because it is the first step in the enqueuing. 
+ * It is only the next pointers that might be inconsistent. + */ + if (head == &dummy && atomic_mb_read(&tail) == &dummy.next) { + abort(); + } + + /* If the head node has NULL in its next pointer, the value is + * wrong and we need to wait until its enqueuer finishes the update. + */ + node = head; + next = atomic_mb_read(&head->next); + if (!next) { + return NULL; + } + + /* Since we are the sole consumer, and we excluded the empty case + * above, the queue will always have at least two nodes: the + * dummy node, and the one being removed. So we do not need to update + * the tail pointer. + */ + head = next; + + /* If we dequeued the dummy node, add it back at the end and retry. */ + if (node == &dummy) { + enqueue(node); + goto retry; + } + + return node; +} + +/* Thread body for the call_rcu thread: registers itself as an RCU thread, then loops forever collecting a batch of queued callbacks, calling synchronize_rcu() and running the batch under the iothread lock. @opaque is not used. */ static void *call_rcu_thread(void *opaque) +{ + struct rcu_head *node; + + rcu_register_thread(); + + for (;;) { + int tries = 0; + int n = atomic_read(&rcu_call_count); + + /* Heuristically wait for a decent number of callbacks to pile up. + * Fetch rcu_call_count now, we only must process elements that were + * added before synchronize_rcu() starts. 
+ */ + while (n == 0 || (n < RCU_CALL_MIN_SIZE && ++tries <= 5)) { + g_usleep(10000); + if (n == 0) { + qemu_event_reset(&rcu_call_ready_event); + n = atomic_read(&rcu_call_count); + if (n == 0) { + qemu_event_wait(&rcu_call_ready_event); + } + } + n = atomic_read(&rcu_call_count); + } + + atomic_sub(&rcu_call_count, n); + synchronize_rcu(); + qemu_mutex_lock_iothread(); + while (n > 0) { + node = try_dequeue(); + while (!node) { + qemu_mutex_unlock_iothread(); + qemu_event_reset(&rcu_call_ready_event); + node = try_dequeue(); + if (!node) { + qemu_event_wait(&rcu_call_ready_event); + node = try_dequeue(); + } + qemu_mutex_lock_iothread(); + } + + n--; + node->func(node); + } + qemu_mutex_unlock_iothread(); + } + abort(); +} + +/* Queue @node so that the call_rcu thread invokes @func on it, then signal + * rcu_call_ready_event. + */ +void call_rcu1(struct rcu_head *node, void (*func)(struct rcu_head *node)) +{ + node->func = func; + enqueue(node); + atomic_inc(&rcu_call_count); + qemu_event_set(&rcu_call_ready_event); +} + +/* Add the calling thread's rcu_reader to the registry; asserts that its + * counter is zero. + */ +void rcu_register_thread(void) +{ + assert(rcu_reader.ctr == 0); + qemu_mutex_lock(&rcu_registry_lock); + QLIST_INSERT_HEAD(&registry, &rcu_reader, node); + qemu_mutex_unlock(&rcu_registry_lock); +} + +/* Unlink the calling thread's rcu_reader from the list it is on. */ +void rcu_unregister_thread(void) +{ + qemu_mutex_lock(&rcu_registry_lock); + QLIST_REMOVE(&rcu_reader, node); + qemu_mutex_unlock(&rcu_registry_lock); +} + +/* Initialise the locks and events, start the detached call_rcu thread and + * register the calling thread. + */ +static void rcu_init_complete(void) +{ + QemuThread thread; + + qemu_mutex_init(&rcu_registry_lock); + qemu_mutex_init(&rcu_sync_lock); + qemu_event_init(&rcu_gp_event, true); + + qemu_event_init(&rcu_call_ready_event, false); + + /* The caller is assumed to have iothread lock, so the call_rcu thread + * must have been quiescent even after forking, just recreate it. 
+ */ + qemu_thread_create(&thread, "call_rcu", call_rcu_thread, + NULL, QEMU_THREAD_DETACHED); + + rcu_register_thread(); +} + +#ifdef CONFIG_POSIX +/* pthread_atfork "prepare" handler: take both RCU locks, in the same order + * as synchronize_rcu. + */ +static void rcu_init_lock(void) +{ + qemu_mutex_lock(&rcu_sync_lock); + qemu_mutex_lock(&rcu_registry_lock); +} + +/* pthread_atfork "parent" and "child" handler: release both RCU locks. */ +static void rcu_init_unlock(void) +{ + qemu_mutex_unlock(&rcu_registry_lock); + qemu_mutex_unlock(&rcu_sync_lock); +} +#endif + +/* Clear the reader registry and redo the initialisation done by + * rcu_init_complete (which also recreates the call_rcu thread). + */ +void rcu_after_fork(void) +{ + memset(&registry, 0, sizeof(registry)); + rcu_init_complete(); +} + +/* Constructor: install the fork handlers on POSIX hosts and perform the + * initial setup. + */ +static void __attribute__((__constructor__)) rcu_init(void) +{ +#ifdef CONFIG_POSIX + pthread_atfork(rcu_init_lock, rcu_init_unlock, rcu_init_unlock); +#endif + rcu_init_complete(); +} diff --git a/src/util/readline.c b/src/util/readline.c new file mode 100644 index 0000000..cc1302a --- /dev/null +++ b/src/util/readline.c @@ -0,0 +1,515 @@ +/* + * QEMU readline utility + * + * Copyright (c) 2003-2004 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "qemu-common.h" +#include "qemu/readline.h" + +#define IS_NORM 0 +#define IS_ESC 1 +#define IS_CSI 2 +#define IS_SS3 3 + +/* Print the prompt, flush, and reset the display-tracking fields and the + * escape-sequence state. + */ +void readline_show_prompt(ReadLineState *rs) +{ + rs->printf_func(rs->opaque, "%s", rs->prompt); + rs->flush_func(rs->opaque); + rs->last_cmd_buf_index = 0; + rs->last_cmd_buf_size = 0; + rs->esc_state = IS_NORM; +} + +/* update the displayed command line */ +static void readline_update(ReadLineState *rs) +{ + int i, delta, len; + + if (rs->cmd_buf_size != rs->last_cmd_buf_size || + memcmp(rs->cmd_buf, rs->last_cmd_buf, rs->cmd_buf_size) != 0) { + for(i = 0; i < rs->last_cmd_buf_index; i++) { + rs->printf_func(rs->opaque, "\033[D"); + } + rs->cmd_buf[rs->cmd_buf_size] = '\0'; + if (rs->read_password) { + len = strlen(rs->cmd_buf); + for(i = 0; i < len; i++) + rs->printf_func(rs->opaque, "*"); + } else { + rs->printf_func(rs->opaque, "%s", rs->cmd_buf); + } + rs->printf_func(rs->opaque, "\033[K"); + memcpy(rs->last_cmd_buf, rs->cmd_buf, rs->cmd_buf_size); + rs->last_cmd_buf_size = rs->cmd_buf_size; + rs->last_cmd_buf_index = rs->cmd_buf_size; + } + if (rs->cmd_buf_index != rs->last_cmd_buf_index) { + delta = rs->cmd_buf_index - rs->last_cmd_buf_index; + if (delta > 0) { + for(i = 0;i < delta; i++) { + rs->printf_func(rs->opaque, "\033[C"); + } + } else { + delta = -delta; + for(i = 0;i < delta; i++) { + rs->printf_func(rs->opaque, "\033[D"); + } + } + rs->last_cmd_buf_index = rs->cmd_buf_index; + } + rs->flush_func(rs->opaque); +} + +/* Insert @ch at the cursor, shifting the rest of the line right by one. + * + * The capacity check is made on cmd_buf_size rather than cmd_buf_index: + * the memmove and the size increment below grow the line wherever the + * cursor is, so checking only the cursor allowed cmd_buf_size to exceed + * READLINE_CMD_BUF_SIZE when typing with the cursor moved left. Since the + * cursor never exceeds the size, this check also covers the old one. + * When the line is full the character is dropped. + */ +static void readline_insert_char(ReadLineState *rs, int ch) +{ + if (rs->cmd_buf_size < READLINE_CMD_BUF_SIZE) { + memmove(rs->cmd_buf + rs->cmd_buf_index + 1, + rs->cmd_buf + rs->cmd_buf_index, + rs->cmd_buf_size - 
rs->cmd_buf_index); + rs->cmd_buf[rs->cmd_buf_index] = ch; + rs->cmd_buf_size++; + rs->cmd_buf_index++; + } +} + +/* Move the cursor one position left, if possible. */ +static void readline_backward_char(ReadLineState *rs) +{ + if (rs->cmd_buf_index > 0) { + rs->cmd_buf_index--; + } +} + +/* Move the cursor one position right, if possible. */ +static void readline_forward_char(ReadLineState *rs) +{ + if (rs->cmd_buf_index < rs->cmd_buf_size) { + rs->cmd_buf_index++; + } +} + +/* Delete the character under the cursor, if there is one. */ +static void readline_delete_char(ReadLineState *rs) +{ + if (rs->cmd_buf_index < rs->cmd_buf_size) { + memmove(rs->cmd_buf + rs->cmd_buf_index, + rs->cmd_buf + rs->cmd_buf_index + 1, + rs->cmd_buf_size - rs->cmd_buf_index - 1); + rs->cmd_buf_size--; + } +} + +/* Delete the character before the cursor, if there is one. */ +static void readline_backspace(ReadLineState *rs) +{ + if (rs->cmd_buf_index > 0) { + readline_backward_char(rs); + readline_delete_char(rs); + } +} + +/* Delete from the start of the word before the cursor up to the cursor. */ +static void readline_backword(ReadLineState *rs) +{ + int start; + + if (rs->cmd_buf_index == 0 || rs->cmd_buf_index > rs->cmd_buf_size) { + return; + } + + start = rs->cmd_buf_index - 1; + + /* find first word (backwards) */ + while (start > 0) { + if (!qemu_isspace(rs->cmd_buf[start])) { + break; + } + + --start; + } + + /* find first space (backwards) */ + while (start > 0) { + if (qemu_isspace(rs->cmd_buf[start])) { + ++start; + break; + } + + --start; + } + + /* remove word */ + if (start < rs->cmd_buf_index) { + memmove(rs->cmd_buf + start, + rs->cmd_buf + rs->cmd_buf_index, + rs->cmd_buf_size - rs->cmd_buf_index); + rs->cmd_buf_size -= rs->cmd_buf_index - start; + rs->cmd_buf_index = start; + } +} + +/* Move the cursor to the beginning of the line. */ +static void readline_bol(ReadLineState *rs) +{ + rs->cmd_buf_index = 0; +} + +/* Move the cursor to the end of the line. */ +static void readline_eol(ReadLineState *rs) +{ + rs->cmd_buf_index = rs->cmd_buf_size; +} + +/* Step one entry back in the history and load it into the edit buffer. */ +static void readline_up_char(ReadLineState *rs) +{ + int idx; + + if (rs->hist_entry == 0) + return; + if (rs->hist_entry == -1) { + /* Find latest entry */ + for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { + if (rs->history[idx] == NULL) + break; + } + rs->hist_entry = idx; + } + rs->hist_entry--; + if (rs->hist_entry >= 0) { 
+ pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), + rs->history[rs->hist_entry]); + rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); + } +} + +/* Step one entry forward in the history and load it; past the last entry the edit buffer is emptied and hist_entry returns to -1. */ static void readline_down_char(ReadLineState *rs) +{ + if (rs->hist_entry == -1) + return; + if (rs->hist_entry < READLINE_MAX_CMDS - 1 && + rs->history[++rs->hist_entry] != NULL) { + pstrcpy(rs->cmd_buf, sizeof(rs->cmd_buf), + rs->history[rs->hist_entry]); + } else { + rs->cmd_buf[0] = 0; + rs->hist_entry = -1; + } + rs->cmd_buf_index = rs->cmd_buf_size = strlen(rs->cmd_buf); +} + +/* Record @cmdline as the newest history entry. Empty lines are ignored; a line already present is moved to the end rather than duplicated; when all READLINE_MAX_CMDS slots are used the oldest entry is freed. Resets hist_entry to -1. */ static void readline_hist_add(ReadLineState *rs, const char *cmdline) +{ + char *hist_entry, *new_entry; + int idx; + + if (cmdline[0] == '\0') + return; + new_entry = NULL; + if (rs->hist_entry != -1) { + /* We were browsing an existing history entry: if the line is + * unchanged, reuse that entry (it is moved to the end below); + * otherwise fall through to the normal search. + */ + hist_entry = rs->history[rs->hist_entry]; + idx = rs->hist_entry; + if (strcmp(hist_entry, cmdline) == 0) { + goto same_entry; + } + } + /* Search cmdline in history buffers */ + for (idx = 0; idx < READLINE_MAX_CMDS; idx++) { + hist_entry = rs->history[idx]; + if (hist_entry == NULL) + break; + if (strcmp(hist_entry, cmdline) == 0) { + same_entry: + new_entry = hist_entry; + /* Put this entry at the end of history */ + memmove(&rs->history[idx], &rs->history[idx + 1], + (READLINE_MAX_CMDS - (idx + 1)) * sizeof(char *)); + rs->history[READLINE_MAX_CMDS - 1] = NULL; + for (; idx < READLINE_MAX_CMDS; idx++) { + if (rs->history[idx] == NULL) + break; + } + break; + } + } + if (idx == READLINE_MAX_CMDS) { + /* Need to get one free slot */ + g_free(rs->history[0]); + memmove(rs->history, &rs->history[1], + (READLINE_MAX_CMDS - 1) * sizeof(char *)); + rs->history[READLINE_MAX_CMDS - 1] = NULL; + idx = READLINE_MAX_CMDS - 1; + } + if (new_entry == NULL) + new_entry = g_strdup(cmdline); + rs->history[idx] = new_entry; + rs->hist_entry = -1; +} + +/* completion support */ + +/* Append a copy of @str to the completion list unless the list is full or already contains an equal string. */ void readline_add_completion(ReadLineState *rs, const char *str) +{ + if (rs->nb_completions < 
READLINE_MAX_COMPLETIONS) { + int i; + for (i = 0; i < rs->nb_completions; i++) { + if (!strcmp(rs->completions[i], str)) { + return; + } + } + rs->completions[rs->nb_completions++] = g_strdup(str); + } +} + +/* Set the offset into a completion string from which readline_completion starts inserting characters. */ void readline_set_completion_index(ReadLineState *rs, int index) +{ + rs->completion_index = index; +} + +/* qsort comparator: strcmp on two elements of a char * array. */ static int completion_comp(const void *a, const void *b) +{ + return strcmp(*(const char **) a, *(const char **) b); +} + +/* Run the completion finder on the text before the cursor. A single candidate is inserted (plus a space unless it ends in '/'); with several, their common prefix is inserted and the sorted candidates are printed in columns, followed by the prompt. The candidate strings are freed before returning. */ static void readline_completion(ReadLineState *rs) +{ + int len, i, j, max_width, nb_cols, max_prefix; + char *cmdline; + + rs->nb_completions = 0; + + cmdline = g_strndup(rs->cmd_buf, rs->cmd_buf_index); + rs->completion_finder(rs->opaque, cmdline); + g_free(cmdline); + + /* no completion found */ + if (rs->nb_completions <= 0) + return; + if (rs->nb_completions == 1) { + len = strlen(rs->completions[0]); + for(i = rs->completion_index; i < len; i++) { + readline_insert_char(rs, rs->completions[0][i]); + } + /* extra space for next argument. XXX: make it more generic */ + if (len > 0 && rs->completions[0][len - 1] != '/') + readline_insert_char(rs, ' '); + } else { + qsort(rs->completions, rs->nb_completions, sizeof(char *), + completion_comp); + rs->printf_func(rs->opaque, "\n"); + max_width = 0; + max_prefix = 0; + /* max_prefix: length of the prefix shared by all candidates; max_width: longest candidate. */ for(i = 0; i < rs->nb_completions; i++) { + len = strlen(rs->completions[i]); + if (i==0) { + max_prefix = len; + } else { + if (len < max_prefix) + max_prefix = len; + for(j=0; j<max_prefix; j++) { + if (rs->completions[i][j] != rs->completions[0][j]) + max_prefix = j; + } + } + if (len > max_width) + max_width = len; + } + if (max_prefix > 0) + for(i = rs->completion_index; i < max_prefix; i++) { + readline_insert_char(rs, rs->completions[0][i]); + } + /* Column width: longest candidate plus 2, clamped to 10..80; the display is laid out for 80 columns. */ max_width += 2; + if (max_width < 10) + max_width = 10; + else if (max_width > 80) + max_width = 80; + nb_cols = 80 / max_width; + j = 0; + for(i = 0; i < rs->nb_completions; i++) { + rs->printf_func(rs->opaque, "%-*s", max_width, rs->completions[i]); + if (++j == nb_cols 
|| i == (rs->nb_completions - 1)) { + rs->printf_func(rs->opaque, "\n"); + j = 0; + } + } + readline_show_prompt(rs); + } + for (i = 0; i < rs->nb_completions; i++) { + g_free(rs->completions[i]); + } +} + +/* Emit the escape sequences "\033[2J\033[1;1H" and show the prompt again. */ static void readline_clear_screen(ReadLineState *rs) +{ + rs->printf_func(rs->opaque, "\033[2J\033[1;1H"); + readline_show_prompt(rs); +} + +/* Feed one input byte @ch to the line editor: dispatch on the current + * escape-sequence state (IS_NORM, IS_ESC, IS_CSI, IS_SS3), then refresh the + * displayed line. The function returns nothing. + */ +void readline_handle_byte(ReadLineState *rs, int ch) +{ + switch(rs->esc_state) { + case IS_NORM: + switch(ch) { + case 1: + readline_bol(rs); + break; + case 4: + readline_delete_char(rs); + break; + case 5: + readline_eol(rs); + break; + case 9: + readline_completion(rs); + break; + case 12: + readline_clear_screen(rs); + break; + case 10: + case 13: + /* End of line: terminate the buffer, record it in the history unless a password is being read, reset the edit state and hand the line to readline_func. */ rs->cmd_buf[rs->cmd_buf_size] = '\0'; + if (!rs->read_password) + readline_hist_add(rs, rs->cmd_buf); + rs->printf_func(rs->opaque, "\n"); + rs->cmd_buf_index = 0; + rs->cmd_buf_size = 0; + rs->last_cmd_buf_index = 0; + rs->last_cmd_buf_size = 0; + rs->readline_func(rs->opaque, rs->cmd_buf, rs->readline_opaque); + break; + case 23: + /* ^W */ + readline_backword(rs); + break; + case 27: + rs->esc_state = IS_ESC; + break; + case 127: + case 8: + readline_backspace(rs); + break; + case 155: + /* NOTE(review): unlike the ESC '[' path below, esc_param is not reset here -- confirm whether that is intended. */ rs->esc_state = IS_CSI; + break; + default: + if (ch >= 32) { + readline_insert_char(rs, ch); + } + break; + } + break; + case IS_ESC: + if (ch == '[') { + rs->esc_state = IS_CSI; + rs->esc_param = 0; + } else if (ch == 'O') { + rs->esc_state = IS_SS3; + rs->esc_param = 0; + } else { + rs->esc_state = IS_NORM; + } + break; + case IS_CSI: + switch(ch) { + case 'A': + case 'F': + readline_up_char(rs); + break; + case 'B': + case 'E': + readline_down_char(rs); + break; + case 'D': + readline_backward_char(rs); + break; + case 'C': + readline_forward_char(rs); + break; + case '0' ... 
'9': + /* Accumulate the decimal parameter and stay in the CSI state. */ rs->esc_param = rs->esc_param * 10 + (ch - '0'); + goto the_end; + case '~': + switch(rs->esc_param) { + case 1: + readline_bol(rs); + break; + case 3: + readline_delete_char(rs); + break; + case 4: + readline_eol(rs); + break; + } + break; + default: + break; + } + rs->esc_state = IS_NORM; + the_end: + break; + case IS_SS3: + switch(ch) { + case 'F': + readline_eol(rs); + break; + case 'H': + readline_bol(rs); + break; + } + rs->esc_state = IS_NORM; + break; + } + readline_update(rs); +} + +/* Begin reading a line: store the prompt (truncated to fit rs->prompt by pstrcpy), the completion callback with its opaque pointer, and the password flag, then empty the edit buffer. */ void readline_start(ReadLineState *rs, const char *prompt, int read_password, + ReadLineFunc *readline_func, void *opaque) +{ + pstrcpy(rs->prompt, sizeof(rs->prompt), prompt); + rs->readline_func = readline_func; + rs->readline_opaque = opaque; + rs->read_password = read_password; + readline_restart(rs); +} + +/* Empty the edit buffer and move the cursor to position 0. */ void readline_restart(ReadLineState *rs) +{ + rs->cmd_buf_index = 0; + rs->cmd_buf_size = 0; +} + +/* Return history slot @index, or NULL when @index is out of range; the slot itself may also hold NULL. */ const char *readline_get_history(ReadLineState *rs, unsigned int index) +{ + if (index >= READLINE_MAX_CMDS) + return NULL; + return rs->history[index]; +} + +/* Allocate a zero-filled ReadLineState, set hist_entry to -1 and store the output, flush and completion callbacks together with @opaque. */ ReadLineState *readline_init(ReadLinePrintfFunc *printf_func, + ReadLineFlushFunc *flush_func, + void *opaque, + ReadLineCompletionFunc *completion_finder) +{ + ReadLineState *rs = g_malloc0(sizeof(*rs)); + + rs->hist_entry = -1; + rs->opaque = opaque; + rs->printf_func = printf_func; + rs->flush_func = flush_func; + rs->completion_finder = completion_finder; + + return rs; +} diff --git a/src/util/rfifolock.c b/src/util/rfifolock.c new file mode 100644 index 0000000..afbf748 --- /dev/null +++ b/src/util/rfifolock.c @@ -0,0 +1,78 @@ +/* + * Recursive FIFO lock + * + * Copyright Red Hat, Inc. 2013 + * + * Authors: + * Stefan Hajnoczi <stefanha@redhat.com> + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. 
+ * + */ + +#include <assert.h> +#include "qemu/rfifolock.h" + +/* Initialise @r with an empty ticket queue and no owner; @cb, if not NULL, is called with @opaque each time a locker finds it must wait. */ void rfifolock_init(RFifoLock *r, void (*cb)(void *), void *opaque) +{ + qemu_mutex_init(&r->lock); + r->head = 0; + r->tail = 0; + qemu_cond_init(&r->cond); + r->nesting = 0; + r->cb = cb; + r->cb_opaque = opaque; +} + +/* Destroy the condition variable and mutex of @r. */ void rfifolock_destroy(RFifoLock *r) +{ + qemu_cond_destroy(&r->cond); + qemu_mutex_destroy(&r->lock); +} + +/* + * Theory of operation: + * + * In order to ensure FIFO ordering, implement a ticketlock. Threads acquiring + * the lock enqueue themselves by incrementing the tail index. When the lock + * is unlocked, the head is incremented and waiting threads are notified. + * + * Recursive locking does not take a ticket since the head is only incremented + * when the outermost recursive caller unlocks. + */ +void rfifolock_lock(RFifoLock *r) +{ + qemu_mutex_lock(&r->lock); + + /* Take a ticket */ + unsigned int ticket = r->tail++; + + if (r->nesting > 0 && qemu_thread_is_self(&r->owner_thread)) { + r->tail--; /* put ticket back, we're nesting */ + } else { + while (ticket != r->head) { + /* Invoke optional contention callback */ + if (r->cb) { + r->cb(r->cb_opaque); + } + qemu_cond_wait(&r->cond, &r->lock); + } + } + + qemu_thread_get_self(&r->owner_thread); + r->nesting++; + qemu_mutex_unlock(&r->lock); +} + +/* Drop one level of nesting; when the outermost level is released, advance head and wake all waiters. Asserts that the caller is the current owner. */ void rfifolock_unlock(RFifoLock *r) +{ + qemu_mutex_lock(&r->lock); + assert(r->nesting > 0); + assert(qemu_thread_is_self(&r->owner_thread)); + if (--r->nesting == 0) { + r->head++; + qemu_cond_broadcast(&r->cond); + } + qemu_mutex_unlock(&r->lock); +} diff --git a/src/util/throttle.c b/src/util/throttle.c new file mode 100644 index 0000000..1113671 --- /dev/null +++ b/src/util/throttle.c @@ -0,0 +1,445 @@ +/* + * QEMU throttling infrastructure + * + * Copyright (C) Nodalink, EURL. 2013-2014 + * Copyright (C) Igalia, S.L. 
2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/throttle.h" +#include "qemu/timer.h" +#include "block/aio.h" + +/* Make a bucket leak: lower its level by avg * delta_ns / + * NANOSECONDS_PER_SECOND, never going below 0. + * + * @bkt: the bucket to leak + * @delta_ns: the elapsed time in ns + */ +void throttle_leak_bucket(LeakyBucket *bkt, int64_t delta_ns) +{ + double leak; + + /* compute how much to leak */ + leak = (bkt->avg * (double) delta_ns) / NANOSECONDS_PER_SECOND; + + /* make the bucket leak */ + bkt->level = MAX(bkt->level - leak, 0); +} + +/* Compute the time elapsed since the last leak and leak every bucket + * proportionally. previous_leak is updated even when the delta is not + * positive, in which case nothing leaks. + * + * @ts: the throttle state + * @now: the current timestamp in ns + */ +static void throttle_do_leak(ThrottleState *ts, int64_t now) +{ + /* compute the time elapsed since the last leak */ + int64_t delta_ns = now - ts->previous_leak; + int i; + + ts->previous_leak = now; + + if (delta_ns <= 0) { + return; + } + + /* make each bucket leak */ + for (i = 0; i < BUCKETS_COUNT; i++) { + throttle_leak_bucket(&ts->cfg.buckets[i], delta_ns); + } +} + +/* Do the real job of computing the time to wait. + * + * @limit: the throttling limit (the divisor; the visible caller passes a + * non-zero avg) + * @extra: the number of units to delay + * @ret: the time to wait in ns + */ +static int64_t throttle_do_compute_wait(double limit, double extra) +{ + double wait = extra * NANOSECONDS_PER_SECOND; 
+ wait /= limit; + return wait; +} + +/* Compute the wait time in ns that a leaky bucket requires. + * + * @bkt: the leaky bucket we operate on + * @ret: the resulting wait time in ns, or 0 if the operation can go + * through (avg is 0, or level does not exceed max) + */ +int64_t throttle_compute_wait(LeakyBucket *bkt) +{ + double extra; /* the number of extra units blocking the io */ + + if (!bkt->avg) { + return 0; + } + + extra = bkt->level - bkt->max; + + if (extra <= 0) { + return 0; + } + + return throttle_do_compute_wait(bkt->avg, extra); +} + +/* Compute the time this kind of I/O must wait: the largest wait among the + * total buckets and the buckets for the given direction. + * + * @ts: the throttle state + * @is_write: true if the current IO is a write, false if it's a read + * @ret: time to wait in ns + */ +static int64_t throttle_compute_wait_for(ThrottleState *ts, + bool is_write) +{ + BucketType to_check[2][4] = { {THROTTLE_BPS_TOTAL, + THROTTLE_OPS_TOTAL, + THROTTLE_BPS_READ, + THROTTLE_OPS_READ}, + {THROTTLE_BPS_TOTAL, + THROTTLE_OPS_TOTAL, + THROTTLE_BPS_WRITE, + THROTTLE_OPS_WRITE}, }; + int64_t wait, max_wait = 0; + int i; + + for (i = 0; i < 4; i++) { + BucketType index = to_check[is_write][i]; + wait = throttle_compute_wait(&ts->cfg.buckets[index]); + if (wait > max_wait) { + max_wait = wait; + } + } + + return max_wait; +} + +/* Compute the timer for this type of operation. + * + * @ts: the throttle state + * @is_write: the type of operation + * @now: the current clock timestamp + * @next_timestamp: set to now + wait when a wait is needed, else to now + * @ret: true if a timer must be set + */ +bool throttle_compute_timer(ThrottleState *ts, + bool is_write, + int64_t now, + int64_t *next_timestamp) +{ + int64_t wait; + + /* leak proportionally to the time elapsed */ + throttle_do_leak(ts, now); + + /* compute the wait time if any */ + wait = throttle_compute_wait_for(ts, is_write); + + /* if the code must wait compute when the next timer should fire */ + if (wait) { + *next_timestamp = now + wait; + return true; + } + + /* else no need to wait at all */ + *next_timestamp = now; + return false; +} + +/* Add timers to event loop 
*/ +void throttle_timers_attach_aio_context(ThrottleTimers *tt, + AioContext *new_context) +{ + /* timers[0] uses the read callback, timers[1] the write callback. */ tt->timers[0] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->read_timer_cb, tt->timer_opaque); + tt->timers[1] = aio_timer_new(new_context, tt->clock_type, SCALE_NS, + tt->write_timer_cb, tt->timer_opaque); +} + +/* To be called first on the ThrottleState: zero the whole structure. */ +void throttle_init(ThrottleState *ts) +{ + memset(ts, 0, sizeof(ThrottleState)); +} + +/* To be called first on the ThrottleTimers: zero the structure, store the + * clock type, callbacks and opaque pointer, then create both timers in + * @aio_context. + */ +void throttle_timers_init(ThrottleTimers *tt, + AioContext *aio_context, + QEMUClockType clock_type, + QEMUTimerCB *read_timer_cb, + QEMUTimerCB *write_timer_cb, + void *timer_opaque) +{ + memset(tt, 0, sizeof(ThrottleTimers)); + + tt->clock_type = clock_type; + tt->read_timer_cb = read_timer_cb; + tt->write_timer_cb = write_timer_cb; + tt->timer_opaque = timer_opaque; + throttle_timers_attach_aio_context(tt, aio_context); +} + +/* Delete and free *timer, then set it to NULL; asserts it is not NULL. */ +static void throttle_timer_destroy(QEMUTimer **timer) +{ + assert(*timer != NULL); + + timer_del(*timer); + timer_free(*timer); + *timer = NULL; +} + +/* Remove timers from event loop */ +void throttle_timers_detach_aio_context(ThrottleTimers *tt) +{ + int i; + + for (i = 0; i < 2; i++) { + throttle_timer_destroy(&tt->timers[i]); + } +} + +/* To be called last on the ThrottleTimers */ +void throttle_timers_destroy(ThrottleTimers *tt) +{ + throttle_timers_detach_aio_context(tt); +} + +/* Are the throttling timers set up? Only timers[0] is inspected. */ +bool throttle_timers_are_initialized(ThrottleTimers *tt) +{ + if (tt->timers[0]) { + return true; + } + + return false; +} + +/* Must any throttling be done? + * + * @cfg: the throttling configuration to inspect + * @ret: true if any bucket has avg > 0, else false + */ +bool throttle_enabled(ThrottleConfig *cfg) +{ + int i; + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].avg > 0) { + return true; + } + } + + return false; +} + +/* return true if any two throttling parameters 
conflicts + * + * @cfg: the throttling configuration to inspect + * @ret: true if any conflict detected else false + */ +bool throttle_conflicting(ThrottleConfig *cfg) +{ + bool bps_flag, ops_flag; + bool bps_max_flag, ops_max_flag; + + bps_flag = cfg->buckets[THROTTLE_BPS_TOTAL].avg && + (cfg->buckets[THROTTLE_BPS_READ].avg || + cfg->buckets[THROTTLE_BPS_WRITE].avg); + + ops_flag = cfg->buckets[THROTTLE_OPS_TOTAL].avg && + (cfg->buckets[THROTTLE_OPS_READ].avg || + cfg->buckets[THROTTLE_OPS_WRITE].avg); + + bps_max_flag = cfg->buckets[THROTTLE_BPS_TOTAL].max && + (cfg->buckets[THROTTLE_BPS_READ].max || + cfg->buckets[THROTTLE_BPS_WRITE].max); + + ops_max_flag = cfg->buckets[THROTTLE_OPS_TOTAL].max && + (cfg->buckets[THROTTLE_OPS_READ].max || + cfg->buckets[THROTTLE_OPS_WRITE].max); + + return bps_flag || ops_flag || bps_max_flag || ops_max_flag; +} + +/* check if a throttling configuration is valid + * @cfg: the throttling configuration to inspect + * @ret: true if valid else false + */ +bool throttle_is_valid(ThrottleConfig *cfg) +{ + bool invalid = false; + int i; + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].avg < 0) { + invalid = true; + } + } + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].max < 0) { + invalid = true; + } + } + + return !invalid; +} + +/* check if bps_max/iops_max is used without bps/iops + * @cfg: the throttling configuration to inspect + */ +bool throttle_max_is_missing_limit(ThrottleConfig *cfg) +{ + int i; + + for (i = 0; i < BUCKETS_COUNT; i++) { + if (cfg->buckets[i].max && !cfg->buckets[i].avg) { + return true; + } + } + return false; +} + +/* fix bucket parameters */ +static void throttle_fix_bucket(LeakyBucket *bkt) +{ + double min; + + /* zero bucket level */ + bkt->level = 0; + + /* The following is done to cope with the Linux CFQ block scheduler + * which regroup reads and writes by block of 100ms in the guest. 
+ * When they are two process one making reads and one making writes cfq + * make a pattern looking like the following: + * WWWWWWWWWWWRRRRRRRRRRRRRRWWWWWWWWWWWWWwRRRRRRRRRRRRRRRRR + * Having a max burst value of 100ms of the average will help smooth the + * throttling + */ + min = bkt->avg / 10; + if (bkt->avg && !bkt->max) { + bkt->max = min; + } +} + +/* take care of canceling a timer */ +static void throttle_cancel_timer(QEMUTimer *timer) +{ + assert(timer != NULL); + + timer_del(timer); +} + +/* Used to configure the throttle + * + * @ts: the throttle state we are working on + * @tt: the throttle timers we use in this aio context + * @cfg: the config to set + */ +void throttle_config(ThrottleState *ts, + ThrottleTimers *tt, + ThrottleConfig *cfg) +{ + int i; + + ts->cfg = *cfg; + + for (i = 0; i < BUCKETS_COUNT; i++) { + throttle_fix_bucket(&ts->cfg.buckets[i]); + } + + ts->previous_leak = qemu_clock_get_ns(tt->clock_type); + + for (i = 0; i < 2; i++) { + throttle_cancel_timer(tt->timers[i]); + } +} + +/* used to get config + * + * @ts: the throttle state we are working on + * @cfg: the config to write + */ +void throttle_get_config(ThrottleState *ts, ThrottleConfig *cfg) +{ + *cfg = ts->cfg; +} + + +/* Schedule the read or write timer if needed + * + * NOTE: this function is not unit tested due to it's usage of timer_mod + * + * @tt: the timers structure + * @is_write: the type of operation (read/write) + * @ret: true if the timer has been scheduled else false + */ +bool throttle_schedule_timer(ThrottleState *ts, + ThrottleTimers *tt, + bool is_write) +{ + int64_t now = qemu_clock_get_ns(tt->clock_type); + int64_t next_timestamp; + bool must_wait; + + must_wait = throttle_compute_timer(ts, + is_write, + now, + &next_timestamp); + + /* request not throttled */ + if (!must_wait) { + return false; + } + + /* request throttled and timer pending -> do nothing */ + if (timer_pending(tt->timers[is_write])) { + return true; + } + + /* request throttled and timer not 
pending -> arm timer */ + timer_mod(tt->timers[is_write], next_timestamp); + return true; +} + +/* do the accounting for this operation + * + * @is_write: the type of operation (read/write) + * @size: the size of the operation + */ +void throttle_account(ThrottleState *ts, bool is_write, uint64_t size) +{ + double units = 1.0; + + /* if cfg.op_size is defined and smaller than size we compute unit count */ + if (ts->cfg.op_size && size > ts->cfg.op_size) { + units = (double) size / ts->cfg.op_size; + } + + ts->cfg.buckets[THROTTLE_BPS_TOTAL].level += size; + ts->cfg.buckets[THROTTLE_OPS_TOTAL].level += units; + + if (is_write) { + ts->cfg.buckets[THROTTLE_BPS_WRITE].level += size; + ts->cfg.buckets[THROTTLE_OPS_WRITE].level += units; + } else { + ts->cfg.buckets[THROTTLE_BPS_READ].level += size; + ts->cfg.buckets[THROTTLE_OPS_READ].level += units; + } +} + diff --git a/src/util/timed-average.c b/src/util/timed-average.c new file mode 100644 index 0000000..a2dfb48 --- /dev/null +++ b/src/util/timed-average.c @@ -0,0 +1,231 @@ +/* + * QEMU timed average computation + * + * Copyright (C) Nodalink, EURL. 2014 + * Copyright (C) Igalia, S.L. 2015 + * + * Authors: + * Benoît Canet <benoit.canet@nodalink.com> + * Alberto Garcia <berto@igalia.com> + * + * This program is free sofware: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Sofware Foundation, either version 2 of the License, or + * (at your option) version 3 or any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <string.h> + +#include "qemu/timed-average.h" + +/* This module computes an average of a set of values within a time + * window. + * + * Algorithm: + * + * - Create two windows with a certain expiration period, and + * offsetted by period / 2. + * - Each time you want to account a new value, do it in both windows. + * - The minimum / maximum / average values are always returned from + * the oldest window. + * + * Example: + * + * t=0 |t=0.5 |t=1 |t=1.5 |t=2 + * wnd0: [0,0.5)|wnd0: [0.5,1.5) | |wnd0: [1.5,2.5) | + * wnd1: [0,1) | |wnd1: [1,2) | | + * + * Values are returned from: + * + * wnd0---------|wnd1------------|wnd0---------|wnd1-------------| + */ + +/* Update the expiration of a time window + * + * @w: the window used + * @now: the current time in nanoseconds + * @period: the expiration period in nanoseconds + */ +static void update_expiration(TimedAverageWindow *w, int64_t now, + int64_t period) +{ + /* time elapsed since the last theoretical expiration */ + int64_t elapsed = (now - w->expiration) % period; + /* time remaininging until the next expiration */ + int64_t remaining = period - elapsed; + /* compute expiration */ + w->expiration = now + remaining; +} + +/* Reset a window + * + * @w: the window to reset + */ +static void window_reset(TimedAverageWindow *w) +{ + w->min = UINT64_MAX; + w->max = 0; + w->sum = 0; + w->count = 0; +} + +/* Get the current window (that is, the one with the earliest + * expiration time). 
+ * + * @ta: the TimedAverage structure + * @ret: a pointer to the current window + */ +static TimedAverageWindow *current_window(TimedAverage *ta) +{ + return &ta->windows[ta->current]; +} + +/* Initialize a TimedAverage structure + * + * @ta: the TimedAverage structure + * @clock_type: the type of clock to use + * @period: the time window period in nanoseconds + */ +void timed_average_init(TimedAverage *ta, QEMUClockType clock_type, + uint64_t period) +{ + int64_t now = qemu_clock_get_ns(clock_type); + + /* Returned values are from the oldest window, so they belong to + * the interval [ta->period/2,ta->period). By adjusting the + * requested period by 4/3, we guarantee that they're in the + * interval [2/3 period,4/3 period), closer to the requested + * period on average */ + ta->period = (uint64_t) period * 4 / 3; + ta->clock_type = clock_type; + ta->current = 0; + + window_reset(&ta->windows[0]); + window_reset(&ta->windows[1]); + + /* Both windows are offsetted by half a period */ + ta->windows[0].expiration = now + ta->period / 2; + ta->windows[1].expiration = now + ta->period; +} + +/* Check if the time windows have expired, updating their counters and + * expiration time if that's the case. 
+ * + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) within the current + * window will be stored here + */ +static void check_expirations(TimedAverage *ta, uint64_t *elapsed) +{ + int64_t now = qemu_clock_get_ns(ta->clock_type); + int i; + + assert(ta->period != 0); + + /* Check if the windows have expired */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + if (w->expiration <= now) { + window_reset(w); + update_expiration(w, now, ta->period); + } + } + + /* Make ta->current point to the oldest window */ + if (ta->windows[0].expiration < ta->windows[1].expiration) { + ta->current = 0; + } else { + ta->current = 1; + } + + /* Calculate the elapsed time within the current window */ + if (elapsed) { + int64_t remaining = ta->windows[ta->current].expiration - now; + *elapsed = ta->period - remaining; + } +} + +/* Account a value + * + * @ta: the TimedAverage structure + * @value: the value to account + */ +void timed_average_account(TimedAverage *ta, uint64_t value) +{ + int i; + check_expirations(ta, NULL); + + /* Do the accounting in both windows at the same time */ + for (i = 0; i < 2; i++) { + TimedAverageWindow *w = &ta->windows[i]; + + w->sum += value; + w->count++; + + if (value < w->min) { + w->min = value; + } + + if (value > w->max) { + w->max = value; + } + } +} + +/* Get the minimum value + * + * @ta: the TimedAverage structure + * @ret: the minimum value + */ +uint64_t timed_average_min(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta, NULL); + w = current_window(ta); + return w->min < UINT64_MAX ? w->min : 0; +} + +/* Get the average value + * + * @ta: the TimedAverage structure + * @ret: the average value + */ +uint64_t timed_average_avg(TimedAverage *ta) +{ + TimedAverageWindow *w; + check_expirations(ta, NULL); + w = current_window(ta); + return w->count > 0 ? 
w->sum / w->count : 0; +} + +/* Get the maximum value + * + * @ta: the TimedAverage structure + * @ret: the maximum value + */ +uint64_t timed_average_max(TimedAverage *ta) +{ + check_expirations(ta, NULL); + return current_window(ta)->max; +} + +/* Get the sum of all accounted values + * @ta: the TimedAverage structure + * @elapsed: if non-NULL, the elapsed time (in ns) will be stored here + * @ret: the sum of all accounted values + */ +uint64_t timed_average_sum(TimedAverage *ta, uint64_t *elapsed) +{ + TimedAverageWindow *w; + check_expirations(ta, elapsed); + w = current_window(ta); + return w->sum; +} diff --git a/src/util/unicode.c b/src/util/unicode.c new file mode 100644 index 0000000..d1c8658 --- /dev/null +++ b/src/util/unicode.c @@ -0,0 +1,100 @@ +/* + * Dealing with Unicode + * + * Copyright (C) 2013 Red Hat, Inc. + * + * Authors: + * Markus Armbruster <armbru@redhat.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu-common.h" + +/** + * mod_utf8_codepoint: + * @s: string encoded in modified UTF-8 + * @n: maximum number of bytes to read from @s, if less than 6 + * @end: set to end of sequence on return + * + * Convert the modified UTF-8 sequence at the start of @s. Modified + * UTF-8 is exactly like UTF-8, except U+0000 is encoded as + * "\xC0\x80". + * + * If @n is zero or @s points to a zero byte, the sequence is invalid, + * and @end is set to @s. + * + * If @s points to an impossible byte (0xFE or 0xFF) or a continuation + * byte, the sequence is invalid, and @end is set to @s + 1 + * + * Else, the first byte determines how many continuation bytes are + * expected. If there are fewer, the sequence is invalid, and @end is + * set to @s + 1 + actual number of continuation bytes. Else, the + * sequence is well-formed, and @end is set to @s + 1 + expected + * number of continuation bytes. 
/**
 * mod_utf8_codepoint:
 * @s: string encoded in modified UTF-8
 * @n: maximum number of bytes to read from @s, if less than 6
 * @end: set to end of sequence on return
 *
 * Convert the modified UTF-8 sequence at the start of @s.  Modified
 * UTF-8 is exactly like UTF-8, except U+0000 is encoded as
 * "\xC0\x80".
 *
 * If @n is zero or @s points to a zero byte, the sequence is invalid,
 * and @end is set to @s.
 *
 * If @s points to an impossible byte (0xFE or 0xFF) or a continuation
 * byte, the sequence is invalid, and @end is set to @s + 1
 *
 * Else, the first byte determines how many continuation bytes are
 * expected.  If there are fewer, the sequence is invalid, and @end is
 * set to @s + 1 + actual number of continuation bytes.  Else, the
 * sequence is well-formed, and @end is set to @s + 1 + expected
 * number of continuation bytes.
 *
 * A well-formed sequence is valid unless it encodes a codepoint
 * outside the Unicode range U+0000..U+10FFFF, one of Unicode's 66
 * noncharacters, a surrogate codepoint, or is overlong.  Except the
 * overlong sequence "\xC0\x80" is valid.
 *
 * Conversion succeeds if and only if the sequence is valid.
 *
 * Returns: the Unicode codepoint on success, -1 on failure.
 */
int mod_utf8_codepoint(const char *s, size_t n, char **end)
{
    /* min_cp[len - 2] is the smallest codepoint that is not overlong when
     * encoded in len bytes.  The table is read-only. */
    static const int min_cp[5] = {
        0x80, 0x800, 0x10000, 0x200000, 0x4000000
    };
    const unsigned char *p;
    unsigned byte, mask, len, i;
    int cp;

    if (n == 0 || *s == 0) {
        /* empty sequence */
        *end = (char *)s;
        return -1;
    }

    p = (const unsigned char *)s;
    byte = *p++;
    if (byte < 0x80) {
        cp = byte;              /* one byte sequence */
    } else if (byte >= 0xFE) {
        cp = -1;                /* impossible bytes 0xFE, 0xFF */
    } else if ((byte & 0x40) == 0) {
        cp = -1;                /* unexpected continuation byte */
    } else {
        /* multi-byte sequence: the run of leading 1 bits gives the length */
        len = 0;
        for (mask = 0x80; byte & mask; mask >>= 1) {
            len++;
        }
        assert(len > 1 && len < 7);
        /* the bits below the first 0 bit are the top bits of the codepoint */
        cp = byte & (mask - 1);
        for (i = 1; i < len; i++) {
            /* bytes beyond the @n limit are treated as missing */
            byte = i < n ? *p : 0;
            if ((byte & 0xC0) != 0x80) {
                cp = -1;        /* continuation byte missing */
                goto out;
            }
            p++;
            cp <<= 6;
            cp |= byte & 0x3F;
        }
        if (cp > 0x10FFFF) {
            cp = -1;            /* beyond Unicode range */
        } else if ((cp >= 0xFDD0 && cp <= 0xFDEF)
                   || (cp & 0xFFFE) == 0xFFFE) {
            cp = -1;            /* noncharacter */
        } else if (cp >= 0xD800 && cp <= 0xDFFF) {
            cp = -1;            /* surrogate code point */
        } else if (cp < min_cp[len - 2] && !(cp == 0 && len == 2)) {
            cp = -1;            /* overlong, not \xC0\x80 */
        }
    }

out:
    *end = (char *)p;
    return cp;
}
All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * daniel@veillard.com + * + ** + * + * Copyright (C) 2007, 2009-2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones <rjones@redhat.com> + * + */ + +#include <glib.h> +#include <string.h> +#include <stdio.h> + +#include "qemu/uri.h" + +static void uri_clean(URI *uri); + +/* + * Old rule from 2396 used in legacy handling code + * alpha = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + * "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + * "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +#ifdef IS_DIGIT +#undef IS_DIGIT +#endif +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ + ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ + ((x) == '(') || ((x) == ')')) + +/* + * unwise = "{" | "}" | "|" | "\" | "^" | "`" + */ + +#define IS_UNWISE(p) \ + (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ + ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ + ((*(p) == ']')) || ((*(p) == '`'))) +/* + * reserved = ";" | "/" | "?" 
/*
 * Skip to next pointer char, handle escaped sequences
 *
 * A '%' is skipped together with the two characters that follow it.
 * The macro itself does not check that those two characters exist; the
 * condition guarding the call is expected to have done so.
 *
 * p must be a modifiable pointer lvalue.  It is evaluated more than once,
 * so it must not have side effects.  The parameter is parenthesized so
 * that an argument such as *pp advances *pp and not pp.
 */

#define NEXT(p) ((*(p) == '%') ? (p) += 3 : (p)++)
/ "_" / "~" + */ +#define ISA_UNRESERVED(p) \ + ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ + ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) + +/* + * pct-encoded = "%" HEXDIG HEXDIG + */ +#define ISA_PCT_ENCODED(p) \ + ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) + +/* + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + */ +#define ISA_PCHAR(p) \ + (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ + ((*(p) == ':')) || ((*(p) == '@'))) + +/** + * rfc3986_parse_scheme: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI scheme + * + * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_scheme(URI *uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!ISA_ALPHA(cur)) + return(2); + cur++; + while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || + (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; + if (uri != NULL) { + g_free(uri->scheme); + uri->scheme = g_strndup(*str, cur - *str); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_fragment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * fragment = *( pchar / "/" / "?" ) + * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' + * in the fragment identifier but this is used very broadly for + * xpointer scheme selection, so we are allowing it here to not break + * for example all the DocBook processing chains. 
+ * + * Returns 0 or the error code + */ +static int +rfc3986_parse_fragment(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + (*cur == '[') || (*cur == ']') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + g_free(uri->fragment); + if (uri->cleanup & 2) + uri->fragment = g_strndup(*str, cur - *str); + else + uri->fragment = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_query: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_query(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + g_free(uri->query); + uri->query = g_strndup (*str, cur - *str); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_port: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse a port part and fills in the appropriate fields + * of the @uri structure + * + * port = *DIGIT + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_port(URI *uri, const char **str) +{ + const char *cur = *str; + int port = 0; + + if (ISA_DIGIT(cur)) { + while (ISA_DIGIT(cur)) { + port = port * 10 + (*cur - '0'); + if (port > 65535) { + return 1; + } + cur++; + } + if (uri) { + uri->port = port; + } + *str = cur; + return 0; + } + return 1; +} + +/** + * rfc3986_parse_user_info: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an user informations part and fills in the appropriate fields + * of the @uri structure + * + * userinfo = *( 
/**
 * rfc3986_parse_dec_octet:
 * @str:  the string to analyze
 *
 *    dec-octet   = DIGIT                 ; 0-9
 *                / %x31-39 DIGIT         ; 10-99
 *                / "1" 2DIGIT            ; 100-199
 *                / "2" %x30-34 DIGIT     ; 200-249
 *                / "25" %x30-35          ; 250-255
 *
 * Skip a dec-octet.  At most three digits are looked at; a fourth digit,
 * if any, is left in place.  A value with a leading zero ("01") or
 * above 255 is rejected.  *@str is only updated on success.
 *
 * Returns 0 if found and skipped, 1 otherwise
 */
static int
rfc3986_parse_dec_octet(const char **str)
{
    const char *cur = *str;
    int ndigits = 0;
    int value = 0;

    /* Gather up to three digits.  The scan stops at the first non-digit,
     * so it never reads past the terminating NUL. */
    while (ndigits < 3 && cur[ndigits] >= '0' && cur[ndigits] <= '9') {
        value = value * 10 + (cur[ndigits] - '0');
        ndigits++;
    }

    if (ndigits == 0) {
        return 1;               /* not a number at all */
    }
    if (ndigits > 1 && cur[0] == '0') {
        return 1;               /* leading zero is not allowed */
    }
    if (value > 255) {
        return 1;               /* does not fit in an octet */
    }

    *str = cur + ndigits;
    return 0;
}
/**
 * rfc3986_parse_host:
 * @uri:  pointer to an URI structure, may be NULL
 * @str:  the string to analyze
 *
 * Parse an host part and fills in the appropriate fields
 * of the @uri structure
 *
 * host          = IP-literal / IPv4address / reg-name
 * IP-literal    = "[" ( IPv6address / IPvFuture  ) "]"
 * IPv4address   = dec-octet "." dec-octet "." dec-octet "." dec-octet
 * reg-name      = *( unreserved / pct-encoded / sub-delims )
 *
 * When @uri is not NULL, uri->authority is freed and cleared and
 * uri->server is replaced by the host found, or by NULL if the host is
 * empty.  The host is stored as found in the string when bit 2 of
 * uri->cleanup is set, and unescaped otherwise.  On success *@str is
 * moved past the host.
 *
 * Returns 0 on success, 1 if a '[' is not closed by a ']'
 */
static int
rfc3986_parse_host(URI *uri, const char **str)
{
    const char *cur = *str;
    const char *host;

    host = cur;
    /*
     * IPv6 and future addressing scheme are enclosed between brackets.
     * The content of the brackets is not validated, only skipped.
     */
    if (*cur == '[') {
        cur++;
        while ((*cur != ']') && (*cur != 0))
            cur++;
        if (*cur != ']')
            return(1);
        cur++;
        goto found;
    }
    /*
     * try to parse an IPv4
     *
     * NOTE(review): only the first '.' is skipped with cur++.  After the
     * second and third octets the '.' is tested but not consumed, so the
     * next rfc3986_parse_dec_octet() call starts on the '.', fails, and
     * control goes to not_ipv4.  As written, "goto found" below is never
     * reached from this branch; dotted-quad hosts are accepted by the
     * reg-name loop instead, which allows digits and '.'.  Adding the
     * missing cur++ would make an IPv4 prefix win over a longer reg-name
     * such as "1.2.3.4.example.com", so do not "fix" this without also
     * checking what follows the fourth octet.
     */
    if (ISA_DIGIT(cur)) {
        if (rfc3986_parse_dec_octet(&cur) != 0)
            goto not_ipv4;
        if (*cur != '.')
            goto not_ipv4;
        cur++;
        if (rfc3986_parse_dec_octet(&cur) != 0)
            goto not_ipv4;
        if (*cur != '.')
            goto not_ipv4;
        if (rfc3986_parse_dec_octet(&cur) != 0)
            goto not_ipv4;
        if (*cur != '.')
            goto not_ipv4;
        if (rfc3986_parse_dec_octet(&cur) != 0)
            goto not_ipv4;
        goto found;
not_ipv4:
        /* rewind: the reg-name scan restarts from the beginning */
        cur = *str;
    }
    /*
     * then this should be a hostname which can be empty
     */
    while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur))
        NEXT(cur);
found:
    if (uri != NULL) {
        g_free(uri->authority);
        uri->authority = NULL;
        g_free(uri->server);
        if (cur != host) {
            /* bit 2 of cleanup: keep the host as written, no unescaping */
            if (uri->cleanup & 2)
                uri->server = g_strndup(host, cur - host);
            else
                uri->server = uri_string_unescape(host, cur - host, NULL);
        } else
            uri->server = NULL;
    }
    *str = cur;
    return(0);
}
/**
 * rfc3986_parse_segment:
 * @str:  the string to analyze
 * @forbid: an optional forbidden character
 * @empty: allow an empty segment
 *
 * Skip a path segment.  Characters are consumed as long as they are
 * pchar and differ from @forbid; pass 0 as @forbid to forbid nothing.
 * *@str is moved past the consumed characters.
 *
 * segment       = *pchar
 * segment-nz    = 1*pchar
 * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" )
 *               ; non-zero-length segment without any colon ":"
 *
 * Returns 0 on success, 1 if the string does not start with a pchar
 * while @empty is zero
 */
static int
rfc3986_parse_segment(const char **str, char forbid, int empty)
{
    const char *pos = *str;

    if (!ISA_PCHAR(pos)) {
        /* nothing to consume: fine only if an empty segment is allowed */
        return empty ? 0 : 1;
    }

    while ((*pos != forbid) && ISA_PCHAR(pos)) {
        NEXT(pos);
    }

    *str = pos;
    return 0;
}
*/ +static int +rfc3986_parse_path_absolute(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if (*cur != '/') + return(1); + cur++; + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret == 0) { + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + } + if (uri != NULL) { + g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_rootless: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path without root and fills in the appropriate fields + * of the @uri structure + * + * path-rootless = segment-nz *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_rootless(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_no_scheme: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path which is not a scheme and fills in the appropriate fields + * of the @uri structure + * + * path-noscheme = segment-nz-nc *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_no_scheme(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, ':', 0); + if (ret != 0) return(ret); + while (*cur == 
'/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_hier_part: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an hierarchical part and fills in the appropriate fields + * of the @uri structure + * + * hier-part = "//" authority path-abempty + * / path-absolute + * / path-rootless + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_hier_part(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if ((*cur == '/') && (*(cur + 1) == '/')) { + cur += 2; + ret = rfc3986_parse_authority(uri, &cur); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &cur); + if (ret != 0) return(ret); + *str = cur; + return(0); + } else if (*cur == '/') { + ret = rfc3986_parse_path_absolute(uri, &cur); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(cur)) { + ret = rfc3986_parse_path_rootless(uri, &cur); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + g_free(uri->path); + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_relative_ref: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * relative-ref = relative-part [ "?" 
query ] [ "#" fragment ] + * relative-part = "//" authority path-abempty + * / path-absolute + * / path-noscheme + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_relative_ref(URI *uri, const char *str) { + int ret; + + if ((*str == '/') && (*(str + 1) == '/')) { + str += 2; + ret = rfc3986_parse_authority(uri, &str); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &str); + if (ret != 0) return(ret); + } else if (*str == '/') { + ret = rfc3986_parse_path_absolute(uri, &str); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(str)) { + ret = rfc3986_parse_path_no_scheme(uri, &str); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + g_free(uri->path); + uri->path = NULL; + } + } + + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + + +/** + * rfc3986_parse: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * scheme ":" hier-part [ "?" 
query ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse(URI *uri, const char *str) { + int ret; + + ret = rfc3986_parse_scheme(uri, &str); + if (ret != 0) return(ret); + if (*str != ':') { + return(1); + } + str++; + ret = rfc3986_parse_hier_part(uri, &str); + if (ret != 0) return(ret); + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + +/** + * rfc3986_parse_uri_reference: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_uri_reference(URI *uri, const char *str) { + int ret; + + if (str == NULL) + return(-1); + uri_clean(uri); + + /* + * Try first to parse absolute refs, then fallback to relative if + * it fails. 
+ */ + ret = rfc3986_parse(uri, str); + if (ret != 0) { + uri_clean(uri); + ret = rfc3986_parse_relative_ref(uri, str); + if (ret != 0) { + uri_clean(uri); + return(ret); + } + } + return(0); +} + +/** + * uri_parse: + * @str: the URI string to analyze + * + * Parse an URI based on RFC 3986 + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse(const char *str) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + ret = rfc3986_parse_uri_reference(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + return(uri); +} + +/** + * uri_parse_into: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string based on RFC 3986 and fills in the + * appropriate fields of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +int +uri_parse_into(URI *uri, const char *str) { + return(rfc3986_parse_uri_reference(uri, str)); +} + +/** + * uri_parse_raw: + * @str: the URI string to analyze + * @raw: if 1 unescaping of URI pieces are disabled + * + * Parse an URI but allows to keep intact the original fragments. 
 *
 * When @raw is non-zero, bit 2 of the cleanup flags is set before parsing,
 * which makes the path parsers store the path verbatim instead of
 * percent-decoding it.
 *
 * URI-reference = URI / relative-ref
 *
 * Returns a newly built URI or NULL in case of error
 */
URI *
uri_parse_raw(const char *str, int raw) {
    URI *uri;
    int ret;

    if (str == NULL)
        return(NULL);
    uri = uri_new();
    if (raw) {
        uri->cleanup |= 2;
    }
    ret = uri_parse_into(uri, str);
    if (ret) {
        uri_free(uri);
        return(NULL);
    }
    return(uri);
}

/************************************************************************
 *                                                                      *
 *                    Generic URI structure functions                   *
 *                                                                      *
 ************************************************************************/

/**
 * uri_new:
 *
 * Simply creates an empty (zero-filled) URI
 *
 * Returns the new structure
 */
URI *
uri_new(void) {
    URI *ret;

    ret = g_new0(URI, 1);
    return(ret);
}

/**
 * realloc2n:
 * @ret:  the buffer to grow
 * @max:  in/out capacity of the buffer, not counting the terminator
 *
 * Doubles *@max and reallocates @ret to hold *@max + 1 bytes.
 *
 * NOTE(review): no upper bound is enforced here, and "*max * 2" is plain
 * int arithmetic, so callers are presumably expected to keep URIs far
 * below INT_MAX / 2 bytes.
 *
 * Returns the (possibly moved) buffer
 */
static char *
realloc2n(char *ret, int *max) {
    char *temp;
    int tmp;

    tmp = *max * 2;
    temp = g_realloc(ret, (tmp + 1));
    *max = tmp;
    return(temp);
}

/**
 * uri_to_string:
 * @uri:  pointer to an URI
 *
 * Save the URI as an escaped string.  The output is built in this order:
 * scheme ":" , then either the opaque part, or
 * "//" [user "@"] server [":" port] / "//" authority / bare "//" (when only
 * a scheme is set), followed by path and "?" query, and finally
 * "#" fragment.  Characters outside each component's allowed set are
 * written as %XX with upper-case hex digits; server and query are copied
 * verbatim.
 *
 * Returns a new string (to be deallocated by caller), or NULL when @uri
 * is NULL
 */
char *
uri_to_string(URI *uri) {
    char *ret = NULL;
    char *temp;
    const char *p;
    int len;
    int max;

    if (uri == NULL) return(NULL);


    /* Start with room for 80 characters plus the terminator. */
    max = 80;
    ret = g_malloc(max + 1);
    len = 0;

    if (uri->scheme != NULL) {
        p = uri->scheme;
        while (*p != 0) {
            if (len >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            ret[len++] = *p++;
        }
        if (len >= max) {
            temp = realloc2n(ret, &max);
            ret = temp;
        }
        ret[len++] = ':';
    }
    if (uri->opaque != NULL) {
        p = uri->opaque;
        while (*p != 0) {
            /* "+ 3" leaves room for a full %XX escape */
            if (len + 3 >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p)))
                ret[len++] = *p++;
            else {
                int val = *(unsigned char *)p++;
                int hi = val / 0x10, lo = val % 0x10;
                ret[len++] = '%';
                ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                ret[len++] = lo + (lo > 9? 'A'-10 : '0');
            }
        }
    } else {
        if (uri->server != NULL) {
            if (len + 3 >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            ret[len++] = '/';
            ret[len++] = '/';
            if (uri->user != NULL) {
                p = uri->user;
                while (*p != 0) {
                    if (len + 3 >= max) {
                        temp = realloc2n(ret, &max);
                        ret = temp;
                    }
                    if ((IS_UNRESERVED(*(p))) ||
                        ((*(p) == ';')) || ((*(p) == ':')) ||
                        ((*(p) == '&')) || ((*(p) == '=')) ||
                        ((*(p) == '+')) || ((*(p) == '$')) ||
                        ((*(p) == ',')))
                        ret[len++] = *p++;
                    else {
                        int val = *(unsigned char *)p++;
                        int hi = val / 0x10, lo = val % 0x10;
                        ret[len++] = '%';
                        ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                        ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                    }
                }
                if (len + 3 >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                ret[len++] = '@';
            }
            /* The server name is emitted as-is, without escaping. */
            p = uri->server;
            while (*p != 0) {
                if (len >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                ret[len++] = *p++;
            }
            if (uri->port > 0) {
                /*
                 * NOTE(review): this guarantees at least 11 bytes for
                 * snprintf(), which is enough for ':' plus 9 digits and
                 * the terminator; a 10-digit port would be truncated
                 * while len still advances by the untruncated length.
                 * Presumably ports never get that large -- confirm.
                 */
                if (len + 10 >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                len += snprintf(&ret[len], max - len, ":%d", uri->port);
            }
        } else if (uri->authority != NULL) {
            if (len + 3 >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            ret[len++] = '/';
            ret[len++] = '/';
            p = uri->authority;
            while (*p != 0) {
                if (len + 3 >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                if ((IS_UNRESERVED(*(p))) ||
                    ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) ||
                    ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) ||
                    ((*(p) == '=')) || ((*(p) == '+')))
                    ret[len++] = *p++;
                else {
                    int val = *(unsigned char *)p++;
                    int hi = val / 0x10, lo = val % 0x10;
                    ret[len++] = '%';
                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                }
            }
        } else if (uri->scheme != NULL) {
            /* Scheme but neither server nor authority: still emit "//". */
            if (len + 3 >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            ret[len++] = '/';
            ret[len++] = '/';
        }
        if (uri->path != NULL) {
            p = uri->path;
            /*
             * the colon in file:///d: should not be escaped or
             * Windows accesses fail later.
             */
            if ((uri->scheme != NULL) &&
                (p[0] == '/') &&
                (((p[1] >= 'a') && (p[1] <= 'z')) ||
                 ((p[1] >= 'A') && (p[1] <= 'Z'))) &&
                (p[2] == ':') &&
                (!strcmp(uri->scheme, "file"))) {
                if (len + 3 >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                /* copy "/", the drive letter and ":" verbatim */
                ret[len++] = *p++;
                ret[len++] = *p++;
                ret[len++] = *p++;
            }
            while (*p != 0) {
                if (len + 3 >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) ||
                    ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) ||
                    ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) ||
                    ((*(p) == ',')))
                    ret[len++] = *p++;
                else {
                    int val = *(unsigned char *)p++;
                    int hi = val / 0x10, lo = val % 0x10;
                    ret[len++] = '%';
                    ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                    ret[len++] = lo + (lo > 9? 'A'-10 : '0');
                }
            }
        }
        if (uri->query != NULL) {
            if (len + 1 >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            ret[len++] = '?';
            /* The query is emitted as-is, without escaping. */
            p = uri->query;
            while (*p != 0) {
                if (len + 1 >= max) {
                    temp = realloc2n(ret, &max);
                    ret = temp;
                }
                ret[len++] = *p++;
            }
        }
    }
    if (uri->fragment != NULL) {
        if (len + 3 >= max) {
            temp = realloc2n(ret, &max);
            ret = temp;
        }
        ret[len++] = '#';
        p = uri->fragment;
        while (*p != 0) {
            if (len + 3 >= max) {
                temp = realloc2n(ret, &max);
                ret = temp;
            }
            if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p))))
                ret[len++] = *p++;
            else {
                int val = *(unsigned char *)p++;
                int hi = val / 0x10, lo = val % 0x10;
                ret[len++] = '%';
                ret[len++] = hi + (hi > 9? 'A'-10 : '0');
                ret[len++] = lo + (lo > 9? 'A'-10 : '0');
            }
        }
    }
    if (len >= max) {
        temp = realloc2n(ret, &max);
        ret = temp;
    }
    ret[len] = 0;
    return(ret);
}

/**
 * uri_clean:
 * @uri:  pointer to an URI
 *
 * Make sure the URI struct is free of content: every string field is
 * released and reset to NULL.  The port and cleanup fields are left
 * untouched.  A NULL @uri is accepted and ignored.
 */
static void
uri_clean(URI *uri) {
    if (uri == NULL) return;

    g_free(uri->scheme);
    uri->scheme = NULL;
    g_free(uri->server);
    uri->server = NULL;
    g_free(uri->user);
    uri->user = NULL;
    g_free(uri->path);
    uri->path = NULL;
    g_free(uri->fragment);
    uri->fragment = NULL;
    g_free(uri->opaque);
    uri->opaque = NULL;
    g_free(uri->authority);
    uri->authority = NULL;
    g_free(uri->query);
    uri->query = NULL;
}

/**
 * uri_free:
 * @uri:  pointer to an URI
 *
 * Free up the URI struct and everything it owns.  A NULL @uri is
 * accepted and ignored.
 */
void
uri_free(URI *uri) {
    uri_clean(uri);
    g_free(uri);
}

/************************************************************************
 *                                                                      *
 *                           Helper functions                           *
 *                                                                      *
 ************************************************************************/

/**
 * normalize_uri_path:
 * @path:  pointer to the path string
 *
 * Applies the 5 normalization steps to a path string--that is, RFC 2396
 * Section 5.2, steps 6.c through 6.g.
 *
 * Normalization occurs directly on the string, no new allocation is done
 *
 * Returns 0, or -1 when @path is NULL
 */
static int
normalize_uri_path(char *path) {
    char *cur, *out;

    if (path == NULL)
        return(-1);

    /* Skip all initial "/" chars.  We want to get to the beginning of the
     * first non-empty segment.
     */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /* Keep everything we've seen so far.  */
    out = cur;

    /*
     * Analyze each segment in sequence for cases (c) and (d).
     * "cur" reads ahead while "out" writes the compacted result.
     */
    while (cur[0] != '\0') {
        /*
         * c) All occurrences of "./", where "." is a complete path segment,
         *    are removed from the buffer string.
         */
        if ((cur[0] == '.') && (cur[1] == '/')) {
            cur += 2;
            /* '//' normalization should be done at this point too */
            while (cur[0] == '/')
                cur++;
            continue;
        }

        /*
         * d) If the buffer string ends with "." as a complete path segment,
         *    that "." is removed.
         */
        if ((cur[0] == '.') && (cur[1] == '\0'))
            break;

        /* Otherwise keep the segment.  */
        while (cur[0] != '/') {
            if (cur[0] == '\0')
                goto done_cd;
            (out++)[0] = (cur++)[0];
        }
        /* normalize "//": skip all but the last of a run of slashes */
        while ((cur[0] == '/') && (cur[1] == '/'))
            cur++;

        /* copy the single remaining '/' */
        (out++)[0] = (cur++)[0];
    }
 done_cd:
    out[0] = '\0';

    /* Reset to the beginning of the first segment for the next sequence.  */
    cur = path;
    while (cur[0] == '/')
        ++cur;
    if (cur[0] == '\0')
        return(0);

    /*
     * Analyze each segment in sequence for cases (e) and (f).
     *
     * e) All occurrences of "SEGMENT/../", where SEGMENT is a
     *    complete path segment not equal to "..", are removed from the
     *    buffer string.  Removal of these path segments is performed
     *    iteratively, removing the leftmost matching pattern on each
     *    iteration, until no matching pattern remains.
     *
     * f) If the buffer string ends with "SEGMENT/..", where SEGMENT
     *    is a complete path segment not equal to "..", that
     *    "SEGMENT/.." is removed.
     *
     * To satisfy the "iterative" clause in (e), we need to collapse the
     * string every time we find something that needs to be removed.  Thus,
     * we don't need to keep two pointers into the string: we only need a
     * "current position" pointer.
     */
    while (1) {
        char *segp, *tmp;

        /* At the beginning of each iteration of this loop, "cur" points to
         * the first character of the segment we want to examine.
         */

        /* Find the end of the current segment.  */
        segp = cur;
        while ((segp[0] != '/') && (segp[0] != '\0'))
            ++segp;

        /* If this is the last segment, we're done (we need at least two
         * segments to meet the criteria for the (e) and (f) cases).
         */
        if (segp[0] == '\0')
            break;

        /* If the first segment is "..", or if the next segment _isn't_ "..",
         * keep this segment and try the next one.
         */
        ++segp;
        if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3))
            || ((segp[0] != '.') || (segp[1] != '.')
                || ((segp[2] != '/') && (segp[2] != '\0')))) {
            cur = segp;
            continue;
        }

        /* If we get here, remove this segment and the next one and back up
         * to the previous segment (if there is one), to implement the
         * "iteratively" clause.  It's pretty much impossible to back up
         * while maintaining two pointers into the buffer, so just compact
         * the whole buffer now.
         */

        /* If this is the end of the buffer, we're done.  */
        if (segp[2] == '\0') {
            cur[0] = '\0';
            break;
        }
        /* Valgrind complained, strcpy(cur, segp + 3); */
        /* string will overlap, do not use strcpy */
        tmp = cur;
        segp += 3;
        while ((*tmp++ = *segp++) != 0)
            ;

        /* If there are no previous segments, then keep going from here.  */
        segp = cur;
        while ((segp > path) && ((--segp)[0] == '/'))
            ;
        if (segp == path)
            continue;

        /* "segp" is pointing to the end of a previous segment; find its
         * start.  We need to back up to the previous segment and start
         * over with that to handle things like "foo/bar/../..".  If we
         * don't do this, then on the first pass we'll remove the "bar/..",
         * but be pointing at the second ".." so we won't realize we can also
         * remove the "foo/..".
         */
        cur = segp;
        while ((cur > path) && (cur[-1] != '/'))
            --cur;
    }
    /*
     * NOTE(review): "out" is not updated by the loop above, so it still
     * points at the terminator written after the (c)/(d) pass; this store
     * looks redundant -- confirm before removing.
     */
    out[0] = '\0';

    /*
     * g) If the resulting buffer string still begins with one or more
     *    complete path segments of "..", then the reference is
     *    considered to be in error. Implementations may handle this
     *    error by retaining these components in the resolved path (i.e.,
     *    treating them as part of the final URI), by removing them from
     *    the resolved path (i.e., discarding relative levels above the
     *    root), or by avoiding traversal of the reference.
     *
     * We discard them from the final path.
     */
    if (path[0] == '/') {
        cur = path;
        while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.')
               && ((cur[3] == '/') || (cur[3] == '\0')))
            cur += 3;

        if (cur != path) {
            /* shift the remainder down over the discarded "/.." prefixes */
            out = path;
            while (cur[0] != '\0')
                (out++)[0] = (cur++)[0];
            out[0] = 0;
        }
    }

    return(0);
}

/* Returns 1 when @c is an ASCII hexadecimal digit (either case), else 0. */
static int is_hex(char c) {
    if (((c >= '0') && (c <= '9')) ||
        ((c >= 'a') && (c <= 'f')) ||
        ((c >= 'A') && (c <= 'F')))
        return(1);
    return(0);
}


/**
 * uri_string_unescape:
 * @str:  the string to unescape
 * @len:   the length in bytes to unescape (or <= 0 to indicate full string)
 * @target:  optional destination buffer
 *
 * Unescaping routine, but does not check that the string is an URI. The
 * output is a direct unsigned char translation of %XX values (no encoding)
 * Note that the length of the result can only be smaller or same size as
 * the input string.
+ * + * Returns a copy of the string, but unescaped, will return NULL only in case + * of error + */ +char * +uri_string_unescape(const char *str, int len, char *target) { + char *ret, *out; + const char *in; + + if (str == NULL) + return(NULL); + if (len <= 0) len = strlen(str); + if (len < 0) return(NULL); + + if (target == NULL) { + ret = g_malloc(len + 1); + } else + ret = target; + in = str; + out = ret; + while(len > 0) { + if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { + in++; + if ((*in >= '0') && (*in <= '9')) + *out = (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = (*in - 'A') + 10; + in++; + if ((*in >= '0') && (*in <= '9')) + *out = *out * 16 + (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = *out * 16 + (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = *out * 16 + (*in - 'A') + 10; + in++; + len -= 3; + out++; + } else { + *out++ = *in++; + len--; + } + } + *out = 0; + return(ret); +} + +/** + * uri_string_escape: + * @str: string to escape + * @list: exception list string of chars not to escape + * + * This routine escapes a string to hex, ignoring reserved characters (a-z) + * and the characters in the exception list. + * + * Returns a new escaped string or NULL in case of error. 
+ */ +char * +uri_string_escape(const char *str, const char *list) { + char *ret, ch; + char *temp; + const char *in; + int len, out; + + if (str == NULL) + return(NULL); + if (str[0] == 0) + return(g_strdup(str)); + len = strlen(str); + if (!(len > 0)) return(NULL); + + len += 20; + ret = g_malloc(len); + in = str; + out = 0; + while(*in != 0) { + if (len - out <= 3) { + temp = realloc2n(ret, &len); + ret = temp; + } + + ch = *in; + + if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) { + unsigned char val; + ret[out++] = '%'; + val = ch >> 4; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + val = ch & 0xF; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + in++; + } else { + ret[out++] = *in++; + } + + } + ret[out] = 0; + return(ret); +} + +/************************************************************************ + * * + * Public functions * + * * + ************************************************************************/ + +/** + * uri_resolve: + * @URI: the URI instance found in the document + * @base: the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * of error. + */ +char * +uri_resolve(const char *uri, const char *base) { + char *val = NULL; + int ret, len, indx, cur, out; + URI *ref = NULL; + URI *bas = NULL; + URI *res = NULL; + + /* + * 1) The URI reference is parsed into the potential four components and + * fragment identifier, as described in Section 4.3. + * + * NOTE that a completely empty URI is treated by modern browsers + * as a reference to "." rather than as a synonym for the current + * URI. Should we do that here? 
+ */ + if (uri == NULL) + ret = -1; + else { + if (*uri) { + ref = uri_new(); + ret = uri_parse_into(ref, uri); + } + else + ret = 0; + } + if (ret != 0) + goto done; + if ((ref != NULL) && (ref->scheme != NULL)) { + /* + * The URI is absolute don't modify. + */ + val = g_strdup(uri); + goto done; + } + if (base == NULL) + ret = -1; + else { + bas = uri_new(); + ret = uri_parse_into(bas, base); + } + if (ret != 0) { + if (ref) + val = uri_to_string(ref); + goto done; + } + if (ref == NULL) { + /* + * the base fragment must be ignored + */ + g_free(bas->fragment); + bas->fragment = NULL; + val = uri_to_string(bas); + goto done; + } + + /* + * 2) If the path component is empty and the scheme, authority, and + * query components are undefined, then it is a reference to the + * current document and we are done. Otherwise, the reference URI's + * query and fragment components are defined as found (or not found) + * within the URI reference and not inherited from the base URI. + * + * NOTE that in modern browsers, the parsing differs from the above + * in the following aspect: the query component is allowed to be + * defined while still treating this as a reference to the current + * document. + */ + res = uri_new(); + if ((ref->scheme == NULL) && (ref->path == NULL) && + ((ref->authority == NULL) && (ref->server == NULL))) { + res->scheme = g_strdup(bas->scheme); + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + res->user = g_strdup(bas->user); + res->port = bas->port; + } + res->path = g_strdup(bas->path); + if (ref->query != NULL) { + res->query = g_strdup (ref->query); + } else { + res->query = g_strdup(bas->query); + } + res->fragment = g_strdup(ref->fragment); + goto step_7; + } + + /* + * 3) If the scheme component is defined, indicating that the reference + * starts with a scheme name, then the reference is interpreted as an + * absolute URI and we are done. 
Otherwise, the reference URI's + * scheme is inherited from the base URI's scheme component. + */ + if (ref->scheme != NULL) { + val = uri_to_string(ref); + goto done; + } + res->scheme = g_strdup(bas->scheme); + + res->query = g_strdup(ref->query); + res->fragment = g_strdup(ref->fragment); + + /* + * 4) If the authority component is defined, then the reference is a + * network-path and we skip to step 7. Otherwise, the reference + * URI's authority is inherited from the base URI's authority + * component, which will also be undefined if the URI scheme does not + * use an authority component. + */ + if ((ref->authority != NULL) || (ref->server != NULL)) { + if (ref->authority != NULL) + res->authority = g_strdup(ref->authority); + else { + res->server = g_strdup(ref->server); + res->user = g_strdup(ref->user); + res->port = ref->port; + } + res->path = g_strdup(ref->path); + goto step_7; + } + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + res->user = g_strdup(bas->user); + res->port = bas->port; + } + + /* + * 5) If the path component begins with a slash character ("/"), then + * the reference is an absolute-path and we skip to step 7. + */ + if ((ref->path != NULL) && (ref->path[0] == '/')) { + res->path = g_strdup(ref->path); + goto step_7; + } + + + /* + * 6) If this step is reached, then we are resolving a relative-path + * reference. The relative path needs to be merged with the base + * URI's path. Although there are many ways to do this, we will + * describe a simple method using a separate string buffer. + * + * Allocate a buffer large enough for the result string. + */ + len = 2; /* extra / and 0 */ + if (ref->path != NULL) + len += strlen(ref->path); + if (bas->path != NULL) + len += strlen(bas->path); + res->path = g_malloc(len); + res->path[0] = 0; + + /* + * a) All but the last segment of the base URI's path component is + * copied to the buffer. 
In other words, any characters after the + * last (right-most) slash character, if any, are excluded. + */ + cur = 0; + out = 0; + if (bas->path != NULL) { + while (bas->path[cur] != 0) { + while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) + cur++; + if (bas->path[cur] == 0) + break; + + cur++; + while (out < cur) { + res->path[out] = bas->path[out]; + out++; + } + } + } + res->path[out] = 0; + + /* + * b) The reference's path component is appended to the buffer + * string. + */ + if (ref->path != NULL && ref->path[0] != 0) { + indx = 0; + /* + * Ensure the path includes a '/' + */ + if ((out == 0) && (bas->server != NULL)) + res->path[out++] = '/'; + while (ref->path[indx] != 0) { + res->path[out++] = ref->path[indx++]; + } + } + res->path[out] = 0; + + /* + * Steps c) to h) are really path normalization steps + */ + normalize_uri_path(res->path); + +step_7: + + /* + * 7) The resulting URI components, including any inherited from the + * base URI, are recombined to give the absolute form of the URI + * reference. + */ + val = uri_to_string(res); + +done: + if (ref != NULL) + uri_free(ref); + if (bas != NULL) + uri_free(bas); + if (res != NULL) + uri_free(res); + return(val); +} + +/** + * uri_resolve_relative: + * @URI: the URI reference under consideration + * @base: the base value + * + * Expresses the URI of the reference in terms relative to the + * base. 
Some examples of this operation include: + * base = "http://site1.com/docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif pic1.gif + * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif + * + * base = "docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif + * + * + * Note: if the URI reference is really weird or complicated, it may be + * worthwhile to first convert it into a "nice" one by calling + * uri_resolve (using 'base') before calling this routine, + * since this routine (for reasonable efficiency) assumes URI has + * already been through some validation. + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * error. + */ +char * +uri_resolve_relative (const char *uri, const char * base) +{ + char *val = NULL; + int ret; + int ix; + int pos = 0; + int nbslash = 0; + int len; + URI *ref = NULL; + URI *bas = NULL; + char *bptr, *uptr, *vptr; + int remove_path = 0; + + if ((uri == NULL) || (*uri == 0)) + return NULL; + + /* + * First parse URI into a standard form + */ + ref = uri_new (); + /* If URI not already in "relative" form */ + if (uri[0] != '.') { + ret = uri_parse_into (ref, uri); + if (ret != 0) + goto done; /* Error in URI, return NULL */ + } else + ref->path = g_strdup(uri); + + /* + * Next parse base into the same standard form + */ + if ((base == NULL) || (*base == 0)) { + val = g_strdup (uri); + goto done; + } + bas = uri_new (); + if (base[0] != '.') { + ret = uri_parse_into (bas, base); + if (ret != 0) + goto done; /* Error in base, return NULL */ + } else + bas->path = g_strdup(base); + + /* + * If the scheme / server on the URI differs from the base, + * just return the URI + */ + if ((ref->scheme != NULL) && + ((bas->scheme == NULL) || 
+ (strcmp (bas->scheme, ref->scheme)) || + (strcmp (bas->server, ref->server)))) { + val = g_strdup (uri); + goto done; + } + if (bas->path == ref->path || + (bas->path && ref->path && !strcmp(bas->path, ref->path))) { + val = g_strdup(""); + goto done; + } + if (bas->path == NULL) { + val = g_strdup(ref->path); + goto done; + } + if (ref->path == NULL) { + ref->path = (char *) "/"; + remove_path = 1; + } + + /* + * At this point (at last!) we can compare the two paths + * + * First we take care of the special case where either of the + * two path components may be missing (bug 316224) + */ + if (bas->path == NULL) { + if (ref->path != NULL) { + uptr = ref->path; + if (*uptr == '/') + uptr++; + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + } + goto done; + } + bptr = bas->path; + if (ref->path == NULL) { + for (ix = 0; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + uptr = NULL; + len = 1; /* this is for a string terminator only */ + } else { + /* + * Next we compare the two strings and find where they first differ + */ + if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) + pos += 2; + if ((*bptr == '.') && (bptr[1] == '/')) + bptr += 2; + else if ((*bptr == '/') && (ref->path[pos] != '/')) + bptr++; + while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) + pos++; + + if (bptr[pos] == ref->path[pos]) { + val = g_strdup(""); + goto done; /* (I can't imagine why anyone would do this) */ + } + + /* + * In URI, "back up" to the last '/' encountered. 
This will be the + * beginning of the "unique" suffix of URI + */ + ix = pos; + if ((ref->path[ix] == '/') && (ix > 0)) + ix--; + else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) + ix -= 2; + for (; ix > 0; ix--) { + if (ref->path[ix] == '/') + break; + } + if (ix == 0) { + uptr = ref->path; + } else { + ix++; + uptr = &ref->path[ix]; + } + + /* + * In base, count the number of '/' from the differing point + */ + if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ + for (; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + } + len = strlen (uptr) + 1; + } + + if (nbslash == 0) { + if (uptr != NULL) + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + goto done; + } + + /* + * Allocate just enough space for the returned string - + * length of the remainder of the URI, plus enough space + * for the "../" groups, plus one for the terminator + */ + val = g_malloc (len + 3 * nbslash); + vptr = val; + /* + * Put in as many "../" as needed + */ + for (; nbslash>0; nbslash--) { + *vptr++ = '.'; + *vptr++ = '.'; + *vptr++ = '/'; + } + /* + * Finish up with the end of the URI + */ + if (uptr != NULL) { + if ((vptr > val) && (len > 0) && + (uptr[0] == '/') && (vptr[-1] == '/')) { + memcpy (vptr, uptr + 1, len - 1); + vptr[len - 2] = 0; + } else { + memcpy (vptr, uptr, len); + vptr[len - 1] = 0; + } + } else { + vptr[len - 1] = 0; + } + + /* escape the freshly-built path */ + vptr = val; + /* exception characters from uri_to_string */ + val = uri_string_escape(vptr, "/;&=+$,"); + g_free(vptr); + +done: + /* + * Free the working variables + */ + if (remove_path != 0) + ref->path = NULL; + if (ref != NULL) + uri_free (ref); + if (bas != NULL) + uri_free (bas); + + return val; +} + +/* + * Utility functions to help parse and assemble query strings. 
+ */ + +struct QueryParams * +query_params_new (int init_alloc) +{ + struct QueryParams *ps; + + if (init_alloc <= 0) init_alloc = 1; + + ps = g_new(QueryParams, 1); + ps->n = 0; + ps->alloc = init_alloc; + ps->p = g_new(QueryParam, ps->alloc); + + return ps; +} + +/* Ensure there is space to store at least one more parameter + * at the end of the set. + */ +static int +query_params_append (struct QueryParams *ps, + const char *name, const char *value) +{ + if (ps->n >= ps->alloc) { + ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2); + ps->alloc *= 2; + } + + ps->p[ps->n].name = g_strdup(name); + ps->p[ps->n].value = g_strdup(value); + ps->p[ps->n].ignore = 0; + ps->n++; + + return 0; +} + +void +query_params_free (struct QueryParams *ps) +{ + int i; + + for (i = 0; i < ps->n; ++i) { + g_free (ps->p[i].name); + g_free (ps->p[i].value); + } + g_free (ps->p); + g_free (ps); +} + +struct QueryParams * +query_params_parse (const char *query) +{ + struct QueryParams *ps; + const char *end, *eq; + + ps = query_params_new (0); + if (!query || query[0] == '\0') return ps; + + while (*query) { + char *name = NULL, *value = NULL; + + /* Find the next separator, or end of the string. */ + end = strchr (query, '&'); + if (!end) + end = strchr (query, ';'); + if (!end) + end = query + strlen (query); + + /* Find the first '=' character between here and end. */ + eq = strchr (query, '='); + if (eq && eq >= end) eq = NULL; + + /* Empty section (eg. "&&"). */ + if (end == query) + goto next; + + /* If there is no '=' character, then we have just "name" + * and consistent with CGI.pm we assume value is "". + */ + else if (!eq) { + name = uri_string_unescape (query, end - query, NULL); + value = NULL; + } + /* Or if we have "name=" here (works around annoying + * problem when calling uri_string_unescape with len = 0). 
+ */ + else if (eq+1 == end) { + name = uri_string_unescape (query, eq - query, NULL); + value = g_new0(char, 1); + } + /* If the '=' character is at the beginning then we have + * "=value" and consistent with CGI.pm we _ignore_ this. + */ + else if (query == eq) + goto next; + + /* Otherwise it's "name=value". */ + else { + name = uri_string_unescape (query, eq - query, NULL); + value = uri_string_unescape (eq+1, end - (eq+1), NULL); + } + + /* Append to the parameter set. */ + query_params_append (ps, name, value); + g_free(name); + g_free(value); + + next: + query = end; + if (*query) query ++; /* skip '&' separator */ + } + + return ps; +} |