diff options
Diffstat (limited to 'src/hw/virtio/vhost-user.c')
-rw-r--r-- | src/hw/virtio/vhost-user.c | 657 |
1 files changed, 657 insertions, 0 deletions
diff --git a/src/hw/virtio/vhost-user.c b/src/hw/virtio/vhost-user.c new file mode 100644 index 0000000..577c95e --- /dev/null +++ b/src/hw/virtio/vhost-user.c @@ -0,0 +1,657 @@ +/* + * vhost-user + * + * Copyright (c) 2013 Virtual Open Systems Sarl. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-backend.h" +#include "hw/virtio/virtio-net.h" +#include "sysemu/char.h" +#include "sysemu/kvm.h" +#include "qemu/error-report.h" +#include "qemu/sockets.h" +#include "exec/ram_addr.h" +#include "migration/migration.h" + +#include <fcntl.h> +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/un.h> +#include <linux/vhost.h> + +#define VHOST_MEMORY_MAX_NREGIONS 8 +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +enum VhostUserProtocolFeature { + VHOST_USER_PROTOCOL_F_MQ = 0, + VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1, + VHOST_USER_PROTOCOL_F_RARP = 2, + + VHOST_USER_PROTOCOL_F_MAX +}; + +#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1) + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK (0x3) +#define VHOST_USER_REPLY_MASK (0x1<<2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK (0xff) +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + } payload; +} QEMU_PACKED VhostUserMsg; + +static VhostUserMsg m __attribute__ ((unused)); +#define VHOST_USER_HDR_SIZE (sizeof(m.request) \ + + sizeof(m.flags) \ + + sizeof(m.size)) + +#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION (0x1) + +static bool ioeventfd_enabled(void) +{ + return kvm_enabled() && kvm_eventfds_enabled(); +} + +static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg) +{ + CharDriverState *chr = dev->opaque; + uint8_t *p = (uint8_t *) msg; + int r, size = VHOST_USER_HDR_SIZE; + + r = qemu_chr_fe_read_all(chr, p, size); + if (r != size) { + error_report("Failed to read msg header. Read %d instead of %d." + " Original request %d.", r, size, msg->request); + goto fail; + } + + /* validate received flags */ + if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) { + error_report("Failed to read msg header." + " Flags 0x%x instead of 0x%x.", msg->flags, + VHOST_USER_REPLY_MASK | VHOST_USER_VERSION); + goto fail; + } + + /* validate message size is sane */ + if (msg->size > VHOST_USER_PAYLOAD_SIZE) { + error_report("Failed to read msg header." + " Size %d exceeds the maximum %zu.", msg->size, + VHOST_USER_PAYLOAD_SIZE); + goto fail; + } + + if (msg->size) { + p += VHOST_USER_HDR_SIZE; + size = msg->size; + r = qemu_chr_fe_read_all(chr, p, size); + if (r != size) { + error_report("Failed to read msg payload." + " Read %d instead of %d.", r, msg->size); + goto fail; + } + } + + return 0; + +fail: + return -1; +} + +static bool vhost_user_one_time_request(VhostUserRequest request) +{ + switch (request) { + case VHOST_USER_SET_OWNER: + case VHOST_USER_RESET_OWNER: + case VHOST_USER_SET_MEM_TABLE: + case VHOST_USER_GET_QUEUE_NUM: + return true; + default: + return false; + } +} + +/* most non-init callers ignore the error */ +static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, + int *fds, int fd_num) +{ + CharDriverState *chr = dev->opaque; + int size = VHOST_USER_HDR_SIZE + msg->size; + + /* + * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, + * we just need send it once in the first time. For later such + * request, we just ignore it. + */ + if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) { + return 0; + } + + if (fd_num) { + qemu_chr_fe_set_msgfds(chr, fds, fd_num); + } + + return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ? + 0 : -1; +} + +static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base, + struct vhost_log *log) +{ + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + bool shmfd = virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); + VhostUserMsg msg = { + .request = VHOST_USER_SET_LOG_BASE, + .flags = VHOST_USER_VERSION, + .payload.log.mmap_size = log->size * sizeof(*(log->log)), + .payload.log.mmap_offset = 0, + .size = sizeof(msg.payload.log), + }; + + if (shmfd && log->fd != -1) { + fds[fd_num++] = log->fd; + } + + vhost_user_write(dev, &msg, fds, fd_num); + + if (shmfd) { + msg.size = 0; + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != VHOST_USER_SET_LOG_BASE) { + error_report("Received unexpected msg type. " + "Expected %d received %d", + VHOST_USER_SET_LOG_BASE, msg.request); + return -1; + } + } + + return 0; +} + +static int vhost_user_set_mem_table(struct vhost_dev *dev, + struct vhost_memory *mem) +{ + int fds[VHOST_MEMORY_MAX_NREGIONS]; + int i, fd; + size_t fd_num = 0; + VhostUserMsg msg = { + .request = VHOST_USER_SET_MEM_TABLE, + .flags = VHOST_USER_VERSION, + }; + + for (i = 0; i < dev->mem->nregions; ++i) { + struct vhost_memory_region *reg = dev->mem->regions + i; + ram_addr_t ram_addr; + + assert((uintptr_t)reg->userspace_addr == reg->userspace_addr); + qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, + &ram_addr); + fd = qemu_get_ram_fd(ram_addr); + if (fd > 0) { + msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr; + msg.payload.memory.regions[fd_num].memory_size = reg->memory_size; + msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr; + msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr - + (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr); + assert(fd_num < VHOST_MEMORY_MAX_NREGIONS); + fds[fd_num++] = fd; + } + } + + msg.payload.memory.nregions = fd_num; + + if (!fd_num) { + error_report("Failed initializing vhost-user memory map, " + "consider using -object memory-backend-file share=on"); + return -1; + } + + msg.size = sizeof(msg.payload.memory.nregions); + msg.size += sizeof(msg.payload.memory.padding); + msg.size += fd_num * sizeof(VhostUserMemoryRegion); + + vhost_user_write(dev, &msg, fds, fd_num); + + return 0; +} + +static int vhost_user_set_vring_addr(struct vhost_dev *dev, + struct vhost_vring_addr *addr) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_VRING_ADDR, + .flags = VHOST_USER_VERSION, + .payload.addr = *addr, + .size = sizeof(msg.payload.addr), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_vring_endian(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + error_report("vhost-user trying to send unhandled ioctl"); + return -1; +} + +static int vhost_set_vring(struct vhost_dev *dev, + unsigned long int request, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.state = *ring, + .size = sizeof(msg.payload.state), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_vring_num(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring); +} + +static int vhost_user_set_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + return vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring); +} + +static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable) +{ + int i; + + if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) { + return -1; + } + + for (i = 0; i < dev->nvqs; ++i) { + struct vhost_vring_state state = { + .index = dev->vq_index + i, + .num = enable, + }; + + vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state); + } + + return 0; +} + +static int vhost_user_get_vring_base(struct vhost_dev *dev, + struct vhost_vring_state *ring) +{ + VhostUserMsg msg = { + .request = VHOST_USER_GET_VRING_BASE, + .flags = VHOST_USER_VERSION, + .payload.state = *ring, + .size = sizeof(msg.payload.state), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != VHOST_USER_GET_VRING_BASE) { + error_report("Received unexpected msg type. Expected %d received %d", + VHOST_USER_GET_VRING_BASE, msg.request); + return -1; + } + + if (msg.size != sizeof(msg.payload.state)) { + error_report("Received bad msg size."); + return -1; + } + + *ring = msg.payload.state; + + return 0; +} + +static int vhost_set_vring_file(struct vhost_dev *dev, + VhostUserRequest request, + struct vhost_vring_file *file) +{ + int fds[VHOST_MEMORY_MAX_NREGIONS]; + size_t fd_num = 0; + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK, + .size = sizeof(msg.payload.u64), + }; + + if (ioeventfd_enabled() && file->fd > 0) { + fds[fd_num++] = file->fd; + } else { + msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK; + } + + vhost_user_write(dev, &msg, fds, fd_num); + + return 0; +} + +static int vhost_user_set_vring_kick(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file); +} + +static int vhost_user_set_vring_call(struct vhost_dev *dev, + struct vhost_vring_file *file) +{ + return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file); +} + +static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + .payload.u64 = u64, + .size = sizeof(msg.payload.u64), + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_set_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features); +} + +static int vhost_user_set_protocol_features(struct vhost_dev *dev, + uint64_t features) +{ + return vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES, features); +} + +static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) +{ + VhostUserMsg msg = { + .request = request, + .flags = VHOST_USER_VERSION, + }; + + if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + return 0; + } + + vhost_user_write(dev, &msg, NULL, 0); + + if (vhost_user_read(dev, &msg) < 0) { + return 0; + } + + if (msg.request != request) { + error_report("Received unexpected msg type. Expected %d received %d", + request, msg.request); + return -1; + } + + if (msg.size != sizeof(msg.payload.u64)) { + error_report("Received bad msg size."); + return -1; + } + + *u64 = msg.payload.u64; + + return 0; +} + +static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features) +{ + return vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features); +} + +static int vhost_user_set_owner(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_SET_OWNER, + .flags = VHOST_USER_VERSION, + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_reset_device(struct vhost_dev *dev) +{ + VhostUserMsg msg = { + .request = VHOST_USER_RESET_OWNER, + .flags = VHOST_USER_VERSION, + }; + + vhost_user_write(dev, &msg, NULL, 0); + + return 0; +} + +static int vhost_user_init(struct vhost_dev *dev, void *opaque) +{ + uint64_t features; + int err; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + dev->opaque = opaque; + + err = vhost_user_get_features(dev, &features); + if (err < 0) { + return err; + } + + if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) { + dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES; + + err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES, + &features); + if (err < 0) { + return err; + } + + dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK; + err = vhost_user_set_protocol_features(dev, dev->protocol_features); + if (err < 0) { + return err; + } + + /* query the max queues we support if backend supports Multiple Queue */ + if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) { + err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM, + &dev->max_queues); + if (err < 0) { + return err; + } + } + } + + if (dev->migration_blocker == NULL && + !virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD)) { + error_setg(&dev->migration_blocker, + "Migration disabled: vhost-user backend lacks " + "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature."); + } + + return 0; +} + +static int vhost_user_cleanup(struct vhost_dev *dev) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + dev->opaque = 0; + + return 0; +} + +static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx) +{ + assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs); + + return idx; +} + +static int vhost_user_memslots_limit(struct vhost_dev *dev) +{ + return VHOST_MEMORY_MAX_NREGIONS; +} + +static bool vhost_user_requires_shm_log(struct vhost_dev *dev) +{ + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + return virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_LOG_SHMFD); +} + +static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr) +{ + VhostUserMsg msg = { 0 }; + int err; + + assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER); + + /* If guest supports GUEST_ANNOUNCE do nothing */ + if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) { + return 0; + } + + /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */ + if (virtio_has_feature(dev->protocol_features, + VHOST_USER_PROTOCOL_F_RARP)) { + msg.request = VHOST_USER_SEND_RARP; + msg.flags = VHOST_USER_VERSION; + memcpy((char *)&msg.payload.u64, mac_addr, 6); + msg.size = sizeof(msg.payload.u64); + + err = vhost_user_write(dev, &msg, NULL, 0); + return err; + } + return -1; +} + +static bool vhost_user_can_merge(struct vhost_dev *dev, + uint64_t start1, uint64_t size1, + uint64_t start2, uint64_t size2) +{ + ram_addr_t ram_addr; + int mfd, rfd; + MemoryRegion *mr; + + mr = qemu_ram_addr_from_host((void *)(uintptr_t)start1, &ram_addr); + assert(mr); + mfd = qemu_get_ram_fd(ram_addr); + + mr = qemu_ram_addr_from_host((void *)(uintptr_t)start2, &ram_addr); + assert(mr); + rfd = qemu_get_ram_fd(ram_addr); + + return mfd == rfd; +} + +const VhostOps user_ops = { + .backend_type = VHOST_BACKEND_TYPE_USER, + .vhost_backend_init = vhost_user_init, + .vhost_backend_cleanup = vhost_user_cleanup, + .vhost_backend_memslots_limit = vhost_user_memslots_limit, + .vhost_set_log_base = vhost_user_set_log_base, + .vhost_set_mem_table = vhost_user_set_mem_table, + .vhost_set_vring_addr = vhost_user_set_vring_addr, + .vhost_set_vring_endian = vhost_user_set_vring_endian, + .vhost_set_vring_num = vhost_user_set_vring_num, + .vhost_set_vring_base = vhost_user_set_vring_base, + .vhost_get_vring_base = vhost_user_get_vring_base, + .vhost_set_vring_kick = vhost_user_set_vring_kick, + .vhost_set_vring_call = vhost_user_set_vring_call, + .vhost_set_features = vhost_user_set_features, + .vhost_get_features = vhost_user_get_features, + .vhost_set_owner = vhost_user_set_owner, + .vhost_reset_device = vhost_user_reset_device, + .vhost_get_vq_index = vhost_user_get_vq_index, + .vhost_set_vring_enable = vhost_user_set_vring_enable, + .vhost_requires_shm_log = vhost_user_requires_shm_log, + .vhost_migration_done = vhost_user_migration_done, + .vhost_backend_can_merge = vhost_user_can_merge, +}; |