diff options
-rw-r--r-- | arch/x86/kvm/x86.c | 1 | ||||
-rw-r--r-- | include/linux/kvm.h | 24 | ||||
-rw-r--r-- | include/linux/kvm_host.h | 10 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 251 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 11 |
5 files changed, 293 insertions, 4 deletions
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 2214384..42160b0 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1212,6 +1212,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQ_INJECT_STATUS:
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_IRQFD:
+	case KVM_CAP_IOEVENTFD:
 	case KVM_CAP_PIT2:
 	case KVM_CAP_PIT_STATE2:
 		r = 1;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index a74a1fc..230a91a 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -307,6 +307,28 @@ struct kvm_guest_debug {
 	struct kvm_guest_debug_arch arch;
 };
 
+enum {
+	kvm_ioeventfd_flag_nr_datamatch,
+	kvm_ioeventfd_flag_nr_pio,
+	kvm_ioeventfd_flag_nr_deassign,
+	kvm_ioeventfd_flag_nr_max,
+};
+
+#define KVM_IOEVENTFD_FLAG_DATAMATCH (1 << kvm_ioeventfd_flag_nr_datamatch)
+#define KVM_IOEVENTFD_FLAG_PIO (1 << kvm_ioeventfd_flag_nr_pio)
+#define KVM_IOEVENTFD_FLAG_DEASSIGN (1 << kvm_ioeventfd_flag_nr_deassign)
+
+#define KVM_IOEVENTFD_VALID_FLAG_MASK ((1 << kvm_ioeventfd_flag_nr_max) - 1)
+
+struct kvm_ioeventfd {
+	__u64 datamatch;
+	__u64 addr; /* legal pio/mmio address */
+	__u32 len; /* 1, 2, 4, or 8 bytes */
+	__s32 fd;
+	__u32 flags;
+	__u8 pad[36];
+};
+
 #define KVM_TRC_SHIFT 16
 /*
  * kvm trace categories
@@ -412,6 +434,7 @@ struct kvm_guest_debug {
 #ifdef __KVM_HAVE_PIT_STATE2
 #define KVM_CAP_PIT_STATE2 35
 #endif
+#define KVM_CAP_IOEVENTFD 36
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -520,6 +543,7 @@ struct kvm_irqfd {
 #define KVM_IRQFD _IOW(KVMIO, 0x76, struct kvm_irqfd)
 #define KVM_CREATE_PIT2 _IOW(KVMIO, 0x77, struct kvm_pit_config)
 #define KVM_SET_BOOT_CPU_ID _IO(KVMIO, 0x78)
+#define KVM_IOEVENTFD _IOW(KVMIO, 0x79, struct kvm_ioeventfd)
 
 /*
  * ioctls for vcpu fds
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 983b0bd..6ec9fc5 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -155,6 +155,7 @@ struct kvm {
 		spinlock_t lock;
 		struct list_head items;
 	} irqfds;
+	struct list_head ioeventfds;
 #endif
 	struct kvm_vm_stat stat;
 	struct kvm_arch arch;
@@ -528,19 +529,24 @@ static inline void kvm_free_irq_routing(struct kvm *kvm) {}
 
 #ifdef CONFIG_HAVE_KVM_EVENTFD
 
-void kvm_irqfd_init(struct kvm *kvm);
+void kvm_eventfd_init(struct kvm *kvm);
 int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags);
 void kvm_irqfd_release(struct kvm *kvm);
+int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args);
 
 #else
 
-static inline void kvm_irqfd_init(struct kvm *kvm) {}
+static inline void kvm_eventfd_init(struct kvm *kvm) {}
 static inline int kvm_irqfd(struct kvm *kvm, int fd, int gsi, int flags)
 {
 	return -EINVAL;
 }
 
 static inline void kvm_irqfd_release(struct kvm *kvm) {}
+static inline int kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	return -ENOSYS;
+}
 
 #endif /* CONFIG_HAVE_KVM_EVENTFD */
 
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 4092b8d..99017e8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -21,6 +21,7 @@
  */
 
 #include <linux/kvm_host.h>
+#include <linux/kvm.h>
 #include <linux/workqueue.h>
 #include <linux/syscalls.h>
 #include <linux/wait.h>
@@ -28,6 +29,9 @@
 #include <linux/file.h>
 #include <linux/list.h>
 #include <linux/eventfd.h>
+#include <linux/kernel.h>
+
+#include "iodev.h"
 
 /*
  * --------------------------------------------------------------------
@@ -234,10 +238,11 @@ fail:
 }
 
 void
-kvm_irqfd_init(struct kvm *kvm)
+kvm_eventfd_init(struct kvm *kvm)
 {
 	spin_lock_init(&kvm->irqfds.lock);
 	INIT_LIST_HEAD(&kvm->irqfds.items);
+	INIT_LIST_HEAD(&kvm->ioeventfds);
 }
 
 /*
@@ -327,3 +332,247 @@ static void __exit irqfd_module_exit(void)
 
 module_init(irqfd_module_init);
 module_exit(irqfd_module_exit);
+
+/*
+ * --------------------------------------------------------------------
+ * ioeventfd: translate a PIO/MMIO memory write to an eventfd signal.
+ *
+ * userspace can register a PIO/MMIO address with an eventfd for receiving
+ * notification when the memory has been touched.
+ * --------------------------------------------------------------------
+ */
+
+struct _ioeventfd {
+	struct list_head list;
+	u64 addr;
+	int length;
+	struct eventfd_ctx *eventfd;
+	u64 datamatch;
+	struct kvm_io_device dev;
+	bool wildcard;
+};
+
+static inline struct _ioeventfd *
+to_ioeventfd(struct kvm_io_device *dev)
+{
+	return container_of(dev, struct _ioeventfd, dev);
+}
+
+static void
+ioeventfd_release(struct _ioeventfd *p)
+{
+	eventfd_ctx_put(p->eventfd);
+	list_del(&p->list);
+	kfree(p);
+}
+
+static bool
+ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
+{
+	u64 _val;
+
+	if (!(addr == p->addr && len == p->length))
+		/* address-range must be precise for a hit */
+		return false;
+
+	if (p->wildcard)
+		/* all else equal, wildcard is always a hit */
+		return true;
+
+	/* otherwise, we have to actually compare the data */
+
+	BUG_ON(!IS_ALIGNED((unsigned long)val, len));
+
+	switch (len) {
+	case 1:
+		_val = *(u8 *)val;
+		break;
+	case 2:
+		_val = *(u16 *)val;
+		break;
+	case 4:
+		_val = *(u32 *)val;
+		break;
+	case 8:
+		_val = *(u64 *)val;
+		break;
+	default:
+		return false;
+	}
+
+	return _val == p->datamatch ? true : false;
+}
+
+/* MMIO/PIO writes trigger an event if the addr/val match */
+static int
+ioeventfd_write(struct kvm_io_device *this, gpa_t addr, int len,
+		const void *val)
+{
+	struct _ioeventfd *p = to_ioeventfd(this);
+
+	if (!ioeventfd_in_range(p, addr, len, val))
+		return -EOPNOTSUPP;
+
+	eventfd_signal(p->eventfd, 1);
+	return 0;
+}
+
+/*
+ * This function is called as KVM is completely shutting down.  We do not
+ * need to worry about locking; just nuke anything we have as quickly as possible.
+ */
+static void
+ioeventfd_destructor(struct kvm_io_device *this)
+{
+	struct _ioeventfd *p = to_ioeventfd(this);
+
+	ioeventfd_release(p);
+}
+
+static const struct kvm_io_device_ops ioeventfd_ops = {
+	.write = ioeventfd_write,
+	.destructor = ioeventfd_destructor,
+};
+
+/* assumes kvm->slots_lock is held by the caller */
+static bool
+ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
+{
+	struct _ioeventfd *_p;
+
+	list_for_each_entry(_p, &kvm->ioeventfds, list)
+		if (_p->addr == p->addr && _p->length == p->length &&
+		    (_p->wildcard || p->wildcard ||
+		     _p->datamatch == p->datamatch))
+			return true;
+
+	return false;
+}
+
+static int
+kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
+	struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	struct _ioeventfd *p;
+	struct eventfd_ctx *eventfd;
+	int ret;
+
+	/* must be natural-word sized: 1, 2, 4 or 8 bytes */
+	switch (args->len) {
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check for range overflow */
+	if (args->addr + args->len < args->addr)
+		return -EINVAL;
+
+	/* check for extra flags that we don't understand */
+	if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
+		return -EINVAL;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	p = kzalloc(sizeof(*p), GFP_KERNEL);
+	if (!p) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	INIT_LIST_HEAD(&p->list);
+	p->addr = args->addr;
+	p->length = args->len;
+	p->eventfd = eventfd;
+
+	/* The datamatch feature is optional, otherwise this is a wildcard */
+	if (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)
+		p->datamatch = args->datamatch;
+	else
+		p->wildcard = true;
+
+	down_write(&kvm->slots_lock);
+
+	/* Verify that there isn't a match already */
+	if (ioeventfd_check_collision(kvm, p)) {
+		ret = -EEXIST;
+		goto unlock_fail;
+	}
+
+	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
+
+	ret = __kvm_io_bus_register_dev(bus, &p->dev);
+	if (ret < 0)
+		goto unlock_fail;
+
+	list_add_tail(&p->list, &kvm->ioeventfds);
+
+	up_write(&kvm->slots_lock);
+
+	return 0;
+
+unlock_fail:
+	up_write(&kvm->slots_lock);
+
+fail:
+	kfree(p);
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+static int
+kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	int pio = args->flags & KVM_IOEVENTFD_FLAG_PIO;
+	struct kvm_io_bus *bus = pio ? &kvm->pio_bus : &kvm->mmio_bus;
+	struct _ioeventfd *p, *tmp;
+	struct eventfd_ctx *eventfd;
+	int ret = -ENOENT;
+
+	eventfd = eventfd_ctx_fdget(args->fd);
+	if (IS_ERR(eventfd))
+		return PTR_ERR(eventfd);
+
+	down_write(&kvm->slots_lock);
+
+	list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) {
+		bool wildcard = !(args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH);
+
+		if (p->eventfd != eventfd ||
+		    p->addr != args->addr ||
+		    p->length != args->len ||
+		    p->wildcard != wildcard)
+			continue;
+
+		if (!p->wildcard && p->datamatch != args->datamatch)
+			continue;
+
+		__kvm_io_bus_unregister_dev(bus, &p->dev);
+		ioeventfd_release(p);
+		ret = 0;
+		break;
+	}
+
+	up_write(&kvm->slots_lock);
+
+	eventfd_ctx_put(eventfd);
+
+	return ret;
+}
+
+int
+kvm_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
+{
+	if (args->flags & KVM_IOEVENTFD_FLAG_DEASSIGN)
+		return kvm_deassign_ioeventfd(kvm, args);
+
+	return kvm_assign_ioeventfd(kvm, args);
+}
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 9c2fd02..d7b9bbb 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -979,7 +979,7 @@ static struct kvm *kvm_create_vm(void)
 	spin_lock_init(&kvm->mmu_lock);
 	spin_lock_init(&kvm->requests_lock);
 	kvm_io_bus_init(&kvm->pio_bus);
-	kvm_irqfd_init(kvm);
+	kvm_eventfd_init(kvm);
 	mutex_init(&kvm->lock);
 	mutex_init(&kvm->irq_lock);
 	kvm_io_bus_init(&kvm->mmio_bus);
@@ -2271,6 +2271,15 @@ static long kvm_vm_ioctl(struct file *filp,
 		r = kvm_irqfd(kvm, data.fd, data.gsi, data.flags);
 		break;
 	}
+	case KVM_IOEVENTFD: {
+		struct kvm_ioeventfd data;
+
+		r = -EFAULT;
+		if (copy_from_user(&data, argp, sizeof data))
+			goto out;
+		r = kvm_ioeventfd(kvm, &data);
+		break;
+	}
 #ifdef CONFIG_KVM_APIC_ARCHITECTURE
 	case KVM_SET_BOOT_CPU_ID:
 		r = 0;
|