summaryrefslogtreecommitdiffstats
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r--virt/kvm/Kconfig4
-rw-r--r--virt/kvm/arm/arch_timer.c2
-rw-r--r--virt/kvm/arm/vgic.c17
-rw-r--r--virt/kvm/assigned-dev.c3
-rw-r--r--virt/kvm/async_pf.c39
-rw-r--r--virt/kvm/eventfd.c76
-rw-r--r--virt/kvm/ioapic.c133
-rw-r--r--virt/kvm/irq_comm.c17
-rw-r--r--virt/kvm/irqchip.c31
-rw-r--r--virt/kvm/kvm_main.c29
-rw-r--r--virt/kvm/vfio.c27
11 files changed, 254 insertions, 124 deletions
diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig
index fbe1a48..13f2d19 100644
--- a/virt/kvm/Kconfig
+++ b/virt/kvm/Kconfig
@@ -22,6 +22,10 @@ config KVM_MMIO
config KVM_ASYNC_PF
bool
+# Toggle to switch between direct notification and batch job
+config KVM_ASYNC_PF_SYNC
+ bool
+
config HAVE_KVM_MSI
bool
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 5081e80..22fa819 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -277,7 +277,7 @@ int kvm_timer_hyp_init(void)
host_vtimer_irq = ppi;
- err = register_cpu_notifier(&kvm_timer_cpu_nb);
+ err = __register_cpu_notifier(&kvm_timer_cpu_nb);
if (err) {
kvm_err("Cannot register timer CPU notifier\n");
goto out_free;
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8ca405c..56ff9be 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -548,11 +548,10 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
u32 val;
u32 *reg;
- offset >>= 1;
reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg,
- vcpu->vcpu_id, offset);
+ vcpu->vcpu_id, offset >> 1);
- if (offset & 2)
+ if (offset & 4)
val = *reg >> 16;
else
val = *reg & 0xffff;
@@ -561,13 +560,13 @@ static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu,
vgic_reg_access(mmio, &val, offset,
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
if (mmio->is_write) {
- if (offset < 4) {
+ if (offset < 8) {
*reg = ~0U; /* Force PPIs/SGIs to 1 */
return false;
}
val = vgic_cfg_compress(val);
- if (offset & 2) {
+ if (offset & 4) {
*reg &= 0xffff;
*reg |= val << 16;
} else {
@@ -916,6 +915,7 @@ static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg)
case 0:
if (!target_cpus)
return;
+ break;
case 1:
target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff;
@@ -1496,7 +1496,7 @@ int kvm_vgic_hyp_init(void)
goto out;
}
- ret = register_cpu_notifier(&vgic_cpu_nb);
+ ret = __register_cpu_notifier(&vgic_cpu_nb);
if (ret) {
kvm_err("Cannot register vgic CPU notifier\n");
goto out_free_irq;
@@ -1667,10 +1667,11 @@ static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr,
if (addr + size < addr)
return -EINVAL;
+ *ioaddr = addr;
ret = vgic_ioaddr_overlap(kvm);
if (ret)
- return ret;
- *ioaddr = addr;
+ *ioaddr = VGIC_ADDR_UNDEF;
+
return ret;
}
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 8db4370..bf06577 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -395,7 +395,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
if (dev->entries_nr == 0)
return r;
- r = pci_enable_msix(dev->dev, dev->host_msix_entries, dev->entries_nr);
+ r = pci_enable_msix_exact(dev->dev,
+ dev->host_msix_entries, dev->entries_nr);
if (r)
return r;
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index 8631d9c..d6a3d09 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -28,6 +28,21 @@
#include "async_pf.h"
#include <trace/events/kvm.h>
+static inline void kvm_async_page_present_sync(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+static inline void kvm_async_page_present_async(struct kvm_vcpu *vcpu,
+ struct kvm_async_pf *work)
+{
+#ifndef CONFIG_KVM_ASYNC_PF_SYNC
+ kvm_arch_async_page_present(vcpu, work);
+#endif
+}
+
static struct kmem_cache *async_pf_cache;
int kvm_async_pf_init(void)
@@ -65,11 +80,10 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
- use_mm(mm);
down_read(&mm->mmap_sem);
- get_user_pages(current, mm, addr, 1, 1, 0, NULL, NULL);
+ get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
up_read(&mm->mmap_sem);
- unuse_mm(mm);
+ kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
list_add_tail(&apf->link, &vcpu->async_pf.done);
@@ -85,7 +99,7 @@ static void async_pf_execute(struct work_struct *work)
if (waitqueue_active(&vcpu->wq))
wake_up_interruptible(&vcpu->wq);
- mmdrop(mm);
+ mmput(mm);
kvm_put_kvm(vcpu->kvm);
}
@@ -97,11 +111,16 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
list_entry(vcpu->async_pf.queue.next,
typeof(*work), queue);
list_del(&work->queue);
+
+#ifdef CONFIG_KVM_ASYNC_PF_SYNC
+ flush_work(&work->work);
+#else
if (cancel_work_sync(&work->work)) {
- mmdrop(work->mm);
+ mmput(work->mm);
kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
kmem_cache_free(async_pf_cache, work);
}
+#endif
}
spin_lock(&vcpu->async_pf.lock);
@@ -130,7 +149,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->async_pf.lock);
kvm_arch_async_page_ready(vcpu, work);
- kvm_arch_async_page_present(vcpu, work);
+ kvm_async_page_present_async(vcpu, work);
list_del(&work->queue);
vcpu->async_pf.queued--;
@@ -138,7 +157,7 @@ void kvm_check_async_pf_completion(struct kvm_vcpu *vcpu)
}
}
-int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
+int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, unsigned long hva,
struct kvm_arch_async_pf *arch)
{
struct kvm_async_pf *work;
@@ -159,10 +178,10 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
work->wakeup_all = false;
work->vcpu = vcpu;
work->gva = gva;
- work->addr = gfn_to_hva(vcpu->kvm, gfn);
+ work->addr = hva;
work->arch = *arch;
work->mm = current->mm;
- atomic_inc(&work->mm->mm_count);
+ atomic_inc(&work->mm->mm_users);
kvm_get_kvm(work->vcpu->kvm);
/* this can't really happen otherwise gfn_to_pfn_async
@@ -180,7 +199,7 @@ int kvm_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn,
return 1;
retry_sync:
kvm_put_kvm(work->vcpu->kvm);
- mmdrop(work->mm);
+ mmput(work->mm);
kmem_cache_free(async_pf_cache, work);
return 0;
}
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index abe4d60..20c3af7 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -31,6 +31,7 @@
#include <linux/list.h>
#include <linux/eventfd.h>
#include <linux/kernel.h>
+#include <linux/srcu.h>
#include <linux/slab.h>
#include "iodev.h"
@@ -118,19 +119,22 @@ static void
irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian)
{
struct _irqfd_resampler *resampler;
+ struct kvm *kvm;
struct _irqfd *irqfd;
+ int idx;
resampler = container_of(kian, struct _irqfd_resampler, notifier);
+ kvm = resampler->kvm;
- kvm_set_irq(resampler->kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
+ kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID,
resampler->notifier.gsi, 0, false);
- rcu_read_lock();
+ idx = srcu_read_lock(&kvm->irq_srcu);
list_for_each_entry_rcu(irqfd, &resampler->list, resampler_link)
eventfd_signal(irqfd->resamplefd, 1);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
static void
@@ -142,7 +146,7 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd)
mutex_lock(&kvm->irqfds.resampler_lock);
list_del_rcu(&irqfd->resampler_link);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
if (list_empty(&resampler->list)) {
list_del(&resampler->link);
@@ -221,17 +225,18 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key)
unsigned long flags = (unsigned long)key;
struct kvm_kernel_irq_routing_entry *irq;
struct kvm *kvm = irqfd->kvm;
+ int idx;
if (flags & POLLIN) {
- rcu_read_lock();
- irq = rcu_dereference(irqfd->irq_entry);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu);
/* An event has been signaled, inject an interrupt */
if (irq)
kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1,
false);
else
schedule_work(&irqfd->inject);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
if (flags & POLLHUP) {
@@ -363,7 +368,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
}
list_add_rcu(&irqfd->resampler_link, &irqfd->resampler->list);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
mutex_unlock(&kvm->irqfds.resampler_lock);
}
@@ -391,19 +396,19 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
lockdep_is_held(&kvm->irqfds.lock));
irqfd_update(kvm, irqfd, irq_rt);
- events = f.file->f_op->poll(f.file, &irqfd->pt);
-
list_add_tail(&irqfd->list, &kvm->irqfds.items);
+ spin_unlock_irq(&kvm->irqfds.lock);
+
/*
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*/
+ events = f.file->f_op->poll(f.file, &irqfd->pt);
+
if (events & POLLIN)
schedule_work(&irqfd->inject);
- spin_unlock_irq(&kvm->irqfds.lock);
-
/*
* do not drop the file until the irqfd is fully initialized, otherwise
* we might race against the POLLHUP
@@ -465,7 +470,7 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args)
* another thread calls kvm_irq_routing_update before
* we flush workqueue below (we synchronize with
* kvm_irq_routing_update using irqfds.lock).
- * It is paired with synchronize_rcu done by caller
+ * It is paired with synchronize_srcu done by caller
* of that function.
*/
rcu_assign_pointer(irqfd->irq_entry, NULL);
@@ -524,7 +529,7 @@ kvm_irqfd_release(struct kvm *kvm)
/*
* Change irq_routing and irqfd.
- * Caller must invoke synchronize_rcu afterwards.
+ * Caller must invoke synchronize_srcu(&kvm->irq_srcu) afterwards.
*/
void kvm_irq_routing_update(struct kvm *kvm,
struct kvm_irq_routing_table *irq_rt)
@@ -600,7 +605,15 @@ ioeventfd_in_range(struct _ioeventfd *p, gpa_t addr, int len, const void *val)
{
u64 _val;
- if (!(addr == p->addr && len == p->length))
+ if (addr != p->addr)
+ /* address must be precise for a hit */
+ return false;
+
+ if (!p->length)
+ /* length = 0 means only look at the address, so always a hit */
+ return true;
+
+ if (len != p->length)
/* address-range must be precise for a hit */
return false;
@@ -671,9 +684,11 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p)
list_for_each_entry(_p, &kvm->ioeventfds, list)
if (_p->bus_idx == p->bus_idx &&
- _p->addr == p->addr && _p->length == p->length &&
- (_p->wildcard || p->wildcard ||
- _p->datamatch == p->datamatch))
+ _p->addr == p->addr &&
+ (!_p->length || !p->length ||
+ (_p->length == p->length &&
+ (_p->wildcard || p->wildcard ||
+ _p->datamatch == p->datamatch))))
return true;
return false;
@@ -697,8 +712,9 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
int ret;
bus_idx = ioeventfd_bus_from_flags(args->flags);
- /* must be natural-word sized */
+ /* must be natural-word sized, or 0 to ignore length */
switch (args->len) {
+ case 0:
case 1:
case 2:
case 4:
@@ -716,6 +732,12 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (args->flags & ~KVM_IOEVENTFD_VALID_FLAG_MASK)
return -EINVAL;
+ /* ioeventfd with no length can't be combined with DATAMATCH */
+ if (!args->len &&
+ args->flags & (KVM_IOEVENTFD_FLAG_PIO |
+ KVM_IOEVENTFD_FLAG_DATAMATCH))
+ return -EINVAL;
+
eventfd = eventfd_ctx_fdget(args->fd);
if (IS_ERR(eventfd))
return PTR_ERR(eventfd);
@@ -753,6 +775,16 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
if (ret < 0)
goto unlock_fail;
+ /* When length is ignored, MMIO is also put on a separate bus, for
+ * faster lookups.
+ */
+ if (!args->len && !(args->flags & KVM_IOEVENTFD_FLAG_PIO)) {
+ ret = kvm_io_bus_register_dev(kvm, KVM_FAST_MMIO_BUS,
+ p->addr, 0, &p->dev);
+ if (ret < 0)
+ goto register_fail;
+ }
+
kvm->buses[bus_idx]->ioeventfd_count++;
list_add_tail(&p->list, &kvm->ioeventfds);
@@ -760,6 +792,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
return 0;
+register_fail:
+ kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
unlock_fail:
mutex_unlock(&kvm->slots_lock);
@@ -799,6 +833,10 @@ kvm_deassign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
continue;
kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
+ if (!p->length) {
+ kvm_io_bus_unregister_dev(kvm, KVM_FAST_MMIO_BUS,
+ &p->dev);
+ }
kvm->buses[bus_idx]->ioeventfd_count--;
ioeventfd_release(p);
ret = 0;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index ce9ed99..2458a1d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -50,7 +50,7 @@
#else
#define ioapic_debug(fmt, arg...)
#endif
-static int ioapic_deliver(struct kvm_ioapic *vioapic, int irq,
+static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
bool line_status);
static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -97,6 +97,14 @@ static void rtc_irq_eoi_tracking_reset(struct kvm_ioapic *ioapic)
bitmap_zero(ioapic->rtc_status.dest_map, KVM_MAX_VCPUS);
}
+static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic);
+
+static void rtc_status_pending_eoi_check_valid(struct kvm_ioapic *ioapic)
+{
+ if (WARN_ON(ioapic->rtc_status.pending_eoi < 0))
+ kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
{
bool new_val, old_val;
@@ -120,9 +128,8 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
} else {
__clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
ioapic->rtc_status.pending_eoi--;
+ rtc_status_pending_eoi_check_valid(ioapic);
}
-
- WARN_ON(ioapic->rtc_status.pending_eoi < 0);
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu)
@@ -149,10 +156,10 @@ static void kvm_rtc_eoi_tracking_restore_all(struct kvm_ioapic *ioapic)
static void rtc_irq_eoi(struct kvm_ioapic *ioapic, struct kvm_vcpu *vcpu)
{
- if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map))
+ if (test_and_clear_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map)) {
--ioapic->rtc_status.pending_eoi;
-
- WARN_ON(ioapic->rtc_status.pending_eoi < 0);
+ rtc_status_pending_eoi_check_valid(ioapic);
+ }
}
static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
@@ -163,23 +170,67 @@ static bool rtc_irq_check_coalesced(struct kvm_ioapic *ioapic)
return false;
}
-static int ioapic_service(struct kvm_ioapic *ioapic, unsigned int idx,
- bool line_status)
+static int ioapic_set_irq(struct kvm_ioapic *ioapic, unsigned int irq,
+ int irq_level, bool line_status)
{
- union kvm_ioapic_redirect_entry *pent;
- int injected = -1;
+ union kvm_ioapic_redirect_entry entry;
+ u32 mask = 1 << irq;
+ u32 old_irr;
+ int edge, ret;
+
+ entry = ioapic->redirtbl[irq];
+ edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+
+ if (!irq_level) {
+ ioapic->irr &= ~mask;
+ ret = 1;
+ goto out;
+ }
- pent = &ioapic->redirtbl[idx];
+ /*
+ * Return 0 for coalesced interrupts; for edge-triggered interrupts,
+ * this only happens if a previous edge has not been delivered due
+ * do masking. For level interrupts, the remote_irr field tells
+ * us if the interrupt is waiting for an EOI.
+ *
+ * RTC is special: it is edge-triggered, but userspace likes to know
+ * if it has been already ack-ed via EOI because coalesced RTC
+ * interrupts lead to time drift in Windows guests. So we track
+ * EOI manually for the RTC interrupt.
+ */
+ if (irq == RTC_GSI && line_status &&
+ rtc_irq_check_coalesced(ioapic)) {
+ ret = 0;
+ goto out;
+ }
- if (!pent->fields.mask) {
- injected = ioapic_deliver(ioapic, idx, line_status);
- if (injected && pent->fields.trig_mode == IOAPIC_LEVEL_TRIG)
- pent->fields.remote_irr = 1;
+ old_irr = ioapic->irr;
+ ioapic->irr |= mask;
+ if ((edge && old_irr == ioapic->irr) ||
+ (!edge && entry.fields.remote_irr)) {
+ ret = 0;
+ goto out;
}
- return injected;
+ ret = ioapic_service(ioapic, irq, line_status);
+
+out:
+ trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
+ return ret;
}
+static void kvm_ioapic_inject_all(struct kvm_ioapic *ioapic, unsigned long irr)
+{
+ u32 idx;
+
+ rtc_irq_eoi_tracking_reset(ioapic);
+ for_each_set_bit(idx, &irr, IOAPIC_NUM_PINS)
+ ioapic_set_irq(ioapic, idx, 1, true);
+
+ kvm_rtc_eoi_tracking_restore_all(ioapic);
+}
+
+
static void update_handled_vectors(struct kvm_ioapic *ioapic)
{
DECLARE_BITMAP(handled_vectors, 256);
@@ -282,12 +333,15 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
}
}
-static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
+static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
{
union kvm_ioapic_redirect_entry *entry = &ioapic->redirtbl[irq];
struct kvm_lapic_irq irqe;
int ret;
+ if (entry->fields.mask)
+ return -1;
+
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
"vector=%x trig_mode=%x\n",
entry->fields.dest_id, entry->fields.dest_mode,
@@ -302,53 +356,41 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq, bool line_status)
irqe.level = 1;
irqe.shorthand = 0;
+ if (irqe.trig_mode == IOAPIC_EDGE_TRIG)
+ ioapic->irr &= ~(1 << irq);
+
if (irq == RTC_GSI && line_status) {
+ /*
+ * pending_eoi cannot ever become negative (see
+ * rtc_status_pending_eoi_check_valid) and the caller
+ * ensures that it is only called if it is >= zero, namely
+ * if rtc_irq_check_coalesced returns false).
+ */
BUG_ON(ioapic->rtc_status.pending_eoi != 0);
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe,
ioapic->rtc_status.dest_map);
- ioapic->rtc_status.pending_eoi = ret;
+ ioapic->rtc_status.pending_eoi = (ret < 0 ? 0 : ret);
} else
ret = kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe, NULL);
+ if (ret && irqe.trig_mode == IOAPIC_LEVEL_TRIG)
+ entry->fields.remote_irr = 1;
+
return ret;
}
int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int irq_source_id,
int level, bool line_status)
{
- u32 old_irr;
- u32 mask = 1 << irq;
- union kvm_ioapic_redirect_entry entry;
int ret, irq_level;
BUG_ON(irq < 0 || irq >= IOAPIC_NUM_PINS);
spin_lock(&ioapic->lock);
- old_irr = ioapic->irr;
irq_level = __kvm_irq_line_state(&ioapic->irq_states[irq],
irq_source_id, level);
- entry = ioapic->redirtbl[irq];
- irq_level ^= entry.fields.polarity;
- if (!irq_level) {
- ioapic->irr &= ~mask;
- ret = 1;
- } else {
- int edge = (entry.fields.trig_mode == IOAPIC_EDGE_TRIG);
+ ret = ioapic_set_irq(ioapic, irq, irq_level, line_status);
- if (irq == RTC_GSI && line_status &&
- rtc_irq_check_coalesced(ioapic)) {
- ret = 0; /* coalesced */
- goto out;
- }
- ioapic->irr |= mask;
- if ((edge && old_irr != ioapic->irr) ||
- (!edge && !entry.fields.remote_irr))
- ret = ioapic_service(ioapic, irq, line_status);
- else
- ret = 0; /* report coalesced interrupt */
- }
-out:
- trace_kvm_ioapic_set_irq(entry.bits, irq, ret == 0);
spin_unlock(&ioapic->lock);
return ret;
@@ -394,7 +436,7 @@ static void __kvm_ioapic_update_eoi(struct kvm_vcpu *vcpu,
ASSERT(ent->fields.trig_mode == IOAPIC_LEVEL_TRIG);
ent->fields.remote_irr = 0;
- if (!ent->fields.mask && (ioapic->irr & (1 << i)))
+ if (ioapic->irr & (1 << i))
ioapic_service(ioapic, i, false);
}
}
@@ -595,9 +637,10 @@ int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
+ ioapic->irr = 0;
update_handled_vectors(ioapic);
kvm_vcpu_request_scan_ioapic(kvm);
- kvm_rtc_eoi_tracking_restore_all(ioapic);
+ kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
return 0;
}
diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c
index e2e6b44..ced4a54 100644
--- a/virt/kvm/irq_comm.c
+++ b/virt/kvm/irq_comm.c
@@ -163,6 +163,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
struct kvm_kernel_irq_routing_entry *e;
int ret = -EINVAL;
struct kvm_irq_routing_table *irq_rt;
+ int idx;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -174,8 +175,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
* Since there's no easy way to do this, we only support injecting MSI
* which is limited to 1:1 GSI mapping.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, &irq_rt->map[irq], link) {
if (likely(e->type == KVM_IRQ_ROUTING_MSI))
@@ -184,7 +185,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level)
ret = -EWOULDBLOCK;
break;
}
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return ret;
}
@@ -253,22 +254,22 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq,
mutex_lock(&kvm->irq_lock);
hlist_del_rcu(&kimn->link);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
}
void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin,
bool mask)
{
struct kvm_irq_mask_notifier *kimn;
- int gsi;
+ int idx, gsi;
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kimn, &kvm->mask_notifier_list, link)
if (kimn->irq == gsi)
kimn->func(kimn, mask);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
int kvm_set_routing_entry(struct kvm_irq_routing_table *rt,
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 20dc9e4..b43c275 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -26,6 +26,7 @@
#include <linux/kvm_host.h>
#include <linux/slab.h>
+#include <linux/srcu.h>
#include <linux/export.h>
#include <trace/events/kvm.h>
#include "irq.h"
@@ -33,19 +34,19 @@
bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
- int gsi;
+ int gsi, idx;
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi) {
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return true;
}
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
return false;
}
@@ -54,18 +55,18 @@ EXPORT_SYMBOL_GPL(kvm_irq_has_notifier);
void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin)
{
struct kvm_irq_ack_notifier *kian;
- int gsi;
+ int gsi, idx;
trace_kvm_ack_irq(irqchip, pin);
- rcu_read_lock();
- gsi = rcu_dereference(kvm->irq_routing)->chip[irqchip][pin];
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ gsi = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu)->chip[irqchip][pin];
if (gsi != -1)
hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list,
link)
if (kian->gsi == gsi)
kian->irq_acked(kian);
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
}
void kvm_register_irq_ack_notifier(struct kvm *kvm,
@@ -85,7 +86,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
mutex_lock(&kvm->irq_lock);
hlist_del_init_rcu(&kian->link);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu(&kvm->irq_srcu);
#ifdef __KVM_HAVE_IOAPIC
kvm_vcpu_request_scan_ioapic(kvm);
#endif
@@ -115,7 +116,7 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
bool line_status)
{
struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS];
- int ret = -1, i = 0;
+ int ret = -1, i = 0, idx;
struct kvm_irq_routing_table *irq_rt;
trace_kvm_set_irq(irq, level, irq_source_id);
@@ -124,12 +125,12 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level,
* IOAPIC. So set the bit in both. The guest will ignore
* writes to the unused one.
*/
- rcu_read_lock();
- irq_rt = rcu_dereference(kvm->irq_routing);
+ idx = srcu_read_lock(&kvm->irq_srcu);
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
if (irq < irq_rt->nr_rt_entries)
hlist_for_each_entry(e, &irq_rt->map[irq], link)
irq_set[i++] = *e;
- rcu_read_unlock();
+ srcu_read_unlock(&kvm->irq_srcu, idx);
while(i--) {
int r;
@@ -226,7 +227,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
kvm_irq_routing_update(kvm, new);
mutex_unlock(&kvm->irq_lock);
- synchronize_rcu();
+ synchronize_srcu_expedited(&kvm->irq_srcu);
new = old;
r = 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 03a0381..c86be0f 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -102,7 +102,7 @@ static void kvm_release_pfn_dirty(pfn_t pfn);
static void mark_page_dirty_in_slot(struct kvm *kvm,
struct kvm_memory_slot *memslot, gfn_t gfn);
-bool kvm_rebooting;
+__visible bool kvm_rebooting;
EXPORT_SYMBOL_GPL(kvm_rebooting);
static bool largepages_enabled = true;
@@ -457,11 +457,11 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = kvm_arch_init_vm(kvm, type);
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
r = hardware_enable_all();
if (r)
- goto out_err_nodisable;
+ goto out_err_no_disable;
#ifdef CONFIG_HAVE_KVM_IRQCHIP
INIT_HLIST_HEAD(&kvm->mask_notifier_list);
@@ -473,10 +473,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
r = -ENOMEM;
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
- goto out_err_nosrcu;
+ goto out_err_no_srcu;
+ if (init_srcu_struct(&kvm->irq_srcu))
+ goto out_err_no_irq_srcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
GFP_KERNEL);
@@ -505,10 +507,12 @@ static struct kvm *kvm_create_vm(unsigned long type)
return kvm;
out_err:
+ cleanup_srcu_struct(&kvm->irq_srcu);
+out_err_no_irq_srcu:
cleanup_srcu_struct(&kvm->srcu);
-out_err_nosrcu:
+out_err_no_srcu:
hardware_disable_all();
-out_err_nodisable:
+out_err_no_disable:
for (i = 0; i < KVM_NR_BUSES; i++)
kfree(kvm->buses[i]);
kfree(kvm->memslots);
@@ -604,6 +608,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_arch_destroy_vm(kvm);
kvm_destroy_devices(kvm);
kvm_free_physmem(kvm);
+ cleanup_srcu_struct(&kvm->irq_srcu);
cleanup_srcu_struct(&kvm->srcu);
kvm_arch_free_vm(kvm);
hardware_disable_all();
@@ -640,14 +645,12 @@ static int kvm_vm_release(struct inode *inode, struct file *filp)
*/
static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
{
-#ifndef CONFIG_S390
unsigned long dirty_bytes = 2 * kvm_dirty_bitmap_bytes(memslot);
memslot->dirty_bitmap = kvm_kvzalloc(dirty_bytes);
if (!memslot->dirty_bitmap)
return -ENOMEM;
-#endif /* !CONFIG_S390 */
return 0;
}
@@ -1804,7 +1807,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me)
continue;
if (vcpu == me)
continue;
- if (waitqueue_active(&vcpu->wq))
+ if (waitqueue_active(&vcpu->wq) && !kvm_arch_vcpu_runnable(vcpu))
continue;
if (!kvm_vcpu_eligible_for_directed_yield(vcpu))
continue;
@@ -2284,6 +2287,11 @@ static int kvm_ioctl_create_device(struct kvm *kvm,
ops = &kvm_arm_vgic_v2_ops;
break;
#endif
+#ifdef CONFIG_S390
+ case KVM_DEV_TYPE_FLIC:
+ ops = &kvm_flic_ops;
+ break;
+#endif
default:
return -ENODEV;
}
@@ -2920,6 +2928,7 @@ static int __kvm_io_bus_read(struct kvm_io_bus *bus, struct kvm_io_range *range,
return -EOPNOTSUPP;
}
+EXPORT_SYMBOL_GPL(kvm_io_bus_write);
/* kvm_io_bus_read - called under kvm->slots_lock */
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index b4f9507..ba1a93f 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -59,6 +59,22 @@ static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group)
symbol_put(vfio_group_put_external_user);
}
+static bool kvm_vfio_group_is_coherent(struct vfio_group *vfio_group)
+{
+ long (*fn)(struct vfio_group *, unsigned long);
+ long ret;
+
+ fn = symbol_get(vfio_external_check_extension);
+ if (!fn)
+ return false;
+
+ ret = fn(vfio_group, VFIO_DMA_CC_IOMMU);
+
+ symbol_put(vfio_external_check_extension);
+
+ return ret > 0;
+}
+
/*
* Groups can use the same or different IOMMU domains. If the same then
* adding a new group may change the coherency of groups we've previously
@@ -75,13 +91,10 @@ static void kvm_vfio_update_coherency(struct kvm_device *dev)
mutex_lock(&kv->lock);
list_for_each_entry(kvg, &kv->group_list, node) {
- /*
- * TODO: We need an interface to check the coherency of
- * the IOMMU domain this group is using. For now, assume
- * it's always noncoherent.
- */
- noncoherent = true;
- break;
+ if (!kvm_vfio_group_is_coherent(kvg->vfio_group)) {
+ noncoherent = true;
+ break;
+ }
}
if (noncoherent != kv->noncoherent) {
OpenPOWER on IntegriCloud