author    Linus Torvalds <torvalds@linux-foundation.org>  2015-09-10 16:42:49 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2015-09-10 16:42:49 -0700
commit    519f526d391b0ef775aeb04c4b6f632ea6b3ee50 (patch)
tree      36985d7882734c136fc3c9a48e9d9abf9e97c1f1 /virt
parent    06ab838c2024db468855118087db16d8fa905ddc (diff)
parent    ba60c41ae392b473a1897faa0b8739fcb8759d69 (diff)
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull more kvm updates from Paolo Bonzini:
 "ARM:
   - Full debug support for arm64
   - Active state switching for timer interrupts
   - Lazy FP/SIMD save/restore for arm64
   - Generic ARMv8 target

  PPC:
   - Book3S: A few bug fixes
   - Book3S: Allow micro-threading on POWER8

  x86:
   - Compiler warnings

  Generic:
   - Adaptive polling for guest halt"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (49 commits)
  kvm: irqchip: fix memory leak
  kvm: move new trace event outside #ifdef CONFIG_KVM_ASYNC_PF
  KVM: trace kvm_halt_poll_ns grow/shrink
  KVM: dynamic halt-polling
  KVM: make halt_poll_ns per-vCPU
  Silence compiler warning in arch/x86/kvm/emulate.c
  kvm: compile process_smi_save_seg_64() only for x86_64
  KVM: x86: avoid uninitialized variable warning
  KVM: PPC: Book3S: Fix typo in top comment about locking
  KVM: PPC: Book3S: Fix size of the PSPB register
  KVM: PPC: Book3S HV: Exit on H_DOORBELL if HOST_IPI is set
  KVM: PPC: Book3S HV: Fix race in starting secondary threads
  KVM: PPC: Book3S: correct width in XER handling
  KVM: PPC: Book3S HV: Fix preempted vcore stolen time calculation
  KVM: PPC: Book3S HV: Fix preempted vcore list locking
  KVM: PPC: Book3S HV: Implement H_CLEAR_REF and H_CLEAR_MOD
  KVM: PPC: Book3S HV: Fix bug in dirty page tracking
  KVM: PPC: Book3S HV: Fix race in reading change bit when removing HPTE
  KVM: PPC: Book3S HV: Implement dynamic micro-threading on POWER8
  KVM: PPC: Book3S HV: Make use of unused threads when running guests
  ...
Diffstat (limited to 'virt')
-rw-r--r--  virt/kvm/arm/arch_timer.c   29
-rw-r--r--  virt/kvm/arm/vgic-v2.c      16
-rw-r--r--  virt/kvm/arm/vgic-v3.c      21
-rw-r--r--  virt/kvm/arm/vgic.c        427
-rw-r--r--  virt/kvm/irqchip.c           8
-rw-r--r--  virt/kvm/kvm_main.c         62
6 files changed, 508 insertions(+), 55 deletions(-)
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 98c95f2..76e38d2 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -64,10 +64,10 @@ static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu)
int ret;
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
- timer->cntv_ctl |= ARCH_TIMER_CTRL_IT_MASK;
- ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id,
- timer->irq->irq,
- timer->irq->level);
+ kvm_vgic_set_phys_irq_active(timer->map, true);
+ ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id,
+ timer->map,
+ timer->irq->level);
WARN_ON(ret);
}
@@ -117,7 +117,8 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
cycle_t cval, now;
if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
- !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE))
+ !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
+ kvm_vgic_get_phys_irq_active(timer->map))
return false;
cval = timer->cntv_cval;
@@ -184,10 +185,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu)
timer_arm(timer, ns);
}
-void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
- const struct kvm_irq_level *irq)
+int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
+ const struct kvm_irq_level *irq)
{
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+ struct irq_phys_map *map;
/*
* The vcpu timer irq number cannot be determined in
@@ -196,6 +198,17 @@ void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
* vcpu timer irq number when the vcpu is reset.
*/
timer->irq = irq;
+
+ /*
+ * Tell the VGIC that the virtual interrupt is tied to a
+ * physical interrupt. We do that once per VCPU.
+ */
+ map = kvm_vgic_map_phys_irq(vcpu, irq->irq, host_vtimer_irq);
+ if (WARN_ON(IS_ERR(map)))
+ return PTR_ERR(map);
+
+ timer->map = map;
+ return 0;
}
void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
@@ -335,6 +348,8 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
timer_disarm(timer);
+ if (timer->map)
+ kvm_vgic_unmap_phys_irq(vcpu, timer->map);
}
void kvm_timer_enable(struct kvm *kvm)
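The arch_timer.c change above turns kvm_timer_vcpu_reset() into a function that can fail, because kvm_vgic_map_phys_irq() reports failure through an error-encoded pointer checked with IS_ERR()/PTR_ERR(). A minimal userspace sketch of that convention follows; all names here are illustrative stand-ins, not the kernel's actual helpers:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

#define MAX_ERRNO 4095

/* Encode a small negative errno value directly in a pointer. */
static void *err_ptr(long error)
{
        return (void *)error;
}

/* Decode the errno back out of the pointer. */
static long ptr_err(const void *ptr)
{
        return (long)ptr;
}

/* Pointers in the top MAX_ERRNO bytes of the address space are errors. */
static int is_err(const void *ptr)
{
        return (uintptr_t)ptr >= (uintptr_t)-MAX_ERRNO;
}

struct irq_phys_map_demo {
        int virt_irq;
        int phys_irq;
};

static struct irq_phys_map_demo *map_phys_irq(int virt_irq, int phys_irq)
{
        static struct irq_phys_map_demo m;

        if (virt_irq < 0)
                return err_ptr(-EINVAL); /* error travels in the return pointer */
        m.virt_irq = virt_irq;
        m.phys_irq = phys_irq;
        return &m;
}

int main(void)
{
        struct irq_phys_map_demo *map = map_phys_irq(27, 27);

        if (is_err(map))
                return (int)-ptr_err(map); /* unwrap, as PTR_ERR() does above */
        printf("virt %d -> phys %d\n", map->virt_irq, map->phys_irq);
        return 0;
}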
diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c
index f9b9c7c..8d7b04d 100644
--- a/virt/kvm/arm/vgic-v2.c
+++ b/virt/kvm/arm/vgic-v2.c
@@ -48,6 +48,10 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
lr_desc.state |= LR_STATE_ACTIVE;
if (val & GICH_LR_EOI)
lr_desc.state |= LR_EOI_INT;
+ if (val & GICH_LR_HW) {
+ lr_desc.state |= LR_HW;
+ lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT;
+ }
return lr_desc;
}
@@ -55,7 +59,9 @@ static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr)
static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
struct vgic_lr lr_desc)
{
- u32 lr_val = (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT) | lr_desc.irq;
+ u32 lr_val;
+
+ lr_val = lr_desc.irq;
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= GICH_LR_PENDING_BIT;
@@ -64,6 +70,14 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr,
if (lr_desc.state & LR_EOI_INT)
lr_val |= GICH_LR_EOI;
+ if (lr_desc.state & LR_HW) {
+ lr_val |= GICH_LR_HW;
+ lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT;
+ }
+
+ if (lr_desc.irq < VGIC_NR_SGIS)
+ lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT);
+
vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val;
}
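The vgic-v2.c hunk above stops unconditionally packing the SGI source CPU into the list register; the PHYSID field now carries either the physical interrupt number (for HW-backed interrupts) or the source CPU (for SGIs only). A compilable sketch of that packing, with assumed bit positions standing in for the GICH_LR_* constants:

#include <stdint.h>
#include <stdio.h>

#define LR_PHYSID_SHIFT 10          /* stand-in for GICH_LR_PHYSID_CPUID_SHIFT */
#define LR_HW           (1u << 31)  /* stand-in for GICH_LR_HW */
#define NR_SGIS         16u         /* stand-in for VGIC_NR_SGIS */

/*
 * Pack a GICv2-style list register. Since SGIs are never HW-backed,
 * the PHYSID field never has to hold both values at once.
 */
static uint32_t pack_lr_v2(uint32_t irq, uint32_t source, uint32_t hwirq, int hw)
{
        uint32_t lr = irq;          /* low bits: virtual interrupt number */

        if (hw) {
                lr |= LR_HW;
                lr |= hwirq << LR_PHYSID_SHIFT;
        }
        if (irq < NR_SGIS)
                lr |= source << LR_PHYSID_SHIFT;
        return lr;
}

int main(void)
{
        printf("sgi 3 from cpu 2: 0x%08x\n", pack_lr_v2(3, 2, 0, 0));
        printf("hw irq 27:        0x%08x\n", pack_lr_v2(27, 0, 27, 1));
        return 0;
}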
diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
index dff0602..afbf925 100644
--- a/virt/kvm/arm/vgic-v3.c
+++ b/virt/kvm/arm/vgic-v3.c
@@ -67,6 +67,10 @@ static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
lr_desc.state |= LR_STATE_ACTIVE;
if (val & ICH_LR_EOI)
lr_desc.state |= LR_EOI_INT;
+ if (val & ICH_LR_HW) {
+ lr_desc.state |= LR_HW;
+ lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0);
+ }
return lr_desc;
}
@@ -84,10 +88,17 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
* Eventually we want to make this configurable, so we may revisit
* this in the future.
*/
- if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
+ switch (vcpu->kvm->arch.vgic.vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
lr_val |= ICH_LR_GROUP;
- else
- lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+ break;
+ case KVM_DEV_TYPE_ARM_VGIC_V2:
+ if (lr_desc.irq < VGIC_NR_SGIS)
+ lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT;
+ break;
+ default:
+ BUG();
+ }
if (lr_desc.state & LR_STATE_PENDING)
lr_val |= ICH_LR_PENDING_BIT;
@@ -95,6 +106,10 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
lr_val |= ICH_LR_ACTIVE_BIT;
if (lr_desc.state & LR_EOI_INT)
lr_val |= ICH_LR_EOI;
+ if (lr_desc.state & LR_HW) {
+ lr_val |= ICH_LR_HW;
+ lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
+ }
vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val;
}
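For GICv3 the list registers are 64 bits wide, and the hunk above recovers the physical interrupt ID by shifting by ICH_LR_PHYS_ID_SHIFT and masking with GENMASK(9, 0). A small standalone sketch of that decode; the shift value and HW bit position are assumptions for the demo, not quoted from the kernel headers:

#include <stdint.h>
#include <stdio.h>

/* Same construction as the kernel's GENMASK_ULL(h, l). */
#define GENMASK64(h, l) (((~0ULL) << (l)) & (~0ULL >> (63 - (h))))

#define PHYS_ID_SHIFT 32            /* stand-in for ICH_LR_PHYS_ID_SHIFT */
#define LR_HW         (1ULL << 61)  /* stand-in for ICH_LR_HW */

int main(void)
{
        /* Encode: HW bit plus the physical ID, as vgic_v3_set_lr does. */
        uint64_t lr = LR_HW | ((uint64_t)27 << PHYS_ID_SHIFT);

        /* Decode: mask the 10-bit field back out, as vgic_v3_get_lr does. */
        uint64_t hwirq = (lr >> PHYS_ID_SHIFT) & GENMASK64(9, 0);

        printf("hwirq = %llu\n", (unsigned long long)hwirq);
        return 0;
}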
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index bc40137..9eb489a 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -24,6 +24,7 @@
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
+#include <linux/rculist.h>
#include <linux/uaccess.h>
#include <asm/kvm_emulate.h>
@@ -74,6 +75,28 @@
* cause the interrupt to become inactive in such a situation.
* Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become
* inactive as long as the external input line is held high.
+ *
+ *
+ * Initialization rules: there are multiple stages to the vgic
+ * initialization, both for the distributor and the CPU interfaces.
+ *
+ * Distributor:
+ *
+ * - kvm_vgic_early_init(): initialization of static data that doesn't
+ * depend on any sizing information or emulation type. No allocation
+ * is allowed there.
+ *
+ * - vgic_init(): allocation and initialization of the generic data
+ * structures that depend on sizing information (number of CPUs,
+ * number of interrupts). Also initializes the vcpu specific data
+ * structures. Can be executed lazily for GICv2.
+ * [to be renamed to kvm_vgic_init??]
+ *
+ * CPU Interface:
+ *
+ * - kvm_vgic_cpu_early_init(): initialization of static data that
+ * doesn't depend on any sizing information or emulation type. No
+ * allocation is allowed there.
*/
#include "vgic.h"
@@ -82,6 +105,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu);
static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu);
static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr);
static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc);
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+ int virt_irq);
static const struct vgic_ops *vgic_ops;
static const struct vgic_params *vgic;
@@ -375,7 +400,7 @@ void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq)
static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
{
- return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
+ return !vgic_irq_is_queued(vcpu, irq);
}
/**
@@ -1115,6 +1140,39 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
if (!vgic_irq_is_edge(vcpu, irq))
vlr.state |= LR_EOI_INT;
+ if (vlr.irq >= VGIC_NR_SGIS) {
+ struct irq_phys_map *map;
+ map = vgic_irq_map_search(vcpu, irq);
+
+ /*
+ * If we have a mapping, and the virtual interrupt is
+ * being injected, then we must set the state to
+ * active in the physical world. Otherwise the
+ * physical interrupt will fire and the guest will
+ * exit before processing the virtual interrupt.
+ */
+ if (map) {
+ int ret;
+
+ BUG_ON(!map->active);
+ vlr.hwirq = map->phys_irq;
+ vlr.state |= LR_HW;
+ vlr.state &= ~LR_EOI_INT;
+
+ ret = irq_set_irqchip_state(map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ true);
+ WARN_ON(ret);
+
+ /*
+ * Make sure we're not going to sample this
+ * again, as a HW-backed interrupt cannot be
+ * in the PENDING_ACTIVE stage.
+ */
+ vgic_irq_set_queued(vcpu, irq);
+ }
+ }
+
vgic_set_lr(vcpu, lr_nr, vlr);
vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
}
@@ -1339,6 +1397,39 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
return level_pending;
}
+/*
+ * Save the physical active state, and reset it to inactive.
+ *
+ * Return 1 if HW interrupt went from active to inactive, and 0 otherwise.
+ */
+static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr)
+{
+ struct irq_phys_map *map;
+ int ret;
+
+ if (!(vlr.state & LR_HW))
+ return 0;
+
+ map = vgic_irq_map_search(vcpu, vlr.irq);
+ BUG_ON(!map || !map->active);
+
+ ret = irq_get_irqchip_state(map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ &map->active);
+
+ WARN_ON(ret);
+
+ if (map->active) {
+ ret = irq_set_irqchip_state(map->irq,
+ IRQCHIP_STATE_ACTIVE,
+ false);
+ WARN_ON(ret);
+ return 0;
+ }
+
+ return 1;
+}
+
/* Sync back the VGIC state after a guest run */
static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
{
@@ -1353,14 +1444,31 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
elrsr = vgic_get_elrsr(vcpu);
elrsr_ptr = u64_to_bitmask(&elrsr);
- /* Clear mappings for empty LRs */
- for_each_set_bit(lr, elrsr_ptr, vgic->nr_lr) {
+ /* Deal with HW interrupts, and clear mappings for empty LRs */
+ for (lr = 0; lr < vgic->nr_lr; lr++) {
struct vgic_lr vlr;
- if (!test_and_clear_bit(lr, vgic_cpu->lr_used))
+ if (!test_bit(lr, vgic_cpu->lr_used))
continue;
vlr = vgic_get_lr(vcpu, lr);
+ if (vgic_sync_hwirq(vcpu, vlr)) {
+ /*
+ * So this is a HW interrupt that the guest
+ * EOI-ed. Clean the LR state and allow the
+ * interrupt to be sampled again.
+ */
+ vlr.state = 0;
+ vlr.hwirq = 0;
+ vgic_set_lr(vcpu, lr, vlr);
+ vgic_irq_clear_queued(vcpu, vlr.irq);
+ set_bit(lr, elrsr_ptr);
+ }
+
+ if (!test_bit(lr, elrsr_ptr))
+ continue;
+
+ clear_bit(lr, vgic_cpu->lr_used);
BUG_ON(vlr.irq >= dist->nr_irqs);
vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY;
@@ -1447,7 +1555,8 @@ static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level)
}
static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
- unsigned int irq_num, bool level)
+ struct irq_phys_map *map,
+ unsigned int irq_num, bool level)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
@@ -1455,6 +1564,9 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
int enabled;
bool ret = true, can_inject = true;
+ if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+ return -EINVAL;
+
spin_lock(&dist->lock);
vcpu = kvm_get_vcpu(kvm, cpuid);
@@ -1517,18 +1629,46 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid,
out:
spin_unlock(&dist->lock);
- return ret ? cpuid : -EINVAL;
+ if (ret) {
+ /* kick the specified vcpu */
+ kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid));
+ }
+
+ return 0;
+}
+
+static int vgic_lazy_init(struct kvm *kvm)
+{
+ int ret = 0;
+
+ if (unlikely(!vgic_initialized(kvm))) {
+ /*
+ * We only provide the automatic initialization of the VGIC
+ * for the legacy case of a GICv2. Any other type must
+ * be explicitly initialized once setup with the respective
+ * KVM device call.
+ */
+ if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2)
+ return -EBUSY;
+
+ mutex_lock(&kvm->lock);
+ ret = vgic_init(kvm);
+ mutex_unlock(&kvm->lock);
+ }
+
+ return ret;
}
/**
* kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic
* @kvm: The VM structure pointer
* @cpuid: The CPU for PPIs
- * @irq_num: The IRQ number that is assigned to the device
+ * @irq_num: The IRQ number that is assigned to the device. This IRQ
+ * must not be mapped to a HW interrupt.
* @level: Edge-triggered: true: to trigger the interrupt
* false: to ignore the call
- * Level-sensitive true: activates an interrupt
- * false: deactivates an interrupt
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
*
* The GIC is not concerned with devices being active-LOW or active-HIGH for
* level-sensitive interrupts. You can think of the level parameter as 1
@@ -1537,39 +1677,44 @@ out:
int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
bool level)
{
- int ret = 0;
- int vcpu_id;
-
- if (unlikely(!vgic_initialized(kvm))) {
- /*
- * We only provide the automatic initialization of the VGIC
- * for the legacy case of a GICv2. Any other type must
- * be explicitly initialized once setup with the respective
- * KVM device call.
- */
- if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) {
- ret = -EBUSY;
- goto out;
- }
- mutex_lock(&kvm->lock);
- ret = vgic_init(kvm);
- mutex_unlock(&kvm->lock);
+ struct irq_phys_map *map;
+ int ret;
- if (ret)
- goto out;
- }
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
- if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020))
+ map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num);
+ if (map)
return -EINVAL;
- vcpu_id = vgic_update_irq_pending(kvm, cpuid, irq_num, level);
- if (vcpu_id >= 0) {
- /* kick the specified vcpu */
- kvm_vcpu_kick(kvm_get_vcpu(kvm, vcpu_id));
- }
+ return vgic_update_irq_pending(kvm, cpuid, NULL, irq_num, level);
+}
-out:
- return ret;
+/**
+ * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic
+ * @kvm: The VM structure pointer
+ * @cpuid: The CPU for PPIs
+ * @map: Pointer to an irq_phys_map structure describing the mapping
+ * @level: Edge-triggered: true: to trigger the interrupt
+ * false: to ignore the call
+ * Level-sensitive true: raise the input signal
+ * false: lower the input signal
+ *
+ * The GIC is not concerned with devices being active-LOW or active-HIGH for
+ * level-sensitive interrupts. You can think of the level parameter as 1
+ * being HIGH and 0 being LOW and all devices being active-HIGH.
+ */
+int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid,
+ struct irq_phys_map *map, bool level)
+{
+ int ret;
+
+ ret = vgic_lazy_init(kvm);
+ if (ret)
+ return ret;
+
+ return vgic_update_irq_pending(kvm, cpuid, map, map->virt_irq, level);
}
static irqreturn_t vgic_maintenance_handler(int irq, void *data)
@@ -1583,6 +1728,188 @@ static irqreturn_t vgic_maintenance_handler(int irq, void *data)
return IRQ_HANDLED;
}
+static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu,
+ int virt_irq)
+{
+ if (virt_irq < VGIC_NR_PRIVATE_IRQS)
+ return &vcpu->arch.vgic_cpu.irq_phys_map_list;
+ else
+ return &vcpu->kvm->arch.vgic.irq_phys_map_list;
+}
+
+/**
+ * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ
+ * @vcpu: The VCPU pointer
+ * @virt_irq: The virtual irq number
+ * @irq: The Linux IRQ number
+ *
+ * Establish a mapping between a guest visible irq (@virt_irq) and a
+ * Linux irq (@irq). On injection, @virt_irq will be associated with
+ * the physical interrupt represented by @irq. This mapping can be
+ * established multiple times as long as the parameters are the same.
+ *
+ * Returns a valid pointer on success, and an error pointer otherwise
+ */
+struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
+ int virt_irq, int irq)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+ struct irq_phys_map *map;
+ struct irq_phys_map_entry *entry;
+ struct irq_desc *desc;
+ struct irq_data *data;
+ int phys_irq;
+
+ desc = irq_to_desc(irq);
+ if (!desc) {
+ kvm_err("%s: no interrupt descriptor\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+
+ data = irq_desc_get_irq_data(desc);
+ while (data->parent_data)
+ data = data->parent_data;
+
+ phys_irq = data->hwirq;
+
+ /* Create a new mapping */
+ entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+ if (!entry)
+ return ERR_PTR(-ENOMEM);
+
+ spin_lock(&dist->irq_phys_map_lock);
+
+ /* Try to match an existing mapping */
+ map = vgic_irq_map_search(vcpu, virt_irq);
+ if (map) {
+ /* Make sure this mapping matches */
+ if (map->phys_irq != phys_irq ||
+ map->irq != irq)
+ map = ERR_PTR(-EINVAL);
+
+ /* Found an existing, valid mapping */
+ goto out;
+ }
+
+ map = &entry->map;
+ map->virt_irq = virt_irq;
+ map->phys_irq = phys_irq;
+ map->irq = irq;
+
+ list_add_tail_rcu(&entry->entry, root);
+
+out:
+ spin_unlock(&dist->irq_phys_map_lock);
+ /* If we've found a hit in the existing list, free the useless
+ * entry */
+ if (IS_ERR(map) || map != &entry->map)
+ kfree(entry);
+ return map;
+}
+
+static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu,
+ int virt_irq)
+{
+ struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq);
+ struct irq_phys_map_entry *entry;
+ struct irq_phys_map *map;
+
+ rcu_read_lock();
+
+ list_for_each_entry_rcu(entry, root, entry) {
+ map = &entry->map;
+ if (map->virt_irq == virt_irq) {
+ rcu_read_unlock();
+ return map;
+ }
+ }
+
+ rcu_read_unlock();
+
+ return NULL;
+}
+
+static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu)
+{
+ struct irq_phys_map_entry *entry;
+
+ entry = container_of(rcu, struct irq_phys_map_entry, rcu);
+ kfree(entry);
+}
+
+/**
+ * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ
+ *
+ * Return the logical active state of a mapped interrupt. This doesn't
+ * necessarily reflect the current HW state.
+ */
+bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map)
+{
+ BUG_ON(!map);
+ return map->active;
+}
+
+/**
+ * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ
+ *
+ * Set the logical active state of a mapped interrupt. This doesn't
+ * immediately affect the HW state.
+ */
+void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active)
+{
+ BUG_ON(!map);
+ map->active = active;
+}
+
+/**
+ * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping
+ * @vcpu: The VCPU pointer
+ * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq
+ *
+ * Remove an existing mapping between virtual and physical interrupts.
+ */
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map)
+{
+ struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
+ struct irq_phys_map_entry *entry;
+ struct list_head *root;
+
+ if (!map)
+ return -EINVAL;
+
+ root = vgic_get_irq_phys_map_list(vcpu, map->virt_irq);
+
+ spin_lock(&dist->irq_phys_map_lock);
+
+ list_for_each_entry(entry, root, entry) {
+ if (&entry->map == map) {
+ list_del_rcu(&entry->entry);
+ call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
+ break;
+ }
+ }
+
+ spin_unlock(&dist->irq_phys_map_lock);
+
+ return 0;
+}
+
+static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root)
+{
+ struct vgic_dist *dist = &kvm->arch.vgic;
+ struct irq_phys_map_entry *entry;
+
+ spin_lock(&dist->irq_phys_map_lock);
+
+ list_for_each_entry(entry, root, entry) {
+ list_del_rcu(&entry->entry);
+ call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu);
+ }
+
+ spin_unlock(&dist->irq_phys_map_lock);
+}
+
void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
{
struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
@@ -1591,6 +1918,7 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)
kfree(vgic_cpu->active_shared);
kfree(vgic_cpu->pend_act_shared);
kfree(vgic_cpu->vgic_irq_lr_map);
+ vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list);
vgic_cpu->pending_shared = NULL;
vgic_cpu->active_shared = NULL;
vgic_cpu->pend_act_shared = NULL;
@@ -1628,6 +1956,17 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs)
}
/**
+ * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage
+ *
+ * No memory allocation should be performed here, only static init.
+ */
+void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu)
+{
+ struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+ INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list);
+}
+
+/**
* kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW
*
* The host's GIC naturally limits the maximum amount of VCPUs a guest
@@ -1664,6 +2003,7 @@ void kvm_vgic_destroy(struct kvm *kvm)
kfree(dist->irq_spi_target);
kfree(dist->irq_pending_on_cpu);
kfree(dist->irq_active_on_cpu);
+ vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list);
dist->irq_sgi_sources = NULL;
dist->irq_spi_cpu = NULL;
dist->irq_spi_target = NULL;
@@ -1787,6 +2127,18 @@ static int init_vgic_model(struct kvm *kvm, int type)
return 0;
}
+/**
+ * kvm_vgic_early_init - Earliest possible vgic initialization stage
+ *
+ * No memory allocation should be performed here, only static init.
+ */
+void kvm_vgic_early_init(struct kvm *kvm)
+{
+ spin_lock_init(&kvm->arch.vgic.lock);
+ spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock);
+ INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list);
+}
+
int kvm_vgic_create(struct kvm *kvm, u32 type)
{
int i, vcpu_lock_idx = -1, ret;
@@ -1832,7 +2184,6 @@ int kvm_vgic_create(struct kvm *kvm, u32 type)
if (ret)
goto out_unlock;
- spin_lock_init(&kvm->arch.vgic.lock);
kvm->arch.vgic.in_kernel = true;
kvm->arch.vgic.vgic_model = type;
kvm->arch.vgic.vctrl_base = vgic->vctrl_base;
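kvm_vgic_map_phys_irq() in the vgic.c diff above allocates the new map entry before taking irq_phys_map_lock, then frees it again if an existing mapping already matches, because allocating while holding a spinlock is not allowed to sleep. A single-threaded userspace sketch of that pattern, with plain list operations standing in for the RCU list primitives:

#include <stdio.h>
#include <stdlib.h>

struct map_entry {
        int virt_irq;
        int phys_irq;
        struct map_entry *next;
};

/* Protected by a spinlock in the real code; single-threaded here. */
static struct map_entry *map_list;

static struct map_entry *map_search(int virt_irq)
{
        struct map_entry *e;

        for (e = map_list; e; e = e->next)
                if (e->virt_irq == virt_irq)
                        return e;
        return NULL;
}

static struct map_entry *map_phys_irq(int virt_irq, int phys_irq)
{
        struct map_entry *entry, *map;

        /* Allocate before "taking the lock": allocation may sleep. */
        entry = calloc(1, sizeof(*entry));
        if (!entry)
                return NULL;

        /* --- lock would be taken here --- */
        map = map_search(virt_irq);
        if (map) {
                /* Reuse an identical mapping; reject a conflicting one. */
                if (map->phys_irq != phys_irq)
                        map = NULL;
                goto out;
        }
        entry->virt_irq = virt_irq;
        entry->phys_irq = phys_irq;
        entry->next = map_list;
        map_list = entry;
        map = entry;
out:
        /* --- lock would be dropped here --- */
        if (map != entry)
                free(entry); /* the pre-allocated entry went unused */
        return map;
}

int main(void)
{
        map_phys_irq(27, 27);
        map_phys_irq(27, 27); /* matches the existing entry; duplicate freed */
        printf("virt 27 -> phys %d\n", map_search(27)->phys_irq);
        return 0;
}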
diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c
index 21c1424..d7ea8e2 100644
--- a/virt/kvm/irqchip.c
+++ b/virt/kvm/irqchip.c
@@ -213,11 +213,15 @@ int kvm_set_irq_routing(struct kvm *kvm,
goto out;
r = -EINVAL;
- if (ue->flags)
+ if (ue->flags) {
+ kfree(e);
goto out;
+ }
r = setup_routing_entry(new, e, ue);
- if (r)
+ if (r) {
+ kfree(e);
goto out;
+ }
++ue;
}
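The irqchip.c fix above plugs a leak: the entry allocated on each loop iteration was not freed on the two early-exit error paths. A minimal sketch of the corrected ownership pattern, with hypothetical names rather than KVM's:

#include <stdlib.h>

struct route_entry {
        int gsi;
};

static int validate(int gsi)
{
        return gsi >= 0 ? 0 : -1;
}

/*
 * Each iteration allocates an entry; every early exit must free the
 * not-yet-published entry, exactly as the hunk above adds kfree(e)
 * before both "goto out" paths.
 */
static int setup_entries(struct route_entry **table, const int *gsi, int n)
{
        int r = -1;
        int i;

        for (i = 0; i < n; i++) {
                struct route_entry *e = calloc(1, sizeof(*e));

                if (!e)
                        goto out;
                if (validate(gsi[i])) {
                        free(e); /* the fix: release before bailing out */
                        goto out;
                }
                e->gsi = gsi[i];
                table[i] = e; /* success: ownership moves to the table */
        }
        r = 0;
out:
        return r;
}

int main(void)
{
        struct route_entry *table[2] = { 0 };
        const int gsi[2] = { 1, -1 };
        int r = setup_entries(table, gsi, 2); /* fails on gsi[1], no leak */

        free(table[0]);
        return r ? 0 : 1;
}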
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index d8db2f8f..4662a88 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -66,9 +66,18 @@
MODULE_AUTHOR("Qumranet");
MODULE_LICENSE("GPL");
-static unsigned int halt_poll_ns;
+/* halt polling only reduces halt latency by 5-7 us, 500us is enough */
+static unsigned int halt_poll_ns = 500000;
module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR);
+/* Default doubles per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_grow = 2;
+module_param(halt_poll_ns_grow, int, S_IRUGO);
+
+/* Default resets per-vcpu halt_poll_ns. */
+static unsigned int halt_poll_ns_shrink;
+module_param(halt_poll_ns_shrink, int, S_IRUGO);
+
/*
* Ordering of locks:
*
@@ -217,6 +226,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
vcpu->kvm = kvm;
vcpu->vcpu_id = id;
vcpu->pid = NULL;
+ vcpu->halt_poll_ns = 0;
init_waitqueue_head(&vcpu->wq);
kvm_async_pf_vcpu_init(vcpu);
@@ -1906,6 +1916,35 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn)
}
EXPORT_SYMBOL_GPL(kvm_vcpu_mark_page_dirty);
+static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ int old, val;
+
+ old = val = vcpu->halt_poll_ns;
+ /* 10us base */
+ if (val == 0 && halt_poll_ns_grow)
+ val = 10000;
+ else
+ val *= halt_poll_ns_grow;
+
+ vcpu->halt_poll_ns = val;
+ trace_kvm_halt_poll_ns_grow(vcpu->vcpu_id, val, old);
+}
+
+static void shrink_halt_poll_ns(struct kvm_vcpu *vcpu)
+{
+ int old, val;
+
+ old = val = vcpu->halt_poll_ns;
+ if (halt_poll_ns_shrink == 0)
+ val = 0;
+ else
+ val /= halt_poll_ns_shrink;
+
+ vcpu->halt_poll_ns = val;
+ trace_kvm_halt_poll_ns_shrink(vcpu->vcpu_id, val, old);
+}
+
static int kvm_vcpu_check_block(struct kvm_vcpu *vcpu)
{
if (kvm_arch_vcpu_runnable(vcpu)) {
@@ -1928,10 +1967,11 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
ktime_t start, cur;
DEFINE_WAIT(wait);
bool waited = false;
+ u64 block_ns;
start = cur = ktime_get();
- if (halt_poll_ns) {
- ktime_t stop = ktime_add_ns(ktime_get(), halt_poll_ns);
+ if (vcpu->halt_poll_ns) {
+ ktime_t stop = ktime_add_ns(ktime_get(), vcpu->halt_poll_ns);
do {
/*
@@ -1960,7 +2000,21 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
cur = ktime_get();
out:
- trace_kvm_vcpu_wakeup(ktime_to_ns(cur) - ktime_to_ns(start), waited);
+ block_ns = ktime_to_ns(cur) - ktime_to_ns(start);
+
+ if (halt_poll_ns) {
+ if (block_ns <= vcpu->halt_poll_ns)
+ ;
+ /* we had a long block, shrink polling */
+ else if (vcpu->halt_poll_ns && block_ns > halt_poll_ns)
+ shrink_halt_poll_ns(vcpu);
+ /* we had a short halt and our poll time is too small */
+ else if (vcpu->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
+ grow_halt_poll_ns(vcpu);
+ }
+
+ trace_kvm_vcpu_wakeup(block_ns, waited);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_block);
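The kvm_main.c changes implement adaptive halt-polling: the per-vCPU poll window grows (doubling from a 10us base, by default) after a halt shorter than the 500us global cap, and shrinks (reset to zero, by default) after a block longer than the cap. A userspace sketch of that policy using the patch's default constants; the helper name is illustrative:

#include <stdio.h>

static unsigned int halt_poll_ns = 500000;      /* global cap, patch default */
static unsigned int halt_poll_ns_grow = 2;      /* doubling, patch default */
static unsigned int halt_poll_ns_shrink;        /* 0 = reset, patch default */

static unsigned int adjust_poll_ns(unsigned int cur, unsigned long long block_ns)
{
        if (block_ns <= cur)
                return cur;                     /* polling already covered it */
        if (cur && block_ns > halt_poll_ns)     /* long block: shrink */
                return halt_poll_ns_shrink ? cur / halt_poll_ns_shrink : 0;
        if (cur < halt_poll_ns && block_ns < halt_poll_ns)
                return cur ? cur * halt_poll_ns_grow : 10000; /* grow from 10us */
        return cur;
}

int main(void)
{
        unsigned long long blocks[] = { 8000, 30000, 30000, 900000 };
        unsigned int poll = 0;
        int i;

        for (i = 0; i < 4; i++) {
                poll = adjust_poll_ns(poll, blocks[i]);
                printf("block %7lluns -> poll window %6uns\n", blocks[i], poll);
        }
        return 0;
}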