summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/x86/include/asm/kvm_page_track.h14
-rw-r--r--arch/x86/include/asm/vmx.h1
-rw-r--r--arch/x86/include/uapi/asm/vmx.h4
-rw-r--r--arch/x86/kvm/lapic.c201
-rw-r--r--arch/x86/kvm/lapic.h2
-rw-r--r--arch/x86/kvm/mmu.c21
-rw-r--r--arch/x86/kvm/page_track.c31
-rw-r--r--arch/x86/kvm/vmx.c283
-rw-r--r--arch/x86/kvm/x86.c4
-rw-r--r--virt/kvm/kvm_main.c78
10 files changed, 364 insertions, 275 deletions
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index c2b8d24..d74747b 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -29,9 +29,20 @@ struct kvm_page_track_notifier_node {
* @gpa: the physical address written by guest.
* @new: the data was written to the address.
* @bytes: the written length.
+ * @node: this node
*/
void (*track_write)(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
- int bytes);
+ int bytes, struct kvm_page_track_notifier_node *node);
+ /*
+ * It is called when memory slot is being moved or removed
+ * users can drop write-protection for the pages in that memory slot
+ *
+ * @kvm: the kvm where memory slot being moved or removed
+ * @slot: the memory slot being moved or removed
+ * @node: this node
+ */
+ void (*track_flush_slot)(struct kvm *kvm, struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node);
};
void kvm_page_track_init(struct kvm *kvm);
@@ -58,4 +69,5 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
struct kvm_page_track_notifier_node *n);
void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
int bytes);
+void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot);
#endif
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index a002b07..5ef9848 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -60,6 +60,7 @@
*/
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
+#define SECONDARY_EXEC_DESC 0x00000004
#define SECONDARY_EXEC_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 37fee27..f9dea4f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,8 @@
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EOI_INDUCED 45
+#define EXIT_REASON_GDTR_IDTR 46
+#define EXIT_REASON_LDTR_TR 47
#define EXIT_REASON_EPT_VIOLATION 48
#define EXIT_REASON_EPT_MISCONFIG 49
#define EXIT_REASON_INVEPT 50
@@ -113,6 +115,8 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 23b99f3..890f218 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -342,9 +342,11 @@ void __kvm_apic_update_irr(u32 *pir, void *regs)
u32 i, pir_val;
for (i = 0; i <= 7; i++) {
- pir_val = xchg(&pir[i], 0);
- if (pir_val)
+ pir_val = READ_ONCE(pir[i]);
+ if (pir_val) {
+ pir_val = xchg(&pir[i], 0);
*((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val;
+ }
}
}
EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
@@ -1090,7 +1092,7 @@ static void apic_send_ipi(struct kvm_lapic *apic)
static u32 apic_get_tmcct(struct kvm_lapic *apic)
{
- ktime_t remaining;
+ ktime_t remaining, now;
s64 ns;
u32 tmcct;
@@ -1101,7 +1103,8 @@ static u32 apic_get_tmcct(struct kvm_lapic *apic)
apic->lapic_timer.period == 0)
return 0;
- remaining = hrtimer_get_remaining(&apic->lapic_timer.timer);
+ now = ktime_get();
+ remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
if (ktime_to_ns(remaining) < 0)
remaining = ktime_set(0, 0);
@@ -1332,7 +1335,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_save(flags);
- now = apic->lapic_timer.timer.base->get_time();
+ now = ktime_get();
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
if (likely(tscdeadline > guest_tsc)) {
ns = (tscdeadline - guest_tsc) * 1000000ULL;
@@ -1347,6 +1350,79 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
local_irq_restore(flags);
}
+static void start_sw_period(struct kvm_lapic *apic)
+{
+ if (!apic->lapic_timer.period)
+ return;
+
+ if (apic_lvtt_oneshot(apic) &&
+ ktime_after(ktime_get(),
+ apic->lapic_timer.target_expiration)) {
+ apic_timer_expired(apic);
+ return;
+ }
+
+ hrtimer_start(&apic->lapic_timer.timer,
+ apic->lapic_timer.target_expiration,
+ HRTIMER_MODE_ABS_PINNED);
+}
+
+static bool set_target_expiration(struct kvm_lapic *apic)
+{
+ ktime_t now;
+ u64 tscl = rdtsc();
+
+ now = ktime_get();
+ apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
+ * APIC_BUS_CYCLE_NS * apic->divide_count;
+
+ if (!apic->lapic_timer.period)
+ return false;
+
+ /*
+ * Do not allow the guest to program periodic timers with small
+ * interval, since the hrtimers are not throttled by the host
+ * scheduler.
+ */
+ if (apic_lvtt_period(apic)) {
+ s64 min_period = min_timer_period_us * 1000LL;
+
+ if (apic->lapic_timer.period < min_period) {
+ pr_info_ratelimited(
+ "kvm: vcpu %i: requested %lld ns "
+ "lapic timer period limited to %lld ns\n",
+ apic->vcpu->vcpu_id,
+ apic->lapic_timer.period, min_period);
+ apic->lapic_timer.period = min_period;
+ }
+ }
+
+ apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
+ PRIx64 ", "
+ "timer initial count 0x%x, period %lldns, "
+ "expire @ 0x%016" PRIx64 ".\n", __func__,
+ APIC_BUS_CYCLE_NS, ktime_to_ns(now),
+ kvm_lapic_get_reg(apic, APIC_TMICT),
+ apic->lapic_timer.period,
+ ktime_to_ns(ktime_add_ns(now,
+ apic->lapic_timer.period)));
+
+ apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
+
+ return true;
+}
+
+static void advance_periodic_target_expiration(struct kvm_lapic *apic)
+{
+ apic->lapic_timer.tscdeadline +=
+ nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
+ apic->lapic_timer.target_expiration =
+ ktime_add_ns(apic->lapic_timer.target_expiration,
+ apic->lapic_timer.period);
+}
+
bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
{
if (!lapic_in_kernel(vcpu))
@@ -1356,52 +1432,59 @@ bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
-static void cancel_hv_tscdeadline(struct kvm_lapic *apic)
+static void cancel_hv_timer(struct kvm_lapic *apic)
{
kvm_x86_ops->cancel_hv_timer(apic->vcpu);
apic->lapic_timer.hv_timer_in_use = false;
}
-void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
-{
- struct kvm_lapic *apic = vcpu->arch.apic;
-
- WARN_ON(!apic->lapic_timer.hv_timer_in_use);
- WARN_ON(swait_active(&vcpu->wq));
- cancel_hv_tscdeadline(apic);
- apic_timer_expired(apic);
-}
-EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
-
-static bool start_hv_tscdeadline(struct kvm_lapic *apic)
+static bool start_hv_timer(struct kvm_lapic *apic)
{
u64 tscdeadline = apic->lapic_timer.tscdeadline;
- if (atomic_read(&apic->lapic_timer.pending) ||
+ if ((atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic)) ||
kvm_x86_ops->set_hv_timer(apic->vcpu, tscdeadline)) {
if (apic->lapic_timer.hv_timer_in_use)
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
} else {
apic->lapic_timer.hv_timer_in_use = true;
hrtimer_cancel(&apic->lapic_timer.timer);
/* In case the sw timer triggered in the window */
- if (atomic_read(&apic->lapic_timer.pending))
- cancel_hv_tscdeadline(apic);
+ if (atomic_read(&apic->lapic_timer.pending) &&
+ !apic_lvtt_period(apic))
+ cancel_hv_timer(apic);
}
trace_kvm_hv_timer_state(apic->vcpu->vcpu_id,
apic->lapic_timer.hv_timer_in_use);
return apic->lapic_timer.hv_timer_in_use;
}
+void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ WARN_ON(!apic->lapic_timer.hv_timer_in_use);
+ WARN_ON(swait_active(&vcpu->wq));
+ cancel_hv_timer(apic);
+ apic_timer_expired(apic);
+
+ if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
+ advance_periodic_target_expiration(apic);
+ if (!start_hv_timer(apic))
+ start_sw_period(apic);
+ }
+}
+EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
+
void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
WARN_ON(apic->lapic_timer.hv_timer_in_use);
- if (apic_lvtt_tscdeadline(apic))
- start_hv_tscdeadline(apic);
+ start_hv_timer(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_hv_timer);
@@ -1413,62 +1496,28 @@ void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
if (!apic->lapic_timer.hv_timer_in_use)
return;
- cancel_hv_tscdeadline(apic);
+ cancel_hv_timer(apic);
if (atomic_read(&apic->lapic_timer.pending))
return;
- start_sw_tscdeadline(apic);
+ if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
+ start_sw_period(apic);
+ else if (apic_lvtt_tscdeadline(apic))
+ start_sw_tscdeadline(apic);
}
EXPORT_SYMBOL_GPL(kvm_lapic_switch_to_sw_timer);
static void start_apic_timer(struct kvm_lapic *apic)
{
- ktime_t now;
-
atomic_set(&apic->lapic_timer.pending, 0);
if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
- /* lapic timer in oneshot or periodic mode */
- now = apic->lapic_timer.timer.base->get_time();
- apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT)
- * APIC_BUS_CYCLE_NS * apic->divide_count;
-
- if (!apic->lapic_timer.period)
- return;
- /*
- * Do not allow the guest to program periodic timers with small
- * interval, since the hrtimers are not throttled by the host
- * scheduler.
- */
- if (apic_lvtt_period(apic)) {
- s64 min_period = min_timer_period_us * 1000LL;
-
- if (apic->lapic_timer.period < min_period) {
- pr_info_ratelimited(
- "kvm: vcpu %i: requested %lld ns "
- "lapic timer period limited to %lld ns\n",
- apic->vcpu->vcpu_id,
- apic->lapic_timer.period, min_period);
- apic->lapic_timer.period = min_period;
- }
- }
-
- hrtimer_start(&apic->lapic_timer.timer,
- ktime_add_ns(now, apic->lapic_timer.period),
- HRTIMER_MODE_ABS_PINNED);
-
- apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
- PRIx64 ", "
- "timer initial count 0x%x, period %lldns, "
- "expire @ 0x%016" PRIx64 ".\n", __func__,
- APIC_BUS_CYCLE_NS, ktime_to_ns(now),
- kvm_lapic_get_reg(apic, APIC_TMICT),
- apic->lapic_timer.period,
- ktime_to_ns(ktime_add_ns(now,
- apic->lapic_timer.period)));
+ if (set_target_expiration(apic) &&
+ !(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
+ start_sw_period(apic);
} else if (apic_lvtt_tscdeadline(apic)) {
- if (!(kvm_x86_ops->set_hv_timer && start_hv_tscdeadline(apic)))
+ if (!(kvm_x86_ops->set_hv_timer && start_hv_timer(apic)))
start_sw_tscdeadline(apic);
}
}
@@ -1701,13 +1750,22 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
* LAPIC interface
*----------------------------------------------------------------------
*/
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu)
+{
+ struct kvm_lapic *apic = vcpu->arch.apic;
+
+ if (!lapic_in_kernel(vcpu))
+ return 0;
+
+ return apic->lapic_timer.tscdeadline;
+}
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
- if (!lapic_in_kernel(vcpu) || apic_lvtt_oneshot(apic) ||
- apic_lvtt_period(apic))
+ if (!lapic_in_kernel(vcpu) ||
+ !apic_lvtt_tscdeadline(apic))
return 0;
return apic->lapic_timer.tscdeadline;
@@ -1909,6 +1967,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
apic_timer_expired(apic);
if (lapic_is_periodic(apic)) {
+ advance_periodic_target_expiration(apic);
hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
return HRTIMER_RESTART;
} else
@@ -1993,6 +2052,10 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
kvm_apic_local_deliver(apic, APIC_LVTT);
if (apic_lvtt_tscdeadline(apic))
apic->lapic_timer.tscdeadline = 0;
+ if (apic_lvtt_oneshot(apic)) {
+ apic->lapic_timer.tscdeadline = 0;
+ apic->lapic_timer.target_expiration = ktime_set(0, 0);
+ }
atomic_set(&apic->lapic_timer.pending, 0);
}
}
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index f60d01c..e0c8023 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -15,6 +15,7 @@
struct kvm_timer {
struct hrtimer timer;
s64 period; /* unit: ns */
+ ktime_t target_expiration;
u32 timer_mode;
u32 timer_mode_mask;
u64 tscdeadline;
@@ -85,6 +86,7 @@ int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s);
int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu);
+u64 kvm_get_lapic_target_expiration_tsc(struct kvm_vcpu *vcpu);
u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu);
void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index d9c7e98..1d2f350 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1660,17 +1660,9 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end)
* This has some overhead, but not as much as the cost of swapping
* out actively used pages or breaking up actively used hugepages.
*/
- if (!shadow_accessed_mask) {
- /*
- * We are holding the kvm->mmu_lock, and we are blowing up
- * shadow PTEs. MMU notifier consumers need to be kept at bay.
- * This is correct as long as we don't decouple the mmu_lock
- * protected regions (like invalidate_range_start|end does).
- */
- kvm->mmu_notifier_seq++;
+ if (!shadow_accessed_mask)
return kvm_handle_hva_range(kvm, start, end, 0,
kvm_unmap_rmapp);
- }
return kvm_handle_hva_range(kvm, start, end, 0, kvm_age_rmapp);
}
@@ -4405,7 +4397,8 @@ static u64 *get_written_sptes(struct kvm_mmu_page *sp, gpa_t gpa, int *nspte)
}
static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes)
+ const u8 *new, int bytes,
+ struct kvm_page_track_notifier_node *node)
{
gfn_t gfn = gpa >> PAGE_SHIFT;
struct kvm_mmu_page *sp;
@@ -4617,11 +4610,19 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu)
init_kvm_mmu(vcpu);
}
+static void kvm_mmu_invalidate_zap_pages_in_memslot(struct kvm *kvm,
+ struct kvm_memory_slot *slot,
+ struct kvm_page_track_notifier_node *node)
+{
+ kvm_mmu_invalidate_zap_all_pages(kvm);
+}
+
void kvm_mmu_init_vm(struct kvm *kvm)
{
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
node->track_write = kvm_mmu_pte_write;
+ node->track_flush_slot = kvm_mmu_invalidate_zap_pages_in_memslot;
kvm_page_track_register_notifier(kvm, node);
}
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index b431539..4a1c13e 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -106,6 +106,7 @@ void kvm_slot_page_track_add_page(struct kvm *kvm,
if (kvm_mmu_slot_gfn_write_protect(kvm, slot, gfn))
kvm_flush_remote_tlbs(kvm);
}
+EXPORT_SYMBOL_GPL(kvm_slot_page_track_add_page);
/*
* remove the guest page from the tracking pool which stops the interception
@@ -135,6 +136,7 @@ void kvm_slot_page_track_remove_page(struct kvm *kvm,
*/
kvm_mmu_gfn_allow_lpage(slot, gfn);
}
+EXPORT_SYMBOL_GPL(kvm_slot_page_track_remove_page);
/*
* check if the corresponding access on the specified guest page is tracked.
@@ -181,6 +183,7 @@ kvm_page_track_register_notifier(struct kvm *kvm,
hlist_add_head_rcu(&n->node, &head->track_notifier_list);
spin_unlock(&kvm->mmu_lock);
}
+EXPORT_SYMBOL_GPL(kvm_page_track_register_notifier);
/*
* stop receiving the event interception. It is the opposed operation of
@@ -199,6 +202,7 @@ kvm_page_track_unregister_notifier(struct kvm *kvm,
spin_unlock(&kvm->mmu_lock);
synchronize_srcu(&head->track_srcu);
}
+EXPORT_SYMBOL_GPL(kvm_page_track_unregister_notifier);
/*
* Notify the node that write access is intercepted and write emulation is
@@ -222,6 +226,31 @@ void kvm_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, const u8 *new,
idx = srcu_read_lock(&head->track_srcu);
hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
if (n->track_write)
- n->track_write(vcpu, gpa, new, bytes);
+ n->track_write(vcpu, gpa, new, bytes, n);
+ srcu_read_unlock(&head->track_srcu, idx);
+}
+
+/*
+ * Notify the node that memory slot is being removed or moved so that it can
+ * drop write-protection for the pages in the memory slot.
+ *
+ * The node should figure out it has any write-protected pages in this slot
+ * by itself.
+ */
+void kvm_page_track_flush_slot(struct kvm *kvm, struct kvm_memory_slot *slot)
+{
+ struct kvm_page_track_notifier_head *head;
+ struct kvm_page_track_notifier_node *n;
+ int idx;
+
+ head = &kvm->arch.track_notifier_head;
+
+ if (hlist_empty(&head->track_notifier_list))
+ return;
+
+ idx = srcu_read_lock(&head->track_srcu);
+ hlist_for_each_entry_rcu(n, &head->track_notifier_list, node)
+ if (n->track_flush_slot)
+ n->track_flush_slot(kvm, slot, n);
srcu_read_unlock(&head->track_srcu, idx);
}
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5382b82..a91a5b0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -520,6 +520,12 @@ static inline void pi_set_sn(struct pi_desc *pi_desc)
(unsigned long *)&pi_desc->control);
}
+static inline void pi_clear_on(struct pi_desc *pi_desc)
+{
+ clear_bit(POSTED_INTR_ON,
+ (unsigned long *)&pi_desc->control);
+}
+
static inline int pi_test_on(struct pi_desc *pi_desc)
{
return test_bit(POSTED_INTR_ON,
@@ -920,16 +926,32 @@ static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
-static unsigned long *vmx_io_bitmap_a;
-static unsigned long *vmx_io_bitmap_b;
-static unsigned long *vmx_msr_bitmap_legacy;
-static unsigned long *vmx_msr_bitmap_longmode;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic;
-static unsigned long *vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
-static unsigned long *vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
-static unsigned long *vmx_vmread_bitmap;
-static unsigned long *vmx_vmwrite_bitmap;
+enum {
+ VMX_IO_BITMAP_A,
+ VMX_IO_BITMAP_B,
+ VMX_MSR_BITMAP_LEGACY,
+ VMX_MSR_BITMAP_LONGMODE,
+ VMX_MSR_BITMAP_LEGACY_X2APIC_APICV,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV,
+ VMX_MSR_BITMAP_LEGACY_X2APIC,
+ VMX_MSR_BITMAP_LONGMODE_X2APIC,
+ VMX_VMREAD_BITMAP,
+ VMX_VMWRITE_BITMAP,
+ VMX_BITMAP_NR
+};
+
+static unsigned long *vmx_bitmap[VMX_BITMAP_NR];
+
+#define vmx_io_bitmap_a (vmx_bitmap[VMX_IO_BITMAP_A])
+#define vmx_io_bitmap_b (vmx_bitmap[VMX_IO_BITMAP_B])
+#define vmx_msr_bitmap_legacy (vmx_bitmap[VMX_MSR_BITMAP_LEGACY])
+#define vmx_msr_bitmap_longmode (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE])
+#define vmx_msr_bitmap_legacy_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC_APICV])
+#define vmx_msr_bitmap_longmode_x2apic_apicv (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC_APICV])
+#define vmx_msr_bitmap_legacy_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LEGACY_X2APIC])
+#define vmx_msr_bitmap_longmode_x2apic (vmx_bitmap[VMX_MSR_BITMAP_LONGMODE_X2APIC])
+#define vmx_vmread_bitmap (vmx_bitmap[VMX_VMREAD_BITMAP])
+#define vmx_vmwrite_bitmap (vmx_bitmap[VMX_VMWRITE_BITMAP])
static bool cpu_has_load_ia32_efer;
static bool cpu_has_load_perf_global_ctrl;
@@ -2529,14 +2551,14 @@ static void vmx_set_msr_bitmap(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
if (enable_apicv && kvm_vcpu_apicv_active(vcpu)) {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv;
} else {
if (is_long_mode(vcpu))
- msr_bitmap = vmx_msr_bitmap_longmode_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_longmode_x2apic;
else
- msr_bitmap = vmx_msr_bitmap_legacy_x2apic_apicv_inactive;
+ msr_bitmap = vmx_msr_bitmap_legacy_x2apic;
}
} else {
if (is_long_mode(vcpu))
@@ -2780,6 +2802,7 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high &=
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
+ SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
@@ -4575,41 +4598,6 @@ static void __vmx_disable_intercept_for_msr(unsigned long *msr_bitmap,
}
}
-static void __vmx_enable_intercept_for_msr(unsigned long *msr_bitmap,
- u32 msr, int type)
-{
- int f = sizeof(unsigned long);
-
- if (!cpu_has_vmx_msr_bitmap())
- return;
-
- /*
- * See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
- * have the write-low and read-high bitmap offsets the wrong way round.
- * We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
- */
- if (msr <= 0x1fff) {
- if (type & MSR_TYPE_R)
- /* read-low */
- __set_bit(msr, msr_bitmap + 0x000 / f);
-
- if (type & MSR_TYPE_W)
- /* write-low */
- __set_bit(msr, msr_bitmap + 0x800 / f);
-
- } else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
- msr &= 0x1fff;
- if (type & MSR_TYPE_R)
- /* read-high */
- __set_bit(msr, msr_bitmap + 0x400 / f);
-
- if (type & MSR_TYPE_W)
- /* write-high */
- __set_bit(msr, msr_bitmap + 0xc00 / f);
-
- }
-}
-
/*
* If a msr is allowed by L0, we should check whether it is allowed by L1.
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
@@ -4665,48 +4653,18 @@ static void vmx_disable_intercept_for_msr(u32 msr, bool longmode_only)
msr, MSR_TYPE_R | MSR_TYPE_W);
}
-static void vmx_enable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
+static void vmx_disable_intercept_msr_x2apic(u32 msr, int type, bool apicv_active)
{
if (apicv_active) {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv,
+ msr, type);
+ __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv,
+ msr, type);
} else {
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_enable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_read_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_R);
+ msr, type);
__vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_R);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_R);
- }
-}
-
-static void vmx_disable_intercept_msr_write_x2apic(u32 msr, bool apicv_active)
-{
- if (apicv_active) {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic,
- msr, MSR_TYPE_W);
- } else {
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
- __vmx_disable_intercept_for_msr(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
- msr, MSR_TYPE_W);
+ msr, type);
}
}
@@ -4828,9 +4786,15 @@ static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!pi_test_and_clear_on(&vmx->pi_desc))
+ if (!pi_test_on(&vmx->pi_desc))
return;
+ pi_clear_on(&vmx->pi_desc);
+ /*
+ * IOMMU can write to PIR.ON, so the barrier matters even on UP.
+ * But on x86 this is just a compiler barrier anyway.
+ */
+ smp_mb__after_atomic();
kvm_apic_update_irr(vcpu, vmx->pi_desc.pir);
}
@@ -6352,50 +6316,13 @@ static __init int hardware_setup(void)
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
- vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_a)
- return r;
+ for (i = 0; i < VMX_BITMAP_NR; i++) {
+ vmx_bitmap[i] = (unsigned long *)__get_free_page(GFP_KERNEL);
+ if (!vmx_bitmap[i])
+ goto out;
+ }
vmx_io_bitmap_b = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_io_bitmap_b)
- goto out;
-
- vmx_msr_bitmap_legacy = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy)
- goto out1;
-
- vmx_msr_bitmap_legacy_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic)
- goto out2;
-
- vmx_msr_bitmap_legacy_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_legacy_x2apic_apicv_inactive)
- goto out3;
-
- vmx_msr_bitmap_longmode = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode)
- goto out4;
-
- vmx_msr_bitmap_longmode_x2apic =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic)
- goto out5;
-
- vmx_msr_bitmap_longmode_x2apic_apicv_inactive =
- (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_msr_bitmap_longmode_x2apic_apicv_inactive)
- goto out6;
-
- vmx_vmread_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmread_bitmap)
- goto out7;
-
- vmx_vmwrite_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL);
- if (!vmx_vmwrite_bitmap)
- goto out8;
-
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
@@ -6413,7 +6340,7 @@ static __init int hardware_setup(void)
if (setup_vmcs_config(&vmcs_config) < 0) {
r = -EIO;
- goto out9;
+ goto out;
}
if (boot_cpu_has(X86_FEATURE_NX))
@@ -6476,39 +6403,34 @@ static __init int hardware_setup(void)
vmx_disable_intercept_for_msr(MSR_IA32_SYSENTER_EIP, false);
vmx_disable_intercept_for_msr(MSR_IA32_BNDCFGS, true);
- memcpy(vmx_msr_bitmap_legacy_x2apic,
+ memcpy(vmx_msr_bitmap_legacy_x2apic_apicv,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic,
+ memcpy(vmx_msr_bitmap_longmode_x2apic_apicv,
vmx_msr_bitmap_longmode, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_legacy_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_legacy_x2apic,
vmx_msr_bitmap_legacy, PAGE_SIZE);
- memcpy(vmx_msr_bitmap_longmode_x2apic_apicv_inactive,
+ memcpy(vmx_msr_bitmap_longmode_x2apic,
vmx_msr_bitmap_longmode, PAGE_SIZE);
set_bit(0, vmx_vpid_bitmap); /* 0 is reserved for host */
+ for (msr = 0x800; msr <= 0x8ff; msr++) {
+ if (msr == 0x839 /* TMCCT */)
+ continue;
+ vmx_disable_intercept_msr_x2apic(msr, MSR_TYPE_R, true);
+ }
+
/*
- * enable_apicv && kvm_vcpu_apicv_active()
+ * TPR reads and writes can be virtualized even if virtual interrupt
+ * delivery is not in use.
*/
- for (msr = 0x800; msr <= 0x8ff; msr++)
- vmx_disable_intercept_msr_read_x2apic(msr, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_W, true);
+ vmx_disable_intercept_msr_x2apic(0x808, MSR_TYPE_R | MSR_TYPE_W, false);
- /* TMCCT */
- vmx_enable_intercept_msr_read_x2apic(0x839, true);
- /* TPR */
- vmx_disable_intercept_msr_write_x2apic(0x808, true);
/* EOI */
- vmx_disable_intercept_msr_write_x2apic(0x80b, true);
+ vmx_disable_intercept_msr_x2apic(0x80b, MSR_TYPE_W, true);
/* SELF-IPI */
- vmx_disable_intercept_msr_write_x2apic(0x83f, true);
-
- /*
- * (enable_apicv && !kvm_vcpu_apicv_active()) ||
- * !enable_apicv
- */
- /* TPR */
- vmx_disable_intercept_msr_read_x2apic(0x808, false);
- vmx_disable_intercept_msr_write_x2apic(0x808, false);
+ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true);
if (enable_ept) {
kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK,
@@ -6555,42 +6477,19 @@ static __init int hardware_setup(void)
return alloc_kvm_area();
-out9:
- free_page((unsigned long)vmx_vmwrite_bitmap);
-out8:
- free_page((unsigned long)vmx_vmread_bitmap);
-out7:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
-out6:
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
-out5:
- free_page((unsigned long)vmx_msr_bitmap_longmode);
-out4:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
-out3:
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
-out2:
- free_page((unsigned long)vmx_msr_bitmap_legacy);
-out1:
- free_page((unsigned long)vmx_io_bitmap_b);
out:
- free_page((unsigned long)vmx_io_bitmap_a);
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
return r;
}
static __exit void hardware_unsetup(void)
{
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_legacy_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic);
- free_page((unsigned long)vmx_msr_bitmap_longmode_x2apic_apicv_inactive);
- free_page((unsigned long)vmx_msr_bitmap_legacy);
- free_page((unsigned long)vmx_msr_bitmap_longmode);
- free_page((unsigned long)vmx_io_bitmap_b);
- free_page((unsigned long)vmx_io_bitmap_a);
- free_page((unsigned long)vmx_vmwrite_bitmap);
- free_page((unsigned long)vmx_vmread_bitmap);
+ int i;
+
+ for (i = 0; i < VMX_BITMAP_NR; i++)
+ free_page((unsigned long)vmx_bitmap[i]);
free_kvm_area();
}
@@ -8075,6 +7974,8 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
case EXIT_REASON_IO_INSTRUCTION:
return nested_vmx_exit_handled_io(vcpu, vmcs12);
+ case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
case EXIT_REASON_MSR_READ:
case EXIT_REASON_MSR_WRITE:
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
@@ -8624,11 +8525,6 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
register void *__sp asm(_ASM_SP);
- /*
- * If external interrupt exists, IF bit is set in rflags/eflags on the
- * interrupt stack frame, and interrupt will be enabled on a return
- * from interrupt handler.
- */
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
unsigned int vector;
@@ -9968,6 +9864,15 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmx_set_constant_host_state(vmx);
/*
+ * Set the MSR load/store lists to match L0's settings.
+ */
+ vmcs_write32(VM_EXIT_MSR_STORE_COUNT, 0);
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host));
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest));
+
+ /*
* HOST_RSP is normally set correctly in vmx_vcpu_run() just before
* entry, but only if the current (host) sp changed from the value
* we wrote last (vmx->host_rsp). This cache is no longer relevant
@@ -10755,6 +10660,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+ u32 vm_inst_error = 0;
/* trying to cancel vmlaunch/vmresume is a bug */
WARN_ON_ONCE(vmx->nested.nested_run_pending);
@@ -10767,6 +10673,9 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmcs12->vm_exit_msr_store_count))
nested_vmx_abort(vcpu, VMX_ABORT_SAVE_GUEST_MSR_FAIL);
+ if (unlikely(vmx->fail))
+ vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
+
vmx_load_vmcs01(vcpu);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
@@ -10795,6 +10704,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
load_vmcs12_host_state(vcpu, vmcs12);
/* Update any VMCS fields that might have changed while L2 ran */
+ vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
+ vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.nr);
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
if (vmx->hv_deadline_tsc == -1)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
@@ -10843,7 +10754,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
*/
if (unlikely(vmx->fail)) {
vmx->fail = 0;
- nested_vmx_failValid(vcpu, vmcs_read32(VM_INSTRUCTION_ERROR));
+ nested_vmx_failValid(vcpu, vm_inst_error);
} else
nested_vmx_succeed(vcpu);
if (enable_shadow_vmcs)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3017de0..83990ad 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2794,7 +2794,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
}
if (kvm_lapic_hv_timer_in_use(vcpu) &&
kvm_x86_ops->set_hv_timer(vcpu,
- kvm_get_lapic_tscdeadline_msr(vcpu)))
+ kvm_get_lapic_target_expiration_tsc(vcpu)))
kvm_lapic_switch_to_sw_timer(vcpu);
/*
* On a host with synchronized TSC, there is no need to update
@@ -8155,7 +8155,7 @@ void kvm_arch_flush_shadow_all(struct kvm *kvm)
void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot)
{
- kvm_mmu_invalidate_zap_all_pages(kvm);
+ kvm_page_track_flush_slot(kvm, slot);
}
static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 5c36034..fbf04c0 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -595,7 +595,7 @@ static int kvm_create_vm_debugfs(struct kvm *kvm, int fd)
stat_data->kvm = kvm;
stat_data->offset = p->offset;
kvm->debugfs_stat_data[p - debugfs_entries] = stat_data;
- if (!debugfs_create_file(p->name, 0444,
+ if (!debugfs_create_file(p->name, 0644,
kvm->debugfs_dentry,
stat_data,
stat_fops_per_vm[p->kind]))
@@ -3661,11 +3661,23 @@ static int vm_stat_get_per_vm(void *data, u64 *val)
return 0;
}
+static int vm_stat_clear_per_vm(void *data, u64 val)
+{
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+
+ if (val)
+ return -EINVAL;
+
+ *(ulong *)((void *)stat_data->kvm + stat_data->offset) = 0;
+
+ return 0;
+}
+
static int vm_stat_get_per_vm_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
return kvm_debugfs_open(inode, file, vm_stat_get_per_vm,
- NULL, "%llu\n");
+ vm_stat_clear_per_vm, "%llu\n");
}
static const struct file_operations vm_stat_get_per_vm_fops = {
@@ -3691,11 +3703,26 @@ static int vcpu_stat_get_per_vm(void *data, u64 *val)
return 0;
}
+static int vcpu_stat_clear_per_vm(void *data, u64 val)
+{
+ int i;
+ struct kvm_stat_data *stat_data = (struct kvm_stat_data *)data;
+ struct kvm_vcpu *vcpu;
+
+ if (val)
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, vcpu, stat_data->kvm)
+ *(u64 *)((void *)vcpu + stat_data->offset) = 0;
+
+ return 0;
+}
+
static int vcpu_stat_get_per_vm_open(struct inode *inode, struct file *file)
{
__simple_attr_check_format("%llu\n", 0ull);
return kvm_debugfs_open(inode, file, vcpu_stat_get_per_vm,
- NULL, "%llu\n");
+ vcpu_stat_clear_per_vm, "%llu\n");
}
static const struct file_operations vcpu_stat_get_per_vm_fops = {
@@ -3730,7 +3757,26 @@ static int vm_stat_get(void *_offset, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, NULL, "%llu\n");
+static int vm_stat_clear(void *_offset, u64 val)
+{
+ unsigned offset = (long)_offset;
+ struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+
+ if (val)
+ return -EINVAL;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vm_stat_clear_per_vm((void *)&stat_tmp, 0);
+ }
+ spin_unlock(&kvm_lock);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vm_stat_fops, vm_stat_get, vm_stat_clear, "%llu\n");
static int vcpu_stat_get(void *_offset, u64 *val)
{
@@ -3750,7 +3796,27 @@ static int vcpu_stat_get(void *_offset, u64 *val)
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, NULL, "%llu\n");
+static int vcpu_stat_clear(void *_offset, u64 val)
+{
+ unsigned offset = (long)_offset;
+ struct kvm *kvm;
+ struct kvm_stat_data stat_tmp = {.offset = offset};
+
+ if (val)
+ return -EINVAL;
+
+ spin_lock(&kvm_lock);
+ list_for_each_entry(kvm, &vm_list, vm_list) {
+ stat_tmp.kvm = kvm;
+ vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
+ }
+ spin_unlock(&kvm_lock);
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(vcpu_stat_fops, vcpu_stat_get, vcpu_stat_clear,
+ "%llu\n");
static const struct file_operations *stat_fops[] = {
[KVM_STAT_VCPU] = &vcpu_stat_fops,
@@ -3768,7 +3834,7 @@ static int kvm_init_debug(void)
kvm_debugfs_num_entries = 0;
for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) {
- if (!debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
+ if (!debugfs_create_file(p->name, 0644, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]))
goto out_dir;
OpenPOWER on IntegriCloud