summaryrefslogtreecommitdiffstats
path: root/arch/x86/kvm/mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu.c')
-rw-r--r--arch/x86/kvm/mmu.c173
1 files changed, 91 insertions, 82 deletions
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 48aeee8..7a17db1 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -148,7 +148,6 @@ module_param(oos_shadow, bool, 0644);
#include <trace/events/kvm.h>
-#undef TRACE_INCLUDE_FILE
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
@@ -174,12 +173,7 @@ struct kvm_shadow_walk_iterator {
shadow_walk_okay(&(_walker)); \
shadow_walk_next(&(_walker)))
-
-struct kvm_unsync_walk {
- int (*entry) (struct kvm_mmu_page *sp, struct kvm_unsync_walk *walk);
-};
-
-typedef int (*mmu_parent_walk_fn) (struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp);
+typedef int (*mmu_parent_walk_fn) (struct kvm_mmu_page *sp);
static struct kmem_cache *pte_chain_cache;
static struct kmem_cache *rmap_desc_cache;
@@ -327,7 +321,6 @@ static int mmu_topup_memory_cache_page(struct kvm_mmu_memory_cache *cache,
page = alloc_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
- set_page_private(page, 0);
cache->objects[cache->nobjs++] = page_address(page);
}
return 0;
@@ -925,7 +918,6 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
sp->gfns = mmu_memory_cache_alloc(&vcpu->arch.mmu_page_cache, PAGE_SIZE);
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
list_add(&sp->link, &vcpu->kvm->arch.active_mmu_pages);
- INIT_LIST_HEAD(&sp->oos_link);
bitmap_zero(sp->slot_bitmap, KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS);
sp->multimapped = 0;
sp->parent_pte = parent_pte;
@@ -1009,8 +1001,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page *sp,
}
-static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
- mmu_parent_walk_fn fn)
+static void mmu_parent_walk(struct kvm_mmu_page *sp, mmu_parent_walk_fn fn)
{
struct kvm_pte_chain *pte_chain;
struct hlist_node *node;
@@ -1019,8 +1010,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (!sp->multimapped && sp->parent_pte) {
parent_sp = page_header(__pa(sp->parent_pte));
- fn(vcpu, parent_sp);
- mmu_parent_walk(vcpu, parent_sp, fn);
+ fn(parent_sp);
+ mmu_parent_walk(parent_sp, fn);
return;
}
hlist_for_each_entry(pte_chain, node, &sp->parent_ptes, link)
@@ -1028,8 +1019,8 @@ static void mmu_parent_walk(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
if (!pte_chain->parent_ptes[i])
break;
parent_sp = page_header(__pa(pte_chain->parent_ptes[i]));
- fn(vcpu, parent_sp);
- mmu_parent_walk(vcpu, parent_sp, fn);
+ fn(parent_sp);
+ mmu_parent_walk(parent_sp, fn);
}
}
@@ -1066,16 +1057,15 @@ static void kvm_mmu_update_parents_unsync(struct kvm_mmu_page *sp)
}
}
-static int unsync_walk_fn(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
+static int unsync_walk_fn(struct kvm_mmu_page *sp)
{
kvm_mmu_update_parents_unsync(sp);
return 1;
}
-static void kvm_mmu_mark_parents_unsync(struct kvm_vcpu *vcpu,
- struct kvm_mmu_page *sp)
+static void kvm_mmu_mark_parents_unsync(struct kvm_mmu_page *sp)
{
- mmu_parent_walk(vcpu, sp, unsync_walk_fn);
+ mmu_parent_walk(sp, unsync_walk_fn);
kvm_mmu_update_parents_unsync(sp);
}
@@ -1209,7 +1199,7 @@ static int kvm_mmu_zap_page(struct kvm *kvm, struct kvm_mmu_page *sp);
static int kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
{
- if (sp->role.glevels != vcpu->arch.mmu.root_level) {
+ if (sp->role.cr4_pae != !!is_pae(vcpu)) {
kvm_mmu_zap_page(vcpu->kvm, sp);
return 1;
}
@@ -1331,6 +1321,8 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
role = vcpu->arch.mmu.base_role;
role.level = level;
role.direct = direct;
+ if (role.direct)
+ role.cr4_pae = 0;
role.access = access;
if (vcpu->arch.mmu.root_level <= PT32_ROOT_LEVEL) {
quadrant = gaddr >> (PAGE_SHIFT + (PT64_PT_BITS * level));
@@ -1351,7 +1343,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
mmu_page_add_parent_pte(vcpu, sp, parent_pte);
if (sp->unsync_children) {
set_bit(KVM_REQ_MMU_SYNC, &vcpu->requests);
- kvm_mmu_mark_parents_unsync(vcpu, sp);
+ kvm_mmu_mark_parents_unsync(sp);
}
trace_kvm_mmu_get_page(sp, false);
return sp;
@@ -1490,8 +1482,8 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
for_each_sp(pages, sp, parents, i) {
kvm_mmu_zap_page(kvm, sp);
mmu_pages_clear_parents(&parents);
+ zapped++;
}
- zapped += pages.nr;
kvm_mmu_pages_init(parent, &parents, &pages);
}
@@ -1542,14 +1534,16 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
*/
if (used_pages > kvm_nr_mmu_pages) {
- while (used_pages > kvm_nr_mmu_pages) {
+ while (used_pages > kvm_nr_mmu_pages &&
+ !list_empty(&kvm->arch.active_mmu_pages)) {
struct kvm_mmu_page *page;
page = container_of(kvm->arch.active_mmu_pages.prev,
struct kvm_mmu_page, link);
- kvm_mmu_zap_page(kvm, page);
+ used_pages -= kvm_mmu_zap_page(kvm, page);
used_pages--;
}
+ kvm_nr_mmu_pages = used_pages;
kvm->arch.n_free_mmu_pages = 0;
}
else
@@ -1571,13 +1565,14 @@ static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
r = 0;
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
+restart:
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link)
if (sp->gfn == gfn && !sp->role.direct) {
pgprintk("%s: gfn %lx role %x\n", __func__, gfn,
sp->role.word);
r = 1;
if (kvm_mmu_zap_page(kvm, sp))
- n = bucket->first;
+ goto restart;
}
return r;
}
@@ -1591,12 +1586,14 @@ static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
index = kvm_page_table_hashfn(gfn);
bucket = &kvm->arch.mmu_page_hash[index];
+restart:
hlist_for_each_entry_safe(sp, node, nn, bucket, hash_link) {
if (sp->gfn == gfn && !sp->role.direct
&& !sp->role.invalid) {
pgprintk("%s: zap %lx %x\n",
__func__, gfn, sp->role.word);
- kvm_mmu_zap_page(kvm, sp);
+ if (kvm_mmu_zap_page(kvm, sp))
+ goto restart;
}
}
}
@@ -1762,7 +1759,7 @@ static int kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
++vcpu->kvm->stat.mmu_unsync;
sp->unsync = 1;
- kvm_mmu_mark_parents_unsync(vcpu, sp);
+ kvm_mmu_mark_parents_unsync(sp);
mmu_convert_notrap(sp);
return 0;
@@ -2296,13 +2293,19 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
/* no rsvd bits for 2 level 4K page table entries */
context->rsvd_bits_mask[0][1] = 0;
context->rsvd_bits_mask[0][0] = 0;
+ context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
+
+ if (!is_pse(vcpu)) {
+ context->rsvd_bits_mask[1][1] = 0;
+ break;
+ }
+
if (is_cpuid_PSE36())
/* 36bits PSE 4MB page */
context->rsvd_bits_mask[1][1] = rsvd_bits(17, 21);
else
/* 32 bits PSE 4MB page */
context->rsvd_bits_mask[1][1] = rsvd_bits(13, 21);
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
break;
case PT32E_ROOT_LEVEL:
context->rsvd_bits_mask[0][2] =
@@ -2315,7 +2318,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 62) |
rsvd_bits(13, 20); /* large page */
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
+ context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
break;
case PT64_ROOT_LEVEL:
context->rsvd_bits_mask[0][3] = exb_bit_rsvd |
@@ -2333,7 +2336,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int level)
context->rsvd_bits_mask[1][1] = exb_bit_rsvd |
rsvd_bits(maxphyaddr, 51) |
rsvd_bits(13, 20); /* large page */
- context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[1][0];
+ context->rsvd_bits_mask[1][0] = context->rsvd_bits_mask[0][0];
break;
}
}
@@ -2435,7 +2438,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu)
else
r = paging32_init_context(vcpu);
- vcpu->arch.mmu.base_role.glevels = vcpu->arch.mmu.root_level;
+ vcpu->arch.mmu.base_role.cr4_pae = !!is_pae(vcpu);
return r;
}
@@ -2524,7 +2527,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
}
++vcpu->kvm->stat.mmu_pte_updated;
- if (sp->role.glevels == PT32_ROOT_LEVEL)
+ if (!sp->role.cr4_pae)
paging32_update_pte(vcpu, sp, spte, new);
else
paging64_update_pte(vcpu, sp, spte, new);
@@ -2559,36 +2562,11 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
}
static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
- const u8 *new, int bytes)
+ u64 gpte)
{
gfn_t gfn;
- int r;
- u64 gpte = 0;
pfn_t pfn;
- if (bytes != 4 && bytes != 8)
- return;
-
- /*
- * Assume that the pte write on a page table of the same type
- * as the current vcpu paging mode. This is nearly always true
- * (might be false while changing modes). Note it is verified later
- * by update_pte().
- */
- if (is_pae(vcpu)) {
- /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
- if ((bytes == 4) && (gpa % 4 == 0)) {
- r = kvm_read_guest(vcpu->kvm, gpa & ~(u64)7, &gpte, 8);
- if (r)
- return;
- memcpy((void *)&gpte + (gpa % 8), new, 4);
- } else if ((bytes == 8) && (gpa % 8 == 0)) {
- memcpy((void *)&gpte, new, 8);
- }
- } else {
- if ((bytes == 4) && (gpa % 4 == 0))
- memcpy((void *)&gpte, new, 4);
- }
if (!is_present_gpte(gpte))
return;
gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
@@ -2637,10 +2615,46 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
int flooded = 0;
int npte;
int r;
+ int invlpg_counter;
pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes);
- mmu_guess_page_from_pte_write(vcpu, gpa, new, bytes);
+
+ invlpg_counter = atomic_read(&vcpu->kvm->arch.invlpg_counter);
+
+ /*
+ * Assume that the pte write on a page table of the same type
+ * as the current vcpu paging mode. This is nearly always true
+ * (might be false while changing modes). Note it is verified later
+ * by update_pte().
+ */
+ if ((is_pae(vcpu) && bytes == 4) || !new) {
+ /* Handle a 32-bit guest writing two halves of a 64-bit gpte */
+ if (is_pae(vcpu)) {
+ gpa &= ~(gpa_t)7;
+ bytes = 8;
+ }
+ r = kvm_read_guest(vcpu->kvm, gpa, &gentry, min(bytes, 8));
+ if (r)
+ gentry = 0;
+ new = (const u8 *)&gentry;
+ }
+
+ switch (bytes) {
+ case 4:
+ gentry = *(const u32 *)new;
+ break;
+ case 8:
+ gentry = *(const u64 *)new;
+ break;
+ default:
+ gentry = 0;
+ break;
+ }
+
+ mmu_guess_page_from_pte_write(vcpu, gpa, gentry);
spin_lock(&vcpu->kvm->mmu_lock);
+ if (atomic_read(&vcpu->kvm->arch.invlpg_counter) != invlpg_counter)
+ gentry = 0;
kvm_mmu_access_page(vcpu, gfn);
kvm_mmu_free_some_pages(vcpu);
++vcpu->kvm->stat.mmu_pte_write;
@@ -2659,10 +2673,12 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
}
index = kvm_page_table_hashfn(gfn);
bucket = &vcpu->kvm->arch.mmu_page_hash[index];
+
+restart:
hlist_for_each_entry_safe(sp, node, n, bucket, hash_link) {
if (sp->gfn != gfn || sp->role.direct || sp->role.invalid)
continue;
- pte_size = sp->role.glevels == PT32_ROOT_LEVEL ? 4 : 8;
+ pte_size = sp->role.cr4_pae ? 8 : 4;
misaligned = (offset ^ (offset + bytes - 1)) & ~(pte_size - 1);
misaligned |= bytes < 4;
if (misaligned || flooded) {
@@ -2679,14 +2695,14 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
pgprintk("misaligned: gpa %llx bytes %d role %x\n",
gpa, bytes, sp->role.word);
if (kvm_mmu_zap_page(vcpu->kvm, sp))
- n = bucket->first;
+ goto restart;
++vcpu->kvm->stat.mmu_flooded;
continue;
}
page_offset = offset;
level = sp->role.level;
npte = 1;
- if (sp->role.glevels == PT32_ROOT_LEVEL) {
+ if (!sp->role.cr4_pae) {
page_offset <<= 1; /* 32->64 */
/*
* A 32-bit pde maps 4MB while the shadow pdes map
@@ -2704,20 +2720,11 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
continue;
}
spte = &sp->spt[page_offset / sizeof(*spte)];
- if ((gpa & (pte_size - 1)) || (bytes < pte_size)) {
- gentry = 0;
- r = kvm_read_guest_atomic(vcpu->kvm,
- gpa & ~(u64)(pte_size - 1),
- &gentry, pte_size);
- new = (const void *)&gentry;
- if (r < 0)
- new = NULL;
- }
while (npte--) {
entry = *spte;
mmu_pte_write_zap_pte(vcpu, sp, spte);
- if (new)
- mmu_pte_write_new_pte(vcpu, sp, spte, new);
+ if (gentry)
+ mmu_pte_write_new_pte(vcpu, sp, spte, &gentry);
mmu_pte_write_flush_tlb(vcpu, entry, *spte);
++spte;
}
@@ -2897,10 +2904,11 @@ void kvm_mmu_zap_all(struct kvm *kvm)
struct kvm_mmu_page *sp, *node;
spin_lock(&kvm->mmu_lock);
+restart:
list_for_each_entry_safe(sp, node, &kvm->arch.active_mmu_pages, link)
if (kvm_mmu_zap_page(kvm, sp))
- node = container_of(kvm->arch.active_mmu_pages.next,
- struct kvm_mmu_page, link);
+ goto restart;
+
spin_unlock(&kvm->mmu_lock);
kvm_flush_remote_tlbs(kvm);
@@ -3171,8 +3179,7 @@ static gva_t canonicalize(gva_t gva)
}
-typedef void (*inspect_spte_fn) (struct kvm *kvm, struct kvm_mmu_page *sp,
- u64 *sptep);
+typedef void (*inspect_spte_fn) (struct kvm *kvm, u64 *sptep);
static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
inspect_spte_fn fn)
@@ -3188,7 +3195,7 @@ static void __mmu_spte_walk(struct kvm *kvm, struct kvm_mmu_page *sp,
child = page_header(ent & PT64_BASE_ADDR_MASK);
__mmu_spte_walk(kvm, child, fn);
} else
- fn(kvm, sp, &sp->spt[i]);
+ fn(kvm, &sp->spt[i]);
}
}
}
@@ -3279,6 +3286,8 @@ static void audit_mappings(struct kvm_vcpu *vcpu)
static int count_rmaps(struct kvm_vcpu *vcpu)
{
+ struct kvm *kvm = vcpu->kvm;
+ struct kvm_memslots *slots;
int nmaps = 0;
int i, j, k, idx;
@@ -3312,7 +3321,7 @@ static int count_rmaps(struct kvm_vcpu *vcpu)
return nmaps;
}
-void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep)
+void inspect_spte_has_rmap(struct kvm *kvm, u64 *sptep)
{
unsigned long *rmapp;
struct kvm_mmu_page *rev_sp;
@@ -3328,14 +3337,14 @@ void inspect_spte_has_rmap(struct kvm *kvm, struct kvm_mmu_page *sp, u64 *sptep)
printk(KERN_ERR "%s: no memslot for gfn %ld\n",
audit_msg, gfn);
printk(KERN_ERR "%s: index %ld of sp (gfn=%lx)\n",
- audit_msg, sptep - rev_sp->spt,
+ audit_msg, (long int)(sptep - rev_sp->spt),
rev_sp->gfn);
dump_stack();
return;
}
rmapp = gfn_to_rmap(kvm, rev_sp->gfns[sptep - rev_sp->spt],
- is_large_pte(*sptep));
+ rev_sp->role.level);
if (!*rmapp) {
if (!printk_ratelimit())
return;
@@ -3370,7 +3379,7 @@ static void check_writable_mappings_rmap(struct kvm_vcpu *vcpu)
continue;
if (!(ent & PT_WRITABLE_MASK))
continue;
- inspect_spte_has_rmap(vcpu->kvm, sp, &pt[i]);
+ inspect_spte_has_rmap(vcpu->kvm, &pt[i]);
}
}
return;
OpenPOWER on IntegriCloud