From 957ed9effd80b04482cbdce8c95bdf803a656b94 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 22 Aug 2010 19:12:48 +0800 Subject: KVM: MMU: prefetch ptes when intercepted guest #PF Support prefetch ptes when intercept guest #PF, avoid to #PF by later access If we meet any failure in the prefetch path, we will exit it and not try other ptes to avoid become heavy path Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 51ef909..872ff26 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -310,6 +310,77 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, return r || curr_pte != gw->ptes[level - 1]; } +static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep) +{ + struct kvm_mmu_page *sp; + pt_element_t gptep[PTE_PREFETCH_NUM]; + gpa_t first_pte_gpa; + int offset = 0, i; + u64 *spte; + + sp = page_header(__pa(sptep)); + + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + return; + + if (sp->role.direct) + return __direct_pte_prefetch(vcpu, sp, sptep); + + i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); + + if (PTTYPE == 32) + offset = sp->role.quadrant << PT64_LEVEL_BITS; + + first_pte_gpa = gfn_to_gpa(sp->gfn) + + (offset + i) * sizeof(pt_element_t); + + if (kvm_read_guest_atomic(vcpu->kvm, first_pte_gpa, gptep, + sizeof(gptep)) < 0) + return; + + spte = sp->spt + i; + + for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { + pt_element_t gpte; + unsigned pte_access; + gfn_t gfn; + pfn_t pfn; + bool dirty; + + if (spte == sptep) + continue; + + if (*spte != shadow_trap_nonpresent_pte) + continue; + + gpte = gptep[i]; + + if (!is_present_gpte(gpte) || + is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) { + if (!sp->unsync) + __set_spte(spte, shadow_notrap_nonpresent_pte); + continue; + } + + if 
(!(gpte & PT_ACCESSED_MASK)) + continue; + + pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte); + gfn = gpte_to_gfn(gpte); + dirty = is_dirty_gpte(gpte); + pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn, + (pte_access & ACC_WRITE_MASK) && dirty); + if (is_error_pfn(pfn)) { + kvm_release_pfn_clean(pfn); + break; + } + + mmu_set_spte(vcpu, spte, sp->role.access, pte_access, 0, 0, + dirty, NULL, PT_PAGE_TABLE_LEVEL, gfn, + pfn, true, true); + } +} + /* * Fetch a shadow pte for a specific level in the paging hierarchy. */ @@ -391,6 +462,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, user_fault, write_fault, dirty, ptwrite, it.level, gw->gfn, pfn, false, true); + FNAME(pte_prefetch)(vcpu, it.sptep); return it.sptep; -- cgit v1.1 From 189be38db3dde12699a8b9dc22d33e8c95efe110 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sun, 22 Aug 2010 19:13:33 +0800 Subject: KVM: MMU: combine guest pte read between fetch and pte prefetch Combine guest pte read between guest pte check in the fetch path and pte prefetch Signed-off-by: Xiao Guangrong Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 872ff26..a4e8389 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -67,6 +67,7 @@ struct guest_walker { int level; gfn_t table_gfn[PT_MAX_FULL_LEVELS]; pt_element_t ptes[PT_MAX_FULL_LEVELS]; + pt_element_t prefetch_ptes[PTE_PREFETCH_NUM]; gpa_t pte_gpa[PT_MAX_FULL_LEVELS]; unsigned pt_access; unsigned pte_access; @@ -302,21 +303,33 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, struct guest_walker *gw, int level) { - int r; pt_element_t curr_pte; - - r = 
kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 1], + gpa_t base_gpa, pte_gpa = gw->pte_gpa[level - 1]; + u64 mask; + int r, index; + + if (level == PT_PAGE_TABLE_LEVEL) { + mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; + base_gpa = pte_gpa & ~mask; + index = (pte_gpa - base_gpa) / sizeof(pt_element_t); + + r = kvm_read_guest_atomic(vcpu->kvm, base_gpa, + gw->prefetch_ptes, sizeof(gw->prefetch_ptes)); + curr_pte = gw->prefetch_ptes[index]; + } else + r = kvm_read_guest_atomic(vcpu->kvm, pte_gpa, &curr_pte, sizeof(curr_pte)); + return r || curr_pte != gw->ptes[level - 1]; } -static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep) +static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, + u64 *sptep) { struct kvm_mmu_page *sp; - pt_element_t gptep[PTE_PREFETCH_NUM]; - gpa_t first_pte_gpa; - int offset = 0, i; + pt_element_t *gptep = gw->prefetch_ptes; u64 *spte; + int i; sp = page_header(__pa(sptep)); @@ -327,17 +340,6 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, u64 *sptep) return __direct_pte_prefetch(vcpu, sp, sptep); i = (sptep - sp->spt) & ~(PTE_PREFETCH_NUM - 1); - - if (PTTYPE == 32) - offset = sp->role.quadrant << PT64_LEVEL_BITS; - - first_pte_gpa = gfn_to_gpa(sp->gfn) + - (offset + i) * sizeof(pt_element_t); - - if (kvm_read_guest_atomic(vcpu->kvm, first_pte_gpa, gptep, - sizeof(gptep)) < 0) - return; - spte = sp->spt + i; for (i = 0; i < PTE_PREFETCH_NUM; i++, spte++) { @@ -462,7 +464,7 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, mmu_set_spte(vcpu, it.sptep, access, gw->pte_access & access, user_fault, write_fault, dirty, ptwrite, it.level, gw->gfn, pfn, false, true); - FNAME(pte_prefetch)(vcpu, it.sptep); + FNAME(pte_prefetch)(vcpu, gw, it.sptep); return it.sptep; -- cgit v1.1 From bc32ce2152406431acf4daf4a81dc1664bb7b91b Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Sat, 28 Aug 2010 19:22:46 +0800 Subject: KVM: MMU: fix wrong not write protected sp report The audit code 
reports some sp as not write protected in current code; this is just a bug in audit_write_protection(), since: - an invalid sp does not need to be write protected - it uses an uninitialized local variable ('gfn') - it calls kvm_mmu_audit() outside of mmu_lock's protection Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a4e8389..a0f2feb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -504,7 +504,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, unsigned long mmu_seq; pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); - kvm_mmu_audit(vcpu, "pre page fault"); r = mmu_topup_memory_caches(vcpu); if (r) @@ -542,6 +541,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, spin_lock(&vcpu->kvm->mmu_lock); if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; + + kvm_mmu_audit(vcpu, "pre page fault"); kvm_mmu_free_some_pages(vcpu); sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, level, &write_pt, pfn); -- cgit v1.1 From 8b1fe17cc7a8b2c62b400dcbfaebd96da6b4f58e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 30 Aug 2010 18:22:53 +0800 Subject: KVM: MMU: support disable/enable mmu audit dynamicly Add an r/w module parameter named 'mmu_audit'; it controls whether audit is enabled or disabled: enable: echo 1 > /sys/module/kvm/parameters/mmu_audit disable: echo 0 > /sys/module/kvm/parameters/mmu_audit This patch does not change the logic Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a0f2feb..debe770 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -542,7 +542,7 @@ static int
FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, if (mmu_notifier_retry(vcpu, mmu_seq)) goto out_unlock; - kvm_mmu_audit(vcpu, "pre page fault"); + trace_kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT); kvm_mmu_free_some_pages(vcpu); sptep = FNAME(fetch)(vcpu, addr, &walker, user_fault, write_fault, level, &write_pt, pfn); @@ -554,7 +554,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ ++vcpu->stat.pf_fixed; - kvm_mmu_audit(vcpu, "post page fault (fixed)"); + trace_kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); spin_unlock(&vcpu->kvm->mmu_lock); return write_pt; -- cgit v1.1 From 957446afce22df9a42b9482fcd55985f4037fe66 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:38 +0200 Subject: KVM: MMU: Check for root_level instead of long mode The walk_addr function checks for !is_long_mode in its 64 bit version. But what is meant here is a check for pae paging. Change the condition to really check for pae paging so that it also works with nested nested paging. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index debe770..e4ad3dc 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -132,7 +132,7 @@ walk: walker->level = vcpu->arch.mmu.root_level; pte = vcpu->arch.cr3; #if PTTYPE == 64 - if (!is_long_mode(vcpu)) { + if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) { @@ -205,7 +205,7 @@ walk: (PTTYPE == 64 || is_pse(vcpu))) || ((walker->level == PT_PDPE_LEVEL) && is_large_pte(pte) && - is_long_mode(vcpu))) { + vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL)) { int lvl = walker->level; walker->gfn = gpte_to_gfn_lvl(pte, lvl); -- cgit v1.1 From 5777ed340d89cdc6c76a5c552337a3861b40a806 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:42 +0200 Subject: KVM: MMU: Introduce get_cr3 function pointer This function pointer in the MMU context is required to implement Nested Nested Paging. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index e4ad3dc..13d0c06 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -130,7 +130,7 @@ walk: present = true; eperm = rsvd_fault = false; walker->level = vcpu->arch.mmu.root_level; - pte = vcpu->arch.cr3; + pte = vcpu->arch.mmu.get_cr3(vcpu); #if PTTYPE == 64 if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); @@ -143,7 +143,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (vcpu->arch.cr3 & CR3_NONPAE_RESERVED_BITS) == 0); + (vcpu->arch.mmu.get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; -- cgit v1.1 From 3241f22da85d26505b39f525a88f52ebd1235975 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:45 +0200 Subject: KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu This patch changes is_rsvd_bits_set() function prototype to take only a kvm_mmu context instead of a full vcpu. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 13d0c06..68ee1b7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -168,7 +168,7 @@ walk: break; } - if (is_rsvd_bits_set(vcpu, pte, walker->level)) { + if (is_rsvd_bits_set(&vcpu->arch.mmu, pte, walker->level)) { rsvd_fault = true; break; } @@ -327,6 +327,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, u64 *sptep) { struct kvm_mmu_page *sp; + struct kvm_mmu *mmu = &vcpu->arch.mmu; pt_element_t *gptep = gw->prefetch_ptes; u64 *spte; int i; @@ -358,7 +359,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, gpte = gptep[i]; if (!is_present_gpte(gpte) || - is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) { + is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) { if (!sp->unsync) __set_spte(spte, shadow_notrap_nonpresent_pte); continue; @@ -713,7 +714,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return -EINVAL; gfn = gpte_to_gfn(gpte); - if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) + if (is_rsvd_bits_set(&vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL) || gfn != sp->gfns[i] || !is_present_gpte(gpte) || !(gpte & PT_ACCESSED_MASK)) { u64 nonpresent; -- cgit v1.1 From 8df25a328a6ca3bd0f048278f4d5ae0a1f6fadc1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:46 +0200 Subject: KVM: MMU: Track page fault data in struct vcpu This patch introduces a struct with two new fields in vcpu_arch for x86: * fault.address * fault.error_code This will be used to correctly propagate page faults back into the guest when we could have either an ordinary page fault or a nested page fault. 
In the case of a nested page fault the fault-address is different from the original address that should be walked. So we need to keep track of the real fault-address. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 68ee1b7..d07f48a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -258,6 +258,10 @@ error: walker->error_code |= PFERR_FETCH_MASK; if (rsvd_fault) walker->error_code |= PFERR_RSVD_MASK; + + vcpu->arch.fault.address = addr; + vcpu->arch.fault.error_code = walker->error_code; + trace_kvm_mmu_walker_error(walker->error_code); return 0; } @@ -521,7 +525,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, */ if (!r) { pgprintk("%s: guest page fault\n", __func__); - inject_page_fault(vcpu, addr, walker.error_code); + inject_page_fault(vcpu); vcpu->arch.last_pt_write_count = 0; /* reset fork detector */ return 0; } -- cgit v1.1 From 1e301feb079e8ee6091bb75283e960fc33059a68 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:47 +0200 Subject: KVM: MMU: Introduce generic walk_addr function This is the first patch in the series towards a generic walk_addr implementation which could walk two-dimensional page tables in the end. In this first step the walk_addr function is renamed to walk_addr_generic which takes an MMU context as an additional parameter.
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d07f48a..a704a81 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -114,9 +114,10 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) /* * Fetch a guest pte for a guest virtual address */ -static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, - int write_fault, int user_fault, int fetch_fault) +static int FNAME(walk_addr_generic)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t addr, int write_fault, + int user_fault, int fetch_fault) { pt_element_t pte; gfn_t table_gfn; @@ -129,10 +130,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walk: present = true; eperm = rsvd_fault = false; - walker->level = vcpu->arch.mmu.root_level; - pte = vcpu->arch.mmu.get_cr3(vcpu); + walker->level = mmu->root_level; + pte = mmu->get_cr3(vcpu); + #if PTTYPE == 64 - if (vcpu->arch.mmu.root_level == PT32E_ROOT_LEVEL) { + if (walker->level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) { @@ -143,7 +145,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) || - (vcpu->arch.mmu.get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); + (mmu->get_cr3(vcpu) & CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; @@ -205,7 +207,7 @@ walk: (PTTYPE == 64 || is_pse(vcpu))) || ((walker->level == PT_PDPE_LEVEL) && is_large_pte(pte) && - vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL)) { + mmu->root_level == PT64_ROOT_LEVEL)) { int lvl = walker->level; walker->gfn = gpte_to_gfn_lvl(pte, lvl); @@ -266,6 +268,14 @@ error: return 0; } +static int FNAME(walk_addr)(struct guest_walker 
*walker, + struct kvm_vcpu *vcpu, gva_t addr, + int write_fault, int user_fault, int fetch_fault) +{ + return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr, + write_fault, user_fault, fetch_fault); +} + static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) { -- cgit v1.1 From 6539e738f65a8f1fc7806295d5d701fba4008343 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:50 +0200 Subject: KVM: MMU: Implement nested gva_to_gpa functions This patch adds the functions to do a nested l2_gva to l1_gpa page table walk. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a704a81..eefe363 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -276,6 +276,16 @@ static int FNAME(walk_addr)(struct guest_walker *walker, write_fault, user_fault, fetch_fault); } +static int FNAME(walk_addr_nested)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, + int write_fault, int user_fault, + int fetch_fault) +{ + return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, + addr, write_fault, user_fault, + fetch_fault); +} + static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) { @@ -660,6 +670,27 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, return gpa; } +static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, + u32 access, u32 *error) +{ + struct guest_walker walker; + gpa_t gpa = UNMAPPED_GVA; + int r; + + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, + access & PFERR_WRITE_MASK, + access & PFERR_USER_MASK, + access & PFERR_FETCH_MASK); + + if (r) { + gpa = gfn_to_gpa(walker.gfn); + gpa |= vaddr & ~PAGE_MASK; + } else if (error) + 
*error = walker.error_code; + + return gpa; +} + static void FNAME(prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) { -- cgit v1.1 From 2329d46d213d0721dafae18db29f54b196f11468 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:52 +0200 Subject: KVM: MMU: Make walk_addr_generic capable for two-level walking This patch uses kvm_read_guest_page_tdp to make the walk_addr_generic functions suitable for two-level page table walking. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index eefe363..f4e09d3 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -124,6 +124,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; bool eperm, present, rsvd_fault; + int offset; + u32 access = 0; trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, fetch_fault); @@ -153,12 +155,14 @@ walk: index = PT_INDEX(addr, walker->level); table_gfn = gpte_to_gfn(pte); - pte_gpa = gfn_to_gpa(table_gfn); - pte_gpa += index * sizeof(pt_element_t); + offset = index * sizeof(pt_element_t); + pte_gpa = gfn_to_gpa(table_gfn) + offset; walker->table_gfn[walker->level - 1] = table_gfn; walker->pte_gpa[walker->level - 1] = pte_gpa; - if (kvm_read_guest(vcpu->kvm, pte_gpa, &pte, sizeof(pte))) { + if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, &pte, + offset, sizeof(pte), + PFERR_USER_MASK|PFERR_WRITE_MASK)) { present = false; break; } @@ -209,15 +213,27 @@ walk: is_large_pte(pte) && mmu->root_level == PT64_ROOT_LEVEL)) { int lvl = walker->level; + gpa_t real_gpa; + gfn_t gfn; - walker->gfn = gpte_to_gfn_lvl(pte, lvl); - walker->gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) - >> PAGE_SHIFT; + gfn = gpte_to_gfn_lvl(pte, lvl); + 
gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) - walker->gfn += pse36_gfn_delta(pte); + gfn += pse36_gfn_delta(pte); + + access |= write_fault ? PFERR_WRITE_MASK : 0; + access |= fetch_fault ? PFERR_FETCH_MASK : 0; + access |= user_fault ? PFERR_USER_MASK : 0; + + real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), + access); + if (real_gpa == UNMAPPED_GVA) + return 0; + + walker->gfn = real_gpa >> PAGE_SHIFT; break; } -- cgit v1.1 From d41d1895eb856b5d1c82f3be106b7a3e75e4216b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:30:58 +0200 Subject: KVM: MMU: Introduce kvm_pdptr_read_mmu This function is implemented to load the pdptr pointers of the currently running guest (l1 or l2 guest). Therefore it takes care about the current paging mode and can read pdptrs out of l2 guest physical memory. Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index f4e09d3..a28f09b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -137,7 +137,7 @@ walk: #if PTTYPE == 64 if (walker->level == PT32E_ROOT_LEVEL) { - pte = kvm_pdptr_read(vcpu, (addr >> 30) & 3); + pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr >> 30) & 3); trace_kvm_mmu_paging_element(pte, walker->level); if (!is_present_gpte(pte)) { present = false; -- cgit v1.1 From 2d48a985c7bbcd72b4e92e301ea96bf1252ffc61 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 10 Sep 2010 17:31:01 +0200 Subject: KVM: MMU: Track NX state in struct kvm_mmu With Nested Paging emulation the NX state between the two MMU contexts may differ. To make sure that always the right fault error code is recorded this patch moves the NX state into struct kvm_mmu so that the code can distinguish between L1 and L2 NX state. 
Signed-off-by: Joerg Roedel Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a28f09b..2bdd843 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -105,7 +105,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) access = (gpte & (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; #if PTTYPE == 64 - if (is_nx(vcpu)) + if (vcpu->arch.mmu.nx) access &= ~(gpte >> PT64_NX_SHIFT); #endif return access; @@ -272,7 +272,7 @@ error: walker->error_code |= PFERR_WRITE_MASK; if (user_fault) walker->error_code |= PFERR_USER_MASK; - if (fetch_fault && is_nx(vcpu)) + if (fetch_fault && mmu->nx) walker->error_code |= PFERR_FETCH_MASK; if (rsvd_fault) walker->error_code |= PFERR_RSVD_MASK; -- cgit v1.1 From 20bd40dc6492da293993559555df07d467fd202e Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Mon, 27 Sep 2010 18:03:27 +0800 Subject: KVM: MMU: cleanup for error mask set while walk guest page table Small cleanup for set page fault error code Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 2bdd843..a83ff37 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -224,9 +224,7 @@ walk: is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - access |= write_fault ? PFERR_WRITE_MASK : 0; - access |= fetch_fault ? PFERR_FETCH_MASK : 0; - access |= user_fault ? 
PFERR_USER_MASK : 0; + access |= write_fault | fetch_fault | user_fault; real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access); @@ -268,10 +266,9 @@ error: walker->error_code = 0; if (present) walker->error_code |= PFERR_PRESENT_MASK; - if (write_fault) - walker->error_code |= PFERR_WRITE_MASK; - if (user_fault) - walker->error_code |= PFERR_USER_MASK; + + walker->error_code |= write_fault | user_fault; + if (fetch_fault && mmu->nx) walker->error_code |= PFERR_FETCH_MASK; if (rsvd_fault) @@ -673,9 +670,9 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, int r; r = FNAME(walk_addr)(&walker, vcpu, vaddr, - !!(access & PFERR_WRITE_MASK), - !!(access & PFERR_USER_MASK), - !!(access & PFERR_FETCH_MASK)); + access & PFERR_WRITE_MASK, + access & PFERR_USER_MASK, + access & PFERR_FETCH_MASK); if (r) { gpa = gfn_to_gpa(walker.gfn); -- cgit v1.1 From 3377078027dc54dc2a5acb2efa09587e7ac1cbd9 Mon Sep 17 00:00:00 2001 From: Xiao Guangrong Date: Tue, 28 Sep 2010 17:03:14 +0800 Subject: KVM: MMU: move access code parsing to FNAME(walk_addr) function Move access code parsing from caller site to FNAME(walk_addr) function Signed-off-by: Xiao Guangrong Signed-off-by: Avi Kivity --- arch/x86/kvm/paging_tmpl.h | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a83ff37..9a5f7bb 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -116,16 +116,18 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) */ static int FNAME(walk_addr_generic)(struct guest_walker *walker, struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, - gva_t addr, int write_fault, - int user_fault, int fetch_fault) + gva_t addr, u32 access) { pt_element_t pte; gfn_t table_gfn; unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; bool eperm, present, rsvd_fault; - 
int offset; - u32 access = 0; + int offset, write_fault, user_fault, fetch_fault; + + write_fault = access & PFERR_WRITE_MASK; + user_fault = access & PFERR_USER_MASK; + fetch_fault = access & PFERR_FETCH_MASK; trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, fetch_fault); @@ -215,6 +217,7 @@ walk: int lvl = walker->level; gpa_t real_gpa; gfn_t gfn; + u32 ac; gfn = gpte_to_gfn_lvl(pte, lvl); gfn += (addr & PT_LVL_OFFSET_MASK(lvl)) >> PAGE_SHIFT; @@ -224,10 +227,10 @@ walk: is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); - access |= write_fault | fetch_fault | user_fault; + ac = write_fault | fetch_fault | user_fault; real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), - access); + ac); if (real_gpa == UNMAPPED_GVA) return 0; @@ -282,21 +285,18 @@ error: } static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, - int write_fault, int user_fault, int fetch_fault) + struct kvm_vcpu *vcpu, gva_t addr, u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.mmu, addr, - write_fault, user_fault, fetch_fault); + access); } static int FNAME(walk_addr_nested)(struct guest_walker *walker, struct kvm_vcpu *vcpu, gva_t addr, - int write_fault, int user_fault, - int fetch_fault) + u32 access) { return FNAME(walk_addr_generic)(walker, vcpu, &vcpu->arch.nested_mmu, - addr, write_fault, user_fault, - fetch_fault); + addr, access); } static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -532,7 +532,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, { int write_fault = error_code & PFERR_WRITE_MASK; int user_fault = error_code & PFERR_USER_MASK; - int fetch_fault = error_code & PFERR_FETCH_MASK; struct guest_walker walker; u64 *sptep; int write_pt = 0; @@ -550,8 +549,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, /* * Look up the guest pte for the faulting address. 
*/ - r = FNAME(walk_addr)(&walker, vcpu, addr, write_fault, user_fault, - fetch_fault); + r = FNAME(walk_addr)(&walker, vcpu, addr, error_code); /* * The page is not mapped by the guest. Let the guest handle it. @@ -669,10 +667,7 @@ static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access, gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr)(&walker, vcpu, vaddr, - access & PFERR_WRITE_MASK, - access & PFERR_USER_MASK, - access & PFERR_FETCH_MASK); + r = FNAME(walk_addr)(&walker, vcpu, vaddr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); @@ -690,10 +685,7 @@ static gpa_t FNAME(gva_to_gpa_nested)(struct kvm_vcpu *vcpu, gva_t vaddr, gpa_t gpa = UNMAPPED_GVA; int r; - r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, - access & PFERR_WRITE_MASK, - access & PFERR_USER_MASK, - access & PFERR_FETCH_MASK); + r = FNAME(walk_addr_nested)(&walker, vcpu, vaddr, access); if (r) { gpa = gfn_to_gpa(walker.gfn); -- cgit v1.1 From 9611c187774f0e20c258c23ced2599c44bd2fef4 Mon Sep 17 00:00:00 2001 From: Nicolas Kaiser Date: Wed, 6 Oct 2010 14:23:22 +0200 Subject: KVM: fix typo in copyright notice Fix typo in copyright notice. Signed-off-by: Nicolas Kaiser Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/paging_tmpl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/kvm/paging_tmpl.h') diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 9a5f7bb..cd7a833 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -7,7 +7,7 @@ * MMU support * * Copyright (C) 2006 Qumranet, Inc. - * Copyright 2010 Red Hat, Inc. and/or its affilates. + * Copyright 2010 Red Hat, Inc. and/or its affiliates. * * Authors: * Yaniv Kamay -- cgit v1.1