author     Linus Torvalds <torvalds@linux-foundation.org>   2017-02-22 18:22:53 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>   2017-02-22 18:22:53 -0800
commit     fd7e9a88348472521d999434ee02f25735c7dadf
tree       90e6249e58d90ba9d590cfed4481c29ca36a05dc /arch/mips
parent     5066e4a34081dd82fb625f2f382bfa29ca421a3f
parent     dd0fd8bca1850ddadf5d33a9ed28f3707cd98ac7
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Paolo Bonzini:
"4.11 is going to be a relatively large release for KVM, with a little
over 200 commits and noteworthy changes for most architectures.
ARM:
- GICv3 save/restore
- cache flushing fixes
- working MSI injection for GICv3 ITS
- physical timer emulation
MIPS:
- various improvements under the hood
- support for SMP guests
- a large rewrite of MMU emulation. KVM MIPS can now use MMU
notifiers to support copy-on-write, KSM, idle page tracking,
swapping, ballooning and everything else. KVM_CAP_READONLY_MEM is
also supported, so that writes to some memory regions can be
treated as MMIO. The new MMU also paves the way for hardware
virtualization support.
PPC:
- support for POWER9 using the radix-tree MMU for host and guest
- resizable hashed page table
- bugfixes.
s390:
- expose more features to the guest
- more SIMD extensions
- instruction execution protection
- ESOP2
x86:
- improved hashing in the MMU
- faster PageLRU tracking for Intel CPUs without EPT A/D bits
- some refactoring of nested VMX entry/exit code, preparing for live
migration support of nested hypervisors
- expose yet another AVX512 CPUID bit
- host-to-guest PTP support
- refactoring of interrupt injection, with some optimizations thrown
in and some duct tape removed.
- remove lazy FPU handling
- optimizations of user-mode exits
- optimizations of vcpu_is_preempted() for KVM guests
generic:
- alternative signaling mechanism that doesn't pound on
tsk->sighand->siglock"
* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (195 commits)
x86/kvm: Provide optimized version of vcpu_is_preempted() for x86-64
x86/paravirt: Change vcp_is_preempted() arg type to long
KVM: VMX: use correct vmcs_read/write for guest segment selector/base
x86/kvm/vmx: Defer TR reload after VM exit
x86/asm/64: Drop __cacheline_aligned from struct x86_hw_tss
x86/kvm/vmx: Simplify segment_base()
x86/kvm/vmx: Get rid of segment_base() on 64-bit kernels
x86/kvm/vmx: Don't fetch the TSS base from the GDT
x86/asm: Define the kernel TSS limit in a macro
kvm: fix page struct leak in handle_vmon
KVM: PPC: Book3S HV: Disable HPT resizing on POWER9 for now
KVM: Return an error code only as a constant in kvm_get_dirty_log()
KVM: Return an error code only as a constant in kvm_get_dirty_log_protect()
KVM: Return directly after a failed copy_from_user() in kvm_vm_compat_ioctl()
KVM: x86: remove code for lazy FPU handling
KVM: race-free exit from KVM_RUN without POSIX signals
KVM: PPC: Book3S HV: Turn "KVM guest htab" message into a debug message
KVM: PPC: Book3S PR: Ratelimit copy data failure error messages
KVM: Support vCPU-based gfn->hva cache
KVM: use separate generations for each address space
...
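The KVM_CAP_READONLY_MEM support called out in the MIPS section above (and enabled via __KVM_HAVE_READONLY_MEM in the uapi diff below) lets userspace mark a memslot read-only so that guest writes to it come back as MMIO exits. A hedged sketch of the userspace side; the slot number, guest physical address and size are example values only, and error handling is omitted.

#include <sys/ioctl.h>
#include <sys/mman.h>
#include <linux/kvm.h>

static int map_readonly_rom(int vm_fd)
{
	size_t size = 0x40000;				/* 256 KiB, example only */
	void *backing = mmap(NULL, size, PROT_READ | PROT_WRITE,
			     MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	struct kvm_userspace_memory_region region = {
		.slot            = 1,			/* example slot */
		.flags           = KVM_MEM_READONLY,	/* guest writes exit as MMIO */
		.guest_phys_addr = 0x1fc00000,		/* example: MIPS boot ROM region */
		.memory_size     = size,
		.userspace_addr  = (unsigned long)backing,
	};

	/* Reads are satisfied from the mapping; writes return KVM_EXIT_MMIO. */
	return ioctl(vm_fd, KVM_SET_USER_MEMORY_REGION, &region);
}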
Diffstat (limited to 'arch/mips')
-rw-r--r-- | arch/mips/include/asm/kvm_host.h    |  183
-rw-r--r-- | arch/mips/include/asm/mmu_context.h |    9
-rw-r--r-- | arch/mips/include/uapi/asm/kvm.h    |    2
-rw-r--r-- | arch/mips/kvm/Kconfig               |    2
-rw-r--r-- | arch/mips/kvm/dyntrans.c            |   52
-rw-r--r-- | arch/mips/kvm/emulate.c             |  432
-rw-r--r-- | arch/mips/kvm/entry.c               |  155
-rw-r--r-- | arch/mips/kvm/interrupt.c           |    5
-rw-r--r-- | arch/mips/kvm/mips.c                |  503
-rw-r--r-- | arch/mips/kvm/mmu.c                 | 1329
-rw-r--r-- | arch/mips/kvm/tlb.c                 |  291
-rw-r--r-- | arch/mips/kvm/trap_emul.c           |  734
12 files changed, 2482 insertions(+), 1215 deletions(-)
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index bebec37..05e785f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -43,6 +43,7 @@ #define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0) #define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0) #define KVM_REG_MIPS_CP0_STATUS MIPS_CP0_32(12, 0) +#define KVM_REG_MIPS_CP0_INTCTL MIPS_CP0_32(12, 1) #define KVM_REG_MIPS_CP0_CAUSE MIPS_CP0_32(13, 0) #define KVM_REG_MIPS_CP0_EPC MIPS_CP0_64(14, 0) #define KVM_REG_MIPS_CP0_PRID MIPS_CP0_32(15, 0) @@ -64,7 +65,7 @@ #define KVM_REG_MIPS_CP0_KSCRATCH6 MIPS_CP0_64(31, 7) -#define KVM_MAX_VCPUS 1 +#define KVM_MAX_VCPUS 8 #define KVM_USER_MEM_SLOTS 8 /* memory slots that does not exposed to userspace */ #define KVM_PRIVATE_MEM_SLOTS 0 @@ -88,6 +89,7 @@ #define KVM_GUEST_KUSEG 0x00000000UL #define KVM_GUEST_KSEG0 0x40000000UL +#define KVM_GUEST_KSEG1 0x40000000UL #define KVM_GUEST_KSEG23 0x60000000UL #define KVM_GUEST_KSEGX(a) ((_ACAST32_(a)) & 0xe0000000) #define KVM_GUEST_CPHYSADDR(a) ((_ACAST32_(a)) & 0x1fffffff) @@ -104,7 +106,6 @@ #define KVM_GUEST_KSEG23ADDR(a) (KVM_GUEST_CPHYSADDR(a) | KVM_GUEST_KSEG23) #define KVM_INVALID_PAGE 0xdeadbeef -#define KVM_INVALID_INST 0xdeadbeef #define KVM_INVALID_ADDR 0xdeadbeef /* @@ -121,8 +122,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) return IS_ERR_VALUE(addr); } -extern atomic_t kvm_mips_instance; - struct kvm_vm_stat { ulong remote_tlb_flush; }; @@ -156,12 +155,8 @@ struct kvm_arch_memory_slot { }; struct kvm_arch { - /* Guest GVA->HPA page table */ - unsigned long *guest_pmap; - unsigned long guest_pmap_npages; - - /* Wired host TLB used for the commpage */ - int commpage_tlb; + /* Guest physical mm */ + struct mm_struct gpa_mm; }; #define N_MIPS_COPROC_REGS 32 @@ -233,6 +228,7 @@ enum emulation_result { EMULATE_FAIL, /* can't emulate this instruction */ EMULATE_WAIT, /* WAIT instruction */ EMULATE_PRIV_FAIL, + EMULATE_EXCEPT, /* A guest exception has been generated */ }; #define mips3_paddr_to_tlbpfn(x) \ @@ -250,6 +246,7 @@ enum emulation_result { #define TLB_ASID(x) ((x).tlb_hi & KVM_ENTRYHI_ASID) #define TLB_LO_IDX(x, va) (((va) >> PAGE_SHIFT) & 1) #define TLB_IS_VALID(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_V) +#define TLB_IS_DIRTY(x, va) ((x).tlb_lo[TLB_LO_IDX(x, va)] & ENTRYLO_D) #define TLB_HI_VPN2_HIT(x, y) ((TLB_VPN2(x) & ~(x).tlb_mask) == \ ((y) & VPN2_MASK & ~(x).tlb_mask)) #define TLB_HI_ASID_HIT(x, y) (TLB_IS_GLOBAL(x) || \ @@ -261,6 +258,17 @@ struct kvm_mips_tlb { long tlb_lo[2]; }; +#define KVM_NR_MEM_OBJS 4 + +/* + * We don't want allocation failures within the mmu code, so we preallocate + * enough memory for a single page fault in a cache. 
+ */ +struct kvm_mmu_memory_cache { + int nobjs; + void *objects[KVM_NR_MEM_OBJS]; +}; + #define KVM_MIPS_AUX_FPU 0x1 #define KVM_MIPS_AUX_MSA 0x2 @@ -275,6 +283,8 @@ struct kvm_vcpu_arch { unsigned long host_cp0_badvaddr; unsigned long host_cp0_epc; u32 host_cp0_cause; + u32 host_cp0_badinstr; + u32 host_cp0_badinstrp; /* GPRS */ unsigned long gprs[32]; @@ -318,20 +328,18 @@ struct kvm_vcpu_arch { /* Bitmask of pending exceptions to be cleared */ unsigned long pending_exceptions_clr; - /* Save/Restore the entryhi register when are are preempted/scheduled back in */ - unsigned long preempt_entryhi; - /* S/W Based TLB for guest */ struct kvm_mips_tlb guest_tlb[KVM_MIPS_GUEST_TLB_SIZE]; - /* Cached guest kernel/user ASIDs */ - u32 guest_user_asid[NR_CPUS]; - u32 guest_kernel_asid[NR_CPUS]; + /* Guest kernel/user [partial] mm */ struct mm_struct guest_kernel_mm, guest_user_mm; /* Guest ASID of last user mode execution */ unsigned int last_user_gasid; + /* Cache some mmu pages needed inside spinlock regions */ + struct kvm_mmu_memory_cache mmu_page_cache; + int last_sched_cpu; /* WAIT executed */ @@ -339,14 +347,15 @@ struct kvm_vcpu_arch { u8 fpu_enabled; u8 msa_enabled; - u8 kscratch_enabled; }; #define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0]) #define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val) #define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0]) +#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val)) #define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0]) +#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val)) #define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0]) #define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val)) #define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2]) @@ -522,9 +531,17 @@ struct kvm_mips_callbacks { int (*handle_msa_fpe)(struct kvm_vcpu *vcpu); int (*handle_fpe)(struct kvm_vcpu *vcpu); int (*handle_msa_disabled)(struct kvm_vcpu *vcpu); - int (*vm_init)(struct kvm *kvm); int (*vcpu_init)(struct kvm_vcpu *vcpu); + void (*vcpu_uninit)(struct kvm_vcpu *vcpu); int (*vcpu_setup)(struct kvm_vcpu *vcpu); + void (*flush_shadow_all)(struct kvm *kvm); + /* + * Must take care of flushing any cached GPA PTEs (e.g. guest entries in + * VZ root TLB, or T&E GVA page tables and corresponding root TLB + * mappings). 
+ */ + void (*flush_shadow_memslot)(struct kvm *kvm, + const struct kvm_memory_slot *slot); gpa_t (*gva_to_gpa)(gva_t gva); void (*queue_timer_int)(struct kvm_vcpu *vcpu); void (*dequeue_timer_int)(struct kvm_vcpu *vcpu); @@ -542,8 +559,10 @@ struct kvm_mips_callbacks { const struct kvm_one_reg *reg, s64 *v); int (*set_one_reg)(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 v); - int (*vcpu_get_regs)(struct kvm_vcpu *vcpu); - int (*vcpu_set_regs)(struct kvm_vcpu *vcpu); + int (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu); + int (*vcpu_put)(struct kvm_vcpu *vcpu, int cpu); + int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu); + void (*vcpu_reenter)(struct kvm_run *run, struct kvm_vcpu *vcpu); }; extern struct kvm_mips_callbacks *kvm_mips_callbacks; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks); @@ -556,6 +575,7 @@ extern int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu); /* Building of entry/exception code */ int kvm_mips_entry_setup(void); void *kvm_mips_build_vcpu_run(void *addr); +void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler); void *kvm_mips_build_exception(void *addr, void *handler); void *kvm_mips_build_exit(void *addr); @@ -580,54 +600,125 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu); u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu); extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr, - struct kvm_vcpu *vcpu); + struct kvm_vcpu *vcpu, + bool write_fault); extern int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, struct kvm_vcpu *vcpu); extern int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, - struct kvm_mips_tlb *tlb); + struct kvm_mips_tlb *tlb, + unsigned long gva, + bool write_fault); extern enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, u32 *opc, struct kvm_run *run, - struct kvm_vcpu *vcpu); - -extern enum emulation_result kvm_mips_handle_tlbmod(u32 cause, - u32 *opc, - struct kvm_run *run, - struct kvm_vcpu *vcpu); + struct kvm_vcpu *vcpu, + bool write_fault); extern void kvm_mips_dump_host_tlbs(void); extern void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu); -extern int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, - unsigned long entrylo0, - unsigned long entrylo1, - int flush_dcache_mask); -extern void kvm_mips_flush_host_tlb(int skip_kseg0); -extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi); +extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi, + bool user, bool kernel); extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi); -extern int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr); -extern unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, - unsigned long gva); -extern void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, - struct kvm_vcpu *vcpu); -extern void kvm_local_flush_tlb_all(void); -extern void kvm_mips_alloc_new_mmu_context(struct kvm_vcpu *vcpu); -extern void kvm_mips_vcpu_load(struct kvm_vcpu *vcpu, int cpu); -extern void kvm_mips_vcpu_put(struct kvm_vcpu *vcpu); + +void kvm_mips_suspend_mm(int cpu); +void kvm_mips_resume_mm(int cpu); + +/* MMU handling */ + +/** + * enum kvm_mips_flush - Types of MMU flushes. + * @KMF_USER: Flush guest user virtual memory mappings. + * Guest USeg only. + * @KMF_KERN: Flush guest kernel virtual memory mappings. + * Guest USeg and KSeg2/3. + * @KMF_GPA: Flush guest physical memory mappings. 
+ * Also includes KSeg0 if KMF_KERN is set. + */ +enum kvm_mips_flush { + KMF_USER = 0x0, + KMF_KERN = 0x1, + KMF_GPA = 0x2, +}; +void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags); +bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn); +int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn); +pgd_t *kvm_pgd_alloc(void); +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); +void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr, + bool user); +void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu); +void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu); + +enum kvm_mips_fault_result { + KVM_MIPS_MAPPED = 0, + KVM_MIPS_GVA, + KVM_MIPS_GPA, + KVM_MIPS_TLB, + KVM_MIPS_TLBINV, + KVM_MIPS_TLBMOD, +}; +enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, + unsigned long gva, + bool write); + +#define KVM_ARCH_WANT_MMU_NOTIFIER +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end); +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); + +static inline void kvm_arch_mmu_notifier_invalidate_page(struct kvm *kvm, + unsigned long address) +{ +} /* Emulation */ -u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu); +int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause); +int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); +int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out); + +/** + * kvm_is_ifetch_fault() - Find whether a TLBL exception is due to ifetch fault. + * @vcpu: Virtual CPU. + * + * Returns: Whether the TLBL exception was likely due to an instruction + * fetch fault rather than a data load fault. + */ +static inline bool kvm_is_ifetch_fault(struct kvm_vcpu_arch *vcpu) +{ + unsigned long badvaddr = vcpu->host_cp0_badvaddr; + unsigned long epc = msk_isa16_mode(vcpu->pc); + u32 cause = vcpu->host_cp0_cause; + + if (epc == badvaddr) + return true; + + /* + * Branches may be 32-bit or 16-bit instructions. + * This isn't exact, but we don't really support MIPS16 or microMIPS yet + * in KVM anyway. 
+ */ + if ((cause & CAUSEF_BD) && badvaddr - epc <= 4) + return true; + + return false; +} extern enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, struct kvm_run *run, struct kvm_vcpu *vcpu); +long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu); + extern enum emulation_result kvm_mips_emulate_syscall(u32 cause, u32 *opc, struct kvm_run *run, @@ -761,10 +852,6 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, struct kvm_memory_slot *dont) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslots *slots) {} -static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} -static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, - struct kvm_memory_slot *slot) {} -static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index ddd57ad..2abf94f 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -29,9 +29,11 @@ do { \ } \ } while (0) +extern void tlbmiss_handler_setup_pgd(unsigned long); + +/* Note: This is also implemented with uasm in arch/mips/kvm/entry.c */ #define TLBMISS_HANDLER_SETUP_PGD(pgd) \ do { \ - extern void tlbmiss_handler_setup_pgd(unsigned long); \ tlbmiss_handler_setup_pgd((unsigned long)(pgd)); \ htw_set_pwbase((unsigned long)pgd); \ } while (0) @@ -97,17 +99,12 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) static inline void get_new_mmu_context(struct mm_struct *mm, unsigned long cpu) { - extern void kvm_local_flush_tlb_all(void); unsigned long asid = asid_cache(cpu); if (!((asid += cpu_asid_inc()) & cpu_asid_mask(&cpu_data[cpu]))) { if (cpu_has_vtag_icache) flush_icache_all(); -#ifdef CONFIG_KVM - kvm_local_flush_tlb_all(); /* start new asid cycle */ -#else local_flush_tlb_all(); /* start new asid cycle */ -#endif if (!asid) /* fix version if needed */ asid = asid_first_version(cpu); } diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h index 6985eb5..a8a0199 100644 --- a/arch/mips/include/uapi/asm/kvm.h +++ b/arch/mips/include/uapi/asm/kvm.h @@ -19,6 +19,8 @@ * Some parts derived from the x86 version of this file. */ +#define __KVM_HAVE_READONLY_MEM + /* * for KVM_GET_REGS and KVM_SET_REGS * diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig index 7c56d6b..6506732 100644 --- a/arch/mips/kvm/Kconfig +++ b/arch/mips/kvm/Kconfig @@ -20,7 +20,9 @@ config KVM select EXPORT_UASM select PREEMPT_NOTIFIERS select ANON_INODES + select KVM_GENERIC_DIRTYLOG_READ_PROTECT select KVM_MMIO + select MMU_NOTIFIER select SRCU ---help--- Support for hosting Guest kernels. 
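The kvm_mmu_memory_cache added to kvm_host.h above exists so that the fault path never has to allocate while holding kvm->mmu_lock: the cache is topped up with GFP_KERNEL allocations beforehand and then drained without any possibility of failure. Below is a hedged sketch of that pattern; the helper names are illustrative stand-ins for what arch/mips/kvm/mmu.c actually implements, not the exact functions from this series.

static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, int min)
{
	void *page;

	/* Called with sleeping allowed, before taking kvm->mmu_lock. */
	BUG_ON(min > KVM_NR_MEM_OBJS);
	while (cache->nobjs < min) {
		page = (void *)__get_free_page(GFP_KERNEL);
		if (!page)
			return -ENOMEM;
		cache->objects[cache->nobjs++] = page;
	}
	return 0;
}

static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *cache)
{
	/* Fault path: a prior successful topup guarantees this never fails. */
	BUG_ON(!cache->nobjs);
	return cache->objects[--cache->nobjs];
}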
diff --git a/arch/mips/kvm/dyntrans.c b/arch/mips/kvm/dyntrans.c index 010cef2..f8e7725 100644 --- a/arch/mips/kvm/dyntrans.c +++ b/arch/mips/kvm/dyntrans.c @@ -13,6 +13,7 @@ #include <linux/err.h> #include <linux/highmem.h> #include <linux/kvm_host.h> +#include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/bootmem.h> @@ -29,28 +30,37 @@ static int kvm_mips_trans_replace(struct kvm_vcpu *vcpu, u32 *opc, union mips_instruction replace) { - unsigned long paddr, flags; - void *vaddr; - - if (KVM_GUEST_KSEGX((unsigned long)opc) == KVM_GUEST_KSEG0) { - paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, - (unsigned long)opc); - vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr))); - vaddr += paddr & ~PAGE_MASK; - memcpy(vaddr, (void *)&replace, sizeof(u32)); - local_flush_icache_range((unsigned long)vaddr, - (unsigned long)vaddr + 32); - kunmap_atomic(vaddr); - } else if (KVM_GUEST_KSEGX((unsigned long) opc) == KVM_GUEST_KSEG23) { - local_irq_save(flags); - memcpy((void *)opc, (void *)&replace, sizeof(u32)); - __local_flush_icache_user_range((unsigned long)opc, - (unsigned long)opc + 32); - local_irq_restore(flags); - } else { - kvm_err("%s: Invalid address: %p\n", __func__, opc); - return -EFAULT; + unsigned long vaddr = (unsigned long)opc; + int err; + +retry: + /* The GVA page table is still active so use the Linux TLB handlers */ + kvm_trap_emul_gva_lockless_begin(vcpu); + err = put_user(replace.word, opc); + kvm_trap_emul_gva_lockless_end(vcpu); + + if (unlikely(err)) { + /* + * We write protect clean pages in GVA page table so normal + * Linux TLB mod handler doesn't silently dirty the page. + * Its also possible we raced with a GVA invalidation. + * Try to force the page to become dirty. + */ + err = kvm_trap_emul_gva_fault(vcpu, vaddr, true); + if (unlikely(err)) { + kvm_info("%s: Address unwriteable: %p\n", + __func__, opc); + return -EFAULT; + } + + /* + * Try again. This will likely trigger a TLB refill, which will + * fetch the new dirty entry from the GVA page table, which + * should then succeed. + */ + goto retry; } + __local_flush_icache_user_range(vaddr, vaddr + 4); return 0; } diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index aa09374..d40cfaa 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -38,23 +38,25 @@ * Compute the return address and do emulate branch simulation, if required. * This function should be called only in branch delay slot active. */ -unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, - unsigned long instpc) +static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, + unsigned long *out) { unsigned int dspcontrol; union mips_instruction insn; struct kvm_vcpu_arch *arch = &vcpu->arch; long epc = instpc; - long nextpc = KVM_INVALID_INST; + long nextpc; + int err; - if (epc & 3) - goto unaligned; + if (epc & 3) { + kvm_err("%s: unaligned epc\n", __func__); + return -EINVAL; + } /* Read the instruction */ - insn.word = kvm_get_inst((u32 *) epc, vcpu); - - if (insn.word == KVM_INVALID_INST) - return KVM_INVALID_INST; + err = kvm_get_badinstrp((u32 *)epc, vcpu, &insn.word); + if (err) + return err; switch (insn.i_format.opcode) { /* jr and jalr are in r_format format. 
*/ @@ -66,6 +68,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, case jr_op: nextpc = arch->gprs[insn.r_format.rs]; break; + default: + return -EINVAL; } break; @@ -114,8 +118,11 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, nextpc = epc; break; case bposge32_op: - if (!cpu_has_dsp) - goto sigill; + if (!cpu_has_dsp) { + kvm_err("%s: DSP branch but not DSP ASE\n", + __func__); + return -EINVAL; + } dspcontrol = rddsp(0x01); @@ -125,6 +132,8 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, epc += 8; nextpc = epc; break; + default: + return -EINVAL; } break; @@ -189,7 +198,7 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, /* And now the FPA/cp1 branch instructions. */ case cop1_op: kvm_err("%s: unsupported cop1_op\n", __func__); - break; + return -EINVAL; #ifdef CONFIG_CPU_MIPSR6 /* R6 added the following compact branches with forbidden slots */ @@ -198,19 +207,19 @@ unsigned long kvm_compute_return_epc(struct kvm_vcpu *vcpu, /* only rt == 0 isn't compact branch */ if (insn.i_format.rt != 0) goto compact_branch; - break; + return -EINVAL; case pop10_op: case pop30_op: /* only rs == rt == 0 is reserved, rest are compact branches */ if (insn.i_format.rs != 0 || insn.i_format.rt != 0) goto compact_branch; - break; + return -EINVAL; case pop66_op: case pop76_op: /* only rs == 0 isn't compact branch */ if (insn.i_format.rs != 0) goto compact_branch; - break; + return -EINVAL; compact_branch: /* * If we've hit an exception on the forbidden slot, then @@ -221,42 +230,74 @@ compact_branch: break; #else compact_branch: - /* Compact branches not supported before R6 */ - break; + /* Fall through - Compact branches not supported before R6 */ #endif + default: + return -EINVAL; } - return nextpc; - -unaligned: - kvm_err("%s: unaligned epc\n", __func__); - return nextpc; - -sigill: - kvm_err("%s: DSP branch but not DSP ASE\n", __func__); - return nextpc; + *out = nextpc; + return 0; } enum emulation_result update_pc(struct kvm_vcpu *vcpu, u32 cause) { - unsigned long branch_pc; - enum emulation_result er = EMULATE_DONE; + int err; if (cause & CAUSEF_BD) { - branch_pc = kvm_compute_return_epc(vcpu, vcpu->arch.pc); - if (branch_pc == KVM_INVALID_INST) { - er = EMULATE_FAIL; - } else { - vcpu->arch.pc = branch_pc; - kvm_debug("BD update_pc(): New PC: %#lx\n", - vcpu->arch.pc); - } - } else + err = kvm_compute_return_epc(vcpu, vcpu->arch.pc, + &vcpu->arch.pc); + if (err) + return EMULATE_FAIL; + } else { vcpu->arch.pc += 4; + } kvm_debug("update_pc(): New PC: %#lx\n", vcpu->arch.pc); - return er; + return EMULATE_DONE; +} + +/** + * kvm_get_badinstr() - Get bad instruction encoding. + * @opc: Guest pointer to faulting instruction. + * @vcpu: KVM VCPU information. + * + * Gets the instruction encoding of the faulting instruction, using the saved + * BadInstr register value if it exists, otherwise falling back to reading guest + * memory at @opc. + * + * Returns: The instruction encoding of the faulting instruction. + */ +int kvm_get_badinstr(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + if (cpu_has_badinstr) { + *out = vcpu->arch.host_cp0_badinstr; + return 0; + } else { + return kvm_get_inst(opc, vcpu, out); + } +} + +/** + * kvm_get_badinstrp() - Get bad prior instruction encoding. + * @opc: Guest pointer to prior faulting instruction. + * @vcpu: KVM VCPU information. 
+ * + * Gets the instruction encoding of the prior faulting instruction (the branch + * containing the delay slot which faulted), using the saved BadInstrP register + * value if it exists, otherwise falling back to reading guest memory at @opc. + * + * Returns: The instruction encoding of the prior faulting instruction. + */ +int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + if (cpu_has_badinstrp) { + *out = vcpu->arch.host_cp0_badinstrp; + return 0; + } else { + return kvm_get_inst(opc, vcpu, out); + } } /** @@ -856,22 +897,30 @@ enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu) static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu, struct kvm_mips_tlb *tlb) { + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; int cpu, i; bool user; /* No need to flush for entries which are already invalid */ if (!((tlb->tlb_lo[0] | tlb->tlb_lo[1]) & ENTRYLO_V)) return; + /* Don't touch host kernel page tables or TLB mappings */ + if ((unsigned long)tlb->tlb_hi > 0x7fffffff) + return; /* User address space doesn't need flushing for KSeg2/3 changes */ user = tlb->tlb_hi < KVM_GUEST_KSEG0; preempt_disable(); + /* Invalidate page table entries */ + kvm_trap_emul_invalidate_gva(vcpu, tlb->tlb_hi & VPN2_MASK, user); + /* * Probe the shadow host TLB for the entry being overwritten, if one * matches, invalidate it */ - kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi); + kvm_mips_host_tlb_inv(vcpu, tlb->tlb_hi, user, true); /* Invalidate the whole ASID on other CPUs */ cpu = smp_processor_id(); @@ -879,8 +928,8 @@ static void kvm_mips_invalidate_guest_tlb(struct kvm_vcpu *vcpu, if (i == cpu) continue; if (user) - vcpu->arch.guest_user_asid[i] = 0; - vcpu->arch.guest_kernel_asid[i] = 0; + cpu_context(i, user_mm) = 0; + cpu_context(i, kern_mm) = 0; } preempt_enable(); @@ -1017,7 +1066,7 @@ unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu) unsigned int mask = MIPS_CONF_M; /* KScrExist */ - mask |= (unsigned int)vcpu->arch.kscratch_enabled << 16; + mask |= 0xfc << MIPS_CONF4_KSCREXIST_SHIFT; return mask; } @@ -1056,6 +1105,7 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; enum emulation_result er = EMULATE_DONE; u32 rt, rd, sel; unsigned long curr_pc; @@ -1150,14 +1200,13 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, er = EMULATE_FAIL; break; } -#define C0_EBASE_CORE_MASK 0xff if ((rd == MIPS_CP0_PRID) && (sel == 1)) { - /* Preserve CORE number */ - kvm_change_c0_guest_ebase(cop0, - ~(C0_EBASE_CORE_MASK), + /* + * Preserve core number, and keep the exception + * base in guest KSeg0. + */ + kvm_change_c0_guest_ebase(cop0, 0x1ffff000, vcpu->arch.gprs[rt]); - kvm_err("MTCz, cop0->reg[EBASE]: %#lx\n", - kvm_read_c0_guest_ebase(cop0)); } else if (rd == MIPS_CP0_TLB_HI && sel == 0) { u32 nasid = vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID; @@ -1169,6 +1218,17 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, nasid); /* + * Flush entries from the GVA page + * tables. + * Guest user page table will get + * flushed lazily on re-entry to guest + * user if the guest ASID actually + * changes. + */ + kvm_mips_flush_gva_pt(kern_mm->pgd, + KMF_KERN); + + /* * Regenerate/invalidate kernel MMU * context. 
* The user MMU context will be @@ -1178,13 +1238,10 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst, */ preempt_disable(); cpu = smp_processor_id(); - kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, - cpu, vcpu); - vcpu->arch.guest_kernel_asid[cpu] = - vcpu->arch.guest_kernel_mm.context.asid[cpu]; + get_new_mmu_context(kern_mm, cpu); for_each_possible_cpu(i) if (i != cpu) - vcpu->arch.guest_kernel_asid[i] = 0; + cpu_context(i, kern_mm) = 0; preempt_enable(); } kvm_write_c0_guest_entryhi(cop0, @@ -1639,12 +1696,56 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, return er; } +static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), + unsigned long curr_pc, + unsigned long addr, + struct kvm_run *run, + struct kvm_vcpu *vcpu, + u32 cause) +{ + int err; + + for (;;) { + /* Carefully attempt the cache operation */ + kvm_trap_emul_gva_lockless_begin(vcpu); + err = fn(addr); + kvm_trap_emul_gva_lockless_end(vcpu); + + if (likely(!err)) + return EMULATE_DONE; + + /* + * Try to handle the fault and retry, maybe we just raced with a + * GVA invalidation. + */ + switch (kvm_trap_emul_gva_fault(vcpu, addr, false)) { + case KVM_MIPS_GVA: + case KVM_MIPS_GPA: + /* bad virtual or physical address */ + return EMULATE_FAIL; + case KVM_MIPS_TLB: + /* no matching guest TLB */ + vcpu->arch.host_cp0_badvaddr = addr; + vcpu->arch.pc = curr_pc; + kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, vcpu); + return EMULATE_EXCEPT; + case KVM_MIPS_TLBINV: + /* invalid matching guest TLB */ + vcpu->arch.host_cp0_badvaddr = addr; + vcpu->arch.pc = curr_pc; + kvm_mips_emulate_tlbinv_ld(cause, NULL, run, vcpu); + return EMULATE_EXCEPT; + default: + break; + }; + } +} + enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, u32 *opc, u32 cause, struct kvm_run *run, struct kvm_vcpu *vcpu) { - struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; u32 cache, op_inst, op, base; s16 offset; @@ -1701,80 +1802,16 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst, goto done; } - preempt_disable(); - if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { - if (kvm_mips_host_tlb_lookup(vcpu, va) < 0 && - kvm_mips_handle_kseg0_tlb_fault(va, vcpu)) { - kvm_err("%s: handling mapped kseg0 tlb fault for %lx, vcpu: %p, ASID: %#lx\n", - __func__, va, vcpu, read_c0_entryhi()); - er = EMULATE_FAIL; - preempt_enable(); - goto done; - } - } else if ((KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0) || - KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { - int index; - - /* If an entry already exists then skip */ - if (kvm_mips_host_tlb_lookup(vcpu, va) >= 0) - goto skip_fault; - - /* - * If address not in the guest TLB, then give the guest a fault, - * the resulting handler will do the right thing - */ - index = kvm_mips_guest_tlb_lookup(vcpu, (va & VPN2_MASK) | - (kvm_read_c0_guest_entryhi - (cop0) & KVM_ENTRYHI_ASID)); - - if (index < 0) { - vcpu->arch.host_cp0_badvaddr = va; - vcpu->arch.pc = curr_pc; - er = kvm_mips_emulate_tlbmiss_ld(cause, NULL, run, - vcpu); - preempt_enable(); - goto dont_update_pc; - } else { - struct kvm_mips_tlb *tlb = &vcpu->arch.guest_tlb[index]; - /* - * Check if the entry is valid, if not then setup a TLB - * invalid exception to the guest - */ - if (!TLB_IS_VALID(*tlb, va)) { - vcpu->arch.host_cp0_badvaddr = va; - vcpu->arch.pc = curr_pc; - er = kvm_mips_emulate_tlbinv_ld(cause, NULL, - run, vcpu); - preempt_enable(); - goto dont_update_pc; - } - /* - * We fault an entry from the guest 
tlb to the - * shadow host TLB - */ - if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) { - kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", - __func__, va, index, vcpu, - read_c0_entryhi()); - er = EMULATE_FAIL; - preempt_enable(); - goto done; - } - } - } else { - kvm_err("INVALID CACHE INDEX/ADDRESS (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", - cache, op, base, arch->gprs[base], offset); - er = EMULATE_FAIL; - preempt_enable(); - goto done; - - } - -skip_fault: /* XXXKYMA: Only a subset of cache ops are supported, used by Linux */ if (op_inst == Hit_Writeback_Inv_D || op_inst == Hit_Invalidate_D) { - flush_dcache_line(va); - + /* + * Perform the dcache part of icache synchronisation on the + * guest's behalf. + */ + er = kvm_mips_guest_cache_op(protected_writeback_dcache_line, + curr_pc, va, run, vcpu, cause); + if (er != EMULATE_DONE) + goto done; #ifdef CONFIG_KVM_MIPS_DYN_TRANS /* * Replace the CACHE instruction, with a SYNCI, not the same, @@ -1783,8 +1820,15 @@ skip_fault: kvm_mips_trans_cache_va(inst, opc, vcpu); #endif } else if (op_inst == Hit_Invalidate_I) { - flush_dcache_line(va); - flush_icache_line(va); + /* Perform the icache synchronisation on the guest's behalf */ + er = kvm_mips_guest_cache_op(protected_writeback_dcache_line, + curr_pc, va, run, vcpu, cause); + if (er != EMULATE_DONE) + goto done; + er = kvm_mips_guest_cache_op(protected_flush_icache_line, + curr_pc, va, run, vcpu, cause); + if (er != EMULATE_DONE) + goto done; #ifdef CONFIG_KVM_MIPS_DYN_TRANS /* Replace the CACHE instruction, with a SYNCI */ @@ -1796,17 +1840,13 @@ skip_fault: er = EMULATE_FAIL; } - preempt_enable(); done: /* Rollback PC only if emulation was unsuccessful */ if (er == EMULATE_FAIL) vcpu->arch.pc = curr_pc; - -dont_update_pc: - /* - * This is for exceptions whose emulation updates the PC, so do not - * overwrite the PC under any circumstances - */ + /* Guest exception needs guest to resume */ + if (er == EMULATE_EXCEPT) + er = EMULATE_DONE; return er; } @@ -1817,12 +1857,14 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, { union mips_instruction inst; enum emulation_result er = EMULATE_DONE; + int err; /* Fetch the instruction. */ if (cause & CAUSEF_BD) opc += 1; - - inst.word = kvm_get_inst(opc, vcpu); + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) + return EMULATE_FAIL; switch (inst.r_format.opcode) { case cop0_op: @@ -1874,6 +1916,22 @@ unknown: return er; } +/** + * kvm_mips_guest_exception_base() - Find guest exception vector base address. + * + * Returns: The base address of the current guest exception vector, taking + * both Guest.CP0_Status.BEV and Guest.CP0_EBase into account. 
+ */ +long kvm_mips_guest_exception_base(struct kvm_vcpu *vcpu) +{ + struct mips_coproc *cop0 = vcpu->arch.cop0; + + if (kvm_read_c0_guest_status(cop0) & ST0_BEV) + return KVM_GUEST_CKSEG1ADDR(0x1fc00200); + else + return kvm_read_c0_guest_ebase(cop0) & MIPS_EBASE_BASE; +} + enum emulation_result kvm_mips_emulate_syscall(u32 cause, u32 *opc, struct kvm_run *run, @@ -1899,7 +1957,7 @@ enum emulation_result kvm_mips_emulate_syscall(u32 cause, (EXCCODE_SYS << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver SYSCALL when EXL is already set\n"); @@ -1933,13 +1991,13 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, arch->pc); /* set pc to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x0; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0; } else { kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", arch->pc); - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } kvm_change_c0_guest_cause(cop0, (0xff), @@ -1949,8 +2007,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_ld(u32 cause, kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); /* XXXKYMA: is the context register used by linux??? */ kvm_write_c0_guest_entryhi(cop0, entryhi); - /* Blow away the shadow host TLBs */ - kvm_mips_flush_host_tlb(1); return EMULATE_DONE; } @@ -1978,16 +2034,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, kvm_debug("[EXL == 0] delivering TLB INV @ pc %#lx\n", arch->pc); - - /* set pc to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; - } else { kvm_debug("[EXL == 1] delivering TLB MISS @ pc %#lx\n", arch->pc); - arch->pc = KVM_GUEST_KSEG0 + 0x180; } + /* set pc to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + kvm_change_c0_guest_cause(cop0, (0xff), (EXCCODE_TLBL << CAUSEB_EXCCODE)); @@ -1995,8 +2049,6 @@ enum emulation_result kvm_mips_emulate_tlbinv_ld(u32 cause, kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); /* XXXKYMA: is the context register used by linux??? */ kvm_write_c0_guest_entryhi(cop0, entryhi); - /* Blow away the shadow host TLBs */ - kvm_mips_flush_host_tlb(1); return EMULATE_DONE; } @@ -2025,11 +2077,11 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, arch->pc); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x0; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x0; } else { kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", arch->pc); - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } kvm_change_c0_guest_cause(cop0, (0xff), @@ -2039,8 +2091,6 @@ enum emulation_result kvm_mips_emulate_tlbmiss_st(u32 cause, kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); /* XXXKYMA: is the context register used by linux??? 
*/ kvm_write_c0_guest_entryhi(cop0, entryhi); - /* Blow away the shadow host TLBs */ - kvm_mips_flush_host_tlb(1); return EMULATE_DONE; } @@ -2067,15 +2117,14 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, kvm_debug("[EXL == 0] Delivering TLB MISS @ pc %#lx\n", arch->pc); - - /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; } else { kvm_debug("[EXL == 1] Delivering TLB MISS @ pc %#lx\n", arch->pc); - arch->pc = KVM_GUEST_KSEG0 + 0x180; } + /* Set PC to the exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + kvm_change_c0_guest_cause(cop0, (0xff), (EXCCODE_TLBS << CAUSEB_EXCCODE)); @@ -2083,41 +2132,10 @@ enum emulation_result kvm_mips_emulate_tlbinv_st(u32 cause, kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); /* XXXKYMA: is the context register used by linux??? */ kvm_write_c0_guest_entryhi(cop0, entryhi); - /* Blow away the shadow host TLBs */ - kvm_mips_flush_host_tlb(1); return EMULATE_DONE; } -/* TLBMOD: store into address matching TLB with Dirty bit off */ -enum emulation_result kvm_mips_handle_tlbmod(u32 cause, u32 *opc, - struct kvm_run *run, - struct kvm_vcpu *vcpu) -{ - enum emulation_result er = EMULATE_DONE; -#ifdef DEBUG - struct mips_coproc *cop0 = vcpu->arch.cop0; - unsigned long entryhi = (vcpu->arch.host_cp0_badvaddr & VPN2_MASK) | - (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); - int index; - - /* If address not in the guest TLB, then we are in trouble */ - index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); - if (index < 0) { - /* XXXKYMA Invalidate and retry */ - kvm_mips_host_tlb_inv(vcpu, vcpu->arch.host_cp0_badvaddr); - kvm_err("%s: host got TLBMOD for %#lx but entry not present in Guest TLB\n", - __func__, entryhi); - kvm_mips_dump_guest_tlbs(vcpu); - kvm_mips_dump_host_tlbs(); - return EMULATE_FAIL; - } -#endif - - er = kvm_mips_emulate_tlbmod(cause, opc, run, vcpu); - return er; -} - enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, u32 *opc, struct kvm_run *run, @@ -2140,14 +2158,13 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, kvm_debug("[EXL == 0] Delivering TLB MOD @ pc %#lx\n", arch->pc); - - arch->pc = KVM_GUEST_KSEG0 + 0x180; } else { kvm_debug("[EXL == 1] Delivering TLB MOD @ pc %#lx\n", arch->pc); - arch->pc = KVM_GUEST_KSEG0 + 0x180; } + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; + kvm_change_c0_guest_cause(cop0, (0xff), (EXCCODE_MOD << CAUSEB_EXCCODE)); @@ -2155,8 +2172,6 @@ enum emulation_result kvm_mips_emulate_tlbmod(u32 cause, kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); /* XXXKYMA: is the context register used by linux??? 
*/ kvm_write_c0_guest_entryhi(cop0, entryhi); - /* Blow away the shadow host TLBs */ - kvm_mips_flush_host_tlb(1); return EMULATE_DONE; } @@ -2181,7 +2196,7 @@ enum emulation_result kvm_mips_emulate_fpu_exc(u32 cause, } - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; kvm_change_c0_guest_cause(cop0, (0xff), (EXCCODE_CPU << CAUSEB_EXCCODE)); @@ -2215,7 +2230,7 @@ enum emulation_result kvm_mips_emulate_ri_exc(u32 cause, (EXCCODE_RI << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver RI when EXL is already set\n"); @@ -2250,7 +2265,7 @@ enum emulation_result kvm_mips_emulate_bp_exc(u32 cause, (EXCCODE_BP << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver BP when EXL is already set\n"); @@ -2285,7 +2300,7 @@ enum emulation_result kvm_mips_emulate_trap_exc(u32 cause, (EXCCODE_TR << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver TRAP when EXL is already set\n"); @@ -2320,7 +2335,7 @@ enum emulation_result kvm_mips_emulate_msafpe_exc(u32 cause, (EXCCODE_MSAFPE << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver MSAFPE when EXL is already set\n"); @@ -2355,7 +2370,7 @@ enum emulation_result kvm_mips_emulate_fpe_exc(u32 cause, (EXCCODE_FPE << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver FPE when EXL is already set\n"); @@ -2390,7 +2405,7 @@ enum emulation_result kvm_mips_emulate_msadis_exc(u32 cause, (EXCCODE_MSADIS << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; } else { kvm_err("Trying to deliver MSADIS when EXL is already set\n"); @@ -2409,6 +2424,7 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc, enum emulation_result er = EMULATE_DONE; unsigned long curr_pc; union mips_instruction inst; + int err; /* * Update PC and hold onto current PC in case there is @@ -2422,11 +2438,9 @@ enum emulation_result kvm_mips_handle_ri(u32 cause, u32 *opc, /* Fetch the instruction. 
*/ if (cause & CAUSEF_BD) opc += 1; - - inst.word = kvm_get_inst(opc, vcpu); - - if (inst.word == KVM_INVALID_INST) { - kvm_err("%s: Cannot get inst @ %p\n", __func__, opc); + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + kvm_err("%s: Cannot get inst @ %p (%d)\n", __func__, opc, err); return EMULATE_FAIL; } @@ -2557,7 +2571,7 @@ static enum emulation_result kvm_mips_emulate_exc(u32 cause, (exccode << CAUSEB_EXCCODE)); /* Set PC to the exception entry point */ - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc = kvm_mips_guest_exception_base(vcpu) + 0x180; kvm_write_c0_guest_badvaddr(cop0, vcpu->arch.host_cp0_badvaddr); kvm_debug("Delivering EXC %d @ pc %#lx, badVaddr: %#lx\n", @@ -2670,7 +2684,8 @@ enum emulation_result kvm_mips_check_privilege(u32 cause, enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, u32 *opc, struct kvm_run *run, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu, + bool write_fault) { enum emulation_result er = EMULATE_DONE; u32 exccode = (cause >> CAUSEB_EXCCODE) & 0x1f; @@ -2726,7 +2741,8 @@ enum emulation_result kvm_mips_handle_tlbmiss(u32 cause, * OK we have a Guest TLB entry, now inject it into the * shadow host TLB */ - if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb)) { + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, va, + write_fault)) { kvm_err("%s: handling mapped seg tlb fault for %lx, index: %u, vcpu: %p, ASID: %#lx\n", __func__, va, index, vcpu, read_c0_entryhi()); diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c index e92fb19..c5b254c 100644 --- a/arch/mips/kvm/entry.c +++ b/arch/mips/kvm/entry.c @@ -12,8 +12,11 @@ */ #include <linux/kvm_host.h> +#include <linux/log2.h> +#include <asm/mmu_context.h> #include <asm/msa.h> #include <asm/setup.h> +#include <asm/tlbex.h> #include <asm/uasm.h> /* Register names */ @@ -50,6 +53,8 @@ /* Some CP0 registers */ #define C0_HWRENA 7, 0 #define C0_BADVADDR 8, 0 +#define C0_BADINSTR 8, 1 +#define C0_BADINSTRP 8, 2 #define C0_ENTRYHI 10, 0 #define C0_STATUS 12, 0 #define C0_CAUSE 13, 0 @@ -89,6 +94,21 @@ static void *kvm_mips_build_ret_from_exit(void *addr); static void *kvm_mips_build_ret_to_guest(void *addr); static void *kvm_mips_build_ret_to_host(void *addr); +/* + * The version of this function in tlbex.c uses current_cpu_type(), but for KVM + * we assume symmetry. + */ +static int c0_kscratch(void) +{ + switch (boot_cpu_type()) { + case CPU_XLP: + case CPU_XLR: + return 22; + default: + return 31; + } +} + /** * kvm_mips_entry_setup() - Perform global setup for entry code. * @@ -103,18 +123,21 @@ int kvm_mips_entry_setup(void) * We prefer to use KScratchN registers if they are available over the * defaults above, which may not work on all cores. 
*/ - unsigned int kscratch_mask = cpu_data[0].kscratch_mask & 0xfc; + unsigned int kscratch_mask = cpu_data[0].kscratch_mask; + + if (pgd_reg != -1) + kscratch_mask &= ~BIT(pgd_reg); /* Pick a scratch register for storing VCPU */ if (kscratch_mask) { - scratch_vcpu[0] = 31; + scratch_vcpu[0] = c0_kscratch(); scratch_vcpu[1] = ffs(kscratch_mask) - 1; kscratch_mask &= ~BIT(scratch_vcpu[1]); } /* Pick a scratch register to use as a temp for saving state */ if (kscratch_mask) { - scratch_tmp[0] = 31; + scratch_tmp[0] = c0_kscratch(); scratch_tmp[1] = ffs(kscratch_mask) - 1; kscratch_mask &= ~BIT(scratch_tmp[1]); } @@ -130,7 +153,7 @@ static void kvm_mips_build_save_scratch(u32 **p, unsigned int tmp, UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); /* Save the temp scratch register value in cp0_cause of stack frame */ - if (scratch_tmp[0] == 31) { + if (scratch_tmp[0] == c0_kscratch()) { UASM_i_MFC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); UASM_i_SW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); } @@ -146,7 +169,7 @@ static void kvm_mips_build_restore_scratch(u32 **p, unsigned int tmp, UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_epc), frame); UASM_i_MTC0(p, tmp, scratch_vcpu[0], scratch_vcpu[1]); - if (scratch_tmp[0] == 31) { + if (scratch_tmp[0] == c0_kscratch()) { UASM_i_LW(p, tmp, offsetof(struct pt_regs, cp0_cause), frame); UASM_i_MTC0(p, tmp, scratch_tmp[0], scratch_tmp[1]); } @@ -286,23 +309,26 @@ static void *kvm_mips_build_enter_guest(void *addr) uasm_i_andi(&p, T0, T0, KSU_USER | ST0_ERL | ST0_EXL); uasm_i_xori(&p, T0, T0, KSU_USER); uasm_il_bnez(&p, &r, T0, label_kernel_asid); - UASM_i_ADDIU(&p, T1, K1, - offsetof(struct kvm_vcpu_arch, guest_kernel_asid)); + UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, + guest_kernel_mm.context.asid)); /* else user */ - UASM_i_ADDIU(&p, T1, K1, - offsetof(struct kvm_vcpu_arch, guest_user_asid)); + UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch, + guest_user_mm.context.asid)); uasm_l_kernel_asid(&l, p); /* t1: contains the base of the ASID array, need to get the cpu id */ /* smp_processor_id */ uasm_i_lw(&p, T2, offsetof(struct thread_info, cpu), GP); - /* x4 */ - uasm_i_sll(&p, T2, T2, 2); + /* index the ASID array */ + uasm_i_sll(&p, T2, T2, ilog2(sizeof(long))); UASM_i_ADDU(&p, T3, T1, T2); - uasm_i_lw(&p, K0, 0, T3); + UASM_i_LW(&p, K0, 0, T3); #ifdef CONFIG_MIPS_ASID_BITS_VARIABLE - /* x sizeof(struct cpuinfo_mips)/4 */ - uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/4); + /* + * reuse ASID array offset + * cpuinfo_mips is a multiple of sizeof(long) + */ + uasm_i_addiu(&p, T3, ZERO, sizeof(struct cpuinfo_mips)/sizeof(long)); uasm_i_mul(&p, T2, T2, T3); UASM_i_LA_mostly(&p, AT, (long)&cpu_data[0].asid_mask); @@ -312,7 +338,20 @@ static void *kvm_mips_build_enter_guest(void *addr) #else uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID); #endif - uasm_i_mtc0(&p, K0, C0_ENTRYHI); + + /* + * Set up KVM T&E GVA pgd. + * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD(): + * - call tlbmiss_handler_setup_pgd(mm->pgd) + * - but skips write into CP0_PWBase for now + */ + UASM_i_LW(&p, A0, (int)offsetof(struct mm_struct, pgd) - + (int)offsetof(struct mm_struct, context.asid), T1); + + UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd); + uasm_i_jalr(&p, RA, T9); + uasm_i_mtc0(&p, K0, C0_ENTRYHI); + uasm_i_ehb(&p); /* Disable RDHWR access */ @@ -348,6 +387,80 @@ static void *kvm_mips_build_enter_guest(void *addr) } /** + * kvm_mips_build_tlb_refill_exception() - Assemble TLB refill handler. 
+ * @addr: Address to start writing code. + * @handler: Address of common handler (within range of @addr). + * + * Assemble TLB refill exception fast path handler for guest execution. + * + * Returns: Next address after end of written function. + */ +void *kvm_mips_build_tlb_refill_exception(void *addr, void *handler) +{ + u32 *p = addr; + struct uasm_label labels[2]; + struct uasm_reloc relocs[2]; + struct uasm_label *l = labels; + struct uasm_reloc *r = relocs; + + memset(labels, 0, sizeof(labels)); + memset(relocs, 0, sizeof(relocs)); + + /* Save guest k1 into scratch register */ + UASM_i_MTC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Get the VCPU pointer from the VCPU scratch register */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Save guest k0 into VCPU structure */ + UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); + + /* + * Some of the common tlbex code uses current_cpu_type(). For KVM we + * assume symmetry and just disable preemption to silence the warning. + */ + preempt_disable(); + + /* + * Now for the actual refill bit. A lot of this can be common with the + * Linux TLB refill handler, however we don't need to handle so many + * cases. We only need to handle user mode refills, and user mode runs + * with 32-bit addressing. + * + * Therefore the branch to label_vmalloc generated by build_get_pmde64() + * that isn't resolved should never actually get taken and is harmless + * to leave in place for now. + */ + +#ifdef CONFIG_64BIT + build_get_pmde64(&p, &l, &r, K0, K1); /* get pmd in K1 */ +#else + build_get_pgde32(&p, K0, K1); /* get pgd in K1 */ +#endif + + /* we don't support huge pages yet */ + + build_get_ptep(&p, K0, K1); + build_update_entries(&p, K0, K1); + build_tlb_write_entry(&p, &l, &r, tlb_random); + + preempt_enable(); + + /* Get the VCPU pointer from the VCPU scratch register again */ + UASM_i_MFC0(&p, K1, scratch_vcpu[0], scratch_vcpu[1]); + + /* Restore the guest's k0/k1 registers */ + UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu, arch.gprs[K0]), K1); + uasm_i_ehb(&p); + UASM_i_MFC0(&p, K1, scratch_tmp[0], scratch_tmp[1]); + + /* Jump to guest */ + uasm_i_eret(&p); + + return p; +} + +/** * kvm_mips_build_exception() - Assemble first level guest exception handler. * @addr: Address to start writing code. * @handler: Address of common handler (within range of @addr). 
@@ -468,6 +581,18 @@ void *kvm_mips_build_exit(void *addr) uasm_i_mfc0(&p, K0, C0_CAUSE); uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, host_cp0_cause), K1); + if (cpu_has_badinstr) { + uasm_i_mfc0(&p, K0, C0_BADINSTR); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, + host_cp0_badinstr), K1); + } + + if (cpu_has_badinstrp) { + uasm_i_mfc0(&p, K0, C0_BADINSTRP); + uasm_i_sw(&p, K0, offsetof(struct kvm_vcpu_arch, + host_cp0_badinstrp), K1); + } + /* Now restore the host state just enough to run the handlers */ /* Switch EBASE to the one used by Linux */ diff --git a/arch/mips/kvm/interrupt.c b/arch/mips/kvm/interrupt.c index e88403b..aa0a1a0 100644 --- a/arch/mips/kvm/interrupt.c +++ b/arch/mips/kvm/interrupt.c @@ -183,10 +183,11 @@ int kvm_mips_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority, (exccode << CAUSEB_EXCCODE)); /* XXXSL Set PC to the interrupt exception entry point */ + arch->pc = kvm_mips_guest_exception_base(vcpu); if (kvm_read_c0_guest_cause(cop0) & CAUSEF_IV) - arch->pc = KVM_GUEST_KSEG0 + 0x200; + arch->pc += 0x200; else - arch->pc = KVM_GUEST_KSEG0 + 0x180; + arch->pc += 0x180; clear_bit(priority, &vcpu->arch.pending_exceptions); } diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 29ec9ab..ed81e5a 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -22,6 +22,7 @@ #include <asm/page.h> #include <asm/cacheflush.h> #include <asm/mmu_context.h> +#include <asm/pgalloc.h> #include <asm/pgtable.h> #include <linux/kvm_host.h> @@ -63,18 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { {NULL} }; -static int kvm_mips_reset_vcpu(struct kvm_vcpu *vcpu) -{ - int i; - - for_each_possible_cpu(i) { - vcpu->arch.guest_kernel_asid[i] = 0; - vcpu->arch.guest_user_asid[i] = 0; - } - - return 0; -} - /* * XXXKYMA: We are simulatoring a processor that has the WII bit set in * Config7, so we are "runnable" if interrupts are pending @@ -104,39 +93,12 @@ void kvm_arch_check_processor_compat(void *rtn) *(int *)rtn = 0; } -static void kvm_mips_init_tlbs(struct kvm *kvm) -{ - unsigned long wired; - - /* - * Add a wired entry to the TLB, it is used to map the commpage to - * the Guest kernel - */ - wired = read_c0_wired(); - write_c0_wired(wired + 1); - mtc0_tlbw_hazard(); - kvm->arch.commpage_tlb = wired; - - kvm_debug("[%d] commpage TLB: %d\n", smp_processor_id(), - kvm->arch.commpage_tlb); -} - -static void kvm_mips_init_vm_percpu(void *arg) -{ - struct kvm *kvm = (struct kvm *)arg; - - kvm_mips_init_tlbs(kvm); - kvm_mips_callbacks->vm_init(kvm); - -} - int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { - if (atomic_inc_return(&kvm_mips_instance) == 1) { - kvm_debug("%s: 1st KVM instance, setup host TLB parameters\n", - __func__); - on_each_cpu(kvm_mips_init_vm_percpu, kvm, 1); - } + /* Allocate page table to map GPA -> RPA */ + kvm->arch.gpa_mm.pgd = kvm_pgd_alloc(); + if (!kvm->arch.gpa_mm.pgd) + return -ENOMEM; return 0; } @@ -156,13 +118,6 @@ void kvm_mips_free_vcpus(struct kvm *kvm) unsigned int i; struct kvm_vcpu *vcpu; - /* Put the pages we reserved for the guest pmap */ - for (i = 0; i < kvm->arch.guest_pmap_npages; i++) { - if (kvm->arch.guest_pmap[i] != KVM_INVALID_PAGE) - kvm_release_pfn_clean(kvm->arch.guest_pmap[i]); - } - kfree(kvm->arch.guest_pmap); - kvm_for_each_vcpu(i, vcpu, kvm) { kvm_arch_vcpu_free(vcpu); } @@ -177,25 +132,17 @@ void kvm_mips_free_vcpus(struct kvm *kvm) mutex_unlock(&kvm->lock); } -static void kvm_mips_uninit_tlbs(void *arg) +static void kvm_mips_free_gpa_pt(struct kvm *kvm) { - /* Restore wired 
count */ - write_c0_wired(0); - mtc0_tlbw_hazard(); - /* Clear out all the TLBs */ - kvm_local_flush_tlb_all(); + /* It should always be safe to remove after flushing the whole range */ + WARN_ON(!kvm_mips_flush_gpa_pt(kvm, 0, ~0)); + pgd_free(NULL, kvm->arch.gpa_mm.pgd); } void kvm_arch_destroy_vm(struct kvm *kvm) { kvm_mips_free_vcpus(kvm); - - /* If this is the last instance, restore wired count */ - if (atomic_dec_return(&kvm_mips_instance) == 0) { - kvm_debug("%s: last KVM instance, restoring TLB parameters\n", - __func__); - on_each_cpu(kvm_mips_uninit_tlbs, NULL, 1); - } + kvm_mips_free_gpa_pt(kvm); } long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, @@ -210,6 +157,32 @@ int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot, return 0; } +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + /* Flush whole GPA */ + kvm_mips_flush_gpa_pt(kvm, 0, ~0); + + /* Let implementation do the rest */ + kvm_mips_callbacks->flush_shadow_all(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + /* + * The slot has been made invalid (ready for moving or deletion), so we + * need to ensure that it can no longer be accessed by any guest VCPUs. + */ + + spin_lock(&kvm->mmu_lock); + /* Flush slot from GPA */ + kvm_mips_flush_gpa_pt(kvm, slot->base_gfn, + slot->base_gfn + slot->npages - 1); + /* Let implementation do the rest */ + kvm_mips_callbacks->flush_shadow_memslot(kvm, slot); + spin_unlock(&kvm->mmu_lock); +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, const struct kvm_userspace_memory_region *mem, @@ -224,35 +197,32 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - unsigned long npages = 0; - int i; + int needs_flush; kvm_debug("%s: kvm: %p slot: %d, GPA: %llx, size: %llx, QVA: %llx\n", __func__, kvm, mem->slot, mem->guest_phys_addr, mem->memory_size, mem->userspace_addr); - /* Setup Guest PMAP table */ - if (!kvm->arch.guest_pmap) { - if (mem->slot == 0) - npages = mem->memory_size >> PAGE_SHIFT; - - if (npages) { - kvm->arch.guest_pmap_npages = npages; - kvm->arch.guest_pmap = - kzalloc(npages * sizeof(unsigned long), GFP_KERNEL); - - if (!kvm->arch.guest_pmap) { - kvm_err("Failed to allocate guest PMAP\n"); - return; - } - - kvm_debug("Allocated space for Guest PMAP Table (%ld pages) @ %p\n", - npages, kvm->arch.guest_pmap); - - /* Now setup the page table */ - for (i = 0; i < npages; i++) - kvm->arch.guest_pmap[i] = KVM_INVALID_PAGE; - } + /* + * If dirty page logging is enabled, write protect all pages in the slot + * ready for dirty logging. + * + * There is no need to do this in any of the following cases: + * CREATE: No dirty mappings will already exist. 
+ * MOVE/DELETE: The old mappings will already have been cleaned up by + * kvm_arch_flush_shadow_memslot() + */ + if (change == KVM_MR_FLAGS_ONLY && + (!(old->flags & KVM_MEM_LOG_DIRTY_PAGES) && + new->flags & KVM_MEM_LOG_DIRTY_PAGES)) { + spin_lock(&kvm->mmu_lock); + /* Write protect GPA page table entries */ + needs_flush = kvm_mips_mkclean_gpa_pt(kvm, new->base_gfn, + new->base_gfn + new->npages - 1); + /* Let implementation do the rest */ + if (needs_flush) + kvm_mips_callbacks->flush_shadow_memslot(kvm, new); + spin_unlock(&kvm->mmu_lock); } } @@ -276,7 +246,7 @@ static inline void dump_handler(const char *symbol, void *start, void *end) struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) { int err, size; - void *gebase, *p, *handler; + void *gebase, *p, *handler, *refill_start, *refill_end; int i; struct kvm_vcpu *vcpu = kzalloc(sizeof(struct kvm_vcpu), GFP_KERNEL); @@ -329,8 +299,9 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) /* Build guest exception vectors dynamically in unmapped memory */ handler = gebase + 0x2000; - /* TLB Refill, EXL = 0 */ - kvm_mips_build_exception(gebase, handler); + /* TLB refill */ + refill_start = gebase; + refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler); /* General Exception Entry point */ kvm_mips_build_exception(gebase + 0x180, handler); @@ -356,6 +327,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) pr_debug("#include <asm/regdef.h>\n"); pr_debug("\n"); dump_handler("kvm_vcpu_run", vcpu->arch.vcpu_run, p); + dump_handler("kvm_tlb_refill", refill_start, refill_end); dump_handler("kvm_gen_exc", gebase + 0x180, gebase + 0x200); dump_handler("kvm_exit", gebase + 0x2000, vcpu->arch.vcpu_run); @@ -406,6 +378,7 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvm_mips_dump_stats(vcpu); + kvm_mmu_free_memory_caches(vcpu); kfree(vcpu->arch.guest_ebase); kfree(vcpu->arch.kseg0_commpage); kfree(vcpu); @@ -422,37 +395,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } -/* Must be called with preemption disabled, just before entering guest */ -static void kvm_mips_check_asids(struct kvm_vcpu *vcpu) -{ - struct mips_coproc *cop0 = vcpu->arch.cop0; - int i, cpu = smp_processor_id(); - unsigned int gasid; - - /* - * Lazy host ASID regeneration for guest user mode. - * If the guest ASID has changed since the last guest usermode - * execution, regenerate the host ASID so as to invalidate stale TLB - * entries. 
- */ - if (!KVM_GUEST_KERNEL_MODE(vcpu)) { - gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID; - if (gasid != vcpu->arch.last_user_gasid) { - kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, - vcpu); - vcpu->arch.guest_user_asid[cpu] = - vcpu->arch.guest_user_mm.context.asid[cpu]; - for_each_possible_cpu(i) - if (i != cpu) - vcpu->arch.guest_user_asid[cpu] = 0; - vcpu->arch.last_user_gasid = gasid; - } - } -} - int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { - int r = 0; + int r = -EINTR; sigset_t sigsaved; if (vcpu->sigset_active) @@ -464,31 +409,30 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; } + if (run->immediate_exit) + goto out; + lose_fpu(1); local_irq_disable(); - /* Check if we have any exceptions/interrupts pending */ - kvm_mips_deliver_interrupts(vcpu, - kvm_read_c0_guest_cause(vcpu->arch.cop0)); - guest_enter_irqoff(); - - /* Disable hardware page table walking while in guest */ - htw_stop(); - trace_kvm_enter(vcpu); - kvm_mips_check_asids(vcpu); - - r = vcpu->arch.vcpu_run(run, vcpu); - trace_kvm_out(vcpu); + /* + * Make sure the read of VCPU requests in vcpu_run() callback is not + * reordered ahead of the write to vcpu->mode, or we could miss a TLB + * flush request while the requester sees the VCPU as outside of guest + * mode and not needing an IPI. + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); - /* Re-enable HTW before enabling interrupts */ - htw_start(); + r = kvm_mips_callbacks->vcpu_run(run, vcpu); + trace_kvm_out(vcpu); guest_exit_irqoff(); local_irq_enable(); +out: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -580,33 +524,6 @@ static u64 kvm_mips_get_one_regs[] = { KVM_REG_MIPS_LO, #endif KVM_REG_MIPS_PC, - - KVM_REG_MIPS_CP0_INDEX, - KVM_REG_MIPS_CP0_CONTEXT, - KVM_REG_MIPS_CP0_USERLOCAL, - KVM_REG_MIPS_CP0_PAGEMASK, - KVM_REG_MIPS_CP0_WIRED, - KVM_REG_MIPS_CP0_HWRENA, - KVM_REG_MIPS_CP0_BADVADDR, - KVM_REG_MIPS_CP0_COUNT, - KVM_REG_MIPS_CP0_ENTRYHI, - KVM_REG_MIPS_CP0_COMPARE, - KVM_REG_MIPS_CP0_STATUS, - KVM_REG_MIPS_CP0_CAUSE, - KVM_REG_MIPS_CP0_EPC, - KVM_REG_MIPS_CP0_PRID, - KVM_REG_MIPS_CP0_CONFIG, - KVM_REG_MIPS_CP0_CONFIG1, - KVM_REG_MIPS_CP0_CONFIG2, - KVM_REG_MIPS_CP0_CONFIG3, - KVM_REG_MIPS_CP0_CONFIG4, - KVM_REG_MIPS_CP0_CONFIG5, - KVM_REG_MIPS_CP0_CONFIG7, - KVM_REG_MIPS_CP0_ERROREPC, - - KVM_REG_MIPS_COUNT_CTL, - KVM_REG_MIPS_COUNT_RESUME, - KVM_REG_MIPS_COUNT_HZ, }; static u64 kvm_mips_get_one_regs_fpu[] = { @@ -619,15 +536,6 @@ static u64 kvm_mips_get_one_regs_msa[] = { KVM_REG_MIPS_MSA_CSR, }; -static u64 kvm_mips_get_one_regs_kscratch[] = { - KVM_REG_MIPS_CP0_KSCRATCH1, - KVM_REG_MIPS_CP0_KSCRATCH2, - KVM_REG_MIPS_CP0_KSCRATCH3, - KVM_REG_MIPS_CP0_KSCRATCH4, - KVM_REG_MIPS_CP0_KSCRATCH5, - KVM_REG_MIPS_CP0_KSCRATCH6, -}; - static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) { unsigned long ret; @@ -641,7 +549,6 @@ static unsigned long kvm_mips_num_regs(struct kvm_vcpu *vcpu) } if (kvm_mips_guest_can_have_msa(&vcpu->arch)) ret += ARRAY_SIZE(kvm_mips_get_one_regs_msa) + 32; - ret += __arch_hweight8(vcpu->arch.kscratch_enabled); ret += kvm_mips_callbacks->num_regs(vcpu); return ret; @@ -694,16 +601,6 @@ static int kvm_mips_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) } } - for (i = 0; i < 6; ++i) { - if (!(vcpu->arch.kscratch_enabled & BIT(i + 2))) - continue; - - if (copy_to_user(indices, &kvm_mips_get_one_regs_kscratch[i], - sizeof(kvm_mips_get_one_regs_kscratch[i]))) - return -EFAULT; - ++indices; - 
} - return kvm_mips_callbacks->copy_reg_indices(vcpu, indices); } @@ -794,95 +691,6 @@ static int kvm_mips_get_reg(struct kvm_vcpu *vcpu, v = fpu->msacsr; break; - /* Co-processor 0 registers */ - case KVM_REG_MIPS_CP0_INDEX: - v = (long)kvm_read_c0_guest_index(cop0); - break; - case KVM_REG_MIPS_CP0_CONTEXT: - v = (long)kvm_read_c0_guest_context(cop0); - break; - case KVM_REG_MIPS_CP0_USERLOCAL: - v = (long)kvm_read_c0_guest_userlocal(cop0); - break; - case KVM_REG_MIPS_CP0_PAGEMASK: - v = (long)kvm_read_c0_guest_pagemask(cop0); - break; - case KVM_REG_MIPS_CP0_WIRED: - v = (long)kvm_read_c0_guest_wired(cop0); - break; - case KVM_REG_MIPS_CP0_HWRENA: - v = (long)kvm_read_c0_guest_hwrena(cop0); - break; - case KVM_REG_MIPS_CP0_BADVADDR: - v = (long)kvm_read_c0_guest_badvaddr(cop0); - break; - case KVM_REG_MIPS_CP0_ENTRYHI: - v = (long)kvm_read_c0_guest_entryhi(cop0); - break; - case KVM_REG_MIPS_CP0_COMPARE: - v = (long)kvm_read_c0_guest_compare(cop0); - break; - case KVM_REG_MIPS_CP0_STATUS: - v = (long)kvm_read_c0_guest_status(cop0); - break; - case KVM_REG_MIPS_CP0_CAUSE: - v = (long)kvm_read_c0_guest_cause(cop0); - break; - case KVM_REG_MIPS_CP0_EPC: - v = (long)kvm_read_c0_guest_epc(cop0); - break; - case KVM_REG_MIPS_CP0_PRID: - v = (long)kvm_read_c0_guest_prid(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG: - v = (long)kvm_read_c0_guest_config(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG1: - v = (long)kvm_read_c0_guest_config1(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG2: - v = (long)kvm_read_c0_guest_config2(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG3: - v = (long)kvm_read_c0_guest_config3(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG4: - v = (long)kvm_read_c0_guest_config4(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG5: - v = (long)kvm_read_c0_guest_config5(cop0); - break; - case KVM_REG_MIPS_CP0_CONFIG7: - v = (long)kvm_read_c0_guest_config7(cop0); - break; - case KVM_REG_MIPS_CP0_ERROREPC: - v = (long)kvm_read_c0_guest_errorepc(cop0); - break; - case KVM_REG_MIPS_CP0_KSCRATCH1 ... 
KVM_REG_MIPS_CP0_KSCRATCH6: - idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; - if (!(vcpu->arch.kscratch_enabled & BIT(idx))) - return -EINVAL; - switch (idx) { - case 2: - v = (long)kvm_read_c0_guest_kscratch1(cop0); - break; - case 3: - v = (long)kvm_read_c0_guest_kscratch2(cop0); - break; - case 4: - v = (long)kvm_read_c0_guest_kscratch3(cop0); - break; - case 5: - v = (long)kvm_read_c0_guest_kscratch4(cop0); - break; - case 6: - v = (long)kvm_read_c0_guest_kscratch5(cop0); - break; - case 7: - v = (long)kvm_read_c0_guest_kscratch6(cop0); - break; - } - break; /* registers to be handled specially */ default: ret = kvm_mips_callbacks->get_one_reg(vcpu, reg, &v); @@ -1014,68 +822,6 @@ static int kvm_mips_set_reg(struct kvm_vcpu *vcpu, fpu->msacsr = v; break; - /* Co-processor 0 registers */ - case KVM_REG_MIPS_CP0_INDEX: - kvm_write_c0_guest_index(cop0, v); - break; - case KVM_REG_MIPS_CP0_CONTEXT: - kvm_write_c0_guest_context(cop0, v); - break; - case KVM_REG_MIPS_CP0_USERLOCAL: - kvm_write_c0_guest_userlocal(cop0, v); - break; - case KVM_REG_MIPS_CP0_PAGEMASK: - kvm_write_c0_guest_pagemask(cop0, v); - break; - case KVM_REG_MIPS_CP0_WIRED: - kvm_write_c0_guest_wired(cop0, v); - break; - case KVM_REG_MIPS_CP0_HWRENA: - kvm_write_c0_guest_hwrena(cop0, v); - break; - case KVM_REG_MIPS_CP0_BADVADDR: - kvm_write_c0_guest_badvaddr(cop0, v); - break; - case KVM_REG_MIPS_CP0_ENTRYHI: - kvm_write_c0_guest_entryhi(cop0, v); - break; - case KVM_REG_MIPS_CP0_STATUS: - kvm_write_c0_guest_status(cop0, v); - break; - case KVM_REG_MIPS_CP0_EPC: - kvm_write_c0_guest_epc(cop0, v); - break; - case KVM_REG_MIPS_CP0_PRID: - kvm_write_c0_guest_prid(cop0, v); - break; - case KVM_REG_MIPS_CP0_ERROREPC: - kvm_write_c0_guest_errorepc(cop0, v); - break; - case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6: - idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2; - if (!(vcpu->arch.kscratch_enabled & BIT(idx))) - return -EINVAL; - switch (idx) { - case 2: - kvm_write_c0_guest_kscratch1(cop0, v); - break; - case 3: - kvm_write_c0_guest_kscratch2(cop0, v); - break; - case 4: - kvm_write_c0_guest_kscratch3(cop0, v); - break; - case 5: - kvm_write_c0_guest_kscratch4(cop0, v); - break; - case 6: - kvm_write_c0_guest_kscratch5(cop0, v); - break; - case 7: - kvm_write_c0_guest_kscratch6(cop0, v); - break; - } - break; /* registers to be handled specially */ default: return kvm_mips_callbacks->set_one_reg(vcpu, reg, v); @@ -1144,18 +890,12 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, return -E2BIG; return kvm_mips_copy_reg_indices(vcpu, user_list->reg); } - case KVM_NMI: - /* Treat the NMI as a CPU reset */ - r = kvm_mips_reset_vcpu(vcpu); - break; case KVM_INTERRUPT: { struct kvm_mips_interrupt irq; - r = -EFAULT; if (copy_from_user(&irq, argp, sizeof(irq))) - goto out; - + return -EFAULT; kvm_debug("[%d] %s: irq: %d\n", vcpu->vcpu_id, __func__, irq.irq); @@ -1165,56 +905,57 @@ long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, case KVM_ENABLE_CAP: { struct kvm_enable_cap cap; - r = -EFAULT; if (copy_from_user(&cap, argp, sizeof(cap))) - goto out; + return -EFAULT; r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } default: r = -ENOIOCTLCMD; } - -out: return r; } -/* Get (and clear) the dirty memory log for a memory slot. */ +/** + * kvm_vm_ioctl_get_dirty_log - get and clear the log of dirty pages in a slot + * @kvm: kvm instance + * @log: slot id and address to which we copy the log + * + * Steps 1-4 below provide general overview of dirty page logging. 
See + * kvm_get_dirty_log_protect() function description for additional details. + * + * We call kvm_get_dirty_log_protect() to handle steps 1-3, upon return we + * always flush the TLB (step 4) even if previous step failed and the dirty + * bitmap may be corrupt. Regardless of previous outcome the KVM logging API + * does not preclude user space subsequent dirty log read. Flushing TLB ensures + * writes will be marked dirty for next log read. + * + * 1. Take a snapshot of the bit and clear it if needed. + * 2. Write protect the corresponding page. + * 3. Copy the snapshot to the userspace. + * 4. Flush TLB's if needed. + */ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) { struct kvm_memslots *slots; struct kvm_memory_slot *memslot; - unsigned long ga, ga_end; - int is_dirty = 0; + bool is_dirty = false; int r; - unsigned long n; mutex_lock(&kvm->slots_lock); - r = kvm_get_dirty_log(kvm, log, &is_dirty); - if (r) - goto out; + r = kvm_get_dirty_log_protect(kvm, log, &is_dirty); - /* If nothing is dirty, don't bother messing with page tables. */ if (is_dirty) { slots = kvm_memslots(kvm); memslot = id_to_memslot(slots, log->slot); - ga = memslot->base_gfn << PAGE_SHIFT; - ga_end = ga + (memslot->npages << PAGE_SHIFT); - - kvm_info("%s: dirty, ga: %#lx, ga_end %#lx\n", __func__, ga, - ga_end); - - n = kvm_dirty_bitmap_bytes(memslot); - memset(memslot->dirty_bitmap, 0, n); + /* Let implementation handle TLB/GVA invalidation */ + kvm_mips_callbacks->flush_shadow_memslot(kvm, memslot); } - r = 0; -out: mutex_unlock(&kvm->slots_lock); return r; - } long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) @@ -1282,11 +1023,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_COALESCED_MMIO: r = KVM_COALESCED_MMIO_PAGE_OFFSET; break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + r = KVM_MAX_VCPUS; + break; case KVM_CAP_MIPS_FPU: /* We don't handle systems with inconsistent cpu_has_fpu */ r = !!raw_cpu_has_fpu; @@ -1400,13 +1150,23 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { - kvm_mips_callbacks->vcpu_init(vcpu); + int err; + + err = kvm_mips_callbacks->vcpu_init(vcpu); + if (err) + return err; + hrtimer_init(&vcpu->arch.comparecount_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); vcpu->arch.comparecount_timer.function = kvm_mips_comparecount_wakeup; return 0; } +void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mips_callbacks->vcpu_uninit(vcpu); +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { @@ -1440,8 +1200,11 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; enum emulation_result er = EMULATE_DONE; + u32 inst; int ret = RESUME_GUEST; + vcpu->mode = OUTSIDE_GUEST_MODE; + /* re-enable HTW before enabling interrupts */ htw_start(); @@ -1564,8 +1327,12 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) break; default: + if (cause & CAUSEF_BD) + opc += 1; + inst = 0; + kvm_get_badinstr(opc, vcpu, &inst); kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n", - exccode, opc, kvm_get_inst(opc, vcpu), 
badvaddr, + exccode, opc, inst, badvaddr, kvm_read_c0_guest_status(vcpu->arch.cop0)); kvm_arch_vcpu_dump_regs(vcpu); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; @@ -1593,7 +1360,15 @@ skip_emul: if (ret == RESUME_GUEST) { trace_kvm_reenter(vcpu); - kvm_mips_check_asids(vcpu); + /* + * Make sure the read of VCPU requests in vcpu_reenter() + * callback is not reordered ahead of the write to vcpu->mode, + * or we could miss a TLB flush request while the requester sees + * the VCPU as outside of guest mode and not needing an IPI. + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + kvm_mips_callbacks->vcpu_reenter(run, vcpu); /* * If FPU / MSA are enabled (i.e. the guest's FPU / MSA context diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 3b677c8..cb0faad 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -11,86 +11,995 @@ #include <linux/highmem.h> #include <linux/kvm_host.h> +#include <linux/uaccess.h> #include <asm/mmu_context.h> +#include <asm/pgalloc.h> -static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) +/* + * KVM_MMU_CACHE_MIN_PAGES is the number of GPA page table translation levels + * for which pages need to be cached. + */ +#if defined(__PAGETABLE_PMD_FOLDED) +#define KVM_MMU_CACHE_MIN_PAGES 1 +#else +#define KVM_MMU_CACHE_MIN_PAGES 2 +#endif + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + int min, int max) { - int cpu = smp_processor_id(); + void *page; + + BUG_ON(max > KVM_NR_MEM_OBJS); + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < max) { + page = (void *)__get_free_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + cache->objects[cache->nobjs++] = page; + } + return 0; +} - return vcpu->arch.guest_kernel_asid[cpu] & - cpu_asid_mask(&cpu_data[cpu]); +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); } -static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) { - int cpu = smp_processor_id(); + void *p; - return vcpu->arch.guest_user_asid[cpu] & - cpu_asid_mask(&cpu_data[cpu]); + BUG_ON(!mc || !mc->nobjs); + p = mc->objects[--mc->nobjs]; + return p; } -static int kvm_mips_map_page(struct kvm *kvm, gfn_t gfn) +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) { - int srcu_idx, err = 0; - kvm_pfn_t pfn; + mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); +} + +/** + * kvm_pgd_init() - Initialise KVM GPA page directory. + * @page: Pointer to page directory (PGD) for KVM GPA. + * + * Initialise a KVM GPA page directory with pointers to the invalid table, i.e. + * representing no mappings. This is similar to pgd_init(), however it + * initialises all the page directory pointers, not just the ones corresponding + * to the userland address space (since it is for the guest physical address + * space rather than a virtual address space). + */ +static void kvm_pgd_init(void *page) +{ + unsigned long *p, *end; + unsigned long entry; + +#ifdef __PAGETABLE_PMD_FOLDED + entry = (unsigned long)invalid_pte_table; +#else + entry = (unsigned long)invalid_pmd_table; +#endif + + p = (unsigned long *)page; + end = p + PTRS_PER_PGD; + + do { + p[0] = entry; + p[1] = entry; + p[2] = entry; + p[3] = entry; + p[4] = entry; + p += 8; + p[-3] = entry; + p[-2] = entry; + p[-1] = entry; + } while (p != end); +} + +/** + * kvm_pgd_alloc() - Allocate and initialise a KVM GPA page directory. 
+ * + * Allocate a blank KVM GPA page directory (PGD) for representing guest physical + * to host physical page mappings. + * + * Returns: Pointer to new KVM GPA page directory. + * NULL on allocation failure. + */ +pgd_t *kvm_pgd_alloc(void) +{ + pgd_t *ret; + + ret = (pgd_t *)__get_free_pages(GFP_KERNEL, PGD_ORDER); + if (ret) + kvm_pgd_init(ret); + + return ret; +} + +/** + * kvm_mips_walk_pgd() - Walk page table with optional allocation. + * @pgd: Page directory pointer. + * @addr: Address to index page table using. + * @cache: MMU page cache to allocate new page tables from, or NULL. + * + * Walk the page tables pointed to by @pgd to find the PTE corresponding to the + * address @addr. If page tables don't exist for @addr, they will be created + * from the MMU cache if @cache is not NULL. + * + * Returns: Pointer to pte_t corresponding to @addr. + * NULL if a page table doesn't exist for @addr and !@cache. + * NULL if a page table allocation failed. + */ +static pte_t *kvm_mips_walk_pgd(pgd_t *pgd, struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + pud_t *pud; + pmd_t *pmd; + + pgd += pgd_index(addr); + if (pgd_none(*pgd)) { + /* Not used on MIPS yet */ + BUG(); + return NULL; + } + pud = pud_offset(pgd, addr); + if (pud_none(*pud)) { + pmd_t *new_pmd; + + if (!cache) + return NULL; + new_pmd = mmu_memory_cache_alloc(cache); + pmd_init((unsigned long)new_pmd, + (unsigned long)invalid_pte_table); + pud_populate(NULL, pud, new_pmd); + } + pmd = pmd_offset(pud, addr); + if (pmd_none(*pmd)) { + pte_t *new_pte; + + if (!cache) + return NULL; + new_pte = mmu_memory_cache_alloc(cache); + clear_page(new_pte); + pmd_populate_kernel(NULL, pmd, new_pte); + } + return pte_offset(pmd, addr); +} + +/* Caller must hold kvm->mm_lock */ +static pte_t *kvm_mips_pte_for_gpa(struct kvm *kvm, + struct kvm_mmu_memory_cache *cache, + unsigned long addr) +{ + return kvm_mips_walk_pgd(kvm->arch.gpa_mm.pgd, cache, addr); +} + +/* + * kvm_mips_flush_gpa_{pte,pmd,pud,pgd,pt}. + * Flush a range of guest physical address space from the VM's GPA page tables. 
+ */ + +static bool kvm_mips_flush_gpa_pte(pte_t *pte, unsigned long start_gpa, + unsigned long end_gpa) +{ + int i_min = __pte_offset(start_gpa); + int i_max = __pte_offset(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); + int i; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + set_pte(pte + i, __pte(0)); + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pmd(pmd_t *pmd, unsigned long start_gpa, + unsigned long end_gpa) +{ + pte_t *pte; + unsigned long end = ~0ul; + int i_min = __pmd_offset(start_gpa); + int i_max = __pmd_offset(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pmd_present(pmd[i])) + continue; + + pte = pte_offset(pmd + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pte(pte, start_gpa, end)) { + pmd_clear(pmd + i); + pte_free_kernel(NULL, pte); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pud(pud_t *pud, unsigned long start_gpa, + unsigned long end_gpa) +{ + pmd_t *pmd; + unsigned long end = ~0ul; + int i_min = __pud_offset(start_gpa); + int i_max = __pud_offset(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pmd(pmd, start_gpa, end)) { + pud_clear(pud + i); + pmd_free(NULL, pmd); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gpa_pgd(pgd_t *pgd, unsigned long start_gpa, + unsigned long end_gpa) +{ + pud_t *pud; + unsigned long end = ~0ul; + int i_min = pgd_index(start_gpa); + int i_max = pgd_index(end_gpa); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gpa = 0) { + if (!pgd_present(pgd[i])) + continue; + + pud = pud_offset(pgd + i, 0); + if (i == i_max) + end = end_gpa; + + if (kvm_mips_flush_gpa_pud(pud, start_gpa, end)) { + pgd_clear(pgd + i); + pud_free(NULL, pud); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +/** + * kvm_mips_flush_gpa_pt() - Flush a range of guest physical addresses. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Flushes a range of GPA mappings from the GPA page tables. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether its safe to remove the top level page directory because + * all lower levels have been removed. 
+ */ +bool kvm_mips_flush_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) +{ + return kvm_mips_flush_gpa_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +#define BUILD_PTE_RANGE_OP(name, op) \ +static int kvm_mips_##name##_pte(pte_t *pte, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + int i_min = __pte_offset(start); \ + int i_max = __pte_offset(end); \ + int i; \ + pte_t old, new; \ + \ + for (i = i_min; i <= i_max; ++i) { \ + if (!pte_present(pte[i])) \ + continue; \ + \ + old = pte[i]; \ + new = op(old); \ + if (pte_val(new) == pte_val(old)) \ + continue; \ + set_pte(pte + i, new); \ + ret = 1; \ + } \ + return ret; \ +} \ + \ +/* returns true if anything was done */ \ +static int kvm_mips_##name##_pmd(pmd_t *pmd, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pte_t *pte; \ + unsigned long cur_end = ~0ul; \ + int i_min = __pmd_offset(start); \ + int i_max = __pmd_offset(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pmd_present(pmd[i])) \ + continue; \ + \ + pte = pte_offset(pmd + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pte(pte, start, cur_end); \ + } \ + return ret; \ +} \ + \ +static int kvm_mips_##name##_pud(pud_t *pud, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pmd_t *pmd; \ + unsigned long cur_end = ~0ul; \ + int i_min = __pud_offset(start); \ + int i_max = __pud_offset(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pud_present(pud[i])) \ + continue; \ + \ + pmd = pmd_offset(pud + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pmd(pmd, start, cur_end); \ + } \ + return ret; \ +} \ + \ +static int kvm_mips_##name##_pgd(pgd_t *pgd, unsigned long start, \ + unsigned long end) \ +{ \ + int ret = 0; \ + pud_t *pud; \ + unsigned long cur_end = ~0ul; \ + int i_min = pgd_index(start); \ + int i_max = pgd_index(end); \ + int i; \ + \ + for (i = i_min; i <= i_max; ++i, start = 0) { \ + if (!pgd_present(pgd[i])) \ + continue; \ + \ + pud = pud_offset(pgd + i, 0); \ + if (i == i_max) \ + cur_end = end; \ + \ + ret |= kvm_mips_##name##_pud(pud, start, cur_end); \ + } \ + return ret; \ +} + +/* + * kvm_mips_mkclean_gpa_pt. + * Mark a range of guest physical address space clean (writes fault) in the VM's + * GPA page table to allow dirty page tracking. + */ - if (kvm->arch.guest_pmap[gfn] != KVM_INVALID_PAGE) +BUILD_PTE_RANGE_OP(mkclean, pte_mkclean) + +/** + * kvm_mips_mkclean_gpa_pt() - Make a range of guest physical addresses clean. + * @kvm: KVM pointer. + * @start_gfn: Guest frame number of first page in GPA range to flush. + * @end_gfn: Guest frame number of last page in GPA range to flush. + * + * Make a range of GPA mappings clean so that guest writes will fault and + * trigger dirty page logging. + * + * The caller must hold the @kvm->mmu_lock spinlock. + * + * Returns: Whether any GPA mappings were modified, which would require + * derived mappings (GVA page tables & TLB enties) to be + * invalidated. 
+ */ +int kvm_mips_mkclean_gpa_pt(struct kvm *kvm, gfn_t start_gfn, gfn_t end_gfn) +{ + return kvm_mips_mkclean_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +/** + * kvm_arch_mmu_enable_log_dirty_pt_masked() - write protect dirty pages + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory + * slot to be write protected + * + * Walks bits set in mask write protects the associated pte's. Caller must + * acquire @kvm->mmu_lock. + */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + gfn_t base_gfn = slot->base_gfn + gfn_offset; + gfn_t start = base_gfn + __ffs(mask); + gfn_t end = base_gfn + __fls(mask); + + kvm_mips_mkclean_gpa_pt(kvm, start, end); +} + +/* + * kvm_mips_mkold_gpa_pt. + * Mark a range of guest physical address space old (all accesses fault) in the + * VM's GPA page table to allow detection of commonly used pages. + */ + +BUILD_PTE_RANGE_OP(mkold, pte_mkold) + +static int kvm_mips_mkold_gpa_pt(struct kvm *kvm, gfn_t start_gfn, + gfn_t end_gfn) +{ + return kvm_mips_mkold_pgd(kvm->arch.gpa_mm.pgd, + start_gfn << PAGE_SHIFT, + end_gfn << PAGE_SHIFT); +} + +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, gfn_t gfn, + gpa_t gfn_end, + struct kvm_memory_slot *memslot, + void *data), + void *data) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int ret = 0; + + slots = kvm_memslots(kvm); + + /* we only care about the pages that the guest sees */ + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gfn, gfn_end; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + + /* + * {gfn(page) | page intersects with [hva_start, hva_end)} = + * {gfn_start, gfn_start+1, ..., gfn_end-1}. 
+ */ + gfn = hva_to_gfn_memslot(hva_start, memslot); + gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); + + ret |= handler(kvm, gfn, gfn_end, memslot, data); + } + + return ret; +} + + +static int kvm_unmap_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + kvm_mips_flush_gpa_pt(kvm, gfn, gfn_end); + return 1; +} + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + unsigned long end = hva + PAGE_SIZE; + + handle_hva_to_gpa(kvm, hva, end, &kvm_unmap_hva_handler, NULL); + + kvm_mips_callbacks->flush_shadow_all(kvm); + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + + kvm_mips_callbacks->flush_shadow_all(kvm); + return 0; +} + +static int kvm_set_spte_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + gpa_t gpa = gfn << PAGE_SHIFT; + pte_t hva_pte = *(pte_t *)data; + pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + pte_t old_pte; + + if (!gpa_pte) + return 0; + + /* Mapping may need adjusting depending on memslot flags */ + old_pte = *gpa_pte; + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES && !pte_dirty(old_pte)) + hva_pte = pte_mkclean(hva_pte); + else if (memslot->flags & KVM_MEM_READONLY) + hva_pte = pte_wrprotect(hva_pte); + + set_pte(gpa_pte, hva_pte); + + /* Replacing an absent or old page doesn't need flushes */ + if (!pte_present(old_pte) || !pte_young(old_pte)) return 0; + /* Pages swapped, aged, moved, or cleaned require flushes */ + return !pte_present(hva_pte) || + !pte_young(hva_pte) || + pte_pfn(old_pte) != pte_pfn(hva_pte) || + (pte_dirty(old_pte) && !pte_dirty(hva_pte)); +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + int ret; + + ret = handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &pte); + if (ret) + kvm_mips_callbacks->flush_shadow_all(kvm); +} + +static int kvm_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + return kvm_mips_mkold_gpa_pt(kvm, gfn, gfn_end); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gfn_t gfn, gfn_t gfn_end, + struct kvm_memory_slot *memslot, void *data) +{ + gpa_t gpa = gfn << PAGE_SHIFT; + pte_t *gpa_pte = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + + if (!gpa_pte) + return 0; + return pte_young(*gpa_pte); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + return handle_hva_to_gpa(kvm, hva, hva, kvm_test_age_hva_handler, NULL); +} + +/** + * _kvm_mips_map_page_fast() - Fast path GPA fault handler. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write_fault: Whether the fault was due to a write. + * @out_entry: New PTE for @gpa (written on success unless NULL). + * @out_buddy: New PTE for @gpa's buddy (written on success unless + * NULL). + * + * Perform fast path GPA fault handling, doing all that can be done without + * calling into KVM. This handles marking old pages young (for idle page + * tracking), and dirtying of clean pages (for dirty page logging). + * + * Returns: 0 on success, in which case we can update derived mappings and + * resume guest execution. 
+ * -EFAULT on failure due to absent GPA mapping or write to + * read-only page, in which case KVM must be consulted. + */ +static int _kvm_mips_map_page_fast(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write_fault, + pte_t *out_entry, pte_t *out_buddy) +{ + struct kvm *kvm = vcpu->kvm; + gfn_t gfn = gpa >> PAGE_SHIFT; + pte_t *ptep; + kvm_pfn_t pfn = 0; /* silence bogus GCC warning */ + bool pfn_valid = false; + int ret = 0; + + spin_lock(&kvm->mmu_lock); + + /* Fast path - just check GPA page table for an existing entry */ + ptep = kvm_mips_pte_for_gpa(kvm, NULL, gpa); + if (!ptep || !pte_present(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track access to pages marked old */ + if (!pte_young(*ptep)) { + set_pte(ptep, pte_mkyoung(*ptep)); + pfn = pte_pfn(*ptep); + pfn_valid = true; + /* call kvm_set_pfn_accessed() after unlock */ + } + if (write_fault && !pte_dirty(*ptep)) { + if (!pte_write(*ptep)) { + ret = -EFAULT; + goto out; + } + + /* Track dirtying of writeable pages */ + set_pte(ptep, pte_mkdirty(*ptep)); + pfn = pte_pfn(*ptep); + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + + if (out_entry) + *out_entry = *ptep; + if (out_buddy) + *out_buddy = *ptep_buddy(ptep); + +out: + spin_unlock(&kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); + return ret; +} + +/** + * kvm_mips_map_page() - Map a guest physical page. + * @vcpu: VCPU pointer. + * @gpa: Guest physical address of fault. + * @write_fault: Whether the fault was due to a write. + * @out_entry: New PTE for @gpa (written on success unless NULL). + * @out_buddy: New PTE for @gpa's buddy (written on success unless + * NULL). + * + * Handle GPA faults by creating a new GPA mapping (or updating an existing + * one). + * + * This takes care of marking pages young or dirty (idle/dirty page tracking), + * asking KVM for the corresponding PFN, and creating a mapping in the GPA page + * tables. Derived mappings (GVA page tables and TLBs) must be handled by the + * caller. + * + * Returns: 0 on success, in which case the caller may use the @out_entry + * and @out_buddy PTEs to update derived mappings and resume guest + * execution. + * -EFAULT if there is no memory region at @gpa or a write was + * attempted to a read-only memory region. This is usually handled + * as an MMIO access. + */ +static int kvm_mips_map_page(struct kvm_vcpu *vcpu, unsigned long gpa, + bool write_fault, + pte_t *out_entry, pte_t *out_buddy) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + gfn_t gfn = gpa >> PAGE_SHIFT; + int srcu_idx, err; + kvm_pfn_t pfn; + pte_t *ptep, entry, old_pte; + bool writeable; + unsigned long prot_bits; + unsigned long mmu_seq; + + /* Try the fast path to handle old / clean pages */ srcu_idx = srcu_read_lock(&kvm->srcu); - pfn = gfn_to_pfn(kvm, gfn); + err = _kvm_mips_map_page_fast(vcpu, gpa, write_fault, out_entry, + out_buddy); + if (!err) + goto out; + /* We need a minimum of cached pages ready for page table creation */ + err = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); + if (err) + goto out; + +retry: + /* + * Used to check for invalidations in progress, of the pfn that is + * returned by pfn_to_pfn_prot below. 
+ */ + mmu_seq = kvm->mmu_notifier_seq; + /* + * Ensure the read of mmu_notifier_seq isn't reordered with PTE reads in + * gfn_to_pfn_prot() (which calls get_user_pages()), so that we don't + * risk the page we get a reference to getting unmapped before we have a + * chance to grab the mmu_lock without mmu_notifier_retry() noticing. + * + * This smp_rmb() pairs with the effective smp_wmb() of the combination + * of the pte_unmap_unlock() after the PTE is zapped, and the + * spin_lock() in kvm_mmu_notifier_invalidate_<page|range_end>() before + * mmu_notifier_seq is incremented. + */ + smp_rmb(); + + /* Slow path - ask KVM core whether we can access this GPA */ + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writeable); if (is_error_noslot_pfn(pfn)) { - kvm_err("Couldn't get pfn for gfn %#llx!\n", gfn); err = -EFAULT; goto out; } - kvm->arch.guest_pmap[gfn] = pfn; + spin_lock(&kvm->mmu_lock); + /* Check if an invalidation has taken place since we got pfn */ + if (mmu_notifier_retry(kvm, mmu_seq)) { + /* + * This can happen when mappings are changed asynchronously, but + * also synchronously if a COW is triggered by + * gfn_to_pfn_prot(). + */ + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + goto retry; + } + + /* Ensure page tables are allocated */ + ptep = kvm_mips_pte_for_gpa(kvm, memcache, gpa); + + /* Set up the PTE */ + prot_bits = _PAGE_PRESENT | __READABLE | _page_cachable_default; + if (writeable) { + prot_bits |= _PAGE_WRITE; + if (write_fault) { + prot_bits |= __WRITEABLE; + mark_page_dirty(kvm, gfn); + kvm_set_pfn_dirty(pfn); + } + } + entry = pfn_pte(pfn, __pgprot(prot_bits)); + + /* Write the PTE */ + old_pte = *ptep; + set_pte(ptep, entry); + + err = 0; + if (out_entry) + *out_entry = *ptep; + if (out_buddy) + *out_buddy = *ptep_buddy(ptep); + + spin_unlock(&kvm->mmu_lock); + kvm_release_pfn_clean(pfn); + kvm_set_pfn_accessed(pfn); out: srcu_read_unlock(&kvm->srcu, srcu_idx); return err; } -/* Translate guest KSEG0 addresses to Host PA */ -unsigned long kvm_mips_translate_guest_kseg0_to_hpa(struct kvm_vcpu *vcpu, - unsigned long gva) +static pte_t *kvm_trap_emul_pte_for_gva(struct kvm_vcpu *vcpu, + unsigned long addr) { - gfn_t gfn; - unsigned long offset = gva & ~PAGE_MASK; - struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + pgd_t *pgdp; + int ret; + + /* We need a minimum of cached pages ready for page table creation */ + ret = mmu_topup_memory_cache(memcache, KVM_MMU_CACHE_MIN_PAGES, + KVM_NR_MEM_OBJS); + if (ret) + return NULL; + + if (KVM_GUEST_KERNEL_MODE(vcpu)) + pgdp = vcpu->arch.guest_kernel_mm.pgd; + else + pgdp = vcpu->arch.guest_user_mm.pgd; + + return kvm_mips_walk_pgd(pgdp, memcache, addr); +} - if (KVM_GUEST_KSEGX(gva) != KVM_GUEST_KSEG0) { - kvm_err("%s/%p: Invalid gva: %#lx\n", __func__, - __builtin_return_address(0), gva); - return KVM_INVALID_PAGE; +void kvm_trap_emul_invalidate_gva(struct kvm_vcpu *vcpu, unsigned long addr, + bool user) +{ + pgd_t *pgdp; + pte_t *ptep; + + addr &= PAGE_MASK << 1; + + pgdp = vcpu->arch.guest_kernel_mm.pgd; + ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); + if (ptep) { + ptep[0] = pfn_pte(0, __pgprot(0)); + ptep[1] = pfn_pte(0, __pgprot(0)); + } + + if (user) { + pgdp = vcpu->arch.guest_user_mm.pgd; + ptep = kvm_mips_walk_pgd(pgdp, NULL, addr); + if (ptep) { + ptep[0] = pfn_pte(0, __pgprot(0)); + ptep[1] = pfn_pte(0, __pgprot(0)); + } } +} - gfn = (KVM_GUEST_CPHYSADDR(gva) >> PAGE_SHIFT); +/* + * kvm_mips_flush_gva_{pte,pmd,pud,pgd,pt}. 
+ * Flush a range of guest physical address space from the VM's GPA page tables. + */ - if (gfn >= kvm->arch.guest_pmap_npages) { - kvm_err("%s: Invalid gfn: %#llx, GVA: %#lx\n", __func__, gfn, - gva); - return KVM_INVALID_PAGE; +static bool kvm_mips_flush_gva_pte(pte_t *pte, unsigned long start_gva, + unsigned long end_gva) +{ + int i_min = __pte_offset(start_gva); + int i_max = __pte_offset(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PTE - 1); + int i; + + /* + * There's no freeing to do, so there's no point clearing individual + * entries unless only part of the last level page table needs flushing. + */ + if (safe_to_remove) + return true; + + for (i = i_min; i <= i_max; ++i) { + if (!pte_present(pte[i])) + continue; + + set_pte(pte + i, __pte(0)); } + return false; +} - if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) - return KVM_INVALID_ADDR; +static bool kvm_mips_flush_gva_pmd(pmd_t *pmd, unsigned long start_gva, + unsigned long end_gva) +{ + pte_t *pte; + unsigned long end = ~0ul; + int i_min = __pmd_offset(start_gva); + int i_max = __pmd_offset(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PMD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gva = 0) { + if (!pmd_present(pmd[i])) + continue; + + pte = pte_offset(pmd + i, 0); + if (i == i_max) + end = end_gva; + + if (kvm_mips_flush_gva_pte(pte, start_gva, end)) { + pmd_clear(pmd + i); + pte_free_kernel(NULL, pte); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} - return (kvm->arch.guest_pmap[gfn] << PAGE_SHIFT) + offset; +static bool kvm_mips_flush_gva_pud(pud_t *pud, unsigned long start_gva, + unsigned long end_gva) +{ + pmd_t *pmd; + unsigned long end = ~0ul; + int i_min = __pud_offset(start_gva); + int i_max = __pud_offset(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PUD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gva = 0) { + if (!pud_present(pud[i])) + continue; + + pmd = pmd_offset(pud + i, 0); + if (i == i_max) + end = end_gva; + + if (kvm_mips_flush_gva_pmd(pmd, start_gva, end)) { + pud_clear(pud + i); + pmd_free(NULL, pmd); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +static bool kvm_mips_flush_gva_pgd(pgd_t *pgd, unsigned long start_gva, + unsigned long end_gva) +{ + pud_t *pud; + unsigned long end = ~0ul; + int i_min = pgd_index(start_gva); + int i_max = pgd_index(end_gva); + bool safe_to_remove = (i_min == 0 && i_max == PTRS_PER_PGD - 1); + int i; + + for (i = i_min; i <= i_max; ++i, start_gva = 0) { + if (!pgd_present(pgd[i])) + continue; + + pud = pud_offset(pgd + i, 0); + if (i == i_max) + end = end_gva; + + if (kvm_mips_flush_gva_pud(pud, start_gva, end)) { + pgd_clear(pgd + i); + pud_free(NULL, pud); + } else { + safe_to_remove = false; + } + } + return safe_to_remove; +} + +void kvm_mips_flush_gva_pt(pgd_t *pgd, enum kvm_mips_flush flags) +{ + if (flags & KMF_GPA) { + /* all of guest virtual address space could be affected */ + if (flags & KMF_KERN) + /* useg, kseg0, seg2/3 */ + kvm_mips_flush_gva_pgd(pgd, 0, 0x7fffffff); + else + /* useg */ + kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); + } else { + /* useg */ + kvm_mips_flush_gva_pgd(pgd, 0, 0x3fffffff); + + /* kseg2/3 */ + if (flags & KMF_KERN) + kvm_mips_flush_gva_pgd(pgd, 0x60000000, 0x7fffffff); + } +} + +static pte_t kvm_mips_gpa_pte_to_gva_unmapped(pte_t pte) +{ + /* + * Don't leak writeable but clean entries from GPA page tables. 
We don't + * want the normal Linux tlbmod handler to handle dirtying when KVM + * accesses guest memory. + */ + if (!pte_dirty(pte)) + pte = pte_wrprotect(pte); + + return pte; +} + +static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo) +{ + /* Guest EntryLo overrides host EntryLo */ + if (!(entrylo & ENTRYLO_D)) + pte = pte_mkclean(pte); + + return kvm_mips_gpa_pte_to_gva_unmapped(pte); } /* XXXKYMA: Must be called with interrupts disabled */ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, - struct kvm_vcpu *vcpu) + struct kvm_vcpu *vcpu, + bool write_fault) { - gfn_t gfn; - kvm_pfn_t pfn0, pfn1; - unsigned long vaddr = 0; - unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; - struct kvm *kvm = vcpu->kvm; - const int flush_dcache_mask = 0; - int ret; + unsigned long gpa; + pte_t pte_gpa[2], *ptep_gva; + int idx; if (KVM_GUEST_KSEGX(badvaddr) != KVM_GUEST_KSEG0) { kvm_err("%s: Invalid BadVaddr: %#lx\n", __func__, badvaddr); @@ -98,49 +1007,39 @@ int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr, return -1; } - gfn = (KVM_GUEST_CPHYSADDR(badvaddr) >> PAGE_SHIFT); - if ((gfn | 1) >= kvm->arch.guest_pmap_npages) { - kvm_err("%s: Invalid gfn: %#llx, BadVaddr: %#lx\n", __func__, - gfn, badvaddr); - kvm_mips_dump_host_tlbs(); + /* Get the GPA page table entry */ + gpa = KVM_GUEST_CPHYSADDR(badvaddr); + idx = (badvaddr >> PAGE_SHIFT) & 1; + if (kvm_mips_map_page(vcpu, gpa, write_fault, &pte_gpa[idx], + &pte_gpa[!idx]) < 0) return -1; - } - vaddr = badvaddr & (PAGE_MASK << 1); - if (kvm_mips_map_page(vcpu->kvm, gfn) < 0) + /* Get the GVA page table entry */ + ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, badvaddr & ~PAGE_SIZE); + if (!ptep_gva) { + kvm_err("No ptep for gva %lx\n", badvaddr); return -1; + } - if (kvm_mips_map_page(vcpu->kvm, gfn ^ 0x1) < 0) - return -1; - - pfn0 = kvm->arch.guest_pmap[gfn & ~0x1]; - pfn1 = kvm->arch.guest_pmap[gfn | 0x1]; - - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | - ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - ENTRYLO_D | ENTRYLO_V; - entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | - ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - ENTRYLO_D | ENTRYLO_V; - - preempt_disable(); - entryhi = (vaddr | kvm_mips_get_kernel_asid(vcpu)); - ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, - flush_dcache_mask); - preempt_enable(); + /* Copy a pair of entries from GPA page table to GVA page table */ + ptep_gva[0] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[0]); + ptep_gva[1] = kvm_mips_gpa_pte_to_gva_unmapped(pte_gpa[1]); - return ret; + /* Invalidate this entry in the TLB, guest kernel ASID only */ + kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); + return 0; } int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, - struct kvm_mips_tlb *tlb) + struct kvm_mips_tlb *tlb, + unsigned long gva, + bool write_fault) { - unsigned long entryhi = 0, entrylo0 = 0, entrylo1 = 0; struct kvm *kvm = vcpu->kvm; - kvm_pfn_t pfn0, pfn1; - gfn_t gfn0, gfn1; long tlb_lo[2]; - int ret; + pte_t pte_gpa[2], *ptep_buddy, *ptep_gva; + unsigned int idx = TLB_LO_IDX(*tlb, gva); + bool kernel = KVM_GUEST_KERNEL_MODE(vcpu); tlb_lo[0] = tlb->tlb_lo[0]; tlb_lo[1] = tlb->tlb_lo[1]; @@ -149,70 +1048,64 @@ int kvm_mips_handle_mapped_seg_tlb_fault(struct kvm_vcpu *vcpu, * The commpage address must not be mapped to anything else if the guest * TLB contains entries nearby, or commpage accesses will break. 
*/ - if (!((tlb->tlb_hi ^ KVM_GUEST_COMMPAGE_ADDR) & - VPN2_MASK & (PAGE_MASK << 1))) - tlb_lo[(KVM_GUEST_COMMPAGE_ADDR >> PAGE_SHIFT) & 1] = 0; - - gfn0 = mips3_tlbpfn_to_paddr(tlb_lo[0]) >> PAGE_SHIFT; - gfn1 = mips3_tlbpfn_to_paddr(tlb_lo[1]) >> PAGE_SHIFT; - if (gfn0 >= kvm->arch.guest_pmap_npages || - gfn1 >= kvm->arch.guest_pmap_npages) { - kvm_err("%s: Invalid gfn: [%#llx, %#llx], EHi: %#lx\n", - __func__, gfn0, gfn1, tlb->tlb_hi); - kvm_mips_dump_guest_tlbs(vcpu); - return -1; - } + if (!((gva ^ KVM_GUEST_COMMPAGE_ADDR) & VPN2_MASK & (PAGE_MASK << 1))) + tlb_lo[TLB_LO_IDX(*tlb, KVM_GUEST_COMMPAGE_ADDR)] = 0; - if (kvm_mips_map_page(kvm, gfn0) < 0) + /* Get the GPA page table entry */ + if (kvm_mips_map_page(vcpu, mips3_tlbpfn_to_paddr(tlb_lo[idx]), + write_fault, &pte_gpa[idx], NULL) < 0) return -1; - if (kvm_mips_map_page(kvm, gfn1) < 0) + /* And its GVA buddy's GPA page table entry if it also exists */ + pte_gpa[!idx] = pfn_pte(0, __pgprot(0)); + if (tlb_lo[!idx] & ENTRYLO_V) { + spin_lock(&kvm->mmu_lock); + ptep_buddy = kvm_mips_pte_for_gpa(kvm, NULL, + mips3_tlbpfn_to_paddr(tlb_lo[!idx])); + if (ptep_buddy) + pte_gpa[!idx] = *ptep_buddy; + spin_unlock(&kvm->mmu_lock); + } + + /* Get the GVA page table entry pair */ + ptep_gva = kvm_trap_emul_pte_for_gva(vcpu, gva & ~PAGE_SIZE); + if (!ptep_gva) { + kvm_err("No ptep for gva %lx\n", gva); return -1; + } - pfn0 = kvm->arch.guest_pmap[gfn0]; - pfn1 = kvm->arch.guest_pmap[gfn1]; + /* Copy a pair of entries from GPA page table to GVA page table */ + ptep_gva[0] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[0], tlb_lo[0]); + ptep_gva[1] = kvm_mips_gpa_pte_to_gva_mapped(pte_gpa[1], tlb_lo[1]); - /* Get attributes from the Guest TLB */ - entrylo0 = mips3_paddr_to_tlbpfn(pfn0 << PAGE_SHIFT) | - ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - (tlb_lo[0] & ENTRYLO_D) | - (tlb_lo[0] & ENTRYLO_V); - entrylo1 = mips3_paddr_to_tlbpfn(pfn1 << PAGE_SHIFT) | - ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - (tlb_lo[1] & ENTRYLO_D) | - (tlb_lo[1] & ENTRYLO_V); + /* Invalidate this entry in the TLB, current guest mode ASID only */ + kvm_mips_host_tlb_inv(vcpu, gva, !kernel, kernel); kvm_debug("@ %#lx tlb_lo0: 0x%08lx tlb_lo1: 0x%08lx\n", vcpu->arch.pc, tlb->tlb_lo[0], tlb->tlb_lo[1]); - preempt_disable(); - entryhi = (tlb->tlb_hi & VPN2_MASK) | (KVM_GUEST_KERNEL_MODE(vcpu) ? 
- kvm_mips_get_kernel_asid(vcpu) : - kvm_mips_get_user_asid(vcpu)); - ret = kvm_mips_host_tlb_write(vcpu, entryhi, entrylo0, entrylo1, - tlb->tlb_mask); - preempt_enable(); - - return ret; + return 0; } -void kvm_get_new_mmu_context(struct mm_struct *mm, unsigned long cpu, - struct kvm_vcpu *vcpu) +int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, + struct kvm_vcpu *vcpu) { - unsigned long asid = asid_cache(cpu); - - asid += cpu_asid_inc(); - if (!(asid & cpu_asid_mask(&cpu_data[cpu]))) { - if (cpu_has_vtag_icache) - flush_icache_all(); - - kvm_local_flush_tlb_all(); /* start new asid cycle */ + kvm_pfn_t pfn; + pte_t *ptep; - if (!asid) /* fix version if needed */ - asid = asid_first_version(cpu); + ptep = kvm_trap_emul_pte_for_gva(vcpu, badvaddr); + if (!ptep) { + kvm_err("No ptep for commpage %lx\n", badvaddr); + return -1; } - cpu_context(cpu, mm) = asid_cache(cpu) = asid; + pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); + /* Also set valid and dirty, so refill handler doesn't have to */ + *ptep = pte_mkyoung(pte_mkdirty(pfn_pte(pfn, PAGE_SHARED))); + + /* Invalidate this entry in the TLB, guest kernel ASID only */ + kvm_mips_host_tlb_inv(vcpu, badvaddr, false, true); + return 0; } /** @@ -235,42 +1128,13 @@ static void kvm_mips_migrate_count(struct kvm_vcpu *vcpu) /* Restore ASID once we are scheduled back after preemption */ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - unsigned long asid_mask = cpu_asid_mask(&cpu_data[cpu]); unsigned long flags; - int newasid = 0; kvm_debug("%s: vcpu %p, cpu: %d\n", __func__, vcpu, cpu); - /* Allocate new kernel and user ASIDs if needed */ - local_irq_save(flags); - if ((vcpu->arch.guest_kernel_asid[cpu] ^ asid_cache(cpu)) & - asid_version_mask(cpu)) { - kvm_get_new_mmu_context(&vcpu->arch.guest_kernel_mm, cpu, vcpu); - vcpu->arch.guest_kernel_asid[cpu] = - vcpu->arch.guest_kernel_mm.context.asid[cpu]; - newasid++; - - kvm_debug("[%d]: cpu_context: %#lx\n", cpu, - cpu_context(cpu, current->mm)); - kvm_debug("[%d]: Allocated new ASID for Guest Kernel: %#x\n", - cpu, vcpu->arch.guest_kernel_asid[cpu]); - } - - if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) & - asid_version_mask(cpu)) { - kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu); - vcpu->arch.guest_user_asid[cpu] = - vcpu->arch.guest_user_mm.context.asid[cpu]; - newasid++; - - kvm_debug("[%d]: cpu_context: %#lx\n", cpu, - cpu_context(cpu, current->mm)); - kvm_debug("[%d]: Allocated new ASID for Guest User: %#x\n", cpu, - vcpu->arch.guest_user_asid[cpu]); - } - + vcpu->cpu = cpu; if (vcpu->arch.last_sched_cpu != cpu) { kvm_debug("[%d->%d]KVM VCPU[%d] switch\n", vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); @@ -282,42 +1146,10 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) kvm_mips_migrate_count(vcpu); } - if (!newasid) { - /* - * If we preempted while the guest was executing, then reload - * the pre-empted ASID - */ - if (current->flags & PF_VCPU) { - write_c0_entryhi(vcpu->arch. - preempt_entryhi & asid_mask); - ehb(); - } - } else { - /* New ASIDs were allocated for the VM */ - - /* - * Were we in guest context? If so then the pre-empted ASID is - * no longer valid, we need to set it to what it should be based - * on the mode of the Guest (Kernel/User) - */ - if (current->flags & PF_VCPU) { - if (KVM_GUEST_KERNEL_MODE(vcpu)) - write_c0_entryhi(vcpu->arch. - guest_kernel_asid[cpu] & - asid_mask); - else - write_c0_entryhi(vcpu->arch. 
- guest_user_asid[cpu] & - asid_mask); - ehb(); - } - } - /* restore guest state to registers */ - kvm_mips_callbacks->vcpu_set_regs(vcpu); + kvm_mips_callbacks->vcpu_load(vcpu, cpu); local_irq_restore(flags); - } /* ASID can change if another task is scheduled during preemption */ @@ -329,75 +1161,90 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) local_irq_save(flags); cpu = smp_processor_id(); - - vcpu->arch.preempt_entryhi = read_c0_entryhi(); vcpu->arch.last_sched_cpu = cpu; + vcpu->cpu = -1; /* save guest state in registers */ - kvm_mips_callbacks->vcpu_get_regs(vcpu); - - if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & - asid_version_mask(cpu))) { - kvm_debug("%s: Dropping MMU Context: %#lx\n", __func__, - cpu_context(cpu, current->mm)); - drop_mmu_context(current->mm, cpu); - } - write_c0_entryhi(cpu_asid(cpu, current->mm)); - ehb(); + kvm_mips_callbacks->vcpu_put(vcpu, cpu); local_irq_restore(flags); } -u32 kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu) +/** + * kvm_trap_emul_gva_fault() - Safely attempt to handle a GVA access fault. + * @vcpu: Virtual CPU. + * @gva: Guest virtual address to be accessed. + * @write: True if write attempted (must be dirtied and made writable). + * + * Safely attempt to handle a GVA fault, mapping GVA pages if necessary, and + * dirtying the page if @write so that guest instructions can be modified. + * + * Returns: KVM_MIPS_MAPPED on success. + * KVM_MIPS_GVA if bad guest virtual address. + * KVM_MIPS_GPA if bad guest physical address. + * KVM_MIPS_TLB if guest TLB not present. + * KVM_MIPS_TLBINV if guest TLB present but not valid. + * KVM_MIPS_TLBMOD if guest TLB read only. + */ +enum kvm_mips_fault_result kvm_trap_emul_gva_fault(struct kvm_vcpu *vcpu, + unsigned long gva, + bool write) { struct mips_coproc *cop0 = vcpu->arch.cop0; - unsigned long paddr, flags, vpn2, asid; - unsigned long va = (unsigned long)opc; - void *vaddr; - u32 inst; + struct kvm_mips_tlb *tlb; int index; - if (KVM_GUEST_KSEGX(va) < KVM_GUEST_KSEG0 || - KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG23) { - local_irq_save(flags); - index = kvm_mips_host_tlb_lookup(vcpu, va); - if (index >= 0) { - inst = *(opc); - } else { - vpn2 = va & VPN2_MASK; - asid = kvm_read_c0_guest_entryhi(cop0) & - KVM_ENTRYHI_ASID; - index = kvm_mips_guest_tlb_lookup(vcpu, vpn2 | asid); - if (index < 0) { - kvm_err("%s: get_user_failed for %p, vcpu: %p, ASID: %#lx\n", - __func__, opc, vcpu, read_c0_entryhi()); - kvm_mips_dump_host_tlbs(); - kvm_mips_dump_guest_tlbs(vcpu); - local_irq_restore(flags); - return KVM_INVALID_INST; - } - if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, - &vcpu->arch.guest_tlb[index])) { - kvm_err("%s: handling mapped seg tlb fault failed for %p, index: %u, vcpu: %p, ASID: %#lx\n", - __func__, opc, index, vcpu, - read_c0_entryhi()); - kvm_mips_dump_guest_tlbs(vcpu); - local_irq_restore(flags); - return KVM_INVALID_INST; - } - inst = *(opc); - } - local_irq_restore(flags); - } else if (KVM_GUEST_KSEGX(va) == KVM_GUEST_KSEG0) { - paddr = kvm_mips_translate_guest_kseg0_to_hpa(vcpu, va); - vaddr = kmap_atomic(pfn_to_page(PHYS_PFN(paddr))); - vaddr += paddr & ~PAGE_MASK; - inst = *(u32 *)vaddr; - kunmap_atomic(vaddr); + if (KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG0) { + if (kvm_mips_handle_kseg0_tlb_fault(gva, vcpu, write) < 0) + return KVM_MIPS_GPA; + } else if ((KVM_GUEST_KSEGX(gva) < KVM_GUEST_KSEG0) || + KVM_GUEST_KSEGX(gva) == KVM_GUEST_KSEG23) { + /* Address should be in the guest TLB */ + index = kvm_mips_guest_tlb_lookup(vcpu, (gva & VPN2_MASK) | + 
(kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID)); + if (index < 0) + return KVM_MIPS_TLB; + tlb = &vcpu->arch.guest_tlb[index]; + + /* Entry should be valid, and dirty for writes */ + if (!TLB_IS_VALID(*tlb, gva)) + return KVM_MIPS_TLBINV; + if (write && !TLB_IS_DIRTY(*tlb, gva)) + return KVM_MIPS_TLBMOD; + + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, gva, write)) + return KVM_MIPS_GPA; } else { - kvm_err("%s: illegal address: %p\n", __func__, opc); - return KVM_INVALID_INST; + return KVM_MIPS_GVA; } - return inst; + return KVM_MIPS_MAPPED; +} + +int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out) +{ + int err; + +retry: + kvm_trap_emul_gva_lockless_begin(vcpu); + err = get_user(*out, opc); + kvm_trap_emul_gva_lockless_end(vcpu); + + if (unlikely(err)) { + /* + * Try to handle the fault, maybe we just raced with a GVA + * invalidation. + */ + err = kvm_trap_emul_gva_fault(vcpu, (unsigned long)opc, + false); + if (unlikely(err)) { + kvm_err("%s: illegal address: %p\n", + __func__, opc); + return -EFAULT; + } + + /* Hopefully it'll work now */ + goto retry; + } + return 0; } diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 254377d..2819eb7 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -33,28 +33,20 @@ #define KVM_GUEST_PC_TLB 0 #define KVM_GUEST_SP_TLB 1 -atomic_t kvm_mips_instance; -EXPORT_SYMBOL_GPL(kvm_mips_instance); - static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu) { + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; int cpu = smp_processor_id(); - return vcpu->arch.guest_kernel_asid[cpu] & - cpu_asid_mask(&cpu_data[cpu]); + return cpu_asid(cpu, kern_mm); } static u32 kvm_mips_get_user_asid(struct kvm_vcpu *vcpu) { + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; int cpu = smp_processor_id(); - return vcpu->arch.guest_user_asid[cpu] & - cpu_asid_mask(&cpu_data[cpu]); -} - -inline u32 kvm_mips_get_commpage_asid(struct kvm_vcpu *vcpu) -{ - return vcpu->kvm->arch.commpage_tlb; + return cpu_asid(cpu, user_mm); } /* Structure defining an tlb entry data set. 
*/ @@ -104,109 +96,6 @@ void kvm_mips_dump_guest_tlbs(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_mips_dump_guest_tlbs); -/* XXXKYMA: Must be called with interrupts disabled */ -/* set flush_dcache_mask == 0 if no dcache flush required */ -int kvm_mips_host_tlb_write(struct kvm_vcpu *vcpu, unsigned long entryhi, - unsigned long entrylo0, unsigned long entrylo1, - int flush_dcache_mask) -{ - unsigned long flags; - unsigned long old_entryhi; - int idx; - - local_irq_save(flags); - - old_entryhi = read_c0_entryhi(); - write_c0_entryhi(entryhi); - mtc0_tlbw_hazard(); - - tlb_probe(); - tlb_probe_hazard(); - idx = read_c0_index(); - - if (idx > current_cpu_data.tlbsize) { - kvm_err("%s: Invalid Index: %d\n", __func__, idx); - kvm_mips_dump_host_tlbs(); - local_irq_restore(flags); - return -1; - } - - write_c0_entrylo0(entrylo0); - write_c0_entrylo1(entrylo1); - mtc0_tlbw_hazard(); - - if (idx < 0) - tlb_write_random(); - else - tlb_write_indexed(); - tlbw_use_hazard(); - - kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0(R): 0x%08lx, entrylo1(R): 0x%08lx\n", - vcpu->arch.pc, idx, read_c0_entryhi(), - read_c0_entrylo0(), read_c0_entrylo1()); - - /* Flush D-cache */ - if (flush_dcache_mask) { - if (entrylo0 & ENTRYLO_V) { - ++vcpu->stat.flush_dcache_exits; - flush_data_cache_page((entryhi & VPN2_MASK) & - ~flush_dcache_mask); - } - if (entrylo1 & ENTRYLO_V) { - ++vcpu->stat.flush_dcache_exits; - flush_data_cache_page(((entryhi & VPN2_MASK) & - ~flush_dcache_mask) | - (0x1 << PAGE_SHIFT)); - } - } - - /* Restore old ASID */ - write_c0_entryhi(old_entryhi); - mtc0_tlbw_hazard(); - local_irq_restore(flags); - return 0; -} -EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_write); - -int kvm_mips_handle_commpage_tlb_fault(unsigned long badvaddr, - struct kvm_vcpu *vcpu) -{ - kvm_pfn_t pfn; - unsigned long flags, old_entryhi = 0, vaddr = 0; - unsigned long entrylo[2] = { 0, 0 }; - unsigned int pair_idx; - - pfn = PFN_DOWN(virt_to_phys(vcpu->arch.kseg0_commpage)); - pair_idx = (badvaddr >> PAGE_SHIFT) & 1; - entrylo[pair_idx] = mips3_paddr_to_tlbpfn(pfn << PAGE_SHIFT) | - ((_page_cachable_default >> _CACHE_SHIFT) << ENTRYLO_C_SHIFT) | - ENTRYLO_D | ENTRYLO_V; - - local_irq_save(flags); - - old_entryhi = read_c0_entryhi(); - vaddr = badvaddr & (PAGE_MASK << 1); - write_c0_entryhi(vaddr | kvm_mips_get_kernel_asid(vcpu)); - write_c0_entrylo0(entrylo[0]); - write_c0_entrylo1(entrylo[1]); - write_c0_index(kvm_mips_get_commpage_asid(vcpu)); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - tlbw_use_hazard(); - - kvm_debug("@ %#lx idx: %2d [entryhi(R): %#lx] entrylo0 (R): 0x%08lx, entrylo1(R): 0x%08lx\n", - vcpu->arch.pc, read_c0_index(), read_c0_entryhi(), - read_c0_entrylo0(), read_c0_entrylo1()); - - /* Restore old ASID */ - write_c0_entryhi(old_entryhi); - mtc0_tlbw_hazard(); - local_irq_restore(flags); - - return 0; -} -EXPORT_SYMBOL_GPL(kvm_mips_handle_commpage_tlb_fault); - int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) { int i; @@ -228,51 +117,11 @@ int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long entryhi) } EXPORT_SYMBOL_GPL(kvm_mips_guest_tlb_lookup); -int kvm_mips_host_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long vaddr) -{ - unsigned long old_entryhi, flags; - int idx; - - local_irq_save(flags); - - old_entryhi = read_c0_entryhi(); - - if (KVM_GUEST_KERNEL_MODE(vcpu)) - write_c0_entryhi((vaddr & VPN2_MASK) | - kvm_mips_get_kernel_asid(vcpu)); - else { - write_c0_entryhi((vaddr & VPN2_MASK) | - kvm_mips_get_user_asid(vcpu)); - } - - mtc0_tlbw_hazard(); - - 
tlb_probe(); - tlb_probe_hazard(); - idx = read_c0_index(); - - /* Restore old ASID */ - write_c0_entryhi(old_entryhi); - mtc0_tlbw_hazard(); - - local_irq_restore(flags); - - kvm_debug("Host TLB lookup, %#lx, idx: %2d\n", vaddr, idx); - - return idx; -} -EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_lookup); - -int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) +static int _kvm_mips_host_tlb_inv(unsigned long entryhi) { int idx; - unsigned long flags, old_entryhi; - - local_irq_save(flags); - - old_entryhi = read_c0_entryhi(); - write_c0_entryhi((va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu)); + write_c0_entryhi(entryhi); mtc0_tlbw_hazard(); tlb_probe(); @@ -282,7 +131,7 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) if (idx >= current_cpu_data.tlbsize) BUG(); - if (idx > 0) { + if (idx >= 0) { write_c0_entryhi(UNIQUE_ENTRYHI(idx)); write_c0_entrylo0(0); write_c0_entrylo1(0); @@ -292,93 +141,75 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va) tlbw_use_hazard(); } - write_c0_entryhi(old_entryhi); - mtc0_tlbw_hazard(); - - local_irq_restore(flags); - - if (idx > 0) - kvm_debug("%s: Invalidated entryhi %#lx @ idx %d\n", __func__, - (va & VPN2_MASK) | kvm_mips_get_user_asid(vcpu), idx); - - return 0; + return idx; } -EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); -void kvm_mips_flush_host_tlb(int skip_kseg0) +int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va, + bool user, bool kernel) { - unsigned long flags; - unsigned long old_entryhi, entryhi; - unsigned long old_pagemask; - int entry = 0; - int maxentry = current_cpu_data.tlbsize; + int idx_user, idx_kernel; + unsigned long flags, old_entryhi; local_irq_save(flags); old_entryhi = read_c0_entryhi(); - old_pagemask = read_c0_pagemask(); - - /* Blast 'em all away. */ - for (entry = 0; entry < maxentry; entry++) { - write_c0_index(entry); - - if (skip_kseg0) { - mtc0_tlbr_hazard(); - tlb_read(); - tlb_read_hazard(); - - entryhi = read_c0_entryhi(); - /* Don't blow away guest kernel entries */ - if (KVM_GUEST_KSEGX(entryhi) == KVM_GUEST_KSEG0) - continue; - - write_c0_pagemask(old_pagemask); - } - - /* Make sure all entries differ. */ - write_c0_entryhi(UNIQUE_ENTRYHI(entry)); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - mtc0_tlbw_hazard(); - - tlb_write_indexed(); - tlbw_use_hazard(); - } + if (user) + idx_user = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | + kvm_mips_get_user_asid(vcpu)); + if (kernel) + idx_kernel = _kvm_mips_host_tlb_inv((va & VPN2_MASK) | + kvm_mips_get_kernel_asid(vcpu)); write_c0_entryhi(old_entryhi); - write_c0_pagemask(old_pagemask); mtc0_tlbw_hazard(); local_irq_restore(flags); + + if (user && idx_user >= 0) + kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n", + __func__, (va & VPN2_MASK) | + kvm_mips_get_user_asid(vcpu), idx_user); + if (kernel && idx_kernel >= 0) + kvm_debug("%s: Invalidated guest kernel entryhi %#lx @ idx %d\n", + __func__, (va & VPN2_MASK) | + kvm_mips_get_kernel_asid(vcpu), idx_kernel); + + return 0; } -EXPORT_SYMBOL_GPL(kvm_mips_flush_host_tlb); +EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv); -void kvm_local_flush_tlb_all(void) +/** + * kvm_mips_suspend_mm() - Suspend the active mm. + * @cpu The CPU we're running on. + * + * Suspend the active_mm, ready for a switch to a KVM guest virtual address + * space. This is left active for the duration of guest context, including time + * with interrupts enabled, so we need to be careful not to confuse e.g. cache + * management IPIs. 
+ * + * kvm_mips_resume_mm() should be called before context switching to a different + * process so we don't need to worry about reference counting. + * + * This needs to be in static kernel code to avoid exporting init_mm. + */ +void kvm_mips_suspend_mm(int cpu) { - unsigned long flags; - unsigned long old_ctx; - int entry = 0; - - local_irq_save(flags); - /* Save old context and create impossible VPN2 value */ - old_ctx = read_c0_entryhi(); - write_c0_entrylo0(0); - write_c0_entrylo1(0); - - /* Blast 'em all away. */ - while (entry < current_cpu_data.tlbsize) { - /* Make sure all entries differ. */ - write_c0_entryhi(UNIQUE_ENTRYHI(entry)); - write_c0_index(entry); - mtc0_tlbw_hazard(); - tlb_write_indexed(); - tlbw_use_hazard(); - entry++; - } - write_c0_entryhi(old_ctx); - mtc0_tlbw_hazard(); + cpumask_clear_cpu(cpu, mm_cpumask(current->active_mm)); + current->active_mm = &init_mm; +} +EXPORT_SYMBOL_GPL(kvm_mips_suspend_mm); - local_irq_restore(flags); +/** + * kvm_mips_resume_mm() - Resume the current process mm. + * @cpu The CPU we're running on. + * + * Resume the mm of the current process, after a switch back from a KVM guest + * virtual address space (see kvm_mips_suspend_mm()). + */ +void kvm_mips_resume_mm(int cpu) +{ + cpumask_set_cpu(cpu, mm_cpumask(current->mm)); + current->active_mm = current->mm; } -EXPORT_SYMBOL_GPL(kvm_local_flush_tlb_all); +EXPORT_SYMBOL_GPL(kvm_mips_resume_mm); diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index 3b20441..b1fa53b 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -11,9 +11,11 @@ #include <linux/errno.h> #include <linux/err.h> -#include <linux/vmalloc.h> - #include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> +#include <asm/mmu_context.h> +#include <asm/pgalloc.h> #include "interrupt.h" @@ -21,9 +23,12 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva) { gpa_t gpa; gva_t kseg = KSEGX(gva); + gva_t gkseg = KVM_GUEST_KSEGX(gva); if ((kseg == CKSEG0) || (kseg == CKSEG1)) gpa = CPHYSADDR(gva); + else if (gkseg == KVM_GUEST_KSEG0) + gpa = KVM_GUEST_CPHYSADDR(gva); else { kvm_err("%s: cannot find GPA for GVA: %#lx\n", __func__, gva); kvm_mips_dump_host_tlbs(); @@ -83,48 +88,134 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu) return ret; } +static int kvm_mips_bad_load(u32 cause, u32 *opc, struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + int err; + + /* A code fetch fault doesn't count as an MMIO */ + if (kvm_is_ifetch_fault(&vcpu->arch)) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Fetch the instruction. */ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Emulate the load */ + er = kvm_mips_emulate_load(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Emulate load from MMIO space failed\n"); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } else { + run->exit_reason = KVM_EXIT_MMIO; + } + return RESUME_HOST; +} + +static int kvm_mips_bad_store(u32 cause, u32 *opc, struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + enum emulation_result er; + union mips_instruction inst; + int err; + + /* Fetch the instruction. 
*/ + if (cause & CAUSEF_BD) + opc += 1; + err = kvm_get_badinstr(opc, vcpu, &inst.word); + if (err) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + + /* Emulate the store */ + er = kvm_mips_emulate_store(inst, cause, run, vcpu); + if (er == EMULATE_FAIL) { + kvm_err("Emulate store to MMIO space failed\n"); + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + } else { + run->exit_reason = KVM_EXIT_MMIO; + } + return RESUME_HOST; +} + +static int kvm_mips_bad_access(u32 cause, u32 *opc, struct kvm_run *run, + struct kvm_vcpu *vcpu, bool store) +{ + if (store) + return kvm_mips_bad_store(cause, opc, run, vcpu); + else + return kvm_mips_bad_load(cause, opc, run, vcpu); +} + static int kvm_trap_emul_handle_tlb_mod(struct kvm_vcpu *vcpu) { + struct mips_coproc *cop0 = vcpu->arch.cop0; struct kvm_run *run = vcpu->run; u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; u32 cause = vcpu->arch.host_cp0_cause; - enum emulation_result er = EMULATE_DONE; - int ret = RESUME_GUEST; + struct kvm_mips_tlb *tlb; + unsigned long entryhi; + int index; if (KVM_GUEST_KSEGX(badvaddr) < KVM_GUEST_KSEG0 || KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG23) { - kvm_debug("USER/KSEG23 ADDR TLB MOD fault: cause %#x, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); - er = kvm_mips_handle_tlbmod(cause, opc, run, vcpu); + /* + * First find the mapping in the guest TLB. If the failure to + * write was due to the guest TLB, it should be up to the guest + * to handle it. + */ + entryhi = (badvaddr & VPN2_MASK) | + (kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID); + index = kvm_mips_guest_tlb_lookup(vcpu, entryhi); - if (er == EMULATE_DONE) - ret = RESUME_GUEST; - else { + /* + * These should never happen. + * They would indicate stale host TLB entries. + */ + if (unlikely(index < 0)) { run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; + return RESUME_HOST; } - } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { + tlb = vcpu->arch.guest_tlb + index; + if (unlikely(!TLB_IS_VALID(*tlb, badvaddr))) { + run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + return RESUME_HOST; + } + /* - * XXXKYMA: The guest kernel does not expect to get this fault - * when we are not using HIGHMEM. Need to address this in a - * HIGHMEM kernel + * Guest entry not dirty? That would explain the TLB modified + * exception. Relay that on to the guest so it can handle it. 
*/ - kvm_err("TLB MOD fault not handled, cause %#x, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); - kvm_mips_dump_host_tlbs(); - kvm_arch_vcpu_dump_regs(vcpu); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; + if (!TLB_IS_DIRTY(*tlb, badvaddr)) { + kvm_mips_emulate_tlbmod(cause, opc, run, vcpu); + return RESUME_GUEST; + } + + if (kvm_mips_handle_mapped_seg_tlb_fault(vcpu, tlb, badvaddr, + true)) + /* Not writable, needs handling as MMIO */ + return kvm_mips_bad_store(cause, opc, run, vcpu); + return RESUME_GUEST; + } else if (KVM_GUEST_KSEGX(badvaddr) == KVM_GUEST_KSEG0) { + if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, true) < 0) + /* Not writable, needs handling as MMIO */ + return kvm_mips_bad_store(cause, opc, run, vcpu); + return RESUME_GUEST; } else { - kvm_err("Illegal TLB Mod fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", - cause, opc, badvaddr); - kvm_mips_dump_host_tlbs(); - kvm_arch_vcpu_dump_regs(vcpu); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; + /* host kernel addresses are all handled as MMIO */ + return kvm_mips_bad_store(cause, opc, run, vcpu); } - return ret; } static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) @@ -157,7 +248,7 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) * into the shadow host TLB */ - er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu); + er = kvm_mips_handle_tlbmiss(cause, opc, run, vcpu, store); if (er == EMULATE_DONE) ret = RESUME_GUEST; else { @@ -169,29 +260,15 @@ static int kvm_trap_emul_handle_tlb_miss(struct kvm_vcpu *vcpu, bool store) * All KSEG0 faults are handled by KVM, as the guest kernel does * not expect to ever get them */ - if (kvm_mips_handle_kseg0_tlb_fault - (vcpu->arch.host_cp0_badvaddr, vcpu) < 0) { - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } + if (kvm_mips_handle_kseg0_tlb_fault(badvaddr, vcpu, store) < 0) + ret = kvm_mips_bad_access(cause, opc, run, vcpu, store); } else if (KVM_GUEST_KERNEL_MODE(vcpu) && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { /* * With EVA we may get a TLB exception instead of an address * error when the guest performs MMIO to KSeg1 addresses. */ - kvm_debug("Emulate %s MMIO space\n", - store ? "Store to" : "Load from"); - er = kvm_mips_emulate_inst(cause, opc, run, vcpu); - if (er == EMULATE_FAIL) { - kvm_err("Emulate %s MMIO space failed\n", - store ? "Store to" : "Load from"); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } else { - run->exit_reason = KVM_EXIT_MMIO; - ret = RESUME_HOST; - } + ret = kvm_mips_bad_access(cause, opc, run, vcpu, store); } else { kvm_err("Illegal TLB %s fault address , cause %#x, PC: %p, BadVaddr: %#lx\n", store ? 
"ST" : "LD", cause, opc, badvaddr); @@ -219,21 +296,11 @@ static int kvm_trap_emul_handle_addr_err_st(struct kvm_vcpu *vcpu) u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; u32 cause = vcpu->arch.host_cp0_cause; - enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; if (KVM_GUEST_KERNEL_MODE(vcpu) && (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1)) { - kvm_debug("Emulate Store to MMIO space\n"); - er = kvm_mips_emulate_inst(cause, opc, run, vcpu); - if (er == EMULATE_FAIL) { - kvm_err("Emulate Store to MMIO space failed\n"); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } else { - run->exit_reason = KVM_EXIT_MMIO; - ret = RESUME_HOST; - } + ret = kvm_mips_bad_store(cause, opc, run, vcpu); } else { kvm_err("Address Error (STORE): cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); @@ -249,26 +316,15 @@ static int kvm_trap_emul_handle_addr_err_ld(struct kvm_vcpu *vcpu) u32 __user *opc = (u32 __user *) vcpu->arch.pc; unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr; u32 cause = vcpu->arch.host_cp0_cause; - enum emulation_result er = EMULATE_DONE; int ret = RESUME_GUEST; if (KSEGX(badvaddr) == CKSEG0 || KSEGX(badvaddr) == CKSEG1) { - kvm_debug("Emulate Load from MMIO space @ %#lx\n", badvaddr); - er = kvm_mips_emulate_inst(cause, opc, run, vcpu); - if (er == EMULATE_FAIL) { - kvm_err("Emulate Load from MMIO space failed\n"); - run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - ret = RESUME_HOST; - } else { - run->exit_reason = KVM_EXIT_MMIO; - ret = RESUME_HOST; - } + ret = kvm_mips_bad_load(cause, opc, run, vcpu); } else { kvm_err("Address Error (LOAD): cause %#x, PC: %p, BadVaddr: %#lx\n", cause, opc, badvaddr); run->exit_reason = KVM_EXIT_INTERNAL_ERROR; ret = RESUME_HOST; - er = EMULATE_FAIL; } return ret; } @@ -428,16 +484,75 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu) return ret; } -static int kvm_trap_emul_vm_init(struct kvm *kvm) +static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) { + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + + /* + * Allocate GVA -> HPA page tables. + * MIPS doesn't use the mm_struct pointer argument. 
+ */ + kern_mm->pgd = pgd_alloc(kern_mm); + if (!kern_mm->pgd) + return -ENOMEM; + + user_mm->pgd = pgd_alloc(user_mm); + if (!user_mm->pgd) { + pgd_free(kern_mm, kern_mm->pgd); + return -ENOMEM; + } + return 0; } -static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu) +static void kvm_mips_emul_free_gva_pt(pgd_t *pgd) { - vcpu->arch.kscratch_enabled = 0xfc; + /* Don't free host kernel page tables copied from init_mm.pgd */ + const unsigned long end = 0x80000000; + unsigned long pgd_va, pud_va, pmd_va; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + + for (i = 0; i < USER_PTRS_PER_PGD; i++) { + if (pgd_none(pgd[i])) + continue; + + pgd_va = (unsigned long)i << PGDIR_SHIFT; + if (pgd_va >= end) + break; + pud = pud_offset(pgd + i, 0); + for (j = 0; j < PTRS_PER_PUD; j++) { + if (pud_none(pud[j])) + continue; + + pud_va = pgd_va | ((unsigned long)j << PUD_SHIFT); + if (pud_va >= end) + break; + pmd = pmd_offset(pud + j, 0); + for (k = 0; k < PTRS_PER_PMD; k++) { + if (pmd_none(pmd[k])) + continue; + + pmd_va = pud_va | (k << PMD_SHIFT); + if (pmd_va >= end) + break; + pte = pte_offset(pmd + k, 0); + pte_free_kernel(NULL, pte); + } + pmd_free(NULL, pmd); + } + pud_free(NULL, pud); + } + pgd_free(NULL, pgd); +} - return 0; +static void kvm_trap_emul_vcpu_uninit(struct kvm_vcpu *vcpu) +{ + kvm_mips_emul_free_gva_pt(vcpu->arch.guest_kernel_mm.pgd); + kvm_mips_emul_free_gva_pt(vcpu->arch.guest_user_mm.pgd); } static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) @@ -499,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) /* Set Wait IE/IXMT Ignore in Config7, IAR, AR */ kvm_write_c0_guest_config7(cop0, (MIPS_CONF7_WII) | (1 << 10)); + /* Status */ + kvm_write_c0_guest_status(cop0, ST0_BEV | ST0_ERL); + /* * Setup IntCtl defaults, compatibility mode for timer interrupts (HW5) */ @@ -508,17 +626,76 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu) kvm_write_c0_guest_ebase(cop0, KVM_GUEST_KSEG0 | (vcpu_id & MIPS_EBASE_CPUNUM)); + /* Put PC at guest reset vector */ + vcpu->arch.pc = KVM_GUEST_CKSEG1ADDR(0x1fc00000); + return 0; } +static void kvm_trap_emul_flush_shadow_all(struct kvm *kvm) +{ + /* Flush GVA page tables and invalidate GVA ASIDs on all VCPUs */ + kvm_flush_remote_tlbs(kvm); +} + +static void kvm_trap_emul_flush_shadow_memslot(struct kvm *kvm, + const struct kvm_memory_slot *slot) +{ + kvm_trap_emul_flush_shadow_all(kvm); +} + +static u64 kvm_trap_emul_get_one_regs[] = { + KVM_REG_MIPS_CP0_INDEX, + KVM_REG_MIPS_CP0_ENTRYLO0, + KVM_REG_MIPS_CP0_ENTRYLO1, + KVM_REG_MIPS_CP0_CONTEXT, + KVM_REG_MIPS_CP0_USERLOCAL, + KVM_REG_MIPS_CP0_PAGEMASK, + KVM_REG_MIPS_CP0_WIRED, + KVM_REG_MIPS_CP0_HWRENA, + KVM_REG_MIPS_CP0_BADVADDR, + KVM_REG_MIPS_CP0_COUNT, + KVM_REG_MIPS_CP0_ENTRYHI, + KVM_REG_MIPS_CP0_COMPARE, + KVM_REG_MIPS_CP0_STATUS, + KVM_REG_MIPS_CP0_INTCTL, + KVM_REG_MIPS_CP0_CAUSE, + KVM_REG_MIPS_CP0_EPC, + KVM_REG_MIPS_CP0_PRID, + KVM_REG_MIPS_CP0_EBASE, + KVM_REG_MIPS_CP0_CONFIG, + KVM_REG_MIPS_CP0_CONFIG1, + KVM_REG_MIPS_CP0_CONFIG2, + KVM_REG_MIPS_CP0_CONFIG3, + KVM_REG_MIPS_CP0_CONFIG4, + KVM_REG_MIPS_CP0_CONFIG5, + KVM_REG_MIPS_CP0_CONFIG7, + KVM_REG_MIPS_CP0_ERROREPC, + KVM_REG_MIPS_CP0_KSCRATCH1, + KVM_REG_MIPS_CP0_KSCRATCH2, + KVM_REG_MIPS_CP0_KSCRATCH3, + KVM_REG_MIPS_CP0_KSCRATCH4, + KVM_REG_MIPS_CP0_KSCRATCH5, + KVM_REG_MIPS_CP0_KSCRATCH6, + + KVM_REG_MIPS_COUNT_CTL, + KVM_REG_MIPS_COUNT_RESUME, + KVM_REG_MIPS_COUNT_HZ, +}; + static unsigned long kvm_trap_emul_num_regs(struct kvm_vcpu *vcpu) { - return 0; + return 
ARRAY_SIZE(kvm_trap_emul_get_one_regs); } static int kvm_trap_emul_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices) { + if (copy_to_user(indices, kvm_trap_emul_get_one_regs, + sizeof(kvm_trap_emul_get_one_regs))) + return -EFAULT; + indices += ARRAY_SIZE(kvm_trap_emul_get_one_regs); + return 0; } @@ -526,7 +703,81 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, s64 *v) { + struct mips_coproc *cop0 = vcpu->arch.cop0; + switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + *v = (long)kvm_read_c0_guest_index(cop0); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + *v = kvm_read_c0_guest_entrylo0(cop0); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + *v = kvm_read_c0_guest_entrylo1(cop0); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + *v = (long)kvm_read_c0_guest_context(cop0); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + *v = (long)kvm_read_c0_guest_userlocal(cop0); + break; + case KVM_REG_MIPS_CP0_PAGEMASK: + *v = (long)kvm_read_c0_guest_pagemask(cop0); + break; + case KVM_REG_MIPS_CP0_WIRED: + *v = (long)kvm_read_c0_guest_wired(cop0); + break; + case KVM_REG_MIPS_CP0_HWRENA: + *v = (long)kvm_read_c0_guest_hwrena(cop0); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + *v = (long)kvm_read_c0_guest_badvaddr(cop0); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + *v = (long)kvm_read_c0_guest_entryhi(cop0); + break; + case KVM_REG_MIPS_CP0_COMPARE: + *v = (long)kvm_read_c0_guest_compare(cop0); + break; + case KVM_REG_MIPS_CP0_STATUS: + *v = (long)kvm_read_c0_guest_status(cop0); + break; + case KVM_REG_MIPS_CP0_INTCTL: + *v = (long)kvm_read_c0_guest_intctl(cop0); + break; + case KVM_REG_MIPS_CP0_CAUSE: + *v = (long)kvm_read_c0_guest_cause(cop0); + break; + case KVM_REG_MIPS_CP0_EPC: + *v = (long)kvm_read_c0_guest_epc(cop0); + break; + case KVM_REG_MIPS_CP0_PRID: + *v = (long)kvm_read_c0_guest_prid(cop0); + break; + case KVM_REG_MIPS_CP0_EBASE: + *v = (long)kvm_read_c0_guest_ebase(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG: + *v = (long)kvm_read_c0_guest_config(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG1: + *v = (long)kvm_read_c0_guest_config1(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG2: + *v = (long)kvm_read_c0_guest_config2(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG3: + *v = (long)kvm_read_c0_guest_config3(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG4: + *v = (long)kvm_read_c0_guest_config4(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG5: + *v = (long)kvm_read_c0_guest_config5(cop0); + break; + case KVM_REG_MIPS_CP0_CONFIG7: + *v = (long)kvm_read_c0_guest_config7(cop0); + break; case KVM_REG_MIPS_CP0_COUNT: *v = kvm_mips_read_count(vcpu); break; @@ -539,6 +790,27 @@ static int kvm_trap_emul_get_one_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_COUNT_HZ: *v = vcpu->arch.count_hz; break; + case KVM_REG_MIPS_CP0_ERROREPC: + *v = (long)kvm_read_c0_guest_errorepc(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1: + *v = (long)kvm_read_c0_guest_kscratch1(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH2: + *v = (long)kvm_read_c0_guest_kscratch2(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH3: + *v = (long)kvm_read_c0_guest_kscratch3(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH4: + *v = (long)kvm_read_c0_guest_kscratch4(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH5: + *v = (long)kvm_read_c0_guest_kscratch5(cop0); + break; + case KVM_REG_MIPS_CP0_KSCRATCH6: + *v = (long)kvm_read_c0_guest_kscratch6(cop0); + break; default: return -EINVAL; } @@ -554,6 +826,56 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, unsigned int cur, 
change; switch (reg->id) { + case KVM_REG_MIPS_CP0_INDEX: + kvm_write_c0_guest_index(cop0, v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO0: + kvm_write_c0_guest_entrylo0(cop0, v); + break; + case KVM_REG_MIPS_CP0_ENTRYLO1: + kvm_write_c0_guest_entrylo1(cop0, v); + break; + case KVM_REG_MIPS_CP0_CONTEXT: + kvm_write_c0_guest_context(cop0, v); + break; + case KVM_REG_MIPS_CP0_USERLOCAL: + kvm_write_c0_guest_userlocal(cop0, v); + break; + case KVM_REG_MIPS_CP0_PAGEMASK: + kvm_write_c0_guest_pagemask(cop0, v); + break; + case KVM_REG_MIPS_CP0_WIRED: + kvm_write_c0_guest_wired(cop0, v); + break; + case KVM_REG_MIPS_CP0_HWRENA: + kvm_write_c0_guest_hwrena(cop0, v); + break; + case KVM_REG_MIPS_CP0_BADVADDR: + kvm_write_c0_guest_badvaddr(cop0, v); + break; + case KVM_REG_MIPS_CP0_ENTRYHI: + kvm_write_c0_guest_entryhi(cop0, v); + break; + case KVM_REG_MIPS_CP0_STATUS: + kvm_write_c0_guest_status(cop0, v); + break; + case KVM_REG_MIPS_CP0_INTCTL: + /* No VInt, so no VS, read-only for now */ + break; + case KVM_REG_MIPS_CP0_EPC: + kvm_write_c0_guest_epc(cop0, v); + break; + case KVM_REG_MIPS_CP0_PRID: + kvm_write_c0_guest_prid(cop0, v); + break; + case KVM_REG_MIPS_CP0_EBASE: + /* + * Allow core number to be written, but the exception base must + * remain in guest KSeg0. + */ + kvm_change_c0_guest_ebase(cop0, 0x1ffff000 | MIPS_EBASE_CPUNUM, + v); + break; case KVM_REG_MIPS_CP0_COUNT: kvm_mips_write_count(vcpu, v); break; @@ -618,6 +940,9 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, kvm_write_c0_guest_config5(cop0, v); } break; + case KVM_REG_MIPS_CP0_CONFIG7: + /* writes ignored */ + break; case KVM_REG_MIPS_COUNT_CTL: ret = kvm_mips_set_count_ctl(vcpu, v); break; @@ -627,24 +952,269 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu, case KVM_REG_MIPS_COUNT_HZ: ret = kvm_mips_set_count_hz(vcpu, v); break; + case KVM_REG_MIPS_CP0_ERROREPC: + kvm_write_c0_guest_errorepc(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH1: + kvm_write_c0_guest_kscratch1(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH2: + kvm_write_c0_guest_kscratch2(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH3: + kvm_write_c0_guest_kscratch3(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH4: + kvm_write_c0_guest_kscratch4(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH5: + kvm_write_c0_guest_kscratch5(cop0, v); + break; + case KVM_REG_MIPS_CP0_KSCRATCH6: + kvm_write_c0_guest_kscratch6(cop0, v); + break; default: return -EINVAL; } return ret; } -static int kvm_trap_emul_vcpu_get_regs(struct kvm_vcpu *vcpu) +static int kvm_trap_emul_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - kvm_lose_fpu(vcpu); + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + struct mm_struct *mm; + + /* + * Were we in guest context? If so, restore the appropriate ASID based + * on the mode of the Guest (Kernel/User). + */ + if (current->flags & PF_VCPU) { + mm = KVM_GUEST_KERNEL_MODE(vcpu) ? 
kern_mm : user_mm; + if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu)) + get_new_mmu_context(mm, cpu); + write_c0_entryhi(cpu_asid(cpu, mm)); + TLBMISS_HANDLER_SETUP_PGD(mm->pgd); + kvm_mips_suspend_mm(cpu); + ehb(); + } return 0; } -static int kvm_trap_emul_vcpu_set_regs(struct kvm_vcpu *vcpu) +static int kvm_trap_emul_vcpu_put(struct kvm_vcpu *vcpu, int cpu) { + kvm_lose_fpu(vcpu); + + if (current->flags & PF_VCPU) { + /* Restore normal Linux process memory map */ + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu))) + get_new_mmu_context(current->mm, cpu); + write_c0_entryhi(cpu_asid(cpu, current->mm)); + TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd); + kvm_mips_resume_mm(cpu); + ehb(); + } + return 0; } +static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, + bool reload_asid) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + struct mm_struct *mm; + int i; + + if (likely(!vcpu->requests)) + return; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + /* + * Both kernel & user GVA mappings must be invalidated. The + * caller is just about to check whether the ASID is stale + * anyway so no need to reload it here. + */ + kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_GPA | KMF_KERN); + kvm_mips_flush_gva_pt(user_mm->pgd, KMF_GPA | KMF_USER); + for_each_possible_cpu(i) { + cpu_context(i, kern_mm) = 0; + cpu_context(i, user_mm) = 0; + } + + /* Generate new ASID for current mode */ + if (reload_asid) { + mm = KVM_GUEST_KERNEL_MODE(vcpu) ? kern_mm : user_mm; + get_new_mmu_context(mm, cpu); + htw_stop(); + write_c0_entryhi(cpu_asid(cpu, mm)); + TLBMISS_HANDLER_SETUP_PGD(mm->pgd); + htw_start(); + } + } +} + +/** + * kvm_trap_emul_gva_lockless_begin() - Begin lockless access to GVA space. + * @vcpu: VCPU pointer. + * + * Call before a GVA space access outside of guest mode, to ensure that + * asynchronous TLB flush requests are handled or delayed until completion of + * the GVA access (as indicated by a matching kvm_trap_emul_gva_lockless_end()). + * + * Should be called with IRQs already enabled. + */ +void kvm_trap_emul_gva_lockless_begin(struct kvm_vcpu *vcpu) +{ + /* We re-enable IRQs in kvm_trap_emul_gva_lockless_end() */ + WARN_ON_ONCE(irqs_disabled()); + + /* + * The caller is about to access the GVA space, so we set the mode to + * force TLB flush requests to send an IPI, and also disable IRQs to + * delay IPI handling until kvm_trap_emul_gva_lockless_end(). + */ + local_irq_disable(); + + /* + * Make sure the read of VCPU requests is not reordered ahead of the + * write to vcpu->mode, or we could miss a TLB flush request while + * the requester sees the VCPU as outside of guest mode and not needing + * an IPI. + */ + smp_store_mb(vcpu->mode, READING_SHADOW_PAGE_TABLES); + + /* + * If a TLB flush has been requested (potentially while + * OUTSIDE_GUEST_MODE and assumed immediately effective), perform it + * before accessing the GVA space, and be sure to reload the ASID if + * necessary as it'll be immediately used. + * + * TLB flush requests after this check will trigger an IPI due to the + * mode change above, which will be delayed due to IRQs disabled. + */ + kvm_trap_emul_check_requests(vcpu, smp_processor_id(), true); +} + +/** + * kvm_trap_emul_gva_lockless_end() - End lockless access to GVA space. + * @vcpu: VCPU pointer. + * + * Called after a GVA space access outside of guest mode. 
Should have a matching + * call to kvm_trap_emul_gva_lockless_begin(). + */ +void kvm_trap_emul_gva_lockless_end(struct kvm_vcpu *vcpu) +{ + /* + * Make sure the write to vcpu->mode is not reordered in front of GVA + * accesses, or a TLB flush requester may not think it necessary to send + * an IPI. + */ + smp_store_release(&vcpu->mode, OUTSIDE_GUEST_MODE); + + /* + * Now that the access to GVA space is complete, its safe for pending + * TLB flush request IPIs to be handled (which indicates completion). + */ + local_irq_enable(); +} + +static void kvm_trap_emul_vcpu_reenter(struct kvm_run *run, + struct kvm_vcpu *vcpu) +{ + struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm; + struct mm_struct *user_mm = &vcpu->arch.guest_user_mm; + struct mm_struct *mm; + struct mips_coproc *cop0 = vcpu->arch.cop0; + int i, cpu = smp_processor_id(); + unsigned int gasid; + + /* + * No need to reload ASID, IRQs are disabled already so there's no rush, + * and we'll check if we need to regenerate below anyway before + * re-entering the guest. + */ + kvm_trap_emul_check_requests(vcpu, cpu, false); + + if (KVM_GUEST_KERNEL_MODE(vcpu)) { + mm = kern_mm; + } else { + mm = user_mm; + + /* + * Lazy host ASID regeneration / PT flush for guest user mode. + * If the guest ASID has changed since the last guest usermode + * execution, invalidate the stale TLB entries and flush GVA PT + * entries too. + */ + gasid = kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID; + if (gasid != vcpu->arch.last_user_gasid) { + kvm_mips_flush_gva_pt(user_mm->pgd, KMF_USER); + for_each_possible_cpu(i) + cpu_context(i, user_mm) = 0; + vcpu->arch.last_user_gasid = gasid; + } + } + + /* + * Check if ASID is stale. This may happen due to a TLB flush request or + * a lazy user MM invalidation. + */ + if ((cpu_context(cpu, mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu)) + get_new_mmu_context(mm, cpu); +} + +static int kvm_trap_emul_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) +{ + int cpu = smp_processor_id(); + int r; + + /* Check if we have any exceptions/interrupts pending */ + kvm_mips_deliver_interrupts(vcpu, + kvm_read_c0_guest_cause(vcpu->arch.cop0)); + + kvm_trap_emul_vcpu_reenter(run, vcpu); + + /* + * We use user accessors to access guest memory, but we don't want to + * invoke Linux page faulting. + */ + pagefault_disable(); + + /* Disable hardware page table walking while in guest */ + htw_stop(); + + /* + * While in guest context we're in the guest's address space, not the + * host process address space, so we need to be careful not to confuse + * e.g. cache management IPIs. 
+ */ + kvm_mips_suspend_mm(cpu); + + r = vcpu->arch.vcpu_run(run, vcpu); + + /* We may have migrated while handling guest exits */ + cpu = smp_processor_id(); + + /* Restore normal Linux process memory map */ + if (((cpu_context(cpu, current->mm) ^ asid_cache(cpu)) & + asid_version_mask(cpu))) + get_new_mmu_context(current->mm, cpu); + write_c0_entryhi(cpu_asid(cpu, current->mm)); + TLBMISS_HANDLER_SETUP_PGD(current->mm->pgd); + kvm_mips_resume_mm(cpu); + + htw_start(); + + pagefault_enable(); + + return r; +} + static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { /* exit handlers */ .handle_cop_unusable = kvm_trap_emul_handle_cop_unusable, @@ -661,9 +1231,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .handle_fpe = kvm_trap_emul_handle_fpe, .handle_msa_disabled = kvm_trap_emul_handle_msa_disabled, - .vm_init = kvm_trap_emul_vm_init, .vcpu_init = kvm_trap_emul_vcpu_init, + .vcpu_uninit = kvm_trap_emul_vcpu_uninit, .vcpu_setup = kvm_trap_emul_vcpu_setup, + .flush_shadow_all = kvm_trap_emul_flush_shadow_all, + .flush_shadow_memslot = kvm_trap_emul_flush_shadow_memslot, .gva_to_gpa = kvm_trap_emul_gva_to_gpa_cb, .queue_timer_int = kvm_mips_queue_timer_int_cb, .dequeue_timer_int = kvm_mips_dequeue_timer_int_cb, @@ -675,8 +1247,10 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = { .copy_reg_indices = kvm_trap_emul_copy_reg_indices, .get_one_reg = kvm_trap_emul_get_one_reg, .set_one_reg = kvm_trap_emul_set_one_reg, - .vcpu_get_regs = kvm_trap_emul_vcpu_get_regs, - .vcpu_set_regs = kvm_trap_emul_vcpu_set_regs, + .vcpu_load = kvm_trap_emul_vcpu_load, + .vcpu_put = kvm_trap_emul_vcpu_put, + .vcpu_run = kvm_trap_emul_vcpu_run, + .vcpu_reenter = kvm_trap_emul_vcpu_reenter, }; int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks) |
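
The reworked kvm_get_inst() above reduces guest instruction fetch to a fetch-or-fault-then-retry loop: a plain get_user() read of the guest virtual address inside the kvm_trap_emul_gva_lockless_begin()/end() bracket, with kvm_trap_emul_gva_fault() invoked to (re)establish the GVA mapping if the read faults, after which the read is retried. The following is a minimal userspace sketch of that control flow only; gva_read(), gva_fault_in() and fetch_insn() are invented stand-ins for illustration, not kernel APIs.

#include <errno.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum fault_result { MAPPED, BAD_GVA, BAD_GPA, TLB_MISS, TLB_INVALID, TLB_MOD };

/* Stand-in for the fast path (get_user() in the kernel): fails with -EFAULT
 * while the page is "unmapped". */
static int gva_read(const uint32_t *addr, uint32_t *out, bool mapped)
{
	if (!mapped)
		return -EFAULT;
	*out = *addr;
	return 0;
}

/* Stand-in for kvm_trap_emul_gva_fault(): here the fault handler can always
 * establish the mapping; the real one can fail with the enum codes above. */
static enum fault_result gva_fault_in(bool *mapped)
{
	*mapped = true;
	return MAPPED;
}

/* Same control flow as the new kvm_get_inst(): fast read, fix up the mapping
 * on fault, retry, and only give up if the fault handler itself fails. */
static int fetch_insn(const uint32_t *opc, uint32_t *out)
{
	bool mapped = false;	/* page starts out "unmapped" for the demo */
	int err;

retry:
	err = gva_read(opc, out, mapped);
	if (err) {
		if (gva_fault_in(&mapped) != MAPPED)
			return -EFAULT;		/* genuinely bad address */
		goto retry;			/* mapping fixed up, try again */
	}
	return 0;
}

int main(void)
{
	uint32_t word = 0x03e00008;	/* "jr ra" as a stand-in opcode */
	uint32_t insn = 0;

	if (!fetch_insn(&word, &insn))
		printf("fetched 0x%08" PRIx32 "\n", insn);
	return 0;
}

In the kernel the retry also depends on the lockless begin()/end() bracket: IRQs are disabled and vcpu->mode is set to READING_SHADOW_PAGE_TABLES, so a concurrent GVA flush either completes before the read or is delayed, via IPI, until the read has finished.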
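
Several hunks above (vcpu_load, vcpu_put, vcpu_reenter, vcpu_run) repeat the same staleness test, (cpu_context(cpu, mm) ^ asid_cache(cpu)) & asid_version_mask(cpu): an mm's cached ASID is usable only while its generation bits match the CPU's running ASID counter, otherwise get_new_mmu_context() allocates a fresh one. Below is a self-contained sketch of that scheme, assuming an 8-bit ASID field and omitting the TLB flush a real generation rollover performs.

#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ASID_BITS		8
#define ASID_MASK		((UINT64_C(1) << ASID_BITS) - 1)
#define ASID_VERSION_MASK	(~ASID_MASK)
#define ASID_FIRST_VERSION	(UINT64_C(1) << ASID_BITS)

static uint64_t asid_cache = ASID_FIRST_VERSION;	/* per-CPU in the kernel */

/* The diff's "(cpu_context ^ asid_cache) & asid_version_mask" test. */
static bool ctx_is_stale(uint64_t mm_ctx)
{
	return ((mm_ctx ^ asid_cache) & ASID_VERSION_MASK) != 0;
}

/* Roughly what get_new_mmu_context() does: hand out the next ASID, and start
 * a new generation when the ASID space wraps (the kernel also flushes the
 * whole TLB at that point). */
static void get_new_context(uint64_t *mm_ctx)
{
	uint64_t asid = asid_cache + 1;

	if (!(asid & ASID_MASK))
		asid = (asid & ASID_VERSION_MASK) + 1;

	asid_cache = asid;
	*mm_ctx = asid;
}

int main(void)
{
	uint64_t guest_kern_ctx = 0;	/* e.g. a freshly zeroed cpu_context() */

	if (ctx_is_stale(guest_kern_ctx))
		get_new_context(&guest_kern_ctx);

	printf("context %#" PRIx64 ", asid %#" PRIx64 "\n",
	       guest_kern_ctx, guest_kern_ctx & ASID_MASK);
	return 0;
}

This is also why the KVM_REQ_TLB_FLUSH handler in kvm_trap_emul_check_requests() can invalidate the guest kernel and user GVA spaces simply by zeroing cpu_context() for both mm_structs on every CPU: the next version check fails and a fresh ASID is generated before guest re-entry.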