diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-24 13:07:18 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-02-24 13:07:18 -0800 |
commit | 89f883372fa60f604d136924baf3e89ff1870e9e (patch) | |
tree | cb69b0a14957945ba00d3d392bf9ccbbef56f3b8 /arch/powerpc/kvm | |
parent | 9e2d59ad580d590134285f361a0e80f0e98c0207 (diff) | |
parent | 6b73a96065e89dc9fa75ba4f78b1aa3a3bbd0470 (diff) | |
download | op-kernel-dev-89f883372fa60f604d136924baf3e89ff1870e9e.zip op-kernel-dev-89f883372fa60f604d136924baf3e89ff1870e9e.tar.gz |
Merge tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Marcelo Tosatti:
"KVM updates for the 3.9 merge window, including x86 real mode
emulation fixes, stronger memory slot interface restrictions, mmu_lock
spinlock hold time reduction, improved handling of large page faults
on shadow, initial APICv HW acceleration support, s390 channel IO
based virtio, amongst others"
* tag 'kvm-3.9-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (143 commits)
Revert "KVM: MMU: lazily drop large spte"
x86: pvclock kvm: align allocation size to page size
KVM: nVMX: Remove redundant get_vmcs12 from nested_vmx_exit_handled_msr
x86 emulator: fix parity calculation for AAD instruction
KVM: PPC: BookE: Handle alignment interrupts
booke: Added DBCR4 SPR number
KVM: PPC: booke: Allow multiple exception types
KVM: PPC: booke: use vcpu reference from thread_struct
KVM: Remove user_alloc from struct kvm_memory_slot
KVM: VMX: disable apicv by default
KVM: s390: Fix handling of iscs.
KVM: MMU: cleanup __direct_map
KVM: MMU: remove pt_access in mmu_set_spte
KVM: MMU: cleanup mapping-level
KVM: MMU: lazily drop large spte
KVM: VMX: cleanup vmx_set_cr0().
KVM: VMX: add missing exit names to VMX_EXIT_REASONS array
KVM: VMX: disable SMEP feature when guest is in non-paging mode
KVM: Remove duplicate text in api.txt
Revert "KVM: MMU: split kvm_mmu_free_page"
...
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/Makefile | 9 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_emulate.c | 30 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 2 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_pr.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.c | 70 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke.h | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke_emulate.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/booke_interrupts.S | 49 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500.c | 16 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500.h | 1 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu.c (renamed from arch/powerpc/kvm/e500_tlb.c) | 659 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu_host.c | 699 | ||||
-rw-r--r-- | arch/powerpc/kvm/e500_mmu_host.h | 18 | ||||
-rw-r--r-- | arch/powerpc/kvm/emulate.c | 5 | ||||
-rw-r--r-- | arch/powerpc/kvm/powerpc.c | 17 |
15 files changed, 910 insertions, 674 deletions
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 1e473d4..b772ede 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -10,7 +10,8 @@ common-objs-y = $(addprefix ../../../virt/kvm/, kvm_main.o coalesced_mmio.o \ eventfd.o) CFLAGS_44x_tlb.o := -I. -CFLAGS_e500_tlb.o := -I. +CFLAGS_e500_mmu.o := -I. +CFLAGS_e500_mmu_host.o := -I. CFLAGS_emulate.o := -I. common-objs-y += powerpc.o emulate.o @@ -35,7 +36,8 @@ kvm-e500-objs := \ booke_emulate.o \ booke_interrupts.o \ e500.o \ - e500_tlb.o \ + e500_mmu.o \ + e500_mmu_host.o \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500V2) := $(kvm-e500-objs) @@ -45,7 +47,8 @@ kvm-e500mc-objs := \ booke_emulate.o \ bookehv_interrupts.o \ e500mc.o \ - e500_tlb.o \ + e500_mmu.o \ + e500_mmu_host.o \ e500_emulate.o kvm-objs-$(CONFIG_KVM_E500MC) := $(kvm-e500mc-objs) diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c index d31a716..836c569 100644 --- a/arch/powerpc/kvm/book3s_emulate.c +++ b/arch/powerpc/kvm/book3s_emulate.c @@ -34,6 +34,8 @@ #define OP_31_XOP_MTSRIN 242 #define OP_31_XOP_TLBIEL 274 #define OP_31_XOP_TLBIE 306 +/* Opcode is officially reserved, reuse it as sc 1 when sc 1 doesn't trap */ +#define OP_31_XOP_FAKE_SC1 308 #define OP_31_XOP_SLBMTE 402 #define OP_31_XOP_SLBIE 434 #define OP_31_XOP_SLBIA 498 @@ -170,6 +172,32 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, vcpu->arch.mmu.tlbie(vcpu, addr, large); break; } +#ifdef CONFIG_KVM_BOOK3S_64_PR + case OP_31_XOP_FAKE_SC1: + { + /* SC 1 papr hypercalls */ + ulong cmd = kvmppc_get_gpr(vcpu, 3); + int i; + + if ((vcpu->arch.shared->msr & MSR_PR) || + !vcpu->arch.papr_enabled) { + emulated = EMULATE_FAIL; + break; + } + + if (kvmppc_h_pr(vcpu, cmd) == EMULATE_DONE) + break; + + run->papr_hcall.nr = cmd; + for (i = 0; i < 9; ++i) { + ulong gpr = kvmppc_get_gpr(vcpu, 4 + i); + run->papr_hcall.args[i] = gpr; + } + + emulated = EMULATE_DO_PAPR; + break; + } +#endif case OP_31_XOP_EIOIO: break; case OP_31_XOP_SLBMTE: @@ -427,6 +455,7 @@ int kvmppc_core_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_PMC3_GEKKO: case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: + case SPRN_MSSSR0: break; unprivileged: default: @@ -523,6 +552,7 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_PMC3_GEKKO: case SPRN_PMC4_GEKKO: case SPRN_WPAR_GEKKO: + case SPRN_MSSSR0: *spr_val = 0; break; default: diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 71d0c90..80dcc53 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1549,7 +1549,7 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log) mutex_lock(&kvm->slots_lock); r = -EINVAL; - if (log->slot >= KVM_MEMORY_SLOTS) + if (log->slot >= KVM_USER_MEM_SLOTS) goto out; memslot = id_to_memslot(kvm->memslots, log->slot); diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 6702442..5e93438 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -762,6 +762,11 @@ program_interrupt: run->exit_reason = KVM_EXIT_MMIO; r = RESUME_HOST_NV; break; + case EMULATE_DO_PAPR: + run->exit_reason = KVM_EXIT_PAPR_HCALL; + vcpu->arch.hcall_needed = 1; + r = RESUME_HOST_NV; + break; default: BUG(); } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 69f1140..020923e 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -182,6 +182,14 @@ static void kvmppc_core_queue_inst_storage(struct kvm_vcpu *vcpu, kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_INST_STORAGE); } +static void kvmppc_core_queue_alignment(struct kvm_vcpu *vcpu, ulong dear_flags, + ulong esr_flags) +{ + vcpu->arch.queued_dear = dear_flags; + vcpu->arch.queued_esr = esr_flags; + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_ALIGNMENT); +} + void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags) { vcpu->arch.queued_esr = esr_flags; @@ -300,13 +308,22 @@ static void set_guest_esr(struct kvm_vcpu *vcpu, u32 esr) #endif } +static unsigned long get_guest_epr(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_KVM_BOOKE_HV + return mfspr(SPRN_GEPR); +#else + return vcpu->arch.epr; +#endif +} + /* Deliver the interrupt of the corresponding priority, if possible. */ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority) { int allowed = 0; ulong msr_mask = 0; - bool update_esr = false, update_dear = false; + bool update_esr = false, update_dear = false, update_epr = false; ulong crit_raw = vcpu->arch.shared->critical; ulong crit_r1 = kvmppc_get_gpr(vcpu, 1); bool crit; @@ -330,9 +347,13 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, keep_irq = true; } + if ((priority == BOOKE_IRQPRIO_EXTERNAL) && vcpu->arch.epr_enabled) + update_epr = true; + switch (priority) { case BOOKE_IRQPRIO_DTLB_MISS: case BOOKE_IRQPRIO_DATA_STORAGE: + case BOOKE_IRQPRIO_ALIGNMENT: update_dear = true; /* fall through */ case BOOKE_IRQPRIO_INST_STORAGE: @@ -346,7 +367,6 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, case BOOKE_IRQPRIO_SPE_FP_DATA: case BOOKE_IRQPRIO_SPE_FP_ROUND: case BOOKE_IRQPRIO_AP_UNAVAIL: - case BOOKE_IRQPRIO_ALIGNMENT: allowed = 1; msr_mask = MSR_CE | MSR_ME | MSR_DE; int_class = INT_CLASS_NONCRIT; @@ -408,6 +428,8 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, set_guest_esr(vcpu, vcpu->arch.queued_esr); if (update_dear == true) set_guest_dear(vcpu, vcpu->arch.queued_dear); + if (update_epr == true) + kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); new_msr &= msr_mask; #if defined(CONFIG_64BIT) @@ -581,6 +603,11 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_core_check_exceptions(vcpu); + if (vcpu->requests) { + /* Exception delivery raised request; start over */ + return 1; + } + if (vcpu->arch.shared->msr & MSR_WE) { local_irq_enable(); kvm_vcpu_block(vcpu); @@ -610,6 +637,13 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu) r = 0; } + if (kvm_check_request(KVM_REQ_EPR_EXIT, vcpu)) { + vcpu->run->epr.epr = 0; + vcpu->arch.epr_needed = true; + vcpu->run->exit_reason = KVM_EXIT_EPR; + r = 0; + } + return r; } @@ -945,6 +979,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_ALIGNMENT: + kvmppc_core_queue_alignment(vcpu, vcpu->arch.fault_dear, + vcpu->arch.fault_esr); + r = RESUME_GUEST; + break; + #ifdef CONFIG_KVM_BOOKE_HV case BOOKE_INTERRUPT_HV_SYSCALL: if (!(vcpu->arch.shared->msr & MSR_PR)) { @@ -1388,6 +1428,11 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) &vcpu->arch.dbg_reg.dac[dac], sizeof(u64)); break; } + case KVM_REG_PPC_EPR: { + u32 epr = get_guest_epr(vcpu); + r = put_user(epr, (u32 __user *)(long)reg->addr); + break; + } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: r = put_user(vcpu->arch.epcr, (u32 __user *)(long)reg->addr); @@ -1420,6 +1465,13 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) (u64 __user *)(long)reg->addr, sizeof(u64)); break; } + case KVM_REG_PPC_EPR: { + u32 new_epr; + r = get_user(new_epr, (u32 __user *)(long)reg->addr); + if (!r) + kvmppc_set_epr(vcpu, new_epr); + break; + } #if defined(CONFIG_64BIT) case KVM_REG_PPC_EPCR: { u32 new_epcr; @@ -1556,7 +1608,9 @@ int __init kvmppc_booke_init(void) { #ifndef CONFIG_KVM_BOOKE_HV unsigned long ivor[16]; + unsigned long *handler = kvmppc_booke_handler_addr; unsigned long max_ivor = 0; + unsigned long handler_len; int i; /* We install our own exception handlers by hijacking IVPR. IVPR must @@ -1589,14 +1643,16 @@ int __init kvmppc_booke_init(void) for (i = 0; i < 16; i++) { if (ivor[i] > max_ivor) - max_ivor = ivor[i]; + max_ivor = i; + handler_len = handler[i + 1] - handler[i]; memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + i * kvmppc_handler_len, - kvmppc_handler_len); + (void *)handler[i], handler_len); } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + + handler_len = handler[max_ivor + 1] - handler[max_ivor]; + flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + + ivor[max_ivor] + handler_len); #endif /* !BOOKE_HV */ return 0; } diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h index e9b88e4..5fd1ba6 100644 --- a/arch/powerpc/kvm/booke.h +++ b/arch/powerpc/kvm/booke.h @@ -65,6 +65,7 @@ (1 << BOOKE_IRQPRIO_CRITICAL)) extern unsigned long kvmppc_booke_handlers; +extern unsigned long kvmppc_booke_handler_addr[]; void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr); void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr); diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 4685b8c..27a4b28 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -269,6 +269,9 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_ESR: *spr_val = vcpu->arch.shared->esr; break; + case SPRN_EPR: + *spr_val = vcpu->arch.epr; + break; case SPRN_CSRR0: *spr_val = vcpu->arch.csrr0; break; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index bb46b32..f4bb55c 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -45,18 +45,21 @@ (1<<BOOKE_INTERRUPT_DEBUG)) #define NEED_DEAR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ - (1<<BOOKE_INTERRUPT_DTLB_MISS)) + (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ + (1<<BOOKE_INTERRUPT_ALIGNMENT)) #define NEED_ESR_MASK ((1<<BOOKE_INTERRUPT_DATA_STORAGE) | \ (1<<BOOKE_INTERRUPT_INST_STORAGE) | \ (1<<BOOKE_INTERRUPT_PROGRAM) | \ - (1<<BOOKE_INTERRUPT_DTLB_MISS)) + (1<<BOOKE_INTERRUPT_DTLB_MISS) | \ + (1<<BOOKE_INTERRUPT_ALIGNMENT)) .macro KVM_HANDLER ivor_nr scratch srr0 _GLOBAL(kvmppc_handler_\ivor_nr) /* Get pointer to vcpu and record exit number. */ mtspr \scratch , r4 - mfspr r4, SPRN_SPRG_RVCPU + mfspr r4, SPRN_SPRG_THREAD + lwz r4, THREAD_KVM_VCPU(r4) stw r3, VCPU_GPR(R3)(r4) stw r5, VCPU_GPR(R5)(r4) stw r6, VCPU_GPR(R6)(r4) @@ -73,6 +76,14 @@ _GLOBAL(kvmppc_handler_\ivor_nr) bctr .endm +.macro KVM_HANDLER_ADDR ivor_nr + .long kvmppc_handler_\ivor_nr +.endm + +.macro KVM_HANDLER_END + .long kvmppc_handlers_end +.endm + _GLOBAL(kvmppc_handlers_start) KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 @@ -93,9 +104,7 @@ KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 - -_GLOBAL(kvmppc_handler_len) - .long kvmppc_handler_1 - kvmppc_handler_0 +_GLOBAL(kvmppc_handlers_end) /* Registers: * SPRG_SCRATCH0: guest r4 @@ -402,9 +411,6 @@ lightweight_exit: lwz r8, kvmppc_booke_handlers@l(r8) mtspr SPRN_IVPR, r8 - /* Save vcpu pointer for the exception handlers. */ - mtspr SPRN_SPRG_WVCPU, r4 - lwz r5, VCPU_SHARED(r4) /* Can't switch the stack pointer until after IVPR is switched, @@ -463,6 +469,31 @@ lightweight_exit: lwz r4, VCPU_GPR(R4)(r4) rfi + .data + .align 4 + .globl kvmppc_booke_handler_addr +kvmppc_booke_handler_addr: +KVM_HANDLER_ADDR BOOKE_INTERRUPT_CRITICAL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_MACHINE_CHECK +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DATA_STORAGE +KVM_HANDLER_ADDR BOOKE_INTERRUPT_INST_STORAGE +KVM_HANDLER_ADDR BOOKE_INTERRUPT_EXTERNAL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_ALIGNMENT +KVM_HANDLER_ADDR BOOKE_INTERRUPT_PROGRAM +KVM_HANDLER_ADDR BOOKE_INTERRUPT_FP_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SYSCALL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_AP_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DECREMENTER +KVM_HANDLER_ADDR BOOKE_INTERRUPT_FIT +KVM_HANDLER_ADDR BOOKE_INTERRUPT_WATCHDOG +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DTLB_MISS +KVM_HANDLER_ADDR BOOKE_INTERRUPT_ITLB_MISS +KVM_HANDLER_ADDR BOOKE_INTERRUPT_DEBUG +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_UNAVAIL +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_DATA +KVM_HANDLER_ADDR BOOKE_INTERRUPT_SPE_FP_ROUND +KVM_HANDLER_END /*Always keep this in end*/ + #ifdef CONFIG_SPE _GLOBAL(kvmppc_save_guest_spe) cmpi 0,r3,0 diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b479ed7..6dd4de7 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -491,6 +491,9 @@ static int __init kvmppc_e500_init(void) { int r, i; unsigned long ivor[3]; + /* Process remaining handlers above the generic first 16 */ + unsigned long *handler = &kvmppc_booke_handler_addr[16]; + unsigned long handler_len; unsigned long max_ivor = 0; r = kvmppc_core_check_processor_compat(); @@ -506,15 +509,16 @@ static int __init kvmppc_e500_init(void) ivor[1] = mfspr(SPRN_IVOR33); ivor[2] = mfspr(SPRN_IVOR34); for (i = 0; i < 3; i++) { - if (ivor[i] > max_ivor) - max_ivor = ivor[i]; + if (ivor[i] > ivor[max_ivor]) + max_ivor = i; + handler_len = handler[i + 1] - handler[i]; memcpy((void *)kvmppc_booke_handlers + ivor[i], - kvmppc_handlers_start + (i + 16) * kvmppc_handler_len, - kvmppc_handler_len); + (void *)handler[i], handler_len); } - flush_icache_range(kvmppc_booke_handlers, - kvmppc_booke_handlers + max_ivor + kvmppc_handler_len); + handler_len = handler[max_ivor + 1] - handler[max_ivor]; + flush_icache_range(kvmppc_booke_handlers, kvmppc_booke_handlers + + ivor[max_ivor] + handler_len); return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE); } diff --git a/arch/powerpc/kvm/e500.h b/arch/powerpc/kvm/e500.h index c70d37e..41cefd4 100644 --- a/arch/powerpc/kvm/e500.h +++ b/arch/powerpc/kvm/e500.h @@ -28,6 +28,7 @@ #define E500_TLB_VALID 1 #define E500_TLB_BITMAP 2 +#define E500_TLB_TLB0 (1 << 2) struct tlbe_ref { pfn_t pfn; diff --git a/arch/powerpc/kvm/e500_tlb.c b/arch/powerpc/kvm/e500_mmu.c index cf3f180..5c44759 100644 --- a/arch/powerpc/kvm/e500_tlb.c +++ b/arch/powerpc/kvm/e500_mmu.c @@ -1,10 +1,11 @@ /* - * Copyright (C) 2008-2011 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. * * Author: Yu Liu, yu.liu@freescale.com * Scott Wood, scottwood@freescale.com * Ashish Kalra, ashish.kalra@freescale.com * Varun Sethi, varun.sethi@freescale.com + * Alexander Graf, agraf@suse.de * * Description: * This file is based on arch/powerpc/kvm/44x_tlb.c, @@ -33,10 +34,7 @@ #include "e500.h" #include "trace.h" #include "timing.h" - -#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) - -static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; +#include "e500_mmu_host.h" static inline unsigned int gtlb0_get_next_victim( struct kvmppc_vcpu_e500 *vcpu_e500) @@ -50,174 +48,6 @@ static inline unsigned int gtlb0_get_next_victim( return victim; } -static inline unsigned int tlb1_max_shadow_size(void) -{ - /* reserve one entry for magic page */ - return host_tlb_params[1].entries - tlbcam_index - 1; -} - -static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) -{ - return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); -} - -static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) -{ - /* Mask off reserved bits. */ - mas3 &= MAS3_ATTRIB_MASK; - -#ifndef CONFIG_KVM_BOOKE_HV - if (!usermode) { - /* Guest is in supervisor mode, - * so we need to translate guest - * supervisor permissions into user permissions. */ - mas3 &= ~E500_TLB_USER_PERM_MASK; - mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; - } - mas3 |= E500_TLB_SUPER_PERM_MASK; -#endif - return mas3; -} - -static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) -{ -#ifdef CONFIG_SMP - return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; -#else - return mas2 & MAS2_ATTRIB_MASK; -#endif -} - -/* - * writing shadow tlb entry to host TLB - */ -static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, - uint32_t mas0) -{ - unsigned long flags; - - local_irq_save(flags); - mtspr(SPRN_MAS0, mas0); - mtspr(SPRN_MAS1, stlbe->mas1); - mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); - mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); - mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); -#ifdef CONFIG_KVM_BOOKE_HV - mtspr(SPRN_MAS8, stlbe->mas8); -#endif - asm volatile("isync; tlbwe" : : : "memory"); - -#ifdef CONFIG_KVM_BOOKE_HV - /* Must clear mas8 for other host tlbwe's */ - mtspr(SPRN_MAS8, 0); - isync(); -#endif - local_irq_restore(flags); - - trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, - stlbe->mas2, stlbe->mas7_3); -} - -/* - * Acquire a mas0 with victim hint, as if we just took a TLB miss. - * - * We don't care about the address we're searching for, other than that it's - * in the right set and is not present in the TLB. Using a zero PID and a - * userspace address means we don't have to set and then restore MAS5, or - * calculate a proper MAS6 value. - */ -static u32 get_host_mas0(unsigned long eaddr) -{ - unsigned long flags; - u32 mas0; - - local_irq_save(flags); - mtspr(SPRN_MAS6, 0); - asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); - mas0 = mfspr(SPRN_MAS0); - local_irq_restore(flags); - - return mas0; -} - -/* sesel is for tlb1 only */ -static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) -{ - u32 mas0; - - if (tlbsel == 0) { - mas0 = get_host_mas0(stlbe->mas2); - __write_host_tlbe(stlbe, mas0); - } else { - __write_host_tlbe(stlbe, - MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(sesel))); - } -} - -#ifdef CONFIG_KVM_E500V2 -void kvmppc_map_magic(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry magic; - ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; - unsigned int stid; - pfn_t pfn; - - pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; - get_page(pfn_to_page(pfn)); - - preempt_disable(); - stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); - - magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | - MAS1_TSIZE(BOOK3E_PAGESZ_4K); - magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; - magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | - MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; - magic.mas8 = 0; - - __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); - preempt_enable(); -} -#endif - -static void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, - int tlbsel, int esel) -{ - struct kvm_book3e_206_tlb_entry *gtlbe = - get_entry(vcpu_e500, tlbsel, esel); - - if (tlbsel == 1 && - vcpu_e500->gtlb_priv[1][esel].ref.flags & E500_TLB_BITMAP) { - u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; - int hw_tlb_indx; - unsigned long flags; - - local_irq_save(flags); - while (tmp) { - hw_tlb_indx = __ilog2_u64(tmp & -tmp); - mtspr(SPRN_MAS0, - MAS0_TLBSEL(1) | - MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); - mtspr(SPRN_MAS1, 0); - asm volatile("tlbwe"); - vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; - tmp &= tmp - 1; - } - mb(); - vcpu_e500->g2h_tlb1_map[esel] = 0; - vcpu_e500->gtlb_priv[1][esel].ref.flags &= ~E500_TLB_BITMAP; - local_irq_restore(flags); - - return; - } - - /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ - kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); -} - static int tlb0_set_base(gva_t addr, int sets, int ways) { int set_base; @@ -296,70 +126,6 @@ static int kvmppc_e500_tlb_index(struct kvmppc_vcpu_e500 *vcpu_e500, return -1; } -static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, - struct kvm_book3e_206_tlb_entry *gtlbe, - pfn_t pfn) -{ - ref->pfn = pfn; - ref->flags = E500_TLB_VALID; - - if (tlbe_is_writable(gtlbe)) - kvm_set_pfn_dirty(pfn); -} - -static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) -{ - if (ref->flags & E500_TLB_VALID) { - trace_kvm_booke206_ref_release(ref->pfn, ref->flags); - ref->flags = 0; - } -} - -static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - if (vcpu_e500->g2h_tlb1_map) - memset(vcpu_e500->g2h_tlb1_map, 0, - sizeof(u64) * vcpu_e500->gtlb_params[1].entries); - if (vcpu_e500->h2g_tlb1_rmap) - memset(vcpu_e500->h2g_tlb1_rmap, 0, - sizeof(unsigned int) * host_tlb_params[1].entries); -} - -static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int tlbsel = 0; - int i; - - for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->gtlb_priv[tlbsel][i].ref; - kvmppc_e500_ref_release(ref); - } -} - -static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) -{ - int stlbsel = 1; - int i; - - kvmppc_e500_tlbil_all(vcpu_e500); - - for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { - struct tlbe_ref *ref = - &vcpu_e500->tlb_refs[stlbsel][i]; - kvmppc_e500_ref_release(ref); - } - - clear_tlb_privs(vcpu_e500); -} - -void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - clear_tlb_refs(vcpu_e500); - clear_tlb1_bitmap(vcpu_e500); -} - static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, unsigned int eaddr, int as) { @@ -385,216 +151,6 @@ static inline void kvmppc_e500_deliver_tlb_miss(struct kvm_vcpu *vcpu, | (as ? MAS6_SAS : 0); } -/* TID must be supplied by the caller */ -static inline void kvmppc_e500_setup_stlbe( - struct kvm_vcpu *vcpu, - struct kvm_book3e_206_tlb_entry *gtlbe, - int tsize, struct tlbe_ref *ref, u64 gvaddr, - struct kvm_book3e_206_tlb_entry *stlbe) -{ - pfn_t pfn = ref->pfn; - u32 pr = vcpu->arch.shared->msr & MSR_PR; - - BUG_ON(!(ref->flags & E500_TLB_VALID)); - - /* Force IPROT=0 for all guest mappings. */ - stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; - stlbe->mas2 = (gvaddr & MAS2_EPN) | - e500_shadow_mas2_attrib(gtlbe->mas2, pr); - stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | - e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); - -#ifdef CONFIG_KVM_BOOKE_HV - stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; -#endif -} - -static inline void kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, - int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, - struct tlbe_ref *ref) -{ - struct kvm_memory_slot *slot; - unsigned long pfn = 0; /* silence GCC warning */ - unsigned long hva; - int pfnmap = 0; - int tsize = BOOK3E_PAGESZ_4K; - - /* - * Translate guest physical to true physical, acquiring - * a page reference if it is normal, non-reserved memory. - * - * gfn_to_memslot() must succeed because otherwise we wouldn't - * have gotten this far. Eventually we should just pass the slot - * pointer through from the first lookup. - */ - slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); - hva = gfn_to_hva_memslot(slot, gfn); - - if (tlbsel == 1) { - struct vm_area_struct *vma; - down_read(¤t->mm->mmap_sem); - - vma = find_vma(current->mm, hva); - if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_PFNMAP)) { - /* - * This VMA is a physically contiguous region (e.g. - * /dev/mem) that bypasses normal Linux page - * management. Find the overlap between the - * vma and the memslot. - */ - - unsigned long start, end; - unsigned long slot_start, slot_end; - - pfnmap = 1; - - start = vma->vm_pgoff; - end = start + - ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); - - pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); - - slot_start = pfn - (gfn - slot->base_gfn); - slot_end = slot_start + slot->npages; - - if (start < slot_start) - start = slot_start; - if (end > slot_end) - end = slot_end; - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - - /* - * Now find the largest tsize (up to what the guest - * requested) that will cover gfn, stay within the - * range, and for which gfn and pfn are mutually - * aligned. - */ - - for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { - unsigned long gfn_start, gfn_end, tsize_pages; - tsize_pages = 1 << (tsize - 2); - - gfn_start = gfn & ~(tsize_pages - 1); - gfn_end = gfn_start + tsize_pages; - - if (gfn_start + pfn - gfn < start) - continue; - if (gfn_end + pfn - gfn > end) - continue; - if ((gfn & (tsize_pages - 1)) != - (pfn & (tsize_pages - 1))) - continue; - - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - pfn &= ~(tsize_pages - 1); - break; - } - } else if (vma && hva >= vma->vm_start && - (vma->vm_flags & VM_HUGETLB)) { - unsigned long psize = vma_kernel_pagesize(vma); - - tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> - MAS1_TSIZE_SHIFT; - - /* - * Take the largest page size that satisfies both host - * and guest mapping - */ - tsize = min(__ilog2(psize) - 10, tsize); - - /* - * e500 doesn't implement the lowest tsize bit, - * or 1K pages. - */ - tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); - } - - up_read(¤t->mm->mmap_sem); - } - - if (likely(!pfnmap)) { - unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); - pfn = gfn_to_pfn_memslot(slot, gfn); - if (is_error_noslot_pfn(pfn)) { - printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", - (long)gfn); - return; - } - - /* Align guest and physical address to page map boundaries */ - pfn &= ~(tsize_pages - 1); - gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); - } - - /* Drop old ref and setup new one. */ - kvmppc_e500_ref_release(ref); - kvmppc_e500_ref_setup(ref, gtlbe, pfn); - - kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, - ref, gvaddr, stlbe); - - /* Clear i-cache for new pages */ - kvmppc_mmu_flush_icache(pfn); - - /* Drop refcount on page, so that mmu notifiers can clear it */ - kvm_release_pfn_clean(pfn); -} - -/* XXX only map the one-one case, for now use TLB0 */ -static void kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, - int esel, - struct kvm_book3e_206_tlb_entry *stlbe) -{ - struct kvm_book3e_206_tlb_entry *gtlbe; - struct tlbe_ref *ref; - - gtlbe = get_entry(vcpu_e500, 0, esel); - ref = &vcpu_e500->gtlb_priv[0][esel].ref; - - kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), - get_tlb_raddr(gtlbe) >> PAGE_SHIFT, - gtlbe, 0, stlbe, ref); -} - -/* Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. */ -/* XXX for both one-one and one-to-many , for now use TLB1 */ -static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, - u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe, int esel) -{ - struct tlbe_ref *ref; - unsigned int victim; - - victim = vcpu_e500->host_tlb1_nv++; - - if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) - vcpu_e500->host_tlb1_nv = 0; - - ref = &vcpu_e500->tlb_refs[1][victim]; - kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, ref); - - vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << victim; - vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; - if (vcpu_e500->h2g_tlb1_rmap[victim]) { - unsigned int idx = vcpu_e500->h2g_tlb1_rmap[victim]; - vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << victim); - } - vcpu_e500->h2g_tlb1_rmap[victim] = esel; - - return victim; -} - static void kvmppc_recalc_tlb1map_range(struct kvmppc_vcpu_e500 *vcpu_e500) { int size = vcpu_e500->gtlb_params[1].entries; @@ -683,8 +239,8 @@ int kvmppc_e500_emul_mt_mmucsr0(struct kvmppc_vcpu_e500 *vcpu_e500, ulong value) for (esel = 0; esel < vcpu_e500->gtlb_params[1].entries; esel++) kvmppc_e500_gtlbe_invalidate(vcpu_e500, 1, esel); - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); + /* Invalidate all host shadow mappings */ + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); return EMULATE_DONE; } @@ -713,8 +269,8 @@ int kvmppc_e500_emul_tlbivax(struct kvm_vcpu *vcpu, gva_t ea) kvmppc_e500_gtlbe_invalidate(vcpu_e500, tlbsel, esel); } - /* Invalidate all vcpu id mappings */ - kvmppc_e500_tlbil_all(vcpu_e500); + /* Invalidate all host shadow mappings */ + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); return EMULATE_DONE; } @@ -834,27 +390,11 @@ int kvmppc_e500_emul_tlbsx(struct kvm_vcpu *vcpu, gva_t ea) return EMULATE_DONE; } -/* sesel is for tlb1 only */ -static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, - struct kvm_book3e_206_tlb_entry *gtlbe, - struct kvm_book3e_206_tlb_entry *stlbe, - int stlbsel, int sesel) -{ - int stid; - - preempt_disable(); - stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); - - stlbe->mas1 |= MAS1_TID(stid); - write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); - preempt_enable(); -} - int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; - int tlbsel, esel, stlbsel, sesel; + struct kvm_book3e_206_tlb_entry *gtlbe; + int tlbsel, esel; int recal = 0; tlbsel = get_tlb_tlbsel(vcpu); @@ -892,40 +432,16 @@ int kvmppc_e500_emul_tlbwe(struct kvm_vcpu *vcpu) /* Invalidate shadow mappings for the about-to-be-clobbered TLBE. */ if (tlbe_is_host_safe(vcpu, gtlbe)) { - u64 eaddr; - u64 raddr; + u64 eaddr = get_tlb_eaddr(gtlbe); + u64 raddr = get_tlb_raddr(gtlbe); - switch (tlbsel) { - case 0: - /* TLB0 */ + if (tlbsel == 0) { gtlbe->mas1 &= ~MAS1_TSIZE(~0); gtlbe->mas1 |= MAS1_TSIZE(BOOK3E_PAGESZ_4K); - - stlbsel = 0; - kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); - sesel = 0; /* unused */ - - break; - - case 1: - /* TLB1 */ - eaddr = get_tlb_eaddr(gtlbe); - raddr = get_tlb_raddr(gtlbe); - - /* Create a 4KB mapping on the host. - * If the guest wanted a large page, - * only the first 4KB is mapped here and the rest - * are mapped on the fly. */ - stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, - raddr >> PAGE_SHIFT, gtlbe, &stlbe, esel); - break; - - default: - BUG(); } - write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); + /* Premap the faulting page */ + kvmppc_mmu_map(vcpu, eaddr, raddr, index_of(tlbsel, esel)); } kvmppc_set_exit_type(vcpu, EMULATED_TLBWE_EXITS); @@ -1019,100 +535,14 @@ void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) { } -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, - unsigned int index) -{ - struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); - struct tlbe_priv *priv; - struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; - int tlbsel = tlbsel_of(index); - int esel = esel_of(index); - int stlbsel, sesel; - - gtlbe = get_entry(vcpu_e500, tlbsel, esel); - - switch (tlbsel) { - case 0: - stlbsel = 0; - sesel = 0; /* unused */ - priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; - - /* Only triggers after clear_tlb_refs */ - if (unlikely(!(priv->ref.flags & E500_TLB_VALID))) - kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); - else - kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, - &priv->ref, eaddr, &stlbe); - break; - - case 1: { - gfn_t gfn = gpaddr >> PAGE_SHIFT; - - stlbsel = 1; - sesel = kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, - gtlbe, &stlbe, esel); - break; - } - - default: - BUG(); - break; - } - - write_stlbe(vcpu_e500, gtlbe, &stlbe, stlbsel, sesel); -} - -/************* MMU Notifiers *************/ - -int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) -{ - trace_kvm_unmap_hva(hva); - - /* - * Flush all shadow tlb entries everywhere. This is slow, but - * we are 100% sure that we catch the to be unmapped page - */ - kvm_flush_remote_tlbs(kvm); - - return 0; -} - -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) -{ - /* kvm_unmap_hva flushes everything anyways */ - kvm_unmap_hva(kvm, start); - - return 0; -} - -int kvm_age_hva(struct kvm *kvm, unsigned long hva) -{ - /* XXX could be more clever ;) */ - return 0; -} - -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) -{ - /* XXX could be more clever ;) */ - return 0; -} - -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) -{ - /* The page will get remapped properly on its next fault */ - kvm_unmap_hva(kvm, hva); -} - /*****************************************/ static void free_gtlb(struct kvmppc_vcpu_e500 *vcpu_e500) { int i; - clear_tlb1_bitmap(vcpu_e500); + kvmppc_core_flush_tlb(&vcpu_e500->vcpu); kfree(vcpu_e500->g2h_tlb1_map); - - clear_tlb_refs(vcpu_e500); kfree(vcpu_e500->gtlb_priv[0]); kfree(vcpu_e500->gtlb_priv[1]); @@ -1303,7 +733,7 @@ int kvm_vcpu_ioctl_dirty_tlb(struct kvm_vcpu *vcpu, { struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); kvmppc_recalc_tlb1map_range(vcpu_e500); - clear_tlb_refs(vcpu_e500); + kvmppc_core_flush_tlb(vcpu); return 0; } @@ -1313,37 +743,8 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) int entry_size = sizeof(struct kvm_book3e_206_tlb_entry); int entries = KVM_E500_TLB0_SIZE + KVM_E500_TLB1_SIZE; - host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; - host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; - - /* - * This should never happen on real e500 hardware, but is - * architecturally possible -- e.g. in some weird nested - * virtualization case. - */ - if (host_tlb_params[0].entries == 0 || - host_tlb_params[1].entries == 0) { - pr_err("%s: need to know host tlb size\n", __func__); - return -ENODEV; - } - - host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> - TLBnCFG_ASSOC_SHIFT; - host_tlb_params[1].ways = host_tlb_params[1].entries; - - if (!is_power_of_2(host_tlb_params[0].entries) || - !is_power_of_2(host_tlb_params[0].ways) || - host_tlb_params[0].entries < host_tlb_params[0].ways || - host_tlb_params[0].ways == 0) { - pr_err("%s: bad tlb0 host config: %u entries %u ways\n", - __func__, host_tlb_params[0].entries, - host_tlb_params[0].ways); - return -ENODEV; - } - - host_tlb_params[0].sets = - host_tlb_params[0].entries / host_tlb_params[0].ways; - host_tlb_params[1].sets = 1; + if (e500_mmu_host_init(vcpu_e500)) + goto err; vcpu_e500->gtlb_params[0].entries = KVM_E500_TLB0_SIZE; vcpu_e500->gtlb_params[1].entries = KVM_E500_TLB1_SIZE; @@ -1362,18 +763,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) vcpu_e500->gtlb_offset[0] = 0; vcpu_e500->gtlb_offset[1] = KVM_E500_TLB0_SIZE; - vcpu_e500->tlb_refs[0] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[0]) - goto err; - - vcpu_e500->tlb_refs[1] = - kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->tlb_refs[1]) - goto err; - vcpu_e500->gtlb_priv[0] = kzalloc(sizeof(struct tlbe_ref) * vcpu_e500->gtlb_params[0].entries, GFP_KERNEL); @@ -1392,12 +781,6 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) if (!vcpu_e500->g2h_tlb1_map) goto err; - vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * - host_tlb_params[1].entries, - GFP_KERNEL); - if (!vcpu_e500->h2g_tlb1_rmap) - goto err; - /* Init TLB configuration register */ vcpu->arch.tlbcfg[0] = mfspr(SPRN_TLB0CFG) & ~(TLBnCFG_N_ENTRY | TLBnCFG_ASSOC); @@ -1416,15 +799,11 @@ int kvmppc_e500_tlb_init(struct kvmppc_vcpu_e500 *vcpu_e500) err: free_gtlb(vcpu_e500); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); return -1; } void kvmppc_e500_tlb_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) { free_gtlb(vcpu_e500); - kfree(vcpu_e500->h2g_tlb1_rmap); - kfree(vcpu_e500->tlb_refs[0]); - kfree(vcpu_e500->tlb_refs[1]); + e500_mmu_host_uninit(vcpu_e500); } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c new file mode 100644 index 0000000..a222edf --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -0,0 +1,699 @@ +/* + * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: Yu Liu, yu.liu@freescale.com + * Scott Wood, scottwood@freescale.com + * Ashish Kalra, ashish.kalra@freescale.com + * Varun Sethi, varun.sethi@freescale.com + * Alexander Graf, agraf@suse.de + * + * Description: + * This file is based on arch/powerpc/kvm/44x_tlb.c, + * by Hollis Blanchard <hollisb@us.ibm.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/highmem.h> +#include <linux/log2.h> +#include <linux/uaccess.h> +#include <linux/sched.h> +#include <linux/rwsem.h> +#include <linux/vmalloc.h> +#include <linux/hugetlb.h> +#include <asm/kvm_ppc.h> + +#include "e500.h" +#include "trace.h" +#include "timing.h" +#include "e500_mmu_host.h" + +#define to_htlb1_esel(esel) (host_tlb_params[1].entries - (esel) - 1) + +static struct kvmppc_e500_tlb_params host_tlb_params[E500_TLB_NUM]; + +static inline unsigned int tlb1_max_shadow_size(void) +{ + /* reserve one entry for magic page */ + return host_tlb_params[1].entries - tlbcam_index - 1; +} + +static inline u32 e500_shadow_mas3_attrib(u32 mas3, int usermode) +{ + /* Mask off reserved bits. */ + mas3 &= MAS3_ATTRIB_MASK; + +#ifndef CONFIG_KVM_BOOKE_HV + if (!usermode) { + /* Guest is in supervisor mode, + * so we need to translate guest + * supervisor permissions into user permissions. */ + mas3 &= ~E500_TLB_USER_PERM_MASK; + mas3 |= (mas3 & E500_TLB_SUPER_PERM_MASK) << 1; + } + mas3 |= E500_TLB_SUPER_PERM_MASK; +#endif + return mas3; +} + +static inline u32 e500_shadow_mas2_attrib(u32 mas2, int usermode) +{ +#ifdef CONFIG_SMP + return (mas2 & MAS2_ATTRIB_MASK) | MAS2_M; +#else + return mas2 & MAS2_ATTRIB_MASK; +#endif +} + +/* + * writing shadow tlb entry to host TLB + */ +static inline void __write_host_tlbe(struct kvm_book3e_206_tlb_entry *stlbe, + uint32_t mas0) +{ + unsigned long flags; + + local_irq_save(flags); + mtspr(SPRN_MAS0, mas0); + mtspr(SPRN_MAS1, stlbe->mas1); + mtspr(SPRN_MAS2, (unsigned long)stlbe->mas2); + mtspr(SPRN_MAS3, (u32)stlbe->mas7_3); + mtspr(SPRN_MAS7, (u32)(stlbe->mas7_3 >> 32)); +#ifdef CONFIG_KVM_BOOKE_HV + mtspr(SPRN_MAS8, stlbe->mas8); +#endif + asm volatile("isync; tlbwe" : : : "memory"); + +#ifdef CONFIG_KVM_BOOKE_HV + /* Must clear mas8 for other host tlbwe's */ + mtspr(SPRN_MAS8, 0); + isync(); +#endif + local_irq_restore(flags); + + trace_kvm_booke206_stlb_write(mas0, stlbe->mas8, stlbe->mas1, + stlbe->mas2, stlbe->mas7_3); +} + +/* + * Acquire a mas0 with victim hint, as if we just took a TLB miss. + * + * We don't care about the address we're searching for, other than that it's + * in the right set and is not present in the TLB. Using a zero PID and a + * userspace address means we don't have to set and then restore MAS5, or + * calculate a proper MAS6 value. + */ +static u32 get_host_mas0(unsigned long eaddr) +{ + unsigned long flags; + u32 mas0; + + local_irq_save(flags); + mtspr(SPRN_MAS6, 0); + asm volatile("tlbsx 0, %0" : : "b" (eaddr & ~CONFIG_PAGE_OFFSET)); + mas0 = mfspr(SPRN_MAS0); + local_irq_restore(flags); + + return mas0; +} + +/* sesel is for tlb1 only */ +static inline void write_host_tlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + int tlbsel, int sesel, struct kvm_book3e_206_tlb_entry *stlbe) +{ + u32 mas0; + + if (tlbsel == 0) { + mas0 = get_host_mas0(stlbe->mas2); + __write_host_tlbe(stlbe, mas0); + } else { + __write_host_tlbe(stlbe, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(sesel))); + } +} + +/* sesel is for tlb1 only */ +static void write_stlbe(struct kvmppc_vcpu_e500 *vcpu_e500, + struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, + int stlbsel, int sesel) +{ + int stid; + + preempt_disable(); + stid = kvmppc_e500_get_tlb_stid(&vcpu_e500->vcpu, gtlbe); + + stlbe->mas1 |= MAS1_TID(stid); + write_host_tlbe(vcpu_e500, stlbsel, sesel, stlbe); + preempt_enable(); +} + +#ifdef CONFIG_KVM_E500V2 +/* XXX should be a hook in the gva2hpa translation */ +void kvmppc_map_magic(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct kvm_book3e_206_tlb_entry magic; + ulong shared_page = ((ulong)vcpu->arch.shared) & PAGE_MASK; + unsigned int stid; + pfn_t pfn; + + pfn = (pfn_t)virt_to_phys((void *)shared_page) >> PAGE_SHIFT; + get_page(pfn_to_page(pfn)); + + preempt_disable(); + stid = kvmppc_e500_get_sid(vcpu_e500, 0, 0, 0, 0); + + magic.mas1 = MAS1_VALID | MAS1_TS | MAS1_TID(stid) | + MAS1_TSIZE(BOOK3E_PAGESZ_4K); + magic.mas2 = vcpu->arch.magic_page_ea | MAS2_M; + magic.mas7_3 = ((u64)pfn << PAGE_SHIFT) | + MAS3_SW | MAS3_SR | MAS3_UW | MAS3_UR; + magic.mas8 = 0; + + __write_host_tlbe(&magic, MAS0_TLBSEL(1) | MAS0_ESEL(tlbcam_index)); + preempt_enable(); +} +#endif + +void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int esel) +{ + struct kvm_book3e_206_tlb_entry *gtlbe = + get_entry(vcpu_e500, tlbsel, esel); + struct tlbe_ref *ref = &vcpu_e500->gtlb_priv[tlbsel][esel].ref; + + /* Don't bother with unmapped entries */ + if (!(ref->flags & E500_TLB_VALID)) + return; + + if (tlbsel == 1 && ref->flags & E500_TLB_BITMAP) { + u64 tmp = vcpu_e500->g2h_tlb1_map[esel]; + int hw_tlb_indx; + unsigned long flags; + + local_irq_save(flags); + while (tmp) { + hw_tlb_indx = __ilog2_u64(tmp & -tmp); + mtspr(SPRN_MAS0, + MAS0_TLBSEL(1) | + MAS0_ESEL(to_htlb1_esel(hw_tlb_indx))); + mtspr(SPRN_MAS1, 0); + asm volatile("tlbwe"); + vcpu_e500->h2g_tlb1_rmap[hw_tlb_indx] = 0; + tmp &= tmp - 1; + } + mb(); + vcpu_e500->g2h_tlb1_map[esel] = 0; + ref->flags &= ~(E500_TLB_BITMAP | E500_TLB_VALID); + local_irq_restore(flags); + } + + if (tlbsel == 1 && ref->flags & E500_TLB_TLB0) { + /* + * TLB1 entry is backed by 4k pages. This should happen + * rarely and is not worth optimizing. Invalidate everything. + */ + kvmppc_e500_tlbil_all(vcpu_e500); + ref->flags &= ~(E500_TLB_TLB0 | E500_TLB_VALID); + } + + /* Already invalidated in between */ + if (!(ref->flags & E500_TLB_VALID)) + return; + + /* Guest tlbe is backed by at most one host tlbe per shadow pid. */ + kvmppc_e500_tlbil_one(vcpu_e500, gtlbe); + + /* Mark the TLB as not backed by the host anymore */ + ref->flags &= ~E500_TLB_VALID; +} + +static inline int tlbe_is_writable(struct kvm_book3e_206_tlb_entry *tlbe) +{ + return tlbe->mas7_3 & (MAS3_SW|MAS3_UW); +} + +static inline void kvmppc_e500_ref_setup(struct tlbe_ref *ref, + struct kvm_book3e_206_tlb_entry *gtlbe, + pfn_t pfn) +{ + ref->pfn = pfn; + ref->flags = E500_TLB_VALID; + + if (tlbe_is_writable(gtlbe)) + kvm_set_pfn_dirty(pfn); +} + +static inline void kvmppc_e500_ref_release(struct tlbe_ref *ref) +{ + if (ref->flags & E500_TLB_VALID) { + trace_kvm_booke206_ref_release(ref->pfn, ref->flags); + ref->flags = 0; + } +} + +static void clear_tlb1_bitmap(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + if (vcpu_e500->g2h_tlb1_map) + memset(vcpu_e500->g2h_tlb1_map, 0, + sizeof(u64) * vcpu_e500->gtlb_params[1].entries); + if (vcpu_e500->h2g_tlb1_rmap) + memset(vcpu_e500->h2g_tlb1_rmap, 0, + sizeof(unsigned int) * host_tlb_params[1].entries); +} + +static void clear_tlb_privs(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int tlbsel = 0; + int i; + + for (i = 0; i < vcpu_e500->gtlb_params[tlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->gtlb_priv[tlbsel][i].ref; + kvmppc_e500_ref_release(ref); + } +} + +static void clear_tlb_refs(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + int stlbsel = 1; + int i; + + kvmppc_e500_tlbil_all(vcpu_e500); + + for (i = 0; i < host_tlb_params[stlbsel].entries; i++) { + struct tlbe_ref *ref = + &vcpu_e500->tlb_refs[stlbsel][i]; + kvmppc_e500_ref_release(ref); + } + + clear_tlb_privs(vcpu_e500); +} + +void kvmppc_core_flush_tlb(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + clear_tlb_refs(vcpu_e500); + clear_tlb1_bitmap(vcpu_e500); +} + +/* TID must be supplied by the caller */ +static void kvmppc_e500_setup_stlbe( + struct kvm_vcpu *vcpu, + struct kvm_book3e_206_tlb_entry *gtlbe, + int tsize, struct tlbe_ref *ref, u64 gvaddr, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + pfn_t pfn = ref->pfn; + u32 pr = vcpu->arch.shared->msr & MSR_PR; + + BUG_ON(!(ref->flags & E500_TLB_VALID)); + + /* Force IPROT=0 for all guest mappings. */ + stlbe->mas1 = MAS1_TSIZE(tsize) | get_tlb_sts(gtlbe) | MAS1_VALID; + stlbe->mas2 = (gvaddr & MAS2_EPN) | + e500_shadow_mas2_attrib(gtlbe->mas2, pr); + stlbe->mas7_3 = ((u64)pfn << PAGE_SHIFT) | + e500_shadow_mas3_attrib(gtlbe->mas7_3, pr); + +#ifdef CONFIG_KVM_BOOKE_HV + stlbe->mas8 = MAS8_TGS | vcpu->kvm->arch.lpid; +#endif +} + +static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + int tlbsel, struct kvm_book3e_206_tlb_entry *stlbe, + struct tlbe_ref *ref) +{ + struct kvm_memory_slot *slot; + unsigned long pfn = 0; /* silence GCC warning */ + unsigned long hva; + int pfnmap = 0; + int tsize = BOOK3E_PAGESZ_4K; + + /* + * Translate guest physical to true physical, acquiring + * a page reference if it is normal, non-reserved memory. + * + * gfn_to_memslot() must succeed because otherwise we wouldn't + * have gotten this far. Eventually we should just pass the slot + * pointer through from the first lookup. + */ + slot = gfn_to_memslot(vcpu_e500->vcpu.kvm, gfn); + hva = gfn_to_hva_memslot(slot, gfn); + + if (tlbsel == 1) { + struct vm_area_struct *vma; + down_read(¤t->mm->mmap_sem); + + vma = find_vma(current->mm, hva); + if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_PFNMAP)) { + /* + * This VMA is a physically contiguous region (e.g. + * /dev/mem) that bypasses normal Linux page + * management. Find the overlap between the + * vma and the memslot. + */ + + unsigned long start, end; + unsigned long slot_start, slot_end; + + pfnmap = 1; + + start = vma->vm_pgoff; + end = start + + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT); + + pfn = start + ((hva - vma->vm_start) >> PAGE_SHIFT); + + slot_start = pfn - (gfn - slot->base_gfn); + slot_end = slot_start + slot->npages; + + if (start < slot_start) + start = slot_start; + if (end > slot_end) + end = slot_end; + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + + /* + * Now find the largest tsize (up to what the guest + * requested) that will cover gfn, stay within the + * range, and for which gfn and pfn are mutually + * aligned. + */ + + for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { + unsigned long gfn_start, gfn_end, tsize_pages; + tsize_pages = 1 << (tsize - 2); + + gfn_start = gfn & ~(tsize_pages - 1); + gfn_end = gfn_start + tsize_pages; + + if (gfn_start + pfn - gfn < start) + continue; + if (gfn_end + pfn - gfn > end) + continue; + if ((gfn & (tsize_pages - 1)) != + (pfn & (tsize_pages - 1))) + continue; + + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + pfn &= ~(tsize_pages - 1); + break; + } + } else if (vma && hva >= vma->vm_start && + (vma->vm_flags & VM_HUGETLB)) { + unsigned long psize = vma_kernel_pagesize(vma); + + tsize = (gtlbe->mas1 & MAS1_TSIZE_MASK) >> + MAS1_TSIZE_SHIFT; + + /* + * Take the largest page size that satisfies both host + * and guest mapping + */ + tsize = min(__ilog2(psize) - 10, tsize); + + /* + * e500 doesn't implement the lowest tsize bit, + * or 1K pages. + */ + tsize = max(BOOK3E_PAGESZ_4K, tsize & ~1); + } + + up_read(¤t->mm->mmap_sem); + } + + if (likely(!pfnmap)) { + unsigned long tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); + pfn = gfn_to_pfn_memslot(slot, gfn); + if (is_error_noslot_pfn(pfn)) { + printk(KERN_ERR "Couldn't get real page for gfn %lx!\n", + (long)gfn); + return -EINVAL; + } + + /* Align guest and physical address to page map boundaries */ + pfn &= ~(tsize_pages - 1); + gvaddr &= ~((tsize_pages << PAGE_SHIFT) - 1); + } + + /* Drop old ref and setup new one. */ + kvmppc_e500_ref_release(ref); + kvmppc_e500_ref_setup(ref, gtlbe, pfn); + + kvmppc_e500_setup_stlbe(&vcpu_e500->vcpu, gtlbe, tsize, + ref, gvaddr, stlbe); + + /* Clear i-cache for new pages */ + kvmppc_mmu_flush_icache(pfn); + + /* Drop refcount on page, so that mmu notifiers can clear it */ + kvm_release_pfn_clean(pfn); + + return 0; +} + +/* XXX only map the one-one case, for now use TLB0 */ +static int kvmppc_e500_tlb0_map(struct kvmppc_vcpu_e500 *vcpu_e500, int esel, + struct kvm_book3e_206_tlb_entry *stlbe) +{ + struct kvm_book3e_206_tlb_entry *gtlbe; + struct tlbe_ref *ref; + int stlbsel = 0; + int sesel = 0; + int r; + + gtlbe = get_entry(vcpu_e500, 0, esel); + ref = &vcpu_e500->gtlb_priv[0][esel].ref; + + r = kvmppc_e500_shadow_map(vcpu_e500, get_tlb_eaddr(gtlbe), + get_tlb_raddr(gtlbe) >> PAGE_SHIFT, + gtlbe, 0, stlbe, ref); + if (r) + return r; + + write_stlbe(vcpu_e500, gtlbe, stlbe, stlbsel, sesel); + + return 0; +} + +static int kvmppc_e500_tlb1_map_tlb1(struct kvmppc_vcpu_e500 *vcpu_e500, + struct tlbe_ref *ref, + int esel) +{ + unsigned int sesel = vcpu_e500->host_tlb1_nv++; + + if (unlikely(vcpu_e500->host_tlb1_nv >= tlb1_max_shadow_size())) + vcpu_e500->host_tlb1_nv = 0; + + vcpu_e500->tlb_refs[1][sesel] = *ref; + vcpu_e500->g2h_tlb1_map[esel] |= (u64)1 << sesel; + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_BITMAP; + if (vcpu_e500->h2g_tlb1_rmap[sesel]) { + unsigned int idx = vcpu_e500->h2g_tlb1_rmap[sesel]; + vcpu_e500->g2h_tlb1_map[idx] &= ~(1ULL << sesel); + } + vcpu_e500->h2g_tlb1_rmap[sesel] = esel; + + return sesel; +} + +/* Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. */ +/* For both one-one and one-to-many */ +static int kvmppc_e500_tlb1_map(struct kvmppc_vcpu_e500 *vcpu_e500, + u64 gvaddr, gfn_t gfn, struct kvm_book3e_206_tlb_entry *gtlbe, + struct kvm_book3e_206_tlb_entry *stlbe, int esel) +{ + struct tlbe_ref ref; + int sesel; + int r; + + ref.flags = 0; + r = kvmppc_e500_shadow_map(vcpu_e500, gvaddr, gfn, gtlbe, 1, stlbe, + &ref); + if (r) + return r; + + /* Use TLB0 when we can only map a page with 4k */ + if (get_tlb_tsize(stlbe) == BOOK3E_PAGESZ_4K) { + vcpu_e500->gtlb_priv[1][esel].ref.flags |= E500_TLB_TLB0; + write_stlbe(vcpu_e500, gtlbe, stlbe, 0, 0); + return 0; + } + + /* Otherwise map into TLB1 */ + sesel = kvmppc_e500_tlb1_map_tlb1(vcpu_e500, &ref, esel); + write_stlbe(vcpu_e500, gtlbe, stlbe, 1, sesel); + + return 0; +} + +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 eaddr, gpa_t gpaddr, + unsigned int index) +{ + struct kvmppc_vcpu_e500 *vcpu_e500 = to_e500(vcpu); + struct tlbe_priv *priv; + struct kvm_book3e_206_tlb_entry *gtlbe, stlbe; + int tlbsel = tlbsel_of(index); + int esel = esel_of(index); + + gtlbe = get_entry(vcpu_e500, tlbsel, esel); + + switch (tlbsel) { + case 0: + priv = &vcpu_e500->gtlb_priv[tlbsel][esel]; + + /* Triggers after clear_tlb_refs or on initial mapping */ + if (!(priv->ref.flags & E500_TLB_VALID)) { + kvmppc_e500_tlb0_map(vcpu_e500, esel, &stlbe); + } else { + kvmppc_e500_setup_stlbe(vcpu, gtlbe, BOOK3E_PAGESZ_4K, + &priv->ref, eaddr, &stlbe); + write_stlbe(vcpu_e500, gtlbe, &stlbe, 0, 0); + } + break; + + case 1: { + gfn_t gfn = gpaddr >> PAGE_SHIFT; + kvmppc_e500_tlb1_map(vcpu_e500, eaddr, gfn, gtlbe, &stlbe, + esel); + break; + } + + default: + BUG(); + break; + } +} + +/************* MMU Notifiers *************/ + +int kvm_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + trace_kvm_unmap_hva(hva); + + /* + * Flush all shadow tlb entries everywhere. This is slow, but + * we are 100% sure that we catch the to be unmapped page + */ + kvm_flush_remote_tlbs(kvm); + + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) +{ + /* kvm_unmap_hva flushes everything anyways */ + kvm_unmap_hva(kvm, start); + + return 0; +} + +int kvm_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + /* XXX could be more clever ;) */ + return 0; +} + +void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + /* The page will get remapped properly on its next fault */ + kvm_unmap_hva(kvm, hva); +} + +/*****************************************/ + +int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + host_tlb_params[0].entries = mfspr(SPRN_TLB0CFG) & TLBnCFG_N_ENTRY; + host_tlb_params[1].entries = mfspr(SPRN_TLB1CFG) & TLBnCFG_N_ENTRY; + + /* + * This should never happen on real e500 hardware, but is + * architecturally possible -- e.g. in some weird nested + * virtualization case. + */ + if (host_tlb_params[0].entries == 0 || + host_tlb_params[1].entries == 0) { + pr_err("%s: need to know host tlb size\n", __func__); + return -ENODEV; + } + + host_tlb_params[0].ways = (mfspr(SPRN_TLB0CFG) & TLBnCFG_ASSOC) >> + TLBnCFG_ASSOC_SHIFT; + host_tlb_params[1].ways = host_tlb_params[1].entries; + + if (!is_power_of_2(host_tlb_params[0].entries) || + !is_power_of_2(host_tlb_params[0].ways) || + host_tlb_params[0].entries < host_tlb_params[0].ways || + host_tlb_params[0].ways == 0) { + pr_err("%s: bad tlb0 host config: %u entries %u ways\n", + __func__, host_tlb_params[0].entries, + host_tlb_params[0].ways); + return -ENODEV; + } + + host_tlb_params[0].sets = + host_tlb_params[0].entries / host_tlb_params[0].ways; + host_tlb_params[1].sets = 1; + + vcpu_e500->tlb_refs[0] = + kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[0].entries, + GFP_KERNEL); + if (!vcpu_e500->tlb_refs[0]) + goto err; + + vcpu_e500->tlb_refs[1] = + kzalloc(sizeof(struct tlbe_ref) * host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->tlb_refs[1]) + goto err; + + vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) * + host_tlb_params[1].entries, + GFP_KERNEL); + if (!vcpu_e500->h2g_tlb1_rmap) + goto err; + + return 0; + +err: + kfree(vcpu_e500->tlb_refs[0]); + kfree(vcpu_e500->tlb_refs[1]); + return -EINVAL; +} + +void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500) +{ + kfree(vcpu_e500->h2g_tlb1_rmap); + kfree(vcpu_e500->tlb_refs[0]); + kfree(vcpu_e500->tlb_refs[1]); +} diff --git a/arch/powerpc/kvm/e500_mmu_host.h b/arch/powerpc/kvm/e500_mmu_host.h new file mode 100644 index 0000000..7624835 --- /dev/null +++ b/arch/powerpc/kvm/e500_mmu_host.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 2008-2013 Freescale Semiconductor, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + */ + +#ifndef KVM_E500_MMU_HOST_H +#define KVM_E500_MMU_HOST_H + +void inval_gtlbe_on_host(struct kvmppc_vcpu_e500 *vcpu_e500, int tlbsel, + int esel); + +int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500); +void e500_mmu_host_uninit(struct kvmppc_vcpu_e500 *vcpu_e500); + +#endif /* KVM_E500_MMU_HOST_H */ diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index 9d9cddc..7a73b6f 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -150,8 +150,6 @@ static int kvmppc_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, int rs) case SPRN_TBWL: break; case SPRN_TBWU: break; - case SPRN_MSSSR0: break; - case SPRN_DEC: vcpu->arch.dec = spr_val; kvmppc_emulate_dec(vcpu); @@ -202,9 +200,6 @@ static int kvmppc_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, int rt) case SPRN_PIR: spr_val = vcpu->vcpu_id; break; - case SPRN_MSSSR0: - spr_val = 0; - break; /* Note: mftb and TBRL/TBWL are user-accessible, so * the guest can always access the real TB anyways. diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 70739a0..934413c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -237,7 +237,8 @@ int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu) r = RESUME_HOST; break; default: - BUG(); + WARN_ON(1); + r = RESUME_GUEST; } return r; @@ -305,6 +306,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_SREGS: case KVM_CAP_PPC_BOOKE_WATCHDOG: + case KVM_CAP_PPC_EPR: #else case KVM_CAP_PPC_SEGSTATE: case KVM_CAP_PPC_HIOR: @@ -412,7 +414,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, struct kvm_userspace_memory_region *mem, - int user_alloc) + bool user_alloc) { return kvmppc_core_prepare_memory_region(kvm, memslot, mem); } @@ -420,7 +422,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, - int user_alloc) + bool user_alloc) { kvmppc_core_commit_memory_region(kvm, mem, old); } @@ -720,6 +722,11 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) for (i = 0; i < 9; ++i) kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); vcpu->arch.hcall_needed = 0; +#ifdef CONFIG_BOOKE + } else if (vcpu->arch.epr_needed) { + kvmppc_set_epr(vcpu, run->epr.epr); + vcpu->arch.epr_needed = 0; +#endif } r = kvmppc_vcpu_run(run, vcpu); @@ -761,6 +768,10 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, r = 0; vcpu->arch.papr_enabled = true; break; + case KVM_CAP_PPC_EPR: + r = 0; + vcpu->arch.epr_enabled = cap->args[0]; + break; #ifdef CONFIG_BOOKE case KVM_CAP_PPC_BOOKE_WATCHDOG: r = 0; |