From 210b1607012cc9034841a393e0591b2c86d9e26c Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 19 Sep 2013 16:26:18 +0200 Subject: KVM: s390: Removed SIE_INTERCEPT_UCONTROL The SIE_INTERCEPT_UCONTROL can be removed by moving the related code from kvm_arch_vcpu_ioctl_run() to vcpu_post_run(). Signed-off-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 24 ++++++------------------ arch/s390/kvm/kvm-s390.h | 2 -- 2 files changed, 6 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 569494e..7f47835 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -732,14 +732,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) if (exit_reason >= 0) { rc = 0; - } else { - if (kvm_is_ucontrol(vcpu->kvm)) { - rc = SIE_INTERCEPT_UCONTROL; - } else { - VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); - trace_kvm_s390_sie_fault(vcpu); - rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); - } + } else if (kvm_is_ucontrol(vcpu->kvm)) { + vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL; + vcpu->run->s390_ucontrol.trans_exc_code = + current->thread.gmap_addr; + vcpu->run->s390_ucontrol.pgm_code = 0x10; + rc = -EREMOTE; } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); @@ -833,16 +831,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) rc = -EINTR; } -#ifdef CONFIG_KVM_S390_UCONTROL - if (rc == SIE_INTERCEPT_UCONTROL) { - kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL; - kvm_run->s390_ucontrol.trans_exc_code = - current->thread.gmap_addr; - kvm_run->s390_ucontrol.pgm_code = 0x10; - rc = 0; - } -#endif - if (rc == -EOPNOTSUPP) { /* intercept cannot be handled in-kernel, prepare kvm-run */ kvm_run->exit_reason = KVM_EXIT_S390_SIEIC; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index b44912a..aad541f 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -27,8 +27,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ extern unsigned long *vfacilities; -/* negativ values are error codes, positive values for internal conditions */ -#define SIE_INTERCEPT_UCONTROL (1<<0) int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu); #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\ -- cgit v1.1 From ac5b03420150241dc2db3cb4aa4f58c1e7e4640f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 7 Oct 2013 17:50:22 +0200 Subject: KVM: s390: Removed VIRTIODESCSPACE VIRTIODESCSPACE is completely unused nowadays and thus can be removed without any problems. Signed-off-by: Thomas Huth Acked-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index aad541f..fcd25b4 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -19,9 +19,6 @@ #include #include -/* The current code can have up to 256 pages for virtio */ -#define VIRTIODESCSPACE (256ul * 4096ul) - typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu); /* declare vfacilities extern */ -- cgit v1.1 From f092669e743048f50c714a1af7f8e3478d7b9e1b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 9 Oct 2013 14:15:54 +0200 Subject: KVM: s390: Fix access to CR6 in TPI handler The TPI handler currently uses vcpu->run->s.regs.crs[6] to get the current value of CR6. I think this is wrong, because vcpu->run->s.regs.crs is only updated when kvm_arch_vcpu_ioctl_run() drops back to userspace. So let's change the TPI handler to use vcpu->arch.sie_block->gcr[6] instead. Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- arch/s390/kvm/priv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 2440602..b18fe52 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) if (addr & 3) return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION); cc = 0; - inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0); + inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0); if (!inti) goto no_interrupt; cc = 1; -- cgit v1.1 From c95221f69dfa5d3696b2b91374cbd7e5897657c5 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 9 Oct 2013 16:49:03 +0200 Subject: KVM: s390: Do not set CC3 for EQBS and SQBS The EQBS and SQBS instructions do not set CC3 for invalid channels, but should throw an operation exception instead when not available. Thus they should not be handled by the handle_io_inst() wrapper but drop to userspace instead (which will then inject the operation exception). Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Signed-off-by: Cornelia Huck --- arch/s390/kvm/priv.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index b18fe52..05537ab 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) static const intercept_handler_t b9_handlers[256] = { [0x8d] = handle_epsw, - [0x9c] = handle_io_inst, [0xaf] = handle_pfmf, }; @@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) static const intercept_handler_t eb_handlers[256] = { [0x2f] = handle_lctlg, - [0x8a] = handle_io_inst, }; int kvm_s390_handle_eb(struct kvm_vcpu *vcpu) -- cgit v1.1 From e879892c725217a4af1012f31ae56be762473216 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 6 Nov 2013 15:46:33 +0100 Subject: KVM: s390: Always store status during SIGP STOP_AND_STORE_STATUS The SIGP order STOP_AND_STORE_STATUS is defined to stop a CPU and store its status. However, we only stored the status if the CPU was still running, so make sure that the status is now also stored if the CPU was already stopped. This fixes the problem that the CPU information was not stored correctly in kdump files, rendering them unreadable. Signed-off-by: Thomas Huth Acked-by: Christian Borntraeger Cc: stable@vger.kernel.org Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++---------- arch/s390/kvm/kvm-s390.h | 4 ++-- arch/s390/kvm/sigp.c | 15 ++++++++++++++- 3 files changed, 31 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 7f47835..55eb8de 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -873,7 +873,7 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from, * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit * KVM_S390_STORE_STATUS_PREFIXED: -> prefix */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) { unsigned char archmode = 1; int prefix; @@ -891,15 +891,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) } else prefix = 0; - /* - * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy - * copying in vcpu load/put. Lets update our copies before we save - * it into the save area - */ - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); - save_access_regs(vcpu->run->s.regs.acrs); - if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs), vcpu->arch.guest_fpregs.fprs, 128, prefix)) return -EFAULT; @@ -944,6 +935,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) return 0; } +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr) +{ + /* + * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy + * copying in vcpu load/put. Lets update our copies before we save + * it into the save area + */ + save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); + save_fp_regs(vcpu->arch.guest_fpregs.fprs); + save_access_regs(vcpu->run->s.regs.acrs); + + return kvm_s390_store_status_unloaded(vcpu, addr); +} + static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, struct kvm_enable_cap *cap) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index fcd25b4..36f6b18 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -145,8 +145,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu); int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu); /* implemented in kvm-s390.c */ -int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, - unsigned long addr); +int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr); +int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr); void s390_vcpu_block(struct kvm_vcpu *vcpu); void s390_vcpu_unblock(struct kvm_vcpu *vcpu); void exit_sie(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index bec398c..6805601 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -130,6 +130,7 @@ unlock: static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) { struct kvm_s390_interrupt_info *inti; + int rc = SIGP_CC_ORDER_CODE_ACCEPTED; inti = kzalloc(sizeof(*inti), GFP_ATOMIC); if (!inti) @@ -139,6 +140,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) spin_lock_bh(&li->lock); if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) { kfree(inti); + if ((action & ACTION_STORE_ON_STOP) != 0) + rc = -ESHUTDOWN; goto out; } list_add_tail(&inti->list, &li->list); @@ -150,7 +153,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action) out: spin_unlock_bh(&li->lock); - return SIGP_CC_ORDER_CODE_ACCEPTED; + return rc; } static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) @@ -174,6 +177,16 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action) unlock: spin_unlock(&fi->lock); VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr); + + if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) { + /* If the CPU has already been stopped, we still have + * to save the status when doing stop-and-store. This + * has to be done after unlocking all spinlocks. */ + struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + rc = kvm_s390_store_status_unloaded(dst_vcpu, + KVM_S390_STORE_STATUS_NOADDR); + } + return rc; } -- cgit v1.1 From 178bd789775ab29233e0553155253ec8d73af71f Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Nov 2013 20:28:18 +0100 Subject: KVM: s390: Fix clock comparator field for STORE STATUS Only the most 7 significant bytes of the clock comparator must be saved to the status area, and the byte at offset 304 has to be zero. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 55eb8de..1bb1dda 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -877,6 +877,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) { unsigned char archmode = 1; int prefix; + u64 clkcomp; if (addr == KVM_S390_STORE_STATUS_NOADDR) { if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1)) @@ -920,8 +921,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr) &vcpu->arch.sie_block->cputm, 8, prefix)) return -EFAULT; + clkcomp = vcpu->arch.sie_block->ckc >> 8; if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp), - &vcpu->arch.sie_block->ckc, 8, prefix)) + &clkcomp, 8, prefix)) return -EFAULT; if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs), -- cgit v1.1 From 743db27c526e0f31cc507959d662e97e2048a86f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 11 Nov 2013 13:56:47 +0100 Subject: KVM: s390: fix diagnose code extraction The diagnose code to be used is the contents of the base register (if not zero), plus the displacement. The current code ignores the base register contents. So let's fix that... Reviewed-by: Cornelia Huck Cc: stable@vger.kernel.org Signed-off-by: Heiko Carstens Signed-off-by: Cornelia Huck --- arch/s390/kvm/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 78d967f..5ff29be 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) int kvm_s390_handle_diag(struct kvm_vcpu *vcpu) { - int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16; + int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); -- cgit v1.1 From 00e9e435f97b409db8986f9cd35d126ae2d02a0c Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 13 Nov 2013 20:48:51 +0100 Subject: KVM: s390: Add SIGP store-status-at-address order The STORE STATUS AT ADDRESS order of SIGP was still missing. Now it is supported, using the common kvm_s390_store_status() function. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/sigp.c | 35 +++++++++++++++++++++++++++++++++++ arch/s390/kvm/trace.h | 1 + 2 files changed, 36 insertions(+) (limited to 'arch') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 6805601..c137ed3 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -275,6 +275,37 @@ out_fi: return rc; } +static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id, + u32 addr, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + int flags; + int rc; + + if (cpu_id < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + + spin_lock_bh(&dst_vcpu->arch.local_int.lock); + flags = atomic_read(dst_vcpu->arch.local_int.cpuflags); + spin_unlock_bh(&dst_vcpu->arch.local_int.lock); + if (!(flags & CPUSTAT_STOPPED)) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } + + addr &= 0x7ffffe00; + rc = kvm_s390_store_status_unloaded(dst_vcpu, addr); + if (rc == -EFAULT) { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INVALID_PARAMETER; + rc = SIGP_CC_STATUS_STORED; + } + return rc; +} + static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, u64 *reg) { @@ -379,6 +410,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP | ACTION_STOP_ON_STOP); break; + case SIGP_STORE_STATUS_AT_ADDRESS: + rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; case SIGP_SET_ARCHITECTURE: vcpu->stat.instruction_sigp_arch++; rc = __sigp_set_arch(vcpu, parameter); diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h index 0c991c6..3db76b2 100644 --- a/arch/s390/kvm/trace.h +++ b/arch/s390/kvm/trace.h @@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity, {SIGP_STOP_AND_STORE_STATUS, "stop and store status"}, \ {SIGP_SET_ARCHITECTURE, "set architecture"}, \ {SIGP_SET_PREFIX, "set prefix"}, \ + {SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"}, \ {SIGP_SENSE_RUNNING, "sense running"}, \ {SIGP_RESTART, "restart"} -- cgit v1.1 From 36daca9bb36f0395755817d1b0c45ab6fbf0441b Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 14 Nov 2013 11:08:20 +0100 Subject: KVM: s390: Removed kvm_s390_inject_sigp_stop() The function kvm_s390_inject_sigp_stop() as been unused since the removal of the old mmu reload code and thus can be removed safely. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/kvm-s390.h | 1 - arch/s390/kvm/sigp.c | 6 ------ 2 files changed, 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 36f6b18..095cf51 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -128,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm, int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu, struct kvm_s390_interrupt *s390int); int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code); -int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action); struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm, u64 cr6, u64 schid); diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c137ed3..c370058 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -190,12 +190,6 @@ unlock: return rc; } -int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action) -{ - struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - return __inject_sigp_stop(li, action); -} - static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter) { int rc; -- cgit v1.1 From 4fda342cc7f577599c53fd27b99c953c7b1da18a Mon Sep 17 00:00:00 2001 From: Santosh Shilimkar Date: Tue, 19 Nov 2013 14:59:12 -0500 Subject: arm/arm64: kvm: Use virt_to_idmap instead of virt_to_phys for idmap mappings KVM initialisation fails on architectures implementing virt_to_idmap() because virt_to_phys() on such architectures won't fetch you the correct idmap page. So update the KVM ARM code to use the virt_to_idmap() to fix the issue. Since the KVM code is shared between arm and arm64, we create kvm_virt_to_phys() and handle the redirection in respective headers. Cc: Christoffer Dall Cc: Marc Zyngier Cc: Catalin Marinas Signed-off-by: Santosh Shilimkar Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_mmu.h | 1 + arch/arm/kvm/mmu.c | 8 ++++---- arch/arm64/include/asm/kvm_mmu.h | 1 + 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 77de4a4..2d122ad 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __cpuc_flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) virt_to_idmap((unsigned long)(x)) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 5809069..659db0e 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -916,9 +916,9 @@ int kvm_mmu_init(void) { int err; - hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start); - hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end); - hyp_idmap_vector = virt_to_phys(__kvm_hyp_init); + hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start); + hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end); + hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init); if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) { /* @@ -945,7 +945,7 @@ int kvm_mmu_init(void) */ kvm_flush_dcache_to_poc(init_bounce_page, len); - phys_base = virt_to_phys(init_bounce_page); + phys_base = kvm_virt_to_phys(init_bounce_page); hyp_idmap_vector += phys_base - hyp_idmap_start; hyp_idmap_start = phys_base; hyp_idmap_end = phys_base + len; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 680f74e..7f1f940 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva, } #define kvm_flush_dcache_to_poc(a,l) __flush_dcache_area((a), (l)) +#define kvm_virt_to_phys(x) __virt_to_phys((unsigned long)(x)) #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ -- cgit v1.1 From 949c007acd8b6887cf5f3ac86512a7b12fa245dc Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 26 Nov 2013 12:27:16 +0100 Subject: KVM: s390: Use helper function to set CC in SIGP handler We've got a helper function for setting the condition code now, so let's use it in the SIGP handler, too. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/sigp.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index c370058..bc0d85a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -435,7 +435,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) if (rc < 0) return rc; - vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44); - vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44; + kvm_s390_set_psw_cc(vcpu, rc); return 0; } -- cgit v1.1 From b13d3580ee47ba3b2814e90b8a9b8241f7a4ba83 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Thu, 21 Nov 2013 16:01:48 +0100 Subject: KVM: s390: Add the SIGP order CONDITIONAL EMERGENCY SIGNAL This patch adds the missing SIGP order "conditional emergency signal" by calling the "emergency signal" SIGP handler if the required conditions are met. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/include/asm/sigp.h | 1 + arch/s390/kvm/sigp.c | 37 ++++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index 5a87d16..c002cd5 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -12,6 +12,7 @@ #define SIGP_SET_PREFIX 13 #define SIGP_STORE_STATUS_AT_ADDRESS 14 #define SIGP_SET_ARCHITECTURE 18 +#define SIGP_COND_EMERGENCY_SIGNAL 19 #define SIGP_SENSE_RUNNING 21 /* SIGP condition codes */ diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index bc0d85a..eee1402 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -1,7 +1,7 @@ /* * handling interprocessor communication * - * Copyright IBM Corp. 2008, 2009 + * Copyright IBM Corp. 2008, 2013 * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License (version 2 only) @@ -89,6 +89,37 @@ unlock: return rc; } +static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr, + u16 asn, u64 *reg) +{ + struct kvm_vcpu *dst_vcpu = NULL; + const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT; + u16 p_asn, s_asn; + psw_t *psw; + u32 flags; + + if (cpu_addr < KVM_MAX_VCPUS) + dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr); + if (!dst_vcpu) + return SIGP_CC_NOT_OPERATIONAL; + flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags); + psw = &dst_vcpu->arch.sie_block->gpsw; + p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff; /* Primary ASN */ + s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff; /* Secondary ASN */ + + /* Deliver the emergency signal? */ + if (!(flags & CPUSTAT_STOPPED) + || (psw->mask & psw_int_mask) != psw_int_mask + || ((flags & CPUSTAT_WAIT) && psw->addr != 0) + || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) { + return __sigp_emergency(vcpu, cpu_addr); + } else { + *reg &= 0xffffffff00000000UL; + *reg |= SIGP_STATUS_INCORRECT_STATE; + return SIGP_CC_STATUS_STORED; + } +} + static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; @@ -417,6 +448,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_set_prefix(vcpu, cpu_addr, parameter, &vcpu->run->s.regs.gprs[r1]); break; + case SIGP_COND_EMERGENCY_SIGNAL: + rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter, + &vcpu->run->s.regs.gprs[r1]); + break; case SIGP_SENSE_RUNNING: vcpu->stat.instruction_sigp_sense_running++; rc = __sigp_sense_running(vcpu, cpu_addr, -- cgit v1.1 From cc92d6dea11cd43842e20cd05c066963de586417 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Wed, 27 Nov 2013 11:47:10 +0100 Subject: KVM: s390: Reworked SIGP RESTART order When SIGP RESTART detected an illegal CPU address, there is no need to drop to userspace, we can return CC3 to the guest directly instead. Also renamed __sigp_restart() to sigp_check_callable() (since this is a better description of what the function is really doing) and moved a string specific to RESTART to the calling place instead, so that this function gets usable by other SIGP orders, too. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/sigp.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index eee1402..509547d 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -363,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr, return rc; } -static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) +/* Test whether the destination CPU is available and not busy */ +static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr) { struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int; struct kvm_s390_local_interrupt *li; @@ -382,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr) spin_lock_bh(&li->lock); if (li->action_bits & ACTION_STOP_ON_STOP) rc = SIGP_CC_BUSY; - else - VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace", - cpu_addr); spin_unlock_bh(&li->lock); out: spin_unlock(&fi->lock); @@ -459,10 +457,15 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; - rc = __sigp_restart(vcpu, cpu_addr); - if (rc == SIGP_CC_BUSY) - break; - /* user space must know about restart */ + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) { + VCPU_EVENT(vcpu, 4, + "sigp restart %x to handle userspace", + cpu_addr); + /* user space must know about restart */ + rc = -EOPNOTSUPP; + } + break; default: return -EOPNOTSUPP; } -- cgit v1.1 From 58bc33b2b700f8524772f3fc20272da2187060c8 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Tue, 3 Dec 2013 12:54:55 +0100 Subject: KVM: s390: SIGP START has to report BUSY while stopping a CPU Just like the RESTART order, the START order also has to report BUSY while a STOP request is pending, to avoid that the START might be ignored due to a race condition between the STOP and the START order. Signed-off-by: Thomas Huth Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/include/asm/sigp.h | 1 + arch/s390/kvm/sigp.c | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h index c002cd5..d091aa1 100644 --- a/arch/s390/include/asm/sigp.h +++ b/arch/s390/include/asm/sigp.h @@ -5,6 +5,7 @@ #define SIGP_SENSE 1 #define SIGP_EXTERNAL_CALL 2 #define SIGP_EMERGENCY_SIGNAL 3 +#define SIGP_START 4 #define SIGP_STOP 5 #define SIGP_RESTART 6 #define SIGP_STOP_AND_STORE_STATUS 9 diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c index 509547d..87c2b3a 100644 --- a/arch/s390/kvm/sigp.c +++ b/arch/s390/kvm/sigp.c @@ -455,6 +455,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu) rc = __sigp_sense_running(vcpu, cpu_addr, &vcpu->run->s.regs.gprs[r1]); break; + case SIGP_START: + rc = sigp_check_callable(vcpu, cpu_addr); + if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) + rc = -EOPNOTSUPP; /* Handle START in user space */ + break; case SIGP_RESTART: vcpu->stat.instruction_sigp_restart++; rc = sigp_check_callable(vcpu, cpu_addr); -- cgit v1.1 From ff1f3cb4b3ac5d039f02679f34cb1498d110d241 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Mon, 9 Dec 2013 18:30:01 +0100 Subject: KVM: s390: ioeventfd: ignore leftmost bits The diagnose 500 subcode 3 contains the 32 bit subchannel id in bits 32-63 (counting from the left). As for other I/O instructions, bits 0-31 should be ignored and thus not be passed to kvm_io_bus_write_cookie(). This fixes a bug where the guest passed non-zero bits 0-31 which the host tried to interpret, leading to ioeventfd notification failures. Cc: stable@vger.kernel.org Signed-off-by: Dominik Dingel Reviewed-by: Cornelia Huck Signed-off-by: Cornelia Huck --- arch/s390/kvm/diag.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 5ff29be..8216c0e 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu) * - gpr 4 contains the index on the bus (optionally) */ ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS, - vcpu->run->s.regs.gprs[2], + vcpu->run->s.regs.gprs[2] & 0xffffffff, 8, &vcpu->run->s.regs.gprs[3], vcpu->run->s.regs.gprs[4]); -- cgit v1.1 From 2961e8764faad212234e93907a370a7c36a67da5 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Mon, 25 Nov 2013 15:37:13 +0200 Subject: KVM: VMX: shadow VM_(ENTRY|EXIT)_CONTROLS vmcs field VM_(ENTRY|EXIT)_CONTROLS vmcs fields are read/written on each guest entry but most times it can be avoided since values do not changes. Keep fields copy in memory to avoid unnecessary reads from vmcs. Signed-off-by: Gleb Natapov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 112 ++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 85 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b2fe1c2..1024689 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -418,6 +418,8 @@ struct vcpu_vmx { u64 msr_host_kernel_gs_base; u64 msr_guest_kernel_gs_base; #endif + u32 vm_entry_controls_shadow; + u32 vm_exit_controls_shadow; /* * loaded_vmcs points to the VMCS currently used in this vcpu. For a * non-nested (L1) guest, it always points to vmcs01. For a nested @@ -1326,6 +1328,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask) vmcs_writel(field, vmcs_readl(field) | mask); } +static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val) +{ + vmcs_write32(VM_ENTRY_CONTROLS, val); + vmx->vm_entry_controls_shadow = val; +} + +static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val) +{ + if (vmx->vm_entry_controls_shadow != val) + vm_entry_controls_init(vmx, val); +} + +static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx) +{ + return vmx->vm_entry_controls_shadow; +} + + +static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val); +} + +static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val); +} + +static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val) +{ + vmcs_write32(VM_EXIT_CONTROLS, val); + vmx->vm_exit_controls_shadow = val; +} + +static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val) +{ + if (vmx->vm_exit_controls_shadow != val) + vm_exit_controls_init(vmx, val); +} + +static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx) +{ + return vmx->vm_exit_controls_shadow; +} + + +static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val); +} + +static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val) +{ + vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val); +} + static void vmx_segment_cache_clear(struct vcpu_vmx *vmx) { vmx->segment_cache.bitmask = 0; @@ -1410,11 +1468,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) vmcs_write32(EXCEPTION_BITMAP, eb); } -static void clear_atomic_switch_msr_special(unsigned long entry, - unsigned long exit) +static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit) { - vmcs_clear_bits(VM_ENTRY_CONTROLS, entry); - vmcs_clear_bits(VM_EXIT_CONTROLS, exit); + vm_entry_controls_clearbit(vmx, entry); + vm_exit_controls_clearbit(vmx, exit); } static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) @@ -1425,14 +1483,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) switch (msr) { case MSR_EFER: if (cpu_has_load_ia32_efer) { - clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, + clear_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER); return; } break; case MSR_CORE_PERF_GLOBAL_CTRL: if (cpu_has_load_perf_global_ctrl) { - clear_atomic_switch_msr_special( + clear_atomic_switch_msr_special(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL); return; @@ -1453,14 +1512,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr) vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr); } -static void add_atomic_switch_msr_special(unsigned long entry, - unsigned long exit, unsigned long guest_val_vmcs, - unsigned long host_val_vmcs, u64 guest_val, u64 host_val) +static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx, + unsigned long entry, unsigned long exit, + unsigned long guest_val_vmcs, unsigned long host_val_vmcs, + u64 guest_val, u64 host_val) { vmcs_write64(guest_val_vmcs, guest_val); vmcs_write64(host_val_vmcs, host_val); - vmcs_set_bits(VM_ENTRY_CONTROLS, entry); - vmcs_set_bits(VM_EXIT_CONTROLS, exit); + vm_entry_controls_setbit(vmx, entry); + vm_exit_controls_setbit(vmx, exit); } static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, @@ -1472,7 +1532,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, switch (msr) { case MSR_EFER: if (cpu_has_load_ia32_efer) { - add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER, + add_atomic_switch_msr_special(vmx, + VM_ENTRY_LOAD_IA32_EFER, VM_EXIT_LOAD_IA32_EFER, GUEST_IA32_EFER, HOST_IA32_EFER, @@ -1482,7 +1543,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, break; case MSR_CORE_PERF_GLOBAL_CTRL: if (cpu_has_load_perf_global_ctrl) { - add_atomic_switch_msr_special( + add_atomic_switch_msr_special(vmx, VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, GUEST_IA32_PERF_GLOBAL_CTRL, @@ -3182,14 +3243,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer) vmx_load_host_state(to_vmx(vcpu)); vcpu->arch.efer = efer; if (efer & EFER_LMA) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) | - VM_ENTRY_IA32E_MODE); + vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); msr->data = efer; } else { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) & - ~VM_ENTRY_IA32E_MODE); + vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); msr->data = efer & ~EFER_LME; } @@ -3217,9 +3274,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu) static void exit_lmode(struct kvm_vcpu *vcpu) { - vmcs_write32(VM_ENTRY_CONTROLS, - vmcs_read32(VM_ENTRY_CONTROLS) - & ~VM_ENTRY_IA32E_MODE); + vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE); vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA); } @@ -4346,10 +4401,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) ++vmx->nmsrs; } - vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl); + + vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl); /* 22.2.1, 20.8.1 */ - vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl); + vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); set_cr4_guest_host_mask(vmx); @@ -7759,12 +7815,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) exit_control = vmcs_config.vmexit_ctrl; if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; - vmcs_write32(VM_EXIT_CONTROLS, exit_control); + vm_exit_controls_init(vmx, exit_control); /* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are * emulated by vmx_set_efer(), below. */ - vmcs_write32(VM_ENTRY_CONTROLS, + vm_entry_controls_init(vmx, (vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER & ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); @@ -8186,7 +8242,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->vm_entry_controls = (vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) | - (vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE); + (vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE); /* TODO: These cannot have changed unless we have MSR bitmaps and * the relevant bit asks not to trap the change */ @@ -8390,6 +8446,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) vcpu->cpu = cpu; put_cpu(); + vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS)); + vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS)); vmx_segment_cache_clear(vmx); /* if no vmcs02 cache requested, remove the one we used */ -- cgit v1.1 From 6dfacadd5858882eee1983995854d4e1fb1b966e Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 4 Dec 2013 08:58:54 +0100 Subject: KVM: nVMX: Add support for activity state HLT We can easily emulate the HLT activity state for L1: If it decides that L2 shall be halted on entry, just invoke the normal emulation of halt after switching to L2. We do not depend on specific host features to provide this, so we can expose the capability unconditionally. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 1 + arch/x86/kvm/vmx.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 966502d..2067264 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -100,6 +100,7 @@ #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f #define VMX_MISC_SAVE_EFER_LMA 0x00000020 +#define VMX_MISC_ACTIVITY_HLT 0x00000040 /* VMCS Encodings */ enum vmcs_field { diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 1024689..f90320b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2340,6 +2340,7 @@ static __init void nested_vmx_setup_ctls_msrs(void) rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high); nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK | VMX_MISC_SAVE_EFER_LMA; + nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT; nested_vmx_misc_high = 0; } @@ -7938,7 +7939,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } - if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) { + if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); return 1; } @@ -8067,6 +8069,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) prepare_vmcs02(vcpu, vmcs12); + if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) + return kvm_emulate_halt(vcpu); + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet -- cgit v1.1 From c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 13 Dec 2013 15:07:21 +0900 Subject: KVM: Use cond_resched() directly and remove useless kvm_resched() Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers to make kvm preemptible"), the remaining stuff in this function is a simple cond_resched() call with an extra need_resched() check which was there to avoid dropping VCPUs unnecessarily. Now it is meaningless. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini --- arch/ia64/kvm/kvm-ia64.c | 2 +- arch/powerpc/kvm/book3s_hv.c | 2 +- arch/x86/kvm/x86.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 985bf80..53f44be 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -702,7 +702,7 @@ again: out: srcu_read_unlock(&vcpu->kvm->srcu, idx); if (r > 0) { - kvm_resched(vcpu); + cond_resched(); idx = srcu_read_lock(&vcpu->kvm->srcu); goto again; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 072287f..3fa99b2 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1348,7 +1348,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc) kvm_guest_exit(); preempt_enable(); - kvm_resched(vcpu); + cond_resched(); spin_lock(&vc->lock); now = get_tb(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 21ef1ba..4fb1ee6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6125,7 +6125,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu) } if (need_resched()) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); - kvm_resched(vcpu); + cond_resched(); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); } } -- cgit v1.1 From 9357d93952143b178fa9d1f5095b8f273b01a1f1 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Fri, 13 Dec 2013 15:08:38 +0900 Subject: KVM: x86: Add comment on vcpu_enter_guest()'s return value Giving proper names to the 0 and 1 was once suggested. But since 0 is returned to the userspace, giving it another name can introduce extra confusion. This patch just explains the meanings instead. Signed-off-by: Takuya Yoshikawa Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4fb1ee6..1dc0359 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5865,6 +5865,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) kvm_apic_update_tmr(vcpu, tmr); } +/* + * Returns 1 to let __vcpu_run() continue the guest execution loop without + * exiting to the userspace. Otherwise, the value will be returned to the + * userspace. + */ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) { int r; -- cgit v1.1 From ca3f257ae570c37d3da30a524a2f61ce602c6c99 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 16 Dec 2013 12:55:46 +0100 Subject: KVM: nVMX: Support direct APIC access from L2 It's a pathological case, but still a valid one: If L1 disables APIC virtualization and also allows L2 to directly write to the APIC page, we have to forcibly enable APIC virtualization while in L2 if the in-kernel APIC is in use. This allows to run the direct interrupt test case in the vmx unit test without x2APIC. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f90320b..31eb577 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7763,6 +7763,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) else vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(vmx->nested.apic_access_page)); + } else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) { + exec_control |= + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmcs_write64(APIC_ACCESS_ADDR, + page_to_phys(vcpu->kvm->arch.apic_access_page)); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); -- cgit v1.1 From 4c4d563b49830a66537c3f51070dad74d7a81d3a Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 18 Dec 2013 19:16:24 +0100 Subject: KVM: VMX: Do not skip the instruction if handle_dr injects a fault If kvm_get_dr or kvm_set_dr reports that it raised a fault, we must not advance the instruction pointer. Otherwise the exception will hit the wrong instruction. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 31eb577..9cc5484 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5137,10 +5137,14 @@ static int handle_dr(struct kvm_vcpu *vcpu) reg = DEBUG_REG_ACCESS_REG(exit_qualification); if (exit_qualification & TYPE_MOV_FROM_DR) { unsigned long val; - if (!kvm_get_dr(vcpu, dr, &val)) - kvm_register_write(vcpu, reg, val); + + if (kvm_get_dr(vcpu, dr, &val)) + return 1; + kvm_register_write(vcpu, reg, val); } else - kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]); + if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg])) + return 1; + skip_emulated_instruction(vcpu); return 1; } -- cgit v1.1 From 989c6b34f6a9480e397b170cc62237e89bf4fdb9 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 19 Dec 2013 15:28:51 -0200 Subject: KVM: MMU: handle invalid root_hpa at __direct_map It is possible for __direct_map to be called on invalid root_hpa (-1), two examples: 1) try_async_pf -> can_do_async_pf -> vmx_interrupt_allowed -> nested_vmx_vmexit 2) vmx_handle_exit -> vmx_interrupt_allowed -> nested_vmx_vmexit Then to load_vmcs12_host_state and kvm_mmu_reset_context. Check for this possibility, let fault exception be regenerated. BZ: https://bugzilla.redhat.com/show_bug.cgi?id=924916 Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 40772ef..31a5702 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write, int emulate = 0; gfn_t pseudo_gfn; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return 0; + for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { if (iterator.level == level) { mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, -- cgit v1.1 From 478a8237f656d86d25b3e4e4bf3c48f590156294 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Nov 2013 17:43:19 -0800 Subject: arm: KVM: Don't return PSCI_INVAL if waitqueue is inactive The current KVM implementation of PSCI returns INVALID_PARAMETERS if the waitqueue for the corresponding CPU is not active. This does not seem correct, since KVM should not care what the specific thread is doing, for example, user space may not have called KVM_RUN on this VCPU yet or the thread may be busy looping to user space because it received a signal; this is really up to the user space implementation. Instead we should check specifically that the CPU is marked as being turned off, regardless of the VCPU thread state, and if it is, we shall simply clear the pause flag on the CPU and wake up the thread if it happens to be blocked for us. Further, the implementation seems to be racy when executing multiple VCPU threads. There really isn't a reasonable user space programming scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init before turning on the boot CPU. Therefore, set the pause flag on the vcpu at VCPU init time (which can reasonably be expected to be completed for all CPUs by user space before running any VCPUs) and clear both this flag and the feature (in case the feature can somehow get set again in the future) and ping the waitqueue on turning on a VCPU using PSCI. Reported-by: Peter Maydell Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 30 +++++++++++++++++++----------- arch/arm/kvm/psci.c | 11 ++++++----- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2a700e0..151eb91 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -478,15 +478,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) return ret; } - /* - * Handle the "start in power-off" case by calling into the - * PSCI code. - */ - if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) { - *vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF; - kvm_psci_call(vcpu); - } - return 0; } @@ -700,6 +691,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, return -EINVAL; } +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Handle the "start in power-off" case by marking the VCPU as paused. + */ + if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu->arch.pause = true; + + return 0; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { @@ -713,8 +722,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (copy_from_user(&init, argp, sizeof(init))) return -EFAULT; - return kvm_vcpu_set_target(vcpu, &init); - + return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); } case KVM_SET_ONE_REG: case KVM_GET_ONE_REG: { diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c index 0881bf1..448f60e 100644 --- a/arch/arm/kvm/psci.c +++ b/arch/arm/kvm/psci.c @@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) } } - if (!vcpu) + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu || !vcpu->arch.pause) return KVM_PSCI_RET_INVAL; target_pc = *vcpu_reg(source_vcpu, 2); - wq = kvm_arch_vcpu_wq(vcpu); - if (!waitqueue_active(wq)) - return KVM_PSCI_RET_INVAL; - kvm_reset_vcpu(vcpu); /* Gracefully handle Thumb2 entry point */ @@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) vcpu->arch.pause = false; smp_mb(); /* Make sure the above is visible */ + wq = kvm_arch_vcpu_wq(vcpu); wake_up_interruptible(wq); return KVM_PSCI_RET_SUCCESS; -- cgit v1.1 From a1a64387adeeba7a34ce06f2774e81f496ee803b Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 16 Nov 2013 10:51:25 -0800 Subject: arm/arm64: KVM: arch_timer: Initialize cntvoff at kvm_init Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs at the first time because that will overwrite any potentially restored values from user space. Cc: Andre Przywara Acked-by: Marc Zynger Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 2a700e0..13205bd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -137,6 +137,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_free_stage2_pgd; + kvm_timer_init(kvm); + /* Mark the initial VMID generation invalid */ kvm->arch.vmid_gen = 0; -- cgit v1.1 From 39735a3a390431bcf60f9174b7d64f787fd6afa9 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Fri, 13 Dec 2013 14:23:26 +0100 Subject: ARM/KVM: save and restore generic timer registers For migration to work we need to save (and later restore) the state of each core's virtual generic timer. Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export the three needed registers (control, counter, compare value). Though they live in cp15 space, we don't use the existing list, since they need special accessor functions and the arch timer is optional. Acked-by: Marc Zynger Signed-off-by: Andre Przywara Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 ++ arch/arm/include/uapi/asm/kvm.h | 20 +++++++++ arch/arm/kvm/guest.c | 92 ++++++++++++++++++++++++++++++++++++++- arch/arm64/include/uapi/asm/kvm.h | 18 ++++++++ 4 files changed, 132 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 8a6f6db..098f7dd 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext) int kvm_perf_init(void); int kvm_perf_teardown(void); +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); +int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index c498b60..835b867 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -119,6 +119,26 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_32_CRN_MASK 0x0000000000007800 #define KVM_REG_ARM_32_CRN_SHIFT 11 +#define ARM_CP15_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK) + +#define __ARM_CP15_REG(op1,crn,crm,op2) \ + (KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \ + ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \ + ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \ + ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \ + ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2)) + +#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32) + +#define __ARM_CP15_REG64(op1,crm) \ + (__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64) +#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__) + +#define KVM_REG_ARM_TIMER_CTL ARM_CP15_REG32(0, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM_CP15_REG64(1, 14) +#define KVM_REG_ARM_TIMER_CVAL ARM_CP15_REG64(3, 14) + /* Normal registers are mapped as coprocessor 16. */ #define KVM_REG_ARM_CORE (0x0010 << KVM_REG_ARM_COPROC_SHIFT) #define KVM_REG_ARM_CORE_REG(name) (offsetof(struct kvm_regs, name) / 4) diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index 20f8d97..2786eae 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +#ifndef CONFIG_KVM_ARM_TIMER + +#define NUM_TIMER_REGS 0 + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + return 0; +} + +static bool is_timer_reg(u64 index) +{ + return false; +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + return 0; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + return 0; +} + +#else + +#define NUM_TIMER_REGS 3 + +static bool is_timer_reg(u64 index) +{ + switch (index) { + case KVM_REG_ARM_TIMER_CTL: + case KVM_REG_ARM_TIMER_CNT: + case KVM_REG_ARM_TIMER_CVAL: + return true; + } + return false; +} + +static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_TIMER_CTL, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CNT, uindices)) + return -EFAULT; + uindices++; + if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices)) + return -EFAULT; + + return 0; +} + +#endif + +static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int ret; + + ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)); + if (ret != 0) + return ret; + + return kvm_arm_timer_set_reg(vcpu, reg->id, val); +} + +static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + val = kvm_arm_timer_get_reg(vcpu, reg->id); + return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)); +} + static unsigned long num_core_regs(void) { return sizeof(struct kvm_regs) / sizeof(u32); @@ -121,7 +198,8 @@ static unsigned long num_core_regs(void) */ unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu) { - return num_core_regs() + kvm_arm_num_coproc_regs(vcpu); + return num_core_regs() + kvm_arm_num_coproc_regs(vcpu) + + NUM_TIMER_REGS; } /** @@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) { unsigned int i; const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE; + int ret; for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) { if (put_user(core_reg | i, uindices)) @@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) uindices++; } + ret = copy_timer_indices(vcpu, uindices); + if (ret) + return ret; + uindices += NUM_TIMER_REGS; + return kvm_arm_copy_coproc_indices(vcpu, uindices); } @@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return get_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return get_timer_reg(vcpu, reg); + return kvm_arm_coproc_get_reg(vcpu, reg); } @@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE) return set_core_reg(vcpu, reg); + if (is_timer_reg(reg->id)) + return set_timer_reg(vcpu, reg); + return kvm_arm_coproc_set_reg(vcpu, reg); } diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5031f42..7c25ca8 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -129,6 +129,24 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM64_SYSREG_OP2_MASK 0x0000000000000007 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT 0 +#define ARM64_SYS_REG_SHIFT_MASK(x,n) \ + (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \ + KVM_REG_ARM64_SYSREG_ ## n ## _MASK) + +#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \ + (KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \ + ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \ + ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \ + ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \ + ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \ + ARM64_SYS_REG_SHIFT_MASK(op2, OP2)) + +#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64) + +#define KVM_REG_ARM_TIMER_CTL ARM64_SYS_REG(3, 3, 14, 3, 1) +#define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) +#define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) + /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 #define KVM_ARM_IRQ_TYPE_MASK 0xff -- cgit v1.1 From e1ba0207a1b3714bb3f000e506285ae5123cdfa7 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:55 -0700 Subject: ARM: KVM: Allow creating the VGIC after VCPUs Rework the VGIC initialization slightly to allow initialization of the vgic cpu-specific state even if the irqchip (the VGIC) hasn't been created by user space yet. This is safe, because the vgic data structures are already allocated when the CPU is allocated if VGIC support is compiled into the kernel. Further, the init process does not depend on any other information and the sacrifice is a slight performance degradation for creating VMs in the no-VGIC case. The reason is that the new device control API doesn't mandate creating the VGIC before creating the VCPU and it is unreasonable to require user space to create the VGIC before creating the VCPUs. At the same time move the irqchip_in_kernel check out of kvm_vcpu_first_run_init and into the init function to make the per-vcpu and global init functions symmetric and add comments on the exported functions making it a bit easier to understand the init flow by only looking at vgic.c. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 13205bd..c9fe9d7 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -464,6 +464,8 @@ static void update_vttbr(struct kvm *kvm) static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) { + int ret; + if (likely(vcpu->arch.has_run_once)) return 0; @@ -473,9 +475,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) * Initialize the VGIC before running a vcpu the first time on * this VM. */ - if (irqchip_in_kernel(vcpu->kvm) && - unlikely(!vgic_initialized(vcpu->kvm))) { - int ret = kvm_vgic_init(vcpu->kvm); + if (unlikely(!vgic_initialized(vcpu->kvm))) { + ret = kvm_vgic_init(vcpu->kvm); if (ret) return ret; } -- cgit v1.1 From 7330672befe6269e575f79b924a7068b26c144b4 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 17:29:18 +0100 Subject: KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC Support creating the ARM VGIC device through the KVM_CREATE_DEVICE ioctl, which can then later be leveraged to use the KVM_{GET/SET}_DEVICE_ATTR, which is useful both for setting addresses in a more generic API than the ARM-specific one and is useful for save/restore of VGIC state. Adds KVM_CAP_DEVICE_CTRL to ARM capabilities. Note that we change the check for creating a VGIC from bailing out if any VCPUs were created, to bailing out if any VCPUs were ever run. This is an important distinction that shouldn't break anything, but allows creating the VGIC after the VCPUs have been created. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c9fe9d7..cc7c41a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -190,6 +190,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_IRQCHIP: r = vgic_present; break; + case KVM_CAP_DEVICE_CTRL: case KVM_CAP_USER_MEMORY: case KVM_CAP_SYNC_MMU: case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: -- cgit v1.1 From ce01e4e8874d410738f4b4733b26642d6611a331 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 23 Sep 2013 14:55:56 -0700 Subject: KVM: arm-vgic: Set base addr through device API Support setting the distributor and cpu interface base addresses in the VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API in addition to the ARM specific API. This has the added benefit of being able to share more code in user space and do things in a uniform manner. Also deprecate the older API at the same time, but backwards compatibility will be maintained. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/uapi/asm/kvm.h | 2 ++ arch/arm/kvm/arm.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 835b867..76a7427 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -163,6 +163,8 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* Device Control API: ARM VGIC */ +#define KVM_DEV_ARM_VGIC_GRP_ADDR 0 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index cc7c41a..f290b22 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -776,7 +776,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, case KVM_ARM_DEVICE_VGIC_V2: if (!vgic_present) return -ENXIO; - return kvm_vgic_set_addr(kvm, type, dev_addr->addr); + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); default: return -ENODEV; } -- cgit v1.1 From e9b152cb957cb194437f37e79f0f3c9d34fe53d6 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 11 Dec 2013 20:29:11 -0800 Subject: arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put The arch-generic KVM code expects the cpu field of a vcpu to be -1 if the vcpu is no longer assigned to a cpu. This is used for the optimized make_all_cpus_request path and will be used by the vgic code to check that no vcpus are running. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index f290b22..b92ff6d3 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -342,6 +342,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { + /* + * The arch-generic KVM code expects the cpu field of a vcpu to be -1 + * if the vcpu is no longer assigned to a cpu. This is used for the + * optimized make_all_cpus_request path. + */ + vcpu->cpu = -1; + kvm_arm_set_running_vcpu(NULL); } -- cgit v1.1 From c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 25 Oct 2013 21:17:31 +0100 Subject: KVM: arm-vgic: Add vgic reg access from dev attr Add infrastructure to handle distributor and cpu interface register accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups and defining the semantics of the attr field to be the MMIO offset as specified in the GICv2 specs. Missing register accesses or other changes in individual register access functions to support save/restore of the VGIC state is added in subsequent patches. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/uapi/asm/kvm.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 76a7427..ef0c878 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -165,6 +165,12 @@ struct kvm_arch_memory_slot { /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 +#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 +#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS 2 +#define KVM_DEV_ARM_VGIC_CPUID_SHIFT 32 +#define KVM_DEV_ARM_VGIC_CPUID_MASK (0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT) +#define KVM_DEV_ARM_VGIC_OFFSET_SHIFT 0 +#define KVM_DEV_ARM_VGIC_OFFSET_MASK (0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT) /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 -- cgit v1.1 From da7814700a0c408bead58ce4714b7625ffbaade1 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:22 +0000 Subject: arm64: KVM: Add Kconfig option for max VCPUs per-Guest Current max VCPUs per-Guest is set to 4 which is preventing us from creating a Guest (or VM) with 8 VCPUs on Host (e.g. X-Gene Storm SOC) with 8 Host CPUs. The correct value of max VCPUs per-Guest should be same as the max CPUs supported by GICv2 which is 8 but, increasing value of max VCPUs per-Guest can make things slower hence we add Kconfig option to let KVM users select appropriate max VCPUs per-Guest. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/kvm_host.h | 7 ++++++- arch/arm64/kvm/Kconfig | 11 +++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5d85a02..0a1d697 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -26,7 +26,12 @@ #include #include -#define KVM_MAX_VCPUS 4 +#if defined(CONFIG_KVM_ARM_MAX_VCPUS) +#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS +#else +#define KVM_MAX_VCPUS 0 +#endif + #define KVM_USER_MEM_SLOTS 32 #define KVM_PRIVATE_MEM_SLOTS 4 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 4480ab3..8ba85e9 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,17 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. +config KVM_ARM_MAX_VCPUS + int "Number maximum supported virtual CPUs per VM" + depends on KVM_ARM_HOST + default 4 + help + Static number of max supported virtual CPUs per VM. + + If you choose a high number, the vcpu structures will be quite + large, so only choose a reasonable number that you expect to + actually use. + config KVM_ARM_VGIC bool depends on KVM_ARM_HOST && OF -- cgit v1.1 From e28100bd8ed9e37b7cd4578140a1e7f95bd40835 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 14 Nov 2013 15:20:08 +0000 Subject: arm64: KVM: Support X-Gene guest VCPU on APM X-Gene host This patch allows us to have X-Gene guest VCPU when using KVM arm64 on APM X-Gene host. We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu() when running on X-Gene host with Potenza core. [maz: sanitized the commit log] Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: Marc Zyngier --- arch/arm64/include/uapi/asm/kvm.h | 3 ++- arch/arm64/kvm/guest.c | 32 +++++++++++++++++++------------- arch/arm64/kvm/sys_regs_generic_v8.c | 3 +++ 3 files changed, 24 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 5031f42..d9f026b 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -55,8 +55,9 @@ struct kvm_regs { #define KVM_ARM_TARGET_AEM_V8 0 #define KVM_ARM_TARGET_FOUNDATION_V8 1 #define KVM_ARM_TARGET_CORTEX_A57 2 +#define KVM_ARM_TARGET_XGENE_POTENZA 3 -#define KVM_ARM_NUM_TARGETS 3 +#define KVM_ARM_NUM_TARGETS 4 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */ #define KVM_ARM_DEVICE_TYPE_SHIFT 0 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 3f0731e..0874557 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void) unsigned long implementor = read_cpuid_implementor(); unsigned long part_number = read_cpuid_part_number(); - if (implementor != ARM_CPU_IMP_ARM) - return -EINVAL; + switch (implementor) { + case ARM_CPU_IMP_ARM: + switch (part_number) { + case ARM_CPU_PART_AEM_V8: + return KVM_ARM_TARGET_AEM_V8; + case ARM_CPU_PART_FOUNDATION: + return KVM_ARM_TARGET_FOUNDATION_V8; + case ARM_CPU_PART_CORTEX_A57: + return KVM_ARM_TARGET_CORTEX_A57; + }; + break; + case ARM_CPU_IMP_APM: + switch (part_number) { + case APM_CPU_PART_POTENZA: + return KVM_ARM_TARGET_XGENE_POTENZA; + }; + break; + }; - switch (part_number) { - case ARM_CPU_PART_AEM_V8: - return KVM_ARM_TARGET_AEM_V8; - case ARM_CPU_PART_FOUNDATION: - return KVM_ARM_TARGET_FOUNDATION_V8; - case ARM_CPU_PART_CORTEX_A57: - /* Currently handled by the generic backend */ - return KVM_ARM_TARGET_CORTEX_A57; - default: - return -EINVAL; - } + return -EINVAL; } int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c index 4268ab9..8fe6f76 100644 --- a/arch/arm64/kvm/sys_regs_generic_v8.c +++ b/arch/arm64/kvm/sys_regs_generic_v8.c @@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void) &genericv8_target_table); kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57, &genericv8_target_table); + kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA, + &genericv8_target_table); + return 0; } late_initcall(sys_reg_genericv8_init); -- cgit v1.1 From e5cf9dcdbfd26cd4e1991db08755da900454efeb Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 12 Dec 2013 16:12:23 +0000 Subject: arm64: KVM: Force undefined exception for Guest SMC intructions The SMC-based PSCI emulation for Guest is going to be very different from the in-kernel HVC-based PSCI emulation hence for now just inject undefined exception when Guest executes SMC instruction. Signed-off-by: Anup Patel Signed-off-by: Pranavkumar Sawargaonkar Signed-off-by: marc Zyngier --- arch/arm64/kvm/handle_exit.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 8da5606..df84d7b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) { - if (kvm_psci_call(vcpu)) - return 1; - kvm_inject_undefined(vcpu); return 1; } -- cgit v1.1 From 2f0a6397dd3cac2fb05b46cad08c1d532c04d6b8 Mon Sep 17 00:00:00 2001 From: Zhihui Zhang Date: Mon, 30 Dec 2013 15:56:29 -0500 Subject: KVM: VMX: check use I/O bitmap first before unconditional I/O exit According to Table C-1 of Intel SDM 3C, a VM exit happens on an I/O instruction when "use I/O bitmaps" VM-execution control was 0 _and_ the "unconditional I/O exiting" VM-execution control was 1. So we can't just check "unconditional I/O exiting" alone. This patch was improved by suggestion from Jan Kiszka. Reviewed-by: Jan Kiszka Signed-off-by: Zhihui Zhang Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9cc5484..0abf8b7 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6521,11 +6521,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, int size; u8 b; - if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING)) - return 1; - if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) - return 0; + return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); exit_qualification = vmcs_readl(EXIT_QUALIFICATION); -- cgit v1.1 From 96893977b8f732493815e7a2b552c37e1bb967e5 Mon Sep 17 00:00:00 2001 From: Chen Fan Date: Thu, 2 Jan 2014 17:14:11 +0800 Subject: KVM: x86: Fix debug typo error in lapic fix the 'vcpi' typos when apic_debug is enabled. Signed-off-by: Chen Fan Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/lapic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5439117..206715b 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -432,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu) u8 val; if (pv_eoi_get_user(vcpu, &val) < 0) apic_debug("Can't read EOI MSR value: 0x%llx\n", - (unsigned long long)vcpi->arch.pv_eoi.msr_val); + (unsigned long long)vcpu->arch.pv_eoi.msr_val); return val & 0x1; } @@ -440,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu) { if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) { apic_debug("Can't set EOI MSR value: 0x%llx\n", - (unsigned long long)vcpi->arch.pv_eoi.msr_val); + (unsigned long long)vcpu->arch.pv_eoi.msr_val); return; } __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); @@ -450,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) { if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) { apic_debug("Can't clear EOI MSR value: 0x%llx\n", - (unsigned long long)vcpi->arch.pv_eoi.msr_val); + (unsigned long long)vcpu->arch.pv_eoi.msr_val); return; } __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); -- cgit v1.1 From 26a865f4aa8e66a6d94958de7656f7f1b03c6c56 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Jan 2014 17:00:51 -0200 Subject: KVM: VMX: fix use after free of vmx->loaded_vmcs After free_loaded_vmcs executes, the "loaded_vmcs" structure is kfreed, and now vmx->loaded_vmcs points to a kfreed area. Subsequent free_loaded_vmcs then attempts to manipulate vmx->loaded_vmcs. Switch the order to avoid the problem. https://bugzilla.redhat.com/show_bug.cgi?id=1047892 Reviewed-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/vmx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0abf8b7..7661eb1 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7390,8 +7390,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu) struct vcpu_vmx *vmx = to_vmx(vcpu); free_vpid(vmx); - free_nested(vmx); free_loaded_vmcs(vmx->loaded_vmcs); + free_nested(vmx); kfree(vmx->guest_msrs); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, vmx); -- cgit v1.1 From 136d737fd20102f1be9b02356590fd55e3a40d0e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 13 Dec 2013 16:56:06 +0000 Subject: arm/arm64: KVM: relax the requirements of VMA alignment for THP The THP code in KVM/ARM is a bit restrictive in not allowing a THP to be used if the VMA is not 2MB aligned. Actually, it is not so much the VMA that matters, but the associated memslot: A process can perfectly mmap a region with no particular alignment restriction, and then pass a 2MB aligned address to KVM. In this case, KVM will only use this 2MB aligned region, and will ignore the range between vma->vm_start and memslot->userspace_addr. It can also choose to place this memslot at whatever alignment it wants in the IPA space. In the end, what matters is the relative alignment of the user space and IPA mappings with respect to a 2M page. They absolutely must be the same if you want to use THP. Cc: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/kvm/mmu.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 659db0e..7789857 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT; } else { /* - * Pages belonging to VMAs not aligned to the PMD mapping - * granularity cannot be mapped using block descriptors even - * if the pages belong to a THP for the process, because the - * stage-2 block descriptor will cover more than a single THP - * and we loose atomicity for unmapping, updates, and splits - * of the THP or other pages in the stage-2 block range. + * Pages belonging to memslots that don't have the same + * alignment for userspace and IPA cannot be mapped using + * block descriptors even if the pages belong to a THP for + * the process, because the stage-2 block descriptor will + * cover more than a single THP and we loose atomicity for + * unmapping, updates, and splits of the THP or other pages + * in the stage-2 block range. */ - if (vma->vm_start & ~PMD_MASK) + if ((memslot->userspace_addr & ~PMD_MASK) != + ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK)) force_pte = true; } up_read(¤t->mm->mmap_sem); -- cgit v1.1 From 61466710de078c697106fa5b70ec7afc9feab520 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 7 Jan 2014 13:45:15 +0530 Subject: KVM: ARM: Remove duplicate include trace.h was included twice. Remove duplicate inclusion. Signed-off-by: Sachin Kamat Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index a920790..0de91fc 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -26,8 +26,6 @@ #include "trace.h" -#include "trace.h" - typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -- cgit v1.1 From 37f6a4e237303549c8676dfe1fd1991ceab512eb Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 3 Jan 2014 17:09:32 -0200 Subject: KVM: x86: handle invalid root_hpa everywhere Rom Freiman notes other code paths vulnerable to bug fixed by 989c6b34f6a9480e397b. Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/mmu.c | 9 +++++++++ arch/x86/kvm/paging_tmpl.h | 8 ++++++++ 2 files changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 31a5702..e50425d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2832,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, bool ret = false; u64 spte = 0ull; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return false; + if (!page_fault_can_be_fast(error_code)) return false; @@ -3227,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) struct kvm_shadow_walk_iterator iterator; u64 spte = 0ull; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return spte; + walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) if (!is_shadow_present_pte(spte)) @@ -4513,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]) u64 spte; int nr_sptes = 0; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return nr_sptes; + walk_shadow_page_lockless_begin(vcpu); for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) { sptes[iterator.level-1] = spte; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index ad75d77..cba218a 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, if (FNAME(gpte_changed)(vcpu, gw, top_level)) goto out_gpte_changed; + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + goto out_gpte_changed; + for (shadow_walk_init(&it, vcpu, addr); shadow_walk_okay(&it) && it.level > gw->level; shadow_walk_next(&it)) { @@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva) */ mmu_topup_memory_caches(vcpu); + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) { + WARN_ON(1); + return; + } + spin_lock(&vcpu->kvm->mmu_lock); for_each_shadow_entry(vcpu, gva, iterator) { level = iterator.level; -- cgit v1.1 From 9ed96e87c5748de4c2807ef17e81287c7304186c Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 6 Jan 2014 12:00:02 -0200 Subject: KVM: x86: limit PIT timer frequency Limit PIT timer frequency similarly to the limit applied by LAPIC timer. Cc: stable@kernel.org Reviewed-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/i8254.c | 18 ++++++++++++++++++ arch/x86/kvm/lapic.c | 3 --- arch/x86/kvm/x86.c | 3 +++ arch/x86/kvm/x86.h | 2 ++ 4 files changed, 23 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 412a5aa..518d864 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -37,6 +37,7 @@ #include "irq.h" #include "i8254.h" +#include "x86.h" #ifndef CONFIG_X86_64 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y)) @@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period) atomic_set(&ps->pending, 0); ps->irq_ack = 1; + /* + * Do not allow the guest to program periodic timers with small + * interval, since the hrtimers are not throttled by the host + * scheduler. + */ + if (ps->is_periodic) { + s64 min_period = min_timer_period_us * 1000LL; + + if (ps->period < min_period) { + pr_info_ratelimited( + "kvm: requested %lld ns " + "i8254 timer period limited to %lld ns\n", + ps->period, min_period); + ps->period = min_period; + } + } + hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval), HRTIMER_MODE_ABS); } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 206715b..1ac0093 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -71,9 +71,6 @@ #define VEC_POS(v) ((v) & (32 - 1)) #define REG_POS(v) (((v) >> 5) << 4) -static unsigned int min_timer_period_us = 500; -module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); - static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { *((u32 *) (apic->regs + reg_off)) = val; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1dc0359..0fd2bd7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops); static bool ignore_msrs = 0; module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); +unsigned int min_timer_period_us = 500; +module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); + bool kvm_has_tsc_control; EXPORT_SYMBOL_GPL(kvm_has_tsc_control); u32 kvm_max_guest_tsc_khz; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 587fb9e..8da5823 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt, #define KVM_SUPPORTED_XCR0 (XSTATE_FP | XSTATE_SSE | XSTATE_YMM) extern u64 host_xcr0; +extern unsigned int min_timer_period_us; + extern struct static_key kvm_no_apic_vcpu; #endif -- cgit v1.1 From f25e656d31ad112612839edaded18920cafea3b1 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Mon, 6 Jan 2014 12:18:59 -0200 Subject: KVM: x86: fix tsc catchup issue with tsc scaling To fix a problem related to different resolution of TSC and system clock, the offset in TSC units is approximated by delta = vcpu->hv_clock.tsc_timestamp - vcpu->last_guest_tsc (Guest TSC value at (Guest TSC value at last VM-exit) the last kvm_guest_time_update call) Delta is then later scaled using mult,shift pair found in hv_clock structure (which is correct against tsc_timestamp in that structure). However, if a frequency change is performed between these two points, this delta is measured using different TSC frequencies, but scaled using mult,shift pair for one frequency only. The end result is an incorrect delta. The bug which this code works around is not the only cause for clock backwards events. The global accumulator is still necessary, so remove the max_kernel_ns fix and rely on the global accumulator for no clock backwards events. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0fd2bd7..842abd3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1487,7 +1487,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) unsigned long flags, this_tsc_khz; struct kvm_vcpu_arch *vcpu = &v->arch; struct kvm_arch *ka = &v->kvm->arch; - s64 kernel_ns, max_kernel_ns; + s64 kernel_ns; u64 tsc_timestamp, host_tsc; struct pvclock_vcpu_time_info guest_hv_clock; u8 pvclock_flags; @@ -1546,37 +1546,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) if (!vcpu->pv_time_enabled) return 0; - /* - * Time as measured by the TSC may go backwards when resetting the base - * tsc_timestamp. The reason for this is that the TSC resolution is - * higher than the resolution of the other clock scales. Thus, many - * possible measurments of the TSC correspond to one measurement of any - * other clock, and so a spread of values is possible. This is not a - * problem for the computation of the nanosecond clock; with TSC rates - * around 1GHZ, there can only be a few cycles which correspond to one - * nanosecond value, and any path through this code will inevitably - * take longer than that. However, with the kernel_ns value itself, - * the precision may be much lower, down to HZ granularity. If the - * first sampling of TSC against kernel_ns ends in the low part of the - * range, and the second in the high end of the range, we can get: - * - * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new - * - * As the sampling errors potentially range in the thousands of cycles, - * it is possible such a time value has already been observed by the - * guest. To protect against this, we must compute the system time as - * observed by the guest and ensure the new system time is greater. - */ - max_kernel_ns = 0; - if (vcpu->hv_clock.tsc_timestamp) { - max_kernel_ns = vcpu->last_guest_tsc - - vcpu->hv_clock.tsc_timestamp; - max_kernel_ns = pvclock_scale_delta(max_kernel_ns, - vcpu->hv_clock.tsc_to_system_mul, - vcpu->hv_clock.tsc_shift); - max_kernel_ns += vcpu->last_kernel_ns; - } - if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) { kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz, &vcpu->hv_clock.tsc_shift, @@ -1584,14 +1553,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v) vcpu->hw_tsc_khz = this_tsc_khz; } - /* with a master tuple, - * pvclock clock reads always increase at the (scaled) rate - * of guest TSC - no need to deal with sampling errors. - */ - if (!use_master_clock) { - if (max_kernel_ns > kernel_ns) - kernel_ns = max_kernel_ns; - } /* With all the info we got, fill in the values */ vcpu->hv_clock.tsc_timestamp = tsc_timestamp; vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset; -- cgit v1.1 From aab6d7ce37cf20753a336dc74473cf8a8aefa7c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 15 Jan 2014 18:07:31 +0100 Subject: KVM: remove useless write to vcpu->hv_clock.tsc_timestamp After the previous patch from Marcelo, the comment before this write became obsolete. In fact, the write is unnecessary. The calls to kvm_write_tsc ultimately result in a master clock update as soon as all TSCs agree and the master clock is re-enabled. This master clock update will rewrite tsc_timestamp. So, together with the comment, delete the dead write too. Reviewed-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 842abd3..0fbdced 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1278,8 +1278,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr) kvm->arch.last_tsc_write = data; kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz; - /* Reset of TSC must disable overshoot protection below */ - vcpu->arch.hv_clock.tsc_timestamp = 0; vcpu->arch.last_guest_tsc = data; /* Keep track of which generation this VCPU has synchronized to */ -- cgit v1.1 From e984097b553ed2d6551c805223e4057421370f00 Mon Sep 17 00:00:00 2001 From: Vadim Rozenfeld Date: Thu, 16 Jan 2014 20:18:37 +1100 Subject: add support for Hyper-V reference time counter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off: Peter Lieven Signed-off: Gleb Natapov Signed-off: Vadim Rozenfeld After some consideration I decided to submit only Hyper-V reference counters support this time. I will submit iTSC support as a separate patch as soon as it is ready. v1 -> v2 1. mark TSC page dirty as suggested by Eric Northup and Gleb 2. disable local irq when calling get_kernel_ns, as it was done by Peter Lieven 3. move check for TSC page enable from second patch to this one. v3 -> v4     Get rid of ref counter offset. v4 -> v5 replace __copy_to_user with kvm_write_guest when updateing iTSC page. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++ arch/x86/kvm/x86.c | 28 +++++++++++++++++++++++++++- 3 files changed, 41 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ae5d783..33fef07 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -605,6 +605,7 @@ struct kvm_arch { /* fields used by HYPER-V emulation */ u64 hv_guest_os_id; u64 hv_hypercall; + u64 hv_tsc_page; #ifdef CONFIG_KVM_MMU_AUDIT int audit_point; diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index b8f1c01..462efe7 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -28,6 +28,9 @@ /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE (1 << 1) +/* A partition's reference time stamp counter (TSC) page */ +#define HV_X64_MSR_REFERENCE_TSC 0x40000021 + /* * There is a single feature flag that signifies the presence of the MSR * that can be used to retrieve both the local APIC Timer frequency as @@ -198,6 +201,9 @@ #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 +#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 + #define HV_PROCESSOR_POWER_STATE_C0 0 #define HV_PROCESSOR_POWER_STATE_C1 1 #define HV_PROCESSOR_POWER_STATE_C2 2 @@ -210,4 +216,11 @@ #define HV_STATUS_INVALID_ALIGNMENT 4 #define HV_STATUS_INSUFFICIENT_BUFFERS 19 +typedef struct _HV_REFERENCE_TSC_PAGE { + __u32 tsc_sequence; + __u32 res1; + __u64 tsc_scale; + __s64 tsc_offset; +} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; + #endif diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0fbdced..0b3fd80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -839,11 +839,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc); * kvm-specific. Those are put in the beginning of the list. */ -#define KVM_SAVE_MSRS_BEGIN 10 +#define KVM_SAVE_MSRS_BEGIN 12 static u32 msrs_to_save[] = { MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK, MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW, HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL, + HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC, HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, MSR_KVM_PV_EOI_EN, MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, @@ -1788,6 +1789,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) switch (msr) { case HV_X64_MSR_GUEST_OS_ID: case HV_X64_MSR_HYPERCALL: + case HV_X64_MSR_REFERENCE_TSC: + case HV_X64_MSR_TIME_REF_COUNT: r = true; break; } @@ -1829,6 +1832,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data) kvm->arch.hv_hypercall = data; break; } + case HV_X64_MSR_REFERENCE_TSC: { + u64 gfn; + HV_REFERENCE_TSC_PAGE tsc_ref; + memset(&tsc_ref, 0, sizeof(tsc_ref)); + kvm->arch.hv_tsc_page = data; + if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE)) + break; + gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT; + if (kvm_write_guest(kvm, data, + &tsc_ref, sizeof(tsc_ref))) + return 1; + mark_page_dirty(kvm, gfn); + break; + } default: vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x " "data 0x%llx\n", msr, data); @@ -2253,6 +2270,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_HYPERCALL: data = kvm->arch.hv_hypercall; break; + case HV_X64_MSR_TIME_REF_COUNT: { + data = + div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100); + break; + } + case HV_X64_MSR_REFERENCE_TSC: + data = kvm->arch.hv_tsc_page; + break; default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -2566,6 +2591,7 @@ int kvm_dev_ioctl_check_extension(long ext) #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: + case KVM_CAP_HYPERV_TIME: #endif r = 1; break; -- cgit v1.1 From 9926c9fdbdd54bb229fe6fdbd15ca3af2b8425ae Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:15 +0100 Subject: KVM: x86: Sync DR7 on KVM_SET_DEBUGREGS Whenever we change arch.dr7, we also have to call kvm_update_dr7. In case guest debugging is off, this will synchronize the new state into hardware. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b3fd80..59907c9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2976,6 +2976,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = dbgregs->dr6; vcpu->arch.dr7 = dbgregs->dr7; + kvm_update_dr7(vcpu); return 0; } -- cgit v1.1 From 73aaf249ee2287b4686ff079dcbdbbb658156e64 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:16 +0100 Subject: KVM: SVM: Fix reading of DR6 In contrast to VMX, SVM dose not automatically transfer DR6 into the VCPU's arch.dr6. So if we face a DR6 read, we must consult a new vendor hook to obtain the current value. And as SVM now picks the DR6 state from its VMCB, we also need a set callback in order to write updates of DR6 back. Fixes a regression of 020df0794f. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 ++ arch/x86/kvm/svm.c | 15 +++++++++++++++ arch/x86/kvm/vmx.c | 11 +++++++++++ arch/x86/kvm/x86.c | 19 +++++++++++++++++-- 4 files changed, 45 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 33fef07..fdf83af 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -700,6 +700,8 @@ struct kvm_x86_ops { void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt); + u64 (*get_dr6)(struct kvm_vcpu *vcpu); + void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value); void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value); void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg); unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index c7168a5..e81df8f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd) mark_dirty(svm->vmcb, VMCB_ASID); } +static u64 svm_get_dr6(struct kvm_vcpu *vcpu) +{ + return to_svm(vcpu)->vmcb->save.dr6; +} + +static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm->vmcb->save.dr6 = value; + mark_dirty(svm->vmcb, VMCB_DR); +} + static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value) { struct vcpu_svm *svm = to_svm(vcpu); @@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = { .set_idt = svm_set_idt, .get_gdt = svm_get_gdt, .set_gdt = svm_set_gdt, + .get_dr6 = svm_get_dr6, + .set_dr6 = svm_set_dr6, .set_dr7 = svm_set_dr7, .cache_reg = svm_cache_reg, .get_rflags = svm_get_rflags, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7661eb1..79b360e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5149,6 +5149,15 @@ static int handle_dr(struct kvm_vcpu *vcpu) return 1; } +static u64 vmx_get_dr6(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.dr6; +} + +static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val) +{ +} + static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val) { vmcs_writel(GUEST_DR7, val); @@ -8556,6 +8565,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .set_idt = vmx_set_idt, .get_gdt = vmx_get_gdt, .set_gdt = vmx_set_gdt, + .get_dr6 = vmx_get_dr6, + .set_dr6 = vmx_set_dr6, .set_dr7 = vmx_set_dr7, .cache_reg = vmx_cache_reg, .get_rflags = vmx_get_rflags, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 59907c9..59b95b1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -722,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_get_cr8); +static void kvm_update_dr6(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)) + kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6); +} + static void kvm_update_dr7(struct kvm_vcpu *vcpu) { unsigned long dr7; @@ -750,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val) if (val & 0xffffffff00000000ULL) return -1; /* #GP */ vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1; + kvm_update_dr6(vcpu); break; case 5: if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) @@ -791,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val) return 1; /* fall through */ case 6: - *val = vcpu->arch.dr6; + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) + *val = vcpu->arch.dr6; + else + *val = kvm_x86_ops->get_dr6(vcpu); break; case 5: if (kvm_read_cr4_bits(vcpu, X86_CR4_DE)) @@ -2960,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu, struct kvm_debugregs *dbgregs) { + unsigned long val; + memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db)); - dbgregs->dr6 = vcpu->arch.dr6; + _kvm_get_dr(vcpu, 6, &val); + dbgregs->dr6 = val; dbgregs->dr7 = vcpu->arch.dr7; dbgregs->flags = 0; memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved)); @@ -2975,6 +2988,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu, memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = dbgregs->dr6; + kvm_update_dr6(vcpu); vcpu->arch.dr7 = dbgregs->dr7; kvm_update_dr7(vcpu); @@ -6749,6 +6763,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu) memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db)); vcpu->arch.dr6 = DR6_FIXED_1; + kvm_update_dr6(vcpu); vcpu->arch.dr7 = DR7_FIXED_1; kvm_update_dr7(vcpu); -- cgit v1.1 From 8246bf52c75aa9b9b336a84f31ed2248754d0f71 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:17 +0100 Subject: KVM: VMX: Fix DR6 update on #DB exception According to the SDM, only bits 0-3 of DR6 "may" be cleared by "certain" debug exception. So do update them on #DB exception in KVM, but leave the rest alone, only setting BD and BS in addition to already set bits in DR6. This also aligns us with kvm_vcpu_check_singlestep. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 79b360e..c8eb27f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4869,7 +4869,8 @@ static int handle_exception(struct kvm_vcpu *vcpu) dr6 = vmcs_readl(EXIT_QUALIFICATION); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { - vcpu->arch.dr6 = dr6 | DR6_FIXED_1; + vcpu->arch.dr6 &= ~15; + vcpu->arch.dr6 |= dr6; kvm_queue_exception(vcpu, DB_VECTOR); return 1; } -- cgit v1.1 From 42124925c1f580068661bebd963d7c102175a8a9 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:19 +0100 Subject: KVM: nVMX: Leave VMX mode on clearing of feature control MSR When userspace sets MSR_IA32_FEATURE_CONTROL to 0, make sure we leave root and non-root mode, fully disabling VMX. The register state of the VCPU is undefined after this step, so userspace has to set it to a proper state afterward. This enables to reboot a VM while it is running some hypervisor code. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c8eb27f..bff5555 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2455,6 +2455,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 1; } +static void vmx_leave_nested(struct kvm_vcpu *vcpu); + static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { u32 msr_index = msr_info->index; @@ -2470,6 +2472,8 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) & FEATURE_CONTROL_LOCKED) return 0; to_vmx(vcpu)->nested.msr_ia32_feature_control = data; + if (host_initialized && data == 0) + vmx_leave_nested(vcpu); return 1; } @@ -8504,6 +8508,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) } /* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. + */ +static void vmx_leave_nested(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) + nested_vmx_vmexit(vcpu); + free_nested(to_vmx(vcpu)); +} + +/* * L1's failure to enter L2 is a subset of a normal exit, as explained in * 23.7 "VM-entry failures during or after loading guest state" (this also * lists the acceptable exit-reason and exit-qualification parameters). -- cgit v1.1 From 533558bcb69ef28aff81b6ae9acda8943575319f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:20 +0100 Subject: KVM: nVMX: Pass vmexit parameters to nested_vmx_vmexit Instead of fixing up the vmcs12 after the nested vmexit, pass key parameters already when calling nested_vmx_vmexit. This will help tracing those vmexits. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 63 +++++++++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bff5555..e3578b3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1058,7 +1058,9 @@ static inline bool is_exception(u32 intr_info) == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK); } -static void nested_vmx_vmexit(struct kvm_vcpu *vcpu); +static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, + u32 exit_intr_info, + unsigned long exit_qualification); static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, u32 reason, unsigned long qualification); @@ -1967,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr) if (!(vmcs12->exception_bitmap & (1u << nr))) return 0; - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); return 1; } @@ -4649,15 +4653,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending) return 0; if (nested_exit_on_nmi(vcpu)) { - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI; - vmcs12->vm_exit_intr_info = NMI_VECTOR | - INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, + NMI_VECTOR | INTR_TYPE_NMI_INTR | + INTR_INFO_VALID_MASK, 0); /* * The NMI-triggered VM exit counts as injection: * clear this one and block further NMIs. @@ -4679,15 +4680,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - if (to_vmx(vcpu)->nested.nested_run_pending) return 0; if (nested_exit_on_intr(vcpu)) { - nested_vmx_vmexit(vcpu); - vmcs12->vm_exit_reason = - EXIT_REASON_EXTERNAL_INTERRUPT; - vmcs12->vm_exit_intr_info = 0; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, + 0, 0); /* * fall through to normal code, but now in L1, not L2 */ @@ -6849,7 +6846,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) return handle_invalid_guest_state(vcpu); if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) { - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); return 1; } @@ -7590,15 +7589,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) { - struct vmcs12 *vmcs12; - nested_vmx_vmexit(vcpu); - vmcs12 = get_vmcs12(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 exit_reason; if (fault->error_code & PFERR_RSVD_MASK) - vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; + exit_reason = EXIT_REASON_EPT_MISCONFIG; else - vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION; - vmcs12->exit_qualification = vcpu->arch.exit_qualification; + exit_reason = EXIT_REASON_EPT_VIOLATION; + nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification); vmcs12->guest_physical_address = fault->address; } @@ -7636,7 +7634,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, /* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */ if (vmcs12->exception_bitmap & (1u << PF_VECTOR)) - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason, + vmcs_read32(VM_EXIT_INTR_INFO), + vmcs_readl(EXIT_QUALIFICATION)); else kvm_inject_page_fault(vcpu, fault); } @@ -8191,7 +8191,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu, * exit-information fields only. Other fields are modified by L1 with VMWRITE, * which already writes to vmcs12 directly. */ -static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) +static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + u32 exit_reason, u32 exit_intr_info, + unsigned long exit_qualification) { /* update guest state fields: */ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); @@ -8282,10 +8284,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) /* update exit information fields: */ - vmcs12->vm_exit_reason = to_vmx(vcpu)->exit_reason; - vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + vmcs12->vm_exit_reason = exit_reason; + vmcs12->exit_qualification = exit_qualification; - vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + vmcs12->vm_exit_intr_info = exit_intr_info; if ((vmcs12->vm_exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) @@ -8452,7 +8454,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * and modify vmcs12 to make it see what it would expect to see there if * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) */ -static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) +static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, + u32 exit_intr_info, + unsigned long exit_qualification) { struct vcpu_vmx *vmx = to_vmx(vcpu); int cpu; @@ -8462,7 +8466,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) WARN_ON_ONCE(vmx->nested.nested_run_pending); leave_guest_mode(vcpu); - prepare_vmcs12(vcpu, vmcs12); + prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, + exit_qualification); cpu = get_cpu(); vmx->loaded_vmcs = &vmx->vmcs01; @@ -8513,7 +8518,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu) static void vmx_leave_nested(struct kvm_vcpu *vcpu) { if (is_guest_mode(vcpu)) - nested_vmx_vmexit(vcpu); + nested_vmx_vmexit(vcpu, -1, 0, 0); free_nested(to_vmx(vcpu)); } -- cgit v1.1 From 542060ea79c861e100411a5a44df747b56a693df Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:21 +0100 Subject: KVM: nVMX: Add tracepoints for nested_vmexit and nested_vmexit_inject Already used by nested SVM for tracing nested vmexit: kvm_nested_vmexit marks exits from L2 to L0 while kvm_nested_vmexit_inject marks vmexits that are reflected to L1. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e3578b3..e539c45 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6697,6 +6697,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) struct vmcs12 *vmcs12 = get_vmcs12(vcpu); u32 exit_reason = vmx->exit_reason; + trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, + vmcs_readl(EXIT_QUALIFICATION), + vmx->idt_vectoring_info, + intr_info, + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + KVM_ISA_VMX); + if (vmx->nested.nested_run_pending) return 0; @@ -8469,6 +8476,13 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, exit_qualification); + trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, + vmcs12->exit_qualification, + vmcs12->idt_vectoring_info_field, + vmcs12->vm_exit_intr_info, + vmcs12->vm_exit_intr_error_code, + KVM_ISA_VMX); + cpu = get_cpu(); vmx->loaded_vmcs = &vmx->vmcs01; vmx_vcpu_put(vcpu); -- cgit v1.1 From cae501397a25dc1e88375925c5e93a264d4a55ba Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:22 +0100 Subject: KVM: nVMX: Clean up handling of VMX-related MSRs This simplifies the code and also stops issuing warning about writing to unhandled MSRs when VMX is disabled or the Feature Control MSR is locked - we do handle them all according to the spec. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/include/uapi/asm/msr-index.h | 1 + arch/x86/kvm/vmx.c | 79 ++++++++++------------------------- 2 files changed, 24 insertions(+), 56 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 37813b5..2e4a42d 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -527,6 +527,7 @@ #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 /* VMX_BASIC bits and bitmasks */ #define VMX_BASIC_VMCS_SIZE_SHIFT 32 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e539c45..fc4a255 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2361,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high) return low | ((u64)high << 32); } -/* - * If we allow our guest to use VMX instructions (i.e., nested VMX), we should - * also let it use VMX-specific MSRs. - * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a - * VMX-specific MSR, or 0 when we haven't (and the caller should handle it - * like all other MSRs). - */ +/* Returns 0 on success, non-0 otherwise. */ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) { - if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC && - msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) { - /* - * According to the spec, processors which do not support VMX - * should throw a #GP(0) when VMX capability MSRs are read. - */ - kvm_queue_exception_e(vcpu, GP_VECTOR, 0); - return 1; - } - switch (msr_index) { - case MSR_IA32_FEATURE_CONTROL: - if (nested_vmx_allowed(vcpu)) { - *pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control; - break; - } - return 0; case MSR_IA32_VMX_BASIC: /* * This MSR reports some information about VMX support. We @@ -2453,38 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) *pdata = nested_vmx_ept_caps; break; default: - return 0; - } - - return 1; -} - -static void vmx_leave_nested(struct kvm_vcpu *vcpu); - -static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) -{ - u32 msr_index = msr_info->index; - u64 data = msr_info->data; - bool host_initialized = msr_info->host_initiated; - - if (!nested_vmx_allowed(vcpu)) - return 0; - - if (msr_index == MSR_IA32_FEATURE_CONTROL) { - if (!host_initialized && - to_vmx(vcpu)->nested.msr_ia32_feature_control - & FEATURE_CONTROL_LOCKED) - return 0; - to_vmx(vcpu)->nested.msr_ia32_feature_control = data; - if (host_initialized && data == 0) - vmx_leave_nested(vcpu); return 1; } - /* - * No need to treat VMX capability MSRs specially: If we don't handle - * them, handle_wrmsr will #GP(0), which is correct (they are readonly) - */ return 0; } @@ -2530,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) case MSR_IA32_SYSENTER_ESP: data = vmcs_readl(GUEST_SYSENTER_ESP); break; + case MSR_IA32_FEATURE_CONTROL: + if (!nested_vmx_allowed(vcpu)) + return 1; + data = to_vmx(vcpu)->nested.msr_ia32_feature_control; + break; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + if (!nested_vmx_allowed(vcpu)) + return 1; + return vmx_get_vmx_msr(vcpu, msr_index, pdata); case MSR_TSC_AUX: if (!to_vmx(vcpu)->rdtscp_enabled) return 1; /* Otherwise falls through */ default: - if (vmx_get_vmx_msr(vcpu, msr_index, pdata)) - return 0; msr = find_msr_entry(to_vmx(vcpu), msr_index); if (msr) { data = msr->data; @@ -2549,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) return 0; } +static void vmx_leave_nested(struct kvm_vcpu *vcpu); + /* * Writes msr value into into the appropriate "register". * Returns 0 on success, non-0 otherwise. @@ -2603,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_IA32_TSC_ADJUST: ret = kvm_set_msr_common(vcpu, msr_info); break; + case MSR_IA32_FEATURE_CONTROL: + if (!nested_vmx_allowed(vcpu) || + (to_vmx(vcpu)->nested.msr_ia32_feature_control & + FEATURE_CONTROL_LOCKED && !msr_info->host_initiated)) + return 1; + vmx->nested.msr_ia32_feature_control = data; + if (msr_info->host_initiated && data == 0) + vmx_leave_nested(vcpu); + break; + case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: + return 1; /* they are read-only */ case MSR_TSC_AUX: if (!vmx->rdtscp_enabled) return 1; @@ -2611,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 1; /* Otherwise falls through */ default: - if (vmx_set_vmx_msr(vcpu, msr_info)) - break; msr = find_msr_entry(vmx, msr_index); if (msr) { msr->data = data; -- cgit v1.1 From 7af40ad37b3f097f367cbe9c0198caccce6fd83b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:23 +0100 Subject: KVM: nVMX: Fix nested_run_pending on activity state HLT When we suspend the guest in HLT state, the nested run is no longer pending - we emulated it completely. So only set nested_run_pending after checking the activity state. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fc4a255..f9a5433 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8046,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); - vmx->nested.nested_run_pending = 1; - vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); cpu = get_cpu(); @@ -8066,6 +8064,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) return kvm_emulate_halt(vcpu); + vmx->nested.nested_run_pending = 1; + /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet -- cgit v1.1 From 3edf1e698ff638c2ab095e8c60fd11c5d292fc5f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sat, 4 Jan 2014 18:47:24 +0100 Subject: KVM: nVMX: Update guest activity state field on L2 exits Set guest activity state in L1's VMCS according to the VCPUs mp_state. This ensures we report the correct state in case we L2 executed HLT or if we put L2 into HLT state and it was now woken up by an event. Signed-off-by: Jan Kiszka Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index f9a5433..407b05c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8219,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); vmcs12->guest_pending_dbg_exceptions = vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS); + if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) + vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT; + else + vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) && (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER)) -- cgit v1.1 From 699bde3b6c95319749a8e1b7aa2b3f6bee84bff8 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 20 Jan 2014 12:34:13 +0100 Subject: KVM: s390: Fix memory access error detection Seems that commit 210b1607012cc9034841a393e0591b2c86d9e26c (KVM: s390: Removed SIE_INTERCEPT_UCONTROL) lost a hunk when we reworked our patch queue to rework the async_fp code. We now ignore faults on the sie instruction (guest accesses non-existing memory) instead of sending a fault into the guest. This leads to hang situations with the old virtio transport that checks for descriptor memory after guest memory. Instead of bailing out this code now goes wild... Lets re-add the check. Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1bb1dda..7635c00 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -738,6 +738,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason) current->thread.gmap_addr; vcpu->run->s390_ucontrol.pgm_code = 0x10; rc = -EREMOTE; + } else { + VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction"); + trace_kvm_s390_sie_fault(vcpu); + rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); } memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16); -- cgit v1.1 From 94491620e1362f6065ab821c13eb54b716ada19f Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 17 Jan 2014 18:02:38 -0800 Subject: kvm: make KVM_MMU_AUDIT help text more readable Make KVM_MMU_AUDIT kconfig help text readable and collapse two spaces between words down to one space. Signed-off-by: Randy Dunlap Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index b89c5db..287e4c8 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -80,7 +80,7 @@ config KVM_MMU_AUDIT depends on KVM && TRACEPOINTS ---help--- This option adds a R/W kVM module parameter 'mmu_audit', which allows - audit KVM MMU at runtime. + auditing of KVM MMU events at runtime. config KVM_DEVICE_ASSIGNMENT bool "KVM legacy PCI device assignment support" -- cgit v1.1