From 210b1607012cc9034841a393e0591b2c86d9e26c Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Thu, 19 Sep 2013 16:26:18 +0200
Subject: KVM: s390: Removed SIE_INTERCEPT_UCONTROL

The SIE_INTERCEPT_UCONTROL can be removed by moving the related code
from kvm_arch_vcpu_ioctl_run() to vcpu_post_run().

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 24 ++++++------------------
 arch/s390/kvm/kvm-s390.h |  2 --
 2 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 569494e..7f47835 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -732,14 +732,12 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 
 	if (exit_reason >= 0) {
 		rc = 0;
-	} else {
-		if (kvm_is_ucontrol(vcpu->kvm)) {
-			rc = SIE_INTERCEPT_UCONTROL;
-		} else {
-			VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
-			trace_kvm_s390_sie_fault(vcpu);
-			rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
-		}
+	} else if (kvm_is_ucontrol(vcpu->kvm)) {
+		vcpu->run->exit_reason = KVM_EXIT_S390_UCONTROL;
+		vcpu->run->s390_ucontrol.trans_exc_code =
+						current->thread.gmap_addr;
+		vcpu->run->s390_ucontrol.pgm_code = 0x10;
+		rc = -EREMOTE;
 	}
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
@@ -833,16 +831,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 		rc = -EINTR;
 	}
 
-#ifdef CONFIG_KVM_S390_UCONTROL
-	if (rc == SIE_INTERCEPT_UCONTROL) {
-		kvm_run->exit_reason = KVM_EXIT_S390_UCONTROL;
-		kvm_run->s390_ucontrol.trans_exc_code =
-			current->thread.gmap_addr;
-		kvm_run->s390_ucontrol.pgm_code = 0x10;
-		rc = 0;
-	}
-#endif
-
 	if (rc == -EOPNOTSUPP) {
 		/* intercept cannot be handled in-kernel, prepare kvm-run */
 		kvm_run->exit_reason         = KVM_EXIT_S390_SIEIC;
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index b44912a..aad541f 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -27,8 +27,6 @@ typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 /* declare vfacilities extern */
 extern unsigned long *vfacilities;
 
-/* negativ values are error codes, positive values for internal conditions */
-#define SIE_INTERCEPT_UCONTROL		(1<<0)
 int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu);
 
 #define VM_EVENT(d_kvm, d_loglevel, d_string, d_args...)\
-- 
cgit v1.1


From ac5b03420150241dc2db3cb4aa4f58c1e7e4640f Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Mon, 7 Oct 2013 17:50:22 +0200
Subject: KVM: s390: Removed VIRTIODESCSPACE

VIRTIODESCSPACE is completely unused nowadays and thus can be removed
without any problems.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index aad541f..fcd25b4 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -19,9 +19,6 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 
-/* The current code can have up to 256 pages for virtio */
-#define VIRTIODESCSPACE (256ul * 4096ul)
-
 typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
 
 /* declare vfacilities extern */
-- 
cgit v1.1


From f092669e743048f50c714a1af7f8e3478d7b9e1b Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Wed, 9 Oct 2013 14:15:54 +0200
Subject: KVM: s390: Fix access to CR6 in TPI handler

The TPI handler currently uses vcpu->run->s.regs.crs[6] to get the current
value of CR6. I think this is wrong, because vcpu->run->s.regs.crs is
only updated when kvm_arch_vcpu_ioctl_run() drops back to userspace.
So let's change the TPI handler to use vcpu->arch.sie_block->gcr[6] instead.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/priv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 2440602..b18fe52 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -197,7 +197,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu)
 	if (addr & 3)
 		return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
 	cc = 0;
-	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->run->s.regs.crs[6], 0);
+	inti = kvm_s390_get_io_int(vcpu->kvm, vcpu->arch.sie_block->gcr[6], 0);
 	if (!inti)
 		goto no_interrupt;
 	cc = 1;
-- 
cgit v1.1


From c95221f69dfa5d3696b2b91374cbd7e5897657c5 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Wed, 9 Oct 2013 16:49:03 +0200
Subject: KVM: s390: Do not set CC3 for EQBS and SQBS

The EQBS and SQBS instructions do not set CC3 for invalid channels, but
should throw an operation exception instead when not available. Thus they
should not be handled by the handle_io_inst() wrapper but drop to userspace
instead (which will then inject the operation exception).

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/priv.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index b18fe52..05537ab 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -638,7 +638,6 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t b9_handlers[256] = {
 	[0x8d] = handle_epsw,
-	[0x9c] = handle_io_inst,
 	[0xaf] = handle_pfmf,
 };
 
@@ -731,7 +730,6 @@ static int handle_lctlg(struct kvm_vcpu *vcpu)
 
 static const intercept_handler_t eb_handlers[256] = {
 	[0x2f] = handle_lctlg,
-	[0x8a] = handle_io_inst,
 };
 
 int kvm_s390_handle_eb(struct kvm_vcpu *vcpu)
-- 
cgit v1.1


From e879892c725217a4af1012f31ae56be762473216 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Wed, 6 Nov 2013 15:46:33 +0100
Subject: KVM: s390: Always store status during SIGP STOP_AND_STORE_STATUS

The SIGP order STOP_AND_STORE_STATUS is defined to stop a CPU and store
its status. However, we only stored the status if the CPU was still
running, so make sure that the status is now also stored if the CPU was
already stopped. This fixes the problem that the CPU information was
not stored correctly in kdump files, rendering them unreadable.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Acked-by: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++----------
 arch/s390/kvm/kvm-s390.h |  4 ++--
 arch/s390/kvm/sigp.c     | 15 ++++++++++++++-
 3 files changed, 31 insertions(+), 13 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 7f47835..55eb8de 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -873,7 +873,7 @@ static int __guestcopy(struct kvm_vcpu *vcpu, u64 guestdest, void *from,
  * KVM_S390_STORE_STATUS_NOADDR: -> 0x1200 on 64 bit
  * KVM_S390_STORE_STATUS_PREFIXED: -> prefix
  */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	unsigned char archmode = 1;
 	int prefix;
@@ -891,15 +891,6 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	} else
 		prefix = 0;
 
-	/*
-	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
-	 * copying in vcpu load/put. Lets update our copies before we save
-	 * it into the save area
-	 */
-	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
-	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
-	save_access_regs(vcpu->run->s.regs.acrs);
-
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, fp_regs),
 			vcpu->arch.guest_fpregs.fprs, 128, prefix))
 		return -EFAULT;
@@ -944,6 +935,20 @@ int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
 	return 0;
 }
 
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr)
+{
+	/*
+	 * The guest FPRS and ACRS are in the host FPRS/ACRS due to the lazy
+	 * copying in vcpu load/put. Lets update our copies before we save
+	 * it into the save area
+	 */
+	save_fp_ctl(&vcpu->arch.guest_fpregs.fpc);
+	save_fp_regs(vcpu->arch.guest_fpregs.fprs);
+	save_access_regs(vcpu->run->s.regs.acrs);
+
+	return kvm_s390_store_status_unloaded(vcpu, addr);
+}
+
 static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
 				     struct kvm_enable_cap *cap)
 {
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index fcd25b4..36f6b18 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -145,8 +145,8 @@ int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
 int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu);
 
 /* implemented in kvm-s390.c */
-int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu,
-				 unsigned long addr);
+int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr);
+int kvm_s390_vcpu_store_status(struct kvm_vcpu *vcpu, unsigned long addr);
 void s390_vcpu_block(struct kvm_vcpu *vcpu);
 void s390_vcpu_unblock(struct kvm_vcpu *vcpu);
 void exit_sie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index bec398c..6805601 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -130,6 +130,7 @@ unlock:
 static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 {
 	struct kvm_s390_interrupt_info *inti;
+	int rc = SIGP_CC_ORDER_CODE_ACCEPTED;
 
 	inti = kzalloc(sizeof(*inti), GFP_ATOMIC);
 	if (!inti)
@@ -139,6 +140,8 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 	spin_lock_bh(&li->lock);
 	if ((atomic_read(li->cpuflags) & CPUSTAT_STOPPED)) {
 		kfree(inti);
+		if ((action & ACTION_STORE_ON_STOP) != 0)
+			rc = -ESHUTDOWN;
 		goto out;
 	}
 	list_add_tail(&inti->list, &li->list);
@@ -150,7 +153,7 @@ static int __inject_sigp_stop(struct kvm_s390_local_interrupt *li, int action)
 out:
 	spin_unlock_bh(&li->lock);
 
-	return SIGP_CC_ORDER_CODE_ACCEPTED;
+	return rc;
 }
 
 static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
@@ -174,6 +177,16 @@ static int __sigp_stop(struct kvm_vcpu *vcpu, u16 cpu_addr, int action)
 unlock:
 	spin_unlock(&fi->lock);
 	VCPU_EVENT(vcpu, 4, "sent sigp stop to cpu %x", cpu_addr);
+
+	if ((action & ACTION_STORE_ON_STOP) != 0 && rc == -ESHUTDOWN) {
+		/* If the CPU has already been stopped, we still have
+		 * to save the status when doing stop-and-store. This
+		 * has to be done after unlocking all spinlocks. */
+		struct kvm_vcpu *dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+		rc = kvm_s390_store_status_unloaded(dst_vcpu,
+						KVM_S390_STORE_STATUS_NOADDR);
+	}
+
 	return rc;
 }
 
-- 
cgit v1.1


From 178bd789775ab29233e0553155253ec8d73af71f Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Wed, 13 Nov 2013 20:28:18 +0100
Subject: KVM: s390: Fix clock comparator field for STORE STATUS

Only the 7 most significant bytes of the clock comparator must be
saved to the status area, and the byte at offset 304 has to be zero.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 55eb8de..1bb1dda 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -877,6 +877,7 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
 {
 	unsigned char archmode = 1;
 	int prefix;
+	u64 clkcomp;
 
 	if (addr == KVM_S390_STORE_STATUS_NOADDR) {
 		if (copy_to_guest_absolute(vcpu, 163ul, &archmode, 1))
@@ -920,8 +921,9 @@ int kvm_s390_store_status_unloaded(struct kvm_vcpu *vcpu, unsigned long addr)
 			&vcpu->arch.sie_block->cputm, 8, prefix))
 		return -EFAULT;
 
+	clkcomp = vcpu->arch.sie_block->ckc >> 8;
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, clk_cmp),
-			&vcpu->arch.sie_block->ckc, 8, prefix))
+			&clkcomp, 8, prefix))
 		return -EFAULT;
 
 	if (__guestcopy(vcpu, addr + offsetof(struct save_area, acc_regs),
-- 
cgit v1.1


From 743db27c526e0f31cc507959d662e97e2048a86f Mon Sep 17 00:00:00 2001
From: Heiko Carstens <heiko.carstens@de.ibm.com>
Date: Mon, 11 Nov 2013 13:56:47 +0100
Subject: KVM: s390: fix diagnose code extraction

The diagnose code to be used is the contents of the base register (if not
zero), plus the displacement. The current code ignores the base register
contents. So let's fix that...

Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: stable@vger.kernel.org
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/diag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 78d967f..5ff29be 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -137,7 +137,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 
 int kvm_s390_handle_diag(struct kvm_vcpu *vcpu)
 {
-	int code = (vcpu->arch.sie_block->ipb & 0xfff0000) >> 16;
+	int code = kvm_s390_get_base_disp_rs(vcpu) & 0xffff;
 
 	if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
 		return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
-- 
cgit v1.1


From 00e9e435f97b409db8986f9cd35d126ae2d02a0c Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Wed, 13 Nov 2013 20:48:51 +0100
Subject: KVM: s390: Add SIGP store-status-at-address order

The STORE STATUS AT ADDRESS order of SIGP was still missing.
Now it is supported, using the common kvm_s390_store_status_unloaded()
function.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/sigp.c  | 35 +++++++++++++++++++++++++++++++++++
 arch/s390/kvm/trace.h |  1 +
 2 files changed, 36 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 6805601..c137ed3 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -275,6 +275,37 @@ out_fi:
 	return rc;
 }
 
+static int __sigp_store_status_at_addr(struct kvm_vcpu *vcpu, u16 cpu_id,
+					u32 addr, u64 *reg)
+{
+	struct kvm_vcpu *dst_vcpu = NULL;
+	int flags;
+	int rc;
+
+	if (cpu_id < KVM_MAX_VCPUS)
+		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_id);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
+
+	spin_lock_bh(&dst_vcpu->arch.local_int.lock);
+	flags = atomic_read(dst_vcpu->arch.local_int.cpuflags);
+	spin_unlock_bh(&dst_vcpu->arch.local_int.lock);
+	if (!(flags & CPUSTAT_STOPPED)) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+
+	addr &= 0x7ffffe00;
+	rc = kvm_s390_store_status_unloaded(dst_vcpu, addr);
+	if (rc == -EFAULT) {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INVALID_PARAMETER;
+		rc = SIGP_CC_STATUS_STORED;
+	}
+	return rc;
+}
+
 static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
 				u64 *reg)
 {
@@ -379,6 +410,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		rc = __sigp_stop(vcpu, cpu_addr, ACTION_STORE_ON_STOP |
 						 ACTION_STOP_ON_STOP);
 		break;
+	case SIGP_STORE_STATUS_AT_ADDRESS:
+		rc = __sigp_store_status_at_addr(vcpu, cpu_addr, parameter,
+						 &vcpu->run->s.regs.gprs[r1]);
+		break;
 	case SIGP_SET_ARCHITECTURE:
 		vcpu->stat.instruction_sigp_arch++;
 		rc = __sigp_set_arch(vcpu, parameter);
diff --git a/arch/s390/kvm/trace.h b/arch/s390/kvm/trace.h
index 0c991c6..3db76b2 100644
--- a/arch/s390/kvm/trace.h
+++ b/arch/s390/kvm/trace.h
@@ -175,6 +175,7 @@ TRACE_EVENT(kvm_s390_intercept_validity,
 	{SIGP_STOP_AND_STORE_STATUS, "stop and store status"},	\
 	{SIGP_SET_ARCHITECTURE, "set architecture"},		\
 	{SIGP_SET_PREFIX, "set prefix"},			\
+	{SIGP_STORE_STATUS_AT_ADDRESS, "store status at addr"},	\
 	{SIGP_SENSE_RUNNING, "sense running"},			\
 	{SIGP_RESTART, "restart"}
 
-- 
cgit v1.1


From 36daca9bb36f0395755817d1b0c45ab6fbf0441b Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Thu, 14 Nov 2013 11:08:20 +0100
Subject: KVM: s390: Removed kvm_s390_inject_sigp_stop()

The function kvm_s390_inject_sigp_stop() has been unused since the
removal of the old mmu reload code and thus can be removed safely.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.h | 1 -
 arch/s390/kvm/sigp.c     | 6 ------
 2 files changed, 7 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index 36f6b18..095cf51 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -128,7 +128,6 @@ int __must_check kvm_s390_inject_vm(struct kvm *kvm,
 int __must_check kvm_s390_inject_vcpu(struct kvm_vcpu *vcpu,
 				      struct kvm_s390_interrupt *s390int);
 int __must_check kvm_s390_inject_program_int(struct kvm_vcpu *vcpu, u16 code);
-int __must_check kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action);
 struct kvm_s390_interrupt_info *kvm_s390_get_io_int(struct kvm *kvm,
 						    u64 cr6, u64 schid);
 
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index c137ed3..c370058 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -190,12 +190,6 @@ unlock:
 	return rc;
 }
 
-int kvm_s390_inject_sigp_stop(struct kvm_vcpu *vcpu, int action)
-{
-	struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int;
-	return __inject_sigp_stop(li, action);
-}
-
 static int __sigp_set_arch(struct kvm_vcpu *vcpu, u32 parameter)
 {
 	int rc;
-- 
cgit v1.1


From 4fda342cc7f577599c53fd27b99c953c7b1da18a Mon Sep 17 00:00:00 2001
From: Santosh Shilimkar <santosh.shilimkar@ti.com>
Date: Tue, 19 Nov 2013 14:59:12 -0500
Subject: arm/arm64: kvm: Use virt_to_idmap instead of virt_to_phys for idmap
 mappings

KVM initialisation fails on architectures implementing virt_to_idmap()
because virt_to_phys() on such architectures won't fetch you the correct
idmap page.

So update the KVM ARM code to use the virt_to_idmap() to fix the issue.
Since the KVM code is shared between arm and arm64, we create
kvm_virt_to_phys() and handle the redirection in respective headers.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Santosh Shilimkar <santosh.shilimkar@ti.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_mmu.h   | 1 +
 arch/arm/kvm/mmu.c               | 8 ++++----
 arch/arm64/include/asm/kvm_mmu.h | 1 +
 3 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index 77de4a4..2d122ad 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -140,6 +140,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__cpuc_flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		virt_to_idmap((unsigned long)(x))
 
 #endif	/* !__ASSEMBLY__ */
 
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 5809069..659db0e 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -916,9 +916,9 @@ int kvm_mmu_init(void)
 {
 	int err;
 
-	hyp_idmap_start = virt_to_phys(__hyp_idmap_text_start);
-	hyp_idmap_end = virt_to_phys(__hyp_idmap_text_end);
-	hyp_idmap_vector = virt_to_phys(__kvm_hyp_init);
+	hyp_idmap_start = kvm_virt_to_phys(__hyp_idmap_text_start);
+	hyp_idmap_end = kvm_virt_to_phys(__hyp_idmap_text_end);
+	hyp_idmap_vector = kvm_virt_to_phys(__kvm_hyp_init);
 
 	if ((hyp_idmap_start ^ hyp_idmap_end) & PAGE_MASK) {
 		/*
@@ -945,7 +945,7 @@ int kvm_mmu_init(void)
 		 */
 		kvm_flush_dcache_to_poc(init_bounce_page, len);
 
-		phys_base = virt_to_phys(init_bounce_page);
+		phys_base = kvm_virt_to_phys(init_bounce_page);
 		hyp_idmap_vector += phys_base - hyp_idmap_start;
 		hyp_idmap_start = phys_base;
 		hyp_idmap_end = phys_base + len;
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 680f74e..7f1f940 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -136,6 +136,7 @@ static inline void coherent_icache_guest_page(struct kvm *kvm, hva_t hva,
 }
 
 #define kvm_flush_dcache_to_poc(a,l)	__flush_dcache_area((a), (l))
+#define kvm_virt_to_phys(x)		__virt_to_phys((unsigned long)(x))
 
 #endif /* __ASSEMBLY__ */
 #endif /* __ARM64_KVM_MMU_H__ */
-- 
cgit v1.1


From 949c007acd8b6887cf5f3ac86512a7b12fa245dc Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Tue, 26 Nov 2013 12:27:16 +0100
Subject: KVM: s390: Use helper function to set CC in SIGP handler

We've got a helper function for setting the condition code now,
so let's use it in the SIGP handler, too.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/sigp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index c370058..bc0d85a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -435,7 +435,6 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 	if (rc < 0)
 		return rc;
 
-	vcpu->arch.sie_block->gpsw.mask &= ~(3ul << 44);
-	vcpu->arch.sie_block->gpsw.mask |= (rc & 3ul) << 44;
+	kvm_s390_set_psw_cc(vcpu, rc);
 	return 0;
 }
-- 
cgit v1.1


From b13d3580ee47ba3b2814e90b8a9b8241f7a4ba83 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Thu, 21 Nov 2013 16:01:48 +0100
Subject: KVM: s390: Add the SIGP order CONDITIONAL EMERGENCY SIGNAL

This patch adds the missing SIGP order "conditional emergency
signal" by calling the "emergency signal" SIGP handler if the
required conditions are met.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/include/asm/sigp.h |  1 +
 arch/s390/kvm/sigp.c         | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 37 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index 5a87d16..c002cd5 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -12,6 +12,7 @@
 #define SIGP_SET_PREFIX		     13
 #define SIGP_STORE_STATUS_AT_ADDRESS 14
 #define SIGP_SET_ARCHITECTURE	     18
+#define SIGP_COND_EMERGENCY_SIGNAL   19
 #define SIGP_SENSE_RUNNING	     21
 
 /* SIGP condition codes */
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index bc0d85a..eee1402 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -1,7 +1,7 @@
 /*
  * handling interprocessor communication
  *
- * Copyright IBM Corp. 2008, 2009
+ * Copyright IBM Corp. 2008, 2013
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License (version 2 only)
@@ -89,6 +89,37 @@ unlock:
 	return rc;
 }
 
+static int __sigp_conditional_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr,
+					u16 asn, u64 *reg)
+{
+	struct kvm_vcpu *dst_vcpu = NULL;
+	const u64 psw_int_mask = PSW_MASK_IO | PSW_MASK_EXT;
+	u16 p_asn, s_asn;
+	psw_t *psw;
+	u32 flags;
+
+	if (cpu_addr < KVM_MAX_VCPUS)
+		dst_vcpu = kvm_get_vcpu(vcpu->kvm, cpu_addr);
+	if (!dst_vcpu)
+		return SIGP_CC_NOT_OPERATIONAL;
+	flags = atomic_read(&dst_vcpu->arch.sie_block->cpuflags);
+	psw = &dst_vcpu->arch.sie_block->gpsw;
+	p_asn = dst_vcpu->arch.sie_block->gcr[4] & 0xffff;  /* Primary ASN */
+	s_asn = dst_vcpu->arch.sie_block->gcr[3] & 0xffff;  /* Secondary ASN */
+
+	/* Deliver the emergency signal? */
+	if (!(flags & CPUSTAT_STOPPED)
+	    || (psw->mask & psw_int_mask) != psw_int_mask
+	    || ((flags & CPUSTAT_WAIT) && psw->addr != 0)
+	    || (!(flags & CPUSTAT_WAIT) && (asn == p_asn || asn == s_asn))) {
+		return __sigp_emergency(vcpu, cpu_addr);
+	} else {
+		*reg &= 0xffffffff00000000UL;
+		*reg |= SIGP_STATUS_INCORRECT_STATE;
+		return SIGP_CC_STATUS_STORED;
+	}
+}
+
 static int __sigp_external_call(struct kvm_vcpu *vcpu, u16 cpu_addr)
 {
 	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
@@ -417,6 +448,10 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		rc = __sigp_set_prefix(vcpu, cpu_addr, parameter,
 				       &vcpu->run->s.regs.gprs[r1]);
 		break;
+	case SIGP_COND_EMERGENCY_SIGNAL:
+		rc = __sigp_conditional_emergency(vcpu, cpu_addr, parameter,
+						  &vcpu->run->s.regs.gprs[r1]);
+		break;
 	case SIGP_SENSE_RUNNING:
 		vcpu->stat.instruction_sigp_sense_running++;
 		rc = __sigp_sense_running(vcpu, cpu_addr,
-- 
cgit v1.1


From cc92d6dea11cd43842e20cd05c066963de586417 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Wed, 27 Nov 2013 11:47:10 +0100
Subject: KVM: s390: Reworked SIGP RESTART order

When SIGP RESTART detects an illegal CPU address, there is no need to
drop to userspace, we can return CC3 to the guest directly instead.
Also renamed __sigp_restart() to sigp_check_callable() (since this
is a better description of what the function is really doing) and
moved a string specific to RESTART to the calling place instead, so
that this function gets usable by other SIGP orders, too.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/sigp.c | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index eee1402..509547d 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -363,7 +363,8 @@ static int __sigp_sense_running(struct kvm_vcpu *vcpu, u16 cpu_addr,
 	return rc;
 }
 
-static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
+/* Test whether the destination CPU is available and not busy */
+static int sigp_check_callable(struct kvm_vcpu *vcpu, u16 cpu_addr)
 {
 	struct kvm_s390_float_interrupt *fi = &vcpu->kvm->arch.float_int;
 	struct kvm_s390_local_interrupt *li;
@@ -382,9 +383,6 @@ static int __sigp_restart(struct kvm_vcpu *vcpu, u16 cpu_addr)
 	spin_lock_bh(&li->lock);
 	if (li->action_bits & ACTION_STOP_ON_STOP)
 		rc = SIGP_CC_BUSY;
-	else
-		VCPU_EVENT(vcpu, 4, "sigp restart %x to handle userspace",
-			cpu_addr);
 	spin_unlock_bh(&li->lock);
 out:
 	spin_unlock(&fi->lock);
@@ -459,10 +457,15 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		break;
 	case SIGP_RESTART:
 		vcpu->stat.instruction_sigp_restart++;
-		rc = __sigp_restart(vcpu, cpu_addr);
-		if (rc == SIGP_CC_BUSY)
-			break;
-		/* user space must know about restart */
+		rc = sigp_check_callable(vcpu, cpu_addr);
+		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED) {
+			VCPU_EVENT(vcpu, 4,
+				   "sigp restart %x to handle userspace",
+				   cpu_addr);
+			/* user space must know about restart */
+			rc = -EOPNOTSUPP;
+		}
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.1


From 58bc33b2b700f8524772f3fc20272da2187060c8 Mon Sep 17 00:00:00 2001
From: Thomas Huth <thuth@linux.vnet.ibm.com>
Date: Tue, 3 Dec 2013 12:54:55 +0100
Subject: KVM: s390: SIGP START has to report BUSY while stopping a CPU

Just like the RESTART order, the START order also has to report BUSY
while a STOP request is pending, to avoid that the START might be
ignored due to a race condition between the STOP and the START order.

Signed-off-by: Thomas Huth <thuth@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/include/asm/sigp.h | 1 +
 arch/s390/kvm/sigp.c         | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/include/asm/sigp.h b/arch/s390/include/asm/sigp.h
index c002cd5..d091aa1 100644
--- a/arch/s390/include/asm/sigp.h
+++ b/arch/s390/include/asm/sigp.h
@@ -5,6 +5,7 @@
 #define SIGP_SENSE		      1
 #define SIGP_EXTERNAL_CALL	      2
 #define SIGP_EMERGENCY_SIGNAL	      3
+#define SIGP_START		      4
 #define SIGP_STOP		      5
 #define SIGP_RESTART		      6
 #define SIGP_STOP_AND_STORE_STATUS    9
diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
index 509547d..87c2b3a 100644
--- a/arch/s390/kvm/sigp.c
+++ b/arch/s390/kvm/sigp.c
@@ -455,6 +455,11 @@ int kvm_s390_handle_sigp(struct kvm_vcpu *vcpu)
 		rc = __sigp_sense_running(vcpu, cpu_addr,
 					  &vcpu->run->s.regs.gprs[r1]);
 		break;
+	case SIGP_START:
+		rc = sigp_check_callable(vcpu, cpu_addr);
+		if (rc == SIGP_CC_ORDER_CODE_ACCEPTED)
+			rc = -EOPNOTSUPP;    /* Handle START in user space */
+		break;
 	case SIGP_RESTART:
 		vcpu->stat.instruction_sigp_restart++;
 		rc = sigp_check_callable(vcpu, cpu_addr);
-- 
cgit v1.1


From ff1f3cb4b3ac5d039f02679f34cb1498d110d241 Mon Sep 17 00:00:00 2001
From: Dominik Dingel <dingel@linux.vnet.ibm.com>
Date: Mon, 9 Dec 2013 18:30:01 +0100
Subject: KVM: s390: ioeventfd: ignore leftmost bits

The diagnose 500 subcode 3 contains the 32 bit subchannel id in bits 32-63
(counting from the left). As for other I/O instructions, bits 0-31 should be
ignored and thus not be passed to kvm_io_bus_write_cookie().

This fixes a bug where the guest passed non-zero bits 0-31 which the
host tried to interpret, leading to ioeventfd notification failures.

Cc: stable@vger.kernel.org
Signed-off-by: Dominik Dingel <dingel@linux.vnet.ibm.com>
Reviewed-by: Cornelia Huck <cornelia.huck@de.ibm.com>
Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com>
---
 arch/s390/kvm/diag.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c
index 5ff29be..8216c0e 100644
--- a/arch/s390/kvm/diag.c
+++ b/arch/s390/kvm/diag.c
@@ -121,7 +121,7 @@ static int __diag_virtio_hypercall(struct kvm_vcpu *vcpu)
 	 * - gpr 4 contains the index on the bus (optionally)
 	 */
 	ret = kvm_io_bus_write_cookie(vcpu->kvm, KVM_VIRTIO_CCW_NOTIFY_BUS,
-				      vcpu->run->s.regs.gprs[2],
+				      vcpu->run->s.regs.gprs[2] & 0xffffffff,
 				      8, &vcpu->run->s.regs.gprs[3],
 				      vcpu->run->s.regs.gprs[4]);
 
-- 
cgit v1.1


From 2961e8764faad212234e93907a370a7c36a67da5 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Mon, 25 Nov 2013 15:37:13 +0200
Subject: KVM: VMX: shadow VM_(ENTRY|EXIT)_CONTROLS vmcs field

The VM_(ENTRY|EXIT)_CONTROLS vmcs fields are read/written on each guest
entry, but most of the time this can be avoided since the values do not
change. Keep a copy of the fields in memory to avoid unnecessary reads
from the vmcs.

Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 112 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 85 insertions(+), 27 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b2fe1c2..1024689 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -418,6 +418,8 @@ struct vcpu_vmx {
 	u64 		      msr_host_kernel_gs_base;
 	u64 		      msr_guest_kernel_gs_base;
 #endif
+	u32 vm_entry_controls_shadow;
+	u32 vm_exit_controls_shadow;
 	/*
 	 * loaded_vmcs points to the VMCS currently used in this vcpu. For a
 	 * non-nested (L1) guest, it always points to vmcs01. For a nested
@@ -1326,6 +1328,62 @@ static void vmcs_set_bits(unsigned long field, u32 mask)
 	vmcs_writel(field, vmcs_readl(field) | mask);
 }
 
+static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+	vmcs_write32(VM_ENTRY_CONTROLS, val);
+	vmx->vm_entry_controls_shadow = val;
+}
+
+static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+	if (vmx->vm_entry_controls_shadow != val)
+		vm_entry_controls_init(vmx, val);
+}
+
+static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
+{
+	return vmx->vm_entry_controls_shadow;
+}
+
+
+static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
+}
+
+static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
+}
+
+static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
+{
+	vmcs_write32(VM_EXIT_CONTROLS, val);
+	vmx->vm_exit_controls_shadow = val;
+}
+
+static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
+{
+	if (vmx->vm_exit_controls_shadow != val)
+		vm_exit_controls_init(vmx, val);
+}
+
+static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
+{
+	return vmx->vm_exit_controls_shadow;
+}
+
+
+static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
+}
+
+static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
+{
+	vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
+}
+
 static void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 {
 	vmx->segment_cache.bitmask = 0;
@@ -1410,11 +1468,11 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 	vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
-static void clear_atomic_switch_msr_special(unsigned long entry,
-		unsigned long exit)
+static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+		unsigned long entry, unsigned long exit)
 {
-	vmcs_clear_bits(VM_ENTRY_CONTROLS, entry);
-	vmcs_clear_bits(VM_EXIT_CONTROLS, exit);
+	vm_entry_controls_clearbit(vmx, entry);
+	vm_exit_controls_clearbit(vmx, exit);
 }
 
 static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
@@ -1425,14 +1483,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	switch (msr) {
 	case MSR_EFER:
 		if (cpu_has_load_ia32_efer) {
-			clear_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+			clear_atomic_switch_msr_special(vmx,
+					VM_ENTRY_LOAD_IA32_EFER,
 					VM_EXIT_LOAD_IA32_EFER);
 			return;
 		}
 		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		if (cpu_has_load_perf_global_ctrl) {
-			clear_atomic_switch_msr_special(
+			clear_atomic_switch_msr_special(vmx,
 					VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
 					VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL);
 			return;
@@ -1453,14 +1512,15 @@ static void clear_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr)
 	vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, m->nr);
 }
 
-static void add_atomic_switch_msr_special(unsigned long entry,
-		unsigned long exit, unsigned long guest_val_vmcs,
-		unsigned long host_val_vmcs, u64 guest_val, u64 host_val)
+static void add_atomic_switch_msr_special(struct vcpu_vmx *vmx,
+		unsigned long entry, unsigned long exit,
+		unsigned long guest_val_vmcs, unsigned long host_val_vmcs,
+		u64 guest_val, u64 host_val)
 {
 	vmcs_write64(guest_val_vmcs, guest_val);
 	vmcs_write64(host_val_vmcs, host_val);
-	vmcs_set_bits(VM_ENTRY_CONTROLS, entry);
-	vmcs_set_bits(VM_EXIT_CONTROLS, exit);
+	vm_entry_controls_setbit(vmx, entry);
+	vm_exit_controls_setbit(vmx, exit);
 }
 
 static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
@@ -1472,7 +1532,8 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 	switch (msr) {
 	case MSR_EFER:
 		if (cpu_has_load_ia32_efer) {
-			add_atomic_switch_msr_special(VM_ENTRY_LOAD_IA32_EFER,
+			add_atomic_switch_msr_special(vmx,
+					VM_ENTRY_LOAD_IA32_EFER,
 					VM_EXIT_LOAD_IA32_EFER,
 					GUEST_IA32_EFER,
 					HOST_IA32_EFER,
@@ -1482,7 +1543,7 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr,
 		break;
 	case MSR_CORE_PERF_GLOBAL_CTRL:
 		if (cpu_has_load_perf_global_ctrl) {
-			add_atomic_switch_msr_special(
+			add_atomic_switch_msr_special(vmx,
 					VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL,
 					VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL,
 					GUEST_IA32_PERF_GLOBAL_CTRL,
@@ -3182,14 +3243,10 @@ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
 	vmx_load_host_state(to_vmx(vcpu));
 	vcpu->arch.efer = efer;
 	if (efer & EFER_LMA) {
-		vmcs_write32(VM_ENTRY_CONTROLS,
-			     vmcs_read32(VM_ENTRY_CONTROLS) |
-			     VM_ENTRY_IA32E_MODE);
+		vm_entry_controls_setbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
 		msr->data = efer;
 	} else {
-		vmcs_write32(VM_ENTRY_CONTROLS,
-			     vmcs_read32(VM_ENTRY_CONTROLS) &
-			     ~VM_ENTRY_IA32E_MODE);
+		vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
 
 		msr->data = efer & ~EFER_LME;
 	}
@@ -3217,9 +3274,7 @@ static void enter_lmode(struct kvm_vcpu *vcpu)
 
 static void exit_lmode(struct kvm_vcpu *vcpu)
 {
-	vmcs_write32(VM_ENTRY_CONTROLS,
-		     vmcs_read32(VM_ENTRY_CONTROLS)
-		     & ~VM_ENTRY_IA32E_MODE);
+	vm_entry_controls_clearbit(to_vmx(vcpu), VM_ENTRY_IA32E_MODE);
 	vmx_set_efer(vcpu, vcpu->arch.efer & ~EFER_LMA);
 }
 
@@ -4346,10 +4401,11 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
 		++vmx->nmsrs;
 	}
 
-	vmcs_write32(VM_EXIT_CONTROLS, vmcs_config.vmexit_ctrl);
+
+	vm_exit_controls_init(vmx, vmcs_config.vmexit_ctrl);
 
 	/* 22.2.1, 20.8.1 */
-	vmcs_write32(VM_ENTRY_CONTROLS, vmcs_config.vmentry_ctrl);
+	vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
 
 	vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
 	set_cr4_guest_host_mask(vmx);
@@ -7759,12 +7815,12 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 	exit_control = vmcs_config.vmexit_ctrl;
 	if (vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER)
 		exit_control |= VM_EXIT_SAVE_VMX_PREEMPTION_TIMER;
-	vmcs_write32(VM_EXIT_CONTROLS, exit_control);
+	vm_exit_controls_init(vmx, exit_control);
 
 	/* vmcs12's VM_ENTRY_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE are
 	 * emulated by vmx_set_efer(), below.
 	 */
-	vmcs_write32(VM_ENTRY_CONTROLS,
+	vm_entry_controls_init(vmx, 
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_LOAD_IA32_EFER &
 			~VM_ENTRY_IA32E_MODE) |
 		(vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE));
@@ -8186,7 +8242,7 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	vmcs12->vm_entry_controls =
 		(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
-		(vmcs_read32(VM_ENTRY_CONTROLS) & VM_ENTRY_IA32E_MODE);
+		(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
 
 	/* TODO: These cannot have changed unless we have MSR bitmaps and
 	 * the relevant bit asks not to trap the change */
@@ -8390,6 +8446,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	vcpu->cpu = cpu;
 	put_cpu();
 
+	vm_entry_controls_init(vmx, vmcs_read32(VM_ENTRY_CONTROLS));
+	vm_exit_controls_init(vmx, vmcs_read32(VM_EXIT_CONTROLS));
 	vmx_segment_cache_clear(vmx);
 
 	/* if no vmcs02 cache requested, remove the one we used */
-- 
cgit v1.1


From 6dfacadd5858882eee1983995854d4e1fb1b966e Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Wed, 4 Dec 2013 08:58:54 +0100
Subject: KVM: nVMX: Add support for activity state HLT

We can easily emulate the HLT activity state for L1: If it decides that
L2 shall be halted on entry, just invoke the normal emulation of halt
after switching to L2. We do not depend on specific host features to
provide this, so we can expose the capability unconditionally.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/vmx.h | 1 +
 arch/x86/kvm/vmx.c         | 7 ++++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index 966502d..2067264 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -100,6 +100,7 @@
 
 #define VMX_MISC_PREEMPTION_TIMER_RATE_MASK	0x0000001f
 #define VMX_MISC_SAVE_EFER_LMA			0x00000020
+#define VMX_MISC_ACTIVITY_HLT			0x00000040
 
 /* VMCS Encodings */
 enum vmcs_field {
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1024689..f90320b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2340,6 +2340,7 @@ static __init void nested_vmx_setup_ctls_msrs(void)
 	rdmsr(MSR_IA32_VMX_MISC, nested_vmx_misc_low, nested_vmx_misc_high);
 	nested_vmx_misc_low &= VMX_MISC_PREEMPTION_TIMER_RATE_MASK |
 		VMX_MISC_SAVE_EFER_LMA;
+	nested_vmx_misc_low |= VMX_MISC_ACTIVITY_HLT;
 	nested_vmx_misc_high = 0;
 }
 
@@ -7938,7 +7939,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 		return 1;
 	}
 
-	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE) {
+	if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
+	    vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) {
 		nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
 		return 1;
 	}
@@ -8067,6 +8069,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	prepare_vmcs02(vcpu, vmcs12);
 
+	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
+		return kvm_emulate_halt(vcpu);
+
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
-- 
cgit v1.1


From c08ac06ab3f3cdb8d34376c3a8a5e46a31a62c8f Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Date: Fri, 13 Dec 2013 15:07:21 +0900
Subject: KVM: Use cond_resched() directly and remove useless kvm_resched()

Since the commit 15ad7146 ("KVM: Use the scheduler preemption notifiers
to make kvm preemptible"), the remaining stuff in this function is a
simple cond_resched() call with an extra need_resched() check which was
there to avoid dropping VCPUs unnecessarily.  Now it is meaningless.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/ia64/kvm/kvm-ia64.c     | 2 +-
 arch/powerpc/kvm/book3s_hv.c | 2 +-
 arch/x86/kvm/x86.c           | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 985bf80..53f44be 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -702,7 +702,7 @@ again:
 out:
 	srcu_read_unlock(&vcpu->kvm->srcu, idx);
 	if (r > 0) {
-		kvm_resched(vcpu);
+		cond_resched();
 		idx = srcu_read_lock(&vcpu->kvm->srcu);
 		goto again;
 	}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 072287f..3fa99b2 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1348,7 +1348,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
 	kvm_guest_exit();
 
 	preempt_enable();
-	kvm_resched(vcpu);
+	cond_resched();
 
 	spin_lock(&vc->lock);
 	now = get_tb();
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 21ef1ba..4fb1ee6 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6125,7 +6125,7 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 		}
 		if (need_resched()) {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
-			kvm_resched(vcpu);
+			cond_resched();
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}
-- 
cgit v1.1


From 9357d93952143b178fa9d1f5095b8f273b01a1f1 Mon Sep 17 00:00:00 2001
From: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Date: Fri, 13 Dec 2013 15:08:38 +0900
Subject: KVM: x86: Add comment on vcpu_enter_guest()'s return value

Giving proper names to the 0 and 1 was once suggested.  But since 0 is
returned to the userspace, giving it another name can introduce extra
confusion.  This patch just explains the meanings instead.

Signed-off-by: Takuya Yoshikawa <yoshikawa_takuya_b1@lab.ntt.co.jp>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4fb1ee6..1dc0359 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5865,6 +5865,11 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
 	kvm_apic_update_tmr(vcpu, tmr);
 }
 
+/*
+ * Returns 1 to let __vcpu_run() continue the guest execution loop without
+ * exiting to the userspace.  Otherwise, the value will be returned to the
+ * userspace.
+ */
 static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 {
 	int r;
-- 
cgit v1.1


From ca3f257ae570c37d3da30a524a2f61ce602c6c99 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Mon, 16 Dec 2013 12:55:46 +0100
Subject: KVM: nVMX: Support direct APIC access from L2

It's a pathological case, but still a valid one: If L1 disables APIC
virtualization and also allows L2 to directly write to the APIC page, we
have to forcibly enable APIC virtualization while in L2 if the in-kernel
APIC is in use.

This allows running the direct interrupt test case in the vmx unit test
without x2APIC.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f90320b..31eb577 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7763,6 +7763,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 			else
 				vmcs_write64(APIC_ACCESS_ADDR,
 				  page_to_phys(vmx->nested.apic_access_page));
+		} else if (vm_need_virtualize_apic_accesses(vmx->vcpu.kvm)) {
+			exec_control |=
+				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+			vmcs_write64(APIC_ACCESS_ADDR,
+				page_to_phys(vcpu->kvm->arch.apic_access_page));
 		}
 
 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
-- 
cgit v1.1


From 4c4d563b49830a66537c3f51070dad74d7a81d3a Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Wed, 18 Dec 2013 19:16:24 +0100
Subject: KVM: VMX: Do not skip the instruction if handle_dr injects a fault

If kvm_get_dr or kvm_set_dr reports that it raised a fault, we must not
advance the instruction pointer. Otherwise the exception will hit the
wrong instruction.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 31eb577..9cc5484 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5137,10 +5137,14 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 	reg = DEBUG_REG_ACCESS_REG(exit_qualification);
 	if (exit_qualification & TYPE_MOV_FROM_DR) {
 		unsigned long val;
-		if (!kvm_get_dr(vcpu, dr, &val))
-			kvm_register_write(vcpu, reg, val);
+
+		if (kvm_get_dr(vcpu, dr, &val))
+			return 1;
+		kvm_register_write(vcpu, reg, val);
 	} else
-		kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]);
+		if (kvm_set_dr(vcpu, dr, vcpu->arch.regs[reg]))
+			return 1;
+
 	skip_emulated_instruction(vcpu);
 	return 1;
 }
-- 
cgit v1.1


From 989c6b34f6a9480e397b170cc62237e89bf4fdb9 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Thu, 19 Dec 2013 15:28:51 -0200
Subject: KVM: MMU: handle invalid root_hpa at __direct_map

It is possible for __direct_map to be called with an invalid root_hpa
(-1). Two examples:

1) try_async_pf -> can_do_async_pf
    -> vmx_interrupt_allowed -> nested_vmx_vmexit
2) vmx_handle_exit -> vmx_interrupt_allowed -> nested_vmx_vmexit

From there the call chain continues to load_vmcs12_host_state and
kvm_mmu_reset_context.

Check for this possibility and let the fault exception be regenerated.

BZ: https://bugzilla.redhat.com/show_bug.cgi?id=924916

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 40772ef..31a5702 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2659,6 +2659,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
 	int emulate = 0;
 	gfn_t pseudo_gfn;
 
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return 0;
+
 	for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
 		if (iterator.level == level) {
 			mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
-- 
cgit v1.1


From 478a8237f656d86d25b3e4e4bf3c48f590156294 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 19 Nov 2013 17:43:19 -0800
Subject: arm: KVM: Don't return PSCI_INVAL if waitqueue is inactive

The current KVM implementation of PSCI returns INVALID_PARAMETERS if the
waitqueue for the corresponding CPU is not active.  This does not seem
correct, since KVM should not care what the specific thread is doing.
For example, user space may not have called KVM_RUN on this VCPU yet, or
the thread may be busy looping to user space because it received a
signal; this is really up to the user space implementation.  Instead we
should check specifically that the CPU is marked as being turned off,
regardless of the VCPU thread state, and if it is, we shall
simply clear the pause flag on the CPU and wake up the thread if it
happens to be blocked for us.

Further, the implementation seems to be racy when executing multiple
VCPU threads.  There really isn't a reasonable user space programming
scheme to ensure all secondary CPUs have reached kvm_vcpu_first_run_init
before turning on the boot CPU.

Therefore, set the pause flag on the vcpu at VCPU init time (which can
reasonably be expected to be completed for all CPUs by user space before
running any VCPUs) and clear both this flag and the feature (in case the
feature can somehow get set again in the future) and ping the waitqueue
on turning on a VCPU using PSCI.

Reported-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c  | 30 +++++++++++++++++++-----------
 arch/arm/kvm/psci.c | 11 ++++++-----
 2 files changed, 25 insertions(+), 16 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2a700e0..151eb91 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -478,15 +478,6 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 			return ret;
 	}
 
-	/*
-	 * Handle the "start in power-off" case by calling into the
-	 * PSCI code.
-	 */
-	if (test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) {
-		*vcpu_reg(vcpu, 0) = KVM_PSCI_FN_CPU_OFF;
-		kvm_psci_call(vcpu);
-	}
-
 	return 0;
 }
 
@@ -700,6 +691,24 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
+					 struct kvm_vcpu_init *init)
+{
+	int ret;
+
+	ret = kvm_vcpu_set_target(vcpu, init);
+	if (ret)
+		return ret;
+
+	/*
+	 * Handle the "start in power-off" case by marking the VCPU as paused.
+	 */
+	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+		vcpu->arch.pause = true;
+
+	return 0;
+}
+
 long kvm_arch_vcpu_ioctl(struct file *filp,
 			 unsigned int ioctl, unsigned long arg)
 {
@@ -713,8 +722,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 		if (copy_from_user(&init, argp, sizeof(init)))
 			return -EFAULT;
 
-		return kvm_vcpu_set_target(vcpu, &init);
-
+		return kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init);
 	}
 	case KVM_SET_ONE_REG:
 	case KVM_GET_ONE_REG: {
diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 0881bf1..448f60e 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -54,15 +54,15 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 		}
 	}
 
-	if (!vcpu)
+	/*
+	 * Make sure the caller requested a valid CPU and that the CPU is
+	 * turned off.
+	 */
+	if (!vcpu || !vcpu->arch.pause)
 		return KVM_PSCI_RET_INVAL;
 
 	target_pc = *vcpu_reg(source_vcpu, 2);
 
-	wq = kvm_arch_vcpu_wq(vcpu);
-	if (!waitqueue_active(wq))
-		return KVM_PSCI_RET_INVAL;
-
 	kvm_reset_vcpu(vcpu);
 
 	/* Gracefully handle Thumb2 entry point */
@@ -79,6 +79,7 @@ static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu)
 	vcpu->arch.pause = false;
 	smp_mb();		/* Make sure the above is visible */
 
+	wq = kvm_arch_vcpu_wq(vcpu);
 	wake_up_interruptible(wq);
 
 	return KVM_PSCI_RET_SUCCESS;
-- 
cgit v1.1


From a1a64387adeeba7a34ce06f2774e81f496ee803b Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Sat, 16 Nov 2013 10:51:25 -0800
Subject: arm/arm64: KVM: arch_timer: Initialize cntvoff at kvm_init

Initialize the cntvoff at kvm_init_vm time, not before running the VCPUs
for the first time, because that would overwrite any values potentially
restored from user space.

Cc: Andre Przywara <andre.przywara@linaro.org>
Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 2a700e0..13205bd 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -137,6 +137,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	if (ret)
 		goto out_free_stage2_pgd;
 
+	kvm_timer_init(kvm);
+
 	/* Mark the initial VMID generation invalid */
 	kvm->arch.vmid_gen = 0;
 
-- 
cgit v1.1


From 39735a3a390431bcf60f9174b7d64f787fd6afa9 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@linaro.org>
Date: Fri, 13 Dec 2013 14:23:26 +0100
Subject: ARM/KVM: save and restore generic timer registers

For migration to work we need to save (and later restore) the state of
each core's virtual generic timer.
Since this is per VCPU, we can use the [gs]et_one_reg ioctl and export
the three needed registers (control, counter, compare value).
Though they live in cp15 space, we don't use the existing list, since
they need special accessor functions and the arch timer is optional.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/asm/kvm_host.h   |  3 ++
 arch/arm/include/uapi/asm/kvm.h   | 20 +++++++++
 arch/arm/kvm/guest.c              | 92 ++++++++++++++++++++++++++++++++++++++-
 arch/arm64/include/uapi/asm/kvm.h | 18 ++++++++
 4 files changed, 132 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 8a6f6db..098f7dd 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -225,4 +225,7 @@ static inline int kvm_arch_dev_ioctl_check_extension(long ext)
 int kvm_perf_init(void);
 int kvm_perf_teardown(void);
 
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
+int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index c498b60..835b867 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -119,6 +119,26 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_32_CRN_MASK		0x0000000000007800
 #define KVM_REG_ARM_32_CRN_SHIFT	11
 
+#define ARM_CP15_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
+
+#define __ARM_CP15_REG(op1,crn,crm,op2) \
+	(KVM_REG_ARM | (15 << KVM_REG_ARM_COPROC_SHIFT) | \
+	ARM_CP15_REG_SHIFT_MASK(op1, OPC1) | \
+	ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN) | \
+	ARM_CP15_REG_SHIFT_MASK(crm, CRM) | \
+	ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
+
+#define ARM_CP15_REG32(...) (__ARM_CP15_REG(__VA_ARGS__) | KVM_REG_SIZE_U32)
+
+#define __ARM_CP15_REG64(op1,crm) \
+	(__ARM_CP15_REG(op1, 0, crm, 0) | KVM_REG_SIZE_U64)
+#define ARM_CP15_REG64(...) __ARM_CP15_REG64(__VA_ARGS__)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM_CP15_REG32(0, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM_CP15_REG64(1, 14) 
+#define KVM_REG_ARM_TIMER_CVAL		ARM_CP15_REG64(3, 14) 
+
 /* Normal registers are mapped as coprocessor 16. */
 #define KVM_REG_ARM_CORE		(0x0010 << KVM_REG_ARM_COPROC_SHIFT)
 #define KVM_REG_ARM_CORE_REG(name)	(offsetof(struct kvm_regs, name) / 4)
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 20f8d97..2786eae 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -109,6 +109,83 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 	return -EINVAL;
 }
 
+#ifndef CONFIG_KVM_ARM_TIMER
+
+#define NUM_TIMER_REGS 0
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	return 0;
+}
+
+static bool is_timer_reg(u64 index)
+{
+	return false;
+}
+
+int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value)
+{
+	return 0;
+}
+
+u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid)
+{
+	return 0;
+}
+
+#else
+
+#define NUM_TIMER_REGS 3
+
+static bool is_timer_reg(u64 index)
+{
+	switch (index) {
+	case KVM_REG_ARM_TIMER_CTL:
+	case KVM_REG_ARM_TIMER_CNT:
+	case KVM_REG_ARM_TIMER_CVAL:
+		return true;
+	}
+	return false;
+}
+
+static int copy_timer_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
+{
+	if (put_user(KVM_REG_ARM_TIMER_CTL, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CNT, uindices))
+		return -EFAULT;
+	uindices++;
+	if (put_user(KVM_REG_ARM_TIMER_CVAL, uindices))
+		return -EFAULT;
+
+	return 0;
+}
+
+#endif
+
+static int set_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+	int ret;
+
+	ret = copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id));
+	if (ret != 0)
+		return ret;
+
+	return kvm_arm_timer_set_reg(vcpu, reg->id, val);
+}
+
+static int get_timer_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
+{
+	void __user *uaddr = (void __user *)(long)reg->addr;
+	u64 val;
+
+	val = kvm_arm_timer_get_reg(vcpu, reg->id);
+	return copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id));
+}
+
 static unsigned long num_core_regs(void)
 {
 	return sizeof(struct kvm_regs) / sizeof(u32);
@@ -121,7 +198,8 @@ static unsigned long num_core_regs(void)
  */
 unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu)
 {
-	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu);
+	return num_core_regs() + kvm_arm_num_coproc_regs(vcpu)
+		+ NUM_TIMER_REGS;
 }
 
 /**
@@ -133,6 +211,7 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 {
 	unsigned int i;
 	const u64 core_reg = KVM_REG_ARM | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE;
+	int ret;
 
 	for (i = 0; i < sizeof(struct kvm_regs)/sizeof(u32); i++) {
 		if (put_user(core_reg | i, uindices))
@@ -140,6 +219,11 @@ int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
 		uindices++;
 	}
 
+	ret = copy_timer_indices(vcpu, uindices);
+	if (ret)
+		return ret;
+	uindices += NUM_TIMER_REGS;
+
 	return kvm_arm_copy_coproc_indices(vcpu, uindices);
 }
 
@@ -153,6 +237,9 @@ int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return get_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return get_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_get_reg(vcpu, reg);
 }
 
@@ -166,6 +253,9 @@ int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
 	if ((reg->id & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_CORE)
 		return set_core_reg(vcpu, reg);
 
+	if (is_timer_reg(reg->id))
+		return set_timer_reg(vcpu, reg);
+
 	return kvm_arm_coproc_set_reg(vcpu, reg);
 }
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5031f42..7c25ca8 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -129,6 +129,24 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM64_SYSREG_OP2_MASK	0x0000000000000007
 #define KVM_REG_ARM64_SYSREG_OP2_SHIFT	0
 
+#define ARM64_SYS_REG_SHIFT_MASK(x,n) \
+	(((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) & \
+	KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
+
+#define __ARM64_SYS_REG(op0,op1,crn,crm,op2) \
+	(KVM_REG_ARM64 | KVM_REG_ARM64_SYSREG | \
+	ARM64_SYS_REG_SHIFT_MASK(op0, OP0) | \
+	ARM64_SYS_REG_SHIFT_MASK(op1, OP1) | \
+	ARM64_SYS_REG_SHIFT_MASK(crn, CRN) | \
+	ARM64_SYS_REG_SHIFT_MASK(crm, CRM) | \
+	ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
+
+#define ARM64_SYS_REG(...) (__ARM64_SYS_REG(__VA_ARGS__) | KVM_REG_SIZE_U64)
+
+#define KVM_REG_ARM_TIMER_CTL		ARM64_SYS_REG(3, 3, 14, 3, 1)
+#define KVM_REG_ARM_TIMER_CNT		ARM64_SYS_REG(3, 3, 14, 3, 2)
+#define KVM_REG_ARM_TIMER_CVAL		ARM64_SYS_REG(3, 3, 14, 0, 2)
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
 #define KVM_ARM_IRQ_TYPE_MASK		0xff
-- 
cgit v1.1


From e1ba0207a1b3714bb3f000e506285ae5123cdfa7 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:55 -0700
Subject: ARM: KVM: Allow creating the VGIC after VCPUs

Rework the VGIC initialization slightly to allow initialization of the
vgic cpu-specific state even if the irqchip (the VGIC) hasn't been
created by user space yet.  This is safe, because the vgic data
structures are already allocated when the CPU is allocated if VGIC
support is compiled into the kernel.  Further, the init process does not
depend on any other information and the sacrifice is a slight
performance degradation for creating VMs in the no-VGIC case.

The reason is that the new device control API doesn't mandate creating
the VGIC before creating the VCPU and it is unreasonable to require user
space to create the VGIC before creating the VCPUs.

At the same time move the irqchip_in_kernel check out of
kvm_vcpu_first_run_init and into the init function to make the per-vcpu
and global init functions symmetric and add comments on the exported
functions making it a bit easier to understand the init flow by only
looking at vgic.c.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 13205bd..c9fe9d7 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -464,6 +464,8 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	int ret;
+
 	if (likely(vcpu->arch.has_run_once))
 		return 0;
 
@@ -473,9 +475,8 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Initialize the VGIC before running a vcpu the first time on
 	 * this VM.
 	 */
-	if (irqchip_in_kernel(vcpu->kvm) &&
-	    unlikely(!vgic_initialized(vcpu->kvm))) {
-		int ret = kvm_vgic_init(vcpu->kvm);
+	if (unlikely(!vgic_initialized(vcpu->kvm))) {
+		ret = kvm_vgic_init(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
-- 
cgit v1.1


From 7330672befe6269e575f79b924a7068b26c144b4 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 17:29:18 +0100
Subject: KVM: arm-vgic: Support KVM_CREATE_DEVICE for VGIC

Support creating the ARM VGIC device through the KVM_CREATE_DEVICE
ioctl, which can later be leveraged to use the
KVM_{GET/SET}_DEVICE_ATTR API. That API is useful both for setting
addresses in a more generic way than the ARM-specific one and for
save/restore of VGIC state.

Adds KVM_CAP_DEVICE_CTRL to ARM capabilities.

Note that we change the check for creating a VGIC from bailing out if
any VCPUs were created, to bailing out if any VCPUs were ever run.  This
is an important distinction that shouldn't break anything, but allows
creating the VGIC after the VCPUs have been created.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c9fe9d7..cc7c41a 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -190,6 +190,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 	case KVM_CAP_IRQCHIP:
 		r = vgic_present;
 		break;
+	case KVM_CAP_DEVICE_CTRL:
 	case KVM_CAP_USER_MEMORY:
 	case KVM_CAP_SYNC_MMU:
 	case KVM_CAP_DESTROY_MEMORY_REGION_WORKS:
-- 
cgit v1.1


From ce01e4e8874d410738f4b4733b26642d6611a331 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Mon, 23 Sep 2013 14:55:56 -0700
Subject: KVM: arm-vgic: Set base addr through device API

Support setting the distributor and cpu interface base addresses in the
VM physical address space through the KVM_{SET,GET}_DEVICE_ATTR API
in addition to the ARM specific API.

This has the added benefit of being able to share more code in user
space and do things in a uniform manner.

Also deprecate the older API at the same time, but backwards
compatibility will be maintained.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/uapi/asm/kvm.h | 2 ++
 arch/arm/kvm/arm.c              | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 835b867..76a7427 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -163,6 +163,8 @@ struct kvm_arch_memory_slot {
 #define KVM_REG_ARM_VFP_FPINST		0x1009
 #define KVM_REG_ARM_VFP_FPINST2		0x100A
 
+/* Device Control API: ARM VGIC */
+#define KVM_DEV_ARM_VGIC_GRP_ADDR	0
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index cc7c41a..f290b22 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -776,7 +776,7 @@ static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm,
 	case KVM_ARM_DEVICE_VGIC_V2:
 		if (!vgic_present)
 			return -ENXIO;
-		return kvm_vgic_set_addr(kvm, type, dev_addr->addr);
+		return kvm_vgic_addr(kvm, type, &dev_addr->addr, true);
 	default:
 		return -ENODEV;
 	}
-- 
cgit v1.1


From e9b152cb957cb194437f37e79f0f3c9d34fe53d6 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Wed, 11 Dec 2013 20:29:11 -0800
Subject: arm/arm64: kvm: Set vcpu->cpu to -1 on vcpu_put

The arch-generic KVM code expects the cpu field of a vcpu to be -1 if
the vcpu is no longer assigned to a cpu.  This is used for the optimized
make_all_cpus_request path and will be used by the vgic code to check
that no vcpus are running.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index f290b22..b92ff6d3 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -342,6 +342,13 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
 {
+	/*
+	 * The arch-generic KVM code expects the cpu field of a vcpu to be -1
+	 * if the vcpu is no longer assigned to a cpu.  This is used for the
+	 * optimized make_all_cpus_request path.
+	 */
+	vcpu->cpu = -1;
+
 	kvm_arm_set_running_vcpu(NULL);
 }
 
-- 
cgit v1.1


From c07a0191ef2de1f9510f12d1f88e3b0b5cd8d66f Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 25 Oct 2013 21:17:31 +0100
Subject: KVM: arm-vgic: Add vgic reg access from dev attr

Add infrastructure to handle distributor and cpu interface register
accesses through the KVM_{GET/SET}_DEVICE_ATTR interface by adding the
KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_CPU_REGS groups
and defining the semantics of the attr field to be the MMIO offset as
specified in the GICv2 specs.

Missing register accesses or other changes in individual register access
functions to support save/restore of the VGIC state are added in
subsequent patches.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/include/uapi/asm/kvm.h | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'arch')

diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index 76a7427..ef0c878 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -165,6 +165,12 @@ struct kvm_arch_memory_slot {
 
 /* Device Control API: ARM VGIC */
 #define KVM_DEV_ARM_VGIC_GRP_ADDR	0
+#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS	1
+#define KVM_DEV_ARM_VGIC_GRP_CPU_REGS	2
+#define   KVM_DEV_ARM_VGIC_CPUID_SHIFT	32
+#define   KVM_DEV_ARM_VGIC_CPUID_MASK	(0xffULL << KVM_DEV_ARM_VGIC_CPUID_SHIFT)
+#define   KVM_DEV_ARM_VGIC_OFFSET_SHIFT	0
+#define   KVM_DEV_ARM_VGIC_OFFSET_MASK	(0xffffffffULL << KVM_DEV_ARM_VGIC_OFFSET_SHIFT)
 
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_TYPE_SHIFT		24
-- 
cgit v1.1


From da7814700a0c408bead58ce4714b7625ffbaade1 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 12 Dec 2013 16:12:22 +0000
Subject: arm64: KVM: Add Kconfig option for max VCPUs per-Guest

The current max VCPUs per-Guest is set to 4, which prevents
us from creating a Guest (or VM) with 8 VCPUs on a Host (e.g.
X-Gene Storm SOC) with 8 Host CPUs.

The correct value of max VCPUs per-Guest should be the same as
the max CPUs supported by GICv2, which is 8, but increasing
the value of max VCPUs per-Guest can make things slower; hence
we add a Kconfig option to let KVM users select an appropriate
max VCPUs per-Guest.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/asm/kvm_host.h |  7 ++++++-
 arch/arm64/kvm/Kconfig            | 11 +++++++++++
 2 files changed, 17 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5d85a02..0a1d697 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -26,7 +26,12 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_mmio.h>
 
-#define KVM_MAX_VCPUS 4
+#if defined(CONFIG_KVM_ARM_MAX_VCPUS)
+#define KVM_MAX_VCPUS CONFIG_KVM_ARM_MAX_VCPUS
+#else
+#define KVM_MAX_VCPUS 0
+#endif
+
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_COALESCED_MMIO_PAGE_OFFSET 1
diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig
index 4480ab3..8ba85e9 100644
--- a/arch/arm64/kvm/Kconfig
+++ b/arch/arm64/kvm/Kconfig
@@ -36,6 +36,17 @@ config KVM_ARM_HOST
 	---help---
 	  Provides host support for ARM processors.
 
+config KVM_ARM_MAX_VCPUS
+	int "Number maximum supported virtual CPUs per VM"
+	depends on KVM_ARM_HOST
+	default 4
+	help
+	  Static number of max supported virtual CPUs per VM.
+
+	  If you choose a high number, the vcpu structures will be quite
+	  large, so only choose a reasonable number that you expect to
+	  actually use.
+
 config KVM_ARM_VGIC
 	bool
 	depends on KVM_ARM_HOST && OF
-- 
cgit v1.1


From e28100bd8ed9e37b7cd4578140a1e7f95bd40835 Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 14 Nov 2013 15:20:08 +0000
Subject: arm64: KVM: Support X-Gene guest VCPU on APM X-Gene host

This patch allows us to have X-Gene guest VCPU when using KVM arm64
on APM X-Gene host.

We add KVM_ARM_TARGET_XGENE_POTENZA for X-Gene Potenza compatible
guest VCPU and we return KVM_ARM_TARGET_XGENE_POTENZA in kvm_target_cpu()
when running on X-Gene host with Potenza core.

[maz: sanitized the commit log]

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/include/uapi/asm/kvm.h    |  3 ++-
 arch/arm64/kvm/guest.c               | 32 +++++++++++++++++++-------------
 arch/arm64/kvm/sys_regs_generic_v8.c |  3 +++
 3 files changed, 24 insertions(+), 14 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index 5031f42..d9f026b 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -55,8 +55,9 @@ struct kvm_regs {
 #define KVM_ARM_TARGET_AEM_V8		0
 #define KVM_ARM_TARGET_FOUNDATION_V8	1
 #define KVM_ARM_TARGET_CORTEX_A57	2
+#define KVM_ARM_TARGET_XGENE_POTENZA	3
 
-#define KVM_ARM_NUM_TARGETS		3
+#define KVM_ARM_NUM_TARGETS		4
 
 /* KVM_ARM_SET_DEVICE_ADDR ioctl id encoding */
 #define KVM_ARM_DEVICE_TYPE_SHIFT	0
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index 3f0731e..0874557 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -207,20 +207,26 @@ int __attribute_const__ kvm_target_cpu(void)
 	unsigned long implementor = read_cpuid_implementor();
 	unsigned long part_number = read_cpuid_part_number();
 
-	if (implementor != ARM_CPU_IMP_ARM)
-		return -EINVAL;
+	switch (implementor) {
+	case ARM_CPU_IMP_ARM:
+		switch (part_number) {
+		case ARM_CPU_PART_AEM_V8:
+			return KVM_ARM_TARGET_AEM_V8;
+		case ARM_CPU_PART_FOUNDATION:
+			return KVM_ARM_TARGET_FOUNDATION_V8;
+		case ARM_CPU_PART_CORTEX_A57:
+			return KVM_ARM_TARGET_CORTEX_A57;
+		};
+		break;
+	case ARM_CPU_IMP_APM:
+		switch (part_number) {
+		case APM_CPU_PART_POTENZA:
+			return KVM_ARM_TARGET_XGENE_POTENZA;
+		};
+		break;
+	};
 
-	switch (part_number) {
-	case ARM_CPU_PART_AEM_V8:
-		return KVM_ARM_TARGET_AEM_V8;
-	case ARM_CPU_PART_FOUNDATION:
-		return KVM_ARM_TARGET_FOUNDATION_V8;
-	case ARM_CPU_PART_CORTEX_A57:
-		/* Currently handled by the generic backend */
-		return KVM_ARM_TARGET_CORTEX_A57;
-	default:
-		return -EINVAL;
-	}
+	return -EINVAL;
 }
 
 int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
diff --git a/arch/arm64/kvm/sys_regs_generic_v8.c b/arch/arm64/kvm/sys_regs_generic_v8.c
index 4268ab9..8fe6f76 100644
--- a/arch/arm64/kvm/sys_regs_generic_v8.c
+++ b/arch/arm64/kvm/sys_regs_generic_v8.c
@@ -90,6 +90,9 @@ static int __init sys_reg_genericv8_init(void)
 					  &genericv8_target_table);
 	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_CORTEX_A57,
 					  &genericv8_target_table);
+	kvm_register_target_sys_reg_table(KVM_ARM_TARGET_XGENE_POTENZA,
+					  &genericv8_target_table);
+
 	return 0;
 }
 late_initcall(sys_reg_genericv8_init);
-- 
cgit v1.1


From e5cf9dcdbfd26cd4e1991db08755da900454efeb Mon Sep 17 00:00:00 2001
From: Anup Patel <anup.patel@linaro.org>
Date: Thu, 12 Dec 2013 16:12:23 +0000
Subject: arm64: KVM: Force undefined exception for Guest SMC instructions

The SMC-based PSCI emulation for the Guest is going to be very different
from the in-kernel HVC-based PSCI emulation; hence, for now, just inject
an undefined exception when the Guest executes an SMC instruction.

Signed-off-by: Anup Patel <anup.patel@linaro.org>
Signed-off-by: Pranavkumar Sawargaonkar <pranavkumar@linaro.org>
Signed-off-by: marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm64/kvm/handle_exit.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch')

diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
index 8da5606..df84d7b 100644
--- a/arch/arm64/kvm/handle_exit.c
+++ b/arch/arm64/kvm/handle_exit.c
@@ -39,9 +39,6 @@ static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 
 static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run)
 {
-	if (kvm_psci_call(vcpu))
-		return 1;
-
 	kvm_inject_undefined(vcpu);
 	return 1;
 }
-- 
cgit v1.1


From 2f0a6397dd3cac2fb05b46cad08c1d532c04d6b8 Mon Sep 17 00:00:00 2001
From: Zhihui Zhang <zzhsuny@gmail.com>
Date: Mon, 30 Dec 2013 15:56:29 -0500
Subject: KVM: VMX: check use I/O bitmap first before unconditional I/O exit

According to Table C-1 of Intel SDM 3C, a VM exit happens on an I/O instruction when
"use I/O bitmaps" VM-execution control was 0 _and_ the "unconditional I/O exiting"
VM-execution control was 1. So we can't just check "unconditional I/O exiting" alone.
This patch was improved by suggestion from Jan Kiszka.

Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Zhihui Zhang <zzhsuny@gmail.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9cc5484..0abf8b7 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6521,11 +6521,8 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
 	int size;
 	u8 b;
 
-	if (nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING))
-		return 1;
-
 	if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
-		return 0;
+		return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
-- 
cgit v1.1


From 96893977b8f732493815e7a2b552c37e1bb967e5 Mon Sep 17 00:00:00 2001
From: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Date: Thu, 2 Jan 2014 17:14:11 +0800
Subject: KVM: x86: Fix debug typo error in lapic

fix the 'vcpi' typos when apic_debug is enabled.

Signed-off-by: Chen Fan <chen.fan.fnst@cn.fujitsu.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/lapic.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 5439117..206715b 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -432,7 +432,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 	u8 val;
 	if (pv_eoi_get_user(vcpu, &val) < 0)
 		apic_debug("Can't read EOI MSR value: 0x%llx\n",
-			   (unsigned long long)vcpi->arch.pv_eoi.msr_val);
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 	return val & 0x1;
 }
 
@@ -440,7 +440,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 {
 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
 		apic_debug("Can't set EOI MSR value: 0x%llx\n",
-			   (unsigned long long)vcpi->arch.pv_eoi.msr_val);
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return;
 	}
 	__set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
@@ -450,7 +450,7 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 {
 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
 		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
-			   (unsigned long long)vcpi->arch.pv_eoi.msr_val);
+			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return;
 	}
 	__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
-- 
cgit v1.1


From 26a865f4aa8e66a6d94958de7656f7f1b03c6c56 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 3 Jan 2014 17:00:51 -0200
Subject: KVM: VMX: fix use after free of vmx->loaded_vmcs

After free_loaded_vmcs executes, the "loaded_vmcs" structure
is kfreed, and now vmx->loaded_vmcs points to a kfreed area.
Subsequent free_loaded_vmcs then attempts to manipulate
vmx->loaded_vmcs.

Switch the order to avoid the problem.

https://bugzilla.redhat.com/show_bug.cgi?id=1047892

Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/vmx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 0abf8b7..7661eb1 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -7390,8 +7390,8 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 
 	free_vpid(vmx);
-	free_nested(vmx);
 	free_loaded_vmcs(vmx->loaded_vmcs);
+	free_nested(vmx);
 	kfree(vmx->guest_msrs);
 	kvm_vcpu_uninit(vcpu);
 	kmem_cache_free(kvm_vcpu_cache, vmx);
-- 
cgit v1.1


From 136d737fd20102f1be9b02356590fd55e3a40d0e Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Fri, 13 Dec 2013 16:56:06 +0000
Subject: arm/arm64: KVM: relax the requirements of VMA alignment for THP

The THP code in KVM/ARM is a bit restrictive in not allowing a THP
to be used if the VMA is not 2MB aligned. Actually, it is not so much
the VMA that matters, but the associated memslot:

A process can perfectly mmap a region with no particular alignment
restriction, and then pass a 2MB aligned address to KVM. In this
case, KVM will only use this 2MB aligned region, and will ignore
the range between vma->vm_start and memslot->userspace_addr.

It can also choose to place this memslot at whatever alignment it
wants in the IPA space. In the end, what matters is the relative
alignment of the user space and IPA mappings with respect to a
2M page. They absolutely must be the same if you want to use THP.

Cc: Christoffer Dall <christoffer.dall@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/mmu.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 659db0e..7789857 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -667,14 +667,16 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 		gfn = (fault_ipa & PMD_MASK) >> PAGE_SHIFT;
 	} else {
 		/*
-		 * Pages belonging to VMAs not aligned to the PMD mapping
-		 * granularity cannot be mapped using block descriptors even
-		 * if the pages belong to a THP for the process, because the
-		 * stage-2 block descriptor will cover more than a single THP
-		 * and we loose atomicity for unmapping, updates, and splits
-		 * of the THP or other pages in the stage-2 block range.
+		 * Pages belonging to memslots that don't have the same
+		 * alignment for userspace and IPA cannot be mapped using
+		 * block descriptors even if the pages belong to a THP for
+		 * the process, because the stage-2 block descriptor will
+		 * cover more than a single THP and we loose atomicity for
+		 * unmapping, updates, and splits of the THP or other pages
+		 * in the stage-2 block range.
 		 */
-		if (vma->vm_start & ~PMD_MASK)
+		if ((memslot->userspace_addr & ~PMD_MASK) !=
+		    ((memslot->base_gfn << PAGE_SHIFT) & ~PMD_MASK))
 			force_pte = true;
 	}
 	up_read(&current->mm->mmap_sem);
-- 
cgit v1.1


From 61466710de078c697106fa5b70ec7afc9feab520 Mon Sep 17 00:00:00 2001
From: Sachin Kamat <sachin.kamat@linaro.org>
Date: Tue, 7 Jan 2014 13:45:15 +0530
Subject: KVM: ARM: Remove duplicate include

trace.h was included twice. Remove duplicate inclusion.

Signed-off-by: Sachin Kamat <sachin.kamat@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/handle_exit.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c
index a920790..0de91fc 100644
--- a/arch/arm/kvm/handle_exit.c
+++ b/arch/arm/kvm/handle_exit.c
@@ -26,8 +26,6 @@
 
 #include "trace.h"
 
-#include "trace.h"
-
 typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *);
 
 static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run)
-- 
cgit v1.1


From 37f6a4e237303549c8676dfe1fd1991ceab512eb Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Fri, 3 Jan 2014 17:09:32 -0200
Subject: KVM: x86: handle invalid root_hpa everywhere

Rom Freiman <rom@stratoscale.com> notes other code paths vulnerable to
the bug fixed by 989c6b34f6a9480e397b.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
---
 arch/x86/kvm/mmu.c         | 9 +++++++++
 arch/x86/kvm/paging_tmpl.h | 8 ++++++++
 2 files changed, 17 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 31a5702..e50425d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -2832,6 +2832,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level,
 	bool ret = false;
 	u64 spte = 0ull;
 
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return false;
+
 	if (!page_fault_can_be_fast(error_code))
 		return false;
 
@@ -3227,6 +3230,9 @@ static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr)
 	struct kvm_shadow_walk_iterator iterator;
 	u64 spte = 0ull;
 
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return spte;
+
 	walk_shadow_page_lockless_begin(vcpu);
 	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte)
 		if (!is_shadow_present_pte(spte))
@@ -4513,6 +4519,9 @@ int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4])
 	u64 spte;
 	int nr_sptes = 0;
 
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		return nr_sptes;
+
 	walk_shadow_page_lockless_begin(vcpu);
 	for_each_shadow_entry_lockless(vcpu, addr, iterator, spte) {
 		sptes[iterator.level-1] = spte;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index ad75d77..cba218a 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -569,6 +569,9 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (FNAME(gpte_changed)(vcpu, gw, top_level))
 		goto out_gpte_changed;
 
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+		goto out_gpte_changed;
+
 	for (shadow_walk_init(&it, vcpu, addr);
 	     shadow_walk_okay(&it) && it.level > gw->level;
 	     shadow_walk_next(&it)) {
@@ -820,6 +823,11 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
 	 */
 	mmu_topup_memory_caches(vcpu);
 
+	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) {
+		WARN_ON(1);
+		return;
+	}
+
 	spin_lock(&vcpu->kvm->mmu_lock);
 	for_each_shadow_entry(vcpu, gva, iterator) {
 		level = iterator.level;
-- 
cgit v1.1


From 9ed96e87c5748de4c2807ef17e81287c7304186c Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 6 Jan 2014 12:00:02 -0200
Subject: KVM: x86: limit PIT timer frequency

Limit PIT timer frequency similarly to the limit applied by
LAPIC timer.

Cc: stable@kernel.org
Reviewed-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/i8254.c | 18 ++++++++++++++++++
 arch/x86/kvm/lapic.c |  3 ---
 arch/x86/kvm/x86.c   |  3 +++
 arch/x86/kvm/x86.h   |  2 ++
 4 files changed, 23 insertions(+), 3 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 412a5aa..518d864 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -37,6 +37,7 @@
 
 #include "irq.h"
 #include "i8254.h"
+#include "x86.h"
 
 #ifndef CONFIG_X86_64
 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
@@ -349,6 +350,23 @@ static void create_pit_timer(struct kvm *kvm, u32 val, int is_period)
 	atomic_set(&ps->pending, 0);
 	ps->irq_ack = 1;
 
+	/*
+	 * Do not allow the guest to program periodic timers with small
+	 * interval, since the hrtimers are not throttled by the host
+	 * scheduler.
+	 */
+	if (ps->is_periodic) {
+		s64 min_period = min_timer_period_us * 1000LL;
+
+		if (ps->period < min_period) {
+			pr_info_ratelimited(
+			    "kvm: requested %lld ns "
+			    "i8254 timer period limited to %lld ns\n",
+			    ps->period, min_period);
+			ps->period = min_period;
+		}
+	}
+
 	hrtimer_start(&ps->timer, ktime_add_ns(ktime_get(), interval),
 		      HRTIMER_MODE_ABS);
 }
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 206715b..1ac0093 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,9 +71,6 @@
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
-static unsigned int min_timer_period_us = 500;
-module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
-
 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
 {
 	*((u32 *) (apic->regs + reg_off)) = val;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1dc0359..0fd2bd7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -94,6 +94,9 @@ EXPORT_SYMBOL_GPL(kvm_x86_ops);
 static bool ignore_msrs = 0;
 module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR);
 
+unsigned int min_timer_period_us = 500;
+module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR);
+
 bool kvm_has_tsc_control;
 EXPORT_SYMBOL_GPL(kvm_has_tsc_control);
 u32  kvm_max_guest_tsc_khz;
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index 587fb9e..8da5823 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -125,5 +125,7 @@ int kvm_write_guest_virt_system(struct x86_emulate_ctxt *ctxt,
 #define KVM_SUPPORTED_XCR0	(XSTATE_FP | XSTATE_SSE | XSTATE_YMM)
 extern u64 host_xcr0;
 
+extern unsigned int min_timer_period_us;
+
 extern struct static_key kvm_no_apic_vcpu;
 #endif
-- 
cgit v1.1


From f25e656d31ad112612839edaded18920cafea3b1 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti <mtosatti@redhat.com>
Date: Mon, 6 Jan 2014 12:18:59 -0200
Subject: KVM: x86: fix tsc catchup issue with tsc scaling

To fix a problem related to different resolution of TSC and system clock,
the offset in TSC units is approximated by

delta = vcpu->hv_clock.tsc_timestamp 	- 	vcpu->last_guest_tsc

(Guest TSC value at 			(Guest TSC value at last VM-exit)
the last kvm_guest_time_update
call)

Delta is then later scaled using mult,shift pair found in hv_clock
structure (which is correct against tsc_timestamp in that
structure).

However, if a frequency change is performed between these two points,
this delta is measured using different TSC frequencies, but scaled using
mult,shift pair for one frequency only.

The end result is an incorrect delta.

The bug which this code works around is not the only cause for
clock backwards events. The global accumulator is still
necessary, so remove the max_kernel_ns fix and rely on the
global accumulator for no clock backwards events.

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 41 +----------------------------------------
 1 file changed, 1 insertion(+), 40 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fd2bd7..842abd3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1487,7 +1487,7 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	unsigned long flags, this_tsc_khz;
 	struct kvm_vcpu_arch *vcpu = &v->arch;
 	struct kvm_arch *ka = &v->kvm->arch;
-	s64 kernel_ns, max_kernel_ns;
+	s64 kernel_ns;
 	u64 tsc_timestamp, host_tsc;
 	struct pvclock_vcpu_time_info guest_hv_clock;
 	u8 pvclock_flags;
@@ -1546,37 +1546,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 	if (!vcpu->pv_time_enabled)
 		return 0;
 
-	/*
-	 * Time as measured by the TSC may go backwards when resetting the base
-	 * tsc_timestamp.  The reason for this is that the TSC resolution is
-	 * higher than the resolution of the other clock scales.  Thus, many
-	 * possible measurments of the TSC correspond to one measurement of any
-	 * other clock, and so a spread of values is possible.  This is not a
-	 * problem for the computation of the nanosecond clock; with TSC rates
-	 * around 1GHZ, there can only be a few cycles which correspond to one
-	 * nanosecond value, and any path through this code will inevitably
-	 * take longer than that.  However, with the kernel_ns value itself,
-	 * the precision may be much lower, down to HZ granularity.  If the
-	 * first sampling of TSC against kernel_ns ends in the low part of the
-	 * range, and the second in the high end of the range, we can get:
-	 *
-	 * (TSC - offset_low) * S + kns_old > (TSC - offset_high) * S + kns_new
-	 *
-	 * As the sampling errors potentially range in the thousands of cycles,
-	 * it is possible such a time value has already been observed by the
-	 * guest.  To protect against this, we must compute the system time as
-	 * observed by the guest and ensure the new system time is greater.
-	 */
-	max_kernel_ns = 0;
-	if (vcpu->hv_clock.tsc_timestamp) {
-		max_kernel_ns = vcpu->last_guest_tsc -
-				vcpu->hv_clock.tsc_timestamp;
-		max_kernel_ns = pvclock_scale_delta(max_kernel_ns,
-				    vcpu->hv_clock.tsc_to_system_mul,
-				    vcpu->hv_clock.tsc_shift);
-		max_kernel_ns += vcpu->last_kernel_ns;
-	}
-
 	if (unlikely(vcpu->hw_tsc_khz != this_tsc_khz)) {
 		kvm_get_time_scale(NSEC_PER_SEC / 1000, this_tsc_khz,
 				   &vcpu->hv_clock.tsc_shift,
@@ -1584,14 +1553,6 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
 		vcpu->hw_tsc_khz = this_tsc_khz;
 	}
 
-	/* with a master <monotonic time, tsc value> tuple,
-	 * pvclock clock reads always increase at the (scaled) rate
-	 * of guest TSC - no need to deal with sampling errors.
-	 */
-	if (!use_master_clock) {
-		if (max_kernel_ns > kernel_ns)
-			kernel_ns = max_kernel_ns;
-	}
 	/* With all the info we got, fill in the values */
 	vcpu->hv_clock.tsc_timestamp = tsc_timestamp;
 	vcpu->hv_clock.system_time = kernel_ns + v->kvm->arch.kvmclock_offset;
-- 
cgit v1.1


From aab6d7ce37cf20753a336dc74473cf8a8aefa7c0 Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Wed, 15 Jan 2014 18:07:31 +0100
Subject: KVM: remove useless write to vcpu->hv_clock.tsc_timestamp

After the previous patch from Marcelo, the comment before this write
became obsolete.  In fact, the write is unnecessary.  The calls to
kvm_write_tsc ultimately result in a master clock update as soon as
all TSCs agree and the master clock is re-enabled.  This master
clock update will rewrite tsc_timestamp.

So, together with the comment, delete the dead write too.

Reviewed-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 842abd3..0fbdced 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1278,8 +1278,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	kvm->arch.last_tsc_write = data;
 	kvm->arch.last_tsc_khz = vcpu->arch.virtual_tsc_khz;
 
-	/* Reset of TSC must disable overshoot protection below */
-	vcpu->arch.hv_clock.tsc_timestamp = 0;
 	vcpu->arch.last_guest_tsc = data;
 
 	/* Keep track of which generation this VCPU has synchronized to */
-- 
cgit v1.1


From e984097b553ed2d6551c805223e4057421370f00 Mon Sep 17 00:00:00 2001
From: Vadim Rozenfeld <vrozenfe@redhat.com>
Date: Thu, 16 Jan 2014 20:18:37 +1100
Subject: add support for Hyper-V reference time counter
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off: Peter Lieven <pl@kamp.de>
Signed-off: Gleb Natapov
Signed-off: Vadim Rozenfeld <vrozenfe@redhat.com>

After some consideration I decided to submit only Hyper-V reference
counters support this time. I will submit iTSC support as a separate
patch as soon as it is ready.

v1 -> v2
1. mark TSC page dirty as suggested by
    Eric Northup <digitaleric@google.com> and Gleb
2. disable local irq when calling get_kernel_ns,
    as it was done by Peter Lieven <pl@kamp.de>
3. move check for TSC page enable from second patch
    to this one.

v3 -> v4
    Get rid of ref counter offset.

v4 -> v5
    replace __copy_to_user with kvm_write_guest
    when updating iTSC page.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h    |  1 +
 arch/x86/include/uapi/asm/hyperv.h | 13 +++++++++++++
 arch/x86/kvm/x86.c                 | 28 +++++++++++++++++++++++++++-
 3 files changed, 41 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index ae5d783..33fef07 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -605,6 +605,7 @@ struct kvm_arch {
 	/* fields used by HYPER-V emulation */
 	u64 hv_guest_os_id;
 	u64 hv_hypercall;
+	u64 hv_tsc_page;
 
 	#ifdef CONFIG_KVM_MMU_AUDIT
 	int audit_point;
diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h
index b8f1c01..462efe7 100644
--- a/arch/x86/include/uapi/asm/hyperv.h
+++ b/arch/x86/include/uapi/asm/hyperv.h
@@ -28,6 +28,9 @@
 /* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/
 #define HV_X64_MSR_TIME_REF_COUNT_AVAILABLE	(1 << 1)
 
+/* A partition's reference time stamp counter (TSC) page */
+#define HV_X64_MSR_REFERENCE_TSC		0x40000021
+
 /*
  * There is a single feature flag that signifies the presence of the MSR
  * that can be used to retrieve both the local APIC Timer frequency as
@@ -198,6 +201,9 @@
 #define HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_MASK	\
 		(~((1ull << HV_X64_MSR_APIC_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
 
+#define HV_X64_MSR_TSC_REFERENCE_ENABLE		0x00000001
+#define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT	12
+
 #define HV_PROCESSOR_POWER_STATE_C0		0
 #define HV_PROCESSOR_POWER_STATE_C1		1
 #define HV_PROCESSOR_POWER_STATE_C2		2
@@ -210,4 +216,11 @@
 #define HV_STATUS_INVALID_ALIGNMENT		4
 #define HV_STATUS_INSUFFICIENT_BUFFERS		19
 
+typedef struct _HV_REFERENCE_TSC_PAGE {
+	__u32 tsc_sequence;
+	__u32 res1;
+	__u64 tsc_scale;
+	__s64 tsc_offset;
+} HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE;
+
 #endif
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0fbdced..0b3fd80 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -839,11 +839,12 @@ EXPORT_SYMBOL_GPL(kvm_rdpmc);
  * kvm-specific. Those are put in the beginning of the list.
  */
 
-#define KVM_SAVE_MSRS_BEGIN	10
+#define KVM_SAVE_MSRS_BEGIN	12
 static u32 msrs_to_save[] = {
 	MSR_KVM_SYSTEM_TIME, MSR_KVM_WALL_CLOCK,
 	MSR_KVM_SYSTEM_TIME_NEW, MSR_KVM_WALL_CLOCK_NEW,
 	HV_X64_MSR_GUEST_OS_ID, HV_X64_MSR_HYPERCALL,
+	HV_X64_MSR_TIME_REF_COUNT, HV_X64_MSR_REFERENCE_TSC,
 	HV_X64_MSR_APIC_ASSIST_PAGE, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME,
 	MSR_KVM_PV_EOI_EN,
 	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
@@ -1788,6 +1789,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr)
 	switch (msr) {
 	case HV_X64_MSR_GUEST_OS_ID:
 	case HV_X64_MSR_HYPERCALL:
+	case HV_X64_MSR_REFERENCE_TSC:
+	case HV_X64_MSR_TIME_REF_COUNT:
 		r = true;
 		break;
 	}
@@ -1829,6 +1832,20 @@ static int set_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data)
 		kvm->arch.hv_hypercall = data;
 		break;
 	}
+	case HV_X64_MSR_REFERENCE_TSC: {
+		u64 gfn;
+		HV_REFERENCE_TSC_PAGE tsc_ref;
+		memset(&tsc_ref, 0, sizeof(tsc_ref));
+		kvm->arch.hv_tsc_page = data;
+		if (!(data & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+			break;
+		gfn = data >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+		if (kvm_write_guest(kvm, data,
+			&tsc_ref, sizeof(tsc_ref)))
+			return 1;
+		mark_page_dirty(kvm, gfn);
+		break;
+	}
 	default:
 		vcpu_unimpl(vcpu, "HYPER-V unimplemented wrmsr: 0x%x "
 			    "data 0x%llx\n", msr, data);
@@ -2253,6 +2270,14 @@ static int get_msr_hyperv_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
 	case HV_X64_MSR_HYPERCALL:
 		data = kvm->arch.hv_hypercall;
 		break;
+	case HV_X64_MSR_TIME_REF_COUNT: {
+		data =
+		     div_u64(get_kernel_ns() + kvm->arch.kvmclock_offset, 100);
+		break;
+	}
+	case HV_X64_MSR_REFERENCE_TSC:
+		data = kvm->arch.hv_tsc_page;
+		break;
 	default:
 		vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr);
 		return 1;
@@ -2566,6 +2591,7 @@ int kvm_dev_ioctl_check_extension(long ext)
 #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
 	case KVM_CAP_ASSIGN_DEV_IRQ:
 	case KVM_CAP_PCI_2_3:
+	case KVM_CAP_HYPERV_TIME:
 #endif
 		r = 1;
 		break;
-- 
cgit v1.1


From 9926c9fdbdd54bb229fe6fdbd15ca3af2b8425ae Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:15 +0100
Subject: KVM: x86: Sync DR7 on KVM_SET_DEBUGREGS

Whenever we change arch.dr7, we also have to call kvm_update_dr7. In
case guest debugging is off, this will synchronize the new state into
hardware.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0b3fd80..59907c9 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2976,6 +2976,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
 	vcpu->arch.dr6 = dbgregs->dr6;
 	vcpu->arch.dr7 = dbgregs->dr7;
+	kvm_update_dr7(vcpu);
 
 	return 0;
 }
-- 
cgit v1.1


From 73aaf249ee2287b4686ff079dcbdbbb658156e64 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:16 +0100
Subject: KVM: SVM: Fix reading of DR6

In contrast to VMX, SVM does not automatically transfer DR6 into the
VCPU's arch.dr6. So if we face a DR6 read, we must consult a new vendor
hook to obtain the current value. And as SVM now picks the DR6 state
from its VMCB, we also need a set callback in order to write updates of
DR6 back.

Fixes a regression of 020df0794f.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/svm.c              | 15 +++++++++++++++
 arch/x86/kvm/vmx.c              | 11 +++++++++++
 arch/x86/kvm/x86.c              | 19 +++++++++++++++++--
 4 files changed, 45 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33fef07..fdf83af 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -700,6 +700,8 @@ struct kvm_x86_ops {
 	void (*set_idt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 	void (*get_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
 	void (*set_gdt)(struct kvm_vcpu *vcpu, struct desc_ptr *dt);
+	u64 (*get_dr6)(struct kvm_vcpu *vcpu);
+	void (*set_dr6)(struct kvm_vcpu *vcpu, unsigned long value);
 	void (*set_dr7)(struct kvm_vcpu *vcpu, unsigned long value);
 	void (*cache_reg)(struct kvm_vcpu *vcpu, enum kvm_reg reg);
 	unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index c7168a5..e81df8f 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1671,6 +1671,19 @@ static void new_asid(struct vcpu_svm *svm, struct svm_cpu_data *sd)
 	mark_dirty(svm->vmcb, VMCB_ASID);
 }
 
+static u64 svm_get_dr6(struct kvm_vcpu *vcpu)
+{
+	return to_svm(vcpu)->vmcb->save.dr6;
+}
+
+static void svm_set_dr6(struct kvm_vcpu *vcpu, unsigned long value)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	svm->vmcb->save.dr6 = value;
+	mark_dirty(svm->vmcb, VMCB_DR);
+}
+
 static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 {
 	struct vcpu_svm *svm = to_svm(vcpu);
@@ -4286,6 +4299,8 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.set_idt = svm_set_idt,
 	.get_gdt = svm_get_gdt,
 	.set_gdt = svm_set_gdt,
+	.get_dr6 = svm_get_dr6,
+	.set_dr6 = svm_set_dr6,
 	.set_dr7 = svm_set_dr7,
 	.cache_reg = svm_cache_reg,
 	.get_rflags = svm_get_rflags,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 7661eb1..79b360e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5149,6 +5149,15 @@ static int handle_dr(struct kvm_vcpu *vcpu)
 	return 1;
 }
 
+static u64 vmx_get_dr6(struct kvm_vcpu *vcpu)
+{
+	return vcpu->arch.dr6;
+}
+
+static void vmx_set_dr6(struct kvm_vcpu *vcpu, unsigned long val)
+{
+}
+
 static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
 {
 	vmcs_writel(GUEST_DR7, val);
@@ -8556,6 +8565,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.set_idt = vmx_set_idt,
 	.get_gdt = vmx_get_gdt,
 	.set_gdt = vmx_set_gdt,
+	.get_dr6 = vmx_get_dr6,
+	.set_dr6 = vmx_set_dr6,
 	.set_dr7 = vmx_set_dr7,
 	.cache_reg = vmx_cache_reg,
 	.get_rflags = vmx_get_rflags,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 59907c9..59b95b1 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -722,6 +722,12 @@ unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_get_cr8);
 
+static void kvm_update_dr6(struct kvm_vcpu *vcpu)
+{
+	if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
+		kvm_x86_ops->set_dr6(vcpu, vcpu->arch.dr6);
+}
+
 static void kvm_update_dr7(struct kvm_vcpu *vcpu)
 {
 	unsigned long dr7;
@@ -750,6 +756,7 @@ static int __kvm_set_dr(struct kvm_vcpu *vcpu, int dr, unsigned long val)
 		if (val & 0xffffffff00000000ULL)
 			return -1; /* #GP */
 		vcpu->arch.dr6 = (val & DR6_VOLATILE) | DR6_FIXED_1;
+		kvm_update_dr6(vcpu);
 		break;
 	case 5:
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -791,7 +798,10 @@ static int _kvm_get_dr(struct kvm_vcpu *vcpu, int dr, unsigned long *val)
 			return 1;
 		/* fall through */
 	case 6:
-		*val = vcpu->arch.dr6;
+		if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
+			*val = vcpu->arch.dr6;
+		else
+			*val = kvm_x86_ops->get_dr6(vcpu);
 		break;
 	case 5:
 		if (kvm_read_cr4_bits(vcpu, X86_CR4_DE))
@@ -2960,8 +2970,11 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
 static void kvm_vcpu_ioctl_x86_get_debugregs(struct kvm_vcpu *vcpu,
 					     struct kvm_debugregs *dbgregs)
 {
+	unsigned long val;
+
 	memcpy(dbgregs->db, vcpu->arch.db, sizeof(vcpu->arch.db));
-	dbgregs->dr6 = vcpu->arch.dr6;
+	_kvm_get_dr(vcpu, 6, &val);
+	dbgregs->dr6 = val;
 	dbgregs->dr7 = vcpu->arch.dr7;
 	dbgregs->flags = 0;
 	memset(&dbgregs->reserved, 0, sizeof(dbgregs->reserved));
@@ -2975,6 +2988,7 @@ static int kvm_vcpu_ioctl_x86_set_debugregs(struct kvm_vcpu *vcpu,
 
 	memcpy(vcpu->arch.db, dbgregs->db, sizeof(vcpu->arch.db));
 	vcpu->arch.dr6 = dbgregs->dr6;
+	kvm_update_dr6(vcpu);
 	vcpu->arch.dr7 = dbgregs->dr7;
 	kvm_update_dr7(vcpu);
 
@@ -6749,6 +6763,7 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu)
 
 	memset(vcpu->arch.db, 0, sizeof(vcpu->arch.db));
 	vcpu->arch.dr6 = DR6_FIXED_1;
+	kvm_update_dr6(vcpu);
 	vcpu->arch.dr7 = DR7_FIXED_1;
 	kvm_update_dr7(vcpu);
 
-- 
cgit v1.1


From 8246bf52c75aa9b9b336a84f31ed2248754d0f71 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:17 +0100
Subject: KVM: VMX: Fix DR6 update on #DB exception

According to the SDM, only bits 0-3 of DR6 "may" be cleared by "certain"
debug exceptions. So do update them on #DB exception in KVM, but leave
the rest alone, only setting BD and BS in addition to already set bits
in DR6. This also aligns us with kvm_vcpu_check_singlestep.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 79b360e..c8eb27f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -4869,7 +4869,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
 		dr6 = vmcs_readl(EXIT_QUALIFICATION);
 		if (!(vcpu->guest_debug &
 		      (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
-			vcpu->arch.dr6 = dr6 | DR6_FIXED_1;
+			vcpu->arch.dr6 &= ~15;
+			vcpu->arch.dr6 |= dr6;
 			kvm_queue_exception(vcpu, DB_VECTOR);
 			return 1;
 		}
-- 
cgit v1.1


From 42124925c1f580068661bebd963d7c102175a8a9 Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:19 +0100
Subject: KVM: nVMX: Leave VMX mode on clearing of feature control MSR

When userspace sets MSR_IA32_FEATURE_CONTROL to 0, make sure we leave
root and non-root mode, fully disabling VMX. The register state of the
VCPU is undefined after this step, so userspace has to set it to a
proper state afterward.

This makes it possible to reboot a VM while it is running some hypervisor code.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c8eb27f..bff5555 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2455,6 +2455,8 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	return 1;
 }
 
+static void vmx_leave_nested(struct kvm_vcpu *vcpu);
+
 static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 {
 	u32 msr_index = msr_info->index;
@@ -2470,6 +2472,8 @@ static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 				& FEATURE_CONTROL_LOCKED)
 			return 0;
 		to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
+		if (host_initialized && data == 0)
+			vmx_leave_nested(vcpu);
 		return 1;
 	}
 
@@ -8504,6 +8508,16 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 }
 
 /*
+ * Forcibly leave nested mode in order to be able to reset the VCPU later on.
+ */
+static void vmx_leave_nested(struct kvm_vcpu *vcpu)
+{
+	if (is_guest_mode(vcpu))
+		nested_vmx_vmexit(vcpu);
+	free_nested(to_vmx(vcpu));
+}
+
+/*
  * L1's failure to enter L2 is a subset of a normal exit, as explained in
  * 23.7 "VM-entry failures during or after loading guest state" (this also
  * lists the acceptable exit-reason and exit-qualification parameters).
-- 
cgit v1.1


From 533558bcb69ef28aff81b6ae9acda8943575319f Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:20 +0100
Subject: KVM: nVMX: Pass vmexit parameters to nested_vmx_vmexit

Instead of fixing up the vmcs12 after the nested vmexit, pass key
parameters already when calling nested_vmx_vmexit. This will help
tracing those vmexits.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 63 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 29 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index bff5555..e3578b3 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1058,7 +1058,9 @@ static inline bool is_exception(u32 intr_info)
 		== (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
 }
 
-static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
+static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+			      u32 exit_intr_info,
+			      unsigned long exit_qualification);
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
 			struct vmcs12 *vmcs12,
 			u32 reason, unsigned long qualification);
@@ -1967,7 +1969,9 @@ static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
 	if (!(vmcs12->exception_bitmap & (1u << nr)))
 		return 0;
 
-	nested_vmx_vmexit(vcpu);
+	nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+			  vmcs_read32(VM_EXIT_INTR_INFO),
+			  vmcs_readl(EXIT_QUALIFICATION));
 	return 1;
 }
 
@@ -4649,15 +4653,12 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
 		if (to_vmx(vcpu)->nested.nested_run_pending)
 			return 0;
 		if (nested_exit_on_nmi(vcpu)) {
-			nested_vmx_vmexit(vcpu);
-			vmcs12->vm_exit_reason = EXIT_REASON_EXCEPTION_NMI;
-			vmcs12->vm_exit_intr_info = NMI_VECTOR |
-				INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK;
+			nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+					  NMI_VECTOR | INTR_TYPE_NMI_INTR |
+					  INTR_INFO_VALID_MASK, 0);
 			/*
 			 * The NMI-triggered VM exit counts as injection:
 			 * clear this one and block further NMIs.
@@ -4679,15 +4680,11 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu)) {
-		struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-
 		if (to_vmx(vcpu)->nested.nested_run_pending)
 			return 0;
 		if (nested_exit_on_intr(vcpu)) {
-			nested_vmx_vmexit(vcpu);
-			vmcs12->vm_exit_reason =
-				EXIT_REASON_EXTERNAL_INTERRUPT;
-			vmcs12->vm_exit_intr_info = 0;
+			nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT,
+					  0, 0);
 			/*
 			 * fall through to normal code, but now in L1, not L2
 			 */
@@ -6849,7 +6846,9 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		return handle_invalid_guest_state(vcpu);
 
 	if (is_guest_mode(vcpu) && nested_vmx_exit_handled(vcpu)) {
-		nested_vmx_vmexit(vcpu);
+		nested_vmx_vmexit(vcpu, exit_reason,
+				  vmcs_read32(VM_EXIT_INTR_INFO),
+				  vmcs_readl(EXIT_QUALIFICATION));
 		return 1;
 	}
 
@@ -7590,15 +7589,14 @@ static void vmx_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry)
 static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
 		struct x86_exception *fault)
 {
-	struct vmcs12 *vmcs12;
-	nested_vmx_vmexit(vcpu);
-	vmcs12 = get_vmcs12(vcpu);
+	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+	u32 exit_reason;
 
 	if (fault->error_code & PFERR_RSVD_MASK)
-		vmcs12->vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
+		exit_reason = EXIT_REASON_EPT_MISCONFIG;
 	else
-		vmcs12->vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
-	vmcs12->exit_qualification = vcpu->arch.exit_qualification;
+		exit_reason = EXIT_REASON_EPT_VIOLATION;
+	nested_vmx_vmexit(vcpu, exit_reason, 0, vcpu->arch.exit_qualification);
 	vmcs12->guest_physical_address = fault->address;
 }
 
@@ -7636,7 +7634,9 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
 
 	/* TODO: also check PFEC_MATCH/MASK, not just EB.PF. */
 	if (vmcs12->exception_bitmap & (1u << PF_VECTOR))
-		nested_vmx_vmexit(vcpu);
+		nested_vmx_vmexit(vcpu, to_vmx(vcpu)->exit_reason,
+				  vmcs_read32(VM_EXIT_INTR_INFO),
+				  vmcs_readl(EXIT_QUALIFICATION));
 	else
 		kvm_inject_page_fault(vcpu, fault);
 }
@@ -8191,7 +8191,9 @@ static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
  * exit-information fields only. Other fields are modified by L1 with VMWRITE,
  * which already writes to vmcs12 directly.
  */
-static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
+static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+			   u32 exit_reason, u32 exit_intr_info,
+			   unsigned long exit_qualification)
 {
 	/* update guest state fields: */
 	vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
@@ -8282,10 +8284,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
 
 	/* update exit information fields: */
 
-	vmcs12->vm_exit_reason  = to_vmx(vcpu)->exit_reason;
-	vmcs12->exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
+	vmcs12->vm_exit_reason = exit_reason;
+	vmcs12->exit_qualification = exit_qualification;
 
-	vmcs12->vm_exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+	vmcs12->vm_exit_intr_info = exit_intr_info;
 	if ((vmcs12->vm_exit_intr_info &
 	     (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) ==
 	    (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK))
@@ -8452,7 +8454,9 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
  * and modify vmcs12 to make it see what it would expect to see there if
  * L2 was its real guest. Must only be called when in L2 (is_guest_mode())
  */
-static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
+static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
+			      u32 exit_intr_info,
+			      unsigned long exit_qualification)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	int cpu;
@@ -8462,7 +8466,8 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 	WARN_ON_ONCE(vmx->nested.nested_run_pending);
 
 	leave_guest_mode(vcpu);
-	prepare_vmcs12(vcpu, vmcs12);
+	prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
+		       exit_qualification);
 
 	cpu = get_cpu();
 	vmx->loaded_vmcs = &vmx->vmcs01;
@@ -8513,7 +8518,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu)
 static void vmx_leave_nested(struct kvm_vcpu *vcpu)
 {
 	if (is_guest_mode(vcpu))
-		nested_vmx_vmexit(vcpu);
+		nested_vmx_vmexit(vcpu, -1, 0, 0);
 	free_nested(to_vmx(vcpu));
 }
 
-- 
cgit v1.1


From 542060ea79c861e100411a5a44df747b56a693df Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:21 +0100
Subject: KVM: nVMX: Add tracepoints for nested_vmexit and nested_vmexit_inject

Already used by nested SVM for tracing nested vmexit: kvm_nested_vmexit
marks exits from L2 to L0 while kvm_nested_vmexit_inject marks vmexits
that are reflected to L1.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e3578b3..e539c45 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6697,6 +6697,13 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
 	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
 	u32 exit_reason = vmx->exit_reason;
 
+	trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason,
+				vmcs_readl(EXIT_QUALIFICATION),
+				vmx->idt_vectoring_info,
+				intr_info,
+				vmcs_read32(VM_EXIT_INTR_ERROR_CODE),
+				KVM_ISA_VMX);
+
 	if (vmx->nested.nested_run_pending)
 		return 0;
 
@@ -8469,6 +8476,13 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
 	prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info,
 		       exit_qualification);
 
+	trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
+				       vmcs12->exit_qualification,
+				       vmcs12->idt_vectoring_info_field,
+				       vmcs12->vm_exit_intr_info,
+				       vmcs12->vm_exit_intr_error_code,
+				       KVM_ISA_VMX);
+
 	cpu = get_cpu();
 	vmx->loaded_vmcs = &vmx->vmcs01;
 	vmx_vcpu_put(vcpu);
-- 
cgit v1.1


From cae501397a25dc1e88375925c5e93a264d4a55ba Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:22 +0100
Subject: KVM: nVMX: Clean up handling of VMX-related MSRs

This simplifies the code and also stops issuing warnings about writing to
unhandled MSRs when VMX is disabled or the Feature Control MSR is
locked - we do handle them all according to the spec.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/include/uapi/asm/msr-index.h |  1 +
 arch/x86/kvm/vmx.c                    | 79 ++++++++++-------------------------
 2 files changed, 24 insertions(+), 56 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index 37813b5..2e4a42d 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -527,6 +527,7 @@
 #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e
 #define MSR_IA32_VMX_TRUE_EXIT_CTLS      0x0000048f
 #define MSR_IA32_VMX_TRUE_ENTRY_CTLS     0x00000490
+#define MSR_IA32_VMX_VMFUNC             0x00000491
 
 /* VMX_BASIC bits and bitmasks */
 #define VMX_BASIC_VMCS_SIZE_SHIFT	32
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e539c45..fc4a255 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2361,32 +2361,10 @@ static inline u64 vmx_control_msr(u32 low, u32 high)
 	return low | ((u64)high << 32);
 }
 
-/*
- * If we allow our guest to use VMX instructions (i.e., nested VMX), we should
- * also let it use VMX-specific MSRs.
- * vmx_get_vmx_msr() and vmx_set_vmx_msr() return 1 when we handled a
- * VMX-specific MSR, or 0 when we haven't (and the caller should handle it
- * like all other MSRs).
- */
+/* Returns 0 on success, non-0 otherwise. */
 static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 {
-	if (!nested_vmx_allowed(vcpu) && msr_index >= MSR_IA32_VMX_BASIC &&
-		     msr_index <= MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
-		/*
-		 * According to the spec, processors which do not support VMX
-		 * should throw a #GP(0) when VMX capability MSRs are read.
-		 */
-		kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
-		return 1;
-	}
-
 	switch (msr_index) {
-	case MSR_IA32_FEATURE_CONTROL:
-		if (nested_vmx_allowed(vcpu)) {
-			*pdata = to_vmx(vcpu)->nested.msr_ia32_feature_control;
-			break;
-		}
-		return 0;
 	case MSR_IA32_VMX_BASIC:
 		/*
 		 * This MSR reports some information about VMX support. We
@@ -2453,38 +2431,9 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 		*pdata = nested_vmx_ept_caps;
 		break;
 	default:
-		return 0;
-	}
-
-	return 1;
-}
-
-static void vmx_leave_nested(struct kvm_vcpu *vcpu);
-
-static int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
-{
-	u32 msr_index = msr_info->index;
-	u64 data = msr_info->data;
-	bool host_initialized = msr_info->host_initiated;
-
-	if (!nested_vmx_allowed(vcpu))
-		return 0;
-
-	if (msr_index == MSR_IA32_FEATURE_CONTROL) {
-		if (!host_initialized &&
-				to_vmx(vcpu)->nested.msr_ia32_feature_control
-				& FEATURE_CONTROL_LOCKED)
-			return 0;
-		to_vmx(vcpu)->nested.msr_ia32_feature_control = data;
-		if (host_initialized && data == 0)
-			vmx_leave_nested(vcpu);
 		return 1;
 	}
 
-	/*
-	 * No need to treat VMX capability MSRs specially: If we don't handle
-	 * them, handle_wrmsr will #GP(0), which is correct (they are readonly)
-	 */
 	return 0;
 }
 
@@ -2530,13 +2479,20 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	case MSR_IA32_SYSENTER_ESP:
 		data = vmcs_readl(GUEST_SYSENTER_ESP);
 		break;
+	case MSR_IA32_FEATURE_CONTROL:
+		if (!nested_vmx_allowed(vcpu))
+			return 1;
+		data = to_vmx(vcpu)->nested.msr_ia32_feature_control;
+		break;
+	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+		if (!nested_vmx_allowed(vcpu))
+			return 1;
+		return vmx_get_vmx_msr(vcpu, msr_index, pdata);
 	case MSR_TSC_AUX:
 		if (!to_vmx(vcpu)->rdtscp_enabled)
 			return 1;
 		/* Otherwise falls through */
 	default:
-		if (vmx_get_vmx_msr(vcpu, msr_index, pdata))
-			return 0;
 		msr = find_msr_entry(to_vmx(vcpu), msr_index);
 		if (msr) {
 			data = msr->data;
@@ -2549,6 +2505,8 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 	return 0;
 }
 
+static void vmx_leave_nested(struct kvm_vcpu *vcpu);
+
 /*
  * Writes msr value into into the appropriate "register".
  * Returns 0 on success, non-0 otherwise.
@@ -2603,6 +2561,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 	case MSR_IA32_TSC_ADJUST:
 		ret = kvm_set_msr_common(vcpu, msr_info);
 		break;
+	case MSR_IA32_FEATURE_CONTROL:
+		if (!nested_vmx_allowed(vcpu) ||
+		    (to_vmx(vcpu)->nested.msr_ia32_feature_control &
+		     FEATURE_CONTROL_LOCKED && !msr_info->host_initiated))
+			return 1;
+		vmx->nested.msr_ia32_feature_control = data;
+		if (msr_info->host_initiated && data == 0)
+			vmx_leave_nested(vcpu);
+		break;
+	case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+		return 1; /* they are read-only */
 	case MSR_TSC_AUX:
 		if (!vmx->rdtscp_enabled)
 			return 1;
@@ -2611,8 +2580,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
 			return 1;
 		/* Otherwise falls through */
 	default:
-		if (vmx_set_vmx_msr(vcpu, msr_info))
-			break;
 		msr = find_msr_entry(vmx, msr_index);
 		if (msr) {
 			msr->data = data;
-- 
cgit v1.1


From 7af40ad37b3f097f367cbe9c0198caccce6fd83b Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:23 +0100
Subject: KVM: nVMX: Fix nested_run_pending on activity state HLT

When we suspend the guest in HLT state, the nested run is no longer
pending - we emulated it completely. So only set nested_run_pending
after checking the activity state.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index fc4a255..f9a5433 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8046,8 +8046,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 
 	enter_guest_mode(vcpu);
 
-	vmx->nested.nested_run_pending = 1;
-
 	vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET);
 
 	cpu = get_cpu();
@@ -8066,6 +8064,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
 	if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT)
 		return kvm_emulate_halt(vcpu);
 
+	vmx->nested.nested_run_pending = 1;
+
 	/*
 	 * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
 	 * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
-- 
cgit v1.1


From 3edf1e698ff638c2ab095e8c60fd11c5d292fc5f Mon Sep 17 00:00:00 2001
From: Jan Kiszka <jan.kiszka@siemens.com>
Date: Sat, 4 Jan 2014 18:47:24 +0100
Subject: KVM: nVMX: Update guest activity state field on L2 exits

Set guest activity state in L1's VMCS according to the VCPUs mp_state.
This ensures we report the correct state in case L2 executed HLT or
if we put L2 into HLT state and it was now woken up by an event.

Signed-off-by: Jan Kiszka <jan.kiszka@siemens.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/vmx.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f9a5433..407b05c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -8219,6 +8219,10 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 		vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
 	vmcs12->guest_pending_dbg_exceptions =
 		vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
+	if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
+		vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
+	else
+		vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
 
 	if ((vmcs12->pin_based_vm_exec_control & PIN_BASED_VMX_PREEMPTION_TIMER) &&
 	    (vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER))
-- 
cgit v1.1


From 699bde3b6c95319749a8e1b7aa2b3f6bee84bff8 Mon Sep 17 00:00:00 2001
From: Christian Borntraeger <borntraeger@de.ibm.com>
Date: Mon, 20 Jan 2014 12:34:13 +0100
Subject: KVM: s390: Fix memory access error detection

Seems that commit 210b1607012cc9034841a393e0591b2c86d9e26c
(KVM: s390: Removed SIE_INTERCEPT_UCONTROL) lost a hunk when we
reworked our patch queue to rework the async_pf code. We now
ignore faults on the sie instruction (guest accesses non-existing
memory) instead of sending a fault into the guest. This leads to
hang situations with the old virtio transport that checks for
descriptor memory after guest memory. Instead of bailing out this
code now goes wild...
Let's re-add the check.

Signed-off-by: Christian Borntraeger <borntraeger@de.ibm.com>
---
 arch/s390/kvm/kvm-s390.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arch')

diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 1bb1dda..7635c00 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -738,6 +738,10 @@ static int vcpu_post_run(struct kvm_vcpu *vcpu, int exit_reason)
 						current->thread.gmap_addr;
 		vcpu->run->s390_ucontrol.pgm_code = 0x10;
 		rc = -EREMOTE;
+	} else {
+		VCPU_EVENT(vcpu, 3, "%s", "fault in sie instruction");
+		trace_kvm_s390_sie_fault(vcpu);
+		rc = kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING);
 	}
 
 	memcpy(&vcpu->run->s.regs.gprs[14], &vcpu->arch.sie_block->gg14, 16);
-- 
cgit v1.1


From 94491620e1362f6065ab821c13eb54b716ada19f Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 17 Jan 2014 18:02:38 -0800
Subject: kvm: make KVM_MMU_AUDIT help text more readable

Make KVM_MMU_AUDIT kconfig help text readable and collapse
two spaces between words down to one space.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Reviewed-by: Xiao Guangrong <xiaoguangrong@linux.vnet.ibm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch')

diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index b89c5db..287e4c8 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -80,7 +80,7 @@ config KVM_MMU_AUDIT
 	depends on KVM && TRACEPOINTS
 	---help---
 	 This option adds a R/W kVM module parameter 'mmu_audit', which allows
-	 audit  KVM MMU at runtime.
+	 auditing of KVM MMU events at runtime.
 
 config KVM_DEVICE_ASSIGNMENT
 	bool "KVM legacy PCI device assignment support"
-- 
cgit v1.1