From 07a9748c78cfc39b54f06125a216b67b9c8f09ed Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 10 Nov 2014 08:33:55 +0000
Subject: arm/arm64: kvm: drop inappropriate use of kvm_is_mmio_pfn()

Instead of using kvm_is_mmio_pfn() to decide whether a host region
should be stage 2 mapped with device attributes, add a new static
function kvm_is_device_pfn() that disregards RAM pages with the
reserved bit set, as those should usually not be mapped as device
memory.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/mmu.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 57a403a..b007438 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -834,6 +834,11 @@ static bool kvm_is_write_fault(struct kvm_vcpu *vcpu)
 	return kvm_vcpu_dabt_iswrite(vcpu);
 }
 
+static bool kvm_is_device_pfn(unsigned long pfn)
+{
+	return !pfn_valid(pfn);
+}
+
 static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			  struct kvm_memory_slot *memslot, unsigned long hva,
 			  unsigned long fault_status)
@@ -904,7 +909,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (is_error_pfn(pfn))
 		return -EFAULT;
 
-	if (kvm_is_mmio_pfn(pfn))
+	if (kvm_is_device_pfn(pfn))
 		mem_type = PAGE_S2_DEVICE;
 
 	spin_lock(&kvm->mmu_lock);
-- 
cgit v1.1


From 840f4bfbe03f1ce94ade8fdf84e8cd925ef15a48 Mon Sep 17 00:00:00 2001
From: Laszlo Ersek <lersek@redhat.com>
Date: Mon, 17 Nov 2014 14:58:52 +0000
Subject: arm, arm64: KVM: allow forced dcache flush on page faults

To allow handling of incoherent memslots in a subsequent patch, this
patch adds a paramater 'ipa_uncached' to cache_coherent_guest_page()
so that we can instruct it to flush the page's contents to DRAM even
if the guest has caching globally enabled.

Signed-off-by: Laszlo Ersek <lersek@redhat.com>
Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/mmu.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index b007438..cb924c6 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -852,6 +852,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	struct vm_area_struct *vma;
 	pfn_t pfn;
 	pgprot_t mem_type = PAGE_S2;
+	bool fault_ipa_uncached;
 
 	write_fault = kvm_is_write_fault(vcpu);
 	if (fault_status == FSC_PERM && !write_fault) {
@@ -918,6 +919,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
+	fault_ipa_uncached = false;
+
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
 		new_pmd = pmd_mkhuge(new_pmd);
@@ -925,7 +928,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pmd_writable(&new_pmd);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE);
+		coherent_cache_guest_page(vcpu, hva & PMD_MASK, PMD_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd);
 	} else {
 		pte_t new_pte = pfn_pte(pfn, mem_type);
@@ -933,7 +937,8 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 			kvm_set_s2pte_writable(&new_pte);
 			kvm_set_pfn_dirty(pfn);
 		}
-		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE);
+		coherent_cache_guest_page(vcpu, hva, PAGE_SIZE,
+					  fault_ipa_uncached);
 		ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte,
 			pgprot_val(mem_type) == pgprot_val(PAGE_S2_DEVICE));
 	}
-- 
cgit v1.1


From 849260c72c6b8bd53850cb00b80027db3a273c2c Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Date: Mon, 17 Nov 2014 14:58:53 +0000
Subject: arm, arm64: KVM: handle potential incoherency of readonly memslots

Readonly memslots are often used to implement emulation of ROMs and
NOR flashes, in which case the guest may legally map these regions as
uncached.
To deal with the incoherency associated with uncached guest mappings,
treat all readonly memslots as incoherent, and ensure that pages that
belong to regions tagged as such are flushed to DRAM before being passed
to the guest.

Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/mmu.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index cb924c6..f2a9874 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -919,7 +919,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa,
 	if (!hugetlb && !force_pte)
 		hugetlb = transparent_hugepage_adjust(&pfn, &fault_ipa);
 
-	fault_ipa_uncached = false;
+	fault_ipa_uncached = memslot->flags & KVM_MEMSLOT_INCOHERENT;
 
 	if (hugetlb) {
 		pmd_t new_pmd = pfn_pmd(pfn, mem_type);
@@ -1298,11 +1298,12 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
 		hva = vm_end;
 	} while (hva < reg_end);
 
-	if (ret) {
-		spin_lock(&kvm->mmu_lock);
+	spin_lock(&kvm->mmu_lock);
+	if (ret)
 		unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size);
-		spin_unlock(&kvm->mmu_lock);
-	}
+	else
+		stage2_flush_memslot(kvm, memslot);
+	spin_unlock(&kvm->mmu_lock);
 	return ret;
 }
 
@@ -1314,6 +1315,15 @@ void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
 int kvm_arch_create_memslot(struct kvm *kvm, struct kvm_memory_slot *slot,
 			    unsigned long npages)
 {
+	/*
+	 * Readonly memslots are not incoherent with the caches by definition,
+	 * but in practice, they are used mostly to emulate ROMs or NOR flashes
+	 * that the guest may consider devices and hence map as uncached.
+	 * To prevent incoherency issues in these cases, tag all readonly
+	 * regions as incoherent.
+	 */
+	if (slot->flags & KVM_MEM_READONLY)
+		slot->flags |= KVM_MEMSLOT_INCOHERENT;
 	return 0;
 }
 
-- 
cgit v1.1


From 5100f9833e1881c800bc088e70afa4b9a1409f51 Mon Sep 17 00:00:00 2001
From: Andre Przywara <andre.przywara@arm.com>
Date: Thu, 6 Nov 2014 12:11:45 +0000
Subject: arm/arm64: KVM: avoid unnecessary guest register mangling on MMIO
 read

Currently we mangle the endianness of the guest's register even on an
MMIO _read_, where it is completely useless, because we will not use
the value of that register.
Rework the io_mem_abort() function to clearly separate between reads
and writes and only do the endianness mangling on MMIO writes.

Signed-off-by: Andre Przywara <andre.przywara@arm.com>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
---
 arch/arm/kvm/mmio.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
index 4cb5a93..5d3bfc0 100644
--- a/arch/arm/kvm/mmio.c
+++ b/arch/arm/kvm/mmio.c
@@ -187,15 +187,18 @@ int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
 	}
 
 	rt = vcpu->arch.mmio_decode.rt;
-	data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt), mmio.len);
 
-	trace_kvm_mmio((mmio.is_write) ? KVM_TRACE_MMIO_WRITE :
-					 KVM_TRACE_MMIO_READ_UNSATISFIED,
-			mmio.len, fault_ipa,
-			(mmio.is_write) ? data : 0);
+	if (mmio.is_write) {
+		data = vcpu_data_guest_to_host(vcpu, *vcpu_reg(vcpu, rt),
+					       mmio.len);
 
-	if (mmio.is_write)
+		trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, mmio.len,
+			       fault_ipa, data);
 		mmio_write_buf(mmio.data, mmio.len, data);
+	} else {
+		trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, mmio.len,
+			       fault_ipa, 0);
+	}
 
 	if (vgic_handle_mmio(vcpu, run, &mmio))
 		return 1;
-- 
cgit v1.1


From 03f1d4c17edb31b41b14ca3a749ae38d2dd6639d Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 2 Dec 2014 15:27:51 +0100
Subject: arm/arm64: KVM: Don't clear the VCPU_POWER_OFF flag

If a VCPU was originally started with power off (typically to be brought
up by PSCI in SMP configurations), there is no need to clear the
POWER_OFF flag in the kernel, as this flag is only tested during the
init ioctl itself.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 9e193c8..b160bea 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -661,7 +661,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
-	if (__test_and_clear_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
+	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
 
 	return 0;
-- 
cgit v1.1


From 3ad8b3de526a76fbe9466b366059e4958957b88f Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 16 Oct 2014 16:14:43 +0200
Subject: arm/arm64: KVM: Correct KVM_ARM_VCPU_INIT power off option

The implementation of KVM_ARM_VCPU_INIT is currently not doing what
userspace expects, namely making sure that a vcpu which may have been
turned off using PSCI is returned to its initial state, which would be
powered on if userspace does not set the KVM_ARM_VCPU_POWER_OFF flag.

Implement the expected functionality and clarify the ABI.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b160bea..edc1964 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -663,6 +663,8 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	 */
 	if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features))
 		vcpu->arch.pause = true;
+	else
+		vcpu->arch.pause = false;
 
 	return 0;
 }
-- 
cgit v1.1


From b856a59141b1066d3c896a0d0231f84dabd040af Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 16 Oct 2014 17:21:16 +0200
Subject: arm/arm64: KVM: Reset the HCR on each vcpu when resetting the vcpu

When userspace resets the vcpu using KVM_ARM_VCPU_INIT, we should also
reset the HCR, because we now modify the HCR dynamically to
enable/disable trapping of guest accesses to the VM registers.

This is crucial for reboot of VMs working since otherwise we will not be
doing the necessary cache maintenance operations when faulting in pages
with the guest MMU off.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c   | 2 ++
 arch/arm/kvm/guest.c | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index edc1964..24c9ca4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -658,6 +658,8 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	if (ret)
 		return ret;
 
+	vcpu_reset_hcr(vcpu);
+
 	/*
 	 * Handle the "start in power-off" case by marking the VCPU as paused.
 	 */
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index cc0b787..8c97208 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -38,7 +38,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
 {
-	vcpu->arch.hcr = HCR_GUEST_MASK;
 	return 0;
 }
 
-- 
cgit v1.1


From f7fa034dc8559c7d7326bfc8bd1a26175abd931a Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 16 Oct 2014 16:40:53 +0200
Subject: arm/arm64: KVM: Clarify KVM_ARM_VCPU_INIT ABI

It is not clear that this ioctl can be called multiple times for a given
vcpu.  Userspace already does this, so clarify the ABI.

Also specify that userspace is expected to always make secondary and
subsequent calls to the ioctl with the same parameters for the VCPU as
the initial call (which userspace also already does).

Add code to check that userspace doesn't violate that ABI in the future,
and move the kvm_vcpu_set_target() function which is currently
duplicated between the 32-bit and 64-bit versions in guest.c to a common
static function in arm.c, shared between both architectures.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c   | 43 +++++++++++++++++++++++++++++++++++++++++++
 arch/arm/kvm/guest.c | 25 -------------------------
 2 files changed, 43 insertions(+), 25 deletions(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 24c9ca4..4043769 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -263,6 +263,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
 	/* Force users to call KVM_ARM_VCPU_INIT */
 	vcpu->arch.target = -1;
+	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
 
 	/* Set up the timer */
 	kvm_timer_vcpu_init(vcpu);
@@ -649,6 +650,48 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level,
 	return -EINVAL;
 }
 
+static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
+			       const struct kvm_vcpu_init *init)
+{
+	unsigned int i;
+	int phys_target = kvm_target_cpu();
+
+	if (init->target != phys_target)
+		return -EINVAL;
+
+	/*
+	 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+	 * use the same target.
+	 */
+	if (vcpu->arch.target != -1 && vcpu->arch.target != init->target)
+		return -EINVAL;
+
+	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
+	for (i = 0; i < sizeof(init->features) * 8; i++) {
+		bool set = (init->features[i / 32] & (1 << (i % 32)));
+
+		if (set && i >= KVM_VCPU_MAX_FEATURES)
+			return -ENOENT;
+
+		/*
+		 * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must
+		 * use the same feature set.
+		 */
+		if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES &&
+		    test_bit(i, vcpu->arch.features) != set)
+			return -EINVAL;
+
+		if (set)
+			set_bit(i, vcpu->arch.features);
+	}
+
+	vcpu->arch.target = phys_target;
+
+	/* Now we know what it is, we can reset it. */
+	return kvm_reset_vcpu(vcpu);
+}
+
+
 static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 					 struct kvm_vcpu_init *init)
 {
diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 8c97208..384bab6 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -273,31 +273,6 @@ int __attribute_const__ kvm_target_cpu(void)
 	}
 }
 
-int kvm_vcpu_set_target(struct kvm_vcpu *vcpu,
-			const struct kvm_vcpu_init *init)
-{
-	unsigned int i;
-
-	/* We can only cope with guest==host and only on A15/A7 (for now). */
-	if (init->target != kvm_target_cpu())
-		return -EINVAL;
-
-	vcpu->arch.target = init->target;
-	bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES);
-
-	/* -ENOENT for unknown features, -EINVAL for invalid combinations. */
-	for (i = 0; i < sizeof(init->features) * 8; i++) {
-		if (test_bit(i, (void *)init->features)) {
-			if (i >= KVM_VCPU_MAX_FEATURES)
-				return -ENOENT;
-			set_bit(i, vcpu->arch.features);
-		}
-	}
-
-	/* Now we know what it is, we can reset it. */
-	return kvm_reset_vcpu(vcpu);
-}
-
 int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init)
 {
 	int target = kvm_target_cpu();
-- 
cgit v1.1


From cf5d318865e25f887d49a0c6083bbc6dcd1905b1 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 16 Oct 2014 17:00:18 +0200
Subject: arm/arm64: KVM: Turn off vcpus on PSCI shutdown/reboot

When a vcpu calls SYSTEM_OFF or SYSTEM_RESET with PSCI v0.2, the vcpus
should really be turned off for the VM adhering to the suggestions in
the PSCI spec, and it's the sane thing to do.

Also, clarify the behavior and expectations for exits to user space with
the KVM_EXIT_SYSTEM_EVENT case.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/psci.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/psci.c b/arch/arm/kvm/psci.c
index 09cf377..58cb324 100644
--- a/arch/arm/kvm/psci.c
+++ b/arch/arm/kvm/psci.c
@@ -15,6 +15,7 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/preempt.h>
 #include <linux/kvm_host.h>
 #include <linux/wait.h>
 
@@ -166,6 +167,23 @@ static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu)
 
 static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type)
 {
+	int i;
+	struct kvm_vcpu *tmp;
+
+	/*
+	 * The KVM ABI specifies that a system event exit may call KVM_RUN
+	 * again and may perform shutdown/reboot at a later time that when the
+	 * actual request is made.  Since we are implementing PSCI and a
+	 * caller of PSCI reboot and shutdown expects that the system shuts
+	 * down or reboots immediately, let's make sure that VCPUs are not run
+	 * after this call is handled and before the VCPUs have been
+	 * re-initialized.
+	 */
+	kvm_for_each_vcpu(i, tmp, vcpu->kvm) {
+		tmp->arch.pause = true;
+		kvm_vcpu_kick(tmp);
+	}
+
 	memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event));
 	vcpu->run->system_event.type = type;
 	vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT;
-- 
cgit v1.1


From 957db105c99792ae8ef61ffc9ae77d910f6471da Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Thu, 27 Nov 2014 10:35:03 +0100
Subject: arm/arm64: KVM: Introduce stage2_unmap_vm

Introduce a new function to unmap user RAM regions in the stage2 page
tables.  This is needed on reboot (or when the guest turns off the MMU)
to ensure we fault in pages again and make the dcache, RAM, and icache
coherent.

Using unmap_stage2_range for the whole guest physical range does not
work, because that unmaps IO regions (such as the GIC) which will not be
recreated or in the best case faulted in on a page-by-page basis.

Call this function on secondary and subsequent calls to the
KVM_ARM_VCPU_INIT ioctl so that a reset VCPU will detect the guest
Stage-1 MMU is off when faulting in pages and make the caches coherent.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c |  7 ++++++
 arch/arm/kvm/mmu.c | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 4043769..da87c07 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -701,6 +701,13 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu,
 	if (ret)
 		return ret;
 
+	/*
+	 * Ensure a rebooted VM will fault in RAM pages and detect if the
+	 * guest MMU is turned off and flush the caches as needed.
+	 */
+	if (vcpu->arch.has_run_once)
+		stage2_unmap_vm(vcpu->kvm);
+
 	vcpu_reset_hcr(vcpu);
 
 	/*
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index f2a9874..3756dd3 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -611,6 +611,71 @@ static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size)
 	unmap_range(kvm, kvm->arch.pgd, start, size);
 }
 
+static void stage2_unmap_memslot(struct kvm *kvm,
+				 struct kvm_memory_slot *memslot)
+{
+	hva_t hva = memslot->userspace_addr;
+	phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT;
+	phys_addr_t size = PAGE_SIZE * memslot->npages;
+	hva_t reg_end = hva + size;
+
+	/*
+	 * A memory region could potentially cover multiple VMAs, and any holes
+	 * between them, so iterate over all of them to find out if we should
+	 * unmap any of them.
+	 *
+	 *     +--------------------------------------------+
+	 * +---------------+----------------+   +----------------+
+	 * |   : VMA 1     |      VMA 2     |   |    VMA 3  :    |
+	 * +---------------+----------------+   +----------------+
+	 *     |               memory region                |
+	 *     +--------------------------------------------+
+	 */
+	do {
+		struct vm_area_struct *vma = find_vma(current->mm, hva);
+		hva_t vm_start, vm_end;
+
+		if (!vma || vma->vm_start >= reg_end)
+			break;
+
+		/*
+		 * Take the intersection of this VMA with the memory region
+		 */
+		vm_start = max(hva, vma->vm_start);
+		vm_end = min(reg_end, vma->vm_end);
+
+		if (!(vma->vm_flags & VM_PFNMAP)) {
+			gpa_t gpa = addr + (vm_start - memslot->userspace_addr);
+			unmap_stage2_range(kvm, gpa, vm_end - vm_start);
+		}
+		hva = vm_end;
+	} while (hva < reg_end);
+}
+
+/**
+ * stage2_unmap_vm - Unmap Stage-2 RAM mappings
+ * @kvm: The struct kvm pointer
+ *
+ * Go through the memregions and unmap any reguler RAM
+ * backing memory already mapped to the VM.
+ */
+void stage2_unmap_vm(struct kvm *kvm)
+{
+	struct kvm_memslots *slots;
+	struct kvm_memory_slot *memslot;
+	int idx;
+
+	idx = srcu_read_lock(&kvm->srcu);
+	spin_lock(&kvm->mmu_lock);
+
+	slots = kvm_memslots(kvm);
+	kvm_for_each_memslot(memslot, slots)
+		stage2_unmap_memslot(kvm, memslot);
+
+	spin_unlock(&kvm->mmu_lock);
+	srcu_read_unlock(&kvm->srcu, idx);
+}
+
 /**
  * kvm_free_stage2_pgd - free all stage-2 tables
  * @kvm:	The KVM struct pointer for the VM.
-- 
cgit v1.1


From 6d3cfbe21bef5b66530b50ad16c88fdc71a04c35 Mon Sep 17 00:00:00 2001
From: Peter Maydell <peter.maydell@linaro.org>
Date: Thu, 4 Dec 2014 15:02:24 +0000
Subject: arm/arm64: KVM: vgic: move reset initialization into vgic_init_maps()

VGIC initialization currently happens in three phases:
 (1) kvm_vgic_create() (triggered by userspace GIC creation)
 (2) vgic_init_maps() (triggered by userspace GIC register read/write
     requests, or from kvm_vgic_init() if not already run)
 (3) kvm_vgic_init() (triggered by first VM run)

We were doing initialization of some state to correspond with the
state of a freshly-reset GIC in kvm_vgic_init(); this is too late,
since it will overwrite changes made by userspace using the
register access APIs before the VM is run. Move this initialization
earlier, into the vgic_init_maps() phase.

This fixes a bug where QEMU could successfully restore a saved
VM state snapshot into a VM that had already been run, but could
not restore it "from cold" using the -loadvm command line option
(the symptoms being that the restored VM would run but interrupts
were ignored).

Finally rename vgic_init_maps to vgic_init and renamed kvm_vgic_init to
kvm_vgic_map_resources.

  [ This patch is originally written by Peter Maydell, but I have
    modified it somewhat heavily, renaming various bits and moving code
    around.  If something is broken, I am to be blamed. - Christoffer ]

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index da87c07..fa4b97c 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -428,11 +428,11 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	vcpu->arch.has_run_once = true;
 
 	/*
-	 * Initialize the VGIC before running a vcpu the first time on
-	 * this VM.
+	 * Map the VGIC hardware resources before running a vcpu the first
+	 * time on this VM.
 	 */
 	if (unlikely(!vgic_initialized(vcpu->kvm))) {
-		ret = kvm_vgic_init(vcpu->kvm);
+		ret = kvm_vgic_map_resources(vcpu->kvm);
 		if (ret)
 			return ret;
 	}
-- 
cgit v1.1


From c52edf5f8caff878afc93c1b1e9a3d9490a9932f Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 9 Dec 2014 14:28:09 +0100
Subject: arm/arm64: KVM: Rename vgic_initialized to vgic_ready

The vgic_initialized() macro currently returns the state of the
vgic->ready flag, which indicates if the vgic is ready to be used when
running a VM, not specifically if its internal state has been
initialized.

Rename the macro accordingly in preparation for a more nuanced
initialization flow.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index fa4b97c..c5a05f2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -431,7 +431,7 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Map the VGIC hardware resources before running a vcpu the first
 	 * time on this VM.
 	 */
-	if (unlikely(!vgic_initialized(vcpu->kvm))) {
+	if (unlikely(!vgic_ready(vcpu->kvm))) {
 		ret = kvm_vgic_map_resources(vcpu->kvm);
 		if (ret)
 			return ret;
-- 
cgit v1.1


From 716139df2517fbc3f2306dbe8eba0fa88dca0189 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Tue, 9 Dec 2014 14:33:45 +0100
Subject: arm/arm64: KVM: Don't allow creating VCPUs after vgic_initialized

When the vgic initializes its internal state it does so based on the
number of VCPUs available at the time.  If we allow KVM to create more
VCPUs after the VGIC has been initialized, we are likely to error out in
unfortunate ways later, perform buffer overflows etc.

Acked-by: Marc Zyngier <marc.zyngier@arm.com>
Reviewed-by: Eric Auger <eric.auger@linaro.org>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index c5a05f2..66f37c4 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -213,6 +213,11 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
 	int err;
 	struct kvm_vcpu *vcpu;
 
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) {
+		err = -EBUSY;
+		goto out;
+	}
+
 	vcpu = kmem_cache_zalloc(kvm_vcpu_cache, GFP_KERNEL);
 	if (!vcpu) {
 		err = -ENOMEM;
-- 
cgit v1.1


From 05971120fca43e0357789a14b3386bb56eef2201 Mon Sep 17 00:00:00 2001
From: Christoffer Dall <christoffer.dall@linaro.org>
Date: Fri, 12 Dec 2014 21:19:23 +0100
Subject: arm/arm64: KVM: Require in-kernel vgic for the arch timers

It is curently possible to run a VM with architected timers support
without creating an in-kernel VGIC, which will result in interrupts from
the virtual timer going nowhere.

To address this issue, move the architected timers initialization to the
time when we run a VCPU for the first time, and then only initialize
(and enable) the architected timers if we have a properly created and
initialized in-kernel VGIC.

When injecting interrupts from the virtual timer to the vgic, the
current setup should ensure that this never calls an on-demand init of
the VGIC, which is the only call path that could return an error from
kvm_vgic_inject_irq(), so capture the return value and raise a warning
if there's an error there.

We also change the kvm_timer_init() function from returning an int to be
a void function, since the function always succeeds.

Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

(limited to 'arch/arm/kvm')

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 66f37c4..2d6d910 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -425,6 +425,7 @@ static void update_vttbr(struct kvm *kvm)
 
 static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 {
+	struct kvm *kvm = vcpu->kvm;
 	int ret;
 
 	if (likely(vcpu->arch.has_run_once))
@@ -436,12 +437,20 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu)
 	 * Map the VGIC hardware resources before running a vcpu the first
 	 * time on this VM.
 	 */
-	if (unlikely(!vgic_ready(vcpu->kvm))) {
-		ret = kvm_vgic_map_resources(vcpu->kvm);
+	if (unlikely(!vgic_ready(kvm))) {
+		ret = kvm_vgic_map_resources(kvm);
 		if (ret)
 			return ret;
 	}
 
+	/*
+	 * Enable the arch timers only if we have an in-kernel VGIC
+	 * and it has been properly initialized, since we cannot handle
+	 * interrupts from the virtual timer with a userspace gic.
+	 */
+	if (irqchip_in_kernel(kvm) && vgic_initialized(kvm))
+		kvm_timer_enable(kvm);
+
 	return 0;
 }
 
-- 
cgit v1.1