summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/arm/include/asm/kvm_host.h3
-rw-r--r--arch/arm/include/uapi/asm/kvm.h2
-rw-r--r--arch/arm/kvm/arm.c3
-rw-r--r--arch/arm64/include/asm/kvm_host.h3
-rw-r--r--arch/arm64/include/uapi/asm/kvm.h2
-rw-r--r--arch/mips/Kconfig1
-rw-r--r--arch/mips/include/asm/cpu-features.h10
-rw-r--r--arch/mips/include/asm/cpu-info.h2
-rw-r--r--arch/mips/include/asm/cpu.h1
-rw-r--r--arch/mips/include/asm/kvm_host.h468
-rw-r--r--arch/mips/include/asm/maar.h10
-rw-r--r--arch/mips/include/asm/mipsregs.h62
-rw-r--r--arch/mips/include/asm/tlb.h6
-rw-r--r--arch/mips/include/uapi/asm/inst.h2
-rw-r--r--arch/mips/include/uapi/asm/kvm.h22
-rw-r--r--arch/mips/kernel/cpu-probe.c13
-rw-r--r--arch/mips/kernel/time.c1
-rw-r--r--arch/mips/kvm/Kconfig27
-rw-r--r--arch/mips/kvm/Makefile9
-rw-r--r--arch/mips/kvm/emulate.c502
-rw-r--r--arch/mips/kvm/entry.c132
-rw-r--r--arch/mips/kvm/hypcall.c53
-rw-r--r--arch/mips/kvm/interrupt.h5
-rw-r--r--arch/mips/kvm/mips.c123
-rw-r--r--arch/mips/kvm/mmu.c20
-rw-r--r--arch/mips/kvm/tlb.c441
-rw-r--r--arch/mips/kvm/trace.h74
-rw-r--r--arch/mips/kvm/trap_emul.c73
-rw-r--r--arch/mips/kvm/vz.c3223
-rw-r--r--arch/mips/mm/cache.c1
-rw-r--r--arch/mips/mm/init.c2
-rw-r--r--arch/powerpc/include/asm/disassemble.h5
-rw-r--r--arch/powerpc/include/asm/iommu.h32
-rw-r--r--arch/powerpc/include/asm/kvm_host.h35
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h22
-rw-r--r--arch/powerpc/include/asm/mmu_context.h4
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h58
-rw-r--r--arch/powerpc/include/uapi/asm/kvm.h3
-rw-r--r--arch/powerpc/kernel/iommu.c91
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/book3s.c18
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu.c1
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_host.c7
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c315
-rw-r--r--arch/powerpc/kvm/book3s_64_vio_hv.c303
-rw-r--r--arch/powerpc/kvm/book3s_emulate.c34
-rw-r--r--arch/powerpc/kvm/book3s_hv.c8
-rw-r--r--arch/powerpc/kvm/book3s_pr.c14
-rw-r--r--arch/powerpc/kvm/book3s_pr_papr.c2
-rw-r--r--arch/powerpc/kvm/booke.c9
-rw-r--r--arch/powerpc/kvm/e500_mmu_host.c5
-rw-r--r--arch/powerpc/kvm/emulate.c8
-rw-r--r--arch/powerpc/kvm/emulate_loadstore.c472
-rw-r--r--arch/powerpc/kvm/powerpc.c327
-rw-r--r--arch/powerpc/mm/mmu_context_iommu.c39
-rw-r--r--arch/powerpc/platforms/powernv/pci-ioda.c46
-rw-r--r--arch/powerpc/platforms/powernv/pci.c1
-rw-r--r--arch/powerpc/platforms/pseries/iommu.c3
-rw-r--r--arch/powerpc/platforms/pseries/vio.c2
-rw-r--r--arch/s390/include/asm/cpacf.h5
-rw-r--r--arch/s390/include/asm/elf.h1
-rw-r--r--arch/s390/include/asm/kvm_host.h42
-rw-r--r--arch/s390/include/asm/lowcore.h9
-rw-r--r--arch/s390/include/asm/nmi.h12
-rw-r--r--arch/s390/include/asm/processor.h5
-rw-r--r--arch/s390/include/asm/sclp.h1
-rw-r--r--arch/s390/include/asm/setup.h2
-rw-r--r--arch/s390/include/asm/switch_to.h3
-rw-r--r--arch/s390/include/asm/thread_info.h12
-rw-r--r--arch/s390/include/uapi/asm/Kbuild1
-rw-r--r--arch/s390/include/uapi/asm/guarded_storage.h77
-rw-r--r--arch/s390/include/uapi/asm/kvm.h29
-rw-r--r--arch/s390/include/uapi/asm/unistd.h2
-rw-r--r--arch/s390/kernel/Makefile2
-rw-r--r--arch/s390/kernel/asm-offsets.c2
-rw-r--r--arch/s390/kernel/compat_wrapper.c1
-rw-r--r--arch/s390/kernel/early.c2
-rw-r--r--arch/s390/kernel/entry.S26
-rw-r--r--arch/s390/kernel/entry.h2
-rw-r--r--arch/s390/kernel/guarded_storage.c128
-rw-r--r--arch/s390/kernel/machine_kexec.c13
-rw-r--r--arch/s390/kernel/nmi.c19
-rw-r--r--arch/s390/kernel/process.c7
-rw-r--r--arch/s390/kernel/processor.c2
-rw-r--r--arch/s390/kernel/ptrace.c86
-rw-r--r--arch/s390/kernel/setup.c18
-rw-r--r--arch/s390/kernel/smp.c43
-rw-r--r--arch/s390/kernel/syscalls.S2
-rw-r--r--arch/s390/kvm/gaccess.c6
-rw-r--r--arch/s390/kvm/intercept.c27
-rw-r--r--arch/s390/kvm/interrupt.c137
-rw-r--r--arch/s390/kvm/kvm-s390.c141
-rw-r--r--arch/s390/kvm/kvm-s390.h4
-rw-r--r--arch/s390/kvm/priv.c50
-rw-r--r--arch/s390/kvm/sthyi.c3
-rw-r--r--arch/s390/kvm/trace-s390.h52
-rw-r--r--arch/s390/kvm/vsie.c78
-rw-r--r--arch/s390/tools/gen_facilities.c1
-rw-r--r--arch/um/include/shared/os.h4
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/include/asm/cpufeatures.h1
-rw-r--r--arch/x86/include/asm/kvm_emulate.h4
-rw-r--r--arch/x86/include/asm/kvm_host.h16
-rw-r--r--arch/x86/include/asm/kvm_page_track.h1
-rw-r--r--arch/x86/include/asm/msr-index.h11
-rw-r--r--arch/x86/include/asm/processor.h2
-rw-r--r--arch/x86/include/asm/proto.h4
-rw-r--r--arch/x86/include/asm/thread_info.h6
-rw-r--r--arch/x86/include/asm/tlbflush.h10
-rw-r--r--arch/x86/include/asm/vmx.h4
-rw-r--r--arch/x86/include/uapi/asm/kvm.h3
-rw-r--r--arch/x86/include/uapi/asm/prctl.h11
-rw-r--r--arch/x86/include/uapi/asm/vmx.h25
-rw-r--r--arch/x86/kernel/cpu/intel.c40
-rw-r--r--arch/x86/kernel/kvm.c4
-rw-r--r--arch/x86/kernel/process.c151
-rw-r--r--arch/x86/kernel/process_32.c7
-rw-r--r--arch/x86/kernel/process_64.c48
-rw-r--r--arch/x86/kernel/ptrace.c8
-rw-r--r--arch/x86/kvm/Kconfig12
-rw-r--r--arch/x86/kvm/Makefile2
-rw-r--r--arch/x86/kvm/assigned-dev.c1058
-rw-r--r--arch/x86/kvm/assigned-dev.h32
-rw-r--r--arch/x86/kvm/cpuid.c3
-rw-r--r--arch/x86/kvm/cpuid.h11
-rw-r--r--arch/x86/kvm/emulate.c23
-rw-r--r--arch/x86/kvm/i8259.c75
-rw-r--r--arch/x86/kvm/ioapic.c31
-rw-r--r--arch/x86/kvm/ioapic.h16
-rw-r--r--arch/x86/kvm/iommu.c356
-rw-r--r--arch/x86/kvm/irq.c2
-rw-r--r--arch/x86/kvm/irq.h32
-rw-r--r--arch/x86/kvm/irq_comm.c45
-rw-r--r--arch/x86/kvm/mmu.c4
-rw-r--r--arch/x86/kvm/mmu.h3
-rw-r--r--arch/x86/kvm/page_track.c8
-rw-r--r--arch/x86/kvm/paging_tmpl.h54
-rw-r--r--arch/x86/kvm/svm.c17
-rw-r--r--arch/x86/kvm/vmx.c369
-rw-r--r--arch/x86/kvm/x86.c233
-rw-r--r--arch/x86/kvm/x86.h36
-rw-r--r--arch/x86/um/Makefile2
-rw-r--r--arch/x86/um/asm/ptrace.h2
-rw-r--r--arch/x86/um/os-Linux/prctl.c4
-rw-r--r--arch/x86/um/syscalls_32.c7
-rw-r--r--arch/x86/um/syscalls_64.c20
146 files changed, 8282 insertions, 2750 deletions
diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index d488b88..f0e6657 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -30,7 +30,6 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#define KVM_USER_MEM_SLOTS 32
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HAVE_ONE_REG
#define KVM_HALT_POLL_NS_DEFAULT 500000
@@ -45,7 +44,7 @@
#define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS
#endif
-#define KVM_REQ_VCPU_EXIT 8
+#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode);
int __attribute_const__ kvm_target_cpu(void);
diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h
index a5838d6..a887263 100644
--- a/arch/arm/include/uapi/asm/kvm.h
+++ b/arch/arm/include/uapi/asm/kvm.h
@@ -27,6 +27,8 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 7941699..8966e59 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -208,9 +208,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_ARM_SET_DEVICE_ADDR:
r = 1;
break;
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 578df18..5e19165 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -31,7 +31,6 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
#define KVM_USER_MEM_SLOTS 512
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HALT_POLL_NS_DEFAULT 500000
#include <kvm/arm_vgic.h>
@@ -42,7 +41,7 @@
#define KVM_VCPU_MAX_FEATURES 4
-#define KVM_REQ_VCPU_EXIT 8
+#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
int __attribute_const__ kvm_target_cpu(void);
int kvm_reset_vcpu(struct kvm_vcpu *vcpu);
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index cd6bea4..869ee48 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -39,6 +39,8 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_READONLY_MEM
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
#define KVM_REG_SIZE(id) \
(1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index a008a9f..0a4adbc 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1687,6 +1687,7 @@ config CPU_CAVIUM_OCTEON
select USB_EHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USB_OHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select MIPS_L1_CACHE_SHIFT_7
+ select HAVE_KVM
help
The Cavium Octeon processor is a highly integrated chip containing
many ethernet hardware widgets for networking tasks. The processor
diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h
index e961c8a..494d382 100644
--- a/arch/mips/include/asm/cpu-features.h
+++ b/arch/mips/include/asm/cpu-features.h
@@ -444,6 +444,10 @@
# define cpu_has_msa 0
#endif
+#ifndef cpu_has_ufr
+# define cpu_has_ufr (cpu_data[0].options & MIPS_CPU_UFR)
+#endif
+
#ifndef cpu_has_fre
# define cpu_has_fre (cpu_data[0].options & MIPS_CPU_FRE)
#endif
@@ -528,6 +532,9 @@
#ifndef cpu_guest_has_htw
#define cpu_guest_has_htw (cpu_data[0].guest.options & MIPS_CPU_HTW)
#endif
+#ifndef cpu_guest_has_mvh
+#define cpu_guest_has_mvh (cpu_data[0].guest.options & MIPS_CPU_MVH)
+#endif
#ifndef cpu_guest_has_msa
#define cpu_guest_has_msa (cpu_data[0].guest.ases & MIPS_ASE_MSA)
#endif
@@ -543,6 +550,9 @@
#ifndef cpu_guest_has_maar
#define cpu_guest_has_maar (cpu_data[0].guest.options & MIPS_CPU_MAAR)
#endif
+#ifndef cpu_guest_has_userlocal
+#define cpu_guest_has_userlocal (cpu_data[0].guest.options & MIPS_CPU_ULRI)
+#endif
/*
* Guest dynamic capabilities
diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h
index edbe273..be3b4c2 100644
--- a/arch/mips/include/asm/cpu-info.h
+++ b/arch/mips/include/asm/cpu-info.h
@@ -33,6 +33,7 @@ struct guest_info {
unsigned long ases_dyn;
unsigned long long options;
unsigned long long options_dyn;
+ int tlbsize;
u8 conf;
u8 kscratch_mask;
};
@@ -109,6 +110,7 @@ struct cpuinfo_mips {
struct guest_info guest;
unsigned int gtoffset_mask;
unsigned int guestid_mask;
+ unsigned int guestid_cache;
} __attribute__((aligned(SMP_CACHE_BYTES)));
extern struct cpuinfo_mips cpu_data[];
diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h
index 9a83724..98f5930 100644
--- a/arch/mips/include/asm/cpu.h
+++ b/arch/mips/include/asm/cpu.h
@@ -415,6 +415,7 @@ enum cpu_type_enum {
#define MIPS_CPU_GUESTCTL2 MBIT_ULL(50) /* CPU has VZ GuestCtl2 register */
#define MIPS_CPU_GUESTID MBIT_ULL(51) /* CPU uses VZ ASE GuestID feature */
#define MIPS_CPU_DRG MBIT_ULL(52) /* CPU has VZ Direct Root to Guest (DRG) */
+#define MIPS_CPU_UFR MBIT_ULL(53) /* CPU supports User mode FR switching */
/*
* CPU ASE encodings
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index 05e785f..2998479 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -10,6 +10,7 @@
#ifndef __MIPS_KVM_HOST_H__
#define __MIPS_KVM_HOST_H__
+#include <linux/cpumask.h>
#include <linux/mutex.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
@@ -33,12 +34,23 @@
#define KVM_REG_MIPS_CP0_ENTRYLO0 MIPS_CP0_64(2, 0)
#define KVM_REG_MIPS_CP0_ENTRYLO1 MIPS_CP0_64(3, 0)
#define KVM_REG_MIPS_CP0_CONTEXT MIPS_CP0_64(4, 0)
+#define KVM_REG_MIPS_CP0_CONTEXTCONFIG MIPS_CP0_32(4, 1)
#define KVM_REG_MIPS_CP0_USERLOCAL MIPS_CP0_64(4, 2)
+#define KVM_REG_MIPS_CP0_XCONTEXTCONFIG MIPS_CP0_64(4, 3)
#define KVM_REG_MIPS_CP0_PAGEMASK MIPS_CP0_32(5, 0)
#define KVM_REG_MIPS_CP0_PAGEGRAIN MIPS_CP0_32(5, 1)
+#define KVM_REG_MIPS_CP0_SEGCTL0 MIPS_CP0_64(5, 2)
+#define KVM_REG_MIPS_CP0_SEGCTL1 MIPS_CP0_64(5, 3)
+#define KVM_REG_MIPS_CP0_SEGCTL2 MIPS_CP0_64(5, 4)
+#define KVM_REG_MIPS_CP0_PWBASE MIPS_CP0_64(5, 5)
+#define KVM_REG_MIPS_CP0_PWFIELD MIPS_CP0_64(5, 6)
+#define KVM_REG_MIPS_CP0_PWSIZE MIPS_CP0_64(5, 7)
#define KVM_REG_MIPS_CP0_WIRED MIPS_CP0_32(6, 0)
+#define KVM_REG_MIPS_CP0_PWCTL MIPS_CP0_32(6, 6)
#define KVM_REG_MIPS_CP0_HWRENA MIPS_CP0_32(7, 0)
#define KVM_REG_MIPS_CP0_BADVADDR MIPS_CP0_64(8, 0)
+#define KVM_REG_MIPS_CP0_BADINSTR MIPS_CP0_32(8, 1)
+#define KVM_REG_MIPS_CP0_BADINSTRP MIPS_CP0_32(8, 2)
#define KVM_REG_MIPS_CP0_COUNT MIPS_CP0_32(9, 0)
#define KVM_REG_MIPS_CP0_ENTRYHI MIPS_CP0_64(10, 0)
#define KVM_REG_MIPS_CP0_COMPARE MIPS_CP0_32(11, 0)
@@ -55,6 +67,7 @@
#define KVM_REG_MIPS_CP0_CONFIG4 MIPS_CP0_32(16, 4)
#define KVM_REG_MIPS_CP0_CONFIG5 MIPS_CP0_32(16, 5)
#define KVM_REG_MIPS_CP0_CONFIG7 MIPS_CP0_32(16, 7)
+#define KVM_REG_MIPS_CP0_MAARI MIPS_CP0_64(17, 2)
#define KVM_REG_MIPS_CP0_XCONTEXT MIPS_CP0_64(20, 0)
#define KVM_REG_MIPS_CP0_ERROREPC MIPS_CP0_64(30, 0)
#define KVM_REG_MIPS_CP0_KSCRATCH1 MIPS_CP0_64(31, 2)
@@ -70,9 +83,13 @@
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 0
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#define KVM_HALT_POLL_NS_DEFAULT 500000
+#ifdef CONFIG_KVM_MIPS_VZ
+extern unsigned long GUESTID_MASK;
+extern unsigned long GUESTID_FIRST_VERSION;
+extern unsigned long GUESTID_VERSION_MASK;
+#endif
/*
@@ -145,6 +162,16 @@ struct kvm_vcpu_stat {
u64 fpe_exits;
u64 msa_disabled_exits;
u64 flush_dcache_exits;
+#ifdef CONFIG_KVM_MIPS_VZ
+ u64 vz_gpsi_exits;
+ u64 vz_gsfc_exits;
+ u64 vz_hc_exits;
+ u64 vz_grr_exits;
+ u64 vz_gva_exits;
+ u64 vz_ghfc_exits;
+ u64 vz_gpa_exits;
+ u64 vz_resvd_exits;
+#endif
u64 halt_successful_poll;
u64 halt_attempted_poll;
u64 halt_poll_invalid;
@@ -157,6 +184,8 @@ struct kvm_arch_memory_slot {
struct kvm_arch {
/* Guest physical mm */
struct mm_struct gpa_mm;
+ /* Mask of CPUs needing GPA ASID flush */
+ cpumask_t asid_flush_mask;
};
#define N_MIPS_COPROC_REGS 32
@@ -214,6 +243,11 @@ struct mips_coproc {
#define MIPS_CP0_CONFIG4_SEL 4
#define MIPS_CP0_CONFIG5_SEL 5
+#define MIPS_CP0_GUESTCTL2 10
+#define MIPS_CP0_GUESTCTL2_SEL 5
+#define MIPS_CP0_GTOFFSET 12
+#define MIPS_CP0_GTOFFSET_SEL 7
+
/* Resume Flags */
#define RESUME_FLAG_DR (1<<0) /* Reload guest nonvolatile state? */
#define RESUME_FLAG_HOST (1<<1) /* Resume host? */
@@ -229,6 +263,7 @@ enum emulation_result {
EMULATE_WAIT, /* WAIT instruction */
EMULATE_PRIV_FAIL,
EMULATE_EXCEPT, /* A guest exception has been generated */
+ EMULATE_HYPERCALL, /* HYPCALL instruction */
};
#define mips3_paddr_to_tlbpfn(x) \
@@ -276,13 +311,18 @@ struct kvm_mmu_memory_cache {
struct kvm_vcpu_arch {
void *guest_ebase;
int (*vcpu_run)(struct kvm_run *run, struct kvm_vcpu *vcpu);
+
+ /* Host registers preserved across guest mode execution */
unsigned long host_stack;
unsigned long host_gp;
+ unsigned long host_pgd;
+ unsigned long host_entryhi;
/* Host CP0 registers used when handling exits from guest */
unsigned long host_cp0_badvaddr;
unsigned long host_cp0_epc;
u32 host_cp0_cause;
+ u32 host_cp0_guestctl0;
u32 host_cp0_badinstr;
u32 host_cp0_badinstrp;
@@ -340,7 +380,23 @@ struct kvm_vcpu_arch {
/* Cache some mmu pages needed inside spinlock regions */
struct kvm_mmu_memory_cache mmu_page_cache;
+#ifdef CONFIG_KVM_MIPS_VZ
+ /* vcpu's vzguestid is different on each host cpu in an smp system */
+ u32 vzguestid[NR_CPUS];
+
+ /* wired guest TLB entries */
+ struct kvm_mips_tlb *wired_tlb;
+ unsigned int wired_tlb_limit;
+ unsigned int wired_tlb_used;
+
+ /* emulated guest MAAR registers */
+ unsigned long maar[6];
+#endif
+
+ /* Last CPU the VCPU state was loaded on */
int last_sched_cpu;
+ /* Last CPU the VCPU actually executed guest code on */
+ int last_exec_cpu;
/* WAIT executed */
int wait;
@@ -349,78 +405,6 @@ struct kvm_vcpu_arch {
u8 msa_enabled;
};
-
-#define kvm_read_c0_guest_index(cop0) (cop0->reg[MIPS_CP0_TLB_INDEX][0])
-#define kvm_write_c0_guest_index(cop0, val) (cop0->reg[MIPS_CP0_TLB_INDEX][0] = val)
-#define kvm_read_c0_guest_entrylo0(cop0) (cop0->reg[MIPS_CP0_TLB_LO0][0])
-#define kvm_write_c0_guest_entrylo0(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO0][0] = (val))
-#define kvm_read_c0_guest_entrylo1(cop0) (cop0->reg[MIPS_CP0_TLB_LO1][0])
-#define kvm_write_c0_guest_entrylo1(cop0, val) (cop0->reg[MIPS_CP0_TLB_LO1][0] = (val))
-#define kvm_read_c0_guest_context(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0])
-#define kvm_write_c0_guest_context(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][0] = (val))
-#define kvm_read_c0_guest_userlocal(cop0) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2])
-#define kvm_write_c0_guest_userlocal(cop0, val) (cop0->reg[MIPS_CP0_TLB_CONTEXT][2] = (val))
-#define kvm_read_c0_guest_pagemask(cop0) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0])
-#define kvm_write_c0_guest_pagemask(cop0, val) (cop0->reg[MIPS_CP0_TLB_PG_MASK][0] = (val))
-#define kvm_read_c0_guest_wired(cop0) (cop0->reg[MIPS_CP0_TLB_WIRED][0])
-#define kvm_write_c0_guest_wired(cop0, val) (cop0->reg[MIPS_CP0_TLB_WIRED][0] = (val))
-#define kvm_read_c0_guest_hwrena(cop0) (cop0->reg[MIPS_CP0_HWRENA][0])
-#define kvm_write_c0_guest_hwrena(cop0, val) (cop0->reg[MIPS_CP0_HWRENA][0] = (val))
-#define kvm_read_c0_guest_badvaddr(cop0) (cop0->reg[MIPS_CP0_BAD_VADDR][0])
-#define kvm_write_c0_guest_badvaddr(cop0, val) (cop0->reg[MIPS_CP0_BAD_VADDR][0] = (val))
-#define kvm_read_c0_guest_count(cop0) (cop0->reg[MIPS_CP0_COUNT][0])
-#define kvm_write_c0_guest_count(cop0, val) (cop0->reg[MIPS_CP0_COUNT][0] = (val))
-#define kvm_read_c0_guest_entryhi(cop0) (cop0->reg[MIPS_CP0_TLB_HI][0])
-#define kvm_write_c0_guest_entryhi(cop0, val) (cop0->reg[MIPS_CP0_TLB_HI][0] = (val))
-#define kvm_read_c0_guest_compare(cop0) (cop0->reg[MIPS_CP0_COMPARE][0])
-#define kvm_write_c0_guest_compare(cop0, val) (cop0->reg[MIPS_CP0_COMPARE][0] = (val))
-#define kvm_read_c0_guest_status(cop0) (cop0->reg[MIPS_CP0_STATUS][0])
-#define kvm_write_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] = (val))
-#define kvm_read_c0_guest_intctl(cop0) (cop0->reg[MIPS_CP0_STATUS][1])
-#define kvm_write_c0_guest_intctl(cop0, val) (cop0->reg[MIPS_CP0_STATUS][1] = (val))
-#define kvm_read_c0_guest_cause(cop0) (cop0->reg[MIPS_CP0_CAUSE][0])
-#define kvm_write_c0_guest_cause(cop0, val) (cop0->reg[MIPS_CP0_CAUSE][0] = (val))
-#define kvm_read_c0_guest_epc(cop0) (cop0->reg[MIPS_CP0_EXC_PC][0])
-#define kvm_write_c0_guest_epc(cop0, val) (cop0->reg[MIPS_CP0_EXC_PC][0] = (val))
-#define kvm_read_c0_guest_prid(cop0) (cop0->reg[MIPS_CP0_PRID][0])
-#define kvm_write_c0_guest_prid(cop0, val) (cop0->reg[MIPS_CP0_PRID][0] = (val))
-#define kvm_read_c0_guest_ebase(cop0) (cop0->reg[MIPS_CP0_PRID][1])
-#define kvm_write_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] = (val))
-#define kvm_read_c0_guest_config(cop0) (cop0->reg[MIPS_CP0_CONFIG][0])
-#define kvm_read_c0_guest_config1(cop0) (cop0->reg[MIPS_CP0_CONFIG][1])
-#define kvm_read_c0_guest_config2(cop0) (cop0->reg[MIPS_CP0_CONFIG][2])
-#define kvm_read_c0_guest_config3(cop0) (cop0->reg[MIPS_CP0_CONFIG][3])
-#define kvm_read_c0_guest_config4(cop0) (cop0->reg[MIPS_CP0_CONFIG][4])
-#define kvm_read_c0_guest_config5(cop0) (cop0->reg[MIPS_CP0_CONFIG][5])
-#define kvm_read_c0_guest_config7(cop0) (cop0->reg[MIPS_CP0_CONFIG][7])
-#define kvm_write_c0_guest_config(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][0] = (val))
-#define kvm_write_c0_guest_config1(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][1] = (val))
-#define kvm_write_c0_guest_config2(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][2] = (val))
-#define kvm_write_c0_guest_config3(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][3] = (val))
-#define kvm_write_c0_guest_config4(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][4] = (val))
-#define kvm_write_c0_guest_config5(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][5] = (val))
-#define kvm_write_c0_guest_config7(cop0, val) (cop0->reg[MIPS_CP0_CONFIG][7] = (val))
-#define kvm_read_c0_guest_errorepc(cop0) (cop0->reg[MIPS_CP0_ERROR_PC][0])
-#define kvm_write_c0_guest_errorepc(cop0, val) (cop0->reg[MIPS_CP0_ERROR_PC][0] = (val))
-#define kvm_read_c0_guest_kscratch1(cop0) (cop0->reg[MIPS_CP0_DESAVE][2])
-#define kvm_read_c0_guest_kscratch2(cop0) (cop0->reg[MIPS_CP0_DESAVE][3])
-#define kvm_read_c0_guest_kscratch3(cop0) (cop0->reg[MIPS_CP0_DESAVE][4])
-#define kvm_read_c0_guest_kscratch4(cop0) (cop0->reg[MIPS_CP0_DESAVE][5])
-#define kvm_read_c0_guest_kscratch5(cop0) (cop0->reg[MIPS_CP0_DESAVE][6])
-#define kvm_read_c0_guest_kscratch6(cop0) (cop0->reg[MIPS_CP0_DESAVE][7])
-#define kvm_write_c0_guest_kscratch1(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][2] = (val))
-#define kvm_write_c0_guest_kscratch2(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][3] = (val))
-#define kvm_write_c0_guest_kscratch3(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][4] = (val))
-#define kvm_write_c0_guest_kscratch4(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][5] = (val))
-#define kvm_write_c0_guest_kscratch5(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][6] = (val))
-#define kvm_write_c0_guest_kscratch6(cop0, val) (cop0->reg[MIPS_CP0_DESAVE][7] = (val))
-
-/*
- * Some of the guest registers may be modified asynchronously (e.g. from a
- * hrtimer callback in hard irq context) and therefore need stronger atomicity
- * guarantees than other registers.
- */
-
static inline void _kvm_atomic_set_c0_guest_reg(unsigned long *reg,
unsigned long val)
{
@@ -471,26 +455,286 @@ static inline void _kvm_atomic_change_c0_guest_reg(unsigned long *reg,
} while (unlikely(!temp));
}
-#define kvm_set_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] |= (val))
-#define kvm_clear_c0_guest_status(cop0, val) (cop0->reg[MIPS_CP0_STATUS][0] &= ~(val))
+/* Guest register types, used in accessor build below */
+#define __KVMT32 u32
+#define __KVMTl unsigned long
-/* Cause can be modified asynchronously from hardirq hrtimer callback */
-#define kvm_set_c0_guest_cause(cop0, val) \
- _kvm_atomic_set_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
-#define kvm_clear_c0_guest_cause(cop0, val) \
- _kvm_atomic_clear_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], val)
-#define kvm_change_c0_guest_cause(cop0, change, val) \
- _kvm_atomic_change_c0_guest_reg(&cop0->reg[MIPS_CP0_CAUSE][0], \
- change, val)
-
-#define kvm_set_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] |= (val))
-#define kvm_clear_c0_guest_ebase(cop0, val) (cop0->reg[MIPS_CP0_PRID][1] &= ~(val))
-#define kvm_change_c0_guest_ebase(cop0, change, val) \
+/*
+ * __BUILD_KVM_$ops_SAVED(): kvm_$op_sw_gc0_$reg()
+ * These operate on the saved guest C0 state in RAM.
+ */
+
+/* Generate saved context simple accessors */
+#define __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
+static inline __KVMT##type kvm_read_sw_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ return cop0->reg[(_reg)][(sel)]; \
+} \
+static inline void kvm_write_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ cop0->reg[(_reg)][(sel)] = val; \
+}
+
+/* Generate saved context bitwise modifiers */
+#define __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
+static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ cop0->reg[(_reg)][(sel)] |= val; \
+} \
+static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ cop0->reg[(_reg)][(sel)] &= ~val; \
+} \
+static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ unsigned long _mask = mask; \
+ cop0->reg[(_reg)][(sel)] &= ~_mask; \
+ cop0->reg[(_reg)][(sel)] |= val & _mask; \
+}
+
+/* Generate saved context atomic bitwise modifiers */
+#define __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \
+static inline void kvm_set_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ _kvm_atomic_set_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \
+} \
+static inline void kvm_clear_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ _kvm_atomic_clear_c0_guest_reg(&cop0->reg[(_reg)][(sel)], val); \
+} \
+static inline void kvm_change_sw_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ _kvm_atomic_change_c0_guest_reg(&cop0->reg[(_reg)][(sel)], mask, \
+ val); \
+}
+
+/*
+ * __BUILD_KVM_$ops_VZ(): kvm_$op_vz_gc0_$reg()
+ * These operate on the VZ guest C0 context in hardware.
+ */
+
+/* Generate VZ guest context simple accessors */
+#define __BUILD_KVM_RW_VZ(name, type, _reg, sel) \
+static inline __KVMT##type kvm_read_vz_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ return read_gc0_##name(); \
+} \
+static inline void kvm_write_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ write_gc0_##name(val); \
+}
+
+/* Generate VZ guest context bitwise modifiers */
+#define __BUILD_KVM_SET_VZ(name, type, _reg, sel) \
+static inline void kvm_set_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ set_gc0_##name(val); \
+} \
+static inline void kvm_clear_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ clear_gc0_##name(val); \
+} \
+static inline void kvm_change_vz_gc0_##name(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ change_gc0_##name(mask, val); \
+}
+
+/* Generate VZ guest context save/restore to/from saved context */
+#define __BUILD_KVM_SAVE_VZ(name, _reg, sel) \
+static inline void kvm_restore_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ write_gc0_##name(cop0->reg[(_reg)][(sel)]); \
+} \
+static inline void kvm_save_gc0_##name(struct mips_coproc *cop0) \
+{ \
+ cop0->reg[(_reg)][(sel)] = read_gc0_##name(); \
+}
+
+/*
+ * __BUILD_KVM_$ops_WRAP(): kvm_$op_$name1() -> kvm_$op_$name2()
+ * These wrap a set of operations to provide them with a different name.
+ */
+
+/* Generate simple accessor wrapper */
+#define __BUILD_KVM_RW_WRAP(name1, name2, type) \
+static inline __KVMT##type kvm_read_##name1(struct mips_coproc *cop0) \
+{ \
+ return kvm_read_##name2(cop0); \
+} \
+static inline void kvm_write_##name1(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ kvm_write_##name2(cop0, val); \
+}
+
+/* Generate bitwise modifier wrapper */
+#define __BUILD_KVM_SET_WRAP(name1, name2, type) \
+static inline void kvm_set_##name1(struct mips_coproc *cop0, \
+ __KVMT##type val) \
{ \
- kvm_clear_c0_guest_ebase(cop0, change); \
- kvm_set_c0_guest_ebase(cop0, ((val) & (change))); \
+ kvm_set_##name2(cop0, val); \
+} \
+static inline void kvm_clear_##name1(struct mips_coproc *cop0, \
+ __KVMT##type val) \
+{ \
+ kvm_clear_##name2(cop0, val); \
+} \
+static inline void kvm_change_##name1(struct mips_coproc *cop0, \
+ __KVMT##type mask, \
+ __KVMT##type val) \
+{ \
+ kvm_change_##name2(cop0, mask, val); \
}
+/*
+ * __BUILD_KVM_$ops_SW(): kvm_$op_c0_guest_$reg() -> kvm_$op_sw_gc0_$reg()
+ * These generate accessors operating on the saved context in RAM, and wrap them
+ * with the common guest C0 accessors (for use by common emulation code).
+ */
+
+#define __BUILD_KVM_RW_SW(name, type, _reg, sel) \
+ __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_RW_WRAP(c0_guest_##name, sw_gc0_##name, type)
+
+#define __BUILD_KVM_SET_SW(name, type, _reg, sel) \
+ __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type)
+
+#define __BUILD_KVM_ATOMIC_SW(name, type, _reg, sel) \
+ __BUILD_KVM_ATOMIC_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_SET_WRAP(c0_guest_##name, sw_gc0_##name, type)
+
+#ifndef CONFIG_KVM_MIPS_VZ
+
+/*
+ * T&E (trap & emulate software based virtualisation)
+ * We generate the common accessors operating exclusively on the saved context
+ * in RAM.
+ */
+
+#define __BUILD_KVM_RW_HW __BUILD_KVM_RW_SW
+#define __BUILD_KVM_SET_HW __BUILD_KVM_SET_SW
+#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_ATOMIC_SW
+
+#else
+
+/*
+ * VZ (hardware assisted virtualisation)
+ * These macros use the active guest state in VZ mode (hardware registers),
+ */
+
+/*
+ * __BUILD_KVM_$ops_HW(): kvm_$op_c0_guest_$reg() -> kvm_$op_vz_gc0_$reg()
+ * These generate accessors operating on the VZ guest context in hardware, and
+ * wrap them with the common guest C0 accessors (for use by common emulation
+ * code).
+ *
+ * Accessors operating on the saved context in RAM are also generated to allow
+ * convenient explicit saving and restoring of the state.
+ */
+
+#define __BUILD_KVM_RW_HW(name, type, _reg, sel) \
+ __BUILD_KVM_RW_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_RW_VZ(name, type, _reg, sel) \
+ __BUILD_KVM_RW_WRAP(c0_guest_##name, vz_gc0_##name, type) \
+ __BUILD_KVM_SAVE_VZ(name, _reg, sel)
+
+#define __BUILD_KVM_SET_HW(name, type, _reg, sel) \
+ __BUILD_KVM_SET_SAVED(name, type, _reg, sel) \
+ __BUILD_KVM_SET_VZ(name, type, _reg, sel) \
+ __BUILD_KVM_SET_WRAP(c0_guest_##name, vz_gc0_##name, type)
+
+/*
+ * We can't do atomic modifications of COP0 state if hardware can modify it.
+ * Races must be handled explicitly.
+ */
+#define __BUILD_KVM_ATOMIC_HW __BUILD_KVM_SET_HW
+
+#endif
+
+/*
+ * Define accessors for CP0 registers that are accessible to the guest. These
+ * are primarily used by common emulation code, which may need to access the
+ * registers differently depending on the implementation.
+ *
+ * fns_hw/sw name type reg num select
+ */
+__BUILD_KVM_RW_HW(index, 32, MIPS_CP0_TLB_INDEX, 0)
+__BUILD_KVM_RW_HW(entrylo0, l, MIPS_CP0_TLB_LO0, 0)
+__BUILD_KVM_RW_HW(entrylo1, l, MIPS_CP0_TLB_LO1, 0)
+__BUILD_KVM_RW_HW(context, l, MIPS_CP0_TLB_CONTEXT, 0)
+__BUILD_KVM_RW_HW(contextconfig, 32, MIPS_CP0_TLB_CONTEXT, 1)
+__BUILD_KVM_RW_HW(userlocal, l, MIPS_CP0_TLB_CONTEXT, 2)
+__BUILD_KVM_RW_HW(xcontextconfig, l, MIPS_CP0_TLB_CONTEXT, 3)
+__BUILD_KVM_RW_HW(pagemask, l, MIPS_CP0_TLB_PG_MASK, 0)
+__BUILD_KVM_RW_HW(pagegrain, 32, MIPS_CP0_TLB_PG_MASK, 1)
+__BUILD_KVM_RW_HW(segctl0, l, MIPS_CP0_TLB_PG_MASK, 2)
+__BUILD_KVM_RW_HW(segctl1, l, MIPS_CP0_TLB_PG_MASK, 3)
+__BUILD_KVM_RW_HW(segctl2, l, MIPS_CP0_TLB_PG_MASK, 4)
+__BUILD_KVM_RW_HW(pwbase, l, MIPS_CP0_TLB_PG_MASK, 5)
+__BUILD_KVM_RW_HW(pwfield, l, MIPS_CP0_TLB_PG_MASK, 6)
+__BUILD_KVM_RW_HW(pwsize, l, MIPS_CP0_TLB_PG_MASK, 7)
+__BUILD_KVM_RW_HW(wired, 32, MIPS_CP0_TLB_WIRED, 0)
+__BUILD_KVM_RW_HW(pwctl, 32, MIPS_CP0_TLB_WIRED, 6)
+__BUILD_KVM_RW_HW(hwrena, 32, MIPS_CP0_HWRENA, 0)
+__BUILD_KVM_RW_HW(badvaddr, l, MIPS_CP0_BAD_VADDR, 0)
+__BUILD_KVM_RW_HW(badinstr, 32, MIPS_CP0_BAD_VADDR, 1)
+__BUILD_KVM_RW_HW(badinstrp, 32, MIPS_CP0_BAD_VADDR, 2)
+__BUILD_KVM_RW_SW(count, 32, MIPS_CP0_COUNT, 0)
+__BUILD_KVM_RW_HW(entryhi, l, MIPS_CP0_TLB_HI, 0)
+__BUILD_KVM_RW_HW(compare, 32, MIPS_CP0_COMPARE, 0)
+__BUILD_KVM_RW_HW(status, 32, MIPS_CP0_STATUS, 0)
+__BUILD_KVM_RW_HW(intctl, 32, MIPS_CP0_STATUS, 1)
+__BUILD_KVM_RW_HW(cause, 32, MIPS_CP0_CAUSE, 0)
+__BUILD_KVM_RW_HW(epc, l, MIPS_CP0_EXC_PC, 0)
+__BUILD_KVM_RW_SW(prid, 32, MIPS_CP0_PRID, 0)
+__BUILD_KVM_RW_HW(ebase, l, MIPS_CP0_PRID, 1)
+__BUILD_KVM_RW_HW(config, 32, MIPS_CP0_CONFIG, 0)
+__BUILD_KVM_RW_HW(config1, 32, MIPS_CP0_CONFIG, 1)
+__BUILD_KVM_RW_HW(config2, 32, MIPS_CP0_CONFIG, 2)
+__BUILD_KVM_RW_HW(config3, 32, MIPS_CP0_CONFIG, 3)
+__BUILD_KVM_RW_HW(config4, 32, MIPS_CP0_CONFIG, 4)
+__BUILD_KVM_RW_HW(config5, 32, MIPS_CP0_CONFIG, 5)
+__BUILD_KVM_RW_HW(config6, 32, MIPS_CP0_CONFIG, 6)
+__BUILD_KVM_RW_HW(config7, 32, MIPS_CP0_CONFIG, 7)
+__BUILD_KVM_RW_SW(maari, l, MIPS_CP0_LLADDR, 2)
+__BUILD_KVM_RW_HW(xcontext, l, MIPS_CP0_TLB_XCONTEXT, 0)
+__BUILD_KVM_RW_HW(errorepc, l, MIPS_CP0_ERROR_PC, 0)
+__BUILD_KVM_RW_HW(kscratch1, l, MIPS_CP0_DESAVE, 2)
+__BUILD_KVM_RW_HW(kscratch2, l, MIPS_CP0_DESAVE, 3)
+__BUILD_KVM_RW_HW(kscratch3, l, MIPS_CP0_DESAVE, 4)
+__BUILD_KVM_RW_HW(kscratch4, l, MIPS_CP0_DESAVE, 5)
+__BUILD_KVM_RW_HW(kscratch5, l, MIPS_CP0_DESAVE, 6)
+__BUILD_KVM_RW_HW(kscratch6, l, MIPS_CP0_DESAVE, 7)
+
+/* Bitwise operations (on HW state) */
+__BUILD_KVM_SET_HW(status, 32, MIPS_CP0_STATUS, 0)
+/* Cause can be modified asynchronously from hardirq hrtimer callback */
+__BUILD_KVM_ATOMIC_HW(cause, 32, MIPS_CP0_CAUSE, 0)
+__BUILD_KVM_SET_HW(ebase, l, MIPS_CP0_PRID, 1)
+
+/* Bitwise operations (on saved state) */
+__BUILD_KVM_SET_SAVED(config, 32, MIPS_CP0_CONFIG, 0)
+__BUILD_KVM_SET_SAVED(config1, 32, MIPS_CP0_CONFIG, 1)
+__BUILD_KVM_SET_SAVED(config2, 32, MIPS_CP0_CONFIG, 2)
+__BUILD_KVM_SET_SAVED(config3, 32, MIPS_CP0_CONFIG, 3)
+__BUILD_KVM_SET_SAVED(config4, 32, MIPS_CP0_CONFIG, 4)
+__BUILD_KVM_SET_SAVED(config5, 32, MIPS_CP0_CONFIG, 5)
+
/* Helpers */
static inline bool kvm_mips_guest_can_have_fpu(struct kvm_vcpu_arch *vcpu)
@@ -531,6 +775,10 @@ struct kvm_mips_callbacks {
int (*handle_msa_fpe)(struct kvm_vcpu *vcpu);
int (*handle_fpe)(struct kvm_vcpu *vcpu);
int (*handle_msa_disabled)(struct kvm_vcpu *vcpu);
+ int (*handle_guest_exit)(struct kvm_vcpu *vcpu);
+ int (*hardware_enable)(void);
+ void (*hardware_disable)(void);
+ int (*check_extension)(struct kvm *kvm, long ext);
int (*vcpu_init)(struct kvm_vcpu *vcpu);
void (*vcpu_uninit)(struct kvm_vcpu *vcpu);
int (*vcpu_setup)(struct kvm_vcpu *vcpu);
@@ -599,6 +847,10 @@ u32 kvm_get_user_asid(struct kvm_vcpu *vcpu);
u32 kvm_get_commpage_asid (struct kvm_vcpu *vcpu);
+#ifdef CONFIG_KVM_MIPS_VZ
+int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
+ struct kvm_vcpu *vcpu, bool write_fault);
+#endif
extern int kvm_mips_handle_kseg0_tlb_fault(unsigned long badbaddr,
struct kvm_vcpu *vcpu,
bool write_fault);
@@ -625,6 +877,18 @@ extern int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi,
extern int kvm_mips_guest_tlb_lookup(struct kvm_vcpu *vcpu,
unsigned long entryhi);
+#ifdef CONFIG_KVM_MIPS_VZ
+int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long entryhi);
+int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa);
+void kvm_vz_local_flush_roottlb_all_guests(void);
+void kvm_vz_local_flush_guesttlb_all(void);
+void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count);
+void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count);
+#endif
+
void kvm_mips_suspend_mm(int cpu);
void kvm_mips_resume_mm(int cpu);
@@ -795,7 +1059,7 @@ extern enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
u32 kvm_mips_read_count(struct kvm_vcpu *vcpu);
void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count);
void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack);
-void kvm_mips_init_count(struct kvm_vcpu *vcpu);
+void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz);
int kvm_mips_set_count_ctl(struct kvm_vcpu *vcpu, s64 count_ctl);
int kvm_mips_set_count_resume(struct kvm_vcpu *vcpu, s64 count_resume);
int kvm_mips_set_count_hz(struct kvm_vcpu *vcpu, s64 count_hz);
@@ -803,6 +1067,20 @@ void kvm_mips_count_enable_cause(struct kvm_vcpu *vcpu);
void kvm_mips_count_disable_cause(struct kvm_vcpu *vcpu);
enum hrtimer_restart kvm_mips_count_timeout(struct kvm_vcpu *vcpu);
+/* fairly internal functions requiring some care to use */
+int kvm_mips_count_disabled(struct kvm_vcpu *vcpu);
+ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count);
+int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
+ u32 count, int min_drift);
+
+#ifdef CONFIG_KVM_MIPS_VZ
+void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu);
+void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu);
+#else
+static inline void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu) {}
+static inline void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu) {}
+#endif
+
enum emulation_result kvm_mips_check_privilege(u32 cause,
u32 *opc,
struct kvm_run *run,
@@ -827,11 +1105,20 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
struct kvm_run *run,
struct kvm_vcpu *vcpu);
+/* COP0 */
+enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu);
+
unsigned int kvm_mips_config1_wrmask(struct kvm_vcpu *vcpu);
unsigned int kvm_mips_config3_wrmask(struct kvm_vcpu *vcpu);
unsigned int kvm_mips_config4_wrmask(struct kvm_vcpu *vcpu);
unsigned int kvm_mips_config5_wrmask(struct kvm_vcpu *vcpu);
+/* Hypercalls (hypcall.c) */
+
+enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu,
+ union mips_instruction inst);
+int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu);
+
/* Dynamic binary translation */
extern int kvm_mips_trans_cache_index(union mips_instruction inst,
u32 *opc, struct kvm_vcpu *vcpu);
@@ -846,7 +1133,6 @@ extern int kvm_mips_trans_mtc0(union mips_instruction inst, u32 *opc,
extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu);
extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm);
-static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_hardware_unsetup(void) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_free_memslot(struct kvm *kvm,
diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h
index 21d9607..e10f78b 100644
--- a/arch/mips/include/asm/maar.h
+++ b/arch/mips/include/asm/maar.h
@@ -36,7 +36,7 @@ unsigned platform_maar_init(unsigned num_pairs);
* @upper: The highest address that the MAAR pair will affect. Must be
* aligned to one byte before a 2^16 byte boundary.
* @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
- * MIPS_MAAR_V attribute will automatically be set.
+ * MIPS_MAAR_VL attribute will automatically be set.
*
* Program the pair of MAAR registers specified by idx to apply the attributes
* specified by attrs to the range of addresses from lower to higher.
@@ -49,10 +49,10 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower,
BUG_ON(((upper & 0xffff) != 0xffff)
|| ((upper & ~0xffffull) & ~(MIPS_MAAR_ADDR << 4)));
- /* Automatically set MIPS_MAAR_V */
- attrs |= MIPS_MAAR_V;
+ /* Automatically set MIPS_MAAR_VL */
+ attrs |= MIPS_MAAR_VL;
- /* Write the upper address & attributes (only MIPS_MAAR_V matters) */
+ /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */
write_c0_maari(idx << 1);
back_to_back_c0_hazard();
write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs);
@@ -81,7 +81,7 @@ extern void maar_init(void);
* @upper: The highest address that the MAAR pair will affect. Must be
* aligned to one byte before a 2^16 byte boundary.
* @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The
- * MIPS_MAAR_V attribute will automatically be set.
+ * MIPS_MAAR_VL attribute will automatically be set.
*
* Describes the configuration of a pair of Memory Accessibility Attribute
* Registers - applying attributes from attrs to the range of physical
diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h
index f8d1d2f..6875b69 100644
--- a/arch/mips/include/asm/mipsregs.h
+++ b/arch/mips/include/asm/mipsregs.h
@@ -34,8 +34,10 @@
*/
#ifdef __ASSEMBLY__
#define _ULCAST_
+#define _U64CAST_
#else
#define _ULCAST_ (unsigned long)
+#define _U64CAST_ (u64)
#endif
/*
@@ -217,8 +219,10 @@
/*
* Wired register bits
*/
-#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << 16)
-#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << 0)
+#define MIPSR6_WIRED_LIMIT_SHIFT 16
+#define MIPSR6_WIRED_LIMIT (_ULCAST_(0xffff) << MIPSR6_WIRED_LIMIT_SHIFT)
+#define MIPSR6_WIRED_WIRED_SHIFT 0
+#define MIPSR6_WIRED_WIRED (_ULCAST_(0xffff) << MIPSR6_WIRED_WIRED_SHIFT)
/*
* Values used for computation of new tlb entries
@@ -645,6 +649,7 @@
#define MIPS_CONF5_LLB (_ULCAST_(1) << 4)
#define MIPS_CONF5_MVH (_ULCAST_(1) << 5)
#define MIPS_CONF5_VP (_ULCAST_(1) << 7)
+#define MIPS_CONF5_SBRI (_ULCAST_(1) << 6)
#define MIPS_CONF5_FRE (_ULCAST_(1) << 8)
#define MIPS_CONF5_UFE (_ULCAST_(1) << 9)
#define MIPS_CONF5_MSAEN (_ULCAST_(1) << 27)
@@ -719,10 +724,14 @@
#define XLR_PERFCTRL_ALLTHREADS (_ULCAST_(1) << 13)
/* MAAR bit definitions */
+#define MIPS_MAAR_VH (_U64CAST_(1) << 63)
#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12)
#define MIPS_MAAR_ADDR_SHIFT 12
#define MIPS_MAAR_S (_ULCAST_(1) << 1)
-#define MIPS_MAAR_V (_ULCAST_(1) << 0)
+#define MIPS_MAAR_VL (_ULCAST_(1) << 0)
+
+/* MAARI bit definitions */
+#define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0)
/* EBase bit definitions */
#define MIPS_EBASE_CPUNUM_SHIFT 0
@@ -736,6 +745,10 @@
#define MIPS_CMGCRB_BASE 11
#define MIPS_CMGCRF_BASE (~_ULCAST_((1 << MIPS_CMGCRB_BASE) - 1))
+/* LLAddr bit definitions */
+#define MIPS_LLADDR_LLB_SHIFT 0
+#define MIPS_LLADDR_LLB (_ULCAST_(1) << MIPS_LLADDR_LLB_SHIFT)
+
/*
* Bits in the MIPS32 Memory Segmentation registers.
*/
@@ -961,6 +974,22 @@
/* Flush FTLB */
#define LOONGSON_DIAG_FTLB (_ULCAST_(1) << 13)
+/* CvmCtl register field definitions */
+#define CVMCTL_IPPCI_SHIFT 7
+#define CVMCTL_IPPCI (_U64CAST_(0x7) << CVMCTL_IPPCI_SHIFT)
+#define CVMCTL_IPTI_SHIFT 4
+#define CVMCTL_IPTI (_U64CAST_(0x7) << CVMCTL_IPTI_SHIFT)
+
+/* CvmMemCtl2 register field definitions */
+#define CVMMEMCTL2_INHIBITTS (_U64CAST_(1) << 17)
+
+/* CvmVMConfig register field definitions */
+#define CVMVMCONF_DGHT (_U64CAST_(1) << 60)
+#define CVMVMCONF_MMUSIZEM1_S 12
+#define CVMVMCONF_MMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_MMUSIZEM1_S)
+#define CVMVMCONF_RMMUSIZEM1_S 0
+#define CVMVMCONF_RMMUSIZEM1 (_U64CAST_(0xff) << CVMVMCONF_RMMUSIZEM1_S)
+
/*
* Coprocessor 1 (FPU) register names
*/
@@ -1720,6 +1749,13 @@ do { \
#define read_c0_cvmmemctl() __read_64bit_c0_register($11, 7)
#define write_c0_cvmmemctl(val) __write_64bit_c0_register($11, 7, val)
+
+#define read_c0_cvmmemctl2() __read_64bit_c0_register($16, 6)
+#define write_c0_cvmmemctl2(val) __write_64bit_c0_register($16, 6, val)
+
+#define read_c0_cvmvmconfig() __read_64bit_c0_register($16, 7)
+#define write_c0_cvmvmconfig(val) __write_64bit_c0_register($16, 7, val)
+
/*
* The cacheerr registers are not standardized. On OCTEON, they are
* 64 bits wide.
@@ -1989,6 +2025,8 @@ do { \
#define read_gc0_epc() __read_ulong_gc0_register(14, 0)
#define write_gc0_epc(val) __write_ulong_gc0_register(14, 0, val)
+#define read_gc0_prid() __read_32bit_gc0_register(15, 0)
+
#define read_gc0_ebase() __read_32bit_gc0_register(15, 1)
#define write_gc0_ebase(val) __write_32bit_gc0_register(15, 1, val)
@@ -2012,6 +2050,9 @@ do { \
#define write_gc0_config6(val) __write_32bit_gc0_register(16, 6, val)
#define write_gc0_config7(val) __write_32bit_gc0_register(16, 7, val)
+#define read_gc0_lladdr() __read_ulong_gc0_register(17, 0)
+#define write_gc0_lladdr(val) __write_ulong_gc0_register(17, 0, val)
+
#define read_gc0_watchlo0() __read_ulong_gc0_register(18, 0)
#define read_gc0_watchlo1() __read_ulong_gc0_register(18, 1)
#define read_gc0_watchlo2() __read_ulong_gc0_register(18, 2)
@@ -2090,6 +2131,19 @@ do { \
#define write_gc0_kscratch5(val) __write_ulong_gc0_register(31, 6, val)
#define write_gc0_kscratch6(val) __write_ulong_gc0_register(31, 7, val)
+/* Cavium OCTEON (cnMIPS) */
+#define read_gc0_cvmcount() __read_ulong_gc0_register(9, 6)
+#define write_gc0_cvmcount(val) __write_ulong_gc0_register(9, 6, val)
+
+#define read_gc0_cvmctl() __read_64bit_gc0_register(9, 7)
+#define write_gc0_cvmctl(val) __write_64bit_gc0_register(9, 7, val)
+
+#define read_gc0_cvmmemctl() __read_64bit_gc0_register(11, 7)
+#define write_gc0_cvmmemctl(val) __write_64bit_gc0_register(11, 7, val)
+
+#define read_gc0_cvmmemctl2() __read_64bit_gc0_register(16, 6)
+#define write_gc0_cvmmemctl2(val) __write_64bit_gc0_register(16, 6, val)
+
/*
* Macros to access the floating point coprocessor control registers
*/
@@ -2696,9 +2750,11 @@ __BUILD_SET_C0(brcm_mode)
*/
#define __BUILD_SET_GC0(name) __BUILD_SET_COMMON(gc0_##name)
+__BUILD_SET_GC0(wired)
__BUILD_SET_GC0(status)
__BUILD_SET_GC0(cause)
__BUILD_SET_GC0(ebase)
+__BUILD_SET_GC0(config1)
/*
* Return low 10 bits of ebase.
diff --git a/arch/mips/include/asm/tlb.h b/arch/mips/include/asm/tlb.h
index dd179fd..939734d 100644
--- a/arch/mips/include/asm/tlb.h
+++ b/arch/mips/include/asm/tlb.h
@@ -21,9 +21,11 @@
*/
#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-#define UNIQUE_ENTRYHI(idx) \
- ((CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) | \
+#define _UNIQUE_ENTRYHI(base, idx) \
+ (((base) + ((idx) << (PAGE_SHIFT + 1))) | \
(cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0))
+#define UNIQUE_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG0, idx)
+#define UNIQUE_GUEST_ENTRYHI(idx) _UNIQUE_ENTRYHI(CKSEG1, idx)
static inline unsigned int num_wired_entries(void)
{
diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h
index 77429d1..b5e46ae 100644
--- a/arch/mips/include/uapi/asm/inst.h
+++ b/arch/mips/include/uapi/asm/inst.h
@@ -179,7 +179,7 @@ enum cop0_coi_func {
tlbr_op = 0x01, tlbwi_op = 0x02,
tlbwr_op = 0x06, tlbp_op = 0x08,
rfe_op = 0x10, eret_op = 0x18,
- wait_op = 0x20,
+ wait_op = 0x20, hypcall_op = 0x28
};
/*
diff --git a/arch/mips/include/uapi/asm/kvm.h b/arch/mips/include/uapi/asm/kvm.h
index a8a0199..0318c6b 100644
--- a/arch/mips/include/uapi/asm/kvm.h
+++ b/arch/mips/include/uapi/asm/kvm.h
@@ -21,6 +21,8 @@
#define __KVM_HAVE_READONLY_MEM
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
/*
* for KVM_GET_REGS and KVM_SET_REGS
*
@@ -54,9 +56,14 @@ struct kvm_fpu {
* Register set = 0: GP registers from kvm_regs (see definitions below).
*
* Register set = 1: CP0 registers.
- * bits[15..8] - Must be zero.
- * bits[7..3] - Register 'rd' index.
- * bits[2..0] - Register 'sel' index.
+ * bits[15..8] - COP0 register set.
+ *
+ * COP0 register set = 0: Main CP0 registers.
+ * bits[7..3] - Register 'rd' index.
+ * bits[2..0] - Register 'sel' index.
+ *
+ * COP0 register set = 1: MAARs.
+ * bits[7..0] - MAAR index.
*
* Register set = 2: KVM specific registers (see definitions below).
*
@@ -115,6 +122,15 @@ struct kvm_fpu {
/*
+ * KVM_REG_MIPS_CP0 - Coprocessor 0 registers.
+ */
+
+#define KVM_REG_MIPS_MAAR (KVM_REG_MIPS_CP0 | (1 << 8))
+#define KVM_REG_MIPS_CP0_MAAR(n) (KVM_REG_MIPS_MAAR | \
+ KVM_REG_SIZE_U64 | (n))
+
+
+/*
* KVM_REG_MIPS_KVM - KVM specific control registers.
*/
diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c
index 07718bb..c72a4cd 100644
--- a/arch/mips/kernel/cpu-probe.c
+++ b/arch/mips/kernel/cpu-probe.c
@@ -289,6 +289,8 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c)
MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) {
if (c->fpu_id & MIPS_FPIR_3D)
c->ases |= MIPS_ASE_MIPS3D;
+ if (c->fpu_id & MIPS_FPIR_UFRP)
+ c->options |= MIPS_CPU_UFR;
if (c->fpu_id & MIPS_FPIR_FREP)
c->options |= MIPS_CPU_FRE;
}
@@ -1003,7 +1005,8 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
unsigned int config3, config3_dyn;
probe_gc0_config_dyn(config3, config3, config3_dyn,
- MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_CTXTC);
+ MIPS_CONF_M | MIPS_CONF3_MSA | MIPS_CONF3_ULRI |
+ MIPS_CONF3_CTXTC);
if (config3 & MIPS_CONF3_CTXTC)
c->guest.options |= MIPS_CPU_CTXTC;
@@ -1013,6 +1016,9 @@ static inline unsigned int decode_guest_config3(struct cpuinfo_mips *c)
if (config3 & MIPS_CONF3_PW)
c->guest.options |= MIPS_CPU_HTW;
+ if (config3 & MIPS_CONF3_ULRI)
+ c->guest.options |= MIPS_CPU_ULRI;
+
if (config3 & MIPS_CONF3_SC)
c->guest.options |= MIPS_CPU_SEGMENTS;
@@ -1051,7 +1057,7 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
unsigned int config5, config5_dyn;
probe_gc0_config_dyn(config5, config5, config5_dyn,
- MIPS_CONF_M | MIPS_CONF5_MRP);
+ MIPS_CONF_M | MIPS_CONF5_MVH | MIPS_CONF5_MRP);
if (config5 & MIPS_CONF5_MRP)
c->guest.options |= MIPS_CPU_MAAR;
@@ -1061,6 +1067,9 @@ static inline unsigned int decode_guest_config5(struct cpuinfo_mips *c)
if (config5 & MIPS_CONF5_LLB)
c->guest.options |= MIPS_CPU_RW_LLB;
+ if (config5 & MIPS_CONF5_MVH)
+ c->guest.options |= MIPS_CPU_MVH;
+
if (config5 & MIPS_CONF_M)
c->guest.conf |= BIT(6);
return config5 & MIPS_CONF_M;
diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c
index a7f8126..c036157 100644
--- a/arch/mips/kernel/time.c
+++ b/arch/mips/kernel/time.c
@@ -70,6 +70,7 @@ EXPORT_SYMBOL(perf_irq);
*/
unsigned int mips_hpt_frequency;
+EXPORT_SYMBOL_GPL(mips_hpt_frequency);
/*
* This function exists in order to cause an error due to a duplicate
diff --git a/arch/mips/kvm/Kconfig b/arch/mips/kvm/Kconfig
index 6506732..50a722d 100644
--- a/arch/mips/kvm/Kconfig
+++ b/arch/mips/kvm/Kconfig
@@ -26,11 +26,34 @@ config KVM
select SRCU
---help---
Support for hosting Guest kernels.
- Currently supported on MIPS32 processors.
+
+choice
+ prompt "Virtualization mode"
+ depends on KVM
+ default KVM_MIPS_TE
+
+config KVM_MIPS_TE
+ bool "Trap & Emulate"
+ ---help---
+ Use trap and emulate to virtualize 32-bit guests in user mode. This
+ does not require any special hardware Virtualization support beyond
+ standard MIPS32/64 r2 or later, but it does require the guest kernel
+ to be configured with CONFIG_KVM_GUEST=y so that it resides in the
+ user address segment.
+
+config KVM_MIPS_VZ
+ bool "MIPS Virtualization (VZ) ASE"
+ ---help---
+ Use the MIPS Virtualization (VZ) ASE to virtualize guests. This
+ supports running unmodified guest kernels (with CONFIG_KVM_GUEST=n),
+ but requires hardware support.
+
+endchoice
config KVM_MIPS_DYN_TRANS
bool "KVM/MIPS: Dynamic binary translation to reduce traps"
- depends on KVM
+ depends on KVM_MIPS_TE
+ default y
---help---
When running in Trap & Emulate mode patch privileged
instructions to reduce the number of traps.
diff --git a/arch/mips/kvm/Makefile b/arch/mips/kvm/Makefile
index 847429d..45d90f5 100644
--- a/arch/mips/kvm/Makefile
+++ b/arch/mips/kvm/Makefile
@@ -9,8 +9,15 @@ common-objs-$(CONFIG_CPU_HAS_MSA) += msa.o
kvm-objs := $(common-objs-y) mips.o emulate.o entry.o \
interrupt.o stats.o commpage.o \
- dyntrans.o trap_emul.o fpu.o
+ fpu.o
+kvm-objs += hypcall.o
kvm-objs += mmu.o
+ifdef CONFIG_KVM_MIPS_VZ
+kvm-objs += vz.o
+else
+kvm-objs += dyntrans.o
+kvm-objs += trap_emul.o
+endif
obj-$(CONFIG_KVM) += kvm.o
obj-y += callback.o tlb.o
diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c
index d40cfaa..4144bfa 100644
--- a/arch/mips/kvm/emulate.c
+++ b/arch/mips/kvm/emulate.c
@@ -308,7 +308,7 @@ int kvm_get_badinstrp(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
* CP0_Cause.DC bit or the count_ctl.DC bit.
* 0 otherwise (in which case CP0_Count timer is running).
*/
-static inline int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
+int kvm_mips_count_disabled(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@@ -467,7 +467,7 @@ u32 kvm_mips_read_count(struct kvm_vcpu *vcpu)
*
* Returns: The ktime at the point of freeze.
*/
-static ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
+ktime_t kvm_mips_freeze_hrtimer(struct kvm_vcpu *vcpu, u32 *count)
{
ktime_t now;
@@ -517,6 +517,82 @@ static void kvm_mips_resume_hrtimer(struct kvm_vcpu *vcpu,
}
/**
+ * kvm_mips_restore_hrtimer() - Restore hrtimer after a gap, updating expiry.
+ * @vcpu: Virtual CPU.
+ * @before: Time before Count was saved, lower bound of drift calculation.
+ * @count: CP0_Count at point of restore.
+ * @min_drift: Minimum amount of drift permitted before correction.
+ * Must be <= 0.
+ *
+ * Restores the timer from a particular @count, accounting for drift. This can
+ * be used in conjunction with kvm_mips_freeze_timer() when a hardware timer is
+ * to be used for a period of time, but the exact ktime corresponding to the
+ * final Count that must be restored is not known.
+ *
+ * It is gauranteed that a timer interrupt immediately after restore will be
+ * handled, but not if CP0_Compare is exactly at @count. That case should
+ * already be handled when the hardware timer state is saved.
+ *
+ * Assumes !kvm_mips_count_disabled(@vcpu) (guest CP0_Count timer is not
+ * stopped).
+ *
+ * Returns: Amount of correction to count_bias due to drift.
+ */
+int kvm_mips_restore_hrtimer(struct kvm_vcpu *vcpu, ktime_t before,
+ u32 count, int min_drift)
+{
+ ktime_t now, count_time;
+ u32 now_count, before_count;
+ u64 delta;
+ int drift, ret = 0;
+
+ /* Calculate expected count at before */
+ before_count = vcpu->arch.count_bias +
+ kvm_mips_ktime_to_count(vcpu, before);
+
+ /*
+ * Detect significantly negative drift, where count is lower than
+ * expected. Some negative drift is expected when hardware counter is
+ * set after kvm_mips_freeze_timer(), and it is harmless to allow the
+ * time to jump forwards a little, within reason. If the drift is too
+ * significant, adjust the bias to avoid a big Guest.CP0_Count jump.
+ */
+ drift = count - before_count;
+ if (drift < min_drift) {
+ count_time = before;
+ vcpu->arch.count_bias += drift;
+ ret = drift;
+ goto resume;
+ }
+
+ /* Calculate expected count right now */
+ now = ktime_get();
+ now_count = vcpu->arch.count_bias + kvm_mips_ktime_to_count(vcpu, now);
+
+ /*
+ * Detect positive drift, where count is higher than expected, and
+ * adjust the bias to avoid guest time going backwards.
+ */
+ drift = count - now_count;
+ if (drift > 0) {
+ count_time = now;
+ vcpu->arch.count_bias += drift;
+ ret = drift;
+ goto resume;
+ }
+
+ /* Subtract nanosecond delta to find ktime when count was read */
+ delta = (u64)(u32)(now_count - count);
+ delta = div_u64(delta * NSEC_PER_SEC, vcpu->arch.count_hz);
+ count_time = ktime_sub_ns(now, delta);
+
+resume:
+ /* Resume using the calculated ktime */
+ kvm_mips_resume_hrtimer(vcpu, count_time, count);
+ return ret;
+}
+
+/**
* kvm_mips_write_count() - Modify the count and update timer.
* @vcpu: Virtual CPU.
* @count: Guest CP0_Count value to set.
@@ -543,16 +619,15 @@ void kvm_mips_write_count(struct kvm_vcpu *vcpu, u32 count)
/**
* kvm_mips_init_count() - Initialise timer.
* @vcpu: Virtual CPU.
+ * @count_hz: Frequency of timer.
*
- * Initialise the timer to a sensible frequency, namely 100MHz, zero it, and set
- * it going if it's enabled.
+ * Initialise the timer to the specified frequency, zero it, and set it going if
+ * it's enabled.
*/
-void kvm_mips_init_count(struct kvm_vcpu *vcpu)
+void kvm_mips_init_count(struct kvm_vcpu *vcpu, unsigned long count_hz)
{
- /* 100 MHz */
- vcpu->arch.count_hz = 100*1000*1000;
- vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32,
- vcpu->arch.count_hz);
+ vcpu->arch.count_hz = count_hz;
+ vcpu->arch.count_period = div_u64((u64)NSEC_PER_SEC << 32, count_hz);
vcpu->arch.count_dyn_bias = 0;
/* Starting at 0 */
@@ -622,7 +697,9 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
struct mips_coproc *cop0 = vcpu->arch.cop0;
int dc;
u32 old_compare = kvm_read_c0_guest_compare(cop0);
- ktime_t now;
+ s32 delta = compare - old_compare;
+ u32 cause;
+ ktime_t now = ktime_set(0, 0); /* silence bogus GCC warning */
u32 count;
/* if unchanged, must just be an ack */
@@ -634,6 +711,21 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
return;
}
+ /*
+ * If guest CP0_Compare moves forward, CP0_GTOffset should be adjusted
+ * too to prevent guest CP0_Count hitting guest CP0_Compare.
+ *
+ * The new GTOffset corresponds to the new value of CP0_Compare, and is
+ * set prior to it being written into the guest context. We disable
+ * preemption until the new value is written to prevent restore of a
+ * GTOffset corresponding to the old CP0_Compare value.
+ */
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta > 0) {
+ preempt_disable();
+ write_c0_gtoffset(compare - read_c0_count());
+ back_to_back_c0_hazard();
+ }
+
/* freeze_hrtimer() takes care of timer interrupts <= count */
dc = kvm_mips_count_disabled(vcpu);
if (!dc)
@@ -641,12 +733,36 @@ void kvm_mips_write_compare(struct kvm_vcpu *vcpu, u32 compare, bool ack)
if (ack)
kvm_mips_callbacks->dequeue_timer_int(vcpu);
+ else if (IS_ENABLED(CONFIG_KVM_MIPS_VZ))
+ /*
+ * With VZ, writing CP0_Compare acks (clears) CP0_Cause.TI, so
+ * preserve guest CP0_Cause.TI if we don't want to ack it.
+ */
+ cause = kvm_read_c0_guest_cause(cop0);
kvm_write_c0_guest_compare(cop0, compare);
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ if (delta > 0)
+ preempt_enable();
+
+ back_to_back_c0_hazard();
+
+ if (!ack && cause & CAUSEF_TI)
+ kvm_write_c0_guest_cause(cop0, cause);
+ }
+
/* resume_hrtimer() takes care of timer interrupts > count */
if (!dc)
kvm_mips_resume_hrtimer(vcpu, now, count);
+
+ /*
+ * If guest CP0_Compare is moving backward, we delay CP0_GTOffset change
+ * until after the new CP0_Compare is written, otherwise new guest
+ * CP0_Count could hit new guest CP0_Compare.
+ */
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && delta <= 0)
+ write_c0_gtoffset(compare - read_c0_count());
}
/**
@@ -857,6 +973,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
++vcpu->stat.wait_exits;
trace_kvm_exit(vcpu, KVM_TRACE_EXIT_WAIT);
if (!vcpu->arch.pending_exceptions) {
+ kvm_vz_lose_htimer(vcpu);
vcpu->arch.wait = 1;
kvm_vcpu_block(vcpu);
@@ -865,7 +982,7 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
* check if any I/O interrupts are pending.
*/
if (kvm_check_request(KVM_REQ_UNHALT, vcpu)) {
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
}
}
@@ -873,17 +990,62 @@ enum emulation_result kvm_mips_emul_wait(struct kvm_vcpu *vcpu)
return EMULATE_DONE;
}
-/*
- * XXXKYMA: Linux doesn't seem to use TLBR, return EMULATE_FAIL for now so that
- * we can catch this, if things ever change
- */
+static void kvm_mips_change_entryhi(struct kvm_vcpu *vcpu,
+ unsigned long entryhi)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
+ int cpu, i;
+ u32 nasid = entryhi & KVM_ENTRYHI_ASID;
+
+ if (((kvm_read_c0_guest_entryhi(cop0) & KVM_ENTRYHI_ASID) != nasid)) {
+ trace_kvm_asid_change(vcpu, kvm_read_c0_guest_entryhi(cop0) &
+ KVM_ENTRYHI_ASID, nasid);
+
+ /*
+ * Flush entries from the GVA page tables.
+ * Guest user page table will get flushed lazily on re-entry to
+ * guest user if the guest ASID actually changes.
+ */
+ kvm_mips_flush_gva_pt(kern_mm->pgd, KMF_KERN);
+
+ /*
+ * Regenerate/invalidate kernel MMU context.
+ * The user MMU context will be regenerated lazily on re-entry
+ * to guest user if the guest ASID actually changes.
+ */
+ preempt_disable();
+ cpu = smp_processor_id();
+ get_new_mmu_context(kern_mm, cpu);
+ for_each_possible_cpu(i)
+ if (i != cpu)
+ cpu_context(i, kern_mm) = 0;
+ preempt_enable();
+ }
+ kvm_write_c0_guest_entryhi(cop0, entryhi);
+}
+
enum emulation_result kvm_mips_emul_tlbr(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
+ struct kvm_mips_tlb *tlb;
unsigned long pc = vcpu->arch.pc;
+ int index;
- kvm_err("[%#lx] COP0_TLBR [%ld]\n", pc, kvm_read_c0_guest_index(cop0));
- return EMULATE_FAIL;
+ index = kvm_read_c0_guest_index(cop0);
+ if (index < 0 || index >= KVM_MIPS_GUEST_TLB_SIZE) {
+ /* UNDEFINED */
+ kvm_debug("[%#lx] TLBR Index %#x out of range\n", pc, index);
+ index &= KVM_MIPS_GUEST_TLB_SIZE - 1;
+ }
+
+ tlb = &vcpu->arch.guest_tlb[index];
+ kvm_write_c0_guest_pagemask(cop0, tlb->tlb_mask);
+ kvm_write_c0_guest_entrylo0(cop0, tlb->tlb_lo[0]);
+ kvm_write_c0_guest_entrylo1(cop0, tlb->tlb_lo[1]);
+ kvm_mips_change_entryhi(vcpu, tlb->tlb_hi);
+
+ return EMULATE_DONE;
}
/**
@@ -1105,11 +1267,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
- struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
enum emulation_result er = EMULATE_DONE;
u32 rt, rd, sel;
unsigned long curr_pc;
- int cpu, i;
/*
* Update PC and hold onto current PC in case there is
@@ -1143,6 +1303,9 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
case wait_op:
er = kvm_mips_emul_wait(vcpu);
break;
+ case hypcall_op:
+ er = kvm_mips_emul_hypcall(vcpu, inst);
+ break;
}
} else {
rt = inst.c0r_format.rt;
@@ -1208,44 +1371,8 @@ enum emulation_result kvm_mips_emulate_CP0(union mips_instruction inst,
kvm_change_c0_guest_ebase(cop0, 0x1ffff000,
vcpu->arch.gprs[rt]);
} else if (rd == MIPS_CP0_TLB_HI && sel == 0) {
- u32 nasid =
- vcpu->arch.gprs[rt] & KVM_ENTRYHI_ASID;
- if (((kvm_read_c0_guest_entryhi(cop0) &
- KVM_ENTRYHI_ASID) != nasid)) {
- trace_kvm_asid_change(vcpu,
- kvm_read_c0_guest_entryhi(cop0)
- & KVM_ENTRYHI_ASID,
- nasid);
-
- /*
- * Flush entries from the GVA page
- * tables.
- * Guest user page table will get
- * flushed lazily on re-entry to guest
- * user if the guest ASID actually
- * changes.
- */
- kvm_mips_flush_gva_pt(kern_mm->pgd,
- KMF_KERN);
-
- /*
- * Regenerate/invalidate kernel MMU
- * context.
- * The user MMU context will be
- * regenerated lazily on re-entry to
- * guest user if the guest ASID actually
- * changes.
- */
- preempt_disable();
- cpu = smp_processor_id();
- get_new_mmu_context(kern_mm, cpu);
- for_each_possible_cpu(i)
- if (i != cpu)
- cpu_context(i, kern_mm) = 0;
- preempt_enable();
- }
- kvm_write_c0_guest_entryhi(cop0,
- vcpu->arch.gprs[rt]);
+ kvm_mips_change_entryhi(vcpu,
+ vcpu->arch.gprs[rt]);
}
/* Are we writing to COUNT */
else if ((rd == MIPS_CP0_COUNT) && (sel == 0)) {
@@ -1474,9 +1601,8 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
- enum emulation_result er = EMULATE_DO_MMIO;
+ enum emulation_result er;
u32 rt;
- u32 bytes;
void *data = run->mmio.data;
unsigned long curr_pc;
@@ -1491,103 +1617,74 @@ enum emulation_result kvm_mips_emulate_store(union mips_instruction inst,
rt = inst.i_format.rt;
+ run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+ vcpu->arch.host_cp0_badvaddr);
+ if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+ goto out_fail;
+
switch (inst.i_format.opcode) {
- case sb_op:
- bytes = 1;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.len = bytes;
- run->mmio.is_write = 1;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 1;
- *(u8 *) data = vcpu->arch.gprs[rt];
- kvm_debug("OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
- vcpu->arch.host_cp0_badvaddr, vcpu->arch.gprs[rt],
- *(u8 *) data);
+#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ)
+ case sd_op:
+ run->mmio.len = 8;
+ *(u64 *)data = vcpu->arch.gprs[rt];
+ kvm_debug("[%#lx] OP_SD: eaddr: %#lx, gpr: %#lx, data: %#llx\n",
+ vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+ vcpu->arch.gprs[rt], *(u64 *)data);
break;
+#endif
case sw_op:
- bytes = 4;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 1;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 1;
- *(u32 *) data = vcpu->arch.gprs[rt];
+ run->mmio.len = 4;
+ *(u32 *)data = vcpu->arch.gprs[rt];
kvm_debug("[%#lx] OP_SW: eaddr: %#lx, gpr: %#lx, data: %#x\n",
vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
- vcpu->arch.gprs[rt], *(u32 *) data);
+ vcpu->arch.gprs[rt], *(u32 *)data);
break;
case sh_op:
- bytes = 2;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 1;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 1;
- *(u16 *) data = vcpu->arch.gprs[rt];
+ run->mmio.len = 2;
+ *(u16 *)data = vcpu->arch.gprs[rt];
kvm_debug("[%#lx] OP_SH: eaddr: %#lx, gpr: %#lx, data: %#x\n",
vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
- vcpu->arch.gprs[rt], *(u32 *) data);
+ vcpu->arch.gprs[rt], *(u16 *)data);
+ break;
+
+ case sb_op:
+ run->mmio.len = 1;
+ *(u8 *)data = vcpu->arch.gprs[rt];
+
+ kvm_debug("[%#lx] OP_SB: eaddr: %#lx, gpr: %#lx, data: %#x\n",
+ vcpu->arch.pc, vcpu->arch.host_cp0_badvaddr,
+ vcpu->arch.gprs[rt], *(u8 *)data);
break;
default:
kvm_err("Store not yet supported (inst=0x%08x)\n",
inst.word);
- er = EMULATE_FAIL;
- break;
+ goto out_fail;
}
- /* Rollback PC if emulation was unsuccessful */
- if (er == EMULATE_FAIL)
- vcpu->arch.pc = curr_pc;
+ run->mmio.is_write = 1;
+ vcpu->mmio_needed = 1;
+ vcpu->mmio_is_write = 1;
+ return EMULATE_DO_MMIO;
- return er;
+out_fail:
+ /* Rollback PC if emulation was unsuccessful */
+ vcpu->arch.pc = curr_pc;
+ return EMULATE_FAIL;
}
enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
u32 cause, struct kvm_run *run,
struct kvm_vcpu *vcpu)
{
- enum emulation_result er = EMULATE_DO_MMIO;
+ enum emulation_result er;
unsigned long curr_pc;
u32 op, rt;
- u32 bytes;
rt = inst.i_format.rt;
op = inst.i_format.opcode;
@@ -1606,96 +1703,53 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst,
vcpu->arch.io_gpr = rt;
+ run->mmio.phys_addr = kvm_mips_callbacks->gva_to_gpa(
+ vcpu->arch.host_cp0_badvaddr);
+ if (run->mmio.phys_addr == KVM_INVALID_ADDR)
+ return EMULATE_FAIL;
+
+ vcpu->mmio_needed = 2; /* signed */
switch (op) {
- case lw_op:
- bytes = 4;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
+#if defined(CONFIG_64BIT) && defined(CONFIG_KVM_MIPS_VZ)
+ case ld_op:
+ run->mmio.len = 8;
+ break;
- run->mmio.len = bytes;
- run->mmio.is_write = 0;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 0;
+ case lwu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ /* fall through */
+#endif
+ case lw_op:
+ run->mmio.len = 4;
break;
- case lh_op:
case lhu_op:
- bytes = 2;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 0;
- vcpu->mmio_needed = 1;
- vcpu->mmio_is_write = 0;
-
- if (op == lh_op)
- vcpu->mmio_needed = 2;
- else
- vcpu->mmio_needed = 1;
-
+ vcpu->mmio_needed = 1; /* unsigned */
+ /* fall through */
+ case lh_op:
+ run->mmio.len = 2;
break;
case lbu_op:
+ vcpu->mmio_needed = 1; /* unsigned */
+ /* fall through */
case lb_op:
- bytes = 1;
- if (bytes > sizeof(run->mmio.data)) {
- kvm_err("%s: bad MMIO length: %d\n", __func__,
- run->mmio.len);
- er = EMULATE_FAIL;
- break;
- }
- run->mmio.phys_addr =
- kvm_mips_callbacks->gva_to_gpa(vcpu->arch.
- host_cp0_badvaddr);
- if (run->mmio.phys_addr == KVM_INVALID_ADDR) {
- er = EMULATE_FAIL;
- break;
- }
-
- run->mmio.len = bytes;
- run->mmio.is_write = 0;
- vcpu->mmio_is_write = 0;
-
- if (op == lb_op)
- vcpu->mmio_needed = 2;
- else
- vcpu->mmio_needed = 1;
-
+ run->mmio.len = 1;
break;
default:
kvm_err("Load not yet supported (inst=0x%08x)\n",
inst.word);
- er = EMULATE_FAIL;
- break;
+ vcpu->mmio_needed = 0;
+ return EMULATE_FAIL;
}
- return er;
+ run->mmio.is_write = 0;
+ vcpu->mmio_is_write = 0;
+ return EMULATE_DO_MMIO;
}
+#ifndef CONFIG_KVM_MIPS_VZ
static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long),
unsigned long curr_pc,
unsigned long addr,
@@ -1786,11 +1840,35 @@ enum emulation_result kvm_mips_emulate_cache(union mips_instruction inst,
vcpu->arch.pc, vcpu->arch.gprs[31], cache, op, base,
arch->gprs[base], offset);
- if (cache == Cache_D)
+ if (cache == Cache_D) {
+#ifdef CONFIG_CPU_R4K_CACHE_TLB
r4k_blast_dcache();
- else if (cache == Cache_I)
+#else
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* locally flush icache */
+ local_flush_icache_range(0, 0);
+ break;
+ default:
+ __flush_cache_all();
+ break;
+ }
+#endif
+ } else if (cache == Cache_I) {
+#ifdef CONFIG_CPU_R4K_CACHE_TLB
r4k_blast_icache();
- else {
+#else
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* locally flush icache */
+ local_flush_icache_range(0, 0);
+ break;
+ default:
+ flush_icache_all();
+ break;
+ }
+#endif
+ } else {
kvm_err("%s: unsupported CACHE INDEX operation\n",
__func__);
return EMULATE_FAIL;
@@ -1870,18 +1948,6 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc,
case cop0_op:
er = kvm_mips_emulate_CP0(inst, opc, cause, run, vcpu);
break;
- case sb_op:
- case sh_op:
- case sw_op:
- er = kvm_mips_emulate_store(inst, cause, run, vcpu);
- break;
- case lb_op:
- case lbu_op:
- case lhu_op:
- case lh_op:
- case lw_op:
- er = kvm_mips_emulate_load(inst, cause, run, vcpu);
- break;
#ifndef CONFIG_CPU_MIPSR6
case cache_op:
@@ -1915,6 +1981,7 @@ unknown:
return er;
}
+#endif /* CONFIG_KVM_MIPS_VZ */
/**
* kvm_mips_guest_exception_base() - Find guest exception vector base address.
@@ -2524,8 +2591,15 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu,
vcpu->arch.pc = vcpu->arch.io_pc;
switch (run->mmio.len) {
+ case 8:
+ *gpr = *(s64 *)run->mmio.data;
+ break;
+
case 4:
- *gpr = *(s32 *) run->mmio.data;
+ if (vcpu->mmio_needed == 2)
+ *gpr = *(s32 *)run->mmio.data;
+ else
+ *gpr = *(u32 *)run->mmio.data;
break;
case 2:
diff --git a/arch/mips/kvm/entry.c b/arch/mips/kvm/entry.c
index c5b254c..16e1c93 100644
--- a/arch/mips/kvm/entry.c
+++ b/arch/mips/kvm/entry.c
@@ -51,12 +51,15 @@
#define RA 31
/* Some CP0 registers */
+#define C0_PWBASE 5, 5
#define C0_HWRENA 7, 0
#define C0_BADVADDR 8, 0
#define C0_BADINSTR 8, 1
#define C0_BADINSTRP 8, 2
#define C0_ENTRYHI 10, 0
+#define C0_GUESTCTL1 10, 4
#define C0_STATUS 12, 0
+#define C0_GUESTCTL0 12, 6
#define C0_CAUSE 13, 0
#define C0_EPC 14, 0
#define C0_EBASE 15, 1
@@ -292,8 +295,8 @@ static void *kvm_mips_build_enter_guest(void *addr)
unsigned int i;
struct uasm_label labels[2];
struct uasm_reloc relocs[2];
- struct uasm_label *l = labels;
- struct uasm_reloc *r = relocs;
+ struct uasm_label __maybe_unused *l = labels;
+ struct uasm_reloc __maybe_unused *r = relocs;
memset(labels, 0, sizeof(labels));
memset(relocs, 0, sizeof(relocs));
@@ -302,7 +305,67 @@ static void *kvm_mips_build_enter_guest(void *addr)
UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, pc), K1);
UASM_i_MTC0(&p, T0, C0_EPC);
- /* Set the ASID for the Guest Kernel */
+#ifdef CONFIG_KVM_MIPS_VZ
+ /* Save normal linux process pgd (VZ guarantees pgd_reg is set) */
+ UASM_i_MFC0(&p, K0, c0_kscratch(), pgd_reg);
+ UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_pgd), K1);
+
+ /*
+ * Set up KVM GPA pgd.
+ * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
+ * - call tlbmiss_handler_setup_pgd(mm->pgd)
+ * - write mm->pgd into CP0_PWBase
+ *
+ * We keep S0 pointing at struct kvm so we can load the ASID below.
+ */
+ UASM_i_LW(&p, S0, (int)offsetof(struct kvm_vcpu, kvm) -
+ (int)offsetof(struct kvm_vcpu, arch), K1);
+ UASM_i_LW(&p, A0, offsetof(struct kvm, arch.gpa_mm.pgd), S0);
+ UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
+ uasm_i_jalr(&p, RA, T9);
+ /* delay slot */
+ if (cpu_has_htw)
+ UASM_i_MTC0(&p, A0, C0_PWBASE);
+ else
+ uasm_i_nop(&p);
+
+ /* Set GM bit to setup eret to VZ guest context */
+ uasm_i_addiu(&p, V1, ZERO, 1);
+ uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
+ uasm_i_ins(&p, K0, V1, MIPS_GCTL0_GM_SHIFT, 1);
+ uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
+
+ if (cpu_has_guestid) {
+ /*
+ * Set root mode GuestID, so that root TLB refill handler can
+ * use the correct GuestID in the root TLB.
+ */
+
+ /* Get current GuestID */
+ uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
+ /* Set GuestCtl1.RID = GuestCtl1.ID */
+ uasm_i_ext(&p, T1, T0, MIPS_GCTL1_ID_SHIFT,
+ MIPS_GCTL1_ID_WIDTH);
+ uasm_i_ins(&p, T0, T1, MIPS_GCTL1_RID_SHIFT,
+ MIPS_GCTL1_RID_WIDTH);
+ uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
+
+ /* GuestID handles dealiasing so we don't need to touch ASID */
+ goto skip_asid_restore;
+ }
+
+ /* Root ASID Dealias (RAD) */
+
+ /* Save host ASID */
+ UASM_i_MFC0(&p, K0, C0_ENTRYHI);
+ UASM_i_SW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
+ K1);
+
+ /* Set the root ASID for the Guest */
+ UASM_i_ADDIU(&p, T1, S0,
+ offsetof(struct kvm, arch.gpa_mm.context.asid));
+#else
+ /* Set the ASID for the Guest Kernel or User */
UASM_i_LW(&p, T0, offsetof(struct kvm_vcpu_arch, cop0), K1);
UASM_i_LW(&p, T0, offsetof(struct mips_coproc, reg[MIPS_CP0_STATUS][0]),
T0);
@@ -315,6 +378,7 @@ static void *kvm_mips_build_enter_guest(void *addr)
UASM_i_ADDIU(&p, T1, K1, offsetof(struct kvm_vcpu_arch,
guest_user_mm.context.asid));
uasm_l_kernel_asid(&l, p);
+#endif
/* t1: contains the base of the ASID array, need to get the cpu id */
/* smp_processor_id */
@@ -339,6 +403,7 @@ static void *kvm_mips_build_enter_guest(void *addr)
uasm_i_andi(&p, K0, K0, MIPS_ENTRYHI_ASID);
#endif
+#ifndef CONFIG_KVM_MIPS_VZ
/*
* Set up KVM T&E GVA pgd.
* This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
@@ -351,7 +416,11 @@ static void *kvm_mips_build_enter_guest(void *addr)
UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
uasm_i_jalr(&p, RA, T9);
uasm_i_mtc0(&p, K0, C0_ENTRYHI);
-
+#else
+ /* Set up KVM VZ root ASID (!guestid) */
+ uasm_i_mtc0(&p, K0, C0_ENTRYHI);
+skip_asid_restore:
+#endif
uasm_i_ehb(&p);
/* Disable RDHWR access */
@@ -559,13 +628,10 @@ void *kvm_mips_build_exit(void *addr)
/* Now that context has been saved, we can use other registers */
/* Restore vcpu */
- UASM_i_MFC0(&p, A1, scratch_vcpu[0], scratch_vcpu[1]);
- uasm_i_move(&p, S1, A1);
+ UASM_i_MFC0(&p, S1, scratch_vcpu[0], scratch_vcpu[1]);
/* Restore run (vcpu->run) */
- UASM_i_LW(&p, A0, offsetof(struct kvm_vcpu, run), A1);
- /* Save pointer to run in s0, will be saved by the compiler */
- uasm_i_move(&p, S0, A0);
+ UASM_i_LW(&p, S0, offsetof(struct kvm_vcpu, run), S1);
/*
* Save Host level EPC, BadVaddr and Cause to VCPU, useful to process
@@ -641,6 +707,52 @@ void *kvm_mips_build_exit(void *addr)
uasm_l_msa_1(&l, p);
}
+#ifdef CONFIG_KVM_MIPS_VZ
+ /* Restore host ASID */
+ if (!cpu_has_guestid) {
+ UASM_i_LW(&p, K0, offsetof(struct kvm_vcpu_arch, host_entryhi),
+ K1);
+ UASM_i_MTC0(&p, K0, C0_ENTRYHI);
+ }
+
+ /*
+ * Set up normal Linux process pgd.
+ * This does roughly the same as TLBMISS_HANDLER_SETUP_PGD():
+ * - call tlbmiss_handler_setup_pgd(mm->pgd)
+ * - write mm->pgd into CP0_PWBase
+ */
+ UASM_i_LW(&p, A0,
+ offsetof(struct kvm_vcpu_arch, host_pgd), K1);
+ UASM_i_LA(&p, T9, (unsigned long)tlbmiss_handler_setup_pgd);
+ uasm_i_jalr(&p, RA, T9);
+ /* delay slot */
+ if (cpu_has_htw)
+ UASM_i_MTC0(&p, A0, C0_PWBASE);
+ else
+ uasm_i_nop(&p);
+
+ /* Clear GM bit so we don't enter guest mode when EXL is cleared */
+ uasm_i_mfc0(&p, K0, C0_GUESTCTL0);
+ uasm_i_ins(&p, K0, ZERO, MIPS_GCTL0_GM_SHIFT, 1);
+ uasm_i_mtc0(&p, K0, C0_GUESTCTL0);
+
+ /* Save GuestCtl0 so we can access GExcCode after CPU migration */
+ uasm_i_sw(&p, K0,
+ offsetof(struct kvm_vcpu_arch, host_cp0_guestctl0), K1);
+
+ if (cpu_has_guestid) {
+ /*
+ * Clear root mode GuestID, so that root TLB operations use the
+ * root GuestID in the root TLB.
+ */
+ uasm_i_mfc0(&p, T0, C0_GUESTCTL1);
+ /* Set GuestCtl1.RID = MIPS_GCTL1_ROOT_GUESTID (i.e. 0) */
+ uasm_i_ins(&p, T0, ZERO, MIPS_GCTL1_RID_SHIFT,
+ MIPS_GCTL1_RID_WIDTH);
+ uasm_i_mtc0(&p, T0, C0_GUESTCTL1);
+ }
+#endif
+
/* Now that the new EBASE has been loaded, unset BEV and KSU_USER */
uasm_i_addiu(&p, AT, ZERO, ~(ST0_EXL | KSU_USER | ST0_IE));
uasm_i_and(&p, V0, V0, AT);
@@ -680,6 +792,8 @@ void *kvm_mips_build_exit(void *addr)
* Now jump to the kvm_mips_handle_exit() to see if we can deal
* with this in the kernel
*/
+ uasm_i_move(&p, A0, S0);
+ uasm_i_move(&p, A1, S1);
UASM_i_LA(&p, T9, (unsigned long)kvm_mips_handle_exit);
uasm_i_jalr(&p, RA, T9);
UASM_i_ADDIU(&p, SP, SP, -CALLFRAME_SIZ);
diff --git a/arch/mips/kvm/hypcall.c b/arch/mips/kvm/hypcall.c
new file mode 100644
index 0000000..8306343
--- /dev/null
+++ b/arch/mips/kvm/hypcall.c
@@ -0,0 +1,53 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * KVM/MIPS: Hypercall handling.
+ *
+ * Copyright (C) 2015 Imagination Technologies Ltd.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/kvm_para.h>
+
+#define MAX_HYPCALL_ARGS 4
+
+enum emulation_result kvm_mips_emul_hypcall(struct kvm_vcpu *vcpu,
+ union mips_instruction inst)
+{
+ unsigned int code = (inst.co_format.code >> 5) & 0x3ff;
+
+ kvm_debug("[%#lx] HYPCALL %#03x\n", vcpu->arch.pc, code);
+
+ switch (code) {
+ case 0:
+ return EMULATE_HYPERCALL;
+ default:
+ return EMULATE_FAIL;
+ };
+}
+
+static int kvm_mips_hypercall(struct kvm_vcpu *vcpu, unsigned long num,
+ const unsigned long *args, unsigned long *hret)
+{
+ /* Report unimplemented hypercall to guest */
+ *hret = -KVM_ENOSYS;
+ return RESUME_GUEST;
+}
+
+int kvm_mips_handle_hypcall(struct kvm_vcpu *vcpu)
+{
+ unsigned long num, args[MAX_HYPCALL_ARGS];
+
+ /* read hypcall number and arguments */
+ num = vcpu->arch.gprs[2]; /* v0 */
+ args[0] = vcpu->arch.gprs[4]; /* a0 */
+ args[1] = vcpu->arch.gprs[5]; /* a1 */
+ args[2] = vcpu->arch.gprs[6]; /* a2 */
+ args[3] = vcpu->arch.gprs[7]; /* a3 */
+
+ return kvm_mips_hypercall(vcpu, num,
+ args, &vcpu->arch.gprs[2] /* v0 */);
+}
diff --git a/arch/mips/kvm/interrupt.h b/arch/mips/kvm/interrupt.h
index fb118a2..3bf0a49 100644
--- a/arch/mips/kvm/interrupt.h
+++ b/arch/mips/kvm/interrupt.h
@@ -30,8 +30,13 @@
#define C_TI (_ULCAST_(1) << 30)
+#ifdef CONFIG_KVM_MIPS_VZ
+#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (1)
+#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (1)
+#else
#define KVM_MIPS_IRQ_DELIVER_ALL_AT_ONCE (0)
#define KVM_MIPS_IRQ_CLEAR_ALL_AT_ONCE (0)
+#endif
void kvm_mips_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
void kvm_mips_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority);
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 15a1b17..d4b2ad1 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -59,6 +59,16 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU },
{ "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU },
{ "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU },
+#ifdef CONFIG_KVM_MIPS_VZ
+ { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU },
+ { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU },
+ { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU },
+ { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU },
+ { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU },
+ { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU },
+ { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU },
+ { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU },
+#endif
{ "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU },
{ "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU },
{ "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU },
@@ -66,6 +76,19 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{NULL}
};
+bool kvm_trace_guest_mode_change;
+
+int kvm_guest_mode_change_trace_reg(void)
+{
+ kvm_trace_guest_mode_change = 1;
+ return 0;
+}
+
+void kvm_guest_mode_change_trace_unreg(void)
+{
+ kvm_trace_guest_mode_change = 0;
+}
+
/*
* XXXKYMA: We are simulatoring a processor that has the WII bit set in
* Config7, so we are "runnable" if interrupts are pending
@@ -82,7 +105,12 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
int kvm_arch_hardware_enable(void)
{
- return 0;
+ return kvm_mips_callbacks->hardware_enable();
+}
+
+void kvm_arch_hardware_disable(void)
+{
+ kvm_mips_callbacks->hardware_disable();
}
int kvm_arch_hardware_setup(void)
@@ -97,6 +125,18 @@ void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
{
+ switch (type) {
+#ifdef CONFIG_KVM_MIPS_VZ
+ case KVM_VM_MIPS_VZ:
+#else
+ case KVM_VM_MIPS_TE:
+#endif
+ break;
+ default:
+ /* Unsupported KVM type */
+ return -EINVAL;
+ };
+
/* Allocate page table to map GPA -> RPA */
kvm->arch.gpa_mm.pgd = kvm_pgd_alloc();
if (!kvm->arch.gpa_mm.pgd)
@@ -301,8 +341,10 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
/* Build guest exception vectors dynamically in unmapped memory */
handler = gebase + 0x2000;
- /* TLB refill */
+ /* TLB refill (or XTLB refill on 64-bit VZ where KX=1) */
refill_start = gebase;
+ if (IS_ENABLED(CONFIG_KVM_MIPS_VZ) && IS_ENABLED(CONFIG_64BIT))
+ refill_start += 0x080;
refill_end = kvm_mips_build_tlb_refill_exception(refill_start, handler);
/* General Exception Entry point */
@@ -353,9 +395,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id)
/* Init */
vcpu->arch.last_sched_cpu = -1;
-
- /* Start off the timer */
- kvm_mips_init_count(vcpu);
+ vcpu->arch.last_exec_cpu = -1;
return vcpu;
@@ -1030,9 +1070,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_NR_VCPUS:
r = num_online_cpus();
break;
@@ -1059,7 +1096,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = cpu_has_msa && !(boot_cpu_data.msa_id & MSA_IR_WRPF);
break;
default:
- r = 0;
+ r = kvm_mips_callbacks->check_extension(kvm, ext);
break;
}
return r;
@@ -1067,7 +1104,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
{
- return kvm_mips_pending_timer(vcpu);
+ return kvm_mips_pending_timer(vcpu) ||
+ kvm_read_c0_guest_cause(vcpu->arch.cop0) & C_TI;
}
int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
@@ -1092,7 +1130,7 @@ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu)
kvm_debug("\tlo: 0x%08lx\n", vcpu->arch.lo);
cop0 = vcpu->arch.cop0;
- kvm_debug("\tStatus: 0x%08lx, Cause: 0x%08lx\n",
+ kvm_debug("\tStatus: 0x%08x, Cause: 0x%08x\n",
kvm_read_c0_guest_status(cop0),
kvm_read_c0_guest_cause(cop0));
@@ -1208,7 +1246,8 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
/* re-enable HTW before enabling interrupts */
- htw_start();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ))
+ htw_start();
/* Set a default exit reason */
run->exit_reason = KVM_EXIT_UNKNOWN;
@@ -1226,17 +1265,20 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
cause, opc, run, vcpu);
trace_kvm_exit(vcpu, exccode);
- /*
- * Do a privilege check, if in UM most of these exit conditions end up
- * causing an exception to be delivered to the Guest Kernel
- */
- er = kvm_mips_check_privilege(cause, opc, run, vcpu);
- if (er == EMULATE_PRIV_FAIL) {
- goto skip_emul;
- } else if (er == EMULATE_FAIL) {
- run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- ret = RESUME_HOST;
- goto skip_emul;
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ /*
+ * Do a privilege check, if in UM most of these exit conditions
+ * end up causing an exception to be delivered to the Guest
+ * Kernel
+ */
+ er = kvm_mips_check_privilege(cause, opc, run, vcpu);
+ if (er == EMULATE_PRIV_FAIL) {
+ goto skip_emul;
+ } else if (er == EMULATE_FAIL) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ goto skip_emul;
+ }
}
switch (exccode) {
@@ -1267,7 +1309,7 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
break;
case EXCCODE_TLBS:
- kvm_debug("TLB ST fault: cause %#x, status %#lx, PC: %p, BadVaddr: %#lx\n",
+ kvm_debug("TLB ST fault: cause %#x, status %#x, PC: %p, BadVaddr: %#lx\n",
cause, kvm_read_c0_guest_status(vcpu->arch.cop0), opc,
badvaddr);
@@ -1328,12 +1370,17 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
ret = kvm_mips_callbacks->handle_msa_disabled(vcpu);
break;
+ case EXCCODE_GE:
+ /* defer exit accounting to handler */
+ ret = kvm_mips_callbacks->handle_guest_exit(vcpu);
+ break;
+
default:
if (cause & CAUSEF_BD)
opc += 1;
inst = 0;
kvm_get_badinstr(opc, vcpu, &inst);
- kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#lx\n",
+ kvm_err("Exception Code: %d, not yet handled, @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
exccode, opc, inst, badvaddr,
kvm_read_c0_guest_status(vcpu->arch.cop0));
kvm_arch_vcpu_dump_regs(vcpu);
@@ -1346,6 +1393,9 @@ int kvm_mips_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu)
skip_emul:
local_irq_disable();
+ if (ret == RESUME_GUEST)
+ kvm_vz_acquire_htimer(vcpu);
+
if (er == EMULATE_DONE && !(ret & RESUME_HOST))
kvm_mips_deliver_interrupts(vcpu, cause);
@@ -1391,7 +1441,8 @@ skip_emul:
}
/* Disable HTW before returning to guest or host */
- htw_stop();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ))
+ htw_stop();
return ret;
}
@@ -1527,16 +1578,18 @@ void kvm_drop_fpu(struct kvm_vcpu *vcpu)
void kvm_lose_fpu(struct kvm_vcpu *vcpu)
{
/*
- * FPU & MSA get disabled in root context (hardware) when it is disabled
- * in guest context (software), but the register state in the hardware
- * may still be in use. This is why we explicitly re-enable the hardware
- * before saving.
+ * With T&E, FPU & MSA get disabled in root context (hardware) when it
+ * is disabled in guest context (software), but the register state in
+ * the hardware may still be in use.
+ * This is why we explicitly re-enable the hardware before saving.
*/
preempt_disable();
if (cpu_has_msa && vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
- set_c0_config5(MIPS_CONF5_MSAEN);
- enable_fpu_hazard();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ set_c0_config5(MIPS_CONF5_MSAEN);
+ enable_fpu_hazard();
+ }
__kvm_save_msa(&vcpu->arch);
trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU_MSA);
@@ -1549,8 +1602,10 @@ void kvm_lose_fpu(struct kvm_vcpu *vcpu)
}
vcpu->arch.aux_inuse &= ~(KVM_MIPS_AUX_FPU | KVM_MIPS_AUX_MSA);
} else if (vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU) {
- set_c0_status(ST0_CU1);
- enable_fpu_hazard();
+ if (!IS_ENABLED(CONFIG_KVM_MIPS_VZ)) {
+ set_c0_status(ST0_CU1);
+ enable_fpu_hazard();
+ }
__kvm_save_fpu(&vcpu->arch);
vcpu->arch.aux_inuse &= ~KVM_MIPS_AUX_FPU;
diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c
index cb0faad..ee64db0 100644
--- a/arch/mips/kvm/mmu.c
+++ b/arch/mips/kvm/mmu.c
@@ -992,6 +992,22 @@ static pte_t kvm_mips_gpa_pte_to_gva_mapped(pte_t pte, long entrylo)
return kvm_mips_gpa_pte_to_gva_unmapped(pte);
}
+#ifdef CONFIG_KVM_MIPS_VZ
+int kvm_mips_handle_vz_root_tlb_fault(unsigned long badvaddr,
+ struct kvm_vcpu *vcpu,
+ bool write_fault)
+{
+ int ret;
+
+ ret = kvm_mips_map_page(vcpu, badvaddr, write_fault, NULL, NULL);
+ if (ret)
+ return ret;
+
+ /* Invalidate this entry in the TLB */
+ return kvm_vz_host_tlb_inv(vcpu, badvaddr);
+}
+#endif
+
/* XXXKYMA: Must be called with interrupts disabled */
int kvm_mips_handle_kseg0_tlb_fault(unsigned long badvaddr,
struct kvm_vcpu *vcpu,
@@ -1225,6 +1241,10 @@ int kvm_get_inst(u32 *opc, struct kvm_vcpu *vcpu, u32 *out)
{
int err;
+ if (WARN(IS_ENABLED(CONFIG_KVM_MIPS_VZ),
+ "Expect BadInstr/BadInstrP registers to be used with VZ\n"))
+ return -EINVAL;
+
retry:
kvm_trap_emul_gva_lockless_begin(vcpu);
err = get_user(*out, opc);
diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c
index 2819eb7..7c6336d 100644
--- a/arch/mips/kvm/tlb.c
+++ b/arch/mips/kvm/tlb.c
@@ -33,6 +33,25 @@
#define KVM_GUEST_PC_TLB 0
#define KVM_GUEST_SP_TLB 1
+#ifdef CONFIG_KVM_MIPS_VZ
+unsigned long GUESTID_MASK;
+EXPORT_SYMBOL_GPL(GUESTID_MASK);
+unsigned long GUESTID_FIRST_VERSION;
+EXPORT_SYMBOL_GPL(GUESTID_FIRST_VERSION);
+unsigned long GUESTID_VERSION_MASK;
+EXPORT_SYMBOL_GPL(GUESTID_VERSION_MASK);
+
+static u32 kvm_mips_get_root_asid(struct kvm_vcpu *vcpu)
+{
+ struct mm_struct *gpa_mm = &vcpu->kvm->arch.gpa_mm;
+
+ if (cpu_has_guestid)
+ return 0;
+ else
+ return cpu_asid(smp_processor_id(), gpa_mm);
+}
+#endif
+
static u32 kvm_mips_get_kernel_asid(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
@@ -166,6 +185,13 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
local_irq_restore(flags);
+ /*
+ * We don't want to get reserved instruction exceptions for missing tlb
+ * entries.
+ */
+ if (cpu_has_vtag_icache)
+ flush_icache_all();
+
if (user && idx_user >= 0)
kvm_debug("%s: Invalidated guest user entryhi %#lx @ idx %d\n",
__func__, (va & VPN2_MASK) |
@@ -179,6 +205,421 @@ int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
}
EXPORT_SYMBOL_GPL(kvm_mips_host_tlb_inv);
+#ifdef CONFIG_KVM_MIPS_VZ
+
+/* GuestID management */
+
+/**
+ * clear_root_gid() - Set GuestCtl1.RID for normal root operation.
+ */
+static inline void clear_root_gid(void)
+{
+ if (cpu_has_guestid) {
+ clear_c0_guestctl1(MIPS_GCTL1_RID);
+ mtc0_tlbw_hazard();
+ }
+}
+
+/**
+ * set_root_gid_to_guest_gid() - Set GuestCtl1.RID to match GuestCtl1.ID.
+ *
+ * Sets the root GuestID to match the current guest GuestID, for TLB operation
+ * on the GPA->RPA mappings in the root TLB.
+ *
+ * The caller must be sure to disable HTW while the root GID is set, and
+ * possibly longer if TLB registers are modified.
+ */
+static inline void set_root_gid_to_guest_gid(void)
+{
+ unsigned int guestctl1;
+
+ if (cpu_has_guestid) {
+ back_to_back_c0_hazard();
+ guestctl1 = read_c0_guestctl1();
+ guestctl1 = (guestctl1 & ~MIPS_GCTL1_RID) |
+ ((guestctl1 & MIPS_GCTL1_ID) >> MIPS_GCTL1_ID_SHIFT)
+ << MIPS_GCTL1_RID_SHIFT;
+ write_c0_guestctl1(guestctl1);
+ mtc0_tlbw_hazard();
+ }
+}
+
+int kvm_vz_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va)
+{
+ int idx;
+ unsigned long flags, old_entryhi;
+
+ local_irq_save(flags);
+ htw_stop();
+
+ /* Set root GuestID for root probe and write of guest TLB entry */
+ set_root_gid_to_guest_gid();
+
+ old_entryhi = read_c0_entryhi();
+
+ idx = _kvm_mips_host_tlb_inv((va & VPN2_MASK) |
+ kvm_mips_get_root_asid(vcpu));
+
+ write_c0_entryhi(old_entryhi);
+ clear_root_gid();
+ mtc0_tlbw_hazard();
+
+ htw_start();
+ local_irq_restore(flags);
+
+ /*
+ * We don't want to get reserved instruction exceptions for missing tlb
+ * entries.
+ */
+ if (cpu_has_vtag_icache)
+ flush_icache_all();
+
+ if (idx > 0)
+ kvm_debug("%s: Invalidated root entryhi %#lx @ idx %d\n",
+ __func__, (va & VPN2_MASK) |
+ kvm_mips_get_root_asid(vcpu), idx);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vz_host_tlb_inv);
+
+/**
+ * kvm_vz_guest_tlb_lookup() - Lookup a guest VZ TLB mapping.
+ * @vcpu: KVM VCPU pointer.
+ * @gpa: Guest virtual address in a TLB mapped guest segment.
+ * @gpa: Ponter to output guest physical address it maps to.
+ *
+ * Converts a guest virtual address in a guest TLB mapped segment to a guest
+ * physical address, by probing the guest TLB.
+ *
+ * Returns: 0 if guest TLB mapping exists for @gva. *@gpa will have been
+ * written.
+ * -EFAULT if no guest TLB mapping exists for @gva. *@gpa may not
+ * have been written.
+ */
+int kvm_vz_guest_tlb_lookup(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa)
+{
+ unsigned long o_entryhi, o_entrylo[2], o_pagemask;
+ unsigned int o_index;
+ unsigned long entrylo[2], pagemask, pagemaskbit, pa;
+ unsigned long flags;
+ int index;
+
+ /* Probe the guest TLB for a mapping */
+ local_irq_save(flags);
+ /* Set root GuestID for root probe of guest TLB entry */
+ htw_stop();
+ set_root_gid_to_guest_gid();
+
+ o_entryhi = read_gc0_entryhi();
+ o_index = read_gc0_index();
+
+ write_gc0_entryhi((o_entryhi & 0x3ff) | (gva & ~0xfffl));
+ mtc0_tlbw_hazard();
+ guest_tlb_probe();
+ tlb_probe_hazard();
+
+ index = read_gc0_index();
+ if (index < 0) {
+ /* No match, fail */
+ write_gc0_entryhi(o_entryhi);
+ write_gc0_index(o_index);
+
+ clear_root_gid();
+ htw_start();
+ local_irq_restore(flags);
+ return -EFAULT;
+ }
+
+ /* Match! read the TLB entry */
+ o_entrylo[0] = read_gc0_entrylo0();
+ o_entrylo[1] = read_gc0_entrylo1();
+ o_pagemask = read_gc0_pagemask();
+
+ mtc0_tlbr_hazard();
+ guest_tlb_read();
+ tlb_read_hazard();
+
+ entrylo[0] = read_gc0_entrylo0();
+ entrylo[1] = read_gc0_entrylo1();
+ pagemask = ~read_gc0_pagemask() & ~0x1fffl;
+
+ write_gc0_entryhi(o_entryhi);
+ write_gc0_index(o_index);
+ write_gc0_entrylo0(o_entrylo[0]);
+ write_gc0_entrylo1(o_entrylo[1]);
+ write_gc0_pagemask(o_pagemask);
+
+ clear_root_gid();
+ htw_start();
+ local_irq_restore(flags);
+
+ /* Select one of the EntryLo values and interpret the GPA */
+ pagemaskbit = (pagemask ^ (pagemask & (pagemask - 1))) >> 1;
+ pa = entrylo[!!(gva & pagemaskbit)];
+
+ /*
+ * TLB entry may have become invalid since TLB probe if physical FTLB
+ * entries are shared between threads (e.g. I6400).
+ */
+ if (!(pa & ENTRYLO_V))
+ return -EFAULT;
+
+ /*
+ * Note, this doesn't take guest MIPS32 XPA into account, where PFN is
+ * split with XI/RI in the middle.
+ */
+ pa = (pa << 6) & ~0xfffl;
+ pa |= gva & ~(pagemask | pagemaskbit);
+
+ *gpa = pa;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_vz_guest_tlb_lookup);
+
+/**
+ * kvm_vz_local_flush_roottlb_all_guests() - Flush all root TLB entries for
+ * guests.
+ *
+ * Invalidate all entries in root tlb which are GPA mappings.
+ */
+void kvm_vz_local_flush_roottlb_all_guests(void)
+{
+ unsigned long flags;
+ unsigned long old_entryhi, old_pagemask, old_guestctl1;
+ int entry;
+
+ if (WARN_ON(!cpu_has_guestid))
+ return;
+
+ local_irq_save(flags);
+ htw_stop();
+
+ /* TLBR may clobber EntryHi.ASID, PageMask, and GuestCtl1.RID */
+ old_entryhi = read_c0_entryhi();
+ old_pagemask = read_c0_pagemask();
+ old_guestctl1 = read_c0_guestctl1();
+
+ /*
+ * Invalidate guest entries in root TLB while leaving root entries
+ * intact when possible.
+ */
+ for (entry = 0; entry < current_cpu_data.tlbsize; entry++) {
+ write_c0_index(entry);
+ mtc0_tlbw_hazard();
+ tlb_read();
+ tlb_read_hazard();
+
+ /* Don't invalidate non-guest (RVA) mappings in the root TLB */
+ if (!(read_c0_guestctl1() & MIPS_GCTL1_RID))
+ continue;
+
+ /* Make sure all entries differ. */
+ write_c0_entryhi(UNIQUE_ENTRYHI(entry));
+ write_c0_entrylo0(0);
+ write_c0_entrylo1(0);
+ write_c0_guestctl1(0);
+ mtc0_tlbw_hazard();
+ tlb_write_indexed();
+ }
+
+ write_c0_entryhi(old_entryhi);
+ write_c0_pagemask(old_pagemask);
+ write_c0_guestctl1(old_guestctl1);
+ tlbw_use_hazard();
+
+ htw_start();
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(kvm_vz_local_flush_roottlb_all_guests);
+
+/**
+ * kvm_vz_local_flush_guesttlb_all() - Flush all guest TLB entries.
+ *
+ * Invalidate all entries in guest tlb irrespective of guestid.
+ */
+void kvm_vz_local_flush_guesttlb_all(void)
+{
+ unsigned long flags;
+ unsigned long old_index;
+ unsigned long old_entryhi;
+ unsigned long old_entrylo[2];
+ unsigned long old_pagemask;
+ int entry;
+ u64 cvmmemctl2 = 0;
+
+ local_irq_save(flags);
+
+ /* Preserve all clobbered guest registers */
+ old_index = read_gc0_index();
+ old_entryhi = read_gc0_entryhi();
+ old_entrylo[0] = read_gc0_entrylo0();
+ old_entrylo[1] = read_gc0_entrylo1();
+ old_pagemask = read_gc0_pagemask();
+
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Inhibit machine check due to multiple matching TLB entries */
+ cvmmemctl2 = read_c0_cvmmemctl2();
+ cvmmemctl2 |= CVMMEMCTL2_INHIBITTS;
+ write_c0_cvmmemctl2(cvmmemctl2);
+ break;
+ };
+
+ /* Invalidate guest entries in guest TLB */
+ write_gc0_entrylo0(0);
+ write_gc0_entrylo1(0);
+ write_gc0_pagemask(0);
+ for (entry = 0; entry < current_cpu_data.guest.tlbsize; entry++) {
+ /* Make sure all entries differ. */
+ write_gc0_index(entry);
+ write_gc0_entryhi(UNIQUE_GUEST_ENTRYHI(entry));
+ mtc0_tlbw_hazard();
+ guest_tlb_write_indexed();
+ }
+
+ if (cvmmemctl2) {
+ cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS;
+ write_c0_cvmmemctl2(cvmmemctl2);
+ };
+
+ write_gc0_index(old_index);
+ write_gc0_entryhi(old_entryhi);
+ write_gc0_entrylo0(old_entrylo[0]);
+ write_gc0_entrylo1(old_entrylo[1]);
+ write_gc0_pagemask(old_pagemask);
+ tlbw_use_hazard();
+
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(kvm_vz_local_flush_guesttlb_all);
+
+/**
+ * kvm_vz_save_guesttlb() - Save a range of guest TLB entries.
+ * @buf: Buffer to write TLB entries into.
+ * @index: Start index.
+ * @count: Number of entries to save.
+ *
+ * Save a range of guest TLB entries. The caller must ensure interrupts are
+ * disabled.
+ */
+void kvm_vz_save_guesttlb(struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count)
+{
+ unsigned int end = index + count;
+ unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
+ unsigned int guestctl1 = 0;
+ int old_index, i;
+
+ /* Save registers we're about to clobber */
+ old_index = read_gc0_index();
+ old_entryhi = read_gc0_entryhi();
+ old_entrylo0 = read_gc0_entrylo0();
+ old_entrylo1 = read_gc0_entrylo1();
+ old_pagemask = read_gc0_pagemask();
+
+ /* Set root GuestID for root probe */
+ htw_stop();
+ set_root_gid_to_guest_gid();
+ if (cpu_has_guestid)
+ guestctl1 = read_c0_guestctl1();
+
+ /* Read each entry from guest TLB */
+ for (i = index; i < end; ++i, ++buf) {
+ write_gc0_index(i);
+
+ mtc0_tlbr_hazard();
+ guest_tlb_read();
+ tlb_read_hazard();
+
+ if (cpu_has_guestid &&
+ (read_c0_guestctl1() ^ guestctl1) & MIPS_GCTL1_RID) {
+ /* Entry invalid or belongs to another guest */
+ buf->tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
+ buf->tlb_lo[0] = 0;
+ buf->tlb_lo[1] = 0;
+ buf->tlb_mask = 0;
+ } else {
+ /* Entry belongs to the right guest */
+ buf->tlb_hi = read_gc0_entryhi();
+ buf->tlb_lo[0] = read_gc0_entrylo0();
+ buf->tlb_lo[1] = read_gc0_entrylo1();
+ buf->tlb_mask = read_gc0_pagemask();
+ }
+ }
+
+ /* Clear root GuestID again */
+ clear_root_gid();
+ htw_start();
+
+ /* Restore clobbered registers */
+ write_gc0_index(old_index);
+ write_gc0_entryhi(old_entryhi);
+ write_gc0_entrylo0(old_entrylo0);
+ write_gc0_entrylo1(old_entrylo1);
+ write_gc0_pagemask(old_pagemask);
+
+ tlbw_use_hazard();
+}
+EXPORT_SYMBOL_GPL(kvm_vz_save_guesttlb);
+
+/**
+ * kvm_vz_load_guesttlb() - Save a range of guest TLB entries.
+ * @buf: Buffer to read TLB entries from.
+ * @index: Start index.
+ * @count: Number of entries to load.
+ *
+ * Load a range of guest TLB entries. The caller must ensure interrupts are
+ * disabled.
+ */
+void kvm_vz_load_guesttlb(const struct kvm_mips_tlb *buf, unsigned int index,
+ unsigned int count)
+{
+ unsigned int end = index + count;
+ unsigned long old_entryhi, old_entrylo0, old_entrylo1, old_pagemask;
+ int old_index, i;
+
+ /* Save registers we're about to clobber */
+ old_index = read_gc0_index();
+ old_entryhi = read_gc0_entryhi();
+ old_entrylo0 = read_gc0_entrylo0();
+ old_entrylo1 = read_gc0_entrylo1();
+ old_pagemask = read_gc0_pagemask();
+
+ /* Set root GuestID for root probe */
+ htw_stop();
+ set_root_gid_to_guest_gid();
+
+ /* Write each entry to guest TLB */
+ for (i = index; i < end; ++i, ++buf) {
+ write_gc0_index(i);
+ write_gc0_entryhi(buf->tlb_hi);
+ write_gc0_entrylo0(buf->tlb_lo[0]);
+ write_gc0_entrylo1(buf->tlb_lo[1]);
+ write_gc0_pagemask(buf->tlb_mask);
+
+ mtc0_tlbw_hazard();
+ guest_tlb_write_indexed();
+ }
+
+ /* Clear root GuestID again */
+ clear_root_gid();
+ htw_start();
+
+ /* Restore clobbered registers */
+ write_gc0_index(old_index);
+ write_gc0_entryhi(old_entryhi);
+ write_gc0_entrylo0(old_entrylo0);
+ write_gc0_entrylo1(old_entrylo1);
+ write_gc0_pagemask(old_pagemask);
+
+ tlbw_use_hazard();
+}
+EXPORT_SYMBOL_GPL(kvm_vz_load_guesttlb);
+
+#endif
+
/**
* kvm_mips_suspend_mm() - Suspend the active mm.
* @cpu The CPU we're running on.
diff --git a/arch/mips/kvm/trace.h b/arch/mips/kvm/trace.h
index c858cf1..a8c7fd7 100644
--- a/arch/mips/kvm/trace.h
+++ b/arch/mips/kvm/trace.h
@@ -18,6 +18,13 @@
#define TRACE_INCLUDE_FILE trace
/*
+ * arch/mips/kvm/mips.c
+ */
+extern bool kvm_trace_guest_mode_change;
+int kvm_guest_mode_change_trace_reg(void);
+void kvm_guest_mode_change_trace_unreg(void);
+
+/*
* Tracepoints for VM enters
*/
DECLARE_EVENT_CLASS(kvm_transition,
@@ -62,10 +69,20 @@ DEFINE_EVENT(kvm_transition, kvm_out,
#define KVM_TRACE_EXIT_MSA_FPE 14
#define KVM_TRACE_EXIT_FPE 15
#define KVM_TRACE_EXIT_MSA_DISABLED 21
+#define KVM_TRACE_EXIT_GUEST_EXIT 27
/* Further exit reasons */
#define KVM_TRACE_EXIT_WAIT 32
#define KVM_TRACE_EXIT_CACHE 33
#define KVM_TRACE_EXIT_SIGNAL 34
+/* 32 exit reasons correspond to GuestCtl0.GExcCode (VZ) */
+#define KVM_TRACE_EXIT_GEXCCODE_BASE 64
+#define KVM_TRACE_EXIT_GPSI 64 /* 0 */
+#define KVM_TRACE_EXIT_GSFC 65 /* 1 */
+#define KVM_TRACE_EXIT_HC 66 /* 2 */
+#define KVM_TRACE_EXIT_GRR 67 /* 3 */
+#define KVM_TRACE_EXIT_GVA 72 /* 8 */
+#define KVM_TRACE_EXIT_GHFC 73 /* 9 */
+#define KVM_TRACE_EXIT_GPA 74 /* 10 */
/* Tracepoints for VM exits */
#define kvm_trace_symbol_exit_types \
@@ -83,9 +100,17 @@ DEFINE_EVENT(kvm_transition, kvm_out,
{ KVM_TRACE_EXIT_MSA_FPE, "MSA FPE" }, \
{ KVM_TRACE_EXIT_FPE, "FPE" }, \
{ KVM_TRACE_EXIT_MSA_DISABLED, "MSA Disabled" }, \
+ { KVM_TRACE_EXIT_GUEST_EXIT, "Guest Exit" }, \
{ KVM_TRACE_EXIT_WAIT, "WAIT" }, \
{ KVM_TRACE_EXIT_CACHE, "CACHE" }, \
- { KVM_TRACE_EXIT_SIGNAL, "Signal" }
+ { KVM_TRACE_EXIT_SIGNAL, "Signal" }, \
+ { KVM_TRACE_EXIT_GPSI, "GPSI" }, \
+ { KVM_TRACE_EXIT_GSFC, "GSFC" }, \
+ { KVM_TRACE_EXIT_HC, "HC" }, \
+ { KVM_TRACE_EXIT_GRR, "GRR" }, \
+ { KVM_TRACE_EXIT_GVA, "GVA" }, \
+ { KVM_TRACE_EXIT_GHFC, "GHFC" }, \
+ { KVM_TRACE_EXIT_GPA, "GPA" }
TRACE_EVENT(kvm_exit,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int reason),
@@ -158,6 +183,8 @@ TRACE_EVENT(kvm_exit,
{ KVM_TRACE_COP0(16, 4), "Config4" }, \
{ KVM_TRACE_COP0(16, 5), "Config5" }, \
{ KVM_TRACE_COP0(16, 7), "Config7" }, \
+ { KVM_TRACE_COP0(17, 1), "MAAR" }, \
+ { KVM_TRACE_COP0(17, 2), "MAARI" }, \
{ KVM_TRACE_COP0(26, 0), "ECC" }, \
{ KVM_TRACE_COP0(30, 0), "ErrorEPC" }, \
{ KVM_TRACE_COP0(31, 2), "KScratch1" }, \
@@ -268,6 +295,51 @@ TRACE_EVENT(kvm_asid_change,
__entry->new_asid)
);
+TRACE_EVENT(kvm_guestid_change,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned int guestid),
+ TP_ARGS(vcpu, guestid),
+ TP_STRUCT__entry(
+ __field(unsigned int, guestid)
+ ),
+
+ TP_fast_assign(
+ __entry->guestid = guestid;
+ ),
+
+ TP_printk("GuestID: 0x%02x",
+ __entry->guestid)
+);
+
+TRACE_EVENT_FN(kvm_guest_mode_change,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+ TP_STRUCT__entry(
+ __field(unsigned long, epc)
+ __field(unsigned long, pc)
+ __field(unsigned long, badvaddr)
+ __field(unsigned int, status)
+ __field(unsigned int, cause)
+ ),
+
+ TP_fast_assign(
+ __entry->epc = kvm_read_c0_guest_epc(vcpu->arch.cop0);
+ __entry->pc = vcpu->arch.pc;
+ __entry->badvaddr = kvm_read_c0_guest_badvaddr(vcpu->arch.cop0);
+ __entry->status = kvm_read_c0_guest_status(vcpu->arch.cop0);
+ __entry->cause = kvm_read_c0_guest_cause(vcpu->arch.cop0);
+ ),
+
+ TP_printk("EPC: 0x%08lx PC: 0x%08lx Status: 0x%08x Cause: 0x%08x BadVAddr: 0x%08lx",
+ __entry->epc,
+ __entry->pc,
+ __entry->status,
+ __entry->cause,
+ __entry->badvaddr),
+
+ kvm_guest_mode_change_trace_reg,
+ kvm_guest_mode_change_trace_unreg
+);
+
#endif /* _TRACE_KVM_H */
/* This part must be outside protection */
diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c
index b1fa53b..a563759 100644
--- a/arch/mips/kvm/trap_emul.c
+++ b/arch/mips/kvm/trap_emul.c
@@ -12,6 +12,7 @@
#include <linux/errno.h>
#include <linux/err.h>
#include <linux/kvm_host.h>
+#include <linux/log2.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <asm/mmu_context.h>
@@ -40,6 +41,29 @@ static gpa_t kvm_trap_emul_gva_to_gpa_cb(gva_t gva)
return gpa;
}
+static int kvm_trap_emul_no_handler(struct kvm_vcpu *vcpu)
+{
+ u32 __user *opc = (u32 __user *) vcpu->arch.pc;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
+ unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
+ u32 inst = 0;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ kvm_get_badinstr(opc, vcpu, &inst);
+
+ kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
+ exccode, opc, inst, badvaddr,
+ kvm_read_c0_guest_status(vcpu->arch.cop0));
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+}
+
static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
{
struct mips_coproc *cop0 = vcpu->arch.cop0;
@@ -82,6 +106,10 @@ static int kvm_trap_emul_handle_cop_unusable(struct kvm_vcpu *vcpu)
ret = RESUME_HOST;
break;
+ case EMULATE_HYPERCALL:
+ ret = kvm_mips_handle_hypcall(vcpu);
+ break;
+
default:
BUG();
}
@@ -484,6 +512,31 @@ static int kvm_trap_emul_handle_msa_disabled(struct kvm_vcpu *vcpu)
return ret;
}
+static int kvm_trap_emul_hardware_enable(void)
+{
+ return 0;
+}
+
+static void kvm_trap_emul_hardware_disable(void)
+{
+}
+
+static int kvm_trap_emul_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_MIPS_TE:
+ r = 1;
+ break;
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
static int kvm_trap_emul_vcpu_init(struct kvm_vcpu *vcpu)
{
struct mm_struct *kern_mm = &vcpu->arch.guest_kernel_mm;
@@ -561,6 +614,9 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
u32 config, config1;
int vcpu_id = vcpu->vcpu_id;
+ /* Start off the timer at 100 MHz */
+ kvm_mips_init_count(vcpu, 100*1000*1000);
+
/*
* Arch specific stuff, set up config registers properly so that the
* guest will come up as expected
@@ -589,6 +645,13 @@ static int kvm_trap_emul_vcpu_setup(struct kvm_vcpu *vcpu)
/* Read the cache characteristics from the host Config1 Register */
config1 = (read_c0_config1() & ~0x7f);
+ /* DCache line size not correctly reported in Config1 on Octeon CPUs */
+ if (cpu_dcache_line_size()) {
+ config1 &= ~MIPS_CONF1_DL;
+ config1 |= ((ilog2(cpu_dcache_line_size()) - 1) <<
+ MIPS_CONF1_DL_SHF) & MIPS_CONF1_DL;
+ }
+
/* Set up MMU size */
config1 &= ~(0x3f << 25);
config1 |= ((KVM_MIPS_GUEST_TLB_SIZE - 1) << 25);
@@ -892,10 +955,12 @@ static int kvm_trap_emul_set_one_reg(struct kvm_vcpu *vcpu,
if (v & CAUSEF_DC) {
/* disable timer first */
kvm_mips_count_disable_cause(vcpu);
- kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+ kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC,
+ v);
} else {
/* enable timer last */
- kvm_change_c0_guest_cause(cop0, ~CAUSEF_DC, v);
+ kvm_change_c0_guest_cause(cop0, (u32)~CAUSEF_DC,
+ v);
kvm_mips_count_enable_cause(vcpu);
}
} else {
@@ -1230,7 +1295,11 @@ static struct kvm_mips_callbacks kvm_trap_emul_callbacks = {
.handle_msa_fpe = kvm_trap_emul_handle_msa_fpe,
.handle_fpe = kvm_trap_emul_handle_fpe,
.handle_msa_disabled = kvm_trap_emul_handle_msa_disabled,
+ .handle_guest_exit = kvm_trap_emul_no_handler,
+ .hardware_enable = kvm_trap_emul_hardware_enable,
+ .hardware_disable = kvm_trap_emul_hardware_disable,
+ .check_extension = kvm_trap_emul_check_extension,
.vcpu_init = kvm_trap_emul_vcpu_init,
.vcpu_uninit = kvm_trap_emul_vcpu_uninit,
.vcpu_setup = kvm_trap_emul_vcpu_setup,
diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c
new file mode 100644
index 0000000..71d8856
--- /dev/null
+++ b/arch/mips/kvm/vz.c
@@ -0,0 +1,3223 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * KVM/MIPS: Support for hardware virtualization extensions
+ *
+ * Copyright (C) 2012 MIPS Technologies, Inc. All rights reserved.
+ * Authors: Yann Le Du <ledu@kymasys.com>
+ */
+
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/preempt.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <asm/cacheops.h>
+#include <asm/cmpxchg.h>
+#include <asm/fpu.h>
+#include <asm/hazards.h>
+#include <asm/inst.h>
+#include <asm/mmu_context.h>
+#include <asm/r4kcache.h>
+#include <asm/time.h>
+#include <asm/tlb.h>
+#include <asm/tlbex.h>
+
+#include <linux/kvm_host.h>
+
+#include "interrupt.h"
+
+#include "trace.h"
+
+/* Pointers to last VCPU loaded on each physical CPU */
+static struct kvm_vcpu *last_vcpu[NR_CPUS];
+/* Pointers to last VCPU executed on each physical CPU */
+static struct kvm_vcpu *last_exec_vcpu[NR_CPUS];
+
+/*
+ * Number of guest VTLB entries to use, so we can catch inconsistency between
+ * CPUs.
+ */
+static unsigned int kvm_vz_guest_vtlb_size;
+
+static inline long kvm_vz_read_gc0_ebase(void)
+{
+ if (sizeof(long) == 8 && cpu_has_ebase_wg)
+ return read_gc0_ebase_64();
+ else
+ return read_gc0_ebase();
+}
+
+static inline void kvm_vz_write_gc0_ebase(long v)
+{
+ /*
+ * First write with WG=1 to write upper bits, then write again in case
+ * WG should be left at 0.
+ * write_gc0_ebase_64() is no longer UNDEFINED since R6.
+ */
+ if (sizeof(long) == 8 &&
+ (cpu_has_mips64r6 || cpu_has_ebase_wg)) {
+ write_gc0_ebase_64(v | MIPS_EBASE_WG);
+ write_gc0_ebase_64(v);
+ } else {
+ write_gc0_ebase(v | MIPS_EBASE_WG);
+ write_gc0_ebase(v);
+ }
+}
+
+/*
+ * These Config bits may be writable by the guest:
+ * Config: [K23, KU] (!TLB), K0
+ * Config1: (none)
+ * Config2: [TU, SU] (impl)
+ * Config3: ISAOnExc
+ * Config4: FTLBPageSize
+ * Config5: K, CV, MSAEn, UFE, FRE, SBRI, UFR
+ */
+
+static inline unsigned int kvm_vz_config_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return CONF_CM_CMASK;
+}
+
+static inline unsigned int kvm_vz_config1_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_vz_config2_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return 0;
+}
+
+static inline unsigned int kvm_vz_config3_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ return MIPS_CONF3_ISA_OE;
+}
+
+static inline unsigned int kvm_vz_config4_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ /* no need to be exact */
+ return MIPS_CONF4_VFTLBPAGESIZE;
+}
+
+static inline unsigned int kvm_vz_config5_guest_wrmask(struct kvm_vcpu *vcpu)
+{
+ unsigned int mask = MIPS_CONF5_K | MIPS_CONF5_CV | MIPS_CONF5_SBRI;
+
+ /* Permit MSAEn changes if MSA supported and enabled */
+ if (kvm_mips_guest_has_msa(&vcpu->arch))
+ mask |= MIPS_CONF5_MSAEN;
+
+ /*
+ * Permit guest FPU mode changes if FPU is enabled and the relevant
+ * feature exists according to FIR register.
+ */
+ if (kvm_mips_guest_has_fpu(&vcpu->arch)) {
+ if (cpu_has_ufr)
+ mask |= MIPS_CONF5_UFR;
+ if (cpu_has_fre)
+ mask |= MIPS_CONF5_FRE | MIPS_CONF5_UFE;
+ }
+
+ return mask;
+}
+
+/*
+ * VZ optionally allows these additional Config bits to be written by root:
+ * Config: M, [MT]
+ * Config1: M, [MMUSize-1, C2, MD, PC, WR, CA], FP
+ * Config2: M
+ * Config3: M, MSAP, [BPG], ULRI, [DSP2P, DSPP], CTXTC, [ITL, LPA, VEIC,
+ * VInt, SP, CDMM, MT, SM, TL]
+ * Config4: M, [VTLBSizeExt, MMUSizeExt]
+ * Config5: MRP
+ */
+
+static inline unsigned int kvm_vz_config_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config_guest_wrmask(vcpu) | MIPS_CONF_M;
+}
+
+static inline unsigned int kvm_vz_config1_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ unsigned int mask = kvm_vz_config1_guest_wrmask(vcpu) | MIPS_CONF_M;
+
+ /* Permit FPU to be present if FPU is supported */
+ if (kvm_mips_guest_can_have_fpu(&vcpu->arch))
+ mask |= MIPS_CONF1_FP;
+
+ return mask;
+}
+
+static inline unsigned int kvm_vz_config2_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config2_guest_wrmask(vcpu) | MIPS_CONF_M;
+}
+
+static inline unsigned int kvm_vz_config3_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ unsigned int mask = kvm_vz_config3_guest_wrmask(vcpu) | MIPS_CONF_M |
+ MIPS_CONF3_ULRI | MIPS_CONF3_CTXTC;
+
+ /* Permit MSA to be present if MSA is supported */
+ if (kvm_mips_guest_can_have_msa(&vcpu->arch))
+ mask |= MIPS_CONF3_MSA;
+
+ return mask;
+}
+
+static inline unsigned int kvm_vz_config4_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config4_guest_wrmask(vcpu) | MIPS_CONF_M;
+}
+
+static inline unsigned int kvm_vz_config5_user_wrmask(struct kvm_vcpu *vcpu)
+{
+ return kvm_vz_config5_guest_wrmask(vcpu) | MIPS_CONF5_MRP;
+}
+
+static gpa_t kvm_vz_gva_to_gpa_cb(gva_t gva)
+{
+ /* VZ guest has already converted gva to gpa */
+ return gva;
+}
+
+static void kvm_vz_queue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ set_bit(priority, &vcpu->arch.pending_exceptions);
+ clear_bit(priority, &vcpu->arch.pending_exceptions_clr);
+}
+
+static void kvm_vz_dequeue_irq(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
+ set_bit(priority, &vcpu->arch.pending_exceptions_clr);
+}
+
+static void kvm_vz_queue_timer_int_cb(struct kvm_vcpu *vcpu)
+{
+ /*
+ * timer expiry is asynchronous to vcpu execution therefore defer guest
+ * cp0 accesses
+ */
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
+static void kvm_vz_dequeue_timer_int_cb(struct kvm_vcpu *vcpu)
+{
+ /*
+ * timer expiry is asynchronous to vcpu execution therefore defer guest
+ * cp0 accesses
+ */
+ kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
+static void kvm_vz_queue_io_int_cb(struct kvm_vcpu *vcpu,
+ struct kvm_mips_interrupt *irq)
+{
+ int intr = (int)irq->irq;
+
+ /*
+ * interrupts are asynchronous to vcpu execution therefore defer guest
+ * cp0 accesses
+ */
+ switch (intr) {
+ case 2:
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IO);
+ break;
+
+ case 3:
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_1);
+ break;
+
+ case 4:
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_IPI_2);
+ break;
+
+ default:
+ break;
+ }
+
+}
+
+static void kvm_vz_dequeue_io_int_cb(struct kvm_vcpu *vcpu,
+ struct kvm_mips_interrupt *irq)
+{
+ int intr = (int)irq->irq;
+
+ /*
+ * interrupts are asynchronous to vcpu execution therefore defer guest
+ * cp0 accesses
+ */
+ switch (intr) {
+ case -2:
+ kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IO);
+ break;
+
+ case -3:
+ kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_1);
+ break;
+
+ case -4:
+ kvm_vz_dequeue_irq(vcpu, MIPS_EXC_INT_IPI_2);
+ break;
+
+ default:
+ break;
+ }
+
+}
+
+static u32 kvm_vz_priority_to_irq[MIPS_EXC_MAX] = {
+ [MIPS_EXC_INT_TIMER] = C_IRQ5,
+ [MIPS_EXC_INT_IO] = C_IRQ0,
+ [MIPS_EXC_INT_IPI_1] = C_IRQ1,
+ [MIPS_EXC_INT_IPI_2] = C_IRQ2,
+};
+
+static int kvm_vz_irq_deliver_cb(struct kvm_vcpu *vcpu, unsigned int priority,
+ u32 cause)
+{
+ u32 irq = (priority < MIPS_EXC_MAX) ?
+ kvm_vz_priority_to_irq[priority] : 0;
+
+ switch (priority) {
+ case MIPS_EXC_INT_TIMER:
+ set_gc0_cause(C_TI);
+ break;
+
+ case MIPS_EXC_INT_IO:
+ case MIPS_EXC_INT_IPI_1:
+ case MIPS_EXC_INT_IPI_2:
+ if (cpu_has_guestctl2)
+ set_c0_guestctl2(irq);
+ else
+ set_gc0_cause(irq);
+ break;
+
+ default:
+ break;
+ }
+
+ clear_bit(priority, &vcpu->arch.pending_exceptions);
+ return 1;
+}
+
+static int kvm_vz_irq_clear_cb(struct kvm_vcpu *vcpu, unsigned int priority,
+ u32 cause)
+{
+ u32 irq = (priority < MIPS_EXC_MAX) ?
+ kvm_vz_priority_to_irq[priority] : 0;
+
+ switch (priority) {
+ case MIPS_EXC_INT_TIMER:
+ /*
+ * Call to kvm_write_c0_guest_compare() clears Cause.TI in
+ * kvm_mips_emulate_CP0(). Explicitly clear irq associated with
+ * Cause.IP[IPTI] if GuestCtl2 virtual interrupt register not
+ * supported or if not using GuestCtl2 Hardware Clear.
+ */
+ if (cpu_has_guestctl2) {
+ if (!(read_c0_guestctl2() & (irq << 14)))
+ clear_c0_guestctl2(irq);
+ } else {
+ clear_gc0_cause(irq);
+ }
+ break;
+
+ case MIPS_EXC_INT_IO:
+ case MIPS_EXC_INT_IPI_1:
+ case MIPS_EXC_INT_IPI_2:
+ /* Clear GuestCtl2.VIP irq if not using Hardware Clear */
+ if (cpu_has_guestctl2) {
+ if (!(read_c0_guestctl2() & (irq << 14)))
+ clear_c0_guestctl2(irq);
+ } else {
+ clear_gc0_cause(irq);
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ clear_bit(priority, &vcpu->arch.pending_exceptions_clr);
+ return 1;
+}
+
+/*
+ * VZ guest timer handling.
+ */
+
+/**
+ * kvm_vz_should_use_htimer() - Find whether to use the VZ hard guest timer.
+ * @vcpu: Virtual CPU.
+ *
+ * Returns: true if the VZ GTOffset & real guest CP0_Count should be used
+ * instead of software emulation of guest timer.
+ * false otherwise.
+ */
+static bool kvm_vz_should_use_htimer(struct kvm_vcpu *vcpu)
+{
+ if (kvm_mips_count_disabled(vcpu))
+ return false;
+
+ /* Chosen frequency must match real frequency */
+ if (mips_hpt_frequency != vcpu->arch.count_hz)
+ return false;
+
+ /* We don't support a CP0_GTOffset with fewer bits than CP0_Count */
+ if (current_cpu_data.gtoffset_mask != 0xffffffff)
+ return false;
+
+ return true;
+}
+
+/**
+ * _kvm_vz_restore_stimer() - Restore soft timer state.
+ * @vcpu: Virtual CPU.
+ * @compare: CP0_Compare register value, restored by caller.
+ * @cause: CP0_Cause register to restore.
+ *
+ * Restore VZ state relating to the soft timer. The hard timer can be enabled
+ * later.
+ */
+static void _kvm_vz_restore_stimer(struct kvm_vcpu *vcpu, u32 compare,
+ u32 cause)
+{
+ /*
+ * Avoid spurious counter interrupts by setting Guest CP0_Count to just
+ * after Guest CP0_Compare.
+ */
+ write_c0_gtoffset(compare - read_c0_count());
+
+ back_to_back_c0_hazard();
+ write_gc0_cause(cause);
+}
+
+/**
+ * _kvm_vz_restore_htimer() - Restore hard timer state.
+ * @vcpu: Virtual CPU.
+ * @compare: CP0_Compare register value, restored by caller.
+ * @cause: CP0_Cause register to restore.
+ *
+ * Restore hard timer Guest.Count & Guest.Cause taking care to preserve the
+ * value of Guest.CP0_Cause.TI while restoring Guest.CP0_Cause.
+ */
+static void _kvm_vz_restore_htimer(struct kvm_vcpu *vcpu,
+ u32 compare, u32 cause)
+{
+ u32 start_count, after_count;
+ ktime_t freeze_time;
+ unsigned long flags;
+
+ /*
+ * Freeze the soft-timer and sync the guest CP0_Count with it. We do
+ * this with interrupts disabled to avoid latency.
+ */
+ local_irq_save(flags);
+ freeze_time = kvm_mips_freeze_hrtimer(vcpu, &start_count);
+ write_c0_gtoffset(start_count - read_c0_count());
+ local_irq_restore(flags);
+
+ /* restore guest CP0_Cause, as TI may already be set */
+ back_to_back_c0_hazard();
+ write_gc0_cause(cause);
+
+ /*
+ * The above sequence isn't atomic and would result in lost timer
+ * interrupts if we're not careful. Detect if a timer interrupt is due
+ * and assert it.
+ */
+ back_to_back_c0_hazard();
+ after_count = read_gc0_count();
+ if (after_count - start_count > compare - start_count - 1)
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+}
+
+/**
+ * kvm_vz_restore_timer() - Restore timer state.
+ * @vcpu: Virtual CPU.
+ *
+ * Restore soft timer state from saved context.
+ */
+static void kvm_vz_restore_timer(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ u32 cause, compare;
+
+ compare = kvm_read_sw_gc0_compare(cop0);
+ cause = kvm_read_sw_gc0_cause(cop0);
+
+ write_gc0_compare(compare);
+ _kvm_vz_restore_stimer(vcpu, compare, cause);
+}
+
+/**
+ * kvm_vz_acquire_htimer() - Switch to hard timer state.
+ * @vcpu: Virtual CPU.
+ *
+ * Restore hard timer state on top of existing soft timer state if possible.
+ *
+ * Since hard timer won't remain active over preemption, preemption should be
+ * disabled by the caller.
+ */
+void kvm_vz_acquire_htimer(struct kvm_vcpu *vcpu)
+{
+ u32 gctl0;
+
+ gctl0 = read_c0_guestctl0();
+ if (!(gctl0 & MIPS_GCTL0_GT) && kvm_vz_should_use_htimer(vcpu)) {
+ /* enable guest access to hard timer */
+ write_c0_guestctl0(gctl0 | MIPS_GCTL0_GT);
+
+ _kvm_vz_restore_htimer(vcpu, read_gc0_compare(),
+ read_gc0_cause());
+ }
+}
+
+/**
+ * _kvm_vz_save_htimer() - Switch to software emulation of guest timer.
+ * @vcpu: Virtual CPU.
+ * @compare: Pointer to write compare value to.
+ * @cause: Pointer to write cause value to.
+ *
+ * Save VZ guest timer state and switch to software emulation of guest CP0
+ * timer. The hard timer must already be in use, so preemption should be
+ * disabled.
+ */
+static void _kvm_vz_save_htimer(struct kvm_vcpu *vcpu,
+ u32 *out_compare, u32 *out_cause)
+{
+ u32 cause, compare, before_count, end_count;
+ ktime_t before_time;
+
+ compare = read_gc0_compare();
+ *out_compare = compare;
+
+ before_time = ktime_get();
+
+ /*
+ * Record the CP0_Count *prior* to saving CP0_Cause, so we have a time
+ * at which no pending timer interrupt is missing.
+ */
+ before_count = read_gc0_count();
+ back_to_back_c0_hazard();
+ cause = read_gc0_cause();
+ *out_cause = cause;
+
+ /*
+ * Record a final CP0_Count which we will transfer to the soft-timer.
+ * This is recorded *after* saving CP0_Cause, so we don't get any timer
+ * interrupts from just after the final CP0_Count point.
+ */
+ back_to_back_c0_hazard();
+ end_count = read_gc0_count();
+
+ /*
+ * The above sequence isn't atomic, so we could miss a timer interrupt
+ * between reading CP0_Cause and end_count. Detect and record any timer
+ * interrupt due between before_count and end_count.
+ */
+ if (end_count - before_count > compare - before_count - 1)
+ kvm_vz_queue_irq(vcpu, MIPS_EXC_INT_TIMER);
+
+ /*
+ * Restore soft-timer, ignoring a small amount of negative drift due to
+ * delay between freeze_hrtimer and setting CP0_GTOffset.
+ */
+ kvm_mips_restore_hrtimer(vcpu, before_time, end_count, -0x10000);
+}
+
+/**
+ * kvm_vz_save_timer() - Save guest timer state.
+ * @vcpu: Virtual CPU.
+ *
+ * Save VZ guest timer state and switch to soft guest timer if hard timer was in
+ * use.
+ */
+static void kvm_vz_save_timer(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ u32 gctl0, compare, cause;
+
+ gctl0 = read_c0_guestctl0();
+ if (gctl0 & MIPS_GCTL0_GT) {
+ /* disable guest use of hard timer */
+ write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT);
+
+ /* save hard timer state */
+ _kvm_vz_save_htimer(vcpu, &compare, &cause);
+ } else {
+ compare = read_gc0_compare();
+ cause = read_gc0_cause();
+ }
+
+ /* save timer-related state to VCPU context */
+ kvm_write_sw_gc0_cause(cop0, cause);
+ kvm_write_sw_gc0_compare(cop0, compare);
+}
+
+/**
+ * kvm_vz_lose_htimer() - Ensure hard guest timer is not in use.
+ * @vcpu: Virtual CPU.
+ *
+ * Transfers the state of the hard guest timer to the soft guest timer, leaving
+ * guest state intact so it can continue to be used with the soft timer.
+ */
+void kvm_vz_lose_htimer(struct kvm_vcpu *vcpu)
+{
+ u32 gctl0, compare, cause;
+
+ preempt_disable();
+ gctl0 = read_c0_guestctl0();
+ if (gctl0 & MIPS_GCTL0_GT) {
+ /* disable guest use of timer */
+ write_c0_guestctl0(gctl0 & ~MIPS_GCTL0_GT);
+
+ /* switch to soft timer */
+ _kvm_vz_save_htimer(vcpu, &compare, &cause);
+
+ /* leave soft timer in usable state */
+ _kvm_vz_restore_stimer(vcpu, compare, cause);
+ }
+ preempt_enable();
+}
+
+/**
+ * is_eva_access() - Find whether an instruction is an EVA memory accessor.
+ * @inst: 32-bit instruction encoding.
+ *
+ * Finds whether @inst encodes an EVA memory access instruction, which would
+ * indicate that emulation of it should access the user mode address space
+ * instead of the kernel mode address space. This matters for MUSUK segments
+ * which are TLB mapped for user mode but unmapped for kernel mode.
+ *
+ * Returns: Whether @inst encodes an EVA accessor instruction.
+ */
+static bool is_eva_access(union mips_instruction inst)
+{
+ if (inst.spec3_format.opcode != spec3_op)
+ return false;
+
+ switch (inst.spec3_format.func) {
+ case lwle_op:
+ case lwre_op:
+ case cachee_op:
+ case sbe_op:
+ case she_op:
+ case sce_op:
+ case swe_op:
+ case swle_op:
+ case swre_op:
+ case prefe_op:
+ case lbue_op:
+ case lhue_op:
+ case lbe_op:
+ case lhe_op:
+ case lle_op:
+ case lwe_op:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/**
+ * is_eva_am_mapped() - Find whether an access mode is mapped.
+ * @vcpu: KVM VCPU state.
+ * @am: 3-bit encoded access mode.
+ * @eu: Segment becomes unmapped and uncached when Status.ERL=1.
+ *
+ * Decode @am to find whether it encodes a mapped segment for the current VCPU
+ * state. Where necessary @eu and the actual instruction causing the fault are
+ * taken into account to make the decision.
+ *
+ * Returns: Whether the VCPU faulted on a TLB mapped address.
+ */
+static bool is_eva_am_mapped(struct kvm_vcpu *vcpu, unsigned int am, bool eu)
+{
+ u32 am_lookup;
+ int err;
+
+ /*
+ * Interpret access control mode. We assume address errors will already
+ * have been caught by the guest, leaving us with:
+ * AM UM SM KM 31..24 23..16
+ * UK 0 000 Unm 0 0
+ * MK 1 001 TLB 1
+ * MSK 2 010 TLB TLB 1
+ * MUSK 3 011 TLB TLB TLB 1
+ * MUSUK 4 100 TLB TLB Unm 0 1
+ * USK 5 101 Unm Unm 0 0
+ * - 6 110 0 0
+ * UUSK 7 111 Unm Unm Unm 0 0
+ *
+ * We shift a magic value by AM across the sign bit to find if always
+ * TLB mapped, and if not shift by 8 again to find if it depends on KM.
+ */
+ am_lookup = 0x70080000 << am;
+ if ((s32)am_lookup < 0) {
+ /*
+ * MK, MSK, MUSK
+ * Always TLB mapped, unless SegCtl.EU && ERL
+ */
+ if (!eu || !(read_gc0_status() & ST0_ERL))
+ return true;
+ } else {
+ am_lookup <<= 8;
+ if ((s32)am_lookup < 0) {
+ union mips_instruction inst;
+ unsigned int status;
+ u32 *opc;
+
+ /*
+ * MUSUK
+ * TLB mapped if not in kernel mode
+ */
+ status = read_gc0_status();
+ if (!(status & (ST0_EXL | ST0_ERL)) &&
+ (status & ST0_KSU))
+ return true;
+ /*
+ * EVA access instructions in kernel
+ * mode access user address space.
+ */
+ opc = (u32 *)vcpu->arch.pc;
+ if (vcpu->arch.host_cp0_cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (!err && is_eva_access(inst))
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/**
+ * kvm_vz_gva_to_gpa() - Convert valid GVA to GPA.
+ * @vcpu: KVM VCPU state.
+ * @gva: Guest virtual address to convert.
+ * @gpa: Output guest physical address.
+ *
+ * Convert a guest virtual address (GVA) which is valid according to the guest
+ * context, to a guest physical address (GPA).
+ *
+ * Returns: 0 on success.
+ * -errno on failure.
+ */
+static int kvm_vz_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
+ unsigned long *gpa)
+{
+ u32 gva32 = gva;
+ unsigned long segctl;
+
+ if ((long)gva == (s32)gva32) {
+ /* Handle canonical 32-bit virtual address */
+ if (cpu_guest_has_segments) {
+ unsigned long mask, pa;
+
+ switch (gva32 >> 29) {
+ case 0:
+ case 1: /* CFG5 (1GB) */
+ segctl = read_gc0_segctl2() >> 16;
+ mask = (unsigned long)0xfc0000000ull;
+ break;
+ case 2:
+ case 3: /* CFG4 (1GB) */
+ segctl = read_gc0_segctl2();
+ mask = (unsigned long)0xfc0000000ull;
+ break;
+ case 4: /* CFG3 (512MB) */
+ segctl = read_gc0_segctl1() >> 16;
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ case 5: /* CFG2 (512MB) */
+ segctl = read_gc0_segctl1();
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ case 6: /* CFG1 (512MB) */
+ segctl = read_gc0_segctl0() >> 16;
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ case 7: /* CFG0 (512MB) */
+ segctl = read_gc0_segctl0();
+ mask = (unsigned long)0xfe0000000ull;
+ break;
+ default:
+ /*
+ * GCC 4.9 isn't smart enough to figure out that
+ * segctl and mask are always initialised.
+ */
+ unreachable();
+ }
+
+ if (is_eva_am_mapped(vcpu, (segctl >> 4) & 0x7,
+ segctl & 0x0008))
+ goto tlb_mapped;
+
+ /* Unmapped, find guest physical address */
+ pa = (segctl << 20) & mask;
+ pa |= gva32 & ~mask;
+ *gpa = pa;
+ return 0;
+ } else if ((s32)gva32 < (s32)0xc0000000) {
+ /* legacy unmapped KSeg0 or KSeg1 */
+ *gpa = gva32 & 0x1fffffff;
+ return 0;
+ }
+#ifdef CONFIG_64BIT
+ } else if ((gva & 0xc000000000000000) == 0x8000000000000000) {
+ /* XKPHYS */
+ if (cpu_guest_has_segments) {
+ /*
+ * Each of the 8 regions can be overridden by SegCtl2.XR
+ * to use SegCtl1.XAM.
+ */
+ segctl = read_gc0_segctl2();
+ if (segctl & (1ull << (56 + ((gva >> 59) & 0x7)))) {
+ segctl = read_gc0_segctl1();
+ if (is_eva_am_mapped(vcpu, (segctl >> 59) & 0x7,
+ 0))
+ goto tlb_mapped;
+ }
+
+ }
+ /*
+ * Traditionally fully unmapped.
+ * Bits 61:59 specify the CCA, which we can just mask off here.
+ * Bits 58:PABITS should be zero, but we shouldn't have got here
+ * if it wasn't.
+ */
+ *gpa = gva & 0x07ffffffffffffff;
+ return 0;
+#endif
+ }
+
+tlb_mapped:
+ return kvm_vz_guest_tlb_lookup(vcpu, gva, gpa);
+}
+
+/**
+ * kvm_vz_badvaddr_to_gpa() - Convert GVA BadVAddr from root exception to GPA.
+ * @vcpu: KVM VCPU state.
+ * @badvaddr: Root BadVAddr.
+ * @gpa: Output guest physical address.
+ *
+ * VZ implementations are permitted to report guest virtual addresses (GVA) in
+ * BadVAddr on a root exception during guest execution, instead of the more
+ * convenient guest physical addresses (GPA). When we get a GVA, this function
+ * converts it to a GPA, taking into account guest segmentation and guest TLB
+ * state.
+ *
+ * Returns: 0 on success.
+ * -errno on failure.
+ */
+static int kvm_vz_badvaddr_to_gpa(struct kvm_vcpu *vcpu, unsigned long badvaddr,
+ unsigned long *gpa)
+{
+ unsigned int gexccode = (vcpu->arch.host_cp0_guestctl0 &
+ MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT;
+
+ /* If BadVAddr is GPA, then all is well in the world */
+ if (likely(gexccode == MIPS_GCTL0_GEXC_GPA)) {
+ *gpa = badvaddr;
+ return 0;
+ }
+
+ /* Otherwise we'd expect it to be GVA ... */
+ if (WARN(gexccode != MIPS_GCTL0_GEXC_GVA,
+ "Unexpected gexccode %#x\n", gexccode))
+ return -EINVAL;
+
+ /* ... and we need to perform the GVA->GPA translation in software */
+ return kvm_vz_gva_to_gpa(vcpu, badvaddr, gpa);
+}
+
+static int kvm_trap_vz_no_handler(struct kvm_vcpu *vcpu)
+{
+ u32 *opc = (u32 *) vcpu->arch.pc;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ u32 exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE;
+ unsigned long badvaddr = vcpu->arch.host_cp0_badvaddr;
+ u32 inst = 0;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ kvm_get_badinstr(opc, vcpu, &inst);
+
+ kvm_err("Exception Code: %d not handled @ PC: %p, inst: 0x%08x BadVaddr: %#lx Status: %#x\n",
+ exccode, opc, inst, badvaddr,
+ read_gc0_status());
+ kvm_arch_vcpu_dump_regs(vcpu);
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+}
+
+static unsigned long mips_process_maar(unsigned int op, unsigned long val)
+{
+ /* Mask off unused bits */
+ unsigned long mask = 0xfffff000 | MIPS_MAAR_S | MIPS_MAAR_VL;
+
+ if (read_gc0_pagegrain() & PG_ELPA)
+ mask |= 0x00ffffff00000000ull;
+ if (cpu_guest_has_mvh)
+ mask |= MIPS_MAAR_VH;
+
+ /* Set or clear VH */
+ if (op == mtc_op) {
+ /* clear VH */
+ val &= ~MIPS_MAAR_VH;
+ } else if (op == dmtc_op) {
+ /* set VH to match VL */
+ val &= ~MIPS_MAAR_VH;
+ if (val & MIPS_MAAR_VL)
+ val |= MIPS_MAAR_VH;
+ }
+
+ return val & mask;
+}
+
+static void kvm_write_maari(struct kvm_vcpu *vcpu, unsigned long val)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+ val &= MIPS_MAARI_INDEX;
+ if (val == MIPS_MAARI_INDEX)
+ kvm_write_sw_gc0_maari(cop0, ARRAY_SIZE(vcpu->arch.maar) - 1);
+ else if (val < ARRAY_SIZE(vcpu->arch.maar))
+ kvm_write_sw_gc0_maari(cop0, val);
+}
+
+static enum emulation_result kvm_vz_gpsi_cop0(union mips_instruction inst,
+ u32 *opc, u32 cause,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ enum emulation_result er = EMULATE_DONE;
+ u32 rt, rd, sel;
+ unsigned long curr_pc;
+ unsigned long val;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ er = update_pc(vcpu, cause);
+ if (er == EMULATE_FAIL)
+ return er;
+
+ if (inst.co_format.co) {
+ switch (inst.co_format.func) {
+ case wait_op:
+ er = kvm_mips_emul_wait(vcpu);
+ break;
+ default:
+ er = EMULATE_FAIL;
+ }
+ } else {
+ rt = inst.c0r_format.rt;
+ rd = inst.c0r_format.rd;
+ sel = inst.c0r_format.sel;
+
+ switch (inst.c0r_format.rs) {
+ case dmfc_op:
+ case mfc_op:
+#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
+ cop0->stat[rd][sel]++;
+#endif
+ if (rd == MIPS_CP0_COUNT &&
+ sel == 0) { /* Count */
+ val = kvm_mips_read_count(vcpu);
+ } else if (rd == MIPS_CP0_COMPARE &&
+ sel == 0) { /* Compare */
+ val = read_gc0_compare();
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 0) { /* LLAddr */
+ if (cpu_guest_has_rw_llb)
+ val = read_gc0_lladdr() &
+ MIPS_LLADDR_LLB;
+ else
+ val = 0;
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 1 && /* MAAR */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) {
+ /* MAARI must be in range */
+ BUG_ON(kvm_read_sw_gc0_maari(cop0) >=
+ ARRAY_SIZE(vcpu->arch.maar));
+ val = vcpu->arch.maar[
+ kvm_read_sw_gc0_maari(cop0)];
+ } else if ((rd == MIPS_CP0_PRID &&
+ (sel == 0 || /* PRid */
+ sel == 2 || /* CDMMBase */
+ sel == 3)) || /* CMGCRBase */
+ (rd == MIPS_CP0_STATUS &&
+ (sel == 2 || /* SRSCtl */
+ sel == 3)) || /* SRSMap */
+ (rd == MIPS_CP0_CONFIG &&
+ (sel == 7)) || /* Config7 */
+ (rd == MIPS_CP0_LLADDR &&
+ (sel == 2) && /* MAARI */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) ||
+ (rd == MIPS_CP0_ERRCTL &&
+ (sel == 0))) { /* ErrCtl */
+ val = cop0->reg[rd][sel];
+ } else {
+ val = 0;
+ er = EMULATE_FAIL;
+ }
+
+ if (er != EMULATE_FAIL) {
+ /* Sign extend */
+ if (inst.c0r_format.rs == mfc_op)
+ val = (int)val;
+ vcpu->arch.gprs[rt] = val;
+ }
+
+ trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mfc_op) ?
+ KVM_TRACE_MFC0 : KVM_TRACE_DMFC0,
+ KVM_TRACE_COP0(rd, sel), val);
+ break;
+
+ case dmtc_op:
+ case mtc_op:
+#ifdef CONFIG_KVM_MIPS_DEBUG_COP0_COUNTERS
+ cop0->stat[rd][sel]++;
+#endif
+ val = vcpu->arch.gprs[rt];
+ trace_kvm_hwr(vcpu, (inst.c0r_format.rs == mtc_op) ?
+ KVM_TRACE_MTC0 : KVM_TRACE_DMTC0,
+ KVM_TRACE_COP0(rd, sel), val);
+
+ if (rd == MIPS_CP0_COUNT &&
+ sel == 0) { /* Count */
+ kvm_vz_lose_htimer(vcpu);
+ kvm_mips_write_count(vcpu, vcpu->arch.gprs[rt]);
+ } else if (rd == MIPS_CP0_COMPARE &&
+ sel == 0) { /* Compare */
+ kvm_mips_write_compare(vcpu,
+ vcpu->arch.gprs[rt],
+ true);
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 0) { /* LLAddr */
+ /*
+ * P5600 generates GPSI on guest MTC0 LLAddr.
+ * Only allow the guest to clear LLB.
+ */
+ if (cpu_guest_has_rw_llb &&
+ !(val & MIPS_LLADDR_LLB))
+ write_gc0_lladdr(0);
+ } else if (rd == MIPS_CP0_LLADDR &&
+ sel == 1 && /* MAAR */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) {
+ val = mips_process_maar(inst.c0r_format.rs,
+ val);
+
+ /* MAARI must be in range */
+ BUG_ON(kvm_read_sw_gc0_maari(cop0) >=
+ ARRAY_SIZE(vcpu->arch.maar));
+ vcpu->arch.maar[kvm_read_sw_gc0_maari(cop0)] =
+ val;
+ } else if (rd == MIPS_CP0_LLADDR &&
+ (sel == 2) && /* MAARI */
+ cpu_guest_has_maar &&
+ !cpu_guest_has_dyn_maar) {
+ kvm_write_maari(vcpu, val);
+ } else if (rd == MIPS_CP0_ERRCTL &&
+ (sel == 0)) { /* ErrCtl */
+ /* ignore the written value */
+ } else {
+ er = EMULATE_FAIL;
+ }
+ break;
+
+ default:
+ er = EMULATE_FAIL;
+ break;
+ }
+ }
+ /* Rollback PC only if emulation was unsuccessful */
+ if (er == EMULATE_FAIL) {
+ kvm_err("[%#lx]%s: unsupported cop0 instruction 0x%08x\n",
+ curr_pc, __func__, inst.word);
+
+ vcpu->arch.pc = curr_pc;
+ }
+
+ return er;
+}
+
+static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst,
+ u32 *opc, u32 cause,
+ struct kvm_run *run,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er = EMULATE_DONE;
+ u32 cache, op_inst, op, base;
+ s16 offset;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ unsigned long va, curr_pc;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ er = update_pc(vcpu, cause);
+ if (er == EMULATE_FAIL)
+ return er;
+
+ base = inst.i_format.rs;
+ op_inst = inst.i_format.rt;
+ if (cpu_has_mips_r6)
+ offset = inst.spec3_format.simmediate;
+ else
+ offset = inst.i_format.simmediate;
+ cache = op_inst & CacheOp_Cache;
+ op = op_inst & CacheOp_Op;
+
+ va = arch->gprs[base] + offset;
+
+ kvm_debug("CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
+ cache, op, base, arch->gprs[base], offset);
+
+ /* Secondary or tirtiary cache ops ignored */
+ if (cache != Cache_I && cache != Cache_D)
+ return EMULATE_DONE;
+
+ switch (op_inst) {
+ case Index_Invalidate_I:
+ flush_icache_line_indexed(va);
+ return EMULATE_DONE;
+ case Index_Writeback_Inv_D:
+ flush_dcache_line_indexed(va);
+ return EMULATE_DONE;
+ case Hit_Invalidate_I:
+ case Hit_Invalidate_D:
+ case Hit_Writeback_Inv_D:
+ if (boot_cpu_type() == CPU_CAVIUM_OCTEON3) {
+ /* We can just flush entire icache */
+ local_flush_icache_range(0, 0);
+ return EMULATE_DONE;
+ }
+
+ /* So far, other platforms support guest hit cache ops */
+ break;
+ default:
+ break;
+ };
+
+ kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n",
+ curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base],
+ offset);
+ /* Rollback PC */
+ vcpu->arch.pc = curr_pc;
+
+ return EMULATE_FAIL;
+}
+
+static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ struct kvm_run *run = vcpu->run;
+ union mips_instruction inst;
+ int rd, rt, sel;
+ int err;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err)
+ return EMULATE_FAIL;
+
+ switch (inst.r_format.opcode) {
+ case cop0_op:
+ er = kvm_vz_gpsi_cop0(inst, opc, cause, run, vcpu);
+ break;
+#ifndef CONFIG_CPU_MIPSR6
+ case cache_op:
+ trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+ er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
+ break;
+#endif
+ case spec3_op:
+ switch (inst.spec3_format.func) {
+#ifdef CONFIG_CPU_MIPSR6
+ case cache6_op:
+ trace_kvm_exit(vcpu, KVM_TRACE_EXIT_CACHE);
+ er = kvm_vz_gpsi_cache(inst, opc, cause, run, vcpu);
+ break;
+#endif
+ case rdhwr_op:
+ if (inst.r_format.rs || (inst.r_format.re >> 3))
+ goto unknown;
+
+ rd = inst.r_format.rd;
+ rt = inst.r_format.rt;
+ sel = inst.r_format.re & 0x7;
+
+ switch (rd) {
+ case MIPS_HWR_CC: /* Read count register */
+ arch->gprs[rt] =
+ (long)(int)kvm_mips_read_count(vcpu);
+ break;
+ default:
+ trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
+ KVM_TRACE_HWR(rd, sel), 0);
+ goto unknown;
+ };
+
+ trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR,
+ KVM_TRACE_HWR(rd, sel), arch->gprs[rt]);
+
+ er = update_pc(vcpu, cause);
+ break;
+ default:
+ goto unknown;
+ };
+ break;
+unknown:
+
+ default:
+ kvm_err("GPSI exception not supported (%p/%#x)\n",
+ opc, inst.word);
+ kvm_arch_vcpu_dump_regs(vcpu);
+ er = EMULATE_FAIL;
+ break;
+ }
+
+ return er;
+}
+
+static enum emulation_result kvm_trap_vz_handle_gsfc(u32 cause, u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er = EMULATE_DONE;
+ struct kvm_vcpu_arch *arch = &vcpu->arch;
+ union mips_instruction inst;
+ int err;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err)
+ return EMULATE_FAIL;
+
+ /* complete MTC0 on behalf of guest and advance EPC */
+ if (inst.c0r_format.opcode == cop0_op &&
+ inst.c0r_format.rs == mtc_op &&
+ inst.c0r_format.z == 0) {
+ int rt = inst.c0r_format.rt;
+ int rd = inst.c0r_format.rd;
+ int sel = inst.c0r_format.sel;
+ unsigned int val = arch->gprs[rt];
+ unsigned int old_val, change;
+
+ trace_kvm_hwr(vcpu, KVM_TRACE_MTC0, KVM_TRACE_COP0(rd, sel),
+ val);
+
+ if ((rd == MIPS_CP0_STATUS) && (sel == 0)) {
+ /* FR bit should read as zero if no FPU */
+ if (!kvm_mips_guest_has_fpu(&vcpu->arch))
+ val &= ~(ST0_CU1 | ST0_FR);
+
+ /*
+ * Also don't allow FR to be set if host doesn't support
+ * it.
+ */
+ if (!(boot_cpu_data.fpu_id & MIPS_FPIR_F64))
+ val &= ~ST0_FR;
+
+ old_val = read_gc0_status();
+ change = val ^ old_val;
+
+ if (change & ST0_FR) {
+ /*
+ * FPU and Vector register state is made
+ * UNPREDICTABLE by a change of FR, so don't
+ * even bother saving it.
+ */
+ kvm_drop_fpu(vcpu);
+ }
+
+ /*
+ * If MSA state is already live, it is undefined how it
+ * interacts with FR=0 FPU state, and we don't want to
+ * hit reserved instruction exceptions trying to save
+ * the MSA state later when CU=1 && FR=1, so play it
+ * safe and save it first.
+ */
+ if (change & ST0_CU1 && !(val & ST0_FR) &&
+ vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA)
+ kvm_lose_fpu(vcpu);
+
+ write_gc0_status(val);
+ } else if ((rd == MIPS_CP0_CAUSE) && (sel == 0)) {
+ u32 old_cause = read_gc0_cause();
+ u32 change = old_cause ^ val;
+
+ /* DC bit enabling/disabling timer? */
+ if (change & CAUSEF_DC) {
+ if (val & CAUSEF_DC) {
+ kvm_vz_lose_htimer(vcpu);
+ kvm_mips_count_disable_cause(vcpu);
+ } else {
+ kvm_mips_count_enable_cause(vcpu);
+ }
+ }
+
+ /* Only certain bits are RW to the guest */
+ change &= (CAUSEF_DC | CAUSEF_IV | CAUSEF_WP |
+ CAUSEF_IP0 | CAUSEF_IP1);
+
+ /* WP can only be cleared */
+ change &= ~CAUSEF_WP | old_cause;
+
+ write_gc0_cause(old_cause ^ change);
+ } else if ((rd == MIPS_CP0_STATUS) && (sel == 1)) { /* IntCtl */
+ write_gc0_intctl(val);
+ } else if ((rd == MIPS_CP0_CONFIG) && (sel == 5)) {
+ old_val = read_gc0_config5();
+ change = val ^ old_val;
+ /* Handle changes in FPU/MSA modes */
+ preempt_disable();
+
+ /*
+ * Propagate FRE changes immediately if the FPU
+ * context is already loaded.
+ */
+ if (change & MIPS_CONF5_FRE &&
+ vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)
+ change_c0_config5(MIPS_CONF5_FRE, val);
+
+ preempt_enable();
+
+ val = old_val ^
+ (change & kvm_vz_config5_guest_wrmask(vcpu));
+ write_gc0_config5(val);
+ } else {
+ kvm_err("Handle GSFC, unsupported field change @ %p: %#x\n",
+ opc, inst.word);
+ er = EMULATE_FAIL;
+ }
+
+ if (er != EMULATE_FAIL)
+ er = update_pc(vcpu, cause);
+ } else {
+ kvm_err("Handle GSFC, unrecognized instruction @ %p: %#x\n",
+ opc, inst.word);
+ er = EMULATE_FAIL;
+ }
+
+ return er;
+}
+
+static enum emulation_result kvm_trap_vz_handle_ghfc(u32 cause, u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ /*
+ * Presumably this is due to MC (guest mode change), so lets trace some
+ * relevant info.
+ */
+ trace_kvm_guest_mode_change(vcpu);
+
+ return EMULATE_DONE;
+}
+
+static enum emulation_result kvm_trap_vz_handle_hc(u32 cause, u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ enum emulation_result er;
+ union mips_instruction inst;
+ unsigned long curr_pc;
+ int err;
+
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err)
+ return EMULATE_FAIL;
+
+ /*
+ * Update PC and hold onto current PC in case there is
+ * an error and we want to rollback the PC
+ */
+ curr_pc = vcpu->arch.pc;
+ er = update_pc(vcpu, cause);
+ if (er == EMULATE_FAIL)
+ return er;
+
+ er = kvm_mips_emul_hypcall(vcpu, inst);
+ if (er == EMULATE_FAIL)
+ vcpu->arch.pc = curr_pc;
+
+ return er;
+}
+
+static enum emulation_result kvm_trap_vz_no_handler_guest_exit(u32 gexccode,
+ u32 cause,
+ u32 *opc,
+ struct kvm_vcpu *vcpu)
+{
+ u32 inst;
+
+ /*
+ * Fetch the instruction.
+ */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ kvm_get_badinstr(opc, vcpu, &inst);
+
+ kvm_err("Guest Exception Code: %d not yet handled @ PC: %p, inst: 0x%08x Status: %#x\n",
+ gexccode, opc, inst, read_gc0_status());
+
+ return EMULATE_FAIL;
+}
+
+static int kvm_trap_vz_handle_guest_exit(struct kvm_vcpu *vcpu)
+{
+ u32 *opc = (u32 *) vcpu->arch.pc;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ enum emulation_result er = EMULATE_DONE;
+ u32 gexccode = (vcpu->arch.host_cp0_guestctl0 &
+ MIPS_GCTL0_GEXC) >> MIPS_GCTL0_GEXC_SHIFT;
+ int ret = RESUME_GUEST;
+
+ trace_kvm_exit(vcpu, KVM_TRACE_EXIT_GEXCCODE_BASE + gexccode);
+ switch (gexccode) {
+ case MIPS_GCTL0_GEXC_GPSI:
+ ++vcpu->stat.vz_gpsi_exits;
+ er = kvm_trap_vz_handle_gpsi(cause, opc, vcpu);
+ break;
+ case MIPS_GCTL0_GEXC_GSFC:
+ ++vcpu->stat.vz_gsfc_exits;
+ er = kvm_trap_vz_handle_gsfc(cause, opc, vcpu);
+ break;
+ case MIPS_GCTL0_GEXC_HC:
+ ++vcpu->stat.vz_hc_exits;
+ er = kvm_trap_vz_handle_hc(cause, opc, vcpu);
+ break;
+ case MIPS_GCTL0_GEXC_GRR:
+ ++vcpu->stat.vz_grr_exits;
+ er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
+ vcpu);
+ break;
+ case MIPS_GCTL0_GEXC_GVA:
+ ++vcpu->stat.vz_gva_exits;
+ er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
+ vcpu);
+ break;
+ case MIPS_GCTL0_GEXC_GHFC:
+ ++vcpu->stat.vz_ghfc_exits;
+ er = kvm_trap_vz_handle_ghfc(cause, opc, vcpu);
+ break;
+ case MIPS_GCTL0_GEXC_GPA:
+ ++vcpu->stat.vz_gpa_exits;
+ er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
+ vcpu);
+ break;
+ default:
+ ++vcpu->stat.vz_resvd_exits;
+ er = kvm_trap_vz_no_handler_guest_exit(gexccode, cause, opc,
+ vcpu);
+ break;
+
+ }
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_HYPERCALL) {
+ ret = kvm_mips_handle_hypcall(vcpu);
+ } else {
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ }
+ return ret;
+}
+
+/**
+ * kvm_trap_vz_handle_cop_unusuable() - Guest used unusable coprocessor.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use a coprocessor which hasn't been allowed
+ * by the root context.
+ */
+static int kvm_trap_vz_handle_cop_unusable(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ enum emulation_result er = EMULATE_FAIL;
+ int ret = RESUME_GUEST;
+
+ if (((cause & CAUSEF_CE) >> CAUSEB_CE) == 1) {
+ /*
+ * If guest FPU not present, the FPU operation should have been
+ * treated as a reserved instruction!
+ * If FPU already in use, we shouldn't get this at all.
+ */
+ if (WARN_ON(!kvm_mips_guest_has_fpu(&vcpu->arch) ||
+ vcpu->arch.aux_inuse & KVM_MIPS_AUX_FPU)) {
+ preempt_enable();
+ return EMULATE_FAIL;
+ }
+
+ kvm_own_fpu(vcpu);
+ er = EMULATE_DONE;
+ }
+ /* other coprocessors not handled */
+
+ switch (er) {
+ case EMULATE_DONE:
+ ret = RESUME_GUEST;
+ break;
+
+ case EMULATE_FAIL:
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ break;
+
+ default:
+ BUG();
+ }
+ return ret;
+}
+
+/**
+ * kvm_trap_vz_handle_msa_disabled() - Guest used MSA while disabled in root.
+ * @vcpu: Virtual CPU context.
+ *
+ * Handle when the guest attempts to use MSA when it is disabled in the root
+ * context.
+ */
+static int kvm_trap_vz_handle_msa_disabled(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ /*
+ * If MSA not present or not exposed to guest or FR=0, the MSA operation
+ * should have been treated as a reserved instruction!
+ * Same if CU1=1, FR=0.
+ * If MSA already in use, we shouldn't get this at all.
+ */
+ if (!kvm_mips_guest_has_msa(&vcpu->arch) ||
+ (read_gc0_status() & (ST0_CU1 | ST0_FR)) == ST0_CU1 ||
+ !(read_gc0_config5() & MIPS_CONF5_MSAEN) ||
+ vcpu->arch.aux_inuse & KVM_MIPS_AUX_MSA) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
+
+ kvm_own_msa(vcpu);
+
+ return RESUME_GUEST;
+}
+
+static int kvm_trap_vz_handle_tlb_ld_miss(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ u32 *opc = (u32 *) vcpu->arch.pc;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ ulong badvaddr = vcpu->arch.host_cp0_badvaddr;
+ union mips_instruction inst;
+ enum emulation_result er = EMULATE_DONE;
+ int err, ret = RESUME_GUEST;
+
+ if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, false)) {
+ /* A code fetch fault doesn't count as an MMIO */
+ if (kvm_is_ifetch_fault(&vcpu->arch)) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
+
+ /* Fetch the instruction */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
+
+ /* Treat as MMIO */
+ er = kvm_mips_emulate_load(inst, cause, run, vcpu);
+ if (er == EMULATE_FAIL) {
+ kvm_err("Guest Emulate Load from MMIO space failed: PC: %p, BadVaddr: %#lx\n",
+ opc, badvaddr);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ }
+ }
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_DO_MMIO) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ ret = RESUME_HOST;
+ } else {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ }
+ return ret;
+}
+
+static int kvm_trap_vz_handle_tlb_st_miss(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ u32 *opc = (u32 *) vcpu->arch.pc;
+ u32 cause = vcpu->arch.host_cp0_cause;
+ ulong badvaddr = vcpu->arch.host_cp0_badvaddr;
+ union mips_instruction inst;
+ enum emulation_result er = EMULATE_DONE;
+ int err;
+ int ret = RESUME_GUEST;
+
+ /* Just try the access again if we couldn't do the translation */
+ if (kvm_vz_badvaddr_to_gpa(vcpu, badvaddr, &badvaddr))
+ return RESUME_GUEST;
+ vcpu->arch.host_cp0_badvaddr = badvaddr;
+
+ if (kvm_mips_handle_vz_root_tlb_fault(badvaddr, vcpu, true)) {
+ /* Fetch the instruction */
+ if (cause & CAUSEF_BD)
+ opc += 1;
+ err = kvm_get_badinstr(opc, vcpu, &inst.word);
+ if (err) {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
+
+ /* Treat as MMIO */
+ er = kvm_mips_emulate_store(inst, cause, run, vcpu);
+ if (er == EMULATE_FAIL) {
+ kvm_err("Guest Emulate Store to MMIO space failed: PC: %p, BadVaddr: %#lx\n",
+ opc, badvaddr);
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ }
+ }
+
+ if (er == EMULATE_DONE) {
+ ret = RESUME_GUEST;
+ } else if (er == EMULATE_DO_MMIO) {
+ run->exit_reason = KVM_EXIT_MMIO;
+ ret = RESUME_HOST;
+ } else {
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ ret = RESUME_HOST;
+ }
+ return ret;
+}
+
+static u64 kvm_vz_get_one_regs[] = {
+ KVM_REG_MIPS_CP0_INDEX,
+ KVM_REG_MIPS_CP0_ENTRYLO0,
+ KVM_REG_MIPS_CP0_ENTRYLO1,
+ KVM_REG_MIPS_CP0_CONTEXT,
+ KVM_REG_MIPS_CP0_PAGEMASK,
+ KVM_REG_MIPS_CP0_PAGEGRAIN,
+ KVM_REG_MIPS_CP0_WIRED,
+ KVM_REG_MIPS_CP0_HWRENA,
+ KVM_REG_MIPS_CP0_BADVADDR,
+ KVM_REG_MIPS_CP0_COUNT,
+ KVM_REG_MIPS_CP0_ENTRYHI,
+ KVM_REG_MIPS_CP0_COMPARE,
+ KVM_REG_MIPS_CP0_STATUS,
+ KVM_REG_MIPS_CP0_INTCTL,
+ KVM_REG_MIPS_CP0_CAUSE,
+ KVM_REG_MIPS_CP0_EPC,
+ KVM_REG_MIPS_CP0_PRID,
+ KVM_REG_MIPS_CP0_EBASE,
+ KVM_REG_MIPS_CP0_CONFIG,
+ KVM_REG_MIPS_CP0_CONFIG1,
+ KVM_REG_MIPS_CP0_CONFIG2,
+ KVM_REG_MIPS_CP0_CONFIG3,
+ KVM_REG_MIPS_CP0_CONFIG4,
+ KVM_REG_MIPS_CP0_CONFIG5,
+#ifdef CONFIG_64BIT
+ KVM_REG_MIPS_CP0_XCONTEXT,
+#endif
+ KVM_REG_MIPS_CP0_ERROREPC,
+
+ KVM_REG_MIPS_COUNT_CTL,
+ KVM_REG_MIPS_COUNT_RESUME,
+ KVM_REG_MIPS_COUNT_HZ,
+};
+
+static u64 kvm_vz_get_one_regs_contextconfig[] = {
+ KVM_REG_MIPS_CP0_CONTEXTCONFIG,
+#ifdef CONFIG_64BIT
+ KVM_REG_MIPS_CP0_XCONTEXTCONFIG,
+#endif
+};
+
+static u64 kvm_vz_get_one_regs_segments[] = {
+ KVM_REG_MIPS_CP0_SEGCTL0,
+ KVM_REG_MIPS_CP0_SEGCTL1,
+ KVM_REG_MIPS_CP0_SEGCTL2,
+};
+
+static u64 kvm_vz_get_one_regs_htw[] = {
+ KVM_REG_MIPS_CP0_PWBASE,
+ KVM_REG_MIPS_CP0_PWFIELD,
+ KVM_REG_MIPS_CP0_PWSIZE,
+ KVM_REG_MIPS_CP0_PWCTL,
+};
+
+static u64 kvm_vz_get_one_regs_kscratch[] = {
+ KVM_REG_MIPS_CP0_KSCRATCH1,
+ KVM_REG_MIPS_CP0_KSCRATCH2,
+ KVM_REG_MIPS_CP0_KSCRATCH3,
+ KVM_REG_MIPS_CP0_KSCRATCH4,
+ KVM_REG_MIPS_CP0_KSCRATCH5,
+ KVM_REG_MIPS_CP0_KSCRATCH6,
+};
+
+static unsigned long kvm_vz_num_regs(struct kvm_vcpu *vcpu)
+{
+ unsigned long ret;
+
+ ret = ARRAY_SIZE(kvm_vz_get_one_regs);
+ if (cpu_guest_has_userlocal)
+ ++ret;
+ if (cpu_guest_has_badinstr)
+ ++ret;
+ if (cpu_guest_has_badinstrp)
+ ++ret;
+ if (cpu_guest_has_contextconfig)
+ ret += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
+ if (cpu_guest_has_segments)
+ ret += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
+ if (cpu_guest_has_htw)
+ ret += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
+ if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar)
+ ret += 1 + ARRAY_SIZE(vcpu->arch.maar);
+ ret += __arch_hweight8(cpu_data[0].guest.kscratch_mask);
+
+ return ret;
+}
+
+static int kvm_vz_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices)
+{
+ u64 index;
+ unsigned int i;
+
+ if (copy_to_user(indices, kvm_vz_get_one_regs,
+ sizeof(kvm_vz_get_one_regs)))
+ return -EFAULT;
+ indices += ARRAY_SIZE(kvm_vz_get_one_regs);
+
+ if (cpu_guest_has_userlocal) {
+ index = KVM_REG_MIPS_CP0_USERLOCAL;
+ if (copy_to_user(indices, &index, sizeof(index)))
+ return -EFAULT;
+ ++indices;
+ }
+ if (cpu_guest_has_badinstr) {
+ index = KVM_REG_MIPS_CP0_BADINSTR;
+ if (copy_to_user(indices, &index, sizeof(index)))
+ return -EFAULT;
+ ++indices;
+ }
+ if (cpu_guest_has_badinstrp) {
+ index = KVM_REG_MIPS_CP0_BADINSTRP;
+ if (copy_to_user(indices, &index, sizeof(index)))
+ return -EFAULT;
+ ++indices;
+ }
+ if (cpu_guest_has_contextconfig) {
+ if (copy_to_user(indices, kvm_vz_get_one_regs_contextconfig,
+ sizeof(kvm_vz_get_one_regs_contextconfig)))
+ return -EFAULT;
+ indices += ARRAY_SIZE(kvm_vz_get_one_regs_contextconfig);
+ }
+ if (cpu_guest_has_segments) {
+ if (copy_to_user(indices, kvm_vz_get_one_regs_segments,
+ sizeof(kvm_vz_get_one_regs_segments)))
+ return -EFAULT;
+ indices += ARRAY_SIZE(kvm_vz_get_one_regs_segments);
+ }
+ if (cpu_guest_has_htw) {
+ if (copy_to_user(indices, kvm_vz_get_one_regs_htw,
+ sizeof(kvm_vz_get_one_regs_htw)))
+ return -EFAULT;
+ indices += ARRAY_SIZE(kvm_vz_get_one_regs_htw);
+ }
+ if (cpu_guest_has_maar && !cpu_guest_has_dyn_maar) {
+ for (i = 0; i < ARRAY_SIZE(vcpu->arch.maar); ++i) {
+ index = KVM_REG_MIPS_CP0_MAAR(i);
+ if (copy_to_user(indices, &index, sizeof(index)))
+ return -EFAULT;
+ ++indices;
+ }
+
+ index = KVM_REG_MIPS_CP0_MAARI;
+ if (copy_to_user(indices, &index, sizeof(index)))
+ return -EFAULT;
+ ++indices;
+ }
+ for (i = 0; i < 6; ++i) {
+ if (!cpu_guest_has_kscr(i + 2))
+ continue;
+
+ if (copy_to_user(indices, &kvm_vz_get_one_regs_kscratch[i],
+ sizeof(kvm_vz_get_one_regs_kscratch[i])))
+ return -EFAULT;
+ ++indices;
+ }
+
+ return 0;
+}
+
+static inline s64 entrylo_kvm_to_user(unsigned long v)
+{
+ s64 mask, ret = v;
+
+ if (BITS_PER_LONG == 32) {
+ /*
+ * KVM API exposes 64-bit version of the register, so move the
+ * RI/XI bits up into place.
+ */
+ mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI;
+ ret &= ~mask;
+ ret |= ((s64)v & mask) << 32;
+ }
+ return ret;
+}
+
+static inline unsigned long entrylo_user_to_kvm(s64 v)
+{
+ unsigned long mask, ret = v;
+
+ if (BITS_PER_LONG == 32) {
+ /*
+ * KVM API exposes 64-bit versiono of the register, so move the
+ * RI/XI bits down into place.
+ */
+ mask = MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI;
+ ret &= ~mask;
+ ret |= (v >> 32) & mask;
+ }
+ return ret;
+}
+
+static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 *v)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned int idx;
+
+ switch (reg->id) {
+ case KVM_REG_MIPS_CP0_INDEX:
+ *v = (long)read_gc0_index();
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO0:
+ *v = entrylo_kvm_to_user(read_gc0_entrylo0());
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO1:
+ *v = entrylo_kvm_to_user(read_gc0_entrylo1());
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXT:
+ *v = (long)read_gc0_context();
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ *v = read_gc0_contextconfig();
+ break;
+ case KVM_REG_MIPS_CP0_USERLOCAL:
+ if (!cpu_guest_has_userlocal)
+ return -EINVAL;
+ *v = read_gc0_userlocal();
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ *v = read_gc0_xcontextconfig();
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_PAGEMASK:
+ *v = (long)read_gc0_pagemask();
+ break;
+ case KVM_REG_MIPS_CP0_PAGEGRAIN:
+ *v = (long)read_gc0_pagegrain();
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL0:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ *v = read_gc0_segctl0();
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL1:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ *v = read_gc0_segctl1();
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL2:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ *v = read_gc0_segctl2();
+ break;
+ case KVM_REG_MIPS_CP0_PWBASE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwbase();
+ break;
+ case KVM_REG_MIPS_CP0_PWFIELD:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwfield();
+ break;
+ case KVM_REG_MIPS_CP0_PWSIZE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwsize();
+ break;
+ case KVM_REG_MIPS_CP0_WIRED:
+ *v = (long)read_gc0_wired();
+ break;
+ case KVM_REG_MIPS_CP0_PWCTL:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ *v = read_gc0_pwctl();
+ break;
+ case KVM_REG_MIPS_CP0_HWRENA:
+ *v = (long)read_gc0_hwrena();
+ break;
+ case KVM_REG_MIPS_CP0_BADVADDR:
+ *v = (long)read_gc0_badvaddr();
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTR:
+ if (!cpu_guest_has_badinstr)
+ return -EINVAL;
+ *v = read_gc0_badinstr();
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTRP:
+ if (!cpu_guest_has_badinstrp)
+ return -EINVAL;
+ *v = read_gc0_badinstrp();
+ break;
+ case KVM_REG_MIPS_CP0_COUNT:
+ *v = kvm_mips_read_count(vcpu);
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYHI:
+ *v = (long)read_gc0_entryhi();
+ break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+ *v = (long)read_gc0_compare();
+ break;
+ case KVM_REG_MIPS_CP0_STATUS:
+ *v = (long)read_gc0_status();
+ break;
+ case KVM_REG_MIPS_CP0_INTCTL:
+ *v = read_gc0_intctl();
+ break;
+ case KVM_REG_MIPS_CP0_CAUSE:
+ *v = (long)read_gc0_cause();
+ break;
+ case KVM_REG_MIPS_CP0_EPC:
+ *v = (long)read_gc0_epc();
+ break;
+ case KVM_REG_MIPS_CP0_PRID:
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Octeon III has a read-only guest.PRid */
+ *v = read_gc0_prid();
+ break;
+ default:
+ *v = (long)kvm_read_c0_guest_prid(cop0);
+ break;
+ };
+ break;
+ case KVM_REG_MIPS_CP0_EBASE:
+ *v = kvm_vz_read_gc0_ebase();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG:
+ *v = read_gc0_config();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG1:
+ if (!cpu_guest_has_conf1)
+ return -EINVAL;
+ *v = read_gc0_config1();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG2:
+ if (!cpu_guest_has_conf2)
+ return -EINVAL;
+ *v = read_gc0_config2();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG3:
+ if (!cpu_guest_has_conf3)
+ return -EINVAL;
+ *v = read_gc0_config3();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ if (!cpu_guest_has_conf4)
+ return -EINVAL;
+ *v = read_gc0_config4();
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG5:
+ if (!cpu_guest_has_conf5)
+ return -EINVAL;
+ *v = read_gc0_config5();
+ break;
+ case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0);
+ if (idx >= ARRAY_SIZE(vcpu->arch.maar))
+ return -EINVAL;
+ *v = vcpu->arch.maar[idx];
+ break;
+ case KVM_REG_MIPS_CP0_MAARI:
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ *v = kvm_read_sw_gc0_maari(vcpu->arch.cop0);
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXT:
+ *v = read_gc0_xcontext();
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_ERROREPC:
+ *v = (long)read_gc0_errorepc();
+ break;
+ case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
+ idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
+ if (!cpu_guest_has_kscr(idx))
+ return -EINVAL;
+ switch (idx) {
+ case 2:
+ *v = (long)read_gc0_kscratch1();
+ break;
+ case 3:
+ *v = (long)read_gc0_kscratch2();
+ break;
+ case 4:
+ *v = (long)read_gc0_kscratch3();
+ break;
+ case 5:
+ *v = (long)read_gc0_kscratch4();
+ break;
+ case 6:
+ *v = (long)read_gc0_kscratch5();
+ break;
+ case 7:
+ *v = (long)read_gc0_kscratch6();
+ break;
+ }
+ break;
+ case KVM_REG_MIPS_COUNT_CTL:
+ *v = vcpu->arch.count_ctl;
+ break;
+ case KVM_REG_MIPS_COUNT_RESUME:
+ *v = ktime_to_ns(vcpu->arch.count_resume);
+ break;
+ case KVM_REG_MIPS_COUNT_HZ:
+ *v = vcpu->arch.count_hz;
+ break;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu,
+ const struct kvm_one_reg *reg,
+ s64 v)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned int idx;
+ int ret = 0;
+ unsigned int cur, change;
+
+ switch (reg->id) {
+ case KVM_REG_MIPS_CP0_INDEX:
+ write_gc0_index(v);
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO0:
+ write_gc0_entrylo0(entrylo_user_to_kvm(v));
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYLO1:
+ write_gc0_entrylo1(entrylo_user_to_kvm(v));
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXT:
+ write_gc0_context(v);
+ break;
+ case KVM_REG_MIPS_CP0_CONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ write_gc0_contextconfig(v);
+ break;
+ case KVM_REG_MIPS_CP0_USERLOCAL:
+ if (!cpu_guest_has_userlocal)
+ return -EINVAL;
+ write_gc0_userlocal(v);
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXTCONFIG:
+ if (!cpu_guest_has_contextconfig)
+ return -EINVAL;
+ write_gc0_xcontextconfig(v);
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_PAGEMASK:
+ write_gc0_pagemask(v);
+ break;
+ case KVM_REG_MIPS_CP0_PAGEGRAIN:
+ write_gc0_pagegrain(v);
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL0:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ write_gc0_segctl0(v);
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL1:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ write_gc0_segctl1(v);
+ break;
+ case KVM_REG_MIPS_CP0_SEGCTL2:
+ if (!cpu_guest_has_segments)
+ return -EINVAL;
+ write_gc0_segctl2(v);
+ break;
+ case KVM_REG_MIPS_CP0_PWBASE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwbase(v);
+ break;
+ case KVM_REG_MIPS_CP0_PWFIELD:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwfield(v);
+ break;
+ case KVM_REG_MIPS_CP0_PWSIZE:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwsize(v);
+ break;
+ case KVM_REG_MIPS_CP0_WIRED:
+ change_gc0_wired(MIPSR6_WIRED_WIRED, v);
+ break;
+ case KVM_REG_MIPS_CP0_PWCTL:
+ if (!cpu_guest_has_htw)
+ return -EINVAL;
+ write_gc0_pwctl(v);
+ break;
+ case KVM_REG_MIPS_CP0_HWRENA:
+ write_gc0_hwrena(v);
+ break;
+ case KVM_REG_MIPS_CP0_BADVADDR:
+ write_gc0_badvaddr(v);
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTR:
+ if (!cpu_guest_has_badinstr)
+ return -EINVAL;
+ write_gc0_badinstr(v);
+ break;
+ case KVM_REG_MIPS_CP0_BADINSTRP:
+ if (!cpu_guest_has_badinstrp)
+ return -EINVAL;
+ write_gc0_badinstrp(v);
+ break;
+ case KVM_REG_MIPS_CP0_COUNT:
+ kvm_mips_write_count(vcpu, v);
+ break;
+ case KVM_REG_MIPS_CP0_ENTRYHI:
+ write_gc0_entryhi(v);
+ break;
+ case KVM_REG_MIPS_CP0_COMPARE:
+ kvm_mips_write_compare(vcpu, v, false);
+ break;
+ case KVM_REG_MIPS_CP0_STATUS:
+ write_gc0_status(v);
+ break;
+ case KVM_REG_MIPS_CP0_INTCTL:
+ write_gc0_intctl(v);
+ break;
+ case KVM_REG_MIPS_CP0_CAUSE:
+ /*
+ * If the timer is stopped or started (DC bit) it must look
+ * atomic with changes to the timer interrupt pending bit (TI).
+ * A timer interrupt should not happen in between.
+ */
+ if ((read_gc0_cause() ^ v) & CAUSEF_DC) {
+ if (v & CAUSEF_DC) {
+ /* disable timer first */
+ kvm_mips_count_disable_cause(vcpu);
+ change_gc0_cause((u32)~CAUSEF_DC, v);
+ } else {
+ /* enable timer last */
+ change_gc0_cause((u32)~CAUSEF_DC, v);
+ kvm_mips_count_enable_cause(vcpu);
+ }
+ } else {
+ write_gc0_cause(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_EPC:
+ write_gc0_epc(v);
+ break;
+ case KVM_REG_MIPS_CP0_PRID:
+ switch (boot_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Octeon III has a guest.PRid, but its read-only */
+ break;
+ default:
+ kvm_write_c0_guest_prid(cop0, v);
+ break;
+ };
+ break;
+ case KVM_REG_MIPS_CP0_EBASE:
+ kvm_vz_write_gc0_ebase(v);
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG:
+ cur = read_gc0_config();
+ change = (cur ^ v) & kvm_vz_config_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG1:
+ if (!cpu_guest_has_conf1)
+ break;
+ cur = read_gc0_config1();
+ change = (cur ^ v) & kvm_vz_config1_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config1(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG2:
+ if (!cpu_guest_has_conf2)
+ break;
+ cur = read_gc0_config2();
+ change = (cur ^ v) & kvm_vz_config2_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config2(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG3:
+ if (!cpu_guest_has_conf3)
+ break;
+ cur = read_gc0_config3();
+ change = (cur ^ v) & kvm_vz_config3_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config3(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG4:
+ if (!cpu_guest_has_conf4)
+ break;
+ cur = read_gc0_config4();
+ change = (cur ^ v) & kvm_vz_config4_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config4(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_CONFIG5:
+ if (!cpu_guest_has_conf5)
+ break;
+ cur = read_gc0_config5();
+ change = (cur ^ v) & kvm_vz_config5_user_wrmask(vcpu);
+ if (change) {
+ v = cur ^ change;
+ write_gc0_config5(v);
+ }
+ break;
+ case KVM_REG_MIPS_CP0_MAAR(0) ... KVM_REG_MIPS_CP0_MAAR(0x3f):
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ idx = reg->id - KVM_REG_MIPS_CP0_MAAR(0);
+ if (idx >= ARRAY_SIZE(vcpu->arch.maar))
+ return -EINVAL;
+ vcpu->arch.maar[idx] = mips_process_maar(dmtc_op, v);
+ break;
+ case KVM_REG_MIPS_CP0_MAARI:
+ if (!cpu_guest_has_maar || cpu_guest_has_dyn_maar)
+ return -EINVAL;
+ kvm_write_maari(vcpu, v);
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_REG_MIPS_CP0_XCONTEXT:
+ write_gc0_xcontext(v);
+ break;
+#endif
+ case KVM_REG_MIPS_CP0_ERROREPC:
+ write_gc0_errorepc(v);
+ break;
+ case KVM_REG_MIPS_CP0_KSCRATCH1 ... KVM_REG_MIPS_CP0_KSCRATCH6:
+ idx = reg->id - KVM_REG_MIPS_CP0_KSCRATCH1 + 2;
+ if (!cpu_guest_has_kscr(idx))
+ return -EINVAL;
+ switch (idx) {
+ case 2:
+ write_gc0_kscratch1(v);
+ break;
+ case 3:
+ write_gc0_kscratch2(v);
+ break;
+ case 4:
+ write_gc0_kscratch3(v);
+ break;
+ case 5:
+ write_gc0_kscratch4(v);
+ break;
+ case 6:
+ write_gc0_kscratch5(v);
+ break;
+ case 7:
+ write_gc0_kscratch6(v);
+ break;
+ }
+ break;
+ case KVM_REG_MIPS_COUNT_CTL:
+ ret = kvm_mips_set_count_ctl(vcpu, v);
+ break;
+ case KVM_REG_MIPS_COUNT_RESUME:
+ ret = kvm_mips_set_count_resume(vcpu, v);
+ break;
+ case KVM_REG_MIPS_COUNT_HZ:
+ ret = kvm_mips_set_count_hz(vcpu, v);
+ break;
+ default:
+ return -EINVAL;
+ }
+ return ret;
+}
+
+#define guestid_cache(cpu) (cpu_data[cpu].guestid_cache)
+static void kvm_vz_get_new_guestid(unsigned long cpu, struct kvm_vcpu *vcpu)
+{
+ unsigned long guestid = guestid_cache(cpu);
+
+ if (!(++guestid & GUESTID_MASK)) {
+ if (cpu_has_vtag_icache)
+ flush_icache_all();
+
+ if (!guestid) /* fix version if needed */
+ guestid = GUESTID_FIRST_VERSION;
+
+ ++guestid; /* guestid 0 reserved for root */
+
+ /* start new guestid cycle */
+ kvm_vz_local_flush_roottlb_all_guests();
+ kvm_vz_local_flush_guesttlb_all();
+ }
+
+ guestid_cache(cpu) = guestid;
+}
+
+/* Returns 1 if the guest TLB may be clobbered */
+static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu)
+{
+ int ret = 0;
+ int i;
+
+ if (!vcpu->requests)
+ return 0;
+
+ if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) {
+ if (cpu_has_guestid) {
+ /* Drop all GuestIDs for this VCPU */
+ for_each_possible_cpu(i)
+ vcpu->arch.vzguestid[i] = 0;
+ /* This will clobber guest TLB contents too */
+ ret = 1;
+ }
+ /*
+ * For Root ASID Dealias (RAD) we don't do anything here, but we
+ * still need the request to ensure we recheck asid_flush_mask.
+ * We can still return 0 as only the root TLB will be affected
+ * by a root ASID flush.
+ */
+ }
+
+ return ret;
+}
+
+static void kvm_vz_vcpu_save_wired(struct kvm_vcpu *vcpu)
+{
+ unsigned int wired = read_gc0_wired();
+ struct kvm_mips_tlb *tlbs;
+ int i;
+
+ /* Expand the wired TLB array if necessary */
+ wired &= MIPSR6_WIRED_WIRED;
+ if (wired > vcpu->arch.wired_tlb_limit) {
+ tlbs = krealloc(vcpu->arch.wired_tlb, wired *
+ sizeof(*vcpu->arch.wired_tlb), GFP_ATOMIC);
+ if (WARN_ON(!tlbs)) {
+ /* Save whatever we can */
+ wired = vcpu->arch.wired_tlb_limit;
+ } else {
+ vcpu->arch.wired_tlb = tlbs;
+ vcpu->arch.wired_tlb_limit = wired;
+ }
+ }
+
+ if (wired)
+ /* Save wired entries from the guest TLB */
+ kvm_vz_save_guesttlb(vcpu->arch.wired_tlb, 0, wired);
+ /* Invalidate any dropped entries since last time */
+ for (i = wired; i < vcpu->arch.wired_tlb_used; ++i) {
+ vcpu->arch.wired_tlb[i].tlb_hi = UNIQUE_GUEST_ENTRYHI(i);
+ vcpu->arch.wired_tlb[i].tlb_lo[0] = 0;
+ vcpu->arch.wired_tlb[i].tlb_lo[1] = 0;
+ vcpu->arch.wired_tlb[i].tlb_mask = 0;
+ }
+ vcpu->arch.wired_tlb_used = wired;
+}
+
+static void kvm_vz_vcpu_load_wired(struct kvm_vcpu *vcpu)
+{
+ /* Load wired entries into the guest TLB */
+ if (vcpu->arch.wired_tlb)
+ kvm_vz_load_guesttlb(vcpu->arch.wired_tlb, 0,
+ vcpu->arch.wired_tlb_used);
+}
+
+static void kvm_vz_vcpu_load_tlb(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct kvm *kvm = vcpu->kvm;
+ struct mm_struct *gpa_mm = &kvm->arch.gpa_mm;
+ bool migrated;
+
+ /*
+ * Are we entering guest context on a different CPU to last time?
+ * If so, the VCPU's guest TLB state on this CPU may be stale.
+ */
+ migrated = (vcpu->arch.last_exec_cpu != cpu);
+ vcpu->arch.last_exec_cpu = cpu;
+
+ /*
+ * A vcpu's GuestID is set in GuestCtl1.ID when the vcpu is loaded and
+ * remains set until another vcpu is loaded in. As a rule GuestRID
+ * remains zeroed when in root context unless the kernel is busy
+ * manipulating guest tlb entries.
+ */
+ if (cpu_has_guestid) {
+ /*
+ * Check if our GuestID is of an older version and thus invalid.
+ *
+ * We also discard the stored GuestID if we've executed on
+ * another CPU, as the guest mappings may have changed without
+ * hypervisor knowledge.
+ */
+ if (migrated ||
+ (vcpu->arch.vzguestid[cpu] ^ guestid_cache(cpu)) &
+ GUESTID_VERSION_MASK) {
+ kvm_vz_get_new_guestid(cpu, vcpu);
+ vcpu->arch.vzguestid[cpu] = guestid_cache(cpu);
+ trace_kvm_guestid_change(vcpu,
+ vcpu->arch.vzguestid[cpu]);
+ }
+
+ /* Restore GuestID */
+ change_c0_guestctl1(GUESTID_MASK, vcpu->arch.vzguestid[cpu]);
+ } else {
+ /*
+ * The Guest TLB only stores a single guest's TLB state, so
+ * flush it if another VCPU has executed on this CPU.
+ *
+ * We also flush if we've executed on another CPU, as the guest
+ * mappings may have changed without hypervisor knowledge.
+ */
+ if (migrated || last_exec_vcpu[cpu] != vcpu)
+ kvm_vz_local_flush_guesttlb_all();
+ last_exec_vcpu[cpu] = vcpu;
+
+ /*
+ * Root ASID dealiases guest GPA mappings in the root TLB.
+ * Allocate new root ASID if needed.
+ */
+ if (cpumask_test_and_clear_cpu(cpu, &kvm->arch.asid_flush_mask)
+ || (cpu_context(cpu, gpa_mm) ^ asid_cache(cpu)) &
+ asid_version_mask(cpu))
+ get_new_mmu_context(gpa_mm, cpu);
+ }
+}
+
+static int kvm_vz_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ bool migrated, all;
+
+ /*
+ * Have we migrated to a different CPU?
+ * If so, any old guest TLB state may be stale.
+ */
+ migrated = (vcpu->arch.last_sched_cpu != cpu);
+
+ /*
+ * Was this the last VCPU to run on this CPU?
+ * If not, any old guest state from this VCPU will have been clobbered.
+ */
+ all = migrated || (last_vcpu[cpu] != vcpu);
+ last_vcpu[cpu] = vcpu;
+
+ /*
+ * Restore CP0_Wired unconditionally as we clear it after use, and
+ * restore wired guest TLB entries (while in guest context).
+ */
+ kvm_restore_gc0_wired(cop0);
+ if (current->flags & PF_VCPU) {
+ tlbw_use_hazard();
+ kvm_vz_vcpu_load_tlb(vcpu, cpu);
+ kvm_vz_vcpu_load_wired(vcpu);
+ }
+
+ /*
+ * Restore timer state regardless, as e.g. Cause.TI can change over time
+ * if left unmaintained.
+ */
+ kvm_vz_restore_timer(vcpu);
+
+ /* Set MC bit if we want to trace guest mode changes */
+ if (kvm_trace_guest_mode_change)
+ set_c0_guestctl0(MIPS_GCTL0_MC);
+ else
+ clear_c0_guestctl0(MIPS_GCTL0_MC);
+
+ /* Don't bother restoring registers multiple times unless necessary */
+ if (!all)
+ return 0;
+
+ /*
+ * Restore config registers first, as some implementations restrict
+ * writes to other registers when the corresponding feature bits aren't
+ * set. For example Status.CU1 cannot be set unless Config1.FP is set.
+ */
+ kvm_restore_gc0_config(cop0);
+ if (cpu_guest_has_conf1)
+ kvm_restore_gc0_config1(cop0);
+ if (cpu_guest_has_conf2)
+ kvm_restore_gc0_config2(cop0);
+ if (cpu_guest_has_conf3)
+ kvm_restore_gc0_config3(cop0);
+ if (cpu_guest_has_conf4)
+ kvm_restore_gc0_config4(cop0);
+ if (cpu_guest_has_conf5)
+ kvm_restore_gc0_config5(cop0);
+ if (cpu_guest_has_conf6)
+ kvm_restore_gc0_config6(cop0);
+ if (cpu_guest_has_conf7)
+ kvm_restore_gc0_config7(cop0);
+
+ kvm_restore_gc0_index(cop0);
+ kvm_restore_gc0_entrylo0(cop0);
+ kvm_restore_gc0_entrylo1(cop0);
+ kvm_restore_gc0_context(cop0);
+ if (cpu_guest_has_contextconfig)
+ kvm_restore_gc0_contextconfig(cop0);
+#ifdef CONFIG_64BIT
+ kvm_restore_gc0_xcontext(cop0);
+ if (cpu_guest_has_contextconfig)
+ kvm_restore_gc0_xcontextconfig(cop0);
+#endif
+ kvm_restore_gc0_pagemask(cop0);
+ kvm_restore_gc0_pagegrain(cop0);
+ kvm_restore_gc0_hwrena(cop0);
+ kvm_restore_gc0_badvaddr(cop0);
+ kvm_restore_gc0_entryhi(cop0);
+ kvm_restore_gc0_status(cop0);
+ kvm_restore_gc0_intctl(cop0);
+ kvm_restore_gc0_epc(cop0);
+ kvm_vz_write_gc0_ebase(kvm_read_sw_gc0_ebase(cop0));
+ if (cpu_guest_has_userlocal)
+ kvm_restore_gc0_userlocal(cop0);
+
+ kvm_restore_gc0_errorepc(cop0);
+
+ /* restore KScratch registers if enabled in guest */
+ if (cpu_guest_has_conf4) {
+ if (cpu_guest_has_kscr(2))
+ kvm_restore_gc0_kscratch1(cop0);
+ if (cpu_guest_has_kscr(3))
+ kvm_restore_gc0_kscratch2(cop0);
+ if (cpu_guest_has_kscr(4))
+ kvm_restore_gc0_kscratch3(cop0);
+ if (cpu_guest_has_kscr(5))
+ kvm_restore_gc0_kscratch4(cop0);
+ if (cpu_guest_has_kscr(6))
+ kvm_restore_gc0_kscratch5(cop0);
+ if (cpu_guest_has_kscr(7))
+ kvm_restore_gc0_kscratch6(cop0);
+ }
+
+ if (cpu_guest_has_badinstr)
+ kvm_restore_gc0_badinstr(cop0);
+ if (cpu_guest_has_badinstrp)
+ kvm_restore_gc0_badinstrp(cop0);
+
+ if (cpu_guest_has_segments) {
+ kvm_restore_gc0_segctl0(cop0);
+ kvm_restore_gc0_segctl1(cop0);
+ kvm_restore_gc0_segctl2(cop0);
+ }
+
+ /* restore HTW registers */
+ if (cpu_guest_has_htw) {
+ kvm_restore_gc0_pwbase(cop0);
+ kvm_restore_gc0_pwfield(cop0);
+ kvm_restore_gc0_pwsize(cop0);
+ kvm_restore_gc0_pwctl(cop0);
+ }
+
+ /* restore Root.GuestCtl2 from unused Guest guestctl2 register */
+ if (cpu_has_guestctl2)
+ write_c0_guestctl2(
+ cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL]);
+
+ /*
+ * We should clear linked load bit to break interrupted atomics. This
+ * prevents a SC on the next VCPU from succeeding by matching a LL on
+ * the previous VCPU.
+ */
+ if (cpu_guest_has_rw_llb)
+ write_gc0_lladdr(0);
+
+ return 0;
+}
+
+static int kvm_vz_vcpu_put(struct kvm_vcpu *vcpu, int cpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+
+ if (current->flags & PF_VCPU)
+ kvm_vz_vcpu_save_wired(vcpu);
+
+ kvm_lose_fpu(vcpu);
+
+ kvm_save_gc0_index(cop0);
+ kvm_save_gc0_entrylo0(cop0);
+ kvm_save_gc0_entrylo1(cop0);
+ kvm_save_gc0_context(cop0);
+ if (cpu_guest_has_contextconfig)
+ kvm_save_gc0_contextconfig(cop0);
+#ifdef CONFIG_64BIT
+ kvm_save_gc0_xcontext(cop0);
+ if (cpu_guest_has_contextconfig)
+ kvm_save_gc0_xcontextconfig(cop0);
+#endif
+ kvm_save_gc0_pagemask(cop0);
+ kvm_save_gc0_pagegrain(cop0);
+ kvm_save_gc0_wired(cop0);
+ /* allow wired TLB entries to be overwritten */
+ clear_gc0_wired(MIPSR6_WIRED_WIRED);
+ kvm_save_gc0_hwrena(cop0);
+ kvm_save_gc0_badvaddr(cop0);
+ kvm_save_gc0_entryhi(cop0);
+ kvm_save_gc0_status(cop0);
+ kvm_save_gc0_intctl(cop0);
+ kvm_save_gc0_epc(cop0);
+ kvm_write_sw_gc0_ebase(cop0, kvm_vz_read_gc0_ebase());
+ if (cpu_guest_has_userlocal)
+ kvm_save_gc0_userlocal(cop0);
+
+ /* only save implemented config registers */
+ kvm_save_gc0_config(cop0);
+ if (cpu_guest_has_conf1)
+ kvm_save_gc0_config1(cop0);
+ if (cpu_guest_has_conf2)
+ kvm_save_gc0_config2(cop0);
+ if (cpu_guest_has_conf3)
+ kvm_save_gc0_config3(cop0);
+ if (cpu_guest_has_conf4)
+ kvm_save_gc0_config4(cop0);
+ if (cpu_guest_has_conf5)
+ kvm_save_gc0_config5(cop0);
+ if (cpu_guest_has_conf6)
+ kvm_save_gc0_config6(cop0);
+ if (cpu_guest_has_conf7)
+ kvm_save_gc0_config7(cop0);
+
+ kvm_save_gc0_errorepc(cop0);
+
+ /* save KScratch registers if enabled in guest */
+ if (cpu_guest_has_conf4) {
+ if (cpu_guest_has_kscr(2))
+ kvm_save_gc0_kscratch1(cop0);
+ if (cpu_guest_has_kscr(3))
+ kvm_save_gc0_kscratch2(cop0);
+ if (cpu_guest_has_kscr(4))
+ kvm_save_gc0_kscratch3(cop0);
+ if (cpu_guest_has_kscr(5))
+ kvm_save_gc0_kscratch4(cop0);
+ if (cpu_guest_has_kscr(6))
+ kvm_save_gc0_kscratch5(cop0);
+ if (cpu_guest_has_kscr(7))
+ kvm_save_gc0_kscratch6(cop0);
+ }
+
+ if (cpu_guest_has_badinstr)
+ kvm_save_gc0_badinstr(cop0);
+ if (cpu_guest_has_badinstrp)
+ kvm_save_gc0_badinstrp(cop0);
+
+ if (cpu_guest_has_segments) {
+ kvm_save_gc0_segctl0(cop0);
+ kvm_save_gc0_segctl1(cop0);
+ kvm_save_gc0_segctl2(cop0);
+ }
+
+ /* save HTW registers if enabled in guest */
+ if (cpu_guest_has_htw &&
+ kvm_read_sw_gc0_config3(cop0) & MIPS_CONF3_PW) {
+ kvm_save_gc0_pwbase(cop0);
+ kvm_save_gc0_pwfield(cop0);
+ kvm_save_gc0_pwsize(cop0);
+ kvm_save_gc0_pwctl(cop0);
+ }
+
+ kvm_vz_save_timer(vcpu);
+
+ /* save Root.GuestCtl2 in unused Guest guestctl2 register */
+ if (cpu_has_guestctl2)
+ cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] =
+ read_c0_guestctl2();
+
+ return 0;
+}
+
+/**
+ * kvm_vz_resize_guest_vtlb() - Attempt to resize guest VTLB.
+ * @size: Number of guest VTLB entries (0 < @size <= root VTLB entries).
+ *
+ * Attempt to resize the guest VTLB by writing guest Config registers. This is
+ * necessary for cores with a shared root/guest TLB to avoid overlap with wired
+ * entries in the root VTLB.
+ *
+ * Returns: The resulting guest VTLB size.
+ */
+static unsigned int kvm_vz_resize_guest_vtlb(unsigned int size)
+{
+ unsigned int config4 = 0, ret = 0, limit;
+
+ /* Write MMUSize - 1 into guest Config registers */
+ if (cpu_guest_has_conf1)
+ change_gc0_config1(MIPS_CONF1_TLBS,
+ (size - 1) << MIPS_CONF1_TLBS_SHIFT);
+ if (cpu_guest_has_conf4) {
+ config4 = read_gc0_config4();
+ if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT) {
+ config4 &= ~MIPS_CONF4_VTLBSIZEEXT;
+ config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) <<
+ MIPS_CONF4_VTLBSIZEEXT_SHIFT;
+ } else if ((config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT) {
+ config4 &= ~MIPS_CONF4_MMUSIZEEXT;
+ config4 |= ((size - 1) >> MIPS_CONF1_TLBS_SIZE) <<
+ MIPS_CONF4_MMUSIZEEXT_SHIFT;
+ }
+ write_gc0_config4(config4);
+ }
+
+ /*
+ * Set Guest.Wired.Limit = 0 (no limit up to Guest.MMUSize-1), unless it
+ * would exceed Root.Wired.Limit (clearing Guest.Wired.Wired so write
+ * not dropped)
+ */
+ if (cpu_has_mips_r6) {
+ limit = (read_c0_wired() & MIPSR6_WIRED_LIMIT) >>
+ MIPSR6_WIRED_LIMIT_SHIFT;
+ if (size - 1 <= limit)
+ limit = 0;
+ write_gc0_wired(limit << MIPSR6_WIRED_LIMIT_SHIFT);
+ }
+
+ /* Read back MMUSize - 1 */
+ back_to_back_c0_hazard();
+ if (cpu_guest_has_conf1)
+ ret = (read_gc0_config1() & MIPS_CONF1_TLBS) >>
+ MIPS_CONF1_TLBS_SHIFT;
+ if (config4) {
+ if (cpu_has_mips_r6 || (config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT)
+ ret |= ((config4 & MIPS_CONF4_VTLBSIZEEXT) >>
+ MIPS_CONF4_VTLBSIZEEXT_SHIFT) <<
+ MIPS_CONF1_TLBS_SIZE;
+ else if ((config4 & MIPS_CONF4_MMUEXTDEF) ==
+ MIPS_CONF4_MMUEXTDEF_MMUSIZEEXT)
+ ret |= ((config4 & MIPS_CONF4_MMUSIZEEXT) >>
+ MIPS_CONF4_MMUSIZEEXT_SHIFT) <<
+ MIPS_CONF1_TLBS_SIZE;
+ }
+ return ret + 1;
+}
+
+static int kvm_vz_hardware_enable(void)
+{
+ unsigned int mmu_size, guest_mmu_size, ftlb_size;
+ u64 guest_cvmctl, cvmvmconfig;
+
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /* Set up guest timer/perfcount IRQ lines */
+ guest_cvmctl = read_gc0_cvmctl();
+ guest_cvmctl &= ~CVMCTL_IPTI;
+ guest_cvmctl |= 7ull << CVMCTL_IPTI_SHIFT;
+ guest_cvmctl &= ~CVMCTL_IPPCI;
+ guest_cvmctl |= 6ull << CVMCTL_IPPCI_SHIFT;
+ write_gc0_cvmctl(guest_cvmctl);
+
+ cvmvmconfig = read_c0_cvmvmconfig();
+ /* No I/O hole translation. */
+ cvmvmconfig |= CVMVMCONF_DGHT;
+ /* Halve the root MMU size */
+ mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1)
+ >> CVMVMCONF_MMUSIZEM1_S) + 1;
+ guest_mmu_size = mmu_size / 2;
+ mmu_size -= guest_mmu_size;
+ cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1;
+ cvmvmconfig |= mmu_size - 1;
+ write_c0_cvmvmconfig(cvmvmconfig);
+
+ /* Update our records */
+ current_cpu_data.tlbsize = mmu_size;
+ current_cpu_data.tlbsizevtlb = mmu_size;
+ current_cpu_data.guest.tlbsize = guest_mmu_size;
+
+ /* Flush moved entries in new (guest) context */
+ kvm_vz_local_flush_guesttlb_all();
+ break;
+ default:
+ /*
+ * ImgTec cores tend to use a shared root/guest TLB. To avoid
+ * overlap of root wired and guest entries, the guest TLB may
+ * need resizing.
+ */
+ mmu_size = current_cpu_data.tlbsizevtlb;
+ ftlb_size = current_cpu_data.tlbsize - mmu_size;
+
+ /* Try switching to maximum guest VTLB size for flush */
+ guest_mmu_size = kvm_vz_resize_guest_vtlb(mmu_size);
+ current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size;
+ kvm_vz_local_flush_guesttlb_all();
+
+ /*
+ * Reduce to make space for root wired entries and at least 2
+ * root non-wired entries. This does assume that long-term wired
+ * entries won't be added later.
+ */
+ guest_mmu_size = mmu_size - num_wired_entries() - 2;
+ guest_mmu_size = kvm_vz_resize_guest_vtlb(guest_mmu_size);
+ current_cpu_data.guest.tlbsize = guest_mmu_size + ftlb_size;
+
+ /*
+ * Write the VTLB size, but if another CPU has already written,
+ * check it matches or we won't provide a consistent view to the
+ * guest. If this ever happens it suggests an asymmetric number
+ * of wired entries.
+ */
+ if (cmpxchg(&kvm_vz_guest_vtlb_size, 0, guest_mmu_size) &&
+ WARN(guest_mmu_size != kvm_vz_guest_vtlb_size,
+ "Available guest VTLB size mismatch"))
+ return -EINVAL;
+ break;
+ }
+
+ /*
+ * Enable virtualization features granting guest direct control of
+ * certain features:
+ * CP0=1: Guest coprocessor 0 context.
+ * AT=Guest: Guest MMU.
+ * CG=1: Hit (virtual address) CACHE operations (optional).
+ * CF=1: Guest Config registers.
+ * CGI=1: Indexed flush CACHE operations (optional).
+ */
+ write_c0_guestctl0(MIPS_GCTL0_CP0 |
+ (MIPS_GCTL0_AT_GUEST << MIPS_GCTL0_AT_SHIFT) |
+ MIPS_GCTL0_CG | MIPS_GCTL0_CF);
+ if (cpu_has_guestctl0ext)
+ set_c0_guestctl0ext(MIPS_GCTL0EXT_CGI);
+
+ if (cpu_has_guestid) {
+ write_c0_guestctl1(0);
+ kvm_vz_local_flush_roottlb_all_guests();
+
+ GUESTID_MASK = current_cpu_data.guestid_mask;
+ GUESTID_FIRST_VERSION = GUESTID_MASK + 1;
+ GUESTID_VERSION_MASK = ~GUESTID_MASK;
+
+ current_cpu_data.guestid_cache = GUESTID_FIRST_VERSION;
+ }
+
+ /* clear any pending injected virtual guest interrupts */
+ if (cpu_has_guestctl2)
+ clear_c0_guestctl2(0x3f << 10);
+
+ return 0;
+}
+
+static void kvm_vz_hardware_disable(void)
+{
+ u64 cvmvmconfig;
+ unsigned int mmu_size;
+
+ /* Flush any remaining guest TLB entries */
+ kvm_vz_local_flush_guesttlb_all();
+
+ switch (current_cpu_type()) {
+ case CPU_CAVIUM_OCTEON3:
+ /*
+ * Allocate whole TLB for root. Existing guest TLB entries will
+ * change ownership to the root TLB. We should be safe though as
+ * they've already been flushed above while in guest TLB.
+ */
+ cvmvmconfig = read_c0_cvmvmconfig();
+ mmu_size = ((cvmvmconfig & CVMVMCONF_MMUSIZEM1)
+ >> CVMVMCONF_MMUSIZEM1_S) + 1;
+ cvmvmconfig &= ~CVMVMCONF_RMMUSIZEM1;
+ cvmvmconfig |= mmu_size - 1;
+ write_c0_cvmvmconfig(cvmvmconfig);
+
+ /* Update our records */
+ current_cpu_data.tlbsize = mmu_size;
+ current_cpu_data.tlbsizevtlb = mmu_size;
+ current_cpu_data.guest.tlbsize = 0;
+
+ /* Flush moved entries in new (root) context */
+ local_flush_tlb_all();
+ break;
+ }
+
+ if (cpu_has_guestid) {
+ write_c0_guestctl1(0);
+ kvm_vz_local_flush_roottlb_all_guests();
+ }
+}
+
+static int kvm_vz_check_extension(struct kvm *kvm, long ext)
+{
+ int r;
+
+ switch (ext) {
+ case KVM_CAP_MIPS_VZ:
+ /* we wouldn't be here unless cpu_has_vz */
+ r = 1;
+ break;
+#ifdef CONFIG_64BIT
+ case KVM_CAP_MIPS_64BIT:
+ /* We support 64-bit registers/operations and addresses */
+ r = 2;
+ break;
+#endif
+ default:
+ r = 0;
+ break;
+ }
+
+ return r;
+}
+
+static int kvm_vz_vcpu_init(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for_each_possible_cpu(i)
+ vcpu->arch.vzguestid[i] = 0;
+
+ return 0;
+}
+
+static void kvm_vz_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+ int cpu;
+
+ /*
+ * If the VCPU is freed and reused as another VCPU, we don't want the
+ * matching pointer wrongly hanging around in last_vcpu[] or
+ * last_exec_vcpu[].
+ */
+ for_each_possible_cpu(cpu) {
+ if (last_vcpu[cpu] == vcpu)
+ last_vcpu[cpu] = NULL;
+ if (last_exec_vcpu[cpu] == vcpu)
+ last_exec_vcpu[cpu] = NULL;
+ }
+}
+
+static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+ struct mips_coproc *cop0 = vcpu->arch.cop0;
+ unsigned long count_hz = 100*1000*1000; /* default to 100 MHz */
+
+ /*
+ * Start off the timer at the same frequency as the host timer, but the
+ * soft timer doesn't handle frequencies greater than 1GHz yet.
+ */
+ if (mips_hpt_frequency && mips_hpt_frequency <= NSEC_PER_SEC)
+ count_hz = mips_hpt_frequency;
+ kvm_mips_init_count(vcpu, count_hz);
+
+ /*
+ * Initialize guest register state to valid architectural reset state.
+ */
+
+ /* PageGrain */
+ if (cpu_has_mips_r6)
+ kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC);
+ /* Wired */
+ if (cpu_has_mips_r6)
+ kvm_write_sw_gc0_wired(cop0,
+ read_gc0_wired() & MIPSR6_WIRED_LIMIT);
+ /* Status */
+ kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL);
+ if (cpu_has_mips_r6)
+ kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status());
+ /* IntCtl */
+ kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() &
+ (INTCTLF_IPFDC | INTCTLF_IPPCI | INTCTLF_IPTI));
+ /* PRId */
+ kvm_write_sw_gc0_prid(cop0, boot_cpu_data.processor_id);
+ /* EBase */
+ kvm_write_sw_gc0_ebase(cop0, (s32)0x80000000 | vcpu->vcpu_id);
+ /* Config */
+ kvm_save_gc0_config(cop0);
+ /* architecturally writable (e.g. from guest) */
+ kvm_change_sw_gc0_config(cop0, CONF_CM_CMASK,
+ _page_cachable_default >> _CACHE_SHIFT);
+ /* architecturally read only, but maybe writable from root */
+ kvm_change_sw_gc0_config(cop0, MIPS_CONF_MT, read_c0_config());
+ if (cpu_guest_has_conf1) {
+ kvm_set_sw_gc0_config(cop0, MIPS_CONF_M);
+ /* Config1 */
+ kvm_save_gc0_config1(cop0);
+ /* architecturally read only, but maybe writable from root */
+ kvm_clear_sw_gc0_config1(cop0, MIPS_CONF1_C2 |
+ MIPS_CONF1_MD |
+ MIPS_CONF1_PC |
+ MIPS_CONF1_WR |
+ MIPS_CONF1_CA |
+ MIPS_CONF1_FP);
+ }
+ if (cpu_guest_has_conf2) {
+ kvm_set_sw_gc0_config1(cop0, MIPS_CONF_M);
+ /* Config2 */
+ kvm_save_gc0_config2(cop0);
+ }
+ if (cpu_guest_has_conf3) {
+ kvm_set_sw_gc0_config2(cop0, MIPS_CONF_M);
+ /* Config3 */
+ kvm_save_gc0_config3(cop0);
+ /* architecturally writable (e.g. from guest) */
+ kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_ISA_OE);
+ /* architecturally read only, but maybe writable from root */
+ kvm_clear_sw_gc0_config3(cop0, MIPS_CONF3_MSA |
+ MIPS_CONF3_BPG |
+ MIPS_CONF3_ULRI |
+ MIPS_CONF3_DSP |
+ MIPS_CONF3_CTXTC |
+ MIPS_CONF3_ITL |
+ MIPS_CONF3_LPA |
+ MIPS_CONF3_VEIC |
+ MIPS_CONF3_VINT |
+ MIPS_CONF3_SP |
+ MIPS_CONF3_CDMM |
+ MIPS_CONF3_MT |
+ MIPS_CONF3_SM |
+ MIPS_CONF3_TL);
+ }
+ if (cpu_guest_has_conf4) {
+ kvm_set_sw_gc0_config3(cop0, MIPS_CONF_M);
+ /* Config4 */
+ kvm_save_gc0_config4(cop0);
+ }
+ if (cpu_guest_has_conf5) {
+ kvm_set_sw_gc0_config4(cop0, MIPS_CONF_M);
+ /* Config5 */
+ kvm_save_gc0_config5(cop0);
+ /* architecturally writable (e.g. from guest) */
+ kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_K |
+ MIPS_CONF5_CV |
+ MIPS_CONF5_MSAEN |
+ MIPS_CONF5_UFE |
+ MIPS_CONF5_FRE |
+ MIPS_CONF5_SBRI |
+ MIPS_CONF5_UFR);
+ /* architecturally read only, but maybe writable from root */
+ kvm_clear_sw_gc0_config5(cop0, MIPS_CONF5_MRP);
+ }
+
+ if (cpu_guest_has_contextconfig) {
+ /* ContextConfig */
+ kvm_write_sw_gc0_contextconfig(cop0, 0x007ffff0);
+#ifdef CONFIG_64BIT
+ /* XContextConfig */
+ /* bits SEGBITS-13+3:4 set */
+ kvm_write_sw_gc0_xcontextconfig(cop0,
+ ((1ull << (cpu_vmbits - 13)) - 1) << 4);
+#endif
+ }
+
+ /* Implementation dependent, use the legacy layout */
+ if (cpu_guest_has_segments) {
+ /* SegCtl0, SegCtl1, SegCtl2 */
+ kvm_write_sw_gc0_segctl0(cop0, 0x00200010);
+ kvm_write_sw_gc0_segctl1(cop0, 0x00000002 |
+ (_page_cachable_default >> _CACHE_SHIFT) <<
+ (16 + MIPS_SEGCFG_C_SHIFT));
+ kvm_write_sw_gc0_segctl2(cop0, 0x00380438);
+ }
+
+ /* reset HTW registers */
+ if (cpu_guest_has_htw && cpu_has_mips_r6) {
+ /* PWField */
+ kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302);
+ /* PWSize */
+ kvm_write_sw_gc0_pwsize(cop0, 1 << MIPS_PWSIZE_PTW_SHIFT);
+ }
+
+ /* start with no pending virtual guest interrupts */
+ if (cpu_has_guestctl2)
+ cop0->reg[MIPS_CP0_GUESTCTL2][MIPS_CP0_GUESTCTL2_SEL] = 0;
+
+ /* Put PC at reset vector */
+ vcpu->arch.pc = CKSEG1ADDR(0x1fc00000);
+
+ return 0;
+}
+
+static void kvm_vz_flush_shadow_all(struct kvm *kvm)
+{
+ if (cpu_has_guestid) {
+ /* Flush GuestID for each VCPU individually */
+ kvm_flush_remote_tlbs(kvm);
+ } else {
+ /*
+ * For each CPU there is a single GPA ASID used by all VCPUs in
+ * the VM, so it doesn't make sense for the VCPUs to handle
+ * invalidation of these ASIDs individually.
+ *
+ * Instead mark all CPUs as needing ASID invalidation in
+ * asid_flush_mask, and just use kvm_flush_remote_tlbs(kvm) to
+ * kick any running VCPUs so they check asid_flush_mask.
+ */
+ cpumask_setall(&kvm->arch.asid_flush_mask);
+ kvm_flush_remote_tlbs(kvm);
+ }
+}
+
+static void kvm_vz_flush_shadow_memslot(struct kvm *kvm,
+ const struct kvm_memory_slot *slot)
+{
+ kvm_vz_flush_shadow_all(kvm);
+}
+
+static void kvm_vz_vcpu_reenter(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int cpu = smp_processor_id();
+ int preserve_guest_tlb;
+
+ preserve_guest_tlb = kvm_vz_check_requests(vcpu, cpu);
+
+ if (preserve_guest_tlb)
+ kvm_vz_vcpu_save_wired(vcpu);
+
+ kvm_vz_vcpu_load_tlb(vcpu, cpu);
+
+ if (preserve_guest_tlb)
+ kvm_vz_vcpu_load_wired(vcpu);
+}
+
+static int kvm_vz_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
+{
+ int cpu = smp_processor_id();
+ int r;
+
+ kvm_vz_acquire_htimer(vcpu);
+ /* Check if we have any exceptions/interrupts pending */
+ kvm_mips_deliver_interrupts(vcpu, read_gc0_cause());
+
+ kvm_vz_check_requests(vcpu, cpu);
+ kvm_vz_vcpu_load_tlb(vcpu, cpu);
+ kvm_vz_vcpu_load_wired(vcpu);
+
+ r = vcpu->arch.vcpu_run(run, vcpu);
+
+ kvm_vz_vcpu_save_wired(vcpu);
+
+ return r;
+}
+
+static struct kvm_mips_callbacks kvm_vz_callbacks = {
+ .handle_cop_unusable = kvm_trap_vz_handle_cop_unusable,
+ .handle_tlb_mod = kvm_trap_vz_handle_tlb_st_miss,
+ .handle_tlb_ld_miss = kvm_trap_vz_handle_tlb_ld_miss,
+ .handle_tlb_st_miss = kvm_trap_vz_handle_tlb_st_miss,
+ .handle_addr_err_st = kvm_trap_vz_no_handler,
+ .handle_addr_err_ld = kvm_trap_vz_no_handler,
+ .handle_syscall = kvm_trap_vz_no_handler,
+ .handle_res_inst = kvm_trap_vz_no_handler,
+ .handle_break = kvm_trap_vz_no_handler,
+ .handle_msa_disabled = kvm_trap_vz_handle_msa_disabled,
+ .handle_guest_exit = kvm_trap_vz_handle_guest_exit,
+
+ .hardware_enable = kvm_vz_hardware_enable,
+ .hardware_disable = kvm_vz_hardware_disable,
+ .check_extension = kvm_vz_check_extension,
+ .vcpu_init = kvm_vz_vcpu_init,
+ .vcpu_uninit = kvm_vz_vcpu_uninit,
+ .vcpu_setup = kvm_vz_vcpu_setup,
+ .flush_shadow_all = kvm_vz_flush_shadow_all,
+ .flush_shadow_memslot = kvm_vz_flush_shadow_memslot,
+ .gva_to_gpa = kvm_vz_gva_to_gpa_cb,
+ .queue_timer_int = kvm_vz_queue_timer_int_cb,
+ .dequeue_timer_int = kvm_vz_dequeue_timer_int_cb,
+ .queue_io_int = kvm_vz_queue_io_int_cb,
+ .dequeue_io_int = kvm_vz_dequeue_io_int_cb,
+ .irq_deliver = kvm_vz_irq_deliver_cb,
+ .irq_clear = kvm_vz_irq_clear_cb,
+ .num_regs = kvm_vz_num_regs,
+ .copy_reg_indices = kvm_vz_copy_reg_indices,
+ .get_one_reg = kvm_vz_get_one_reg,
+ .set_one_reg = kvm_vz_set_one_reg,
+ .vcpu_load = kvm_vz_vcpu_load,
+ .vcpu_put = kvm_vz_vcpu_put,
+ .vcpu_run = kvm_vz_vcpu_run,
+ .vcpu_reenter = kvm_vz_vcpu_reenter,
+};
+
+int kvm_mips_emulation_init(struct kvm_mips_callbacks **install_callbacks)
+{
+ if (!cpu_has_vz)
+ return -ENODEV;
+
+ /*
+ * VZ requires at least 2 KScratch registers, so it should have been
+ * possible to allocate pgd_reg.
+ */
+ if (WARN(pgd_reg == -1,
+ "pgd_reg not allocated even though cpu_has_vz\n"))
+ return -ENODEV;
+
+ pr_info("Starting KVM with MIPS VZ extensions\n");
+
+ *install_callbacks = &kvm_vz_callbacks;
+ return 0;
+}
diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c
index 6db3413..899e462 100644
--- a/arch/mips/mm/cache.c
+++ b/arch/mips/mm/cache.c
@@ -24,6 +24,7 @@
/* Cache operations. */
void (*flush_cache_all)(void);
void (*__flush_cache_all)(void);
+EXPORT_SYMBOL_GPL(__flush_cache_all);
void (*flush_cache_mm)(struct mm_struct *mm);
void (*flush_cache_range)(struct vm_area_struct *vma, unsigned long start,
unsigned long end);
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index aa75849..3ca2028 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -348,7 +348,7 @@ void maar_init(void)
upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff;
pr_info(" [%d]: ", i / 2);
- if (!(attr & MIPS_MAAR_V)) {
+ if (!(attr & MIPS_MAAR_VL)) {
pr_cont("disabled\n");
continue;
}
diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h
index 4852e84..c0a5505 100644
--- a/arch/powerpc/include/asm/disassemble.h
+++ b/arch/powerpc/include/asm/disassemble.h
@@ -87,6 +87,11 @@ static inline unsigned int get_oc(u32 inst)
return (inst >> 11) & 0x7fff;
}
+static inline unsigned int get_tx_or_sx(u32 inst)
+{
+ return (inst) & 0x1;
+}
+
#define IS_XFORM(inst) (get_op(inst) == 31)
#define IS_DSFORM(inst) (get_op(inst) >= 56)
diff --git a/arch/powerpc/include/asm/iommu.h b/arch/powerpc/include/asm/iommu.h
index 2c1d507..8a8ce22 100644
--- a/arch/powerpc/include/asm/iommu.h
+++ b/arch/powerpc/include/asm/iommu.h
@@ -64,6 +64,11 @@ struct iommu_table_ops {
long index,
unsigned long *hpa,
enum dma_data_direction *direction);
+ /* Real mode */
+ int (*exchange_rm)(struct iommu_table *tbl,
+ long index,
+ unsigned long *hpa,
+ enum dma_data_direction *direction);
#endif
void (*clear)(struct iommu_table *tbl,
long index, long npages);
@@ -114,6 +119,7 @@ struct iommu_table {
struct list_head it_group_list;/* List of iommu_table_group_link */
unsigned long *it_userspace; /* userspace view of the table */
struct iommu_table_ops *it_ops;
+ struct kref it_kref;
};
#define IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry) \
@@ -146,8 +152,8 @@ static inline void *get_iommu_table_base(struct device *dev)
extern int dma_iommu_dma_supported(struct device *dev, u64 mask);
-/* Frees table for an individual device node */
-extern void iommu_free_table(struct iommu_table *tbl, const char *node_name);
+extern struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl);
+extern int iommu_tce_table_put(struct iommu_table *tbl);
/* Initializes an iommu_table based in values set in the passed-in
* structure
@@ -208,6 +214,8 @@ extern void iommu_del_device(struct device *dev);
extern int __init tce_iommu_bus_notifier_init(void);
extern long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction);
+extern long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction);
#else
static inline void iommu_register_group(struct iommu_table_group *table_group,
int pci_domain_number,
@@ -288,11 +296,21 @@ static inline void iommu_restore(void)
#endif
/* The API to support IOMMU operations for VFIO */
-extern int iommu_tce_clear_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce_value,
- unsigned long npages);
-extern int iommu_tce_put_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce);
+extern int iommu_tce_check_ioba(unsigned long page_shift,
+ unsigned long offset, unsigned long size,
+ unsigned long ioba, unsigned long npages);
+extern int iommu_tce_check_gpa(unsigned long page_shift,
+ unsigned long gpa);
+
+#define iommu_tce_clear_param_check(tbl, ioba, tce_value, npages) \
+ (iommu_tce_check_ioba((tbl)->it_page_shift, \
+ (tbl)->it_offset, (tbl)->it_size, \
+ (ioba), (npages)) || (tce_value))
+#define iommu_tce_put_param_check(tbl, ioba, gpa) \
+ (iommu_tce_check_ioba((tbl)->it_page_shift, \
+ (tbl)->it_offset, (tbl)->it_size, \
+ (ioba), 1) || \
+ iommu_tce_check_gpa((tbl)->it_page_shift, (gpa)))
extern void iommu_flush_tce(struct iommu_table *tbl);
extern int iommu_take_ownership(struct iommu_table *tbl);
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7bba8f4..77c6082 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -45,9 +45,6 @@
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
-#ifdef CONFIG_KVM_MMIO
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
-#endif
#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
/* These values are internal and can be increased later */
@@ -191,6 +188,13 @@ struct kvmppc_pginfo {
atomic_t refcnt;
};
+struct kvmppc_spapr_tce_iommu_table {
+ struct rcu_head rcu;
+ struct list_head next;
+ struct iommu_table *tbl;
+ struct kref kref;
+};
+
struct kvmppc_spapr_tce_table {
struct list_head list;
struct kvm *kvm;
@@ -199,6 +203,7 @@ struct kvmppc_spapr_tce_table {
u32 page_shift;
u64 offset; /* in pages */
u64 size; /* window size in pages */
+ struct list_head iommu_tables;
struct page *pages[0];
};
@@ -345,6 +350,7 @@ struct kvmppc_pte {
bool may_read : 1;
bool may_write : 1;
bool may_execute : 1;
+ unsigned long wimg;
u8 page_size; /* MMU_PAGE_xxx */
};
@@ -441,6 +447,11 @@ struct mmio_hpte_cache {
unsigned int index;
};
+#define KVMPPC_VSX_COPY_NONE 0
+#define KVMPPC_VSX_COPY_WORD 1
+#define KVMPPC_VSX_COPY_DWORD 2
+#define KVMPPC_VSX_COPY_DWORD_LOAD_DUMP 3
+
struct openpic;
struct kvm_vcpu_arch {
@@ -644,6 +655,21 @@ struct kvm_vcpu_arch {
u8 io_gpr; /* GPR used as IO source/target */
u8 mmio_host_swabbed;
u8 mmio_sign_extend;
+ /* conversion between single and double precision */
+ u8 mmio_sp64_extend;
+ /*
+ * Number of simulations for vsx.
+ * If we use 2*8bytes to simulate 1*16bytes,
+ * then the number should be 2 and
+ * mmio_vsx_copy_type=KVMPPC_VSX_COPY_DWORD.
+ * If we use 4*4bytes to simulate 1*16bytes,
+ * the number should be 4 and
+ * mmio_vsx_copy_type=KVMPPC_VSX_COPY_WORD.
+ */
+ u8 mmio_vsx_copy_nums;
+ u8 mmio_vsx_offset;
+ u8 mmio_vsx_copy_type;
+ u8 mmio_vsx_tx_sx_enabled;
u8 osi_needed;
u8 osi_enabled;
u8 papr_enabled;
@@ -732,6 +758,8 @@ struct kvm_vcpu_arch {
};
#define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET]
+#define VCPU_VSX_FPR(vcpu, i, j) ((vcpu)->arch.fp.fpr[i][j])
+#define VCPU_VSX_VR(vcpu, i) ((vcpu)->arch.vr.vr[i])
/* Values for vcpu->arch.state */
#define KVMPPC_VCPU_NOTREADY 0
@@ -745,6 +773,7 @@ struct kvm_vcpu_arch {
#define KVM_MMIO_REG_FPR 0x0020
#define KVM_MMIO_REG_QPR 0x0040
#define KVM_MMIO_REG_FQPR 0x0060
+#define KVM_MMIO_REG_VSX 0x0080
#define __KVM_HAVE_ARCH_WQP
#define __KVM_HAVE_CREATE_DEVICE
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index dd11c4c..5885d32 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -78,9 +78,15 @@ extern int kvmppc_handle_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
extern int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int rt, unsigned int bytes,
int is_default_endian);
+extern int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian, int mmio_sign_extend);
extern int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes,
int is_default_endian);
+extern int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ int rs, unsigned int bytes,
+ int is_default_endian);
extern int kvmppc_load_last_inst(struct kvm_vcpu *vcpu,
enum instruction_type type, u32 *inst);
@@ -132,6 +138,9 @@ extern void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu);
extern int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu);
extern int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags);
+extern void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu);
+extern void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_dequeue_dec(struct kvm_vcpu *vcpu);
extern void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
@@ -164,13 +173,19 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm,
extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu,
struct kvm_memory_slot *memslot, unsigned long porder);
extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu);
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+ struct iommu_group *grp);
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+ struct iommu_group *grp);
extern long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args);
extern struct kvmppc_spapr_tce_table *kvmppc_find_table(
- struct kvm_vcpu *vcpu, unsigned long liobn);
-extern long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
- unsigned long ioba, unsigned long npages);
+ struct kvm *kvm, unsigned long liobn);
+#define kvmppc_ioba_validate(stt, ioba, npages) \
+ (iommu_tce_check_ioba((stt)->page_shift, (stt)->offset, \
+ (stt)->size, (ioba), (npages)) ? \
+ H_PARAMETER : H_SUCCESS)
extern long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *tt,
unsigned long tce);
extern long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
@@ -240,6 +255,7 @@ union kvmppc_one_reg {
u64 dval;
vector128 vval;
u64 vsxval[2];
+ u32 vsx32val[4];
struct {
u64 addr;
u64 length;
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index b9e3f0a..c70c827 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -29,10 +29,14 @@ extern void mm_iommu_init(struct mm_struct *mm);
extern void mm_iommu_cleanup(struct mm_struct *mm);
extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
unsigned long ua, unsigned long size);
+extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(
+ struct mm_struct *mm, unsigned long ua, unsigned long size);
extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries);
extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
unsigned long ua, unsigned long *hpa);
+extern long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned long *hpa);
extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem);
extern void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem);
#endif
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index e7d6d86..73f06f4 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -86,32 +86,79 @@
#define OP_TRAP_64 2
#define OP_31_XOP_TRAP 4
+#define OP_31_XOP_LDX 21
#define OP_31_XOP_LWZX 23
+#define OP_31_XOP_LDUX 53
#define OP_31_XOP_DCBST 54
#define OP_31_XOP_LWZUX 55
#define OP_31_XOP_TRAP_64 68
#define OP_31_XOP_DCBF 86
#define OP_31_XOP_LBZX 87
+#define OP_31_XOP_STDX 149
#define OP_31_XOP_STWX 151
+#define OP_31_XOP_STDUX 181
+#define OP_31_XOP_STWUX 183
#define OP_31_XOP_STBX 215
#define OP_31_XOP_LBZUX 119
#define OP_31_XOP_STBUX 247
#define OP_31_XOP_LHZX 279
#define OP_31_XOP_LHZUX 311
#define OP_31_XOP_MFSPR 339
+#define OP_31_XOP_LWAX 341
#define OP_31_XOP_LHAX 343
+#define OP_31_XOP_LWAUX 373
#define OP_31_XOP_LHAUX 375
#define OP_31_XOP_STHX 407
#define OP_31_XOP_STHUX 439
#define OP_31_XOP_MTSPR 467
#define OP_31_XOP_DCBI 470
+#define OP_31_XOP_LDBRX 532
#define OP_31_XOP_LWBRX 534
#define OP_31_XOP_TLBSYNC 566
+#define OP_31_XOP_STDBRX 660
#define OP_31_XOP_STWBRX 662
+#define OP_31_XOP_STFSX 663
+#define OP_31_XOP_STFSUX 695
+#define OP_31_XOP_STFDX 727
+#define OP_31_XOP_STFDUX 759
#define OP_31_XOP_LHBRX 790
+#define OP_31_XOP_LFIWAX 855
+#define OP_31_XOP_LFIWZX 887
#define OP_31_XOP_STHBRX 918
+#define OP_31_XOP_STFIWX 983
+
+/* VSX Scalar Load Instructions */
+#define OP_31_XOP_LXSDX 588
+#define OP_31_XOP_LXSSPX 524
+#define OP_31_XOP_LXSIWAX 76
+#define OP_31_XOP_LXSIWZX 12
+
+/* VSX Scalar Store Instructions */
+#define OP_31_XOP_STXSDX 716
+#define OP_31_XOP_STXSSPX 652
+#define OP_31_XOP_STXSIWX 140
+
+/* VSX Vector Load Instructions */
+#define OP_31_XOP_LXVD2X 844
+#define OP_31_XOP_LXVW4X 780
+
+/* VSX Vector Load and Splat Instruction */
+#define OP_31_XOP_LXVDSX 332
+
+/* VSX Vector Store Instructions */
+#define OP_31_XOP_STXVD2X 972
+#define OP_31_XOP_STXVW4X 908
+
+#define OP_31_XOP_LFSX 535
+#define OP_31_XOP_LFSUX 567
+#define OP_31_XOP_LFDX 599
+#define OP_31_XOP_LFDUX 631
#define OP_LWZ 32
+#define OP_STFS 52
+#define OP_STFSU 53
+#define OP_STFD 54
+#define OP_STFDU 55
#define OP_LD 58
#define OP_LWZU 33
#define OP_LBZ 34
@@ -127,6 +174,17 @@
#define OP_LHAU 43
#define OP_STH 44
#define OP_STHU 45
+#define OP_LMW 46
+#define OP_STMW 47
+#define OP_LFS 48
+#define OP_LFSU 49
+#define OP_LFD 50
+#define OP_LFDU 51
+#define OP_STFS 52
+#define OP_STFSU 53
+#define OP_STFD 54
+#define OP_STFDU 55
+#define OP_LQ 56
/* sorted alphabetically */
#define PPC_INST_BHRBE 0x7c00025c
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 4edbe4b..07fbeb9 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -29,6 +29,9 @@
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_GUEST_DEBUG
+/* Not always available, but if it is, this is the correct offset. */
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
struct kvm_regs {
__u64 pc;
__u64 cr;
diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 5f202a5..f2b724c 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -711,13 +711,16 @@ struct iommu_table *iommu_init_table(struct iommu_table *tbl, int nid)
return tbl;
}
-void iommu_free_table(struct iommu_table *tbl, const char *node_name)
+static void iommu_table_free(struct kref *kref)
{
unsigned long bitmap_sz;
unsigned int order;
+ struct iommu_table *tbl;
- if (!tbl)
- return;
+ tbl = container_of(kref, struct iommu_table, it_kref);
+
+ if (tbl->it_ops->free)
+ tbl->it_ops->free(tbl);
if (!tbl->it_map) {
kfree(tbl);
@@ -733,7 +736,7 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
/* verify that table contains no entries */
if (!bitmap_empty(tbl->it_map, tbl->it_size))
- pr_warn("%s: Unexpected TCEs for %s\n", __func__, node_name);
+ pr_warn("%s: Unexpected TCEs\n", __func__);
/* calculate bitmap size in bytes */
bitmap_sz = BITS_TO_LONGS(tbl->it_size) * sizeof(unsigned long);
@@ -746,6 +749,24 @@ void iommu_free_table(struct iommu_table *tbl, const char *node_name)
kfree(tbl);
}
+struct iommu_table *iommu_tce_table_get(struct iommu_table *tbl)
+{
+ if (kref_get_unless_zero(&tbl->it_kref))
+ return tbl;
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_get);
+
+int iommu_tce_table_put(struct iommu_table *tbl)
+{
+ if (WARN_ON(!tbl))
+ return 0;
+
+ return kref_put(&tbl->it_kref, iommu_table_free);
+}
+EXPORT_SYMBOL_GPL(iommu_tce_table_put);
+
/* Creates TCEs for a user provided buffer. The user buffer must be
* contiguous real kernel storage (not vmalloc). The address passed here
* comprises a page address and offset into that page. The dma_addr_t
@@ -942,47 +963,36 @@ void iommu_flush_tce(struct iommu_table *tbl)
}
EXPORT_SYMBOL_GPL(iommu_flush_tce);
-int iommu_tce_clear_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce_value,
- unsigned long npages)
+int iommu_tce_check_ioba(unsigned long page_shift,
+ unsigned long offset, unsigned long size,
+ unsigned long ioba, unsigned long npages)
{
- /* tbl->it_ops->clear() does not support any value but 0 */
- if (tce_value)
- return -EINVAL;
+ unsigned long mask = (1UL << page_shift) - 1;
- if (ioba & ~IOMMU_PAGE_MASK(tbl))
+ if (ioba & mask)
return -EINVAL;
- ioba >>= tbl->it_page_shift;
- if (ioba < tbl->it_offset)
+ ioba >>= page_shift;
+ if (ioba < offset)
return -EINVAL;
- if ((ioba + npages) > (tbl->it_offset + tbl->it_size))
+ if ((ioba + 1) > (offset + size))
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(iommu_tce_clear_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_ioba);
-int iommu_tce_put_param_check(struct iommu_table *tbl,
- unsigned long ioba, unsigned long tce)
+int iommu_tce_check_gpa(unsigned long page_shift, unsigned long gpa)
{
- if (tce & ~IOMMU_PAGE_MASK(tbl))
- return -EINVAL;
-
- if (ioba & ~IOMMU_PAGE_MASK(tbl))
- return -EINVAL;
+ unsigned long mask = (1UL << page_shift) - 1;
- ioba >>= tbl->it_page_shift;
- if (ioba < tbl->it_offset)
- return -EINVAL;
-
- if ((ioba + 1) > (tbl->it_offset + tbl->it_size))
+ if (gpa & mask)
return -EINVAL;
return 0;
}
-EXPORT_SYMBOL_GPL(iommu_tce_put_param_check);
+EXPORT_SYMBOL_GPL(iommu_tce_check_gpa);
long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction)
@@ -1004,6 +1014,31 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
}
EXPORT_SYMBOL_GPL(iommu_tce_xchg);
+#ifdef CONFIG_PPC_BOOK3S_64
+long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret;
+
+ ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+
+ if (!ret && ((*direction == DMA_FROM_DEVICE) ||
+ (*direction == DMA_BIDIRECTIONAL))) {
+ struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);
+
+ if (likely(pg)) {
+ SetPageDirty(pg);
+ } else {
+ tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
+ ret = -EFAULT;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+#endif
+
int iommu_take_ownership(struct iommu_table *tbl)
{
unsigned long flags, i, sz = (tbl->it_size + 7) >> 3;
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 029be26..65a471d 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -67,6 +67,7 @@ config KVM_BOOK3S_64
select KVM_BOOK3S_64_HANDLER
select KVM
select KVM_BOOK3S_PR_POSSIBLE if !KVM_BOOK3S_HV_POSSIBLE
+ select SPAPR_TCE_IOMMU if IOMMU_SUPPORT
---help---
Support running unmodified book3s_64 and book3s_32 guest kernels
in virtual machines on book3s_64 host processors.
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b6b5c18..0ff0d07 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -197,6 +197,24 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong flags)
}
EXPORT_SYMBOL_GPL(kvmppc_core_queue_program);
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_FP_UNAVAIL, 0);
+}
+
+void kvmppc_core_queue_vec_unavail(struct kvm_vcpu *vcpu)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_ALTIVEC, 0);
+}
+
+void kvmppc_core_queue_vsx_unavail(struct kvm_vcpu *vcpu)
+{
+ /* might as well deliver this straight away */
+ kvmppc_inject_interrupt(vcpu, BOOK3S_INTERRUPT_VSX, 0);
+}
+
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_book3s_queue_irqprio(vcpu, BOOK3S_INTERRUPT_DECREMENTER);
diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 7015357..29ebe2f 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -319,6 +319,7 @@ do_second:
gpte->may_execute = true;
gpte->may_read = false;
gpte->may_write = false;
+ gpte->wimg = r & HPTE_R_WIMG;
switch (pp) {
case 0:
diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
index a587e8f..145a618 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_host.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
@@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
else
kvmppc_mmu_flush_icache(pfn);
+ rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
+
/*
* Use 64K pages if possible; otherwise, on 64K page kernels,
* we need to transfer 4 more bits from guest real to host real addr.
@@ -177,12 +179,15 @@ map_again:
ret = mmu_hash_ops.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
hpsize, hpsize, MMU_SEGSIZE_256M);
- if (ret < 0) {
+ if (ret == -1) {
/* If we couldn't map a primary PTE, try a secondary */
hash = ~hash;
vflags ^= HPTE_V_SECONDARY;
attempt++;
goto map_again;
+ } else if (ret < 0) {
+ r = -EIO;
+ goto out_unlock;
} else {
trace_kvm_book3s_64_mmu_map(rflags, hpteg,
vpn, hpaddr, orig_pte);
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index 3e26cd4..a160c14 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -28,6 +28,8 @@
#include <linux/hugetlb.h>
#include <linux/list.h>
#include <linux/anon_inodes.h>
+#include <linux/iommu.h>
+#include <linux/file.h>
#include <asm/tlbflush.h>
#include <asm/kvm_ppc.h>
@@ -40,6 +42,7 @@
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
+#include <asm/mmu_context.h>
static unsigned long kvmppc_tce_pages(unsigned long iommu_pages)
{
@@ -91,6 +94,137 @@ static long kvmppc_account_memlimit(unsigned long stt_pages, bool inc)
return ret;
}
+static void kvm_spapr_tce_iommu_table_free(struct rcu_head *head)
+{
+ struct kvmppc_spapr_tce_iommu_table *stit = container_of(head,
+ struct kvmppc_spapr_tce_iommu_table, rcu);
+
+ iommu_tce_table_put(stit->tbl);
+
+ kfree(stit);
+}
+
+static void kvm_spapr_tce_liobn_put(struct kref *kref)
+{
+ struct kvmppc_spapr_tce_iommu_table *stit = container_of(kref,
+ struct kvmppc_spapr_tce_iommu_table, kref);
+
+ list_del_rcu(&stit->next);
+
+ call_rcu(&stit->rcu, kvm_spapr_tce_iommu_table_free);
+}
+
+extern void kvm_spapr_tce_release_iommu_group(struct kvm *kvm,
+ struct iommu_group *grp)
+{
+ int i;
+ struct kvmppc_spapr_tce_table *stt;
+ struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
+ struct iommu_table_group *table_group = NULL;
+
+ list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+
+ table_group = iommu_group_get_iommudata(grp);
+ if (WARN_ON(!table_group))
+ continue;
+
+ list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ if (table_group->tables[i] != stit->tbl)
+ continue;
+
+ kref_put(&stit->kref, kvm_spapr_tce_liobn_put);
+ return;
+ }
+ }
+ }
+}
+
+extern long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd,
+ struct iommu_group *grp)
+{
+ struct kvmppc_spapr_tce_table *stt = NULL;
+ bool found = false;
+ struct iommu_table *tbl = NULL;
+ struct iommu_table_group *table_group;
+ long i;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ struct fd f;
+
+ f = fdget(tablefd);
+ if (!f.file)
+ return -EBADF;
+
+ list_for_each_entry_rcu(stt, &kvm->arch.spapr_tce_tables, list) {
+ if (stt == f.file->private_data) {
+ found = true;
+ break;
+ }
+ }
+
+ fdput(f);
+
+ if (!found)
+ return -EINVAL;
+
+ table_group = iommu_group_get_iommudata(grp);
+ if (WARN_ON(!table_group))
+ return -EFAULT;
+
+ for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) {
+ struct iommu_table *tbltmp = table_group->tables[i];
+
+ if (!tbltmp)
+ continue;
+ /*
+ * Make sure hardware table parameters are exactly the same;
+ * this is used in the TCE handlers where boundary checks
+ * use only the first attached table.
+ */
+ if ((tbltmp->it_page_shift == stt->page_shift) &&
+ (tbltmp->it_offset == stt->offset) &&
+ (tbltmp->it_size == stt->size)) {
+ /*
+ * Reference the table to avoid races with
+ * add/remove DMA windows.
+ */
+ tbl = iommu_tce_table_get(tbltmp);
+ break;
+ }
+ }
+ if (!tbl)
+ return -EINVAL;
+
+ list_for_each_entry_rcu(stit, &stt->iommu_tables, next) {
+ if (tbl != stit->tbl)
+ continue;
+
+ if (!kref_get_unless_zero(&stit->kref)) {
+ /* stit is being destroyed */
+ iommu_tce_table_put(tbl);
+ return -ENOTTY;
+ }
+ /*
+ * The table is already known to this KVM, we just increased
+ * its KVM reference counter and can return.
+ */
+ return 0;
+ }
+
+ stit = kzalloc(sizeof(*stit), GFP_KERNEL);
+ if (!stit) {
+ iommu_tce_table_put(tbl);
+ return -ENOMEM;
+ }
+
+ stit->tbl = tbl;
+ kref_init(&stit->kref);
+
+ list_add_rcu(&stit->next, &stt->iommu_tables);
+
+ return 0;
+}
+
static void release_spapr_tce_table(struct rcu_head *head)
{
struct kvmppc_spapr_tce_table *stt = container_of(head,
@@ -130,9 +264,18 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
+ struct kvmppc_spapr_tce_iommu_table *stit, *tmp;
list_del_rcu(&stt->list);
+ list_for_each_entry_safe(stit, tmp, &stt->iommu_tables, next) {
+ WARN_ON(!kref_read(&stit->kref));
+ while (1) {
+ if (kref_put(&stit->kref, kvm_spapr_tce_liobn_put))
+ break;
+ }
+ }
+
kvm_put_kvm(stt->kvm);
kvmppc_account_memlimit(
@@ -164,7 +307,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
return -EBUSY;
}
- size = args->size;
+ size = _ALIGN_UP(args->size, PAGE_SIZE >> 3);
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
if (ret) {
@@ -183,6 +326,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
stt->offset = args->offset;
stt->size = size;
stt->kvm = kvm;
+ INIT_LIST_HEAD_RCU(&stt->iommu_tables);
for (i = 0; i < npages; i++) {
stt->pages[i] = alloc_page(GFP_KERNEL | __GFP_ZERO);
@@ -211,15 +355,106 @@ fail:
return ret;
}
+static void kvmppc_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+ iommu_tce_xchg(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_tce_iommu_mapped_dec(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+ unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup(kvm->mm, *pua, pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+ *pua = 0;
+
+ return H_SUCCESS;
+}
+
+static long kvmppc_tce_iommu_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ enum dma_data_direction dir = DMA_NONE;
+ unsigned long hpa = 0;
+ long ret;
+
+ if (WARN_ON_ONCE(iommu_tce_xchg(tbl, entry, &hpa, &dir)))
+ return H_HARDWARE;
+
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ ret = kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret != H_SUCCESS)
+ iommu_tce_xchg(tbl, entry, &hpa, &dir);
+
+ return ret;
+}
+
+long kvmppc_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+{
+ long ret;
+ unsigned long hpa, *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+ if (!mem)
+ /* This only handles v2 IOMMU type, v1 is handled via ioctl() */
+ return H_TOO_HARD;
+
+ if (WARN_ON_ONCE(mm_iommu_ua_to_hpa(mem, ua, &hpa)))
+ return H_HARDWARE;
+
+ if (mm_iommu_mapped_inc(mem))
+ return H_CLOSED;
+
+ ret = iommu_tce_xchg(tbl, entry, &hpa, &dir);
+ if (WARN_ON_ONCE(ret)) {
+ mm_iommu_mapped_dec(mem);
+ return H_HARDWARE;
+ }
+
+ if (dir != DMA_NONE)
+ kvmppc_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+ *pua = ua;
+
+ return 0;
+}
+
long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
- struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
- long ret;
+ struct kvmppc_spapr_tce_table *stt;
+ long ret, idx;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long entry, ua = 0;
+ enum dma_data_direction dir;
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -231,7 +466,35 @@ long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+ dir = iommu_tce_direction(tce);
+ if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+ return H_PARAMETER;
+
+ entry = ioba >> stt->page_shift;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE) {
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry);
+ } else {
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ ret = kvmppc_tce_iommu_map(vcpu->kvm, stit->tbl,
+ entry, ua, dir);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -246,8 +509,9 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
unsigned long entry, ua = 0;
u64 __user *tces;
u64 tce;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -284,6 +548,26 @@ long kvmppc_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS)
goto unlock_exit;
+ if (kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+ &ua, NULL))
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ ret = kvmppc_tce_iommu_map(vcpu->kvm,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ goto unlock_exit;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(stit->tbl, entry);
+ }
+
kvmppc_tce_put(stt, entry + i, tce);
}
@@ -300,8 +584,9 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
{
struct kvmppc_spapr_tce_table *stt;
long i, ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -313,6 +598,24 @@ long kvmppc_h_stuff_tce(struct kvm_vcpu *vcpu,
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
return H_PARAMETER;
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE(1);
+ kvmppc_clear_tce(stit->tbl, entry);
+ }
+ }
+
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index e4c4ea9..eda0a8f 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -40,6 +40,31 @@
#include <asm/iommu.h>
#include <asm/tce.h>
+#ifdef CONFIG_BUG
+
+#define WARN_ON_ONCE_RM(condition) ({ \
+ static bool __section(.data.unlikely) __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once && !__warned)) { \
+ __warned = true; \
+ pr_err("WARN_ON_ONCE_RM: (%s) at %s:%u\n", \
+ __stringify(condition), \
+ __func__, __LINE__); \
+ dump_stack(); \
+ } \
+ unlikely(__ret_warn_once); \
+})
+
+#else
+
+#define WARN_ON_ONCE_RM(condition) ({ \
+ int __ret_warn_on = !!(condition); \
+ unlikely(__ret_warn_on); \
+})
+
+#endif
+
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
/*
@@ -48,10 +73,9 @@
* WARNING: This will be called in real or virtual mode on HV KVM and virtual
* mode on PR KVM
*/
-struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
+struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm,
unsigned long liobn)
{
- struct kvm *kvm = vcpu->kvm;
struct kvmppc_spapr_tce_table *stt;
list_for_each_entry_lockless(stt, &kvm->arch.spapr_tce_tables, list)
@@ -63,27 +87,6 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm_vcpu *vcpu,
EXPORT_SYMBOL_GPL(kvmppc_find_table);
/*
- * Validates IO address.
- *
- * WARNING: This will be called in real-mode on HV KVM and virtual
- * mode on PR KVM
- */
-long kvmppc_ioba_validate(struct kvmppc_spapr_tce_table *stt,
- unsigned long ioba, unsigned long npages)
-{
- unsigned long mask = (1ULL << stt->page_shift) - 1;
- unsigned long idx = ioba >> stt->page_shift;
-
- if ((ioba & mask) || (idx < stt->offset) ||
- (idx - stt->offset + npages > stt->size) ||
- (idx + npages < idx))
- return H_PARAMETER;
-
- return H_SUCCESS;
-}
-EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
-
-/*
* Validates TCE address.
* At the moment flags and page mask are validated.
* As the host kernel does not access those addresses (just puts them
@@ -96,10 +99,14 @@ EXPORT_SYMBOL_GPL(kvmppc_ioba_validate);
*/
long kvmppc_tce_validate(struct kvmppc_spapr_tce_table *stt, unsigned long tce)
{
- unsigned long page_mask = ~((1ULL << stt->page_shift) - 1);
- unsigned long mask = ~(page_mask | TCE_PCI_WRITE | TCE_PCI_READ);
+ unsigned long gpa = tce & ~(TCE_PCI_READ | TCE_PCI_WRITE);
+ enum dma_data_direction dir = iommu_tce_direction(tce);
+
+ /* Allow userspace to poison TCE table */
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
- if (tce & mask)
+ if (iommu_tce_check_gpa(stt->page_shift, gpa))
return H_PARAMETER;
return H_SUCCESS;
@@ -179,15 +186,122 @@ long kvmppc_gpa_to_ua(struct kvm *kvm, unsigned long gpa,
EXPORT_SYMBOL_GPL(kvmppc_gpa_to_ua);
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+static void kvmppc_rm_clear_tce(struct iommu_table *tbl, unsigned long entry)
+{
+ unsigned long hpa = 0;
+ enum dma_data_direction dir = DMA_NONE;
+
+ iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+}
+
+static long kvmppc_rm_tce_iommu_mapped_dec(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ struct mm_iommu_table_group_mem_t *mem = NULL;
+ const unsigned long pgsize = 1ULL << tbl->it_page_shift;
+ unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ pua = (void *) vmalloc_to_phys(pua);
+ if (WARN_ON_ONCE_RM(!pua))
+ return H_HARDWARE;
+
+ mem = mm_iommu_lookup_rm(kvm->mm, *pua, pgsize);
+ if (!mem)
+ return H_TOO_HARD;
+
+ mm_iommu_mapped_dec(mem);
+
+ *pua = 0;
+
+ return H_SUCCESS;
+}
+
+static long kvmppc_rm_tce_iommu_unmap(struct kvm *kvm,
+ struct iommu_table *tbl, unsigned long entry)
+{
+ enum dma_data_direction dir = DMA_NONE;
+ unsigned long hpa = 0;
+ long ret;
+
+ if (iommu_tce_xchg_rm(tbl, entry, &hpa, &dir))
+ /*
+ * real mode xchg can fail if struct page crosses
+ * a page boundary
+ */
+ return H_TOO_HARD;
+
+ if (dir == DMA_NONE)
+ return H_SUCCESS;
+
+ ret = kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+ if (ret)
+ iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+
+ return ret;
+}
+
+static long kvmppc_rm_tce_iommu_map(struct kvm *kvm, struct iommu_table *tbl,
+ unsigned long entry, unsigned long ua,
+ enum dma_data_direction dir)
+{
+ long ret;
+ unsigned long hpa = 0;
+ unsigned long *pua = IOMMU_TABLE_USERSPACE_ENTRY(tbl, entry);
+ struct mm_iommu_table_group_mem_t *mem;
+
+ if (!pua)
+ /* it_userspace allocation might be delayed */
+ return H_TOO_HARD;
+
+ mem = mm_iommu_lookup_rm(kvm->mm, ua, 1ULL << tbl->it_page_shift);
+ if (!mem)
+ return H_TOO_HARD;
+
+ if (WARN_ON_ONCE_RM(mm_iommu_ua_to_hpa_rm(mem, ua, &hpa)))
+ return H_HARDWARE;
+
+ pua = (void *) vmalloc_to_phys(pua);
+ if (WARN_ON_ONCE_RM(!pua))
+ return H_HARDWARE;
+
+ if (WARN_ON_ONCE_RM(mm_iommu_mapped_inc(mem)))
+ return H_CLOSED;
+
+ ret = iommu_tce_xchg_rm(tbl, entry, &hpa, &dir);
+ if (ret) {
+ mm_iommu_mapped_dec(mem);
+ /*
+ * real mode xchg can fail if struct page crosses
+ * a page boundary
+ */
+ return H_TOO_HARD;
+ }
+
+ if (dir != DMA_NONE)
+ kvmppc_rm_tce_iommu_mapped_dec(kvm, tbl, entry);
+
+ *pua = ua;
+
+ return 0;
+}
+
long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce)
{
- struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ struct kvmppc_spapr_tce_table *stt;
long ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
+ unsigned long entry, ua = 0;
+ enum dma_data_direction dir;
/* udbg_printf("H_PUT_TCE(): liobn=0x%lx ioba=0x%lx, tce=0x%lx\n", */
/* liobn, ioba, tce); */
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -199,7 +313,32 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
if (ret != H_SUCCESS)
return ret;
- kvmppc_tce_put(stt, ioba >> stt->page_shift, tce);
+ dir = iommu_tce_direction(tce);
+ if ((dir != DMA_NONE) && kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE), &ua, NULL))
+ return H_PARAMETER;
+
+ entry = ioba >> stt->page_shift;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ if (dir == DMA_NONE)
+ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry);
+ else
+ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+ stit->tbl, entry, ua, dir);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+ kvmppc_rm_clear_tce(stit->tbl, entry);
+ }
+
+ kvmppc_tce_put(stt, entry, tce);
return H_SUCCESS;
}
@@ -239,8 +378,10 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
long i, ret = H_SUCCESS;
unsigned long tces, entry, ua = 0;
unsigned long *rmap = NULL;
+ bool prereg = false;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -259,23 +400,49 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS)
return ret;
- if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
- return H_TOO_HARD;
+ if (mm_iommu_preregistered(vcpu->kvm->mm)) {
+ /*
+ * We get here if guest memory was pre-registered which
+ * is normally VFIO case and gpa->hpa translation does not
+ * depend on hpt.
+ */
+ struct mm_iommu_table_group_mem_t *mem;
- rmap = (void *) vmalloc_to_phys(rmap);
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, NULL))
+ return H_TOO_HARD;
- /*
- * Synchronize with the MMU notifier callbacks in
- * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
- * While we have the rmap lock, code running on other CPUs
- * cannot finish unmapping the host real page that backs
- * this guest real page, so we are OK to access the host
- * real page.
- */
- lock_rmap(rmap);
- if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
- ret = H_TOO_HARD;
- goto unlock_exit;
+ mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K);
+ if (mem)
+ prereg = mm_iommu_ua_to_hpa_rm(mem, ua, &tces) == 0;
+ }
+
+ if (!prereg) {
+ /*
+ * This is usually a case of a guest with emulated devices only
+ * when TCE list is not in preregistered memory.
+ * We do not require memory to be preregistered in this case
+ * so lock rmap and do __find_linux_pte_or_hugepte().
+ */
+ if (kvmppc_gpa_to_ua(vcpu->kvm, tce_list, &ua, &rmap))
+ return H_TOO_HARD;
+
+ rmap = (void *) vmalloc_to_phys(rmap);
+ if (WARN_ON_ONCE_RM(!rmap))
+ return H_HARDWARE;
+
+ /*
+ * Synchronize with the MMU notifier callbacks in
+ * book3s_64_mmu_hv.c (kvm_unmap_hva_hv etc.).
+ * While we have the rmap lock, code running on other CPUs
+ * cannot finish unmapping the host real page that backs
+ * this guest real page, so we are OK to access the host
+ * real page.
+ */
+ lock_rmap(rmap);
+ if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) {
+ ret = H_TOO_HARD;
+ goto unlock_exit;
+ }
}
for (i = 0; i < npages; ++i) {
@@ -285,11 +452,33 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
if (ret != H_SUCCESS)
goto unlock_exit;
+ ua = 0;
+ if (kvmppc_gpa_to_ua(vcpu->kvm,
+ tce & ~(TCE_PCI_READ | TCE_PCI_WRITE),
+ &ua, NULL))
+ return H_PARAMETER;
+
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ ret = kvmppc_rm_tce_iommu_map(vcpu->kvm,
+ stit->tbl, entry + i, ua,
+ iommu_tce_direction(tce));
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ goto unlock_exit;
+
+ WARN_ON_ONCE_RM(1);
+ kvmppc_rm_clear_tce(stit->tbl, entry);
+ }
+
kvmppc_tce_put(stt, entry + i, tce);
}
unlock_exit:
- unlock_rmap(rmap);
+ if (rmap)
+ unlock_rmap(rmap);
return ret;
}
@@ -300,8 +489,9 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
{
struct kvmppc_spapr_tce_table *stt;
long i, ret;
+ struct kvmppc_spapr_tce_iommu_table *stit;
- stt = kvmppc_find_table(vcpu, liobn);
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
@@ -313,6 +503,24 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
if (tce_value & (TCE_PCI_WRITE | TCE_PCI_READ))
return H_PARAMETER;
+ list_for_each_entry_lockless(stit, &stt->iommu_tables, next) {
+ unsigned long entry = ioba >> stit->tbl->it_page_shift;
+
+ for (i = 0; i < npages; ++i) {
+ ret = kvmppc_rm_tce_iommu_unmap(vcpu->kvm,
+ stit->tbl, entry + i);
+
+ if (ret == H_SUCCESS)
+ continue;
+
+ if (ret == H_TOO_HARD)
+ return ret;
+
+ WARN_ON_ONCE_RM(1);
+ kvmppc_rm_clear_tce(stit->tbl, entry);
+ }
+ }
+
for (i = 0; i < npages; ++i, ioba += (1ULL << stt->page_shift))
kvmppc_tce_put(stt, ioba >> stt->page_shift, tce_value);
@@ -322,12 +530,13 @@ long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba)
{
- struct kvmppc_spapr_tce_table *stt = kvmppc_find_table(vcpu, liobn);
+ struct kvmppc_spapr_tce_table *stt;
long ret;
unsigned long idx;
struct page *page;
u64 *tbl;
+ stt = kvmppc_find_table(vcpu->kvm, liobn);
if (!stt)
return H_TOO_HARD;
diff --git a/arch/powerpc/kvm/book3s_emulate.c b/arch/powerpc/kvm/book3s_emulate.c
index 8359752..68d6898 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -503,10 +503,18 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
break;
unprivileged:
default:
- printk(KERN_INFO "KVM: invalid SPR write: %d\n", sprn);
-#ifndef DEBUG_SPR
- emulated = EMULATE_FAIL;
-#endif
+ pr_info_ratelimited("KVM: invalid SPR write: %d\n", sprn);
+ if (sprn & 0x10) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ }
+ } else {
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ emulated = EMULATE_AGAIN;
+ }
+ }
break;
}
@@ -648,10 +656,20 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val
break;
default:
unprivileged:
- printk(KERN_INFO "KVM: invalid SPR read: %d\n", sprn);
-#ifndef DEBUG_SPR
- emulated = EMULATE_FAIL;
-#endif
+ pr_info_ratelimited("KVM: invalid SPR read: %d\n", sprn);
+ if (sprn & 0x10) {
+ if (kvmppc_get_msr(vcpu) & MSR_PR) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+ emulated = EMULATE_AGAIN;
+ }
+ } else {
+ if ((kvmppc_get_msr(vcpu) & MSR_PR) || sprn == 0 ||
+ sprn == 4 || sprn == 5 || sprn == 6) {
+ kvmppc_core_queue_program(vcpu, SRR1_PROGILL);
+ emulated = EMULATE_AGAIN;
+ }
+ }
+
break;
}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1ec86d9..06b7d8a 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3624,11 +3624,9 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
return -EIO;
mutex_lock(&kvm->lock);
+ if (!kvm->arch.pimap)
+ goto unlock;
- if (kvm->arch.pimap == NULL) {
- mutex_unlock(&kvm->lock);
- return 0;
- }
pimap = kvm->arch.pimap;
for (i = 0; i < pimap->n_mapped; i++) {
@@ -3650,7 +3648,7 @@ static int kvmppc_clr_passthru_irq(struct kvm *kvm, int host_irq, int guest_gsi)
* We don't free this structure even when the count goes to
* zero. The structure is freed when we destroy the VM.
*/
-
+ unlock:
mutex_unlock(&kvm->lock);
return 0;
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d4dfc0c..69a0944 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -349,7 +349,7 @@ static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
if (msr & MSR_POW) {
if (!vcpu->arch.pending_exceptions) {
kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.halt_wakeup++;
/* Unset POW bit after we woke up */
@@ -537,8 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
int r = RESUME_GUEST;
int relocated;
int page_found = 0;
- struct kvmppc_pte pte;
- bool is_mmio = false;
+ struct kvmppc_pte pte = { 0 };
bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
u64 vsid;
@@ -616,8 +615,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
/* Page not found in guest SLB */
kvmppc_set_dar(vcpu, kvmppc_get_fault_dar(vcpu));
kvmppc_book3s_queue_irqprio(vcpu, vec + 0x80);
- } else if (!is_mmio &&
- kvmppc_visible_gpa(vcpu, pte.raddr)) {
+ } else if (kvmppc_visible_gpa(vcpu, pte.raddr)) {
if (data && !(vcpu->arch.fault_dsisr & DSISR_NOHPTE)) {
/*
* There is already a host HPTE there, presumably
@@ -627,7 +625,11 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_mmu_unmap_page(vcpu, &pte);
}
/* The guest's PTE is not mapped yet. Map on the host */
- kvmppc_mmu_map_page(vcpu, &pte, iswrite);
+ if (kvmppc_mmu_map_page(vcpu, &pte, iswrite) == -EIO) {
+ /* Exit KVM if mapping failed */
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ return RESUME_HOST;
+ }
if (data)
vcpu->stat.sp_storage++;
else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c
index f102616..bcbeeb6 100644
--- a/arch/powerpc/kvm/book3s_pr_papr.c
+++ b/arch/powerpc/kvm/book3s_pr_papr.c
@@ -344,7 +344,7 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd)
case H_CEDE:
kvmppc_set_msr_fast(vcpu, kvmppc_get_msr(vcpu) | MSR_EE);
kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
vcpu->stat.halt_wakeup++;
return EMULATE_DONE;
case H_LOGICAL_CI_LOAD:
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 0514cbd..3eaac38 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -300,6 +300,11 @@ void kvmppc_core_queue_program(struct kvm_vcpu *vcpu, ulong esr_flags)
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM);
}
+void kvmppc_core_queue_fpunavail(struct kvm_vcpu *vcpu)
+{
+ kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL);
+}
+
void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
{
kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DECREMENTER);
@@ -579,7 +584,7 @@ static void arm_next_watchdog(struct kvm_vcpu *vcpu)
* userspace, so clear the KVM_REQ_WATCHDOG request.
*/
if ((vcpu->arch.tsr & (TSR_ENW | TSR_WIS)) != (TSR_ENW | TSR_WIS))
- clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_WATCHDOG, vcpu);
spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
nr_jiffies = watchdog_next_timeout(vcpu);
@@ -690,7 +695,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu)
if (vcpu->arch.shared->msr & MSR_WE) {
local_irq_enable();
kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
hard_irq_disable();
kvmppc_set_exit_type(vcpu, EMULATED_MTMSRWE_EXITS);
diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c
index 0fda423..77fd043 100644
--- a/arch/powerpc/kvm/e500_mmu_host.c
+++ b/arch/powerpc/kvm/e500_mmu_host.c
@@ -797,9 +797,8 @@ int e500_mmu_host_init(struct kvmppc_vcpu_e500 *vcpu_e500)
host_tlb_params[0].sets =
host_tlb_params[0].entries / host_tlb_params[0].ways;
host_tlb_params[1].sets = 1;
-
- vcpu_e500->h2g_tlb1_rmap = kzalloc(sizeof(unsigned int) *
- host_tlb_params[1].entries,
+ vcpu_e500->h2g_tlb1_rmap = kcalloc(host_tlb_params[1].entries,
+ sizeof(*vcpu_e500->h2g_tlb1_rmap),
GFP_KERNEL);
if (!vcpu_e500->h2g_tlb1_rmap)
return -EINVAL;
diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c
index b379146..c873ffe 100644
--- a/arch/powerpc/kvm/emulate.c
+++ b/arch/powerpc/kvm/emulate.c
@@ -259,10 +259,18 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
case OP_31_XOP_MFSPR:
emulated = kvmppc_emulate_mfspr(vcpu, sprn, rt);
+ if (emulated == EMULATE_AGAIN) {
+ emulated = EMULATE_DONE;
+ advance = 0;
+ }
break;
case OP_31_XOP_MTSPR:
emulated = kvmppc_emulate_mtspr(vcpu, sprn, rs);
+ if (emulated == EMULATE_AGAIN) {
+ emulated = EMULATE_DONE;
+ advance = 0;
+ }
break;
case OP_31_XOP_TLBSYNC:
diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c
index 6d3c0ee..af83353 100644
--- a/arch/powerpc/kvm/emulate_loadstore.c
+++ b/arch/powerpc/kvm/emulate_loadstore.c
@@ -34,18 +34,38 @@
#include "timing.h"
#include "trace.h"
-/* XXX to do:
- * lhax
- * lhaux
- * lswx
- * lswi
- * stswx
- * stswi
- * lha
- * lhau
- * lmw
- * stmw
+#ifdef CONFIG_PPC_FPU
+static bool kvmppc_check_fp_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_FP)) {
+ kvmppc_core_queue_fpunavail(vcpu);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_PPC_FPU */
+
+#ifdef CONFIG_VSX
+static bool kvmppc_check_vsx_disabled(struct kvm_vcpu *vcpu)
+{
+ if (!(kvmppc_get_msr(vcpu) & MSR_VSX)) {
+ kvmppc_core_queue_vsx_unavail(vcpu);
+ return true;
+ }
+
+ return false;
+}
+#endif /* CONFIG_VSX */
+
+/*
+ * XXX to do:
+ * lfiwax, lfiwzx
+ * vector loads and stores
*
+ * Instructions that trap when used on cache-inhibited mappings
+ * are not emulated here: multiple and string instructions,
+ * lq/stq, and the load-reserve/store-conditional instructions.
*/
int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
{
@@ -66,6 +86,19 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
rs = get_rs(inst);
rt = get_rt(inst);
+ /*
+ * if mmio_vsx_tx_sx_enabled == 0, copy data between
+ * VSR[0..31] and memory
+ * if mmio_vsx_tx_sx_enabled == 1, copy data between
+ * VSR[32..63] and memory
+ */
+ vcpu->arch.mmio_vsx_tx_sx_enabled = get_tx_or_sx(inst);
+ vcpu->arch.mmio_vsx_copy_nums = 0;
+ vcpu->arch.mmio_vsx_offset = 0;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_NONE;
+ vcpu->arch.mmio_sp64_extend = 0;
+ vcpu->arch.mmio_sign_extend = 0;
+
switch (get_op(inst)) {
case 31:
switch (get_xop(inst)) {
@@ -73,6 +106,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
+ case OP_31_XOP_LWZUX:
+ emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
case OP_31_XOP_LBZX:
emulated = kvmppc_handle_load(run, vcpu, rt, 1, 1);
break;
@@ -82,22 +120,36 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
+ case OP_31_XOP_STDX:
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs), 8, 1);
+ break;
+
+ case OP_31_XOP_STDUX:
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs), 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
case OP_31_XOP_STWX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 1);
+ kvmppc_get_gpr(vcpu, rs), 4, 1);
+ break;
+
+ case OP_31_XOP_STWUX:
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs), 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_31_XOP_STBX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
+ kvmppc_get_gpr(vcpu, rs), 1, 1);
break;
case OP_31_XOP_STBUX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
+ kvmppc_get_gpr(vcpu, rs), 1, 1);
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
@@ -105,6 +157,11 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
break;
+ case OP_31_XOP_LHAUX:
+ emulated = kvmppc_handle_loads(run, vcpu, rt, 2, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
case OP_31_XOP_LHZX:
emulated = kvmppc_handle_load(run, vcpu, rt, 2, 1);
break;
@@ -116,14 +173,12 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
case OP_31_XOP_STHX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
+ kvmppc_get_gpr(vcpu, rs), 2, 1);
break;
case OP_31_XOP_STHUX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
+ kvmppc_get_gpr(vcpu, rs), 2, 1);
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
@@ -143,8 +198,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
case OP_31_XOP_STWBRX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 0);
+ kvmppc_get_gpr(vcpu, rs), 4, 0);
break;
case OP_31_XOP_LHBRX:
@@ -153,10 +207,258 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
case OP_31_XOP_STHBRX:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 0);
+ kvmppc_get_gpr(vcpu, rs), 2, 0);
+ break;
+
+ case OP_31_XOP_LDBRX:
+ emulated = kvmppc_handle_load(run, vcpu, rt, 8, 0);
+ break;
+
+ case OP_31_XOP_STDBRX:
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs), 8, 0);
+ break;
+
+ case OP_31_XOP_LDX:
+ emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+ break;
+
+ case OP_31_XOP_LDUX:
+ emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_31_XOP_LWAX:
+ emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
+ break;
+
+ case OP_31_XOP_LWAUX:
+ emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case OP_31_XOP_LFSX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 4, 1);
+ break;
+
+ case OP_31_XOP_LFSUX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_31_XOP_LFDX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 8, 1);
+ break;
+
+ case OP_31_XOP_LFDUX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_31_XOP_LFIWAX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_loads(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 4, 1);
+ break;
+
+ case OP_31_XOP_LFIWZX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 4, 1);
+ break;
+
+ case OP_31_XOP_STFSX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs), 4, 1);
+ break;
+
+ case OP_31_XOP_STFSUX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs), 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_31_XOP_STFDX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs), 8, 1);
+ break;
+
+ case OP_31_XOP_STFDUX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs), 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_31_XOP_STFIWX:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs), 4, 1);
+ break;
+#endif
+
+#ifdef CONFIG_VSX
+ case OP_31_XOP_LXSDX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 8, 1, 0);
+ break;
+
+ case OP_31_XOP_LXSSPX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 4, 1, 0);
+ break;
+
+ case OP_31_XOP_LXSIWAX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 4, 1, 1);
+ break;
+
+ case OP_31_XOP_LXSIWZX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 4, 1, 0);
+ break;
+
+ case OP_31_XOP_LXVD2X:
+ /*
+ * In this case, the official load/store process is like this:
+ * Step1, exit from vm by page fault isr, then kvm save vsr.
+ * Please see guest_exit_cont->store_fp_state->SAVE_32VSRS
+ * as reference.
+ *
+ * Step2, copy data between memory and VCPU
+ * Notice: for LXVD2X/STXVD2X/LXVW4X/STXVW4X, we use
+ * 2copies*8bytes or 4copies*4bytes
+ * to simulate one copy of 16bytes.
+ * Also there is an endian issue here, we should notice the
+ * layout of memory.
+ * Please see MARCO of LXVD2X_ROT/STXVD2X_ROT as more reference.
+ * If host is little-endian, kvm will call XXSWAPD for
+ * LXVD2X_ROT/STXVD2X_ROT.
+ * So, if host is little-endian,
+ * the postion of memeory should be swapped.
+ *
+ * Step3, return to guest, kvm reset register.
+ * Please see kvmppc_hv_entry->load_fp_state->REST_32VSRS
+ * as reference.
+ */
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 2;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 8, 1, 0);
+ break;
+
+ case OP_31_XOP_LXVW4X:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 4;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 4, 1, 0);
+ break;
+
+ case OP_31_XOP_LXVDSX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type =
+ KVMPPC_VSX_COPY_DWORD_LOAD_DUMP;
+ emulated = kvmppc_handle_vsx_load(run, vcpu,
+ KVM_MMIO_REG_VSX|rt, 8, 1, 0);
+ break;
+
+ case OP_31_XOP_STXSDX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ emulated = kvmppc_handle_vsx_store(run, vcpu,
+ rs, 8, 1);
break;
+ case OP_31_XOP_STXSSPX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_vsx_store(run, vcpu,
+ rs, 4, 1);
+ break;
+
+ case OP_31_XOP_STXSIWX:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_offset = 1;
+ vcpu->arch.mmio_vsx_copy_nums = 1;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
+ emulated = kvmppc_handle_vsx_store(run, vcpu,
+ rs, 4, 1);
+ break;
+
+ case OP_31_XOP_STXVD2X:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 2;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_DWORD;
+ emulated = kvmppc_handle_vsx_store(run, vcpu,
+ rs, 8, 1);
+ break;
+
+ case OP_31_XOP_STXVW4X:
+ if (kvmppc_check_vsx_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_vsx_copy_nums = 4;
+ vcpu->arch.mmio_vsx_copy_type = KVMPPC_VSX_COPY_WORD;
+ emulated = kvmppc_handle_vsx_store(run, vcpu,
+ rs, 4, 1);
+ break;
+#endif /* CONFIG_VSX */
default:
emulated = EMULATE_FAIL;
break;
@@ -167,10 +469,60 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
- /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
+#ifdef CONFIG_PPC_FPU
+ case OP_STFS:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs),
+ 4, 1);
+ break;
+
+ case OP_STFSU:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs),
+ 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_STFD:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs),
+ 8, 1);
+ break;
+
+ case OP_STFDU:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_store(run, vcpu,
+ VCPU_FPR(vcpu, rs),
+ 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+#endif
+
case OP_LD:
rt = get_rt(inst);
- emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+ switch (inst & 3) {
+ case 0: /* ld */
+ emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+ break;
+ case 1: /* ldu */
+ emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+ case 2: /* lwa */
+ emulated = kvmppc_handle_loads(run, vcpu, rt, 4, 1);
+ break;
+ default:
+ emulated = EMULATE_FAIL;
+ }
break;
case OP_LWZU:
@@ -193,31 +545,37 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
4, 1);
break;
- /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
case OP_STD:
rs = get_rs(inst);
- emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 8, 1);
+ switch (inst & 3) {
+ case 0: /* std */
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs), 8, 1);
+ break;
+ case 1: /* stdu */
+ emulated = kvmppc_handle_store(run, vcpu,
+ kvmppc_get_gpr(vcpu, rs), 8, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+ default:
+ emulated = EMULATE_FAIL;
+ }
break;
case OP_STWU:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 4, 1);
+ kvmppc_get_gpr(vcpu, rs), 4, 1);
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
case OP_STB:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
+ kvmppc_get_gpr(vcpu, rs), 1, 1);
break;
case OP_STBU:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 1, 1);
+ kvmppc_get_gpr(vcpu, rs), 1, 1);
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
@@ -241,16 +599,48 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu)
case OP_STH:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
+ kvmppc_get_gpr(vcpu, rs), 2, 1);
break;
case OP_STHU:
emulated = kvmppc_handle_store(run, vcpu,
- kvmppc_get_gpr(vcpu, rs),
- 2, 1);
+ kvmppc_get_gpr(vcpu, rs), 2, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+#ifdef CONFIG_PPC_FPU
+ case OP_LFS:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 4, 1);
+ break;
+
+ case OP_LFSU:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ vcpu->arch.mmio_sp64_extend = 1;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 4, 1);
+ kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
+ break;
+
+ case OP_LFD:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 8, 1);
+ break;
+
+ case OP_LFDU:
+ if (kvmppc_check_fp_disabled(vcpu))
+ return EMULATE_DONE;
+ emulated = kvmppc_handle_load(run, vcpu,
+ KVM_MMIO_REG_FPR|rt, 8, 1);
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
break;
+#endif
default:
emulated = EMULATE_FAIL;
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 95c91a9..1ee22a9 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -37,6 +37,7 @@
#include <asm/cputhreads.h>
#include <asm/irqflags.h>
#include <asm/iommu.h>
+#include <asm/switch_to.h>
#include "timing.h"
#include "irq.h"
#include "../mm/mmu_decl.h"
@@ -232,7 +233,7 @@ int kvmppc_kvm_pv(struct kvm_vcpu *vcpu)
case EV_HCALL_TOKEN(EV_IDLE):
r = EV_SUCCESS;
kvm_vcpu_block(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
break;
default:
r = EV_UNIMPLEMENTED;
@@ -524,11 +525,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
/* We support this only for PR */
r = !hv_enabled;
break;
-#ifdef CONFIG_KVM_MMIO
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
-#endif
#ifdef CONFIG_KVM_MPIC
case KVM_CAP_IRQ_MPIC:
r = 1;
@@ -538,6 +534,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
+ /* fallthrough */
+ case KVM_CAP_SPAPR_TCE_VFIO:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
@@ -806,6 +804,129 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
kvm->arch.kvm_ops->irq_bypass_del_producer(cons, prod);
}
+#ifdef CONFIG_VSX
+static inline int kvmppc_get_vsr_dword_offset(int index)
+{
+ int offset;
+
+ if ((index != 0) && (index != 1))
+ return -1;
+
+#ifdef __BIG_ENDIAN
+ offset = index;
+#else
+ offset = 1 - index;
+#endif
+
+ return offset;
+}
+
+static inline int kvmppc_get_vsr_word_offset(int index)
+{
+ int offset;
+
+ if ((index > 3) || (index < 0))
+ return -1;
+
+#ifdef __BIG_ENDIAN
+ offset = index;
+#else
+ offset = 3 - index;
+#endif
+ return offset;
+}
+
+static inline void kvmppc_set_vsr_dword(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (offset == -1)
+ return;
+
+ if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ val.vval = VCPU_VSX_VR(vcpu, index);
+ val.vsxval[offset] = gpr;
+ VCPU_VSX_VR(vcpu, index) = val.vval;
+ } else {
+ VCPU_VSX_FPR(vcpu, index, offset) = gpr;
+ }
+}
+
+static inline void kvmppc_set_vsr_dword_dump(struct kvm_vcpu *vcpu,
+ u64 gpr)
+{
+ union kvmppc_one_reg val;
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+
+ if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ val.vval = VCPU_VSX_VR(vcpu, index);
+ val.vsxval[0] = gpr;
+ val.vsxval[1] = gpr;
+ VCPU_VSX_VR(vcpu, index) = val.vval;
+ } else {
+ VCPU_VSX_FPR(vcpu, index, 0) = gpr;
+ VCPU_VSX_FPR(vcpu, index, 1) = gpr;
+ }
+}
+
+static inline void kvmppc_set_vsr_word(struct kvm_vcpu *vcpu,
+ u32 gpr32)
+{
+ union kvmppc_one_reg val;
+ int offset = kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+ int index = vcpu->arch.io_gpr & KVM_MMIO_REG_MASK;
+ int dword_offset, word_offset;
+
+ if (offset == -1)
+ return;
+
+ if (vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ val.vval = VCPU_VSX_VR(vcpu, index);
+ val.vsx32val[offset] = gpr32;
+ VCPU_VSX_VR(vcpu, index) = val.vval;
+ } else {
+ dword_offset = offset / 2;
+ word_offset = offset % 2;
+ val.vsxval[0] = VCPU_VSX_FPR(vcpu, index, dword_offset);
+ val.vsx32val[word_offset] = gpr32;
+ VCPU_VSX_FPR(vcpu, index, dword_offset) = val.vsxval[0];
+ }
+}
+#endif /* CONFIG_VSX */
+
+#ifdef CONFIG_PPC_FPU
+static inline u64 sp_to_dp(u32 fprs)
+{
+ u64 fprd;
+
+ preempt_disable();
+ enable_kernel_fp();
+ asm ("lfs%U1%X1 0,%1; stfd%U0%X0 0,%0" : "=m" (fprd) : "m" (fprs)
+ : "fr0");
+ preempt_enable();
+ return fprd;
+}
+
+static inline u32 dp_to_sp(u64 fprd)
+{
+ u32 fprs;
+
+ preempt_disable();
+ enable_kernel_fp();
+ asm ("lfd%U1%X1 0,%1; stfs%U0%X0 0,%0" : "=m" (fprs) : "m" (fprd)
+ : "fr0");
+ preempt_enable();
+ return fprs;
+}
+
+#else
+#define sp_to_dp(x) (x)
+#define dp_to_sp(x) (x)
+#endif /* CONFIG_PPC_FPU */
+
static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
struct kvm_run *run)
{
@@ -832,6 +953,10 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
}
}
+ /* conversion between single and double precision */
+ if ((vcpu->arch.mmio_sp64_extend) && (run->mmio.len == 4))
+ gpr = sp_to_dp(gpr);
+
if (vcpu->arch.mmio_sign_extend) {
switch (run->mmio.len) {
#ifdef CONFIG_PPC64
@@ -848,8 +973,6 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
}
}
- kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
-
switch (vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) {
case KVM_MMIO_REG_GPR:
kvmppc_set_gpr(vcpu, vcpu->arch.io_gpr, gpr);
@@ -866,6 +989,17 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu,
vcpu->arch.qpr[vcpu->arch.io_gpr & KVM_MMIO_REG_MASK] = gpr;
break;
#endif
+#ifdef CONFIG_VSX
+ case KVM_MMIO_REG_VSX:
+ if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_DWORD)
+ kvmppc_set_vsr_dword(vcpu, gpr);
+ else if (vcpu->arch.mmio_vsx_copy_type == KVMPPC_VSX_COPY_WORD)
+ kvmppc_set_vsr_word(vcpu, gpr);
+ else if (vcpu->arch.mmio_vsx_copy_type ==
+ KVMPPC_VSX_COPY_DWORD_LOAD_DUMP)
+ kvmppc_set_vsr_dword_dump(vcpu, gpr);
+ break;
+#endif
default:
BUG();
}
@@ -932,6 +1066,35 @@ int kvmppc_handle_loads(struct kvm_run *run, struct kvm_vcpu *vcpu,
return __kvmppc_handle_load(run, vcpu, rt, bytes, is_default_endian, 1);
}
+#ifdef CONFIG_VSX
+int kvmppc_handle_vsx_load(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ unsigned int rt, unsigned int bytes,
+ int is_default_endian, int mmio_sign_extend)
+{
+ enum emulation_result emulated = EMULATE_DONE;
+
+ /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
+ if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
+ (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ return EMULATE_FAIL;
+ }
+
+ while (vcpu->arch.mmio_vsx_copy_nums) {
+ emulated = __kvmppc_handle_load(run, vcpu, rt, bytes,
+ is_default_endian, mmio_sign_extend);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ vcpu->arch.mmio_vsx_copy_nums--;
+ vcpu->arch.mmio_vsx_offset++;
+ }
+ return emulated;
+}
+#endif /* CONFIG_VSX */
+
int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
u64 val, unsigned int bytes, int is_default_endian)
{
@@ -957,6 +1120,9 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->mmio_needed = 1;
vcpu->mmio_is_write = 1;
+ if ((vcpu->arch.mmio_sp64_extend) && (bytes == 4))
+ val = dp_to_sp(val);
+
/* Store the value at the lowest bytes in 'data'. */
if (!host_swabbed) {
switch (bytes) {
@@ -990,6 +1156,129 @@ int kvmppc_handle_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
EXPORT_SYMBOL_GPL(kvmppc_handle_store);
+#ifdef CONFIG_VSX
+static inline int kvmppc_get_vsr_data(struct kvm_vcpu *vcpu, int rs, u64 *val)
+{
+ u32 dword_offset, word_offset;
+ union kvmppc_one_reg reg;
+ int vsx_offset = 0;
+ int copy_type = vcpu->arch.mmio_vsx_copy_type;
+ int result = 0;
+
+ switch (copy_type) {
+ case KVMPPC_VSX_COPY_DWORD:
+ vsx_offset =
+ kvmppc_get_vsr_dword_offset(vcpu->arch.mmio_vsx_offset);
+
+ if (vsx_offset == -1) {
+ result = -1;
+ break;
+ }
+
+ if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ *val = VCPU_VSX_FPR(vcpu, rs, vsx_offset);
+ } else {
+ reg.vval = VCPU_VSX_VR(vcpu, rs);
+ *val = reg.vsxval[vsx_offset];
+ }
+ break;
+
+ case KVMPPC_VSX_COPY_WORD:
+ vsx_offset =
+ kvmppc_get_vsr_word_offset(vcpu->arch.mmio_vsx_offset);
+
+ if (vsx_offset == -1) {
+ result = -1;
+ break;
+ }
+
+ if (!vcpu->arch.mmio_vsx_tx_sx_enabled) {
+ dword_offset = vsx_offset / 2;
+ word_offset = vsx_offset % 2;
+ reg.vsxval[0] = VCPU_VSX_FPR(vcpu, rs, dword_offset);
+ *val = reg.vsx32val[word_offset];
+ } else {
+ reg.vval = VCPU_VSX_VR(vcpu, rs);
+ *val = reg.vsx32val[vsx_offset];
+ }
+ break;
+
+ default:
+ result = -1;
+ break;
+ }
+
+ return result;
+}
+
+int kvmppc_handle_vsx_store(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ int rs, unsigned int bytes, int is_default_endian)
+{
+ u64 val;
+ enum emulation_result emulated = EMULATE_DONE;
+
+ vcpu->arch.io_gpr = rs;
+
+ /* Currently, mmio_vsx_copy_nums only allowed to be less than 4 */
+ if ( (vcpu->arch.mmio_vsx_copy_nums > 4) ||
+ (vcpu->arch.mmio_vsx_copy_nums < 0) ) {
+ return EMULATE_FAIL;
+ }
+
+ while (vcpu->arch.mmio_vsx_copy_nums) {
+ if (kvmppc_get_vsr_data(vcpu, rs, &val) == -1)
+ return EMULATE_FAIL;
+
+ emulated = kvmppc_handle_store(run, vcpu,
+ val, bytes, is_default_endian);
+
+ if (emulated != EMULATE_DONE)
+ break;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ vcpu->arch.mmio_vsx_copy_nums--;
+ vcpu->arch.mmio_vsx_offset++;
+ }
+
+ return emulated;
+}
+
+static int kvmppc_emulate_mmio_vsx_loadstore(struct kvm_vcpu *vcpu,
+ struct kvm_run *run)
+{
+ enum emulation_result emulated = EMULATE_FAIL;
+ int r;
+
+ vcpu->arch.paddr_accessed += run->mmio.len;
+
+ if (!vcpu->mmio_is_write) {
+ emulated = kvmppc_handle_vsx_load(run, vcpu, vcpu->arch.io_gpr,
+ run->mmio.len, 1, vcpu->arch.mmio_sign_extend);
+ } else {
+ emulated = kvmppc_handle_vsx_store(run, vcpu,
+ vcpu->arch.io_gpr, run->mmio.len, 1);
+ }
+
+ switch (emulated) {
+ case EMULATE_DO_MMIO:
+ run->exit_reason = KVM_EXIT_MMIO;
+ r = RESUME_HOST;
+ break;
+ case EMULATE_FAIL:
+ pr_info("KVM: MMIO emulation failed (VSX repeat)\n");
+ run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ r = RESUME_HOST;
+ break;
+ default:
+ r = RESUME_GUEST;
+ break;
+ }
+ return r;
+}
+#endif /* CONFIG_VSX */
+
int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg)
{
int r = 0;
@@ -1092,13 +1381,24 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
int r;
sigset_t sigsaved;
- if (vcpu->sigset_active)
- sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
-
if (vcpu->mmio_needed) {
+ vcpu->mmio_needed = 0;
if (!vcpu->mmio_is_write)
kvmppc_complete_mmio_load(vcpu, run);
- vcpu->mmio_needed = 0;
+#ifdef CONFIG_VSX
+ if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+ vcpu->arch.mmio_vsx_copy_nums--;
+ vcpu->arch.mmio_vsx_offset++;
+ }
+
+ if (vcpu->arch.mmio_vsx_copy_nums > 0) {
+ r = kvmppc_emulate_mmio_vsx_loadstore(vcpu, run);
+ if (r == RESUME_HOST) {
+ vcpu->mmio_needed = 1;
+ return r;
+ }
+ }
+#endif
} else if (vcpu->arch.osi_needed) {
u64 *gprs = run->osi.gprs;
int i;
@@ -1120,6 +1420,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
#endif
}
+ if (vcpu->sigset_active)
+ sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+
if (run->immediate_exit)
r = -EINTR;
else
diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c
index 497130c..fc67bd7 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -314,6 +314,25 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm,
}
EXPORT_SYMBOL_GPL(mm_iommu_lookup);
+struct mm_iommu_table_group_mem_t *mm_iommu_lookup_rm(struct mm_struct *mm,
+ unsigned long ua, unsigned long size)
+{
+ struct mm_iommu_table_group_mem_t *mem, *ret = NULL;
+
+ list_for_each_entry_lockless(mem, &mm->context.iommu_group_mem_list,
+ next) {
+ if ((mem->ua <= ua) &&
+ (ua + size <= mem->ua +
+ (mem->entries << PAGE_SHIFT))) {
+ ret = mem;
+ break;
+ }
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_lookup_rm);
+
struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm,
unsigned long ua, unsigned long entries)
{
@@ -345,6 +364,26 @@ long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem,
}
EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa);
+long mm_iommu_ua_to_hpa_rm(struct mm_iommu_table_group_mem_t *mem,
+ unsigned long ua, unsigned long *hpa)
+{
+ const long entry = (ua - mem->ua) >> PAGE_SHIFT;
+ void *va = &mem->hpas[entry];
+ unsigned long *pa;
+
+ if (entry >= mem->entries)
+ return -EFAULT;
+
+ pa = (void *) vmalloc_to_phys(va);
+ if (!pa)
+ return -EFAULT;
+
+ *hpa = *pa | (ua & ~PAGE_MASK);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mm_iommu_ua_to_hpa_rm);
+
long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem)
{
if (atomic64_inc_not_zero(&mem->mapped))
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index e367382..ee4cdb5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1424,8 +1424,7 @@ static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe
iommu_group_put(pe->table_group.group);
BUG_ON(pe->table_group.group);
}
- pnv_pci_ioda2_table_free_pages(tbl);
- iommu_free_table(tbl, of_node_full_name(dev->dev.of_node));
+ iommu_tce_table_put(tbl);
}
static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
@@ -1860,6 +1859,17 @@ static int pnv_ioda1_tce_xchg(struct iommu_table *tbl, long index,
return ret;
}
+
+static int pnv_ioda1_tce_xchg_rm(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+ if (!ret)
+ pnv_pci_p7ioc_tce_invalidate(tbl, index, 1, true);
+
+ return ret;
+}
#endif
static void pnv_ioda1_tce_free(struct iommu_table *tbl, long index,
@@ -1874,6 +1884,7 @@ static struct iommu_table_ops pnv_ioda1_iommu_ops = {
.set = pnv_ioda1_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda1_tce_xchg,
+ .exchange_rm = pnv_ioda1_tce_xchg_rm,
#endif
.clear = pnv_ioda1_tce_free,
.get = pnv_tce_get,
@@ -1948,7 +1959,7 @@ static void pnv_pci_ioda2_tce_invalidate(struct iommu_table *tbl,
{
struct iommu_table_group_link *tgl;
- list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
+ list_for_each_entry_lockless(tgl, &tbl->it_group_list, next) {
struct pnv_ioda_pe *pe = container_of(tgl->table_group,
struct pnv_ioda_pe, table_group);
struct pnv_phb *phb = pe->phb;
@@ -2004,6 +2015,17 @@ static int pnv_ioda2_tce_xchg(struct iommu_table *tbl, long index,
return ret;
}
+
+static int pnv_ioda2_tce_xchg_rm(struct iommu_table *tbl, long index,
+ unsigned long *hpa, enum dma_data_direction *direction)
+{
+ long ret = pnv_tce_xchg(tbl, index, hpa, direction);
+
+ if (!ret)
+ pnv_pci_ioda2_tce_invalidate(tbl, index, 1, true);
+
+ return ret;
+}
#endif
static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
@@ -2017,13 +2039,13 @@ static void pnv_ioda2_tce_free(struct iommu_table *tbl, long index,
static void pnv_ioda2_table_free(struct iommu_table *tbl)
{
pnv_pci_ioda2_table_free_pages(tbl);
- iommu_free_table(tbl, "pnv");
}
static struct iommu_table_ops pnv_ioda2_iommu_ops = {
.set = pnv_ioda2_tce_build,
#ifdef CONFIG_IOMMU_API
.exchange = pnv_ioda2_tce_xchg,
+ .exchange_rm = pnv_ioda2_tce_xchg_rm,
#endif
.clear = pnv_ioda2_tce_free,
.get = pnv_tce_get,
@@ -2203,7 +2225,7 @@ found:
__free_pages(tce_mem, get_order(tce32_segsz * segs));
if (tbl) {
pnv_pci_unlink_table_and_group(tbl, &pe->table_group);
- iommu_free_table(tbl, "pnv");
+ iommu_tce_table_put(tbl);
}
}
@@ -2293,16 +2315,16 @@ static long pnv_pci_ioda2_create_table(struct iommu_table_group *table_group,
if (!tbl)
return -ENOMEM;
+ tbl->it_ops = &pnv_ioda2_iommu_ops;
+
ret = pnv_pci_ioda2_table_alloc_pages(nid,
bus_offset, page_shift, window_size,
levels, tbl);
if (ret) {
- iommu_free_table(tbl, "pnv");
+ iommu_tce_table_put(tbl);
return ret;
}
- tbl->it_ops = &pnv_ioda2_iommu_ops;
-
*ptbl = tbl;
return 0;
@@ -2343,7 +2365,7 @@ static long pnv_pci_ioda2_setup_default_config(struct pnv_ioda_pe *pe)
if (rc) {
pe_err(pe, "Failed to configure 32-bit TCE table, err %ld\n",
rc);
- pnv_ioda2_table_free(tbl);
+ iommu_tce_table_put(tbl);
return rc;
}
@@ -2431,7 +2453,7 @@ static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
pnv_pci_ioda2_unset_window(&pe->table_group, 0);
if (pe->pbus)
pnv_ioda_setup_bus_dma(pe, pe->pbus, false);
- pnv_ioda2_table_free(tbl);
+ iommu_tce_table_put(tbl);
}
static void pnv_ioda2_release_ownership(struct iommu_table_group *table_group)
@@ -3406,7 +3428,7 @@ static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
}
free_pages(tbl->it_base, get_order(tbl->it_size << 3));
- iommu_free_table(tbl, "pnv");
+ iommu_tce_table_put(tbl);
}
static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
@@ -3433,7 +3455,7 @@ static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
}
pnv_pci_ioda2_table_free_pages(tbl);
- iommu_free_table(tbl, "pnv");
+ iommu_tce_table_put(tbl);
}
static void pnv_ioda_free_pe_seg(struct pnv_ioda_pe *pe,
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index eb835e9..204a829 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -767,6 +767,7 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+ kref_init(&tbl->it_kref);
return tbl;
}
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 4d757ea..7ce5db2 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -74,6 +74,7 @@ static struct iommu_table_group *iommu_pseries_alloc_group(int node)
goto fail_exit;
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
+ kref_init(&tbl->it_kref);
tgl->table_group = table_group;
list_add_rcu(&tgl->next, &tbl->it_group_list);
@@ -115,7 +116,7 @@ static void iommu_pseries_free_group(struct iommu_table_group *table_group,
BUG_ON(table_group->group);
}
#endif
- iommu_free_table(tbl, node_name);
+ iommu_tce_table_put(tbl);
kfree(table_group);
}
diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c
index 7204939..28b09fd 100644
--- a/arch/powerpc/platforms/pseries/vio.c
+++ b/arch/powerpc/platforms/pseries/vio.c
@@ -1318,7 +1318,7 @@ static void vio_dev_release(struct device *dev)
struct iommu_table *tbl = get_iommu_table_base(dev);
if (tbl)
- iommu_free_table(tbl, of_node_full_name(dev->of_node));
+ iommu_tce_table_put(tbl);
of_node_put(dev->of_node);
kfree(to_vio_dev(dev));
}
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index e2dfbf2..31cac7d 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -26,6 +26,7 @@
#define CPACF_PCC 0xb92c /* MSA4 */
#define CPACF_KMCTR 0xb92d /* MSA4 */
#define CPACF_PPNO 0xb93c /* MSA5 */
+#define CPACF_KMA 0xb929 /* MSA8 */
/*
* En/decryption modifier bits
@@ -149,8 +150,8 @@ static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
asm volatile(
" spm 0\n" /* pckmo doesn't change the cc */
- /* Parameter registers are ignored, but may not be 0 */
- "0: .insn rrf,%[opc] << 16,2,2,2,0\n"
+ /* Parameter regs are ignored, but must be nonzero and unique */
+ "0: .insn rrf,%[opc] << 16,2,4,6,0\n"
" brc 1,0b\n" /* handle partial completion */
: "=m" (*mask)
: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h
index 1d48880..e8f6230 100644
--- a/arch/s390/include/asm/elf.h
+++ b/arch/s390/include/asm/elf.h
@@ -105,6 +105,7 @@
#define HWCAP_S390_VXRS 2048
#define HWCAP_S390_VXRS_BCD 4096
#define HWCAP_S390_VXRS_EXT 8192
+#define HWCAP_S390_GS 16384
/* Internal bits, not exposed via elf */
#define HWCAP_INT_SIE 1UL
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index a41faf3..426614a 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -25,6 +25,7 @@
#include <asm/cpu.h>
#include <asm/fpu/api.h>
#include <asm/isc.h>
+#include <asm/guarded_storage.h>
#define KVM_S390_BSCA_CPU_SLOTS 64
#define KVM_S390_ESCA_CPU_SLOTS 248
@@ -121,6 +122,7 @@ struct esca_block {
#define CPUSTAT_SLSR 0x00002000
#define CPUSTAT_ZARCH 0x00000800
#define CPUSTAT_MCDS 0x00000100
+#define CPUSTAT_KSS 0x00000200
#define CPUSTAT_SM 0x00000080
#define CPUSTAT_IBS 0x00000040
#define CPUSTAT_GED2 0x00000010
@@ -164,16 +166,27 @@ struct kvm_s390_sie_block {
#define ICTL_RRBE 0x00001000
#define ICTL_TPROT 0x00000200
__u32 ictl; /* 0x0048 */
+#define ECA_CEI 0x80000000
+#define ECA_IB 0x40000000
+#define ECA_SIGPI 0x10000000
+#define ECA_MVPGI 0x01000000
+#define ECA_VX 0x00020000
+#define ECA_PROTEXCI 0x00002000
+#define ECA_SII 0x00000001
__u32 eca; /* 0x004c */
#define ICPT_INST 0x04
#define ICPT_PROGI 0x08
#define ICPT_INSTPROGI 0x0C
+#define ICPT_EXTREQ 0x10
#define ICPT_EXTINT 0x14
+#define ICPT_IOREQ 0x18
+#define ICPT_WAIT 0x1c
#define ICPT_VALIDITY 0x20
#define ICPT_STOP 0x28
#define ICPT_OPEREXC 0x2C
#define ICPT_PARTEXEC 0x38
#define ICPT_IOINST 0x40
+#define ICPT_KSS 0x5c
__u8 icptcode; /* 0x0050 */
__u8 icptstatus; /* 0x0051 */
__u16 ihcpu; /* 0x0052 */
@@ -182,10 +195,19 @@ struct kvm_s390_sie_block {
__u32 ipb; /* 0x0058 */
__u32 scaoh; /* 0x005c */
__u8 reserved60; /* 0x0060 */
+#define ECB_GS 0x40
+#define ECB_TE 0x10
+#define ECB_SRSI 0x04
+#define ECB_HOSTPROTINT 0x02
__u8 ecb; /* 0x0061 */
+#define ECB2_CMMA 0x80
+#define ECB2_IEP 0x20
+#define ECB2_PFMFI 0x08
+#define ECB2_ESCA 0x04
__u8 ecb2; /* 0x0062 */
-#define ECB3_AES 0x04
#define ECB3_DEA 0x08
+#define ECB3_AES 0x04
+#define ECB3_RI 0x01
__u8 ecb3; /* 0x0063 */
__u32 scaol; /* 0x0064 */
__u8 reserved68[4]; /* 0x0068 */
@@ -219,11 +241,14 @@ struct kvm_s390_sie_block {
__u32 crycbd; /* 0x00fc */
__u64 gcr[16]; /* 0x0100 */
__u64 gbea; /* 0x0180 */
- __u8 reserved188[24]; /* 0x0188 */
+ __u8 reserved188[8]; /* 0x0188 */
+ __u64 sdnxo; /* 0x0190 */
+ __u8 reserved198[8]; /* 0x0198 */
__u32 fac; /* 0x01a0 */
__u8 reserved1a4[20]; /* 0x01a4 */
__u64 cbrlo; /* 0x01b8 */
__u8 reserved1c0[8]; /* 0x01c0 */
+#define ECD_HOSTREGMGMT 0x20000000
__u32 ecd; /* 0x01c8 */
__u8 reserved1cc[18]; /* 0x01cc */
__u64 pp; /* 0x01de */
@@ -498,6 +523,12 @@ struct kvm_s390_local_interrupt {
#define FIRQ_CNTR_PFAULT 3
#define FIRQ_MAX_COUNT 4
+/* mask the AIS mode for a given ISC */
+#define AIS_MODE_MASK(isc) (0x80 >> isc)
+
+#define KVM_S390_AIS_MODE_ALL 0
+#define KVM_S390_AIS_MODE_SINGLE 1
+
struct kvm_s390_float_interrupt {
unsigned long pending_irqs;
spinlock_t lock;
@@ -507,6 +538,10 @@ struct kvm_s390_float_interrupt {
struct kvm_s390_ext_info srv_signal;
int next_rr_cpu;
unsigned long idle_mask[BITS_TO_LONGS(KVM_MAX_VCPUS)];
+ struct mutex ais_lock;
+ u8 simm;
+ u8 nimm;
+ int ais_enabled;
};
struct kvm_hw_wp_info_arch {
@@ -554,6 +589,7 @@ struct kvm_vcpu_arch {
/* if vsie is active, currently executed shadow sie control block */
struct kvm_s390_sie_block *vsie_block;
unsigned int host_acrs[NUM_ACRS];
+ struct gs_cb *host_gscb;
struct fpu host_fpregs;
struct kvm_s390_local_interrupt local_int;
struct hrtimer ckc_timer;
@@ -574,6 +610,7 @@ struct kvm_vcpu_arch {
*/
seqcount_t cputm_seqcount;
__u64 cputm_start;
+ bool gs_enabled;
};
struct kvm_vm_stat {
@@ -596,6 +633,7 @@ struct s390_io_adapter {
bool maskable;
bool masked;
bool swap;
+ bool suppressible;
struct rw_semaphore maps_lock;
struct list_head maps;
atomic_t nr_maps;
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index 61261e0..8a5b082 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -157,8 +157,8 @@ struct lowcore {
__u64 stfle_fac_list[32]; /* 0x0f00 */
__u8 pad_0x1000[0x11b0-0x1000]; /* 0x1000 */
- /* Pointer to vector register save area */
- __u64 vector_save_area_addr; /* 0x11b0 */
+ /* Pointer to the machine check extended save area */
+ __u64 mcesad; /* 0x11b0 */
/* 64 bit extparam used for pfault/diag 250: defined by architecture */
__u64 ext_params2; /* 0x11B8 */
@@ -182,10 +182,7 @@ struct lowcore {
/* Transaction abort diagnostic block */
__u8 pgm_tdb[256]; /* 0x1800 */
- __u8 pad_0x1900[0x1c00-0x1900]; /* 0x1900 */
-
- /* Software defined save area for vector registers */
- __u8 vector_save_area[1024]; /* 0x1c00 */
+ __u8 pad_0x1900[0x2000-0x1900]; /* 0x1900 */
} __packed;
#define S390_lowcore (*((struct lowcore *) 0))
diff --git a/arch/s390/include/asm/nmi.h b/arch/s390/include/asm/nmi.h
index b75fd91..e3e8895 100644
--- a/arch/s390/include/asm/nmi.h
+++ b/arch/s390/include/asm/nmi.h
@@ -58,7 +58,9 @@ union mci {
u64 ie : 1; /* 32 indirect storage error */
u64 ar : 1; /* 33 access register validity */
u64 da : 1; /* 34 delayed access exception */
- u64 : 7; /* 35-41 */
+ u64 : 1; /* 35 */
+ u64 gs : 1; /* 36 guarded storage registers */
+ u64 : 5; /* 37-41 */
u64 pr : 1; /* 42 tod programmable register validity */
u64 fc : 1; /* 43 fp control register validity */
u64 ap : 1; /* 44 ancillary report */
@@ -69,6 +71,14 @@ union mci {
};
};
+#define MCESA_ORIGIN_MASK (~0x3ffUL)
+#define MCESA_LC_MASK (0xfUL)
+
+struct mcesa {
+ u8 vector_save_area[1024];
+ u8 guarded_storage_save_area[32];
+};
+
struct pt_regs;
extern void s390_handle_mcck(void);
diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h
index e498871..cc101f9 100644
--- a/arch/s390/include/asm/processor.h
+++ b/arch/s390/include/asm/processor.h
@@ -135,6 +135,8 @@ struct thread_struct {
struct list_head list;
/* cpu runtime instrumentation */
struct runtime_instr_cb *ri_cb;
+ struct gs_cb *gs_cb; /* Current guarded storage cb */
+ struct gs_cb *gs_bc_cb; /* Broadcast guarded storage cb */
unsigned char trap_tdb[256]; /* Transaction abort diagnose block */
/*
* Warning: 'fpu' is dynamically-sized. It *MUST* be at
@@ -215,6 +217,9 @@ void show_cacheinfo(struct seq_file *m);
/* Free all resources held by a thread. */
extern void release_thread(struct task_struct *);
+/* Free guarded storage control block for current */
+void exit_thread_gs(void);
+
/*
* Return saved PC of a blocked thread.
*/
diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h
index ace3bd3..6f5167b 100644
--- a/arch/s390/include/asm/sclp.h
+++ b/arch/s390/include/asm/sclp.h
@@ -75,6 +75,7 @@ struct sclp_info {
unsigned char has_pfmfi : 1;
unsigned char has_ibs : 1;
unsigned char has_skey : 1;
+ unsigned char has_kss : 1;
unsigned int ibc;
unsigned int mtid;
unsigned int mtid_cp;
diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h
index 30bdb5a..383bd83 100644
--- a/arch/s390/include/asm/setup.h
+++ b/arch/s390/include/asm/setup.h
@@ -31,6 +31,7 @@
#define MACHINE_FLAG_VX _BITUL(13)
#define MACHINE_FLAG_CAD _BITUL(14)
#define MACHINE_FLAG_NX _BITUL(15)
+#define MACHINE_FLAG_GS _BITUL(16)
#define LPP_MAGIC _BITUL(31)
#define LPP_PFAULT_PID_MASK _AC(0xffffffff, UL)
@@ -70,6 +71,7 @@ extern void detect_memory_memblock(void);
#define MACHINE_HAS_VX (S390_lowcore.machine_flags & MACHINE_FLAG_VX)
#define MACHINE_HAS_CAD (S390_lowcore.machine_flags & MACHINE_FLAG_CAD)
#define MACHINE_HAS_NX (S390_lowcore.machine_flags & MACHINE_FLAG_NX)
+#define MACHINE_HAS_GS (S390_lowcore.machine_flags & MACHINE_FLAG_GS)
/*
* Console mode. Override with conmode=
diff --git a/arch/s390/include/asm/switch_to.h b/arch/s390/include/asm/switch_to.h
index 12d45f0..f6c2b58 100644
--- a/arch/s390/include/asm/switch_to.h
+++ b/arch/s390/include/asm/switch_to.h
@@ -10,6 +10,7 @@
#include <linux/thread_info.h>
#include <asm/fpu/api.h>
#include <asm/ptrace.h>
+#include <asm/guarded_storage.h>
extern struct task_struct *__switch_to(void *, void *);
extern void update_cr_regs(struct task_struct *task);
@@ -33,12 +34,14 @@ static inline void restore_access_regs(unsigned int *acrs)
save_fpu_regs(); \
save_access_regs(&prev->thread.acrs[0]); \
save_ri_cb(prev->thread.ri_cb); \
+ save_gs_cb(prev->thread.gs_cb); \
} \
if (next->mm) { \
update_cr_regs(next); \
set_cpu_flag(CIF_FPU); \
restore_access_regs(&next->thread.acrs[0]); \
restore_ri_cb(next->thread.ri_cb, prev->thread.ri_cb); \
+ restore_gs_cb(next->thread.gs_cb); \
} \
prev = __switch_to(prev,next); \
} while (0)
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index a5b54a4..f36e6e2 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -54,11 +54,12 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define TIF_NOTIFY_RESUME 0 /* callback before returning to user */
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
-#define TIF_SYSCALL_TRACE 3 /* syscall trace active */
-#define TIF_SYSCALL_AUDIT 4 /* syscall auditing active */
-#define TIF_SECCOMP 5 /* secure computing */
-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_UPROBE 7 /* breakpointed or single-stepping */
+#define TIF_UPROBE 3 /* breakpointed or single-stepping */
+#define TIF_GUARDED_STORAGE 4 /* load guarded storage control block */
+#define TIF_SYSCALL_TRACE 8 /* syscall trace active */
+#define TIF_SYSCALL_AUDIT 9 /* syscall auditing active */
+#define TIF_SECCOMP 10 /* secure computing */
+#define TIF_SYSCALL_TRACEPOINT 11 /* syscall tracepoint instrumentation */
#define TIF_31BIT 16 /* 32bit process */
#define TIF_MEMDIE 17 /* is terminating due to OOM killer */
#define TIF_RESTORE_SIGMASK 18 /* restore signal mask in do_signal() */
@@ -76,5 +77,6 @@ int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
#define _TIF_UPROBE _BITUL(TIF_UPROBE)
#define _TIF_31BIT _BITUL(TIF_31BIT)
#define _TIF_SINGLE_STEP _BITUL(TIF_SINGLE_STEP)
+#define _TIF_GUARDED_STORAGE _BITUL(TIF_GUARDED_STORAGE)
#endif /* _ASM_THREAD_INFO_H */
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 6848ba5..86b761e 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -12,6 +12,7 @@ header-y += dasd.h
header-y += debug.h
header-y += errno.h
header-y += fcntl.h
+header-y += guarded_storage.h
header-y += hypfs.h
header-y += ioctl.h
header-y += ioctls.h
diff --git a/arch/s390/include/uapi/asm/guarded_storage.h b/arch/s390/include/uapi/asm/guarded_storage.h
new file mode 100644
index 0000000..852850e
--- /dev/null
+++ b/arch/s390/include/uapi/asm/guarded_storage.h
@@ -0,0 +1,77 @@
+#ifndef _GUARDED_STORAGE_H
+#define _GUARDED_STORAGE_H
+
+#include <linux/types.h>
+
+struct gs_cb {
+ __u64 reserved;
+ __u64 gsd;
+ __u64 gssm;
+ __u64 gs_epl_a;
+};
+
+struct gs_epl {
+ __u8 pad1;
+ union {
+ __u8 gs_eam;
+ struct {
+ __u8 : 6;
+ __u8 e : 1;
+ __u8 b : 1;
+ };
+ };
+ union {
+ __u8 gs_eci;
+ struct {
+ __u8 tx : 1;
+ __u8 cx : 1;
+ __u8 : 5;
+ __u8 in : 1;
+ };
+ };
+ union {
+ __u8 gs_eai;
+ struct {
+ __u8 : 1;
+ __u8 t : 1;
+ __u8 as : 2;
+ __u8 ar : 4;
+ };
+ };
+ __u32 pad2;
+ __u64 gs_eha;
+ __u64 gs_eia;
+ __u64 gs_eoa;
+ __u64 gs_eir;
+ __u64 gs_era;
+};
+
+#define GS_ENABLE 0
+#define GS_DISABLE 1
+#define GS_SET_BC_CB 2
+#define GS_CLEAR_BC_CB 3
+#define GS_BROADCAST 4
+
+static inline void load_gs_cb(struct gs_cb *gs_cb)
+{
+ asm volatile(".insn rxy,0xe3000000004d,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void store_gs_cb(struct gs_cb *gs_cb)
+{
+ asm volatile(".insn rxy,0xe30000000049,0,%0" : : "Q" (*gs_cb));
+}
+
+static inline void save_gs_cb(struct gs_cb *gs_cb)
+{
+ if (gs_cb)
+ store_gs_cb(gs_cb);
+}
+
+static inline void restore_gs_cb(struct gs_cb *gs_cb)
+{
+ if (gs_cb)
+ load_gs_cb(gs_cb);
+}
+
+#endif /* _GUARDED_STORAGE_H */
diff --git a/arch/s390/include/uapi/asm/kvm.h b/arch/s390/include/uapi/asm/kvm.h
index a2ffec4..3dd2a1d 100644
--- a/arch/s390/include/uapi/asm/kvm.h
+++ b/arch/s390/include/uapi/asm/kvm.h
@@ -26,6 +26,8 @@
#define KVM_DEV_FLIC_ADAPTER_REGISTER 6
#define KVM_DEV_FLIC_ADAPTER_MODIFY 7
#define KVM_DEV_FLIC_CLEAR_IO_IRQ 8
+#define KVM_DEV_FLIC_AISM 9
+#define KVM_DEV_FLIC_AIRQ_INJECT 10
/*
* We can have up to 4*64k pending subchannels + 8 adapter interrupts,
* as well as up to ASYNC_PF_PER_VCPU*KVM_MAX_VCPUS pfault done interrupts.
@@ -41,7 +43,14 @@ struct kvm_s390_io_adapter {
__u8 isc;
__u8 maskable;
__u8 swap;
- __u8 pad;
+ __u8 flags;
+};
+
+#define KVM_S390_ADAPTER_SUPPRESSIBLE 0x01
+
+struct kvm_s390_ais_req {
+ __u8 isc;
+ __u16 mode;
};
#define KVM_S390_IO_ADAPTER_MASK 1
@@ -110,6 +119,7 @@ struct kvm_s390_vm_cpu_machine {
#define KVM_S390_VM_CPU_FEAT_CMMA 10
#define KVM_S390_VM_CPU_FEAT_PFMFI 11
#define KVM_S390_VM_CPU_FEAT_SIGPIF 12
+#define KVM_S390_VM_CPU_FEAT_KSS 13
struct kvm_s390_vm_cpu_feat {
__u64 feat[16];
};
@@ -131,7 +141,8 @@ struct kvm_s390_vm_cpu_subfunc {
__u8 kmo[16]; /* with MSA4 */
__u8 pcc[16]; /* with MSA4 */
__u8 ppno[16]; /* with MSA5 */
- __u8 reserved[1824];
+ __u8 kma[16]; /* with MSA8 */
+ __u8 reserved[1808];
};
/* kvm attributes for crypto */
@@ -197,6 +208,10 @@ struct kvm_guest_debug_arch {
#define KVM_SYNC_VRS (1UL << 6)
#define KVM_SYNC_RICCB (1UL << 7)
#define KVM_SYNC_FPRS (1UL << 8)
+#define KVM_SYNC_GSCB (1UL << 9)
+/* length and alignment of the sdnx as a power of two */
+#define SDNXC 8
+#define SDNXL (1UL << SDNXC)
/* definition of registers in kvm_run */
struct kvm_sync_regs {
__u64 prefix; /* prefix register */
@@ -217,8 +232,16 @@ struct kvm_sync_regs {
};
__u8 reserved[512]; /* for future vector expansion */
__u32 fpc; /* valid on KVM_SYNC_VRS or KVM_SYNC_FPRS */
- __u8 padding[52]; /* riccb needs to be 64byte aligned */
+ __u8 padding1[52]; /* riccb needs to be 64byte aligned */
__u8 riccb[64]; /* runtime instrumentation controls block */
+ __u8 padding2[192]; /* sdnx needs to be 256byte aligned */
+ union {
+ __u8 sdnx[SDNXL]; /* state description annex */
+ struct {
+ __u64 reserved1[2];
+ __u64 gscb[4];
+ };
+ };
};
#define KVM_REG_S390_TODPR (KVM_REG_S390 | KVM_REG_SIZE_U32 | 0x1)
diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h
index 152de9b..ea42290 100644
--- a/arch/s390/include/uapi/asm/unistd.h
+++ b/arch/s390/include/uapi/asm/unistd.h
@@ -313,7 +313,7 @@
#define __NR_copy_file_range 375
#define __NR_preadv2 376
#define __NR_pwritev2 377
-/* Number 378 is reserved for guarded storage */
+#define __NR_s390_guarded_storage 378
#define __NR_statx 379
#define NR_syscalls 380
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 060ce54..aa5adbd 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -57,7 +57,7 @@ obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o
obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o
-obj-y += runtime_instr.o cache.o fpu.o dumpstack.o
+obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o
obj-y += entry.o reipl.o relocate_kernel.o
extra-y += head.o head64.o vmlinux.lds
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index c4b3570..6bb2963 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,7 +175,7 @@ int main(void)
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
- OFFSET(__LC_VX_SAVE_AREA_ADDR, lowcore, vector_save_area_addr);
+ OFFSET(__LC_MCESAD, lowcore, mcesad);
OFFSET(__LC_EXT_PARAMS2, lowcore, ext_params2);
OFFSET(__LC_FPREGS_SAVE_AREA, lowcore, floating_pt_save_area);
OFFSET(__LC_GPREGS_SAVE_AREA, lowcore, gpregs_save_area);
diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c
index e89cc2e..986642a 100644
--- a/arch/s390/kernel/compat_wrapper.c
+++ b/arch/s390/kernel/compat_wrapper.c
@@ -178,4 +178,5 @@ COMPAT_SYSCALL_WRAP3(getpeername, int, fd, struct sockaddr __user *, usockaddr,
COMPAT_SYSCALL_WRAP6(sendto, int, fd, void __user *, buff, size_t, len, unsigned int, flags, struct sockaddr __user *, addr, int, addr_len);
COMPAT_SYSCALL_WRAP3(mlock2, unsigned long, start, size_t, len, int, flags);
COMPAT_SYSCALL_WRAP6(copy_file_range, int, fd_in, loff_t __user *, off_in, int, fd_out, loff_t __user *, off_out, size_t, len, unsigned int, flags);
+COMPAT_SYSCALL_WRAP2(s390_guarded_storage, int, command, struct gs_cb *, gs_cb);
COMPAT_SYSCALL_WRAP5(statx, int, dfd, const char __user *, path, unsigned, flags, unsigned, mask, struct statx __user *, buffer);
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 4e65c79..95298a4 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -358,6 +358,8 @@ static __init void detect_machine_facilities(void)
S390_lowcore.machine_flags |= MACHINE_FLAG_NX;
__ctl_set_bit(0, 20);
}
+ if (test_facility(133))
+ S390_lowcore.machine_flags |= MACHINE_FLAG_GS;
}
static inline void save_vector_registers(void)
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 6a7d737..fa8b8f2 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -47,7 +47,7 @@ STACK_SIZE = 1 << STACK_SHIFT
STACK_INIT = STACK_SIZE - STACK_FRAME_OVERHEAD - __PT_SIZE
_TIF_WORK = (_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_NEED_RESCHED | \
- _TIF_UPROBE)
+ _TIF_UPROBE | _TIF_GUARDED_STORAGE)
_TIF_TRACE = (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
_TIF_SYSCALL_TRACEPOINT)
_CIF_WORK = (_CIF_MCCK_PENDING | _CIF_ASCE_PRIMARY | \
@@ -332,6 +332,8 @@ ENTRY(system_call)
TSTMSK __TI_flags(%r12),_TIF_UPROBE
jo .Lsysc_uprobe_notify
#endif
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lsysc_guarded_storage
TSTMSK __PT_FLAGS(%r11),_PIF_PER_TRAP
jo .Lsysc_singlestep
TSTMSK __TI_flags(%r12),_TIF_SIGPENDING
@@ -409,6 +411,14 @@ ENTRY(system_call)
#endif
#
+# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
+#
+.Lsysc_guarded_storage:
+ lgr %r2,%r11 # pass pointer to pt_regs
+ larl %r14,.Lsysc_return
+ jg gs_load_bc_cb
+
+#
# _PIF_PER_TRAP is set, call do_per_trap
#
.Lsysc_singlestep:
@@ -663,6 +673,8 @@ ENTRY(io_int_handler)
jo .Lio_sigpending
TSTMSK __TI_flags(%r12),_TIF_NOTIFY_RESUME
jo .Lio_notify_resume
+ TSTMSK __TI_flags(%r12),_TIF_GUARDED_STORAGE
+ jo .Lio_guarded_storage
TSTMSK __LC_CPU_FLAGS,_CIF_FPU
jo .Lio_vxrs
TSTMSK __LC_CPU_FLAGS,(_CIF_ASCE_PRIMARY|_CIF_ASCE_SECONDARY)
@@ -697,6 +709,18 @@ ENTRY(io_int_handler)
jg load_fpu_regs
#
+# _TIF_GUARDED_STORAGE is set, call guarded_storage_load
+#
+.Lio_guarded_storage:
+ # TRACE_IRQS_ON already done at .Lio_return
+ ssm __LC_SVC_NEW_PSW # reenable interrupts
+ lgr %r2,%r11 # pass pointer to pt_regs
+ brasl %r14,gs_load_bc_cb
+ ssm __LC_PGM_NEW_PSW # disable I/O and ext. interrupts
+ TRACE_IRQS_OFF
+ j .Lio_return
+
+#
# _TIF_NEED_RESCHED is set, call schedule
#
.Lio_reschedule:
diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h
index 33f9018..dbf5f7e 100644
--- a/arch/s390/kernel/entry.h
+++ b/arch/s390/kernel/entry.h
@@ -74,12 +74,14 @@ long sys_sigreturn(void);
long sys_s390_personality(unsigned int personality);
long sys_s390_runtime_instr(int command, int signum);
+long sys_s390_guarded_storage(int command, struct gs_cb __user *);
long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t);
long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t);
DECLARE_PER_CPU(u64, mt_cycles[8]);
void verify_facilities(void);
+void gs_load_bc_cb(struct pt_regs *regs);
void set_fs_fixup(void);
#endif /* _ENTRY_H */
diff --git a/arch/s390/kernel/guarded_storage.c b/arch/s390/kernel/guarded_storage.c
new file mode 100644
index 0000000..6f06474
--- /dev/null
+++ b/arch/s390/kernel/guarded_storage.c
@@ -0,0 +1,128 @@
+/*
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/syscalls.h>
+#include <linux/signal.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <asm/guarded_storage.h>
+#include "entry.h"
+
+void exit_thread_gs(void)
+{
+ kfree(current->thread.gs_cb);
+ kfree(current->thread.gs_bc_cb);
+ current->thread.gs_cb = current->thread.gs_bc_cb = NULL;
+}
+
+static int gs_enable(void)
+{
+ struct gs_cb *gs_cb;
+
+ if (!current->thread.gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ gs_cb->gsd = 25;
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_disable(void)
+{
+ if (current->thread.gs_cb) {
+ preempt_disable();
+ kfree(current->thread.gs_cb);
+ current->thread.gs_cb = NULL;
+ __ctl_clear_bit(2, 4);
+ preempt_enable();
+ }
+ return 0;
+}
+
+static int gs_set_bc_cb(struct gs_cb __user *u_gs_cb)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ if (!gs_cb) {
+ gs_cb = kzalloc(sizeof(*gs_cb), GFP_KERNEL);
+ if (!gs_cb)
+ return -ENOMEM;
+ current->thread.gs_bc_cb = gs_cb;
+ }
+ if (copy_from_user(gs_cb, u_gs_cb, sizeof(*gs_cb)))
+ return -EFAULT;
+ return 0;
+}
+
+static int gs_clear_bc_cb(void)
+{
+ struct gs_cb *gs_cb;
+
+ gs_cb = current->thread.gs_bc_cb;
+ current->thread.gs_bc_cb = NULL;
+ kfree(gs_cb);
+ return 0;
+}
+
+void gs_load_bc_cb(struct pt_regs *regs)
+{
+ struct gs_cb *gs_cb;
+
+ preempt_disable();
+ clear_thread_flag(TIF_GUARDED_STORAGE);
+ gs_cb = current->thread.gs_bc_cb;
+ if (gs_cb) {
+ kfree(current->thread.gs_cb);
+ current->thread.gs_bc_cb = NULL;
+ __ctl_set_bit(2, 4);
+ load_gs_cb(gs_cb);
+ current->thread.gs_cb = gs_cb;
+ }
+ preempt_enable();
+}
+
+static int gs_broadcast(void)
+{
+ struct task_struct *sibling;
+
+ read_lock(&tasklist_lock);
+ for_each_thread(current, sibling) {
+ if (!sibling->thread.gs_bc_cb)
+ continue;
+ if (test_and_set_tsk_thread_flag(sibling, TIF_GUARDED_STORAGE))
+ kick_process(sibling);
+ }
+ read_unlock(&tasklist_lock);
+ return 0;
+}
+
+SYSCALL_DEFINE2(s390_guarded_storage, int, command,
+ struct gs_cb __user *, gs_cb)
+{
+ if (!MACHINE_HAS_GS)
+ return -EOPNOTSUPP;
+ switch (command) {
+ case GS_ENABLE:
+ return gs_enable();
+ case GS_DISABLE:
+ return gs_disable();
+ case GS_SET_BC_CB:
+ return gs_set_bc_cb(gs_cb);
+ case GS_CLEAR_BC_CB:
+ return gs_clear_bc_cb();
+ case GS_BROADCAST:
+ return gs_broadcast();
+ default:
+ return -EINVAL;
+ }
+}
diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c
index 3074c1d..db5658d 100644
--- a/arch/s390/kernel/machine_kexec.c
+++ b/arch/s390/kernel/machine_kexec.c
@@ -27,6 +27,7 @@
#include <asm/cacheflush.h>
#include <asm/os_info.h>
#include <asm/switch_to.h>
+#include <asm/nmi.h>
typedef void (*relocate_kernel_t)(kimage_entry_t *, unsigned long);
@@ -102,6 +103,8 @@ static void __do_machine_kdump(void *image)
*/
static noinline void __machine_kdump(void *image)
{
+ struct mcesa *mcesa;
+ unsigned long cr2_old, cr2_new;
int this_cpu, cpu;
lgr_info_log();
@@ -114,8 +117,16 @@ static noinline void __machine_kdump(void *image)
continue;
}
/* Store status of the boot CPU */
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (MACHINE_HAS_VX)
- save_vx_regs((void *) &S390_lowcore.vector_save_area);
+ save_vx_regs((__vector128 *) mcesa->vector_save_area);
+ if (MACHINE_HAS_GS) {
+ __ctl_store(cr2_old, 2, 2);
+ cr2_new = cr2_old | (1UL << 4);
+ __ctl_load(cr2_new, 2, 2);
+ save_gs_cb((struct gs_cb *) mcesa->guarded_storage_save_area);
+ __ctl_load(cr2_old, 2, 2);
+ }
/*
* To create a good backchain for this CPU in the dump store_status
* is passed the address of a function. The address is saved into
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 9bf8327..9855895 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -106,6 +106,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
int kill_task;
u64 zero;
void *fpt_save_area;
+ struct mcesa *mcesa;
kill_task = 0;
zero = 0;
@@ -165,6 +166,7 @@ static int notrace s390_validate_registers(union mci mci, int umode)
: : "Q" (S390_lowcore.fpt_creg_save_area));
}
+ mcesa = (struct mcesa *)(S390_lowcore.mcesad & MCESA_ORIGIN_MASK);
if (!MACHINE_HAS_VX) {
/* Validate floating point registers */
asm volatile(
@@ -209,8 +211,8 @@ static int notrace s390_validate_registers(union mci mci, int umode)
" la 1,%0\n"
" .word 0xe70f,0x1000,0x0036\n" /* vlm 0,15,0(1) */
" .word 0xe70f,0x1100,0x0c36\n" /* vlm 16,31,256(1) */
- : : "Q" (*(struct vx_array *)
- &S390_lowcore.vector_save_area) : "1");
+ : : "Q" (*(struct vx_array *) mcesa->vector_save_area)
+ : "1");
__ctl_load(S390_lowcore.cregs_save_area[0], 0, 0);
}
/* Validate access registers */
@@ -224,6 +226,19 @@ static int notrace s390_validate_registers(union mci mci, int umode)
*/
kill_task = 1;
}
+ /* Validate guarded storage registers */
+ if (MACHINE_HAS_GS && (S390_lowcore.cregs_save_area[2] & (1UL << 4))) {
+ if (!mci.gs)
+ /*
+ * Guarded storage register can't be restored and
+ * the current processes uses guarded storage.
+ * It has to be terminated.
+ */
+ kill_task = 1;
+ else
+ load_gs_cb((struct gs_cb *)
+ mcesa->guarded_storage_save_area);
+ }
/*
* We don't even try to validate the TOD register, since we simply
* can't write something sensible into that register.
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index f29e41c..999d715 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -73,8 +73,10 @@ extern void kernel_thread_starter(void);
*/
void exit_thread(struct task_struct *tsk)
{
- if (tsk == current)
+ if (tsk == current) {
exit_thread_runtime_instr();
+ exit_thread_gs();
+ }
}
void flush_thread(void)
@@ -159,6 +161,9 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long new_stackp,
/* Don't copy runtime instrumentation info */
p->thread.ri_cb = NULL;
frame->childregs.psw.mask &= ~PSW_MASK_RI;
+ /* Don't copy guarded storage control block */
+ p->thread.gs_cb = NULL;
+ p->thread.gs_bc_cb = NULL;
/* Set a new TLS ? */
if (clone_flags & CLONE_SETTLS) {
diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c
index 928b929..c737098 100644
--- a/arch/s390/kernel/processor.c
+++ b/arch/s390/kernel/processor.c
@@ -95,7 +95,7 @@ static void show_cpu_summary(struct seq_file *m, void *v)
{
static const char *hwcap_str[] = {
"esan3", "zarch", "stfle", "msa", "ldisp", "eimm", "dfp",
- "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe"
+ "edat", "etf3eh", "highgprs", "te", "vx", "vxd", "vxe", "gs"
};
static const char * const int_hwcap_str[] = {
"sie"
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index c14df0a..c933e25 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -44,30 +44,42 @@ void update_cr_regs(struct task_struct *task)
struct pt_regs *regs = task_pt_regs(task);
struct thread_struct *thread = &task->thread;
struct per_regs old, new;
-
+ unsigned long cr0_old, cr0_new;
+ unsigned long cr2_old, cr2_new;
+ int cr0_changed, cr2_changed;
+
+ __ctl_store(cr0_old, 0, 0);
+ __ctl_store(cr2_old, 2, 2);
+ cr0_new = cr0_old;
+ cr2_new = cr2_old;
/* Take care of the enable/disable of transactional execution. */
if (MACHINE_HAS_TE) {
- unsigned long cr, cr_new;
-
- __ctl_store(cr, 0, 0);
/* Set or clear transaction execution TXC bit 8. */
- cr_new = cr | (1UL << 55);
+ cr0_new |= (1UL << 55);
if (task->thread.per_flags & PER_FLAG_NO_TE)
- cr_new &= ~(1UL << 55);
- if (cr_new != cr)
- __ctl_load(cr_new, 0, 0);
+ cr0_new &= ~(1UL << 55);
/* Set or clear transaction execution TDC bits 62 and 63. */
- __ctl_store(cr, 2, 2);
- cr_new = cr & ~3UL;
+ cr2_new &= ~3UL;
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND) {
if (task->thread.per_flags & PER_FLAG_TE_ABORT_RAND_TEND)
- cr_new |= 1UL;
+ cr2_new |= 1UL;
else
- cr_new |= 2UL;
+ cr2_new |= 2UL;
}
- if (cr_new != cr)
- __ctl_load(cr_new, 2, 2);
}
+ /* Take care of enable/disable of guarded storage. */
+ if (MACHINE_HAS_GS) {
+ cr2_new &= ~(1UL << 4);
+ if (task->thread.gs_cb)
+ cr2_new |= (1UL << 4);
+ }
+ /* Load control register 0/2 iff changed */
+ cr0_changed = cr0_new != cr0_old;
+ cr2_changed = cr2_new != cr2_old;
+ if (cr0_changed)
+ __ctl_load(cr0_new, 0, 0);
+ if (cr2_changed)
+ __ctl_load(cr2_new, 2, 2);
/* Copy user specified PER registers */
new.control = thread->per_user.control;
new.start = thread->per_user.start;
@@ -1137,6 +1149,36 @@ static int s390_system_call_set(struct task_struct *target,
data, 0, sizeof(unsigned int));
}
+static int s390_gs_cb_get(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ void *kbuf, void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
+static int s390_gs_cb_set(struct task_struct *target,
+ const struct user_regset *regset,
+ unsigned int pos, unsigned int count,
+ const void *kbuf, const void __user *ubuf)
+{
+ struct gs_cb *data = target->thread.gs_cb;
+
+ if (!MACHINE_HAS_GS)
+ return -ENODEV;
+ if (!data)
+ return -ENODATA;
+ return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ data, 0, sizeof(struct gs_cb));
+}
+
static const struct user_regset s390_regsets[] = {
{
.core_note_type = NT_PRSTATUS,
@@ -1194,6 +1236,14 @@ static const struct user_regset s390_regsets[] = {
.get = s390_vxrs_high_get,
.set = s390_vxrs_high_set,
},
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
};
static const struct user_regset_view user_s390_view = {
@@ -1422,6 +1472,14 @@ static const struct user_regset s390_compat_regsets[] = {
.get = s390_compat_regs_high_get,
.set = s390_compat_regs_high_set,
},
+ {
+ .core_note_type = NT_S390_GS_CB,
+ .n = sizeof(struct gs_cb) / sizeof(__u64),
+ .size = sizeof(__u64),
+ .align = sizeof(__u64),
+ .get = s390_gs_cb_get,
+ .set = s390_gs_cb_set,
+ },
};
static const struct user_regset_view user_s390_compat_view = {
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 911dc0b..3ae756c 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -339,9 +339,15 @@ static void __init setup_lowcore(void)
lc->stfl_fac_list = S390_lowcore.stfl_fac_list;
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
MAX_FACILITY_BIT/8);
- if (MACHINE_HAS_VX)
- lc->vector_save_area_addr =
- (unsigned long) &lc->vector_save_area;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ unsigned long bits, size;
+
+ bits = MACHINE_HAS_GS ? 11 : 10;
+ size = 1UL << bits;
+ lc->mcesad = (__u64) memblock_virt_alloc(size, size);
+ if (MACHINE_HAS_GS)
+ lc->mcesad |= bits;
+ }
lc->vdso_per_cpu_data = (unsigned long) &lc->paste[0];
lc->sync_enter_timer = S390_lowcore.sync_enter_timer;
lc->async_enter_timer = S390_lowcore.async_enter_timer;
@@ -779,6 +785,12 @@ static int __init setup_hwcaps(void)
elf_hwcap |= HWCAP_S390_VXRS_BCD;
}
+ /*
+ * Guarded storage support HWCAP_S390_GS is bit 12.
+ */
+ if (MACHINE_HAS_GS)
+ elf_hwcap |= HWCAP_S390_GS;
+
get_cpu_id(&cpu_id);
add_device_randomness(&cpu_id, sizeof(cpu_id));
switch (cpu_id.machine) {
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index 47a973b..286bcee8 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -51,6 +51,7 @@
#include <asm/os_info.h>
#include <asm/sigp.h>
#include <asm/idle.h>
+#include <asm/nmi.h>
#include "entry.h"
enum {
@@ -78,6 +79,8 @@ struct pcpu {
static u8 boot_core_type;
static struct pcpu pcpu_devices[NR_CPUS];
+static struct kmem_cache *pcpu_mcesa_cache;
+
unsigned int smp_cpu_mt_shift;
EXPORT_SYMBOL(smp_cpu_mt_shift);
@@ -188,8 +191,10 @@ static void pcpu_ec_call(struct pcpu *pcpu, int ec_bit)
static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
{
unsigned long async_stack, panic_stack;
+ unsigned long mcesa_origin, mcesa_bits;
struct lowcore *lc;
+ mcesa_origin = mcesa_bits = 0;
if (pcpu != &pcpu_devices[0]) {
pcpu->lowcore = (struct lowcore *)
__get_free_pages(GFP_KERNEL | GFP_DMA, LC_ORDER);
@@ -197,20 +202,27 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
panic_stack = __get_free_page(GFP_KERNEL);
if (!pcpu->lowcore || !panic_stack || !async_stack)
goto out;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ mcesa_origin = (unsigned long)
+ kmem_cache_alloc(pcpu_mcesa_cache, GFP_KERNEL);
+ if (!mcesa_origin)
+ goto out;
+ mcesa_bits = MACHINE_HAS_GS ? 11 : 0;
+ }
} else {
async_stack = pcpu->lowcore->async_stack - ASYNC_FRAME_OFFSET;
panic_stack = pcpu->lowcore->panic_stack - PANIC_FRAME_OFFSET;
+ mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+ mcesa_bits = pcpu->lowcore->mcesad & MCESA_LC_MASK;
}
lc = pcpu->lowcore;
memcpy(lc, &S390_lowcore, 512);
memset((char *) lc + 512, 0, sizeof(*lc) - 512);
lc->async_stack = async_stack + ASYNC_FRAME_OFFSET;
lc->panic_stack = panic_stack + PANIC_FRAME_OFFSET;
+ lc->mcesad = mcesa_origin | mcesa_bits;
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
- if (MACHINE_HAS_VX)
- lc->vector_save_area_addr =
- (unsigned long) &lc->vector_save_area;
if (vdso_alloc_per_cpu(lc))
goto out;
lowcore_ptr[cpu] = lc;
@@ -218,6 +230,9 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
return 0;
out:
if (pcpu != &pcpu_devices[0]) {
+ if (mcesa_origin)
+ kmem_cache_free(pcpu_mcesa_cache,
+ (void *) mcesa_origin);
free_page(panic_stack);
free_pages(async_stack, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -229,11 +244,17 @@ out:
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
+ unsigned long mcesa_origin;
+
pcpu_sigp_retry(pcpu, SIGP_SET_PREFIX, 0);
lowcore_ptr[pcpu - pcpu_devices] = NULL;
vdso_free_per_cpu(pcpu->lowcore);
if (pcpu == &pcpu_devices[0])
return;
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ mcesa_origin = pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK;
+ kmem_cache_free(pcpu_mcesa_cache, (void *) mcesa_origin);
+ }
free_page(pcpu->lowcore->panic_stack-PANIC_FRAME_OFFSET);
free_pages(pcpu->lowcore->async_stack-ASYNC_FRAME_OFFSET, ASYNC_ORDER);
free_pages((unsigned long) pcpu->lowcore, LC_ORDER);
@@ -550,9 +571,11 @@ int smp_store_status(int cpu)
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_STATUS_AT_ADDRESS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
- if (!MACHINE_HAS_VX)
+ if (!MACHINE_HAS_VX && !MACHINE_HAS_GS)
return 0;
- pa = __pa(pcpu->lowcore->vector_save_area_addr);
+ pa = __pa(pcpu->lowcore->mcesad & MCESA_ORIGIN_MASK);
+ if (MACHINE_HAS_GS)
+ pa |= pcpu->lowcore->mcesad & MCESA_LC_MASK;
if (__pcpu_sigp_relax(pcpu->address, SIGP_STORE_ADDITIONAL_STATUS,
pa) != SIGP_CC_ORDER_CODE_ACCEPTED)
return -EIO;
@@ -897,12 +920,22 @@ void __init smp_fill_possible_mask(void)
void __init smp_prepare_cpus(unsigned int max_cpus)
{
+ unsigned long size;
+
/* request the 0x1201 emergency signal external interrupt */
if (register_external_irq(EXT_IRQ_EMERGENCY_SIG, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1201");
/* request the 0x1202 external call external interrupt */
if (register_external_irq(EXT_IRQ_EXTERNAL_CALL, do_ext_call_interrupt))
panic("Couldn't request external interrupt 0x1202");
+ /* create slab cache for the machine-check-extended-save-areas */
+ if (MACHINE_HAS_VX || MACHINE_HAS_GS) {
+ size = 1UL << (MACHINE_HAS_GS ? 11 : 10);
+ pcpu_mcesa_cache = kmem_cache_create("nmi_save_areas",
+ size, size, 0, NULL);
+ if (!pcpu_mcesa_cache)
+ panic("Couldn't create nmi save area cache");
+ }
}
void __init smp_prepare_boot_cpu(void)
diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S
index 2659b5c..54fce7b 100644
--- a/arch/s390/kernel/syscalls.S
+++ b/arch/s390/kernel/syscalls.S
@@ -386,5 +386,5 @@ SYSCALL(sys_mlock2,compat_sys_mlock2)
SYSCALL(sys_copy_file_range,compat_sys_copy_file_range) /* 375 */
SYSCALL(sys_preadv2,compat_sys_preadv2)
SYSCALL(sys_pwritev2,compat_sys_pwritev2)
-NI_SYSCALL
+SYSCALL(sys_s390_guarded_storage,compat_sys_s390_guarded_storage) /* 378 */
SYSCALL(sys_statx,compat_sys_statx)
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index d55c829..709aca9 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -262,7 +262,7 @@ struct aste {
int ipte_lock_held(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->eca & 1) {
+ if (vcpu->arch.sie_block->eca & ECA_SII) {
int rc;
read_lock(&vcpu->kvm->arch.sca_lock);
@@ -361,7 +361,7 @@ static void ipte_unlock_siif(struct kvm_vcpu *vcpu)
void ipte_lock(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->eca & 1)
+ if (vcpu->arch.sie_block->eca & ECA_SII)
ipte_lock_siif(vcpu);
else
ipte_lock_simple(vcpu);
@@ -369,7 +369,7 @@ void ipte_lock(struct kvm_vcpu *vcpu)
void ipte_unlock(struct kvm_vcpu *vcpu)
{
- if (vcpu->arch.sie_block->eca & 1)
+ if (vcpu->arch.sie_block->eca & ECA_SII)
ipte_unlock_siif(vcpu);
else
ipte_unlock_simple(vcpu);
diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
index 59920f9..a4752bf 100644
--- a/arch/s390/kvm/intercept.c
+++ b/arch/s390/kvm/intercept.c
@@ -35,6 +35,7 @@ static const intercept_handler_t instruction_handlers[256] = {
[0xb6] = kvm_s390_handle_stctl,
[0xb7] = kvm_s390_handle_lctl,
[0xb9] = kvm_s390_handle_b9,
+ [0xe3] = kvm_s390_handle_e3,
[0xe5] = kvm_s390_handle_e5,
[0xeb] = kvm_s390_handle_eb,
};
@@ -368,8 +369,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu)
trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa,
vcpu->arch.sie_block->ipb);
- if (vcpu->arch.sie_block->ipa == 0xb256 &&
- test_kvm_facility(vcpu->kvm, 74))
+ if (vcpu->arch.sie_block->ipa == 0xb256)
return handle_sthyi(vcpu);
if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0)
@@ -404,28 +404,31 @@ int kvm_handle_sie_intercept(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
switch (vcpu->arch.sie_block->icptcode) {
- case 0x10:
- case 0x18:
+ case ICPT_EXTREQ:
+ case ICPT_IOREQ:
return handle_noop(vcpu);
- case 0x04:
+ case ICPT_INST:
rc = handle_instruction(vcpu);
break;
- case 0x08:
+ case ICPT_PROGI:
return handle_prog(vcpu);
- case 0x14:
+ case ICPT_EXTINT:
return handle_external_interrupt(vcpu);
- case 0x1c:
+ case ICPT_WAIT:
return kvm_s390_handle_wait(vcpu);
- case 0x20:
+ case ICPT_VALIDITY:
return handle_validity(vcpu);
- case 0x28:
+ case ICPT_STOP:
return handle_stop(vcpu);
- case 0x2c:
+ case ICPT_OPEREXC:
rc = handle_operexc(vcpu);
break;
- case 0x38:
+ case ICPT_PARTEXEC:
rc = handle_partial_execution(vcpu);
break;
+ case ICPT_KSS:
+ rc = kvm_s390_skey_check_enable(vcpu);
+ break;
default:
return -EOPNOTSUPP;
}
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 0f8f141..caf15c8 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -410,6 +410,7 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
struct kvm_s390_mchk_info *mchk)
{
unsigned long ext_sa_addr;
+ unsigned long lc;
freg_t fprs[NUM_FPRS];
union mci mci;
int rc;
@@ -418,12 +419,34 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
/* take care of lazy register loading */
save_fpu_regs();
save_access_regs(vcpu->run->s.regs.acrs);
+ if (MACHINE_HAS_GS && vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
/* Extended save area */
- rc = read_guest_lc(vcpu, __LC_VX_SAVE_AREA_ADDR, &ext_sa_addr,
- sizeof(unsigned long));
- /* Only bits 0-53 are used for address formation */
- ext_sa_addr &= ~0x3ffUL;
+ rc = read_guest_lc(vcpu, __LC_MCESAD, &ext_sa_addr,
+ sizeof(unsigned long));
+ /* Only bits 0 through 63-LC are used for address formation */
+ lc = ext_sa_addr & MCESA_LC_MASK;
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ switch (lc) {
+ case 0:
+ case 10:
+ ext_sa_addr &= ~0x3ffUL;
+ break;
+ case 11:
+ ext_sa_addr &= ~0x7ffUL;
+ break;
+ case 12:
+ ext_sa_addr &= ~0xfffUL;
+ break;
+ default:
+ ext_sa_addr = 0;
+ break;
+ }
+ } else {
+ ext_sa_addr &= ~0x3ffUL;
+ }
+
if (!rc && mci.vr && ext_sa_addr && test_kvm_facility(vcpu->kvm, 129)) {
if (write_guest_abs(vcpu, ext_sa_addr, vcpu->run->s.regs.vrs,
512))
@@ -431,6 +454,14 @@ static int __write_machine_check(struct kvm_vcpu *vcpu,
} else {
mci.vr = 0;
}
+ if (!rc && mci.gs && ext_sa_addr && test_kvm_facility(vcpu->kvm, 133)
+ && (lc == 11 || lc == 12)) {
+ if (write_guest_abs(vcpu, ext_sa_addr + 1024,
+ &vcpu->run->s.regs.gscb, 32))
+ mci.gs = 0;
+ } else {
+ mci.gs = 0;
+ }
/* General interruption information */
rc |= put_guest_lc(vcpu, 1, (u8 __user *) __LC_AR_MODE_ID);
@@ -1968,6 +1999,8 @@ static int register_io_adapter(struct kvm_device *dev,
adapter->maskable = adapter_info.maskable;
adapter->masked = false;
adapter->swap = adapter_info.swap;
+ adapter->suppressible = (adapter_info.flags) &
+ KVM_S390_ADAPTER_SUPPRESSIBLE;
dev->kvm->arch.adapters[adapter->id] = adapter;
return 0;
@@ -2121,6 +2154,87 @@ static int clear_io_irq(struct kvm *kvm, struct kvm_device_attr *attr)
return 0;
}
+static int modify_ais_mode(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_ais_req req;
+ int ret = 0;
+
+ if (!fi->ais_enabled)
+ return -ENOTSUPP;
+
+ if (copy_from_user(&req, (void __user *)attr->addr, sizeof(req)))
+ return -EFAULT;
+
+ if (req.isc > MAX_ISC)
+ return -EINVAL;
+
+ trace_kvm_s390_modify_ais_mode(req.isc,
+ (fi->simm & AIS_MODE_MASK(req.isc)) ?
+ (fi->nimm & AIS_MODE_MASK(req.isc)) ?
+ 2 : KVM_S390_AIS_MODE_SINGLE :
+ KVM_S390_AIS_MODE_ALL, req.mode);
+
+ mutex_lock(&fi->ais_lock);
+ switch (req.mode) {
+ case KVM_S390_AIS_MODE_ALL:
+ fi->simm &= ~AIS_MODE_MASK(req.isc);
+ fi->nimm &= ~AIS_MODE_MASK(req.isc);
+ break;
+ case KVM_S390_AIS_MODE_SINGLE:
+ fi->simm |= AIS_MODE_MASK(req.isc);
+ fi->nimm &= ~AIS_MODE_MASK(req.isc);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ mutex_unlock(&fi->ais_lock);
+
+ return ret;
+}
+
+static int kvm_s390_inject_airq(struct kvm *kvm,
+ struct s390_io_adapter *adapter)
+{
+ struct kvm_s390_float_interrupt *fi = &kvm->arch.float_int;
+ struct kvm_s390_interrupt s390int = {
+ .type = KVM_S390_INT_IO(1, 0, 0, 0),
+ .parm = 0,
+ .parm64 = (adapter->isc << 27) | 0x80000000,
+ };
+ int ret = 0;
+
+ if (!fi->ais_enabled || !adapter->suppressible)
+ return kvm_s390_inject_vm(kvm, &s390int);
+
+ mutex_lock(&fi->ais_lock);
+ if (fi->nimm & AIS_MODE_MASK(adapter->isc)) {
+ trace_kvm_s390_airq_suppressed(adapter->id, adapter->isc);
+ goto out;
+ }
+
+ ret = kvm_s390_inject_vm(kvm, &s390int);
+ if (!ret && (fi->simm & AIS_MODE_MASK(adapter->isc))) {
+ fi->nimm |= AIS_MODE_MASK(adapter->isc);
+ trace_kvm_s390_modify_ais_mode(adapter->isc,
+ KVM_S390_AIS_MODE_SINGLE, 2);
+ }
+out:
+ mutex_unlock(&fi->ais_lock);
+ return ret;
+}
+
+static int flic_inject_airq(struct kvm *kvm, struct kvm_device_attr *attr)
+{
+ unsigned int id = attr->attr;
+ struct s390_io_adapter *adapter = get_io_adapter(kvm, id);
+
+ if (!adapter)
+ return -EINVAL;
+
+ return kvm_s390_inject_airq(kvm, adapter);
+}
+
static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
{
int r = 0;
@@ -2157,6 +2271,12 @@ static int flic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
r = clear_io_irq(dev->kvm, attr);
break;
+ case KVM_DEV_FLIC_AISM:
+ r = modify_ais_mode(dev->kvm, attr);
+ break;
+ case KVM_DEV_FLIC_AIRQ_INJECT:
+ r = flic_inject_airq(dev->kvm, attr);
+ break;
default:
r = -EINVAL;
}
@@ -2176,6 +2296,8 @@ static int flic_has_attr(struct kvm_device *dev,
case KVM_DEV_FLIC_ADAPTER_REGISTER:
case KVM_DEV_FLIC_ADAPTER_MODIFY:
case KVM_DEV_FLIC_CLEAR_IO_IRQ:
+ case KVM_DEV_FLIC_AISM:
+ case KVM_DEV_FLIC_AIRQ_INJECT:
return 0;
}
return -ENXIO;
@@ -2286,12 +2408,7 @@ static int set_adapter_int(struct kvm_kernel_irq_routing_entry *e,
ret = adapter_indicators_set(kvm, adapter, &e->adapter);
up_read(&adapter->maps_lock);
if ((ret > 0) && !adapter->masked) {
- struct kvm_s390_interrupt s390int = {
- .type = KVM_S390_INT_IO(1, 0, 0, 0),
- .parm = 0,
- .parm64 = (adapter->isc << 27) | 0x80000000,
- };
- ret = kvm_s390_inject_vm(kvm, &s390int);
+ ret = kvm_s390_inject_airq(kvm, adapter);
if (ret == 0)
ret = 1;
}
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index fd6cd05b..4bafb0a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -276,6 +276,10 @@ static void kvm_s390_cpu_feat_init(void)
__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
kvm_s390_available_subfunc.ppno);
+ if (test_facility(146)) /* MSA8 */
+ __cpacf_query(CPACF_KMA, (cpacf_mask_t *)
+ kvm_s390_available_subfunc.kma);
+
if (MACHINE_HAS_ESOP)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
/*
@@ -300,6 +304,8 @@ static void kvm_s390_cpu_feat_init(void)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_CEI);
if (sclp.has_ibs)
allow_cpu_feat(KVM_S390_VM_CPU_FEAT_IBS);
+ if (sclp.has_kss)
+ allow_cpu_feat(KVM_S390_VM_CPU_FEAT_KSS);
/*
* KVM_S390_VM_CPU_FEAT_SKEY: Wrong shadow of PTE.I bits will make
* all skey handling functions read/set the skey from the PGSTE
@@ -380,6 +386,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_SKEYS:
case KVM_CAP_S390_IRQ_STATE:
case KVM_CAP_S390_USER_INSTR0:
+ case KVM_CAP_S390_AIS:
r = 1;
break;
case KVM_CAP_S390_MEM_OP:
@@ -405,6 +412,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_RI:
r = test_facility(64);
break;
+ case KVM_CAP_S390_GS:
+ r = test_facility(133);
+ break;
default:
r = 0;
}
@@ -541,6 +551,34 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap)
VM_EVENT(kvm, 3, "ENABLE: CAP_S390_RI %s",
r ? "(not available)" : "(success)");
break;
+ case KVM_CAP_S390_AIS:
+ mutex_lock(&kvm->lock);
+ if (kvm->created_vcpus) {
+ r = -EBUSY;
+ } else {
+ set_kvm_facility(kvm->arch.model.fac_mask, 72);
+ set_kvm_facility(kvm->arch.model.fac_list, 72);
+ kvm->arch.float_int.ais_enabled = 1;
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: AIS %s",
+ r ? "(not available)" : "(success)");
+ break;
+ case KVM_CAP_S390_GS:
+ r = -EINVAL;
+ mutex_lock(&kvm->lock);
+ if (atomic_read(&kvm->online_vcpus)) {
+ r = -EBUSY;
+ } else if (test_facility(133)) {
+ set_kvm_facility(kvm->arch.model.fac_mask, 133);
+ set_kvm_facility(kvm->arch.model.fac_list, 133);
+ r = 0;
+ }
+ mutex_unlock(&kvm->lock);
+ VM_EVENT(kvm, 3, "ENABLE: CAP_S390_GS %s",
+ r ? "(not available)" : "(success)");
+ break;
case KVM_CAP_S390_USER_STSI:
VM_EVENT(kvm, 3, "%s", "ENABLE: CAP_S390_USER_STSI");
kvm->arch.user_stsi = 1;
@@ -1498,6 +1536,10 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm_s390_crypto_init(kvm);
+ mutex_init(&kvm->arch.float_int.ais_lock);
+ kvm->arch.float_int.simm = 0;
+ kvm->arch.float_int.nimm = 0;
+ kvm->arch.float_int.ais_enabled = 0;
spin_lock_init(&kvm->arch.float_int.lock);
for (i = 0; i < FIRQ_LIST_COUNT; i++)
INIT_LIST_HEAD(&kvm->arch.float_int.lists[i]);
@@ -1646,7 +1688,7 @@ static void sca_add_vcpu(struct kvm_vcpu *vcpu)
sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block;
vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32);
vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU;
- vcpu->arch.sie_block->ecb2 |= 0x04U;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn);
} else {
struct bsca_block *sca = vcpu->kvm->arch.sca;
@@ -1700,7 +1742,7 @@ static int sca_switch_to_extended(struct kvm *kvm)
kvm_for_each_vcpu(vcpu_idx, vcpu, kvm) {
vcpu->arch.sie_block->scaoh = scaoh;
vcpu->arch.sie_block->scaol = scaol;
- vcpu->arch.sie_block->ecb2 |= 0x04U;
+ vcpu->arch.sie_block->ecb2 |= ECB2_ESCA;
}
kvm->arch.sca = new_sca;
kvm->arch.use_esca = 1;
@@ -1749,6 +1791,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
kvm_s390_set_prefix(vcpu, 0);
if (test_kvm_facility(vcpu->kvm, 64))
vcpu->run->kvm_valid_regs |= KVM_SYNC_RICCB;
+ if (test_kvm_facility(vcpu->kvm, 133))
+ vcpu->run->kvm_valid_regs |= KVM_SYNC_GSCB;
/* fprs can be synchronized via vrs, even if the guest has no vx. With
* MACHINE_HAS_VX, (load|store)_fpu_regs() will work with vrs format.
*/
@@ -1939,8 +1983,8 @@ int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu)
if (!vcpu->arch.sie_block->cbrlo)
return -ENOMEM;
- vcpu->arch.sie_block->ecb2 |= 0x80;
- vcpu->arch.sie_block->ecb2 &= ~0x08;
+ vcpu->arch.sie_block->ecb2 |= ECB2_CMMA;
+ vcpu->arch.sie_block->ecb2 &= ~ECB2_PFMFI;
return 0;
}
@@ -1970,31 +2014,37 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
/* pgste_set_pte has special handling for !MACHINE_HAS_ESOP */
if (MACHINE_HAS_ESOP)
- vcpu->arch.sie_block->ecb |= 0x02;
+ vcpu->arch.sie_block->ecb |= ECB_HOSTPROTINT;
if (test_kvm_facility(vcpu->kvm, 9))
- vcpu->arch.sie_block->ecb |= 0x04;
+ vcpu->arch.sie_block->ecb |= ECB_SRSI;
if (test_kvm_facility(vcpu->kvm, 73))
- vcpu->arch.sie_block->ecb |= 0x10;
+ vcpu->arch.sie_block->ecb |= ECB_TE;
if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi)
- vcpu->arch.sie_block->ecb2 |= 0x08;
+ vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
if (test_kvm_facility(vcpu->kvm, 130))
- vcpu->arch.sie_block->ecb2 |= 0x20;
- vcpu->arch.sie_block->eca = 0x1002000U;
+ vcpu->arch.sie_block->ecb2 |= ECB2_IEP;
+ vcpu->arch.sie_block->eca = ECA_MVPGI | ECA_PROTEXCI;
if (sclp.has_cei)
- vcpu->arch.sie_block->eca |= 0x80000000U;
+ vcpu->arch.sie_block->eca |= ECA_CEI;
if (sclp.has_ib)
- vcpu->arch.sie_block->eca |= 0x40000000U;
+ vcpu->arch.sie_block->eca |= ECA_IB;
if (sclp.has_siif)
- vcpu->arch.sie_block->eca |= 1;
+ vcpu->arch.sie_block->eca |= ECA_SII;
if (sclp.has_sigpif)
- vcpu->arch.sie_block->eca |= 0x10000000U;
+ vcpu->arch.sie_block->eca |= ECA_SIGPI;
if (test_kvm_facility(vcpu->kvm, 129)) {
- vcpu->arch.sie_block->eca |= 0x00020000;
- vcpu->arch.sie_block->ecd |= 0x20000000;
+ vcpu->arch.sie_block->eca |= ECA_VX;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
}
+ vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx)
+ | SDNXC;
vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb;
- vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
+ if (sclp.has_kss)
+ atomic_or(CPUSTAT_KSS, &vcpu->arch.sie_block->cpuflags);
+ else
+ vcpu->arch.sie_block->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
if (vcpu->kvm->arch.use_cmma) {
rc = kvm_s390_vcpu_setup_cmma(vcpu);
@@ -2446,7 +2496,7 @@ retry:
}
/* nothing to do, just clear the request */
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
return 0;
}
@@ -2719,6 +2769,11 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
{
+ struct runtime_instr_cb *riccb;
+ struct gs_cb *gscb;
+
+ riccb = (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
+ gscb = (struct gs_cb *) &kvm_run->s.regs.gscb;
vcpu->arch.sie_block->gpsw.mask = kvm_run->psw_mask;
vcpu->arch.sie_block->gpsw.addr = kvm_run->psw_addr;
if (kvm_run->kvm_dirty_regs & KVM_SYNC_PREFIX)
@@ -2747,12 +2802,24 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
* we should enable RI here instead of doing the lazy enablement.
*/
if ((kvm_run->kvm_dirty_regs & KVM_SYNC_RICCB) &&
- test_kvm_facility(vcpu->kvm, 64)) {
- struct runtime_instr_cb *riccb =
- (struct runtime_instr_cb *) &kvm_run->s.regs.riccb;
-
- if (riccb->valid)
- vcpu->arch.sie_block->ecb3 |= 0x01;
+ test_kvm_facility(vcpu->kvm, 64) &&
+ riccb->valid &&
+ !(vcpu->arch.sie_block->ecb3 & ECB3_RI)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (sync_regs)");
+ vcpu->arch.sie_block->ecb3 |= ECB3_RI;
+ }
+ /*
+ * If userspace sets the gscb (e.g. after migration) to non-zero,
+ * we should enable GS here instead of doing the lazy enablement.
+ */
+ if ((kvm_run->kvm_dirty_regs & KVM_SYNC_GSCB) &&
+ test_kvm_facility(vcpu->kvm, 133) &&
+ gscb->gssm &&
+ !vcpu->arch.gs_enabled) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (sync_regs)");
+ vcpu->arch.sie_block->ecb |= ECB_GS;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ vcpu->arch.gs_enabled = 1;
}
save_access_regs(vcpu->arch.host_acrs);
restore_access_regs(vcpu->run->s.regs.acrs);
@@ -2768,6 +2835,20 @@ static void sync_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (test_fp_ctl(current->thread.fpu.fpc))
/* User space provided an invalid FPC, let's clear it */
current->thread.fpu.fpc = 0;
+ if (MACHINE_HAS_GS) {
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ if (current->thread.gs_cb) {
+ vcpu->arch.host_gscb = current->thread.gs_cb;
+ save_gs_cb(vcpu->arch.host_gscb);
+ }
+ if (vcpu->arch.gs_enabled) {
+ current->thread.gs_cb = (struct gs_cb *)
+ &vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ }
+ preempt_enable();
+ }
kvm_run->kvm_dirty_regs = 0;
}
@@ -2794,6 +2875,18 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
/* Restore will be done lazily at return */
current->thread.fpu.fpc = vcpu->arch.host_fpregs.fpc;
current->thread.fpu.regs = vcpu->arch.host_fpregs.regs;
+ if (MACHINE_HAS_GS) {
+ __ctl_set_bit(2, 4);
+ if (vcpu->arch.gs_enabled)
+ save_gs_cb(current->thread.gs_cb);
+ preempt_disable();
+ current->thread.gs_cb = vcpu->arch.host_gscb;
+ restore_gs_cb(vcpu->arch.host_gscb);
+ preempt_enable();
+ if (!vcpu->arch.host_gscb)
+ __ctl_clear_bit(2, 4);
+ vcpu->arch.host_gscb = NULL;
+ }
}
diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h
index af9fa91..55f5c84 100644
--- a/arch/s390/kvm/kvm-s390.h
+++ b/arch/s390/kvm/kvm-s390.h
@@ -25,7 +25,7 @@
typedef int (*intercept_handler_t)(struct kvm_vcpu *vcpu);
/* Transactional Memory Execution related macros */
-#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & 0x10))
+#define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE))
#define TDB_FORMAT1 1
#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1))
@@ -246,6 +246,7 @@ static inline void kvm_s390_retry_instr(struct kvm_vcpu *vcpu)
int is_valid_psw(psw_t *psw);
int kvm_s390_handle_aa(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b2(struct kvm_vcpu *vcpu);
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu);
int kvm_s390_handle_e5(struct kvm_vcpu *vcpu);
int kvm_s390_handle_01(struct kvm_vcpu *vcpu);
int kvm_s390_handle_b9(struct kvm_vcpu *vcpu);
@@ -253,6 +254,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu);
int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu);
int kvm_s390_handle_eb(struct kvm_vcpu *vcpu);
+int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu);
/* implemented in vsie.c */
int kvm_s390_handle_vsie(struct kvm_vcpu *vcpu);
diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c
index 64b6a30..c03106c 100644
--- a/arch/s390/kvm/priv.c
+++ b/arch/s390/kvm/priv.c
@@ -37,7 +37,8 @@
static int handle_ri(struct kvm_vcpu *vcpu)
{
if (test_kvm_facility(vcpu->kvm, 64)) {
- vcpu->arch.sie_block->ecb3 |= 0x01;
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: RI (lazy)");
+ vcpu->arch.sie_block->ecb3 |= ECB3_RI;
kvm_s390_retry_instr(vcpu);
return 0;
} else
@@ -52,6 +53,33 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu)
return -EOPNOTSUPP;
}
+static int handle_gs(struct kvm_vcpu *vcpu)
+{
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ VCPU_EVENT(vcpu, 3, "%s", "ENABLE: GS (lazy)");
+ preempt_disable();
+ __ctl_set_bit(2, 4);
+ current->thread.gs_cb = (struct gs_cb *)&vcpu->run->s.regs.gscb;
+ restore_gs_cb(current->thread.gs_cb);
+ preempt_enable();
+ vcpu->arch.sie_block->ecb |= ECB_GS;
+ vcpu->arch.sie_block->ecd |= ECD_HOSTREGMGMT;
+ vcpu->arch.gs_enabled = 1;
+ kvm_s390_retry_instr(vcpu);
+ return 0;
+ } else
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+}
+
+int kvm_s390_handle_e3(struct kvm_vcpu *vcpu)
+{
+ int code = vcpu->arch.sie_block->ipb & 0xff;
+
+ if (code == 0x49 || code == 0x4d)
+ return handle_gs(vcpu);
+ else
+ return -EOPNOTSUPP;
+}
/* Handle SCK (SET CLOCK) interception */
static int handle_set_clock(struct kvm_vcpu *vcpu)
{
@@ -170,18 +198,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu)
return 0;
}
-static int __skey_check_enable(struct kvm_vcpu *vcpu)
+int kvm_s390_skey_check_enable(struct kvm_vcpu *vcpu)
{
int rc = 0;
+ struct kvm_s390_sie_block *sie_block = vcpu->arch.sie_block;
trace_kvm_s390_skey_related_inst(vcpu);
- if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)))
+ if (!(sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE)) &&
+ !(atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS))
return rc;
rc = s390_enable_skey();
VCPU_EVENT(vcpu, 3, "enabling storage keys for guest: %d", rc);
- if (!rc)
- vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE);
+ if (!rc) {
+ if (atomic_read(&sie_block->cpuflags) & CPUSTAT_KSS)
+ atomic_andnot(CPUSTAT_KSS, &sie_block->cpuflags);
+ else
+ sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE |
+ ICTL_RRBE);
+ }
return rc;
}
@@ -190,7 +225,7 @@ static int try_handle_skey(struct kvm_vcpu *vcpu)
int rc;
vcpu->stat.instruction_storage_key++;
- rc = __skey_check_enable(vcpu);
+ rc = kvm_s390_skey_check_enable(vcpu);
if (rc)
return rc;
if (sclp.has_skey) {
@@ -759,6 +794,7 @@ static const intercept_handler_t b2_handlers[256] = {
[0x3b] = handle_io_inst,
[0x3c] = handle_io_inst,
[0x50] = handle_ipte_interlock,
+ [0x56] = handle_sthyi,
[0x5f] = handle_io_inst,
[0x74] = handle_io_inst,
[0x76] = handle_io_inst,
@@ -887,7 +923,7 @@ static int handle_pfmf(struct kvm_vcpu *vcpu)
}
if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) {
- int rc = __skey_check_enable(vcpu);
+ int rc = kvm_s390_skey_check_enable(vcpu);
if (rc)
return rc;
diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c
index 05c98bb..926b524 100644
--- a/arch/s390/kvm/sthyi.c
+++ b/arch/s390/kvm/sthyi.c
@@ -404,6 +404,9 @@ int handle_sthyi(struct kvm_vcpu *vcpu)
u64 code, addr, cc = 0;
struct sthyi_sctns *sctns = NULL;
+ if (!test_kvm_facility(vcpu->kvm, 74))
+ return kvm_s390_inject_program_int(vcpu, PGM_OPERATION);
+
/*
* STHYI requires extensive locking in the higher hypervisors
* and is very computational/memory expensive. Therefore we
diff --git a/arch/s390/kvm/trace-s390.h b/arch/s390/kvm/trace-s390.h
index 396485b..78b7e84 100644
--- a/arch/s390/kvm/trace-s390.h
+++ b/arch/s390/kvm/trace-s390.h
@@ -280,6 +280,58 @@ TRACE_EVENT(kvm_s390_enable_disable_ibs,
__entry->state ? "enabling" : "disabling", __entry->id)
);
+/*
+ * Trace point for modifying ais mode for a given isc.
+ */
+TRACE_EVENT(kvm_s390_modify_ais_mode,
+ TP_PROTO(__u8 isc, __u16 from, __u16 to),
+ TP_ARGS(isc, from, to),
+
+ TP_STRUCT__entry(
+ __field(__u8, isc)
+ __field(__u16, from)
+ __field(__u16, to)
+ ),
+
+ TP_fast_assign(
+ __entry->isc = isc;
+ __entry->from = from;
+ __entry->to = to;
+ ),
+
+ TP_printk("for isc %x, modifying interruption mode from %s to %s",
+ __entry->isc,
+ (__entry->from == KVM_S390_AIS_MODE_ALL) ?
+ "ALL-Interruptions Mode" :
+ (__entry->from == KVM_S390_AIS_MODE_SINGLE) ?
+ "Single-Interruption Mode" : "No-Interruptions Mode",
+ (__entry->to == KVM_S390_AIS_MODE_ALL) ?
+ "ALL-Interruptions Mode" :
+ (__entry->to == KVM_S390_AIS_MODE_SINGLE) ?
+ "Single-Interruption Mode" : "No-Interruptions Mode")
+ );
+
+/*
+ * Trace point for suppressed adapter I/O interrupt.
+ */
+TRACE_EVENT(kvm_s390_airq_suppressed,
+ TP_PROTO(__u32 id, __u8 isc),
+ TP_ARGS(id, isc),
+
+ TP_STRUCT__entry(
+ __field(__u32, id)
+ __field(__u8, isc)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->isc = isc;
+ ),
+
+ TP_printk("adapter I/O interrupt suppressed (id:%x isc:%x)",
+ __entry->id, __entry->isc)
+ );
+
#endif /* _TRACE_KVMS390_H */
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index 5491be3..4719ecb 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -117,6 +117,8 @@ static int prepare_cpuflags(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
newflags |= cpuflags & CPUSTAT_SM;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IBS))
newflags |= cpuflags & CPUSTAT_IBS;
+ if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_KSS))
+ newflags |= cpuflags & CPUSTAT_KSS;
atomic_set(&scb_s->cpuflags, newflags);
return 0;
@@ -249,7 +251,7 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
{
struct kvm_s390_sie_block *scb_o = vsie_page->scb_o;
struct kvm_s390_sie_block *scb_s = &vsie_page->scb_s;
- bool had_tx = scb_s->ecb & 0x10U;
+ bool had_tx = scb_s->ecb & ECB_TE;
unsigned long new_mso = 0;
int rc;
@@ -289,7 +291,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
* bits. Therefore we cannot provide interpretation and would later
* have to provide own emulation handlers.
*/
- scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+ if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_KSS))
+ scb_s->ictl |= ICTL_ISKE | ICTL_SSKE | ICTL_RRBE;
+
scb_s->icpua = scb_o->icpua;
if (!(atomic_read(&scb_s->cpuflags) & CPUSTAT_SM))
@@ -307,34 +311,39 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
scb_s->ihcpu = scb_o->ihcpu;
/* MVPG and Protection Exception Interpretation are always available */
- scb_s->eca |= scb_o->eca & 0x01002000U;
+ scb_s->eca |= scb_o->eca & (ECA_MVPGI | ECA_PROTEXCI);
/* Host-protection-interruption introduced with ESOP */
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_ESOP))
- scb_s->ecb |= scb_o->ecb & 0x02U;
+ scb_s->ecb |= scb_o->ecb & ECB_HOSTPROTINT;
/* transactional execution */
if (test_kvm_facility(vcpu->kvm, 73)) {
/* remap the prefix is tx is toggled on */
- if ((scb_o->ecb & 0x10U) && !had_tx)
+ if ((scb_o->ecb & ECB_TE) && !had_tx)
prefix_unmapped(vsie_page);
- scb_s->ecb |= scb_o->ecb & 0x10U;
+ scb_s->ecb |= scb_o->ecb & ECB_TE;
}
/* SIMD */
if (test_kvm_facility(vcpu->kvm, 129)) {
- scb_s->eca |= scb_o->eca & 0x00020000U;
- scb_s->ecd |= scb_o->ecd & 0x20000000U;
+ scb_s->eca |= scb_o->eca & ECA_VX;
+ scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
}
/* Run-time-Instrumentation */
if (test_kvm_facility(vcpu->kvm, 64))
- scb_s->ecb3 |= scb_o->ecb3 & 0x01U;
+ scb_s->ecb3 |= scb_o->ecb3 & ECB3_RI;
/* Instruction Execution Prevention */
if (test_kvm_facility(vcpu->kvm, 130))
- scb_s->ecb2 |= scb_o->ecb2 & 0x20U;
+ scb_s->ecb2 |= scb_o->ecb2 & ECB2_IEP;
+ /* Guarded Storage */
+ if (test_kvm_facility(vcpu->kvm, 133)) {
+ scb_s->ecb |= scb_o->ecb & ECB_GS;
+ scb_s->ecd |= scb_o->ecd & ECD_HOSTREGMGMT;
+ }
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF))
- scb_s->eca |= scb_o->eca & 0x00000001U;
+ scb_s->eca |= scb_o->eca & ECA_SII;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB))
- scb_s->eca |= scb_o->eca & 0x40000000U;
+ scb_s->eca |= scb_o->eca & ECA_IB;
if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_CEI))
- scb_s->eca |= scb_o->eca & 0x80000000U;
+ scb_s->eca |= scb_o->eca & ECA_CEI;
prepare_ibc(vcpu, vsie_page);
rc = shadow_crycb(vcpu, vsie_page);
@@ -406,7 +415,7 @@ static int map_prefix(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
prefix += scb_s->mso;
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap, prefix);
- if (!rc && (scb_s->ecb & 0x10U))
+ if (!rc && (scb_s->ecb & ECB_TE))
rc = kvm_s390_shadow_fault(vcpu, vsie_page->gmap,
prefix + PAGE_SIZE);
/*
@@ -496,6 +505,13 @@ static void unpin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
unpin_guest_page(vcpu->kvm, gpa, hpa);
scb_s->riccbd = 0;
}
+
+ hpa = scb_s->sdnxo;
+ if (hpa) {
+ gpa = scb_o->sdnxo;
+ unpin_guest_page(vcpu->kvm, gpa, hpa);
+ scb_s->sdnxo = 0;
+ }
}
/*
@@ -543,7 +559,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
gpa = scb_o->itdba & ~0xffUL;
- if (gpa && (scb_s->ecb & 0x10U)) {
+ if (gpa && (scb_s->ecb & ECB_TE)) {
if (!(gpa & ~0x1fffU)) {
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
@@ -558,8 +574,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
gpa = scb_o->gvrd & ~0x1ffUL;
- if (gpa && (scb_s->eca & 0x00020000U) &&
- !(scb_s->ecd & 0x20000000U)) {
+ if (gpa && (scb_s->eca & ECA_VX) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x1310U);
goto unpin;
@@ -577,7 +592,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
}
gpa = scb_o->riccbd & ~0x3fUL;
- if (gpa && (scb_s->ecb3 & 0x01U)) {
+ if (gpa && (scb_s->ecb3 & ECB3_RI)) {
if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0043U);
goto unpin;
@@ -591,6 +606,33 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
goto unpin;
scb_s->riccbd = hpa;
}
+ if ((scb_s->ecb & ECB_GS) && !(scb_s->ecd & ECD_HOSTREGMGMT)) {
+ unsigned long sdnxc;
+
+ gpa = scb_o->sdnxo & ~0xfUL;
+ sdnxc = scb_o->sdnxo & 0xfUL;
+ if (!gpa || !(gpa & ~0x1fffUL)) {
+ rc = set_validity_icpt(scb_s, 0x10b0U);
+ goto unpin;
+ }
+ if (sdnxc < 6 || sdnxc > 12) {
+ rc = set_validity_icpt(scb_s, 0x10b1U);
+ goto unpin;
+ }
+ if (gpa & ((1 << sdnxc) - 1)) {
+ rc = set_validity_icpt(scb_s, 0x10b2U);
+ goto unpin;
+ }
+ /* Due to alignment rules (checked above) this cannot
+ * cross page boundaries
+ */
+ rc = pin_guest_page(vcpu->kvm, gpa, &hpa);
+ if (rc == -EINVAL)
+ rc = set_validity_icpt(scb_s, 0x10b0U);
+ if (rc)
+ goto unpin;
+ scb_s->sdnxo = hpa | sdnxc;
+ }
return 0;
unpin:
unpin_blocks(vcpu, vsie_page);
diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c
index 0cf802d..be63fbd 100644
--- a/arch/s390/tools/gen_facilities.c
+++ b/arch/s390/tools/gen_facilities.c
@@ -82,6 +82,7 @@ static struct facility_def facility_defs[] = {
78, /* enhanced-DAT 2 */
130, /* instruction-execution-protection */
131, /* enhanced-SOP 2 and side-effect */
+ 146, /* msa extension 8 */
-1 /* END */
}
},
diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
index de5d572..cd1fa97 100644
--- a/arch/um/include/shared/os.h
+++ b/arch/um/include/shared/os.h
@@ -302,8 +302,8 @@ extern int ignore_sigio_fd(int fd);
extern void maybe_sigio_broken(int fd, int read);
extern void sigio_broken(int fd, int read);
-/* sys-x86_64/prctl.c */
-extern int os_arch_prctl(int pid, int code, unsigned long *addr);
+/* prctl.c */
+extern int os_arch_prctl(int pid, int option, unsigned long *arg2);
/* tty.c */
extern int get_pty(void);
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index 9ba050f..0af59fa 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -390,3 +390,4 @@
381 i386 pkey_alloc sys_pkey_alloc
382 i386 pkey_free sys_pkey_free
383 i386 statx sys_statx
+384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index b04bb6d..0fe0044 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -187,6 +187,7 @@
* Reuse free bits when adding new feature flags!
*/
#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_CPUID_FAULT ( 7*32+ 1) /* Intel CPUID faulting */
#define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */
#define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
#define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 3e8c287..0559626 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -221,6 +221,9 @@ struct x86_emulate_ops {
void (*get_cpuid)(struct x86_emulate_ctxt *ctxt,
u32 *eax, u32 *ebx, u32 *ecx, u32 *edx);
void (*set_nmi_mask)(struct x86_emulate_ctxt *ctxt, bool masked);
+
+ unsigned (*get_hflags)(struct x86_emulate_ctxt *ctxt);
+ void (*set_hflags)(struct x86_emulate_ctxt *ctxt, unsigned hflags);
};
typedef u32 __attribute__((vector_size(16))) sse128_t;
@@ -290,7 +293,6 @@ struct x86_emulate_ctxt {
/* interruptibility state, as a result of execution of STI or MOV SS */
int interruptibility;
- int emul_flags;
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74ef58c..84c8489 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -43,8 +43,6 @@
#define KVM_PRIVATE_MEM_SLOTS 3
#define KVM_MEM_SLOTS_NUM (KVM_USER_MEM_SLOTS + KVM_PRIVATE_MEM_SLOTS)
-#define KVM_PIO_PAGE_OFFSET 1
-#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
#define KVM_HALT_POLL_NS_DEFAULT 400000
#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS
@@ -63,10 +61,10 @@
#define KVM_REQ_PMI 19
#define KVM_REQ_SMI 20
#define KVM_REQ_MASTERCLOCK_UPDATE 21
-#define KVM_REQ_MCLOCK_INPROGRESS 22
-#define KVM_REQ_SCAN_IOAPIC 23
+#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
+#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24
-#define KVM_REQ_APIC_PAGE_RELOAD 25
+#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
#define KVM_REQ_HV_CRASH 26
#define KVM_REQ_IOAPIC_EOI_EXIT 27
#define KVM_REQ_HV_RESET 28
@@ -343,9 +341,10 @@ struct kvm_mmu {
void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
u64 *spte, const void *pte);
hpa_t root_hpa;
- int root_level;
- int shadow_root_level;
union kvm_mmu_page_role base_role;
+ u8 root_level;
+ u8 shadow_root_level;
+ u8 ept_ad;
bool direct_map;
/*
@@ -612,6 +611,8 @@ struct kvm_vcpu_arch {
unsigned long dr7;
unsigned long eff_db[KVM_NR_DB_REGS];
unsigned long guest_debug_dr7;
+ u64 msr_platform_info;
+ u64 msr_misc_features_enables;
u64 mcg_cap;
u64 mcg_status;
@@ -727,6 +728,7 @@ struct kvm_hv {
enum kvm_irqchip_mode {
KVM_IRQCHIP_NONE,
+ KVM_IRQCHIP_INIT_IN_PROGRESS, /* temporarily set during creation */
KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */
KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */
};
diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h
index d74747b..c4eda79 100644
--- a/arch/x86/include/asm/kvm_page_track.h
+++ b/arch/x86/include/asm/kvm_page_track.h
@@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node {
};
void kvm_page_track_init(struct kvm *kvm);
+void kvm_page_track_cleanup(struct kvm *kvm);
void kvm_page_track_free_memslot(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont);
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index d8b5f8a..673f9ac 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -45,6 +45,8 @@
#define MSR_IA32_PERFCTR1 0x000000c2
#define MSR_FSB_FREQ 0x000000cd
#define MSR_PLATFORM_INFO 0x000000ce
+#define MSR_PLATFORM_INFO_CPUID_FAULT_BIT 31
+#define MSR_PLATFORM_INFO_CPUID_FAULT BIT_ULL(MSR_PLATFORM_INFO_CPUID_FAULT_BIT)
#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2
#define NHM_C3_AUTO_DEMOTE (1UL << 25)
@@ -127,6 +129,7 @@
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
@@ -552,10 +555,12 @@
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39
#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT)
-/* MISC_FEATURE_ENABLES non-architectural features */
-#define MSR_MISC_FEATURE_ENABLES 0x00000140
+/* MISC_FEATURES_ENABLES non-architectural features */
+#define MSR_MISC_FEATURES_ENABLES 0x00000140
-#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT 0
+#define MSR_MISC_FEATURES_ENABLES_CPUID_FAULT BIT_ULL(MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT)
+#define MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT 1
#define MSR_IA32_TSC_DEADLINE 0x000006E0
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f385eca..a80c1b3 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -884,6 +884,8 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip,
extern int get_tsc_mode(unsigned long adr);
extern int set_tsc_mode(unsigned int val);
+DECLARE_PER_CPU(u64, msr_misc_features_shadow);
+
/* Register/unregister a process' MPX related resource */
#define MPX_ENABLE_MANAGEMENT() mpx_enable_management()
#define MPX_DISABLE_MANAGEMENT() mpx_disable_management()
diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h
index 9b9b30b1..8d3964f 100644
--- a/arch/x86/include/asm/proto.h
+++ b/arch/x86/include/asm/proto.h
@@ -9,6 +9,7 @@ void syscall_init(void);
#ifdef CONFIG_X86_64
void entry_SYSCALL_64(void);
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2);
#endif
#ifdef CONFIG_X86_32
@@ -30,6 +31,7 @@ void x86_report_nx(void);
extern int reboot_force;
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr);
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled);
#endif /* _ASM_X86_PROTO_H */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index ad6f5eb0..9fc44b9 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -87,6 +87,7 @@ struct thread_info {
#define TIF_SECCOMP 8 /* secure computing */
#define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */
#define TIF_UPROBE 12 /* breakpointed or singlestepping */
+#define TIF_NOCPUID 15 /* CPUID is not accessible in userland */
#define TIF_NOTSC 16 /* TSC is not accessible in userland */
#define TIF_IA32 17 /* IA32 compatibility process */
#define TIF_NOHZ 19 /* in adaptive nohz mode */
@@ -110,6 +111,7 @@ struct thread_info {
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY)
#define _TIF_UPROBE (1 << TIF_UPROBE)
+#define _TIF_NOCPUID (1 << TIF_NOCPUID)
#define _TIF_NOTSC (1 << TIF_NOTSC)
#define _TIF_IA32 (1 << TIF_IA32)
#define _TIF_NOHZ (1 << TIF_NOHZ)
@@ -138,7 +140,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
@@ -239,6 +241,8 @@ static inline int arch_within_stack_frames(const void * const stack,
extern void arch_task_cache_init(void);
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
extern void arch_release_task_struct(struct task_struct *tsk);
+extern void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
#endif /* !__ASSEMBLY__ */
#endif /* _ASM_X86_THREAD_INFO_H */
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index fc5abff..75d002b 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -110,6 +110,16 @@ static inline void cr4_clear_bits(unsigned long mask)
}
}
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ cr4 ^= mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cc54b70..35cd06f 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -70,8 +70,10 @@
#define SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100
#define SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200
#define SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400
+#define SECONDARY_EXEC_RDRAND 0x00000800
#define SECONDARY_EXEC_ENABLE_INVPCID 0x00001000
#define SECONDARY_EXEC_SHADOW_VMCS 0x00004000
+#define SECONDARY_EXEC_RDSEED 0x00010000
#define SECONDARY_EXEC_ENABLE_PML 0x00020000
#define SECONDARY_EXEC_XSAVES 0x00100000
#define SECONDARY_EXEC_TSC_SCALING 0x02000000
@@ -516,12 +518,14 @@ struct vmx_msr_entry {
#define EPT_VIOLATION_READABLE_BIT 3
#define EPT_VIOLATION_WRITABLE_BIT 4
#define EPT_VIOLATION_EXECUTABLE_BIT 5
+#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT)
#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT)
#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT)
#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT)
#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT)
#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GVA_TRANSLATED (1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
/*
* VM-instruction error numbers
diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
index 739c0c5..c2824d0 100644
--- a/arch/x86/include/uapi/asm/kvm.h
+++ b/arch/x86/include/uapi/asm/kvm.h
@@ -9,6 +9,9 @@
#include <linux/types.h>
#include <linux/ioctl.h>
+#define KVM_PIO_PAGE_OFFSET 1
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 2
+
#define DE_VECTOR 0
#define DB_VECTOR 1
#define BP_VECTOR 3
diff --git a/arch/x86/include/uapi/asm/prctl.h b/arch/x86/include/uapi/asm/prctl.h
index 835aa51..c457655 100644
--- a/arch/x86/include/uapi/asm/prctl.h
+++ b/arch/x86/include/uapi/asm/prctl.h
@@ -1,10 +1,13 @@
#ifndef _ASM_X86_PRCTL_H
#define _ASM_X86_PRCTL_H
-#define ARCH_SET_GS 0x1001
-#define ARCH_SET_FS 0x1002
-#define ARCH_GET_FS 0x1003
-#define ARCH_GET_GS 0x1004
+#define ARCH_SET_GS 0x1001
+#define ARCH_SET_FS 0x1002
+#define ARCH_GET_FS 0x1003
+#define ARCH_GET_GS 0x1004
+
+#define ARCH_GET_CPUID 0x1011
+#define ARCH_SET_CPUID 0x1012
#define ARCH_MAP_VDSO_X32 0x2001
#define ARCH_MAP_VDSO_32 0x2002
diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index 1445865..690a2dc 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -76,7 +76,11 @@
#define EXIT_REASON_WBINVD 54
#define EXIT_REASON_XSETBV 55
#define EXIT_REASON_APIC_WRITE 56
+#define EXIT_REASON_RDRAND 57
#define EXIT_REASON_INVPCID 58
+#define EXIT_REASON_VMFUNC 59
+#define EXIT_REASON_ENCLS 60
+#define EXIT_REASON_RDSEED 61
#define EXIT_REASON_PML_FULL 62
#define EXIT_REASON_XSAVES 63
#define EXIT_REASON_XRSTORS 64
@@ -90,6 +94,7 @@
{ EXIT_REASON_TASK_SWITCH, "TASK_SWITCH" }, \
{ EXIT_REASON_CPUID, "CPUID" }, \
{ EXIT_REASON_HLT, "HLT" }, \
+ { EXIT_REASON_INVD, "INVD" }, \
{ EXIT_REASON_INVLPG, "INVLPG" }, \
{ EXIT_REASON_RDPMC, "RDPMC" }, \
{ EXIT_REASON_RDTSC, "RDTSC" }, \
@@ -108,6 +113,8 @@
{ EXIT_REASON_IO_INSTRUCTION, "IO_INSTRUCTION" }, \
{ EXIT_REASON_MSR_READ, "MSR_READ" }, \
{ EXIT_REASON_MSR_WRITE, "MSR_WRITE" }, \
+ { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
+ { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
{ EXIT_REASON_MWAIT_INSTRUCTION, "MWAIT_INSTRUCTION" }, \
{ EXIT_REASON_MONITOR_TRAP_FLAG, "MONITOR_TRAP_FLAG" }, \
{ EXIT_REASON_MONITOR_INSTRUCTION, "MONITOR_INSTRUCTION" }, \
@@ -115,20 +122,24 @@
{ EXIT_REASON_MCE_DURING_VMENTRY, "MCE_DURING_VMENTRY" }, \
{ EXIT_REASON_TPR_BELOW_THRESHOLD, "TPR_BELOW_THRESHOLD" }, \
{ EXIT_REASON_APIC_ACCESS, "APIC_ACCESS" }, \
- { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
- { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
+ { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
+ { EXIT_REASON_GDTR_IDTR, "GDTR_IDTR" }, \
+ { EXIT_REASON_LDTR_TR, "LDTR_TR" }, \
{ EXIT_REASON_EPT_VIOLATION, "EPT_VIOLATION" }, \
{ EXIT_REASON_EPT_MISCONFIG, "EPT_MISCONFIG" }, \
{ EXIT_REASON_INVEPT, "INVEPT" }, \
+ { EXIT_REASON_RDTSCP, "RDTSCP" }, \
{ EXIT_REASON_PREEMPTION_TIMER, "PREEMPTION_TIMER" }, \
+ { EXIT_REASON_INVVPID, "INVVPID" }, \
{ EXIT_REASON_WBINVD, "WBINVD" }, \
+ { EXIT_REASON_XSETBV, "XSETBV" }, \
{ EXIT_REASON_APIC_WRITE, "APIC_WRITE" }, \
- { EXIT_REASON_EOI_INDUCED, "EOI_INDUCED" }, \
- { EXIT_REASON_INVALID_STATE, "INVALID_STATE" }, \
- { EXIT_REASON_MSR_LOAD_FAIL, "MSR_LOAD_FAIL" }, \
- { EXIT_REASON_INVD, "INVD" }, \
- { EXIT_REASON_INVVPID, "INVVPID" }, \
+ { EXIT_REASON_RDRAND, "RDRAND" }, \
{ EXIT_REASON_INVPCID, "INVPCID" }, \
+ { EXIT_REASON_VMFUNC, "VMFUNC" }, \
+ { EXIT_REASON_ENCLS, "ENCLS" }, \
+ { EXIT_REASON_RDSEED, "RDSEED" }, \
+ { EXIT_REASON_PML_FULL, "PML_FULL" }, \
{ EXIT_REASON_XSAVES, "XSAVES" }, \
{ EXIT_REASON_XRSTORS, "XRSTORS" }
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 0631977..dfa90a3 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -90,16 +90,12 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
return;
}
- if (ring3mwait_disabled) {
- msr_clear_bit(MSR_MISC_FEATURE_ENABLES,
- MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
+ if (ring3mwait_disabled)
return;
- }
-
- msr_set_bit(MSR_MISC_FEATURE_ENABLES,
- MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT);
set_cpu_cap(c, X86_FEATURE_RING3MWAIT);
+ this_cpu_or(msr_misc_features_shadow,
+ 1UL << MSR_MISC_FEATURES_ENABLES_RING3MWAIT_BIT);
if (c == &boot_cpu_data)
ELF_HWCAP2 |= HWCAP2_RING3MWAIT;
@@ -488,6 +484,34 @@ static void intel_bsp_resume(struct cpuinfo_x86 *c)
init_intel_energy_perf(c);
}
+static void init_cpuid_fault(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ if (!rdmsrl_safe(MSR_PLATFORM_INFO, &msr)) {
+ if (msr & MSR_PLATFORM_INFO_CPUID_FAULT)
+ set_cpu_cap(c, X86_FEATURE_CPUID_FAULT);
+ }
+}
+
+static void init_intel_misc_features(struct cpuinfo_x86 *c)
+{
+ u64 msr;
+
+ if (rdmsrl_safe(MSR_MISC_FEATURES_ENABLES, &msr))
+ return;
+
+ /* Clear all MISC features */
+ this_cpu_write(msr_misc_features_shadow, 0);
+
+ /* Check features and update capabilities and shadow control bits */
+ init_cpuid_fault(c);
+ probe_xeon_phi_r3mwait(c);
+
+ msr = this_cpu_read(msr_misc_features_shadow);
+ wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
+}
+
static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
@@ -602,7 +626,7 @@ static void init_intel(struct cpuinfo_x86 *c)
init_intel_energy_perf(c);
- probe_xeon_phi_r3mwait(c);
+ init_intel_misc_features(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 14f65a5..da5c097 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -396,9 +396,9 @@ static u64 kvm_steal_clock(int cpu)
src = &per_cpu(steal_time, cpu);
do {
version = src->version;
- rmb();
+ virt_rmb();
steal = src->steal;
- rmb();
+ virt_rmb();
} while ((version & 1) || (version != src->version));
return steal;
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index f675915..0bb8842 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -37,6 +37,7 @@
#include <asm/vm86.h>
#include <asm/switch_to.h>
#include <asm/desc.h>
+#include <asm/prctl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -124,11 +125,6 @@ void flush_thread(void)
fpu__clear(&tsk->thread.fpu);
}
-static void hard_disable_TSC(void)
-{
- cr4_set_bits(X86_CR4_TSD);
-}
-
void disable_TSC(void)
{
preempt_disable();
@@ -137,15 +133,10 @@ void disable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_disable_TSC();
+ cr4_set_bits(X86_CR4_TSD);
preempt_enable();
}
-static void hard_enable_TSC(void)
-{
- cr4_clear_bits(X86_CR4_TSD);
-}
-
static void enable_TSC(void)
{
preempt_disable();
@@ -154,7 +145,7 @@ static void enable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_enable_TSC();
+ cr4_clear_bits(X86_CR4_TSD);
preempt_enable();
}
@@ -182,54 +173,129 @@ int set_tsc_mode(unsigned int val)
return 0;
}
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
-{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread;
- next = &next_p->thread;
+DEFINE_PER_CPU(u64, msr_misc_features_shadow);
- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
+static void set_cpuid_faulting(bool on)
+{
+ u64 msrval;
- debugctl &= ~DEBUGCTLMSR_BTF;
- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
- debugctl |= DEBUGCTLMSR_BTF;
+ msrval = this_cpu_read(msr_misc_features_shadow);
+ msrval &= ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+ msrval |= (on << MSR_MISC_FEATURES_ENABLES_CPUID_FAULT_BIT);
+ this_cpu_write(msr_misc_features_shadow, msrval);
+ wrmsrl(MSR_MISC_FEATURES_ENABLES, msrval);
+}
- update_debugctlmsr(debugctl);
+static void disable_cpuid(void)
+{
+ preempt_disable();
+ if (!test_and_set_thread_flag(TIF_NOCPUID)) {
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOCPUID in the current running context.
+ */
+ set_cpuid_faulting(true);
}
+ preempt_enable();
+}
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
+static void enable_cpuid(void)
+{
+ preempt_disable();
+ if (test_and_clear_thread_flag(TIF_NOCPUID)) {
+ /*
+ * Must flip the CPU state synchronously with
+ * TIF_NOCPUID in the current running context.
+ */
+ set_cpuid_faulting(false);
}
+ preempt_enable();
+}
+
+static int get_cpuid_mode(void)
+{
+ return !test_thread_flag(TIF_NOCPUID);
+}
+
+static int set_cpuid_mode(struct task_struct *task, unsigned long cpuid_enabled)
+{
+ if (!static_cpu_has(X86_FEATURE_CPUID_FAULT))
+ return -ENODEV;
+
+ if (cpuid_enabled)
+ enable_cpuid();
+ else
+ disable_cpuid();
+
+ return 0;
+}
+
+/*
+ * Called immediately after a successful exec.
+ */
+void arch_setup_new_exec(void)
+{
+ /* If cpuid was previously disabled for this task, re-enable it. */
+ if (test_thread_flag(TIF_NOCPUID))
+ enable_cpuid();
+}
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+static inline void switch_to_bitmap(struct tss_struct *tss,
+ struct thread_struct *prev,
+ struct thread_struct *next,
+ unsigned long tifp, unsigned long tifn)
+{
+ if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
-
/*
* Make sure that the TSS limit is correct for the CPU
* to notice the IO bitmap.
*/
refresh_tss_limit();
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+ } else if (tifp & _TIF_IO_BITMAP) {
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss)
+{
+ struct thread_struct *prev, *next;
+ unsigned long tifp, tifn;
+
+ prev = &prev_p->thread;
+ next = &next_p->thread;
+
+ tifn = READ_ONCE(task_thread_info(next_p)->flags);
+ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+ switch_to_bitmap(tss, prev, next, tifp, tifn);
+
propagate_user_return_notify(prev_p, next_p);
+
+ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+ arch_has_block_step()) {
+ unsigned long debugctl, msk;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl &= ~DEBUGCTLMSR_BTF;
+ msk = tifn & _TIF_BLOCKSTEP;
+ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ }
+
+ if ((tifp ^ tifn) & _TIF_NOTSC)
+ cr4_toggle_bits(X86_CR4_TSD);
+
+ if ((tifp ^ tifn) & _TIF_NOCPUID)
+ set_cpuid_faulting(!!(tifn & _TIF_NOCPUID));
}
/*
@@ -550,3 +616,16 @@ out:
put_task_stack(p);
return ret;
}
+
+long do_arch_prctl_common(struct task_struct *task, int option,
+ unsigned long cpuid_enabled)
+{
+ switch (option) {
+ case ARCH_GET_CPUID:
+ return get_cpuid_mode();
+ case ARCH_SET_CPUID:
+ return set_cpuid_mode(task, cpuid_enabled);
+ }
+
+ return -EINVAL;
+}
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 4c818f8..ff40e74 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/kdebug.h>
+#include <linux/syscalls.h>
#include <asm/pgtable.h>
#include <asm/ldt.h>
@@ -56,6 +57,7 @@
#include <asm/switch_to.h>
#include <asm/vm86.h>
#include <asm/intel_rdt.h>
+#include <asm/proto.h>
void __show_regs(struct pt_regs *regs, int all)
{
@@ -304,3 +306,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
return prev_p;
}
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ return do_arch_prctl_common(current, option, arg2);
+}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index d6b784a..ea1a618 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -37,6 +37,7 @@
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/ftrace.h>
+#include <linux/syscalls.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
@@ -204,7 +205,7 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
(struct user_desc __user *)tls, 0);
else
#endif
- err = do_arch_prctl(p, ARCH_SET_FS, tls);
+ err = do_arch_prctl_64(p, ARCH_SET_FS, tls);
if (err)
goto out;
}
@@ -547,70 +548,72 @@ static long prctl_map_vdso(const struct vdso_image *image, unsigned long addr)
}
#endif
-long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
+long do_arch_prctl_64(struct task_struct *task, int option, unsigned long arg2)
{
int ret = 0;
int doit = task == current;
int cpu;
- switch (code) {
+ switch (option) {
case ARCH_SET_GS:
- if (addr >= TASK_SIZE_MAX)
+ if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.gsindex = 0;
- task->thread.gsbase = addr;
+ task->thread.gsbase = arg2;
if (doit) {
load_gs_index(0);
- ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, addr);
+ ret = wrmsrl_safe(MSR_KERNEL_GS_BASE, arg2);
}
put_cpu();
break;
case ARCH_SET_FS:
/* Not strictly needed for fs, but do it for symmetry
with gs */
- if (addr >= TASK_SIZE_MAX)
+ if (arg2 >= TASK_SIZE_MAX)
return -EPERM;
cpu = get_cpu();
task->thread.fsindex = 0;
- task->thread.fsbase = addr;
+ task->thread.fsbase = arg2;
if (doit) {
/* set the selector to 0 to not confuse __switch_to */
loadsegment(fs, 0);
- ret = wrmsrl_safe(MSR_FS_BASE, addr);
+ ret = wrmsrl_safe(MSR_FS_BASE, arg2);
}
put_cpu();
break;
case ARCH_GET_FS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_FS_BASE, base);
else
base = task->thread.fsbase;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)arg2);
break;
}
case ARCH_GET_GS: {
unsigned long base;
+
if (doit)
rdmsrl(MSR_KERNEL_GS_BASE, base);
else
base = task->thread.gsbase;
- ret = put_user(base, (unsigned long __user *)addr);
+ ret = put_user(base, (unsigned long __user *)arg2);
break;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
# ifdef CONFIG_X86_X32_ABI
case ARCH_MAP_VDSO_X32:
- return prctl_map_vdso(&vdso_image_x32, addr);
+ return prctl_map_vdso(&vdso_image_x32, arg2);
# endif
# if defined CONFIG_X86_32 || defined CONFIG_IA32_EMULATION
case ARCH_MAP_VDSO_32:
- return prctl_map_vdso(&vdso_image_32, addr);
+ return prctl_map_vdso(&vdso_image_32, arg2);
# endif
case ARCH_MAP_VDSO_64:
- return prctl_map_vdso(&vdso_image_64, addr);
+ return prctl_map_vdso(&vdso_image_64, arg2);
#endif
default:
@@ -621,10 +624,23 @@ long do_arch_prctl(struct task_struct *task, int code, unsigned long addr)
return ret;
}
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ long ret;
+
+ ret = do_arch_prctl_64(current, option, arg2);
+ if (ret == -EINVAL)
+ ret = do_arch_prctl_common(current, option, arg2);
+
+ return ret;
+}
+
+#ifdef CONFIG_IA32_EMULATION
+COMPAT_SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return do_arch_prctl(current, code, addr);
+ return do_arch_prctl_common(current, option, arg2);
}
+#endif
unsigned long KSTK_ESP(struct task_struct *task)
{
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 2364b23..f37d181 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -396,12 +396,12 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
/*
- * When changing the segment base, use do_arch_prctl
+ * When changing the segment base, use do_arch_prctl_64
* to set either thread.fs or thread.fsindex and the
* corresponding GDT slot.
*/
if (child->thread.fsbase != value)
- return do_arch_prctl(child, ARCH_SET_FS, value);
+ return do_arch_prctl_64(child, ARCH_SET_FS, value);
return 0;
case offsetof(struct user_regs_struct,gs_base):
/*
@@ -410,7 +410,7 @@ static int putreg(struct task_struct *child,
if (value >= TASK_SIZE_MAX)
return -EIO;
if (child->thread.gsbase != value)
- return do_arch_prctl(child, ARCH_SET_GS, value);
+ return do_arch_prctl_64(child, ARCH_SET_GS, value);
return 0;
#endif
}
@@ -869,7 +869,7 @@ long arch_ptrace(struct task_struct *child, long request,
Works just like arch_prctl, except that the arguments
are reversed. */
case PTRACE_ARCH_PRCTL:
- ret = do_arch_prctl(child, data, addr);
+ ret = do_arch_prctl_64(child, data, addr);
break;
#endif
diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig
index ab8e32f..760433b 100644
--- a/arch/x86/kvm/Kconfig
+++ b/arch/x86/kvm/Kconfig
@@ -86,18 +86,6 @@ config KVM_MMU_AUDIT
This option adds a R/W kVM module parameter 'mmu_audit', which allows
auditing of KVM MMU events at runtime.
-config KVM_DEVICE_ASSIGNMENT
- bool "KVM legacy PCI device assignment support (DEPRECATED)"
- depends on KVM && PCI && IOMMU_API
- default n
- ---help---
- Provide support for legacy PCI device assignment through KVM. The
- kernel now also supports a full featured userspace device driver
- framework through VFIO, which supersedes this support and provides
- better security.
-
- If unsure, say N.
-
# OK, it's a little counter-intuitive to do this, but it puts it neatly under
# the virtualization menu.
source drivers/vhost/Kconfig
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3bff207..09d4b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -15,8 +15,6 @@ kvm-y += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
hyperv.o page_track.o debugfs.o
-kvm-$(CONFIG_KVM_DEVICE_ASSIGNMENT) += assigned-dev.o iommu.o
-
kvm-intel-y += vmx.o pmu_intel.o
kvm-amd-y += svm.o pmu_amd.o
diff --git a/arch/x86/kvm/assigned-dev.c b/arch/x86/kvm/assigned-dev.c
deleted file mode 100644
index 308b859..0000000
--- a/arch/x86/kvm/assigned-dev.c
+++ /dev/null
@@ -1,1058 +0,0 @@
-/*
- * Kernel-based Virtual Machine - device assignment support
- *
- * Copyright (C) 2010 Red Hat, Inc. and/or its affiliates.
- *
- * This work is licensed under the terms of the GNU GPL, version 2. See
- * the COPYING file in the top-level directory.
- *
- */
-
-#include <linux/kvm_host.h>
-#include <linux/kvm.h>
-#include <linux/uaccess.h>
-#include <linux/vmalloc.h>
-#include <linux/errno.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/namei.h>
-#include <linux/fs.h>
-#include "irq.h"
-#include "assigned-dev.h"
-#include "trace/events/kvm.h"
-
-struct kvm_assigned_dev_kernel {
- struct kvm_irq_ack_notifier ack_notifier;
- struct list_head list;
- int assigned_dev_id;
- int host_segnr;
- int host_busnr;
- int host_devfn;
- unsigned int entries_nr;
- int host_irq;
- bool host_irq_disabled;
- bool pci_2_3;
- struct msix_entry *host_msix_entries;
- int guest_irq;
- struct msix_entry *guest_msix_entries;
- unsigned long irq_requested_type;
- int irq_source_id;
- int flags;
- struct pci_dev *dev;
- struct kvm *kvm;
- spinlock_t intx_lock;
- spinlock_t intx_mask_lock;
- char irq_name[32];
- struct pci_saved_state *pci_saved_state;
-};
-
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
- int assigned_dev_id)
-{
- struct kvm_assigned_dev_kernel *match;
-
- list_for_each_entry(match, head, list) {
- if (match->assigned_dev_id == assigned_dev_id)
- return match;
- }
- return NULL;
-}
-
-static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
- *assigned_dev, int irq)
-{
- int i, index;
- struct msix_entry *host_msix_entries;
-
- host_msix_entries = assigned_dev->host_msix_entries;
-
- index = -1;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- if (irq == host_msix_entries[i].vector) {
- index = i;
- break;
- }
- if (index < 0)
- printk(KERN_WARNING "Fail to find correlated MSI-X entry!\n");
-
- return index;
-}
-
-static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret;
-
- spin_lock(&assigned_dev->intx_lock);
- if (pci_check_and_mask_intx(assigned_dev->dev)) {
- assigned_dev->host_irq_disabled = true;
- ret = IRQ_WAKE_THREAD;
- } else
- ret = IRQ_NONE;
- spin_unlock(&assigned_dev->intx_lock);
-
- return ret;
-}
-
-static void
-kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev,
- int vector)
-{
- if (unlikely(assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_GUEST_INTX)) {
- spin_lock(&assigned_dev->intx_mask_lock);
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX))
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id, vector, 1,
- false);
- spin_unlock(&assigned_dev->intx_mask_lock);
- } else
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- vector, 1, false);
-}
-
-static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- disable_irq_nosync(irq);
- assigned_dev->host_irq_disabled = true;
- spin_unlock_irq(&assigned_dev->intx_lock);
- }
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-/*
- * Deliver an IRQ in an atomic context if we can, or return a failure,
- * user can retry in a process context.
- * Return value:
- * -EWOULDBLOCK - Can't deliver in atomic context: retry in a process context.
- * Other values - No need to retry.
- */
-static int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq,
- int level)
-{
- struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS];
- struct kvm_kernel_irq_routing_entry *e;
- int ret = -EINVAL;
- int idx;
-
- trace_kvm_set_irq(irq, level, irq_source_id);
-
- /*
- * Injection into either PIC or IOAPIC might need to scan all CPUs,
- * which would need to be retried from thread context; when same GSI
- * is connected to both PIC and IOAPIC, we'd have to report a
- * partial failure here.
- * Since there's no easy way to do this, we only support injecting MSI
- * which is limited to 1:1 GSI mapping.
- */
- idx = srcu_read_lock(&kvm->irq_srcu);
- if (kvm_irq_map_gsi(kvm, entries, irq) > 0) {
- e = &entries[0];
- ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id,
- irq, level);
- }
- srcu_read_unlock(&kvm->irq_srcu, idx);
- return ret;
-}
-
-
-static irqreturn_t kvm_assigned_dev_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
-
- kvm_assigned_dev_raise_guest_irq(assigned_dev,
- assigned_dev->guest_irq);
-
- return IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
- int ret = 0;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- ret = kvm_set_irq_inatomic(assigned_dev->kvm,
- assigned_dev->irq_source_id,
- vector, 1);
- }
-
- return unlikely(ret == -EWOULDBLOCK) ? IRQ_WAKE_THREAD : IRQ_HANDLED;
-}
-
-static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
-{
- struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- int index = find_index_from_host_irq(assigned_dev, irq);
- u32 vector;
-
- if (index >= 0) {
- vector = assigned_dev->guest_msix_entries[index].vector;
- kvm_assigned_dev_raise_guest_irq(assigned_dev, vector);
- }
-
- return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
- struct kvm_assigned_dev_kernel *dev =
- container_of(kian, struct kvm_assigned_dev_kernel,
- ack_notifier);
-
- kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0, false);
-
- spin_lock(&dev->intx_mask_lock);
-
- if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) {
- bool reassert = false;
-
- spin_lock_irq(&dev->intx_lock);
- /*
- * The guest IRQ may be shared so this ack can come from an
- * IRQ for another guest device.
- */
- if (dev->host_irq_disabled) {
- if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3))
- enable_irq(dev->host_irq);
- else if (!pci_check_and_unmask_intx(dev->dev))
- reassert = true;
- dev->host_irq_disabled = reassert;
- }
- spin_unlock_irq(&dev->intx_lock);
-
- if (reassert)
- kvm_set_irq(dev->kvm, dev->irq_source_id,
- dev->guest_irq, 1, false);
- }
-
- spin_unlock(&dev->intx_mask_lock);
-}
-
-static void deassign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- if (assigned_dev->ack_notifier.gsi != -1)
- kvm_unregister_irq_ack_notifier(kvm,
- &assigned_dev->ack_notifier);
-
- kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 0, false);
-
- if (assigned_dev->irq_source_id != -1)
- kvm_free_irq_source_id(kvm, assigned_dev->irq_source_id);
- assigned_dev->irq_source_id = -1;
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_GUEST_MASK);
-}
-
-/* The function implicit hold kvm->lock mutex due to cancel_work_sync() */
-static void deassign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- /*
- * We disable irq here to prevent further events.
- *
- * Notice this maybe result in nested disable if the interrupt type is
- * INTx, but it's OK for we are going to free it.
- *
- * If this function is a part of VM destroy, please ensure that till
- * now, the kvm state is still legal for probably we also have to wait
- * on a currently running IRQ handler.
- */
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
- int i;
- for (i = 0; i < assigned_dev->entries_nr; i++)
- disable_irq(assigned_dev->host_msix_entries[i].vector);
-
- for (i = 0; i < assigned_dev->entries_nr; i++)
- free_irq(assigned_dev->host_msix_entries[i].vector,
- assigned_dev);
-
- assigned_dev->entries_nr = 0;
- kfree(assigned_dev->host_msix_entries);
- kfree(assigned_dev->guest_msix_entries);
- pci_disable_msix(assigned_dev->dev);
- } else {
- /* Deal with MSI and INTx */
- if ((assigned_dev->irq_requested_type &
- KVM_DEV_IRQ_HOST_INTX) &&
- (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- spin_lock_irq(&assigned_dev->intx_lock);
- pci_intx(assigned_dev->dev, false);
- spin_unlock_irq(&assigned_dev->intx_lock);
- synchronize_irq(assigned_dev->host_irq);
- } else
- disable_irq(assigned_dev->host_irq);
-
- free_irq(assigned_dev->host_irq, assigned_dev);
-
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
- pci_disable_msi(assigned_dev->dev);
- }
-
- assigned_dev->irq_requested_type &= ~(KVM_DEV_IRQ_HOST_MASK);
-}
-
-static int kvm_deassign_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev,
- unsigned long irq_requested_type)
-{
- unsigned long guest_irq_type, host_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return -EINVAL;
- /* no irq assignment to deassign */
- if (!assigned_dev->irq_requested_type)
- return -ENXIO;
-
- host_irq_type = irq_requested_type & KVM_DEV_IRQ_HOST_MASK;
- guest_irq_type = irq_requested_type & KVM_DEV_IRQ_GUEST_MASK;
-
- if (host_irq_type)
- deassign_host_irq(kvm, assigned_dev);
- if (guest_irq_type)
- deassign_guest_irq(kvm, assigned_dev);
-
- return 0;
-}
-
-static void kvm_free_assigned_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *assigned_dev)
-{
- kvm_deassign_irq(kvm, assigned_dev, assigned_dev->irq_requested_type);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
- struct kvm_assigned_dev_kernel
- *assigned_dev)
-{
- kvm_free_assigned_irq(kvm, assigned_dev);
-
- pci_reset_function(assigned_dev->dev);
- if (pci_load_and_free_saved_state(assigned_dev->dev,
- &assigned_dev->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&assigned_dev->dev->dev));
- else
- pci_restore_state(assigned_dev->dev);
-
- pci_clear_dev_assigned(assigned_dev->dev);
-
- pci_release_regions(assigned_dev->dev);
- pci_disable_device(assigned_dev->dev);
- pci_dev_put(assigned_dev->dev);
-
- list_del(&assigned_dev->list);
- kfree(assigned_dev);
-}
-
-void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
- struct kvm_assigned_dev_kernel *assigned_dev, *tmp;
-
- list_for_each_entry_safe(assigned_dev, tmp,
- &kvm->arch.assigned_dev_head, list) {
- kvm_free_assigned_device(kvm, assigned_dev);
- }
-}
-
-static int assigned_device_enable_host_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- irq_handler_t irq_handler;
- unsigned long flags;
-
- dev->host_irq = dev->dev->irq;
-
- /*
- * We can only share the IRQ line with other host devices if we are
- * able to disable the IRQ source at device-level - independently of
- * the guest driver. Otherwise host devices may suffer from unbounded
- * IRQ latencies when the guest keeps the line asserted.
- */
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- irq_handler = kvm_assigned_dev_intx;
- flags = IRQF_SHARED;
- } else {
- irq_handler = NULL;
- flags = IRQF_ONESHOT;
- }
- if (request_threaded_irq(dev->host_irq, irq_handler,
- kvm_assigned_dev_thread_intx, flags,
- dev->irq_name, dev))
- return -EIO;
-
- if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) {
- spin_lock_irq(&dev->intx_lock);
- pci_intx(dev->dev, true);
- spin_unlock_irq(&dev->intx_lock);
- }
- return 0;
-}
-
-static int assigned_device_enable_host_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int r;
-
- if (!dev->dev->msi_enabled) {
- r = pci_enable_msi(dev->dev);
- if (r)
- return r;
- }
-
- dev->host_irq = dev->dev->irq;
- if (request_threaded_irq(dev->host_irq, kvm_assigned_dev_msi,
- kvm_assigned_dev_thread_msi, 0,
- dev->irq_name, dev)) {
- pci_disable_msi(dev->dev);
- return -EIO;
- }
-
- return 0;
-}
-
-static int assigned_device_enable_host_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev)
-{
- int i, r = -EINVAL;
-
- /* host_msix_entries and guest_msix_entries should have been
- * initialized */
- if (dev->entries_nr == 0)
- return r;
-
- r = pci_enable_msix_exact(dev->dev,
- dev->host_msix_entries, dev->entries_nr);
- if (r)
- return r;
-
- for (i = 0; i < dev->entries_nr; i++) {
- r = request_threaded_irq(dev->host_msix_entries[i].vector,
- kvm_assigned_dev_msix,
- kvm_assigned_dev_thread_msix,
- 0, dev->irq_name, dev);
- if (r)
- goto err;
- }
-
- return 0;
-err:
- for (i -= 1; i >= 0; i--)
- free_irq(dev->host_msix_entries[i].vector, dev);
- pci_disable_msix(dev->dev);
- return r;
-}
-
-static int assigned_device_enable_guest_intx(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = irq->guest_irq;
- return 0;
-}
-
-static int assigned_device_enable_guest_msi(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assigned_device_enable_guest_msix(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq)
-{
- dev->guest_irq = irq->guest_irq;
- dev->ack_notifier.gsi = -1;
- return 0;
-}
-
-static int assign_host_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- __u32 host_irq_type)
-{
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_HOST_MASK)
- return r;
-
- snprintf(dev->irq_name, sizeof(dev->irq_name), "kvm:%s",
- pci_name(dev->dev));
-
- switch (host_irq_type) {
- case KVM_DEV_IRQ_HOST_INTX:
- r = assigned_device_enable_host_intx(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSI:
- r = assigned_device_enable_host_msi(kvm, dev);
- break;
- case KVM_DEV_IRQ_HOST_MSIX:
- r = assigned_device_enable_host_msix(kvm, dev);
- break;
- default:
- r = -EINVAL;
- }
- dev->host_irq_disabled = false;
-
- if (!r)
- dev->irq_requested_type |= host_irq_type;
-
- return r;
-}
-
-static int assign_guest_irq(struct kvm *kvm,
- struct kvm_assigned_dev_kernel *dev,
- struct kvm_assigned_irq *irq,
- unsigned long guest_irq_type)
-{
- int id;
- int r = -EEXIST;
-
- if (dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MASK)
- return r;
-
- id = kvm_request_irq_source_id(kvm);
- if (id < 0)
- return id;
-
- dev->irq_source_id = id;
-
- switch (guest_irq_type) {
- case KVM_DEV_IRQ_GUEST_INTX:
- r = assigned_device_enable_guest_intx(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSI:
- r = assigned_device_enable_guest_msi(kvm, dev, irq);
- break;
- case KVM_DEV_IRQ_GUEST_MSIX:
- r = assigned_device_enable_guest_msix(kvm, dev, irq);
- break;
- default:
- r = -EINVAL;
- }
-
- if (!r) {
- dev->irq_requested_type |= guest_irq_type;
- if (dev->ack_notifier.gsi != -1)
- kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
- } else {
- kvm_free_irq_source_id(kvm, dev->irq_source_id);
- dev->irq_source_id = -1;
- }
-
- return r;
-}
-
-/* TODO Deal with KVM_DEV_IRQ_ASSIGNED_MASK_MSIX */
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
- struct kvm_assigned_irq *assigned_irq)
-{
- int r = -EINVAL;
- struct kvm_assigned_dev_kernel *match;
- unsigned long host_irq_type, guest_irq_type;
-
- if (!irqchip_in_kernel(kvm))
- return r;
-
- mutex_lock(&kvm->lock);
- r = -ENODEV;
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- host_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_HOST_MASK);
- guest_irq_type = (assigned_irq->flags & KVM_DEV_IRQ_GUEST_MASK);
-
- r = -EINVAL;
- /* can only assign one type at a time */
- if (hweight_long(host_irq_type) > 1)
- goto out;
- if (hweight_long(guest_irq_type) > 1)
- goto out;
- if (host_irq_type == 0 && guest_irq_type == 0)
- goto out;
-
- r = 0;
- if (host_irq_type)
- r = assign_host_irq(kvm, match, host_irq_type);
- if (r)
- goto out;
-
- if (guest_irq_type)
- r = assign_guest_irq(kvm, match, assigned_irq, guest_irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm,
- struct kvm_assigned_irq
- *assigned_irq)
-{
- int r = -ENODEV;
- struct kvm_assigned_dev_kernel *match;
- unsigned long irq_type;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
- if (!match)
- goto out;
-
- irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK |
- KVM_DEV_IRQ_GUEST_MASK);
- r = kvm_deassign_irq(kvm, match, irq_type);
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-/*
- * We want to test whether the caller has been granted permissions to
- * use this device. To be able to configure and control the device,
- * the user needs access to PCI configuration space and BAR resources.
- * These are accessed through PCI sysfs. PCI config space is often
- * passed to the process calling this ioctl via file descriptor, so we
- * can't rely on access to that file. We can check for permissions
- * on each of the BAR resource files, which is a pretty clear
- * indicator that the user has been granted access to the device.
- */
-static int probe_sysfs_permissions(struct pci_dev *dev)
-{
-#ifdef CONFIG_SYSFS
- int i;
- bool bar_found = false;
-
- for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
- char *kpath, *syspath;
- struct path path;
- struct inode *inode;
- int r;
-
- if (!pci_resource_len(dev, i))
- continue;
-
- kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
- if (!kpath)
- return -ENOMEM;
-
- /* Per sysfs-rules, sysfs is always at /sys */
- syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
- kfree(kpath);
- if (!syspath)
- return -ENOMEM;
-
- r = kern_path(syspath, LOOKUP_FOLLOW, &path);
- kfree(syspath);
- if (r)
- return r;
-
- inode = d_backing_inode(path.dentry);
-
- r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
- path_put(&path);
- if (r)
- return r;
-
- bar_found = true;
- }
-
- /* If no resources, probably something special */
- if (!bar_found)
- return -EPERM;
-
- return 0;
-#else
- return -EINVAL; /* No way to control the device without sysfs */
-#endif
-}
-
-static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0, idx;
- struct kvm_assigned_dev_kernel *match;
- struct pci_dev *dev;
-
- if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
- return -EINVAL;
-
- mutex_lock(&kvm->lock);
- idx = srcu_read_lock(&kvm->srcu);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (match) {
- /* device already assigned */
- r = -EEXIST;
- goto out;
- }
-
- match = kzalloc(sizeof(struct kvm_assigned_dev_kernel), GFP_KERNEL);
- if (match == NULL) {
- printk(KERN_INFO "%s: Couldn't allocate memory\n",
- __func__);
- r = -ENOMEM;
- goto out;
- }
- dev = pci_get_domain_bus_and_slot(assigned_dev->segnr,
- assigned_dev->busnr,
- assigned_dev->devfn);
- if (!dev) {
- printk(KERN_INFO "%s: host device not found\n", __func__);
- r = -EINVAL;
- goto out_free;
- }
-
- /* Don't allow bridges to be assigned */
- if (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) {
- r = -EPERM;
- goto out_put;
- }
-
- r = probe_sysfs_permissions(dev);
- if (r)
- goto out_put;
-
- if (pci_enable_device(dev)) {
- printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
- r = -EBUSY;
- goto out_put;
- }
- r = pci_request_regions(dev, "kvm_assigned_device");
- if (r) {
- printk(KERN_INFO "%s: Could not get access to device regions\n",
- __func__);
- goto out_disable;
- }
-
- pci_reset_function(dev);
- pci_save_state(dev);
- match->pci_saved_state = pci_store_saved_state(dev);
- if (!match->pci_saved_state)
- printk(KERN_DEBUG "%s: Couldn't store %s saved state\n",
- __func__, dev_name(&dev->dev));
-
- if (!pci_intx_mask_supported(dev))
- assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3;
-
- match->assigned_dev_id = assigned_dev->assigned_dev_id;
- match->host_segnr = assigned_dev->segnr;
- match->host_busnr = assigned_dev->busnr;
- match->host_devfn = assigned_dev->devfn;
- match->flags = assigned_dev->flags;
- match->dev = dev;
- spin_lock_init(&match->intx_lock);
- spin_lock_init(&match->intx_mask_lock);
- match->irq_source_id = -1;
- match->kvm = kvm;
- match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq;
-
- list_add(&match->list, &kvm->arch.assigned_dev_head);
-
- if (!kvm->arch.iommu_domain) {
- r = kvm_iommu_map_guest(kvm);
- if (r)
- goto out_list_del;
- }
- r = kvm_assign_device(kvm, match->dev);
- if (r)
- goto out_list_del;
-
-out:
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-out_list_del:
- if (pci_load_and_free_saved_state(dev, &match->pci_saved_state))
- printk(KERN_INFO "%s: Couldn't reload %s saved state\n",
- __func__, dev_name(&dev->dev));
- list_del(&match->list);
- pci_release_regions(dev);
-out_disable:
- pci_disable_device(dev);
-out_put:
- pci_dev_put(dev);
-out_free:
- kfree(match);
- srcu_read_unlock(&kvm->srcu, idx);
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- printk(KERN_INFO "%s: device hasn't been assigned before, "
- "so cannot be deassigned\n", __func__);
- r = -EINVAL;
- goto out;
- }
-
- kvm_deassign_device(kvm, match->dev);
-
- kvm_free_assigned_device(kvm, match);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-
-static int kvm_vm_ioctl_set_msix_nr(struct kvm *kvm,
- struct kvm_assigned_msix_nr *entry_nr)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry_nr->assigned_dev_id);
- if (!adev) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- if (adev->entries_nr == 0) {
- adev->entries_nr = entry_nr->entry_nr;
- if (adev->entries_nr == 0 ||
- adev->entries_nr > KVM_MAX_MSIX_PER_DEV) {
- r = -EINVAL;
- goto msix_nr_out;
- }
-
- adev->host_msix_entries = kzalloc(sizeof(struct msix_entry) *
- entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->host_msix_entries) {
- r = -ENOMEM;
- goto msix_nr_out;
- }
- adev->guest_msix_entries =
- kzalloc(sizeof(struct msix_entry) * entry_nr->entry_nr,
- GFP_KERNEL);
- if (!adev->guest_msix_entries) {
- kfree(adev->host_msix_entries);
- r = -ENOMEM;
- goto msix_nr_out;
- }
- } else /* Not allowed set MSI-X number twice */
- r = -EINVAL;
-msix_nr_out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-static int kvm_vm_ioctl_set_msix_entry(struct kvm *kvm,
- struct kvm_assigned_msix_entry *entry)
-{
- int r = 0, i;
- struct kvm_assigned_dev_kernel *adev;
-
- mutex_lock(&kvm->lock);
-
- adev = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- entry->assigned_dev_id);
-
- if (!adev) {
- r = -EINVAL;
- goto msix_entry_out;
- }
-
- for (i = 0; i < adev->entries_nr; i++)
- if (adev->guest_msix_entries[i].vector == 0 ||
- adev->guest_msix_entries[i].entry == entry->entry) {
- adev->guest_msix_entries[i].entry = entry->entry;
- adev->guest_msix_entries[i].vector = entry->gsi;
- adev->host_msix_entries[i].entry = entry->entry;
- break;
- }
- if (i == adev->entries_nr) {
- r = -ENOSPC;
- goto msix_entry_out;
- }
-
-msix_entry_out:
- mutex_unlock(&kvm->lock);
-
- return r;
-}
-
-static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm,
- struct kvm_assigned_pci_dev *assigned_dev)
-{
- int r = 0;
- struct kvm_assigned_dev_kernel *match;
-
- mutex_lock(&kvm->lock);
-
- match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_dev->assigned_dev_id);
- if (!match) {
- r = -ENODEV;
- goto out;
- }
-
- spin_lock(&match->intx_mask_lock);
-
- match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX;
- match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX;
-
- if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) {
- if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) {
- kvm_set_irq(match->kvm, match->irq_source_id,
- match->guest_irq, 0, false);
- /*
- * Masking at hardware-level is performed on demand,
- * i.e. when an IRQ actually arrives at the host.
- */
- } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) {
- /*
- * Unmask the IRQ line if required. Unmasking at
- * device level will be performed by user space.
- */
- spin_lock_irq(&match->intx_lock);
- if (match->host_irq_disabled) {
- enable_irq(match->host_irq);
- match->host_irq_disabled = false;
- }
- spin_unlock_irq(&match->intx_lock);
- }
- }
-
- spin_unlock(&match->intx_mask_lock);
-
-out:
- mutex_unlock(&kvm->lock);
- return r;
-}
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- void __user *argp = (void __user *)arg;
- int r;
-
- switch (ioctl) {
- case KVM_ASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_assign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_IRQ: {
- r = -EOPNOTSUPP;
- break;
- }
- case KVM_ASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_assign_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_DEV_IRQ: {
- struct kvm_assigned_irq assigned_irq;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_irq, argp, sizeof assigned_irq))
- goto out;
- r = kvm_vm_ioctl_deassign_dev_irq(kvm, &assigned_irq);
- if (r)
- goto out;
- break;
- }
- case KVM_DEASSIGN_PCI_DEVICE: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_deassign_device(kvm, &assigned_dev);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_NR: {
- struct kvm_assigned_msix_nr entry_nr;
- r = -EFAULT;
- if (copy_from_user(&entry_nr, argp, sizeof entry_nr))
- goto out;
- r = kvm_vm_ioctl_set_msix_nr(kvm, &entry_nr);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_MSIX_ENTRY: {
- struct kvm_assigned_msix_entry entry;
- r = -EFAULT;
- if (copy_from_user(&entry, argp, sizeof entry))
- goto out;
- r = kvm_vm_ioctl_set_msix_entry(kvm, &entry);
- if (r)
- goto out;
- break;
- }
- case KVM_ASSIGN_SET_INTX_MASK: {
- struct kvm_assigned_pci_dev assigned_dev;
-
- r = -EFAULT;
- if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev))
- goto out;
- r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev);
- break;
- }
- default:
- r = -ENOTTY;
- break;
- }
-out:
- return r;
-}
diff --git a/arch/x86/kvm/assigned-dev.h b/arch/x86/kvm/assigned-dev.h
deleted file mode 100644
index a428c1a..0000000
--- a/arch/x86/kvm/assigned-dev.h
+++ /dev/null
@@ -1,32 +0,0 @@
-#ifndef ARCH_X86_KVM_ASSIGNED_DEV_H
-#define ARCH_X86_KVM_ASSIGNED_DEV_H
-
-#include <linux/kvm_host.h>
-
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev);
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev);
-
-int kvm_iommu_map_guest(struct kvm *kvm);
-int kvm_iommu_unmap_guest(struct kvm *kvm);
-
-long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg);
-
-void kvm_free_all_assigned_devices(struct kvm *kvm);
-#else
-static inline int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- return 0;
-}
-
-static inline long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
- unsigned long arg)
-{
- return -ENOTTY;
-}
-
-static inline void kvm_free_all_assigned_devices(struct kvm *kvm) {}
-#endif /* CONFIG_KVM_DEVICE_ASSIGNMENT */
-
-#endif /* ARCH_X86_KVM_ASSIGNED_DEV_H */
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index efde6cc..a181ae7 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -876,6 +876,9 @@ int kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
{
u32 eax, ebx, ecx, edx;
+ if (cpuid_fault_enabled(vcpu) && !kvm_require_cpl(vcpu, 0))
+ return 1;
+
eax = kvm_register_read(vcpu, VCPU_REGS_RAX);
ecx = kvm_register_read(vcpu, VCPU_REGS_RCX);
kvm_cpuid(vcpu, &eax, &ebx, &ecx, &edx);
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index 35058c2..a6fd40a 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -205,4 +205,15 @@ static inline int guest_cpuid_stepping(struct kvm_vcpu *vcpu)
return x86_stepping(best->eax);
}
+static inline bool supports_cpuid_fault(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.msr_platform_info & MSR_PLATFORM_INFO_CPUID_FAULT;
+}
+
+static inline bool cpuid_fault_enabled(struct kvm_vcpu *vcpu)
+{
+ return vcpu->arch.msr_misc_features_enables &
+ MSR_MISC_FEATURES_ENABLES_CPUID_FAULT;
+}
+
#endif
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 45c7306..c25cfaf 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2547,7 +2547,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
u64 smbase;
int ret;
- if ((ctxt->emul_flags & X86EMUL_SMM_MASK) == 0)
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0)
return emulate_ud(ctxt);
/*
@@ -2596,11 +2596,11 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt)
return X86EMUL_UNHANDLEABLE;
}
- if ((ctxt->emul_flags & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
+ if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_INSIDE_NMI_MASK) == 0)
ctxt->ops->set_nmi_mask(ctxt, false);
- ctxt->emul_flags &= ~X86EMUL_SMM_INSIDE_NMI_MASK;
- ctxt->emul_flags &= ~X86EMUL_SMM_MASK;
+ ctxt->ops->set_hflags(ctxt, ctxt->ops->get_hflags(ctxt) &
+ ~(X86EMUL_SMM_INSIDE_NMI_MASK | X86EMUL_SMM_MASK));
return X86EMUL_CONTINUE;
}
@@ -3854,6 +3854,13 @@ static int em_sti(struct x86_emulate_ctxt *ctxt)
static int em_cpuid(struct x86_emulate_ctxt *ctxt)
{
u32 eax, ebx, ecx, edx;
+ u64 msr = 0;
+
+ ctxt->ops->get_msr(ctxt, MSR_MISC_FEATURES_ENABLES, &msr);
+ if (msr & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+ ctxt->ops->cpl(ctxt)) {
+ return emulate_gp(ctxt, 0);
+ }
eax = reg_read(ctxt, VCPU_REGS_RAX);
ecx = reg_read(ctxt, VCPU_REGS_RCX);
@@ -5316,6 +5323,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
const struct x86_emulate_ops *ops = ctxt->ops;
int rc = X86EMUL_CONTINUE;
int saved_dst_type = ctxt->dst.type;
+ unsigned emul_flags;
ctxt->mem_read.pos = 0;
@@ -5330,6 +5338,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
+ emul_flags = ctxt->ops->get_hflags(ctxt);
if (unlikely(ctxt->d &
(No64|Undefined|Sse|Mmx|Intercept|CheckPerm|Priv|Prot|String))) {
if ((ctxt->mode == X86EMUL_MODE_PROT64 && (ctxt->d & No64)) ||
@@ -5363,7 +5372,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
fetch_possible_mmx_operand(ctxt, &ctxt->dst);
}
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && ctxt->intercept) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_PRE_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5392,7 +5401,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
goto done;
}
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_EXCEPT);
if (rc != X86EMUL_CONTINUE)
@@ -5446,7 +5455,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt)
special_insn:
- if (unlikely(ctxt->emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
+ if (unlikely(emul_flags & X86EMUL_GUEST_MASK) && (ctxt->d & Intercept)) {
rc = emulator_check_intercept(ctxt, ctxt->intercept,
X86_ICPT_POST_MEMACCESS);
if (rc != X86EMUL_CONTINUE)
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 73ea24d..bdcd413 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -49,7 +49,7 @@ static void pic_unlock(struct kvm_pic *s)
__releases(&s->lock)
{
bool wakeup = s->wakeup_needed;
- struct kvm_vcpu *vcpu, *found = NULL;
+ struct kvm_vcpu *vcpu;
int i;
s->wakeup_needed = false;
@@ -59,16 +59,11 @@ static void pic_unlock(struct kvm_pic *s)
if (wakeup) {
kvm_for_each_vcpu(i, vcpu, s->kvm) {
if (kvm_apic_accept_pic_intr(vcpu)) {
- found = vcpu;
- break;
+ kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_vcpu_kick(vcpu);
+ return;
}
}
-
- if (!found)
- return;
-
- kvm_make_request(KVM_REQ_EVENT, found);
- kvm_vcpu_kick(found);
}
}
@@ -239,7 +234,7 @@ static inline void pic_intack(struct kvm_kpic_state *s, int irq)
int kvm_pic_read_irq(struct kvm *kvm)
{
int irq, irq2, intno;
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
s->output = 0;
@@ -273,7 +268,7 @@ int kvm_pic_read_irq(struct kvm *kvm)
return intno;
}
-void kvm_pic_reset(struct kvm_kpic_state *s)
+static void kvm_pic_reset(struct kvm_kpic_state *s)
{
int irq, i;
struct kvm_vcpu *vcpu;
@@ -422,19 +417,16 @@ static u32 pic_poll_read(struct kvm_kpic_state *s, u32 addr1)
return ret;
}
-static u32 pic_ioport_read(void *opaque, u32 addr1)
+static u32 pic_ioport_read(void *opaque, u32 addr)
{
struct kvm_kpic_state *s = opaque;
- unsigned int addr;
int ret;
- addr = addr1;
- addr &= 1;
if (s->poll) {
- ret = pic_poll_read(s, addr1);
+ ret = pic_poll_read(s, addr);
s->poll = 0;
} else
- if (addr == 0)
+ if ((addr & 1) == 0)
if (s->read_reg_select)
ret = s->isr;
else
@@ -456,76 +448,64 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1)
return s->elcr;
}
-static int picdev_in_range(gpa_t addr)
-{
- switch (addr) {
- case 0x20:
- case 0x21:
- case 0xa0:
- case 0xa1:
- case 0x4d0:
- case 0x4d1:
- return 1;
- default:
- return 0;
- }
-}
-
static int picdev_write(struct kvm_pic *s,
gpa_t addr, int len, const void *val)
{
unsigned char data = *(unsigned char *)val;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
if (len != 1) {
pr_pic_unimpl("non byte write\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
+ pic_lock(s);
pic_ioport_write(&s->pics[addr >> 7], addr, data);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
+ pic_lock(s);
elcr_ioport_write(&s->pics[addr & 1], addr, data);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- pic_unlock(s);
return 0;
}
static int picdev_read(struct kvm_pic *s,
gpa_t addr, int len, void *val)
{
- unsigned char data = 0;
- if (!picdev_in_range(addr))
- return -EOPNOTSUPP;
+ unsigned char *data = (unsigned char *)val;
if (len != 1) {
memset(val, 0, len);
pr_pic_unimpl("non byte read\n");
return 0;
}
- pic_lock(s);
switch (addr) {
case 0x20:
case 0x21:
case 0xa0:
case 0xa1:
- data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_lock(s);
+ *data = pic_ioport_read(&s->pics[addr >> 7], addr);
+ pic_unlock(s);
break;
case 0x4d0:
case 0x4d1:
- data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_lock(s);
+ *data = elcr_ioport_read(&s->pics[addr & 1], addr);
+ pic_unlock(s);
break;
+ default:
+ return -EOPNOTSUPP;
}
- *(unsigned char *)val = data;
- pic_unlock(s);
return 0;
}
@@ -576,7 +556,7 @@ static int picdev_eclr_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev,
*/
static void pic_irq_request(struct kvm *kvm, int level)
{
- struct kvm_pic *s = pic_irqchip(kvm);
+ struct kvm_pic *s = kvm->arch.vpic;
if (!s->output)
s->wakeup_needed = true;
@@ -657,9 +637,14 @@ void kvm_pic_destroy(struct kvm *kvm)
{
struct kvm_pic *vpic = kvm->arch.vpic;
+ if (!vpic)
+ return;
+
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave);
kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vpic = NULL;
kfree(vpic);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 6e219e5..bdff437 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -266,11 +266,9 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, ulong *ioapic_handled_vectors)
spin_unlock(&ioapic->lock);
}
-void kvm_vcpu_request_scan_ioapic(struct kvm *kvm)
+void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm)
{
- struct kvm_ioapic *ioapic = kvm->arch.vioapic;
-
- if (!ioapic)
+ if (!ioapic_in_kernel(kvm))
return;
kvm_make_scan_ioapic_request(kvm);
}
@@ -315,7 +313,7 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG
&& ioapic->irr & (1 << index))
ioapic_service(ioapic, index, false);
- kvm_vcpu_request_scan_ioapic(ioapic->kvm);
+ kvm_make_scan_ioapic_request(ioapic->kvm);
break;
}
}
@@ -624,10 +622,8 @@ int kvm_ioapic_init(struct kvm *kvm)
if (ret < 0) {
kvm->arch.vioapic = NULL;
kfree(ioapic);
- return ret;
}
- kvm_vcpu_request_scan_ioapic(kvm);
return ret;
}
@@ -635,37 +631,36 @@ void kvm_ioapic_destroy(struct kvm *kvm)
{
struct kvm_ioapic *ioapic = kvm->arch.vioapic;
+ if (!ioapic)
+ return;
+
cancel_delayed_work_sync(&ioapic->eoi_inject);
+ mutex_lock(&kvm->slots_lock);
kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ mutex_unlock(&kvm->slots_lock);
kvm->arch.vioapic = NULL;
kfree(ioapic);
}
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(state, ioapic, sizeof(struct kvm_ioapic_state));
state->irr &= ~ioapic->irr_delivered;
spin_unlock(&ioapic->lock);
- return 0;
}
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state)
{
- struct kvm_ioapic *ioapic = ioapic_irqchip(kvm);
- if (!ioapic)
- return -EINVAL;
+ struct kvm_ioapic *ioapic = kvm->arch.vioapic;
spin_lock(&ioapic->lock);
memcpy(ioapic, state, sizeof(struct kvm_ioapic_state));
ioapic->irr = 0;
ioapic->irr_delivered = 0;
- kvm_vcpu_request_scan_ioapic(kvm);
+ kvm_make_scan_ioapic_request(kvm);
kvm_ioapic_inject_all(ioapic, state->irr);
spin_unlock(&ioapic->lock);
- return 0;
}
diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index 1cc6e54..29ce197 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -105,17 +105,13 @@ do { \
#define ASSERT(x) do { } while (0)
#endif
-static inline struct kvm_ioapic *ioapic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vioapic;
-}
-
static inline int ioapic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (ioapic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
@@ -132,8 +128,8 @@ void kvm_ioapic_clear_all(struct kvm_ioapic *ioapic, int irq_source_id);
int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
struct kvm_lapic_irq *irq,
struct dest_map *dest_map);
-int kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
-int kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_get_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
+void kvm_set_ioapic(struct kvm *kvm, struct kvm_ioapic_state *state);
void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu,
ulong *ioapic_handled_vectors);
void kvm_scan_ioapic_routes(struct kvm_vcpu *vcpu,
diff --git a/arch/x86/kvm/iommu.c b/arch/x86/kvm/iommu.c
deleted file mode 100644
index b181426..0000000
--- a/arch/x86/kvm/iommu.c
+++ /dev/null
@@ -1,356 +0,0 @@
-/*
- * Copyright (c) 2006, Intel Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
- * Copyright (C) 2006-2008 Intel Corporation
- * Copyright IBM Corporation, 2008
- * Copyright 2010 Red Hat, Inc. and/or its affiliates.
- *
- * Author: Allen M. Kay <allen.m.kay@intel.com>
- * Author: Weidong Han <weidong.han@intel.com>
- * Author: Ben-Ami Yassour <benami@il.ibm.com>
- */
-
-#include <linux/list.h>
-#include <linux/kvm_host.h>
-#include <linux/moduleparam.h>
-#include <linux/pci.h>
-#include <linux/stat.h>
-#include <linux/iommu.h>
-#include "assigned-dev.h"
-
-static bool allow_unsafe_assigned_interrupts;
-module_param_named(allow_unsafe_assigned_interrupts,
- allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
- "Enable device assignment on platforms without interrupt remapping support.");
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm);
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages);
-
-static kvm_pfn_t kvm_pin_pages(struct kvm_memory_slot *slot, gfn_t gfn,
- unsigned long npages)
-{
- gfn_t end_gfn;
- kvm_pfn_t pfn;
-
- pfn = gfn_to_pfn_memslot(slot, gfn);
- end_gfn = gfn + npages;
- gfn += 1;
-
- if (is_error_noslot_pfn(pfn))
- return pfn;
-
- while (gfn < end_gfn)
- gfn_to_pfn_memslot(slot, gfn++);
-
- return pfn;
-}
-
-static void kvm_unpin_pages(struct kvm *kvm, kvm_pfn_t pfn,
- unsigned long npages)
-{
- unsigned long i;
-
- for (i = 0; i < npages; ++i)
- kvm_release_pfn_clean(pfn + i);
-}
-
-int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- gfn_t gfn, end_gfn;
- kvm_pfn_t pfn;
- int r = 0;
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int flags;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- gfn = slot->base_gfn;
- end_gfn = gfn + slot->npages;
-
- flags = IOMMU_READ;
- if (!(slot->flags & KVM_MEM_READONLY))
- flags |= IOMMU_WRITE;
- if (!kvm->arch.iommu_noncoherent)
- flags |= IOMMU_CACHE;
-
-
- while (gfn < end_gfn) {
- unsigned long page_size;
-
- /* Check if already mapped */
- if (iommu_iova_to_phys(domain, gfn_to_gpa(gfn))) {
- gfn += 1;
- continue;
- }
-
- /* Get the page size we could use to map */
- page_size = kvm_host_page_size(kvm, gfn);
-
- /* Make sure the page_size does not exceed the memslot */
- while ((gfn + (page_size >> PAGE_SHIFT)) > end_gfn)
- page_size >>= 1;
-
- /* Make sure gfn is aligned to the page size we want to map */
- while ((gfn << PAGE_SHIFT) & (page_size - 1))
- page_size >>= 1;
-
- /* Make sure hva is aligned to the page size we want to map */
- while (__gfn_to_hva_memslot(slot, gfn) & (page_size - 1))
- page_size >>= 1;
-
- /*
- * Pin all pages we are about to map in memory. This is
- * important because we unmap and unpin in 4kb steps later.
- */
- pfn = kvm_pin_pages(slot, gfn, page_size >> PAGE_SHIFT);
- if (is_error_noslot_pfn(pfn)) {
- gfn += 1;
- continue;
- }
-
- /* Map into IO address space */
- r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
- page_size, flags);
- if (r) {
- printk(KERN_ERR "kvm_iommu_map_address:"
- "iommu failed to map pfn=%llx\n", pfn);
- kvm_unpin_pages(kvm, pfn, page_size >> PAGE_SHIFT);
- goto unmap_pages;
- }
-
- gfn += page_size >> PAGE_SHIFT;
-
- cond_resched();
- }
-
- return 0;
-
-unmap_pages:
- kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
- return r;
-}
-
-static int kvm_iommu_map_memslots(struct kvm *kvm)
-{
- int idx, r = 0;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_register_noncoherent_dma(kvm);
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots) {
- r = kvm_iommu_map_pages(kvm, memslot);
- if (r)
- break;
- }
- srcu_read_unlock(&kvm->srcu, idx);
-
- return r;
-}
-
-int kvm_assign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
- int r;
- bool noncoherent;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- r = iommu_attach_device(domain, &pdev->dev);
- if (r) {
- dev_err(&pdev->dev, "kvm assign device failed ret %d", r);
- return r;
- }
-
- noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
-
- /* Check if need to update IOMMU page table for guest memory */
- if (noncoherent != kvm->arch.iommu_noncoherent) {
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_noncoherent = noncoherent;
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- goto out_unmap;
- }
-
- kvm_arch_start_assignment(kvm);
- pci_set_dev_assigned(pdev);
-
- dev_info(&pdev->dev, "kvm assign device\n");
-
- return 0;
-out_unmap:
- kvm_iommu_unmap_memslots(kvm);
- return r;
-}
-
-int kvm_deassign_device(struct kvm *kvm, struct pci_dev *pdev)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- if (pdev == NULL)
- return -ENODEV;
-
- iommu_detach_device(domain, &pdev->dev);
-
- pci_clear_dev_assigned(pdev);
- kvm_arch_end_assignment(kvm);
-
- dev_info(&pdev->dev, "kvm deassign device\n");
-
- return 0;
-}
-
-int kvm_iommu_map_guest(struct kvm *kvm)
-{
- int r;
-
- if (!iommu_present(&pci_bus_type)) {
- printk(KERN_ERR "%s: iommu not found\n", __func__);
- return -ENODEV;
- }
-
- mutex_lock(&kvm->slots_lock);
-
- kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
- if (!kvm->arch.iommu_domain) {
- r = -ENOMEM;
- goto out_unlock;
- }
-
- if (!allow_unsafe_assigned_interrupts &&
- !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
- printk(KERN_WARNING "%s: No interrupt remapping support,"
- " disallowing device assignment."
- " Re-enable with \"allow_unsafe_assigned_interrupts=1\""
- " module option.\n", __func__);
- iommu_domain_free(kvm->arch.iommu_domain);
- kvm->arch.iommu_domain = NULL;
- r = -EPERM;
- goto out_unlock;
- }
-
- r = kvm_iommu_map_memslots(kvm);
- if (r)
- kvm_iommu_unmap_memslots(kvm);
-
-out_unlock:
- mutex_unlock(&kvm->slots_lock);
- return r;
-}
-
-static void kvm_iommu_put_pages(struct kvm *kvm,
- gfn_t base_gfn, unsigned long npages)
-{
- struct iommu_domain *domain;
- gfn_t end_gfn, gfn;
- kvm_pfn_t pfn;
- u64 phys;
-
- domain = kvm->arch.iommu_domain;
- end_gfn = base_gfn + npages;
- gfn = base_gfn;
-
- /* check if iommu exists and in use */
- if (!domain)
- return;
-
- while (gfn < end_gfn) {
- unsigned long unmap_pages;
- size_t size;
-
- /* Get physical address */
- phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
-
- if (!phys) {
- gfn++;
- continue;
- }
-
- pfn = phys >> PAGE_SHIFT;
-
- /* Unmap address from IO address space */
- size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
- unmap_pages = 1ULL << get_order(size);
-
- /* Unpin all pages we just unmapped to not leak any memory */
- kvm_unpin_pages(kvm, pfn, unmap_pages);
-
- gfn += unmap_pages;
-
- cond_resched();
- }
-}
-
-void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
- kvm_iommu_put_pages(kvm, slot->base_gfn, slot->npages);
-}
-
-static int kvm_iommu_unmap_memslots(struct kvm *kvm)
-{
- int idx;
- struct kvm_memslots *slots;
- struct kvm_memory_slot *memslot;
-
- idx = srcu_read_lock(&kvm->srcu);
- slots = kvm_memslots(kvm);
-
- kvm_for_each_memslot(memslot, slots)
- kvm_iommu_unmap_pages(kvm, memslot);
-
- srcu_read_unlock(&kvm->srcu, idx);
-
- if (kvm->arch.iommu_noncoherent)
- kvm_arch_unregister_noncoherent_dma(kvm);
-
- return 0;
-}
-
-int kvm_iommu_unmap_guest(struct kvm *kvm)
-{
- struct iommu_domain *domain = kvm->arch.iommu_domain;
-
- /* check if iommu exists and in use */
- if (!domain)
- return 0;
-
- mutex_lock(&kvm->slots_lock);
- kvm_iommu_unmap_memslots(kvm);
- kvm->arch.iommu_domain = NULL;
- kvm->arch.iommu_noncoherent = false;
- mutex_unlock(&kvm->slots_lock);
-
- iommu_domain_free(domain);
- return 0;
-}
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 60d91c9..5c24811 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -60,7 +60,7 @@ static int kvm_cpu_has_extint(struct kvm_vcpu *v)
if (irqchip_split(v->kvm))
return pending_userspace_extint(v);
else
- return pic_irqchip(v->kvm)->output;
+ return v->kvm->arch.vpic->output;
} else
return 0;
}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 40d5b2c..0edd22c 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -78,40 +78,42 @@ void kvm_pic_destroy(struct kvm *kvm);
int kvm_pic_read_irq(struct kvm *kvm);
void kvm_pic_update_irq(struct kvm_pic *s);
-static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
-{
- return kvm->arch.vpic;
-}
-
static inline int pic_in_kernel(struct kvm *kvm)
{
- int ret;
+ int mode = kvm->arch.irqchip_mode;
- ret = (pic_irqchip(kvm) != NULL);
- return ret;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_split(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_SPLIT;
}
static inline int irqchip_kernel(struct kvm *kvm)
{
- return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL;
+ int mode = kvm->arch.irqchip_mode;
+
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
+ return mode == KVM_IRQCHIP_KERNEL;
}
static inline int irqchip_in_kernel(struct kvm *kvm)
{
- bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE;
+ int mode = kvm->arch.irqchip_mode;
- /* Matches with wmb after initializing kvm->irq_routing. */
+ /* Matches smp_wmb() when setting irqchip_mode */
smp_rmb();
- return ret;
+ return mode > KVM_IRQCHIP_INIT_IN_PROGRESS;
}
-void kvm_pic_reset(struct kvm_kpic_state *s);
-
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c
index 6825cd3..4517a4c 100644
--- a/arch/x86/kvm/irq_comm.c
+++ b/arch/x86/kvm/irq_comm.c
@@ -42,7 +42,7 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e,
struct kvm *kvm, int irq_source_id, int level,
bool line_status)
{
- struct kvm_pic *pic = pic_irqchip(kvm);
+ struct kvm_pic *pic = kvm->arch.vpic;
return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level);
}
@@ -232,11 +232,11 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id)
goto unlock;
}
clear_bit(irq_source_id, &kvm->arch.irq_sources_bitmap);
- if (!ioapic_in_kernel(kvm))
+ if (!irqchip_kernel(kvm))
goto unlock;
kvm_ioapic_clear_all(kvm->arch.vioapic, irq_source_id);
- kvm_pic_clear_all(pic_irqchip(kvm), irq_source_id);
+ kvm_pic_clear_all(kvm->arch.vpic, irq_source_id);
unlock:
mutex_unlock(&kvm->irq_lock);
}
@@ -278,38 +278,35 @@ int kvm_set_routing_entry(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *e,
const struct kvm_irq_routing_entry *ue)
{
- int r = -EINVAL;
- int delta;
- unsigned max_pin;
+ /* also allow creation of routes during KVM_IRQCHIP_INIT_IN_PROGRESS */
+ if (kvm->arch.irqchip_mode == KVM_IRQCHIP_NONE)
+ return -EINVAL;
+ /* Matches smp_wmb() when setting irqchip_mode */
+ smp_rmb();
switch (ue->type) {
case KVM_IRQ_ROUTING_IRQCHIP:
- delta = 0;
+ if (irqchip_split(kvm))
+ return -EINVAL;
+ e->irqchip.pin = ue->u.irqchip.pin;
switch (ue->u.irqchip.irqchip) {
case KVM_IRQCHIP_PIC_SLAVE:
- delta = 8;
+ e->irqchip.pin += PIC_NUM_PINS / 2;
/* fall through */
case KVM_IRQCHIP_PIC_MASTER:
- if (!pic_in_kernel(kvm))
- goto out;
-
+ if (ue->u.irqchip.pin >= PIC_NUM_PINS / 2)
+ return -EINVAL;
e->set = kvm_set_pic_irq;
- max_pin = PIC_NUM_PINS;
break;
case KVM_IRQCHIP_IOAPIC:
- if (!ioapic_in_kernel(kvm))
- goto out;
-
- max_pin = KVM_IOAPIC_NUM_PINS;
+ if (ue->u.irqchip.pin >= KVM_IOAPIC_NUM_PINS)
+ return -EINVAL;
e->set = kvm_set_ioapic_irq;
break;
default:
- goto out;
+ return -EINVAL;
}
e->irqchip.irqchip = ue->u.irqchip.irqchip;
- e->irqchip.pin = ue->u.irqchip.pin + delta;
- if (e->irqchip.pin >= max_pin)
- goto out;
break;
case KVM_IRQ_ROUTING_MSI:
e->set = kvm_set_msi;
@@ -318,7 +315,7 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->msi.data = ue->u.msi.data;
if (kvm_msi_route_invalid(kvm, e))
- goto out;
+ return -EINVAL;
break;
case KVM_IRQ_ROUTING_HV_SINT:
e->set = kvm_hv_set_sint;
@@ -326,12 +323,10 @@ int kvm_set_routing_entry(struct kvm *kvm,
e->hv_sint.sint = ue->u.hv_sint.sint;
break;
default:
- goto out;
+ return -EINVAL;
}
- r = 0;
-out:
- return r;
+ return 0;
}
bool kvm_intr_is_single_vcpu(struct kvm *kvm, struct kvm_lapic_irq *irq,
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac78105..5586765 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4340,7 +4340,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
}
EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool accessed_dirty)
{
struct kvm_mmu *context = &vcpu->arch.mmu;
@@ -4349,6 +4350,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
context->shadow_root_level = kvm_x86_ops->get_tdp_level();
context->nx = true;
+ context->ept_ad = accessed_dirty;
context->page_fault = ept_page_fault;
context->gva_to_gpa = ept_gva_to_gpa;
context->sync_page = ept_sync_page;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e9..d8ccb32 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -74,7 +74,8 @@ enum {
int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+ bool accessed_dirty);
static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
{
diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c
index 37942e4..60168cd 100644
--- a/arch/x86/kvm/page_track.c
+++ b/arch/x86/kvm/page_track.c
@@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn,
return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]);
}
+void kvm_page_track_cleanup(struct kvm *kvm)
+{
+ struct kvm_page_track_notifier_head *head;
+
+ head = &kvm->arch.track_notifier_head;
+ cleanup_srcu_struct(&head->track_srcu);
+}
+
void kvm_page_track_init(struct kvm *kvm)
{
struct kvm_page_track_notifier_head *head;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a011054..314d207 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -23,13 +23,6 @@
* so the code in this file is compiled twice, once per pte size.
*/
-/*
- * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
- * uses for EPT without A/D paging type.
- */
-extern u64 __pure __using_nonexistent_pte_bit(void)
- __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
-
#if PTTYPE == 64
#define pt_element_t u64
#define guest_walker guest_walker64
@@ -39,10 +32,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
- #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
- #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
#ifdef CONFIG_X86_64
#define PT_MAX_FULL_LEVELS 4
#define CMPXCHG cmpxchg
@@ -60,10 +52,9 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
#define PT_LEVEL_BITS PT32_LEVEL_BITS
#define PT_MAX_FULL_LEVELS 2
- #define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
- #define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) true
#define CMPXCHG cmpxchg
#elif PTTYPE == PTTYPE_EPT
#define pt_element_t u64
@@ -74,16 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
#define PT_LEVEL_BITS PT64_LEVEL_BITS
- #define PT_GUEST_ACCESSED_MASK 0
- #define PT_GUEST_DIRTY_MASK 0
- #define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit()
- #define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
+ #define PT_GUEST_DIRTY_SHIFT 9
+ #define PT_GUEST_ACCESSED_SHIFT 8
+ #define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
#define CMPXCHG cmpxchg64
#define PT_MAX_FULL_LEVELS 4
#else
#error Invalid PTTYPE value
#endif
+#define PT_GUEST_DIRTY_MASK (1 << PT_GUEST_DIRTY_SHIFT)
+#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
+
#define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
@@ -111,12 +104,13 @@ static gfn_t gpte_to_gfn_lvl(pt_element_t gpte, int lvl)
return (gpte & PT_LVL_ADDR_MASK(lvl)) >> PAGE_SHIFT;
}
-static inline void FNAME(protect_clean_gpte)(unsigned *access, unsigned gpte)
+static inline void FNAME(protect_clean_gpte)(struct kvm_mmu *mmu, unsigned *access,
+ unsigned gpte)
{
unsigned mask;
/* dirty bit is not supported, so no need to track it */
- if (!PT_GUEST_DIRTY_MASK)
+ if (!PT_HAVE_ACCESSED_DIRTY(mmu))
return;
BUILD_BUG_ON(PT_WRITABLE_MASK != ACC_WRITE_MASK);
@@ -171,7 +165,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu,
goto no_present;
/* if accessed bit is not supported prefetch non accessed gpte */
- if (PT_GUEST_ACCESSED_MASK && !(gpte & PT_GUEST_ACCESSED_MASK))
+ if (PT_HAVE_ACCESSED_DIRTY(&vcpu->arch.mmu) && !(gpte & PT_GUEST_ACCESSED_MASK))
goto no_present;
return false;
@@ -217,7 +211,7 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
int ret;
/* dirty/accessed bits are not supported, so no need to update them */
- if (!PT_GUEST_DIRTY_MASK)
+ if (!PT_HAVE_ACCESSED_DIRTY(mmu))
return 0;
for (level = walker->max_level; level >= walker->level; --level) {
@@ -286,7 +280,9 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
pt_element_t __user *uninitialized_var(ptep_user);
gfn_t table_gfn;
unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+ unsigned nested_access;
gpa_t pte_gpa;
+ bool have_ad;
int offset;
const int write_fault = access & PFERR_WRITE_MASK;
const int user_fault = access & PFERR_USER_MASK;
@@ -299,6 +295,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
retry_walk:
walker->level = mmu->root_level;
pte = mmu->get_cr3(vcpu);
+ have_ad = PT_HAVE_ACCESSED_DIRTY(mmu);
#if PTTYPE == 64
if (walker->level == PT32E_ROOT_LEVEL) {
@@ -312,7 +309,15 @@ retry_walk:
walker->max_level = walker->level;
ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
- accessed_dirty = PT_GUEST_ACCESSED_MASK;
+ accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
+
+ /*
+ * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
+ * by the MOV to CR instruction are treated as reads and do not cause the
+ * processor to set the dirty flag in any EPT paging-structure entry.
+ */
+ nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
+
pt_access = pte_access = ACC_ALL;
++walker->level;
@@ -332,7 +337,7 @@ retry_walk:
walker->pte_gpa[walker->level - 1] = pte_gpa;
real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
- PFERR_USER_MASK|PFERR_WRITE_MASK,
+ nested_access,
&walker->fault);
/*
@@ -394,7 +399,7 @@ retry_walk:
walker->gfn = real_gpa >> PAGE_SHIFT;
if (!write_fault)
- FNAME(protect_clean_gpte)(&pte_access, pte);
+ FNAME(protect_clean_gpte)(mmu, &pte_access, pte);
else
/*
* On a write fault, fold the dirty bit into accessed_dirty.
@@ -485,7 +490,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
- FNAME(protect_clean_gpte)(&pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
pfn = pte_prefetch_gfn_to_pfn(vcpu, gfn,
no_dirty_log && (pte_access & ACC_WRITE_MASK));
if (is_error_pfn(pfn))
@@ -979,7 +984,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
gfn = gpte_to_gfn(gpte);
pte_access = sp->role.access;
pte_access &= FNAME(gpte_access)(vcpu, gpte);
- FNAME(protect_clean_gpte)(&pte_access, gpte);
+ FNAME(protect_clean_gpte)(&vcpu->arch.mmu, &pte_access, gpte);
if (sync_mmio_spte(vcpu, &sp->spt[i], gfn, pte_access,
&nr_present))
@@ -1025,3 +1030,4 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
#undef PT_GUEST_DIRTY_MASK
#undef PT_GUEST_DIRTY_SHIFT
#undef PT_GUEST_ACCESSED_SHIFT
+#undef PT_HAVE_ACCESSED_DIRTY
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index d1efe2c..c41f03e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1198,10 +1198,13 @@ static void init_vmcb(struct vcpu_svm *svm)
set_intercept(svm, INTERCEPT_CLGI);
set_intercept(svm, INTERCEPT_SKINIT);
set_intercept(svm, INTERCEPT_WBINVD);
- set_intercept(svm, INTERCEPT_MONITOR);
- set_intercept(svm, INTERCEPT_MWAIT);
set_intercept(svm, INTERCEPT_XSETBV);
+ if (!kvm_mwait_in_guest()) {
+ set_intercept(svm, INTERCEPT_MONITOR);
+ set_intercept(svm, INTERCEPT_MWAIT);
+ }
+
control->iopm_base_pa = iopm_base;
control->msrpm_base_pa = __pa(svm->msrpm);
control->int_ctl = V_INTR_MASKING_MASK;
@@ -1379,6 +1382,9 @@ static void avic_vm_destroy(struct kvm *kvm)
unsigned long flags;
struct kvm_arch *vm_data = &kvm->arch;
+ if (!avic)
+ return;
+
avic_free_vm_id(vm_data->avic_vm_id);
if (vm_data->avic_logical_id_table_page)
@@ -5253,6 +5259,12 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
avic_handle_ldr_update(vcpu);
}
+static void svm_setup_mce(struct kvm_vcpu *vcpu)
+{
+ /* [63:9] are reserved. */
+ vcpu->arch.mcg_cap &= 0x1ff;
+}
+
static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.cpu_has_kvm_support = has_svm,
.disabled_by_bios = is_disabled,
@@ -5364,6 +5376,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.pmu_ops = &amd_pmu_ops,
.deliver_posted_interrupt = svm_deliver_avic_intr,
.update_pi_irte = svm_update_pi_irte,
+ .setup_mce = svm_setup_mce,
};
static int __init svm_init(void)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 98e82ee..75e6cc9 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -84,9 +84,6 @@ module_param_named(eptad, enable_ept_ad_bits, bool, S_IRUGO);
static bool __read_mostly emulate_invalid_guest_state = true;
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
-static bool __read_mostly vmm_exclusive = 1;
-module_param(vmm_exclusive, bool, S_IRUGO);
-
static bool __read_mostly fasteoi = 1;
module_param(fasteoi, bool, S_IRUGO);
@@ -615,10 +612,6 @@ struct vcpu_vmx {
int vpid;
bool emulation_required;
- /* Support for vnmi-less CPUs */
- int soft_vnmi_blocked;
- ktime_t entry_time;
- s64 vnmi_blocked_time;
u32 exit_reason;
/* Posted interrupt descriptor */
@@ -914,8 +907,6 @@ static void nested_release_page_clean(struct page *page)
static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu);
static u64 construct_eptp(unsigned long root_hpa);
-static void kvm_cpu_vmxon(u64 addr);
-static void kvm_cpu_vmxoff(void);
static bool vmx_xsaves_supported(void);
static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr);
static void vmx_set_segment(struct kvm_vcpu *vcpu,
@@ -1239,6 +1230,11 @@ static inline bool cpu_has_vmx_invvpid_global(void)
return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT;
}
+static inline bool cpu_has_vmx_invvpid(void)
+{
+ return vmx_capability.vpid & VMX_VPID_INVVPID_BIT;
+}
+
static inline bool cpu_has_vmx_ept(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -1285,11 +1281,6 @@ static inline bool cpu_has_vmx_invpcid(void)
SECONDARY_EXEC_ENABLE_INVPCID;
}
-static inline bool cpu_has_virtual_nmis(void)
-{
- return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
-}
-
static inline bool cpu_has_vmx_wbinvd_exit(void)
{
return vmcs_config.cpu_based_2nd_exec_ctrl &
@@ -2235,15 +2226,10 @@ static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(phys_addr);
- else if (!already_loaded)
- loaded_vmcs_clear(vmx->loaded_vmcs);
-
if (!already_loaded) {
+ loaded_vmcs_clear(vmx->loaded_vmcs);
local_irq_disable();
crash_disable_local_vmclear(cpu);
@@ -2321,11 +2307,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
vmx_vcpu_pi_put(vcpu);
__vmx_load_host_state(to_vmx(vcpu));
- if (!vmm_exclusive) {
- __loaded_vmcs_clear(to_vmx(vcpu)->loaded_vmcs);
- vcpu->cpu = -1;
- kvm_cpu_vmxoff();
- }
}
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
@@ -2749,11 +2730,11 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high);
vmx->nested.nested_vmx_secondary_ctls_low = 0;
vmx->nested.nested_vmx_secondary_ctls_high &=
+ SECONDARY_EXEC_RDRAND | SECONDARY_EXEC_RDSEED |
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_RDTSCP |
SECONDARY_EXEC_DESC |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
- SECONDARY_EXEC_ENABLE_VPID |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_WBINVD_EXITING |
@@ -2764,14 +2745,16 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
vmx->nested.nested_vmx_secondary_ctls_high |=
SECONDARY_EXEC_ENABLE_EPT;
vmx->nested.nested_vmx_ept_caps = VMX_EPT_PAGE_WALK_4_BIT |
- VMX_EPTP_WB_BIT | VMX_EPT_2MB_PAGE_BIT |
- VMX_EPT_INVEPT_BIT;
+ VMX_EPTP_WB_BIT | VMX_EPT_INVEPT_BIT;
if (cpu_has_vmx_ept_execute_only())
vmx->nested.nested_vmx_ept_caps |=
VMX_EPT_EXECUTE_ONLY_BIT;
vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
- VMX_EPT_EXTENT_CONTEXT_BIT;
+ VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
+ VMX_EPT_1GB_PAGE_BIT;
+ if (enable_ept_ad_bits)
+ vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
} else
vmx->nested.nested_vmx_ept_caps = 0;
@@ -2781,10 +2764,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
* though it is treated as global context. The alternative is
* not failing the single-context invvpid, and it is worse.
*/
- if (enable_vpid)
+ if (enable_vpid) {
+ vmx->nested.nested_vmx_secondary_ctls_high |=
+ SECONDARY_EXEC_ENABLE_VPID;
vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT |
VMX_VPID_EXTENT_SUPPORTED_MASK;
- else
+ } else
vmx->nested.nested_vmx_vpid_caps = 0;
if (enable_unrestricted_guest)
@@ -3416,6 +3401,7 @@ static __init int vmx_disabled_by_bios(void)
static void kvm_cpu_vmxon(u64 addr)
{
+ cr4_set_bits(X86_CR4_VMXE);
intel_pt_handle_vmx(1);
asm volatile (ASM_VMX_VMXON_RAX
@@ -3458,12 +3444,8 @@ static int hardware_enable(void)
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old | test_bits);
}
- cr4_set_bits(X86_CR4_VMXE);
-
- if (vmm_exclusive) {
- kvm_cpu_vmxon(phys_addr);
- ept_sync_global();
- }
+ kvm_cpu_vmxon(phys_addr);
+ ept_sync_global();
native_store_gdt(this_cpu_ptr(&host_gdt));
@@ -3489,15 +3471,13 @@ static void kvm_cpu_vmxoff(void)
asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
intel_pt_handle_vmx(0);
+ cr4_clear_bits(X86_CR4_VMXE);
}
static void hardware_disable(void)
{
- if (vmm_exclusive) {
- vmclear_local_loaded_vmcss();
- kvm_cpu_vmxoff();
- }
- cr4_clear_bits(X86_CR4_VMXE);
+ vmclear_local_loaded_vmcss();
+ kvm_cpu_vmxoff();
}
static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
@@ -3547,11 +3527,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
CPU_BASED_USE_IO_BITMAPS |
CPU_BASED_MOV_DR_EXITING |
CPU_BASED_USE_TSC_OFFSETING |
- CPU_BASED_MWAIT_EXITING |
- CPU_BASED_MONITOR_EXITING |
CPU_BASED_INVLPG_EXITING |
CPU_BASED_RDPMC_EXITING;
+ if (!kvm_mwait_in_guest())
+ min |= CPU_BASED_MWAIT_EXITING |
+ CPU_BASED_MONITOR_EXITING;
+
opt = CPU_BASED_TPR_SHADOW |
CPU_BASED_USE_MSR_BITMAPS |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
@@ -3617,9 +3599,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
&_vmexit_control) < 0)
return -EIO;
- min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
- opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
- PIN_BASED_VMX_PREEMPTION_TIMER;
+ min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
+ PIN_BASED_VIRTUAL_NMIS;
+ opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
&_pin_based_exec_control) < 0)
return -EIO;
@@ -4011,11 +3993,12 @@ static void exit_lmode(struct kvm_vcpu *vcpu)
static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid)
{
- vpid_sync_context(vpid);
if (enable_ept) {
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
ept_sync_context(construct_eptp(vcpu->arch.mmu.root_hpa));
+ } else {
+ vpid_sync_context(vpid);
}
}
@@ -4024,6 +4007,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu)
__vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid);
}
+static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu)
+{
+ if (enable_ept)
+ vmx_flush_tlb(vcpu);
+}
+
static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu)
{
ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits;
@@ -5285,8 +5274,6 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
vmx->rmode.vm86_active = 0;
- vmx->soft_vnmi_blocked = 0;
-
vmx->vcpu.arch.regs[VCPU_REGS_RDX] = get_rdx_init_val();
kvm_set_cr8(vcpu, 0);
@@ -5406,8 +5393,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
static void enable_nmi_window(struct kvm_vcpu *vcpu)
{
- if (!cpu_has_virtual_nmis() ||
- vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+ if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
enable_irq_window(vcpu);
return;
}
@@ -5448,19 +5434,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!is_guest_mode(vcpu)) {
- if (!cpu_has_virtual_nmis()) {
- /*
- * Tracking the NMI-blocked state in software is built upon
- * finding the next open IRQ window. This, in turn, depends on
- * well-behaving guests: They have to keep IRQs disabled at
- * least as long as the NMI handler runs. Otherwise we may
- * cause NMI nesting, maybe breaking the guest. But as this is
- * highly unlikely, we can live with the residual risk.
- */
- vmx->soft_vnmi_blocked = 1;
- vmx->vnmi_blocked_time = 0;
- }
-
++vcpu->stat.nmi_injections;
vmx->nmi_known_unmasked = false;
}
@@ -5477,8 +5450,6 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
{
- if (!cpu_has_virtual_nmis())
- return to_vmx(vcpu)->soft_vnmi_blocked;
if (to_vmx(vcpu)->nmi_known_unmasked)
return false;
return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;
@@ -5488,20 +5459,13 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
- if (!cpu_has_virtual_nmis()) {
- if (vmx->soft_vnmi_blocked != masked) {
- vmx->soft_vnmi_blocked = masked;
- vmx->vnmi_blocked_time = 0;
- }
- } else {
- vmx->nmi_known_unmasked = !masked;
- if (masked)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- }
+ vmx->nmi_known_unmasked = !masked;
+ if (masked)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
}
static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
@@ -5509,9 +5473,6 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
if (to_vmx(vcpu)->nested.nested_run_pending)
return 0;
- if (!cpu_has_virtual_nmis() && to_vmx(vcpu)->soft_vnmi_blocked)
- return 0;
-
return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
(GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
| GUEST_INTR_STATE_NMI));
@@ -6232,21 +6193,18 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
unsigned long exit_qualification;
gpa_t gpa;
u32 error_code;
- int gla_validity;
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
- gla_validity = (exit_qualification >> 7) & 0x3;
- if (gla_validity == 0x2) {
- printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
- printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
- (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
- vmcs_readl(GUEST_LINEAR_ADDRESS));
- printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
- (long unsigned int)exit_qualification);
- vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
- vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
- return 0;
+ if (is_guest_mode(vcpu)
+ && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
+ /*
+ * Fix up exit_qualification according to whether guest
+ * page table accesses are reads or writes.
+ */
+ u64 eptp = nested_ept_get_cr3(vcpu);
+ if (!(eptp & VMX_EPT_AD_ENABLE_BIT))
+ exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
}
/*
@@ -6256,7 +6214,6 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
* AAK134, BY25.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
@@ -6342,7 +6299,7 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
return handle_interrupt_window(&vmx->vcpu);
- if (test_bit(KVM_REQ_EVENT, &vcpu->requests))
+ if (kvm_test_request(KVM_REQ_EVENT, vcpu))
return 1;
err = emulate_instruction(vcpu, EMULTYPE_NO_REEXECUTE);
@@ -6517,8 +6474,10 @@ static __init int hardware_setup(void)
if (boot_cpu_has(X86_FEATURE_NX))
kvm_enable_efer_bits(EFER_NX);
- if (!cpu_has_vmx_vpid())
+ if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() ||
+ !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global()))
enable_vpid = 0;
+
if (!cpu_has_vmx_shadow_vmcs())
enable_shadow_vmcs = 0;
if (enable_shadow_vmcs)
@@ -7093,34 +7052,24 @@ out_msr_bitmap:
static int handle_vmon(struct kvm_vcpu *vcpu)
{
int ret;
- struct kvm_segment cs;
struct vcpu_vmx *vmx = to_vmx(vcpu);
const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED
| FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX;
- /* The Intel VMX Instruction Reference lists a bunch of bits that
- * are prerequisite to running VMXON, most notably cr4.VMXE must be
- * set to 1 (see vmx_set_cr4() for when we allow the guest to set this).
- * Otherwise, we should fail with #UD. We test these now:
+ /*
+ * The Intel VMX Instruction Reference lists a bunch of bits that are
+ * prerequisite to running VMXON, most notably cr4.VMXE must be set to
+ * 1 (see vmx_set_cr4() for when we allow the guest to set this).
+ * Otherwise, we should fail with #UD. But most faulting conditions
+ * have already been checked by hardware, prior to the VM-exit for
+ * VMXON. We do test guest cr4.VMXE because processor CR4 always has
+ * that bit set to 1 in non-root mode.
*/
- if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE) ||
- !kvm_read_cr0_bits(vcpu, X86_CR0_PE) ||
- (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
-
- vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
- if (is_long_mode(vcpu) && !cs.l) {
+ if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
- if (vmx_get_cpl(vcpu)) {
- kvm_inject_gp(vcpu, 0);
- return 1;
- }
-
if (vmx->nested.vmxon) {
nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
return kvm_skip_emulated_instruction(vcpu);
@@ -7147,29 +7096,15 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
* Intel's VMX Instruction Reference specifies a common set of prerequisites
* for running VMX instructions (except VMXON, whose prerequisites are
* slightly different). It also specifies what exception to inject otherwise.
+ * Note that many of these exceptions have priority over VM exits, so they
+ * don't have to be checked again here.
*/
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
{
- struct kvm_segment cs;
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
- if (!vmx->nested.vmxon) {
+ if (!to_vmx(vcpu)->nested.vmxon) {
kvm_queue_exception(vcpu, UD_VECTOR);
return 0;
}
-
- vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
- if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
- (is_long_mode(vcpu) && !cs.l)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 0;
- }
-
- if (vmx_get_cpl(vcpu)) {
- kvm_inject_gp(vcpu, 0);
- return 0;
- }
-
return 1;
}
@@ -7513,7 +7448,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &gva))
return 1;
- /* _system ok, as nested_vmx_check_permission verified cpl=0 */
+ /* _system ok, as hardware has verified cpl=0 */
kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, gva,
&field_value, (is_long_mode(vcpu) ? 8 : 4), NULL);
}
@@ -7646,7 +7581,7 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
if (get_vmx_mem_address(vcpu, exit_qualification,
vmx_instruction_info, true, &vmcs_gva))
return 1;
- /* ok to use *_system, as nested_vmx_check_permission verified cpl=0 */
+ /* ok to use *_system, as hardware has verified cpl=0 */
if (kvm_write_guest_virt_system(&vcpu->arch.emulate_ctxt, vmcs_gva,
(void *)&to_vmx(vcpu)->nested.current_vmptr,
sizeof(u64), &e)) {
@@ -7679,11 +7614,6 @@ static int handle_invept(struct kvm_vcpu *vcpu)
if (!nested_vmx_check_permission(vcpu))
return 1;
- if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
- kvm_queue_exception(vcpu, UD_VECTOR);
- return 1;
- }
-
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
type = kvm_register_readl(vcpu, (vmx_instruction_info >> 28) & 0xf);
@@ -7805,7 +7735,6 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
* "blocked by NMI" bit has to be set before next VM entry.
*/
if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
- cpu_has_virtual_nmis() &&
(exit_qualification & INTR_INFO_UNBLOCK_NMI))
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
GUEST_INTR_STATE_NMI);
@@ -8107,6 +8036,10 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+ case EXIT_REASON_RDRAND:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND);
+ case EXIT_REASON_RDSEED:
+ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED);
case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
@@ -8477,31 +8410,12 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
return 0;
}
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked &&
- !(is_guest_mode(vcpu) && nested_cpu_has_virtual_nmis(
- get_vmcs12(vcpu))))) {
- if (vmx_interrupt_allowed(vcpu)) {
- vmx->soft_vnmi_blocked = 0;
- } else if (vmx->vnmi_blocked_time > 1000000000LL &&
- vcpu->arch.nmi_pending) {
- /*
- * This CPU don't support us in finding the end of an
- * NMI-blocked window if the guest runs with IRQs
- * disabled. So we pull the trigger after 1 s of
- * futile waiting, but inform the user about this.
- */
- printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
- "state on VCPU %d after 1 s timeout\n",
- __func__, vcpu->vcpu_id);
- vmx->soft_vnmi_blocked = 0;
- }
- }
-
if (exit_reason < kvm_vmx_max_exit_handlers
&& kvm_vmx_exit_handlers[exit_reason])
return kvm_vmx_exit_handlers[exit_reason](vcpu);
else {
- WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason);
+ vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n",
+ exit_reason);
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
}
@@ -8547,6 +8461,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set)
} else {
sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmx_flush_tlb_ept_only(vcpu);
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
@@ -8572,8 +8487,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa)
*/
if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(&vmx->vcpu),
- SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
vmcs_write64(APIC_ACCESS_ADDR, hpa);
+ vmx_flush_tlb_ept_only(vcpu);
+ }
}
static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr)
@@ -8768,37 +8685,33 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)
idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;
- if (cpu_has_virtual_nmis()) {
- if (vmx->nmi_known_unmasked)
- return;
- /*
- * Can't use vmx->exit_intr_info since we're not sure what
- * the exit reason is.
- */
- exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
- unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
- vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
- /*
- * SDM 3: 27.7.1.2 (September 2008)
- * Re-set bit "block by NMI" before VM entry if vmexit caused by
- * a guest IRET fault.
- * SDM 3: 23.2.2 (September 2008)
- * Bit 12 is undefined in any of the following cases:
- * If the VM exit sets the valid bit in the IDT-vectoring
- * information field.
- * If the VM exit is due to a double fault.
- */
- if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
- vector != DF_VECTOR && !idtv_info_valid)
- vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
- GUEST_INTR_STATE_NMI);
- else
- vmx->nmi_known_unmasked =
- !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
- & GUEST_INTR_STATE_NMI);
- } else if (unlikely(vmx->soft_vnmi_blocked))
- vmx->vnmi_blocked_time +=
- ktime_to_ns(ktime_sub(ktime_get(), vmx->entry_time));
+ if (vmx->nmi_known_unmasked)
+ return;
+ /*
+ * Can't use vmx->exit_intr_info since we're not sure what
+ * the exit reason is.
+ */
+ exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+ unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+ vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+ /*
+ * SDM 3: 27.7.1.2 (September 2008)
+ * Re-set bit "block by NMI" before VM entry if vmexit caused by
+ * a guest IRET fault.
+ * SDM 3: 23.2.2 (September 2008)
+ * Bit 12 is undefined in any of the following cases:
+ * If the VM exit sets the valid bit in the IDT-vectoring
+ * information field.
+ * If the VM exit is due to a double fault.
+ */
+ if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+ vector != DF_VECTOR && !idtv_info_valid)
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+ GUEST_INTR_STATE_NMI);
+ else
+ vmx->nmi_known_unmasked =
+ !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+ & GUEST_INTR_STATE_NMI);
}
static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,
@@ -8915,10 +8828,6 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
unsigned long debugctlmsr, cr4;
- /* Record the guest's net vcpu time for enforced NMI injections. */
- if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
- vmx->entry_time = ktime_get();
-
/* Don't enter VMX if guest state is invalid, let the exit handler
start emulation until we arrive back to a valid state */
if (vmx->emulation_required)
@@ -9126,16 +9035,16 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx_complete_interrupts(vmx);
}
-static void vmx_load_vmcs01(struct kvm_vcpu *vcpu)
+static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
- if (vmx->loaded_vmcs == &vmx->vmcs01)
+ if (vmx->loaded_vmcs == vmcs)
return;
cpu = get_cpu();
- vmx->loaded_vmcs = &vmx->vmcs01;
+ vmx->loaded_vmcs = vmcs;
vmx_vcpu_put(vcpu);
vmx_vcpu_load(vcpu, cpu);
vcpu->cpu = cpu;
@@ -9153,7 +9062,7 @@ static void vmx_free_vcpu_nested(struct kvm_vcpu *vcpu)
r = vcpu_load(vcpu);
BUG_ON(r);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
free_nested(vmx);
vcpu_put(vcpu);
}
@@ -9214,11 +9123,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->loaded_vmcs->shadow_vmcs = NULL;
if (!vmx->loaded_vmcs->vmcs)
goto free_msrs;
- if (!vmm_exclusive)
- kvm_cpu_vmxon(__pa(per_cpu(vmxarea, raw_smp_processor_id())));
loaded_vmcs_init(vmx->loaded_vmcs);
- if (!vmm_exclusive)
- kvm_cpu_vmxoff();
cpu = get_cpu();
vmx_vcpu_load(&vmx->vcpu, cpu);
@@ -9478,17 +9383,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
return get_vmcs12(vcpu)->ept_pointer;
}
-static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
{
+ u64 eptp;
+
WARN_ON(mmu_is_nested(vcpu));
+ eptp = nested_ept_get_cr3(vcpu);
+ if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+ return 1;
+
+ kvm_mmu_unload(vcpu);
kvm_init_shadow_ept_mmu(vcpu,
to_vmx(vcpu)->nested.nested_vmx_ept_caps &
- VMX_EPT_EXECUTE_ONLY_BIT);
+ VMX_EPT_EXECUTE_ONLY_BIT,
+ eptp & VMX_EPT_AD_ENABLE_BIT);
vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+ return 0;
}
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -9974,7 +9888,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 exec_control;
- bool nested_ept_enabled = false;
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
@@ -10121,8 +10034,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmcs12->guest_intr_status);
}
- nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0;
-
/*
* Write an illegal value to APIC_ACCESS_ADDR. Later,
* nested_get_vmcs12_pages will either fix it up or
@@ -10253,8 +10164,13 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
}
if (nested_cpu_has_ept(vmcs12)) {
- kvm_mmu_unload(vcpu);
- nested_ept_init_mmu_context(vcpu);
+ if (nested_ept_init_mmu_context(vcpu)) {
+ *entry_failure_code = ENTRY_FAIL_DEFAULT;
+ return 1;
+ }
+ } else if (nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ vmx_flush_tlb_ept_only(vcpu);
}
/*
@@ -10282,12 +10198,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
vmx_set_efer(vcpu, vcpu->arch.efer);
/* Shadow page tables on either EPT or shadow page tables. */
- if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled,
+ if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
entry_failure_code))
return 1;
- kvm_mmu_reset_context(vcpu);
-
if (!enable_ept)
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
@@ -10407,7 +10321,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
struct loaded_vmcs *vmcs02;
- int cpu;
u32 msr_entry_idx;
u32 exit_qual;
@@ -10420,18 +10333,12 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
- cpu = get_cpu();
- vmx->loaded_vmcs = vmcs02;
- vmx_vcpu_put(vcpu);
- vmx_vcpu_load(vcpu, cpu);
- vcpu->cpu = cpu;
- put_cpu();
-
+ vmx_switch_vmcs(vcpu, vmcs02);
vmx_segment_cache_clear(vmx);
if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) {
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_INVALID_STATE, exit_qual);
return 1;
@@ -10444,7 +10351,7 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
vmcs12->vm_entry_msr_load_count);
if (msr_entry_idx) {
leave_guest_mode(vcpu);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
nested_vmx_entry_failure(vcpu, vmcs12,
EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx);
return 1;
@@ -11012,7 +10919,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
if (unlikely(vmx->fail))
vm_inst_error = vmcs_read32(VM_INSTRUCTION_ERROR);
- vmx_load_vmcs01(vcpu);
+ vmx_switch_vmcs(vcpu, &vmx->vmcs01);
if ((exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
&& nested_exit_intr_ack_set(vcpu)) {
@@ -11056,6 +10963,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
vmx->nested.change_vmcs01_virtual_x2apic_mode = false;
vmx_set_virtual_x2apic_mode(vcpu,
vcpu->arch.apic_base & X2APIC_ENABLE);
+ } else if (!nested_cpu_has_ept(vmcs12) &&
+ nested_cpu_has2(vmcs12,
+ SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
+ vmx_flush_tlb_ept_only(vcpu);
}
/* This is needed for same reason as it was needed in prepare_vmcs02 */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1faf620..be2ade5 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -27,7 +27,6 @@
#include "kvm_cache_regs.h"
#include "x86.h"
#include "cpuid.h"
-#include "assigned-dev.h"
#include "pmu.h"
#include "hyperv.h"
@@ -1008,6 +1007,8 @@ static u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
+ MSR_PLATFORM_INFO,
+ MSR_MISC_FEATURES_ENABLES,
};
static unsigned num_emulated_msrs;
@@ -1444,10 +1445,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
struct kvm *kvm = vcpu->kvm;
u64 offset, ns, elapsed;
unsigned long flags;
- s64 usdiff;
bool matched;
bool already_matched;
u64 data = msr->data;
+ bool synchronizing = false;
raw_spin_lock_irqsave(&kvm->arch.tsc_write_lock, flags);
offset = kvm_compute_tsc_offset(vcpu, data);
@@ -1455,51 +1456,34 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
elapsed = ns - kvm->arch.last_tsc_nsec;
if (vcpu->arch.virtual_tsc_khz) {
- int faulted = 0;
-
- /* n.b - signed multiplication and division required */
- usdiff = data - kvm->arch.last_tsc_write;
-#ifdef CONFIG_X86_64
- usdiff = (usdiff * 1000) / vcpu->arch.virtual_tsc_khz;
-#else
- /* do_div() only does unsigned */
- asm("1: idivl %[divisor]\n"
- "2: xor %%edx, %%edx\n"
- " movl $0, %[faulted]\n"
- "3:\n"
- ".section .fixup,\"ax\"\n"
- "4: movl $1, %[faulted]\n"
- " jmp 3b\n"
- ".previous\n"
-
- _ASM_EXTABLE(1b, 4b)
-
- : "=A"(usdiff), [faulted] "=r" (faulted)
- : "A"(usdiff * 1000), [divisor] "rm"(vcpu->arch.virtual_tsc_khz));
-
-#endif
- do_div(elapsed, 1000);
- usdiff -= elapsed;
- if (usdiff < 0)
- usdiff = -usdiff;
-
- /* idivl overflow => difference is larger than USEC_PER_SEC */
- if (faulted)
- usdiff = USEC_PER_SEC;
- } else
- usdiff = USEC_PER_SEC; /* disable TSC match window below */
+ if (data == 0 && msr->host_initiated) {
+ /*
+ * detection of vcpu initialization -- need to sync
+ * with other vCPUs. This particularly helps to keep
+ * kvm_clock stable after CPU hotplug
+ */
+ synchronizing = true;
+ } else {
+ u64 tsc_exp = kvm->arch.last_tsc_write +
+ nsec_to_cycles(vcpu, elapsed);
+ u64 tsc_hz = vcpu->arch.virtual_tsc_khz * 1000LL;
+ /*
+ * Special case: TSC write with a small delta (1 second)
+ * of virtual cycle time against real time is
+ * interpreted as an attempt to synchronize the CPU.
+ */
+ synchronizing = data < tsc_exp + tsc_hz &&
+ data + tsc_hz > tsc_exp;
+ }
+ }
/*
- * Special case: TSC write with a small delta (1 second) of virtual
- * cycle time against real time is interpreted as an attempt to
- * synchronize the CPU.
- *
* For a reliable TSC, we can match TSC offsets, and for an unstable
* TSC, we add elapsed time in this computation. We could let the
* compensation code attempt to catch up if we fall behind, but
* it's better to try to match offsets from the beginning.
*/
- if (usdiff < USEC_PER_SEC &&
+ if (synchronizing &&
vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
if (!check_tsc_unstable()) {
offset = kvm->arch.cur_tsc_offset;
@@ -1769,13 +1753,13 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
/* guest entries allowed */
kvm_for_each_vcpu(i, vcpu, kvm)
- clear_bit(KVM_REQ_MCLOCK_INPROGRESS, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
spin_unlock(&ka->pvclock_gtod_sync_lock);
#endif
}
-static u64 __get_kvmclock_ns(struct kvm *kvm)
+u64 get_kvmclock_ns(struct kvm *kvm)
{
struct kvm_arch *ka = &kvm->arch;
struct pvclock_vcpu_time_info hv_clock;
@@ -1796,18 +1780,6 @@ static u64 __get_kvmclock_ns(struct kvm *kvm)
return __pvclock_read_cycles(&hv_clock, rdtsc());
}
-u64 get_kvmclock_ns(struct kvm *kvm)
-{
- unsigned long flags;
- s64 ns;
-
- local_irq_save(flags);
- ns = __get_kvmclock_ns(kvm);
- local_irq_restore(flags);
-
- return ns;
-}
-
static void kvm_setup_pvclock_page(struct kvm_vcpu *v)
{
struct kvm_vcpu_arch *vcpu = &v->arch;
@@ -2155,6 +2127,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_VM_HSAVE_PA:
case MSR_AMD64_PATCH_LOADER:
case MSR_AMD64_BU_CFG2:
+ case MSR_AMD64_DC_CFG:
break;
case MSR_EFER:
@@ -2230,8 +2203,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
bool tmp = (msr == MSR_KVM_SYSTEM_TIME);
if (ka->boot_vcpu_runs_old_kvmclock != tmp)
- set_bit(KVM_REQ_MASTERCLOCK_UPDATE,
- &vcpu->requests);
+ kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
ka->boot_vcpu_runs_old_kvmclock = tmp;
}
@@ -2331,6 +2303,21 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
vcpu->arch.osvw.status = data;
break;
+ case MSR_PLATFORM_INFO:
+ if (!msr_info->host_initiated ||
+ data & ~MSR_PLATFORM_INFO_CPUID_FAULT ||
+ (!(data & MSR_PLATFORM_INFO_CPUID_FAULT) &&
+ cpuid_fault_enabled(vcpu)))
+ return 1;
+ vcpu->arch.msr_platform_info = data;
+ break;
+ case MSR_MISC_FEATURES_ENABLES:
+ if (data & ~MSR_MISC_FEATURES_ENABLES_CPUID_FAULT ||
+ (data & MSR_MISC_FEATURES_ENABLES_CPUID_FAULT &&
+ !supports_cpuid_fault(vcpu)))
+ return 1;
+ vcpu->arch.msr_misc_features_enables = data;
+ break;
default:
if (msr && (msr == vcpu->kvm->arch.xen_hvm_config.msr))
return xen_hvm_config(vcpu, data);
@@ -2417,6 +2404,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_FAM10H_MMIO_CONF_BASE:
case MSR_AMD64_BU_CFG2:
case MSR_IA32_PERF_CTL:
+ case MSR_AMD64_DC_CFG:
msr_info->data = 0;
break;
case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
@@ -2545,6 +2533,12 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.osvw.status;
break;
+ case MSR_PLATFORM_INFO:
+ msr_info->data = vcpu->arch.msr_platform_info;
+ break;
+ case MSR_MISC_FEATURES_ENABLES:
+ msr_info->data = vcpu->arch.msr_misc_features_enables;
+ break;
default:
if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data);
@@ -2675,15 +2669,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_ASSIGN_DEV_IRQ:
- case KVM_CAP_PCI_2_3:
-#endif
r = 1;
break;
case KVM_CAP_ADJUST_CLOCK:
r = KVM_CLOCK_TSC_STABLE;
break;
+ case KVM_CAP_X86_GUEST_MWAIT:
+ r = kvm_mwait_in_guest();
+ break;
case KVM_CAP_X86_SMM:
/* SMBASE is usually relocated above 1M on modern chipsets,
* and SMM handlers might indeed rely on 4G segment limits,
@@ -2695,9 +2688,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
*/
r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
break;
- case KVM_CAP_COALESCED_MMIO:
- r = KVM_COALESCED_MMIO_PAGE_OFFSET;
- break;
case KVM_CAP_VAPIC:
r = !kvm_x86_ops->cpu_has_accelerated_tpr();
break;
@@ -2713,11 +2703,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_PV_MMU: /* obsolete */
r = 0;
break;
-#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT
- case KVM_CAP_IOMMU:
- r = iommu_present(&pci_bus_type);
- break;
-#endif
case KVM_CAP_MCE:
r = KVM_MAX_MCE_BANKS;
break;
@@ -2816,11 +2801,6 @@ static bool need_emulate_wbinvd(struct kvm_vcpu *vcpu)
return kvm_arch_has_noncoherent_dma(vcpu->kvm);
}
-static inline void kvm_migrate_timers(struct kvm_vcpu *vcpu)
-{
- set_bit(KVM_REQ_MIGRATE_TIMER, &vcpu->requests);
-}
-
void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
/* Address WBINVD may be executed by guest */
@@ -2864,7 +2844,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (!vcpu->kvm->arch.use_master_clock || vcpu->cpu == -1)
kvm_make_request(KVM_REQ_GLOBAL_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != cpu)
- kvm_migrate_timers(vcpu);
+ kvm_make_request(KVM_REQ_MIGRATE_TIMER, vcpu);
vcpu->cpu = cpu;
}
@@ -3124,7 +3104,14 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu,
return -EINVAL;
if (events->exception.injected &&
- (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR))
+ (events->exception.nr > 31 || events->exception.nr == NMI_VECTOR ||
+ is_guest_mode(vcpu)))
+ return -EINVAL;
+
+ /* INITs are latched while in SMM */
+ if (events->flags & KVM_VCPUEVENT_VALID_SMM &&
+ (events->smi.smm || events->smi.pending) &&
+ vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
return -EINVAL;
process_nmi(vcpu);
@@ -3721,22 +3708,21 @@ static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[0],
+ memcpy(&chip->chip.pic, &pic->pics[0],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_PIC_SLAVE:
- memcpy(&chip->chip.pic,
- &pic_irqchip(kvm)->pics[1],
+ memcpy(&chip->chip.pic, &pic->pics[1],
sizeof(struct kvm_pic_state));
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_get_ioapic(kvm, &chip->chip.ioapic);
+ kvm_get_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
@@ -3747,32 +3733,31 @@ static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
{
+ struct kvm_pic *pic = kvm->arch.vpic;
int r;
r = 0;
switch (chip->chip_id) {
case KVM_IRQCHIP_PIC_MASTER:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[0],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[0], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_PIC_SLAVE:
- spin_lock(&pic_irqchip(kvm)->lock);
- memcpy(&pic_irqchip(kvm)->pics[1],
- &chip->chip.pic,
+ spin_lock(&pic->lock);
+ memcpy(&pic->pics[1], &chip->chip.pic,
sizeof(struct kvm_pic_state));
- spin_unlock(&pic_irqchip(kvm)->lock);
+ spin_unlock(&pic->lock);
break;
case KVM_IRQCHIP_IOAPIC:
- r = kvm_set_ioapic(kvm, &chip->chip.ioapic);
+ kvm_set_ioapic(kvm, &chip->chip.ioapic);
break;
default:
r = -EINVAL;
break;
}
- kvm_pic_update_irq(pic_irqchip(kvm));
+ kvm_pic_update_irq(pic);
return r;
}
@@ -3934,9 +3919,14 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
goto split_irqchip_unlock;
if (kvm->created_vcpus)
goto split_irqchip_unlock;
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
r = kvm_setup_empty_irq_routing(kvm);
- if (r)
+ if (r) {
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+ /* Pairs with smp_rmb() when reading irqchip_mode */
+ smp_wmb();
goto split_irqchip_unlock;
+ }
/* Pairs with irqchip_in_kernel. */
smp_wmb();
kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT;
@@ -4018,20 +4008,18 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = kvm_ioapic_init(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_INIT_IN_PROGRESS;
r = kvm_setup_default_irq_routing(kvm);
if (r) {
- mutex_lock(&kvm->slots_lock);
- mutex_lock(&kvm->irq_lock);
+ kvm->arch.irqchip_mode = KVM_IRQCHIP_NONE;
+ /* Pairs with smp_rmb() when reading irqchip_mode */
+ smp_wmb();
kvm_ioapic_destroy(kvm);
kvm_pic_destroy(kvm);
- mutex_unlock(&kvm->irq_lock);
- mutex_unlock(&kvm->slots_lock);
goto create_irqchip_unlock;
}
/* Write kvm->irq_routing before enabling irqchip_in_kernel. */
@@ -4196,10 +4184,8 @@ long kvm_arch_vm_ioctl(struct file *filp,
goto out;
r = 0;
- local_irq_disable();
- now_ns = __get_kvmclock_ns(kvm);
+ now_ns = get_kvmclock_ns(kvm);
kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
- local_irq_enable();
kvm_gen_update_masterclock(kvm);
break;
}
@@ -4207,11 +4193,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
struct kvm_clock_data user_ns;
u64 now_ns;
- local_irq_disable();
- now_ns = __get_kvmclock_ns(kvm);
+ now_ns = get_kvmclock_ns(kvm);
user_ns.clock = now_ns;
user_ns.flags = kvm->arch.use_master_clock ? KVM_CLOCK_TSC_STABLE : 0;
- local_irq_enable();
memset(&user_ns.pad, 0, sizeof(user_ns.pad));
r = -EFAULT;
@@ -4230,7 +4214,7 @@ long kvm_arch_vm_ioctl(struct file *filp,
break;
}
default:
- r = kvm_vm_ioctl_assigned_device(kvm, ioctl, arg);
+ r = -ENOTTY;
}
out:
return r;
@@ -5223,6 +5207,16 @@ static void emulator_set_nmi_mask(struct x86_emulate_ctxt *ctxt, bool masked)
kvm_x86_ops->set_nmi_mask(emul_to_vcpu(ctxt), masked);
}
+static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt)
+{
+ return emul_to_vcpu(ctxt)->arch.hflags;
+}
+
+static void emulator_set_hflags(struct x86_emulate_ctxt *ctxt, unsigned emul_flags)
+{
+ kvm_set_hflags(emul_to_vcpu(ctxt), emul_flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.read_gpr = emulator_read_gpr,
.write_gpr = emulator_write_gpr,
@@ -5262,6 +5256,8 @@ static const struct x86_emulate_ops emulate_ops = {
.intercept = emulator_intercept,
.get_cpuid = emulator_get_cpuid,
.set_nmi_mask = emulator_set_nmi_mask,
+ .get_hflags = emulator_get_hflags,
+ .set_hflags = emulator_set_hflags,
};
static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask)
@@ -5314,7 +5310,6 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
BUILD_BUG_ON(HF_GUEST_MASK != X86EMUL_GUEST_MASK);
BUILD_BUG_ON(HF_SMM_MASK != X86EMUL_SMM_MASK);
BUILD_BUG_ON(HF_SMM_INSIDE_NMI_MASK != X86EMUL_SMM_INSIDE_NMI_MASK);
- ctxt->emul_flags = vcpu->arch.hflags;
init_decode_cache(ctxt);
vcpu->arch.emulate_regs_need_sync_from_vcpu = false;
@@ -5718,8 +5713,6 @@ restart:
unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
- if (vcpu->arch.hflags != ctxt->emul_flags)
- kvm_set_hflags(vcpu, ctxt->emul_flags);
kvm_rip_write(vcpu, ctxt->eip);
if (r == EMULATE_DONE)
kvm_vcpu_check_singlestep(vcpu, rflags, &r);
@@ -6869,7 +6862,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
/*
* 1) We should set ->mode before checking ->requests. Please see
- * the comment in kvm_make_all_cpus_request.
+ * the comment in kvm_vcpu_exiting_guest_mode().
*
* 2) For APICv, we should set ->mode before checking PIR.ON. This
* pairs with the memory barrier implicit in pi_test_and_set_on
@@ -7051,7 +7044,7 @@ static int vcpu_run(struct kvm_vcpu *vcpu)
if (r <= 0)
break;
- clear_bit(KVM_REQ_PENDING_TIMER, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_PENDING_TIMER, vcpu);
if (kvm_cpu_has_pending_timer(vcpu))
kvm_inject_pending_timer_irqs(vcpu);
@@ -7179,7 +7172,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)) {
kvm_vcpu_block(vcpu);
kvm_apic_accept_events(vcpu);
- clear_bit(KVM_REQ_UNHALT, &vcpu->requests);
+ kvm_clear_request(KVM_REQ_UNHALT, vcpu);
r = -EAGAIN;
goto out;
}
@@ -7355,6 +7348,12 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
mp_state->mp_state != KVM_MP_STATE_RUNNABLE)
return -EINVAL;
+ /* INITs are latched while in SMM */
+ if ((is_smm(vcpu) || vcpu->arch.smi_pending) &&
+ (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED ||
+ mp_state->mp_state == KVM_MP_STATE_INIT_RECEIVED))
+ return -EINVAL;
+
if (mp_state->mp_state == KVM_MP_STATE_SIPI_RECEIVED) {
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
set_bit(KVM_APIC_SIPI, &vcpu->arch.apic->pending_events);
@@ -7724,6 +7723,9 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
if (!init_event) {
kvm_pmu_reset(vcpu);
vcpu->arch.smbase = 0x30000;
+
+ vcpu->arch.msr_platform_info = MSR_PLATFORM_INFO_CPUID_FAULT;
+ vcpu->arch.msr_misc_features_enables = 0;
}
memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));
@@ -8068,7 +8070,6 @@ void kvm_arch_sync_events(struct kvm *kvm)
{
cancel_delayed_work_sync(&kvm->arch.kvmclock_sync_work);
cancel_delayed_work_sync(&kvm->arch.kvmclock_update_work);
- kvm_free_all_assigned_devices(kvm);
kvm_free_pit(kvm);
}
@@ -8152,12 +8153,12 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
}
if (kvm_x86_ops->vm_destroy)
kvm_x86_ops->vm_destroy(kvm);
- kvm_iommu_unmap_guest(kvm);
- kfree(kvm->arch.vpic);
- kfree(kvm->arch.vioapic);
+ kvm_pic_destroy(kvm);
+ kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kvm_mmu_uninit_vm(kvm);
+ kvm_page_track_cleanup(kvm);
}
void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free,
@@ -8384,7 +8385,7 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu)
if (atomic_read(&vcpu->arch.nmi_queued))
return true;
- if (test_bit(KVM_REQ_SMI, &vcpu->requests))
+ if (kvm_test_request(KVM_REQ_SMI, vcpu))
return true;
if (kvm_arch_interrupt_allowed(vcpu) &&
@@ -8566,11 +8567,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
{
struct x86_exception fault;
- trace_kvm_async_pf_ready(work->arch.token, work->gva);
if (work->wakeup_all)
work->arch.token = ~0; /* broadcast wakeup */
else
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
+ trace_kvm_async_pf_ready(work->arch.token, work->gva);
if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) {
diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
index e8ff3e4..6120670 100644
--- a/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@ -1,6 +1,8 @@
#ifndef ARCH_X86_KVM_X86_H
#define ARCH_X86_KVM_X86_H
+#include <asm/processor.h>
+#include <asm/mwait.h>
#include <linux/kvm_host.h>
#include <asm/pvclock.h>
#include "kvm_cache_regs.h"
@@ -212,4 +214,38 @@ static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)
__rem; \
})
+static inline bool kvm_mwait_in_guest(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!cpu_has(&boot_cpu_data, X86_FEATURE_MWAIT))
+ return false;
+
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ /* All AMD CPUs have a working MWAIT implementation */
+ return true;
+ case X86_VENDOR_INTEL:
+ /* Handle Intel below */
+ break;
+ default:
+ return false;
+ }
+
+ /*
+ * Intel CPUs without CPUID5_ECX_INTERRUPT_BREAK are problematic as
+ * they would allow guest to stop the CPU completely by disabling
+ * interrupts then invoking MWAIT.
+ */
+ if (boot_cpu_data.cpuid_level < CPUID_MWAIT_LEAF)
+ return false;
+
+ cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx);
+
+ if (!(ecx & CPUID5_ECX_INTERRUPT_BREAK))
+ return false;
+
+ return true;
+}
+
#endif
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index e7e7055..69f0827 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -16,7 +16,7 @@ obj-y = bug.o bugs_$(BITS).o delay.o fault.o ldt.o \
ifeq ($(CONFIG_X86_32),y)
-obj-y += checksum_32.o
+obj-y += checksum_32.o syscalls_32.o
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
diff --git a/arch/x86/um/asm/ptrace.h b/arch/x86/um/asm/ptrace.h
index e59eef2..b291ca5 100644
--- a/arch/x86/um/asm/ptrace.h
+++ b/arch/x86/um/asm/ptrace.h
@@ -78,7 +78,7 @@ static inline int ptrace_set_thread_area(struct task_struct *child, int idx,
return -ENOSYS;
}
-extern long arch_prctl(struct task_struct *task, int code,
+extern long arch_prctl(struct task_struct *task, int option,
unsigned long __user *addr);
#endif
diff --git a/arch/x86/um/os-Linux/prctl.c b/arch/x86/um/os-Linux/prctl.c
index 96eb2bd..8431e87 100644
--- a/arch/x86/um/os-Linux/prctl.c
+++ b/arch/x86/um/os-Linux/prctl.c
@@ -6,7 +6,7 @@
#include <sys/ptrace.h>
#include <asm/ptrace.h>
-int os_arch_prctl(int pid, int code, unsigned long *addr)
+int os_arch_prctl(int pid, int option, unsigned long *arg2)
{
- return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) addr, code);
+ return ptrace(PTRACE_ARCH_PRCTL, pid, (unsigned long) arg2, option);
}
diff --git a/arch/x86/um/syscalls_32.c b/arch/x86/um/syscalls_32.c
new file mode 100644
index 0000000..627d688
--- /dev/null
+++ b/arch/x86/um/syscalls_32.c
@@ -0,0 +1,7 @@
+#include <linux/syscalls.h>
+#include <os.h>
+
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
+{
+ return -EINVAL;
+}
diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c
index 10d9070..58f5166 100644
--- a/arch/x86/um/syscalls_64.c
+++ b/arch/x86/um/syscalls_64.c
@@ -7,13 +7,15 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
+#include <linux/syscalls.h>
#include <linux/uaccess.h>
#include <asm/prctl.h> /* XXX This should get the constants from libc */
#include <os.h>
-long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
+long arch_prctl(struct task_struct *task, int option,
+ unsigned long __user *arg2)
{
- unsigned long *ptr = addr, tmp;
+ unsigned long *ptr = arg2, tmp;
long ret;
int pid = task->mm->context.id.u.pid;
@@ -30,7 +32,7 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
* arch_prctl is run on the host, then the registers are read
* back.
*/
- switch (code) {
+ switch (option) {
case ARCH_SET_FS:
case ARCH_SET_GS:
ret = restore_registers(pid, &current->thread.regs.regs);
@@ -50,11 +52,11 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
ptr = &tmp;
}
- ret = os_arch_prctl(pid, code, ptr);
+ ret = os_arch_prctl(pid, option, ptr);
if (ret)
return ret;
- switch (code) {
+ switch (option) {
case ARCH_SET_FS:
current->thread.arch.fs = (unsigned long) ptr;
ret = save_registers(pid, &current->thread.regs.regs);
@@ -63,19 +65,19 @@ long arch_prctl(struct task_struct *task, int code, unsigned long __user *addr)
ret = save_registers(pid, &current->thread.regs.regs);
break;
case ARCH_GET_FS:
- ret = put_user(tmp, addr);
+ ret = put_user(tmp, arg2);
break;
case ARCH_GET_GS:
- ret = put_user(tmp, addr);
+ ret = put_user(tmp, arg2);
break;
}
return ret;
}
-long sys_arch_prctl(int code, unsigned long addr)
+SYSCALL_DEFINE2(arch_prctl, int, option, unsigned long, arg2)
{
- return arch_prctl(current, code, (unsigned long __user *) addr);
+ return arch_prctl(current, option, (unsigned long __user *) arg2);
}
void arch_switch_to(struct task_struct *to)
OpenPOWER on IntegriCloud