diff options
54 files changed, 2698 insertions, 6065 deletions
diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 299306d..5237e1b 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2182,7 +2182,7 @@ after pausing the vcpu, but before it is resumed. 4.71 KVM_SIGNAL_MSI Capability: KVM_CAP_SIGNAL_MSI -Architectures: x86 +Architectures: x86 arm64 Type: vm ioctl Parameters: struct kvm_msi (in) Returns: >0 on delivery, 0 if guest blocked the MSI, and -1 on error @@ -2195,10 +2195,18 @@ struct kvm_msi { __u32 address_hi; __u32 data; __u32 flags; - __u8 pad[16]; + __u32 devid; + __u8 pad[12]; }; -No flags are defined so far. The corresponding field must be 0. +flags: KVM_MSI_VALID_DEVID: devid contains a valid value +devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier + for the device that wrote the MSI message. + For PCI, this is usually a BFD identifier in the lower 16 bits. + +The per-VM KVM_CAP_MSI_DEVID capability advertises the need to provide +the device ID. If this capability is not set, userland cannot rely on +the kernel to allow the KVM_MSI_VALID_DEVID flag being set. On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 59541d4..89182f8 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -4,16 +4,22 @@ ARM Virtual Generic Interrupt Controller (VGIC) Device types supported: KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0 + KVM_DEV_TYPE_ARM_VGIC_ITS ARM Interrupt Translation Service Controller -Only one VGIC instance may be instantiated through either this API or the -legacy KVM_CREATE_IRQCHIP api. The created VGIC will act as the VM interrupt -controller, requiring emulated user-space devices to inject interrupts to the -VGIC instead of directly to CPUs. +Only one VGIC instance of the V2/V3 types above may be instantiated through +either this API or the legacy KVM_CREATE_IRQCHIP api. The created VGIC will +act as the VM interrupt controller, requiring emulated user-space devices to +inject interrupts to the VGIC instead of directly to CPUs. Creating a guest GICv3 device requires a host GICv3 as well. GICv3 implementations with hardware compatibility support allow a guest GICv2 as well. +Creating a virtual ITS controller requires a host GICv3 (but does not depend +on having physical ITS controllers). +There can be multiple ITS controllers per guest, each of them has to have +a separate, non-overlapping MMIO region. + Groups: KVM_DEV_ARM_VGIC_GRP_ADDR Attributes: @@ -39,6 +45,13 @@ Groups: Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. This address needs to be 64K aligned. + KVM_VGIC_V3_ADDR_TYPE_ITS (rw, 64-bit) + Base address in the guest physical address space of the GICv3 ITS + control register frame. The ITS allows MSI(-X) interrupts to be + injected into guests. This extension is optional. If the kernel + does not support the ITS, the call returns -ENODEV. + Only valid for KVM_DEV_TYPE_ARM_VGIC_ITS. + This address needs to be 64K aligned and the region covers 128K. KVM_DEV_ARM_VGIC_GRP_DIST_REGS Attributes: @@ -109,8 +122,8 @@ Groups: KVM_DEV_ARM_VGIC_GRP_CTRL Attributes: KVM_DEV_ARM_VGIC_CTRL_INIT - request the initialization of the VGIC, no additional parameter in - kvm_device_attr.addr. + request the initialization of the VGIC or ITS, no additional parameter + in kvm_device_attr.addr. Errors: -ENXIO: VGIC not properly configured as required prior to calling this attribute diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 3d5a5cd..58faff5 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -66,6 +66,8 @@ extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __init_stage2_translation(void); + +extern void __kvm_hyp_reset(unsigned long); #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 96387d4..de338d9 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -241,8 +241,7 @@ int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, int exception_index); -static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, - phys_addr_t pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { @@ -251,18 +250,13 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * code. The init code doesn't need to preserve these * registers as r0-r3 are already callee saved according to * the AAPCS. - * Note that we slightly misuse the prototype by casing the + * Note that we slightly misuse the prototype by casting the * stack pointer to a void *. - * - * We don't have enough registers to perform the full init in - * one go. Install the boot PGD first, and then install the - * runtime PGD, stack pointer and vectors. The PGDs are always - * passed as the third argument, in order to be passed into - * r2-r3 to the init code (yes, this is compliant with the - * PCS!). - */ - kvm_call_hyp(NULL, 0, boot_pgd_ptr); + * The PGDs are always passed as the third argument, in order + * to be passed into r2-r3 to the init code (yes, this is + * compliant with the PCS!). + */ kvm_call_hyp((void*)hyp_stack_ptr, vector_ptr, pgd_ptr); } @@ -272,16 +266,13 @@ static inline void __cpu_init_stage2(void) kvm_call_hyp(__init_stage2_translation); } -static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, +static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, phys_addr_t phys_idmap_start) { - /* - * TODO - * kvm_call_reset(boot_pgd_ptr, phys_idmap_start); - */ + kvm_call_hyp((void *)virt_to_idmap(__kvm_hyp_reset), vector_ptr); } -static inline int kvm_arch_dev_ioctl_check_extension(long ext) +static inline int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) { return 0; } diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index f0e8607..6eaff28 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -25,9 +25,6 @@ #define __hyp_text __section(.hyp.text) notrace -#define kern_hyp_va(v) (v) -#define hyp_kern_va(v) (v) - #define __ACCESS_CP15(CRn, Op1, CRm, Op2) \ "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32 #define __ACCESS_CP15_64(Op1, CRm) \ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index f9a6506..3bb803d 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -26,16 +26,7 @@ * We directly use the kernel VA for the HYP, as we can directly share * the mapping (HTTBR "covers" TTBR1). */ -#define HYP_PAGE_OFFSET_MASK UL(~0) -#define HYP_PAGE_OFFSET PAGE_OFFSET -#define KERN_TO_HYP(kva) (kva) - -/* - * Our virtual mapping for the boot-time MMU-enable code. Must be - * shared across all the page-tables. Conveniently, we use the vectors - * page, where no kernel data will ever be shared with HYP. - */ -#define TRAMPOLINE_VA UL(CONFIG_VECTORS_BASE) +#define kern_hyp_va(kva) (kva) /* * KVM_MMU_CACHE_MIN_PAGES is the number of stage2 page table translation levels. @@ -49,9 +40,8 @@ #include <asm/pgalloc.h> #include <asm/stage2_pgtable.h> -int create_hyp_mappings(void *from, void *to); +int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_boot_hyp_pgd(void); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); @@ -65,7 +55,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); -phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 348caab..e0d76ba 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -97,7 +97,9 @@ extern pgprot_t pgprot_s2_device; #define PAGE_READONLY_EXEC _MOD_PROT(pgprot_user, L_PTE_USER | L_PTE_RDONLY) #define PAGE_KERNEL _MOD_PROT(pgprot_kernel, L_PTE_XN) #define PAGE_KERNEL_EXEC pgprot_kernel -#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP) +#define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_XN) +#define PAGE_HYP_EXEC _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY) +#define PAGE_HYP_RO _MOD_PROT(pgprot_kernel, L_PTE_HYP | L_PTE_RDONLY | L_PTE_XN) #define PAGE_HYP_DEVICE _MOD_PROT(pgprot_hyp_device, L_PTE_HYP) #define PAGE_S2 _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY) #define PAGE_S2_DEVICE _MOD_PROT(pgprot_s2_device, L_PTE_S2_RDONLY) diff --git a/arch/arm/include/asm/virt.h b/arch/arm/include/asm/virt.h index d4ceaf5..a2e75b8 100644 --- a/arch/arm/include/asm/virt.h +++ b/arch/arm/include/asm/virt.h @@ -80,6 +80,10 @@ static inline bool is_kernel_in_hyp_mode(void) return false; } +/* The section containing the hypervisor idmap text */ +extern char __hyp_idmap_text_start[]; +extern char __hyp_idmap_text_end[]; + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 02abfff..95a0005 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -46,13 +46,6 @@ config KVM_ARM_HOST ---help--- Provides host support for ARM processors. -config KVM_NEW_VGIC - bool "New VGIC implementation" - depends on KVM - default y - ---help--- - uses the new VGIC implementation - source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index a596b58..5e28df8 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -22,7 +22,6 @@ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o -ifeq ($(CONFIG_KVM_NEW_VGIC),y) obj-y += $(KVM)/arm/vgic/vgic.o obj-y += $(KVM)/arm/vgic/vgic-init.o obj-y += $(KVM)/arm/vgic/vgic-irqfd.o @@ -30,9 +29,4 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o -else -obj-y += $(KVM)/arm/vgic.o -obj-y += $(KVM)/arm/vgic-v2.o -obj-y += $(KVM)/arm/vgic-v2-emul.o -endif obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 9ac4970..abdb2ea 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -20,6 +20,7 @@ #include <linux/errno.h> #include <linux/err.h> #include <linux/kvm_host.h> +#include <linux/list.h> #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/fs.h> @@ -122,7 +123,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (ret) goto out_fail_alloc; - ret = create_hyp_mappings(kvm, kvm + 1); + ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); if (ret) goto out_free_stage2_pgd; @@ -201,7 +202,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_MAX_VCPUS; break; default: - r = kvm_arch_dev_ioctl_check_extension(ext); + r = kvm_arch_dev_ioctl_check_extension(kvm, ext); break; } return r; @@ -239,7 +240,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) if (err) goto free_vcpu; - err = create_hyp_mappings(vcpu, vcpu + 1); + err = create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); if (err) goto vcpu_uninit; @@ -1038,7 +1039,6 @@ long kvm_arch_vm_ioctl(struct file *filp, static void cpu_init_hyp_mode(void *dummy) { - phys_addr_t boot_pgd_ptr; phys_addr_t pgd_ptr; unsigned long hyp_stack_ptr; unsigned long stack_page; @@ -1047,13 +1047,12 @@ static void cpu_init_hyp_mode(void *dummy) /* Switch from the HYP stub to our own HYP init vector */ __hyp_set_vectors(kvm_get_idmap_vector()); - boot_pgd_ptr = kvm_mmu_get_boot_httbr(); pgd_ptr = kvm_mmu_get_httbr(); stack_page = __this_cpu_read(kvm_arm_hyp_stack_page); hyp_stack_ptr = stack_page + PAGE_SIZE; vector_ptr = (unsigned long)kvm_ksym_ref(__kvm_hyp_vector); - __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr); + __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); kvm_arm_init_debug(); @@ -1075,15 +1074,9 @@ static void cpu_hyp_reinit(void) static void cpu_hyp_reset(void) { - phys_addr_t boot_pgd_ptr; - phys_addr_t phys_idmap_start; - - if (!is_kernel_in_hyp_mode()) { - boot_pgd_ptr = kvm_mmu_get_boot_httbr(); - phys_idmap_start = kvm_get_idmap_start(); - - __cpu_reset_hyp_mode(boot_pgd_ptr, phys_idmap_start); - } + if (!is_kernel_in_hyp_mode()) + __cpu_reset_hyp_mode(hyp_default_vectors, + kvm_get_idmap_start()); } static void _kvm_arch_hardware_enable(void *discard) @@ -1293,14 +1286,14 @@ static int init_hyp_mode(void) * Map the Hyp-code called directly from the host */ err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), - kvm_ksym_ref(__hyp_text_end)); + kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC); if (err) { kvm_err("Cannot map world-switch code\n"); goto out_err; } err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), - kvm_ksym_ref(__end_rodata)); + kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); if (err) { kvm_err("Cannot map rodata section\n"); goto out_err; @@ -1311,7 +1304,8 @@ static int init_hyp_mode(void) */ for_each_possible_cpu(cpu) { char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); - err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE); + err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, + PAGE_HYP); if (err) { kvm_err("Cannot map hyp stack\n"); @@ -1323,7 +1317,7 @@ static int init_hyp_mode(void) kvm_cpu_context_t *cpu_ctxt; cpu_ctxt = per_cpu_ptr(kvm_host_cpu_state, cpu); - err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1); + err = create_hyp_mappings(cpu_ctxt, cpu_ctxt + 1, PAGE_HYP); if (err) { kvm_err("Cannot map host CPU state: %d\n", err); @@ -1331,10 +1325,6 @@ static int init_hyp_mode(void) } } -#ifndef CONFIG_HOTPLUG_CPU - free_boot_hyp_pgd(); -#endif - /* set size of VMID supported by CPU */ kvm_vmid_bits = kvm_get_vmid_bits(); kvm_info("%d-bit VMID\n", kvm_vmid_bits); diff --git a/arch/arm/kvm/init.S b/arch/arm/kvm/init.S index 1f9ae17..bf89c91 100644 --- a/arch/arm/kvm/init.S +++ b/arch/arm/kvm/init.S @@ -32,23 +32,13 @@ * r2,r3 = Hypervisor pgd pointer * * The init scenario is: - * - We jump in HYP with four parameters: boot HYP pgd, runtime HYP pgd, - * runtime stack, runtime vectors - * - Enable the MMU with the boot pgd - * - Jump to a target into the trampoline page (remember, this is the same - * physical page!) - * - Now switch to the runtime pgd (same VA, and still the same physical - * page!) + * - We jump in HYP with 3 parameters: runtime HYP pgd, runtime stack, + * runtime vectors * - Invalidate TLBs * - Set stack and vectors + * - Setup the page tables + * - Enable the MMU * - Profit! (or eret, if you only care about the code). - * - * As we only have four registers available to pass parameters (and we - * need six), we split the init in two phases: - * - Phase 1: r0 = 0, r1 = 0, r2,r3 contain the boot PGD. - * Provides the basic HYP init, and enable the MMU. - * - Phase 2: r0 = ToS, r1 = vectors, r2,r3 contain the runtime PGD. - * Switches to the runtime PGD, set stack and vectors. */ .text @@ -68,8 +58,11 @@ __kvm_hyp_init: W(b) . __do_hyp_init: - cmp r0, #0 @ We have a SP? - bne phase2 @ Yes, second stage init + @ Set stack pointer + mov sp, r0 + + @ Set HVBAR to point to the HYP vectors + mcr p15, 4, r1, c12, c0, 0 @ HVBAR @ Set the HTTBR to point to the hypervisor PGD pointer passed mcrr p15, 4, rr_lo_hi(r2, r3), c2 @@ -114,34 +107,25 @@ __do_hyp_init: THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) ) orr r1, r1, r2 orr r0, r0, r1 - isb mcr p15, 4, r0, c1, c0, 0 @ HSCR + isb - @ End of init phase-1 eret -phase2: - @ Set stack pointer - mov sp, r0 - - @ Set HVBAR to point to the HYP vectors - mcr p15, 4, r1, c12, c0, 0 @ HVBAR - - @ Jump to the trampoline page - ldr r0, =TRAMPOLINE_VA - adr r1, target - bfi r0, r1, #0, #PAGE_SHIFT - ret r0 + @ r0 : stub vectors address +ENTRY(__kvm_hyp_reset) + /* We're now in idmap, disable MMU */ + mrc p15, 4, r1, c1, c0, 0 @ HSCTLR + ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_C | HSCTLR_I) + bic r1, r1, r2 + mcr p15, 4, r1, c1, c0, 0 @ HSCTLR -target: @ We're now in the trampoline code, switch page tables - mcrr p15, 4, rr_lo_hi(r2, r3), c2 + /* Install stub vectors */ + mcr p15, 4, r0, c12, c0, 0 @ HVBAR isb - @ Invalidate the old TLBs - mcr p15, 4, r0, c8, c7, 0 @ TLBIALLH - dsb ish - eret +ENDPROC(__kvm_hyp_reset) .ltorg diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 45c43ae..bda27b6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -32,8 +32,6 @@ #include "trace.h" -extern char __hyp_idmap_text_start[], __hyp_idmap_text_end[]; - static pgd_t *boot_hyp_pgd; static pgd_t *hyp_pgd; static pgd_t *merged_hyp_pgd; @@ -484,28 +482,6 @@ static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) } /** - * free_boot_hyp_pgd - free HYP boot page tables - * - * Free the HYP boot page tables. The bounce page is also freed. - */ -void free_boot_hyp_pgd(void) -{ - mutex_lock(&kvm_hyp_pgd_mutex); - - if (boot_hyp_pgd) { - unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); - unmap_hyp_range(boot_hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); - boot_hyp_pgd = NULL; - } - - if (hyp_pgd) - unmap_hyp_range(hyp_pgd, TRAMPOLINE_VA, PAGE_SIZE); - - mutex_unlock(&kvm_hyp_pgd_mutex); -} - -/** * free_hyp_pgds - free Hyp-mode page tables * * Assumes hyp_pgd is a page table used strictly in Hyp-mode and @@ -519,15 +495,20 @@ void free_hyp_pgds(void) { unsigned long addr; - free_boot_hyp_pgd(); - mutex_lock(&kvm_hyp_pgd_mutex); + if (boot_hyp_pgd) { + unmap_hyp_range(boot_hyp_pgd, hyp_idmap_start, PAGE_SIZE); + free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); + boot_hyp_pgd = NULL; + } + if (hyp_pgd) { + unmap_hyp_range(hyp_pgd, hyp_idmap_start, PAGE_SIZE); for (addr = PAGE_OFFSET; virt_addr_valid(addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); for (addr = VMALLOC_START; is_vmalloc_addr((void*)addr); addr += PGDIR_SIZE) - unmap_hyp_range(hyp_pgd, KERN_TO_HYP(addr), PGDIR_SIZE); + unmap_hyp_range(hyp_pgd, kern_hyp_va(addr), PGDIR_SIZE); free_pages((unsigned long)hyp_pgd, hyp_pgd_order); hyp_pgd = NULL; @@ -679,17 +660,18 @@ static phys_addr_t kvm_kaddr_to_phys(void *kaddr) * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode * @from: The virtual kernel start address of the range * @to: The virtual kernel end address of the range (exclusive) + * @prot: The protection to be applied to this range * * The same virtual address as the kernel virtual address is also used * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying * physical pages. */ -int create_hyp_mappings(void *from, void *to) +int create_hyp_mappings(void *from, void *to, pgprot_t prot) { phys_addr_t phys_addr; unsigned long virt_addr; - unsigned long start = KERN_TO_HYP((unsigned long)from); - unsigned long end = KERN_TO_HYP((unsigned long)to); + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); if (is_kernel_in_hyp_mode()) return 0; @@ -704,7 +686,7 @@ int create_hyp_mappings(void *from, void *to) err = __create_hyp_mappings(hyp_pgd, virt_addr, virt_addr + PAGE_SIZE, __phys_to_pfn(phys_addr), - PAGE_HYP); + prot); if (err) return err; } @@ -723,8 +705,8 @@ int create_hyp_mappings(void *from, void *to) */ int create_hyp_io_mappings(void *from, void *to, phys_addr_t phys_addr) { - unsigned long start = KERN_TO_HYP((unsigned long)from); - unsigned long end = KERN_TO_HYP((unsigned long)to); + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); if (is_kernel_in_hyp_mode()) return 0; @@ -1687,14 +1669,6 @@ phys_addr_t kvm_mmu_get_httbr(void) return virt_to_phys(hyp_pgd); } -phys_addr_t kvm_mmu_get_boot_httbr(void) -{ - if (__kvm_cpu_uses_extended_idmap()) - return virt_to_phys(merged_hyp_pgd); - else - return virt_to_phys(boot_hyp_pgd); -} - phys_addr_t kvm_get_idmap_vector(void) { return hyp_idmap_vector; @@ -1705,6 +1679,22 @@ phys_addr_t kvm_get_idmap_start(void) return hyp_idmap_start; } +static int kvm_map_idmap_text(pgd_t *pgd) +{ + int err; + + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(pgd, + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP_EXEC); + if (err) + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + + return err; +} + int kvm_mmu_init(void) { int err; @@ -1719,28 +1709,41 @@ int kvm_mmu_init(void) */ BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); - hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); - boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + kvm_info("IDMAP page: %lx\n", hyp_idmap_start); + kvm_info("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), kern_hyp_va(~0UL)); - if (!hyp_pgd || !boot_hyp_pgd) { - kvm_err("Hyp mode PGD not allocated\n"); - err = -ENOMEM; + if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && + hyp_idmap_start < kern_hyp_va(~0UL)) { + /* + * The idmap page is intersecting with the VA space, + * it is not safe to continue further. + */ + kvm_err("IDMAP intersecting with HYP VA, unable to continue\n"); + err = -EINVAL; goto out; } - /* Create the idmap in the boot page tables */ - err = __create_hyp_mappings(boot_hyp_pgd, - hyp_idmap_start, hyp_idmap_end, - __phys_to_pfn(hyp_idmap_start), - PAGE_HYP); - - if (err) { - kvm_err("Failed to idmap %lx-%lx\n", - hyp_idmap_start, hyp_idmap_end); + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + if (!hyp_pgd) { + kvm_err("Hyp mode PGD not allocated\n"); + err = -ENOMEM; goto out; } if (__kvm_cpu_uses_extended_idmap()) { + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + hyp_pgd_order); + if (!boot_hyp_pgd) { + kvm_err("Hyp boot PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + err = kvm_map_idmap_text(boot_hyp_pgd); + if (err) + goto out; + merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); if (!merged_hyp_pgd) { kvm_err("Failed to allocate extra HYP pgd\n"); @@ -1748,29 +1751,10 @@ int kvm_mmu_init(void) } __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd, hyp_idmap_start); - return 0; - } - - /* Map the very same page at the trampoline VA */ - err = __create_hyp_mappings(boot_hyp_pgd, - TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, - __phys_to_pfn(hyp_idmap_start), - PAGE_HYP); - if (err) { - kvm_err("Failed to map trampoline @%lx into boot HYP pgd\n", - TRAMPOLINE_VA); - goto out; - } - - /* Map the same page again into the runtime page tables */ - err = __create_hyp_mappings(hyp_pgd, - TRAMPOLINE_VA, TRAMPOLINE_VA + PAGE_SIZE, - __phys_to_pfn(hyp_idmap_start), - PAGE_HYP); - if (err) { - kvm_err("Failed to map trampoline @%lx into runtime HYP pgd\n", - TRAMPOLINE_VA); - goto out; + } else { + err = kvm_map_idmap_text(hyp_pgd); + if (err) + goto out; } return 0; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 224efe7..d40edbb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -36,8 +36,9 @@ #define ARM64_HAS_VIRT_HOST_EXTN 11 #define ARM64_WORKAROUND_CAVIUM_27456 12 #define ARM64_HAS_32BIT_EL0 13 +#define ARM64_HYP_OFFSET_LOW 14 -#define ARM64_NCAPS 14 +#define ARM64_NCAPS 15 #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 49095fc..3eda975 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -47,8 +47,7 @@ int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); -int kvm_arch_dev_ioctl_check_extension(long ext); -unsigned long kvm_hyp_reset_entry(void); +int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext); void __extended_idmap_trampoline(phys_addr_t boot_pgd, phys_addr_t idmap_start); struct kvm_arch { @@ -348,8 +347,7 @@ int kvm_perf_teardown(void); struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr); -static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, - phys_addr_t pgd_ptr, +static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, unsigned long hyp_stack_ptr, unsigned long vector_ptr) { @@ -357,19 +355,14 @@ static inline void __cpu_init_hyp_mode(phys_addr_t boot_pgd_ptr, * Call initialization code, and switch to the full blown * HYP code. */ - __kvm_call_hyp((void *)boot_pgd_ptr, pgd_ptr, - hyp_stack_ptr, vector_ptr); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr); } -static inline void __cpu_reset_hyp_mode(phys_addr_t boot_pgd_ptr, +void __kvm_hyp_teardown(void); +static inline void __cpu_reset_hyp_mode(unsigned long vector_ptr, phys_addr_t phys_idmap_start) { - /* - * Call reset code, and switch back to stub hyp vectors. - * Uses __kvm_call_hyp() to avoid kaslr's kvm_ksym_ref() translation. - */ - __kvm_call_hyp((void *)kvm_hyp_reset_entry(), - boot_pgd_ptr, phys_idmap_start); + kvm_call_hyp(__kvm_hyp_teardown, phys_idmap_start); } static inline void kvm_arch_hardware_unsetup(void) {} diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 44eaff7..cff5105 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -25,29 +25,6 @@ #define __hyp_text __section(.hyp.text) notrace -static inline unsigned long __kern_hyp_va(unsigned long v) -{ - asm volatile(ALTERNATIVE("and %0, %0, %1", - "nop", - ARM64_HAS_VIRT_HOST_EXTN) - : "+r" (v) : "i" (HYP_PAGE_OFFSET_MASK)); - return v; -} - -#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) - -static inline unsigned long __hyp_kern_va(unsigned long v) -{ - u64 offset = PAGE_OFFSET - HYP_PAGE_OFFSET; - asm volatile(ALTERNATIVE("add %0, %0, %1", - "nop", - ARM64_HAS_VIRT_HOST_EXTN) - : "+r" (v) : "r" (offset)); - return v; -} - -#define hyp_kern_va(v) (typeof(v))(__hyp_kern_va((unsigned long)(v))) - #define read_sysreg_elx(r,nvh,vh) \ ({ \ u64 reg; \ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index f05ac27..b6bb834 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -29,21 +29,48 @@ * * Instead, give the HYP mode its own VA region at a fixed offset from * the kernel by just masking the top bits (which are all ones for a - * kernel address). + * kernel address). We need to find out how many bits to mask. * - * ARMv8.1 (using VHE) does have a TTBR1_EL2, and doesn't use these - * macros (the entire kernel runs at EL2). + * We want to build a set of page tables that cover both parts of the + * idmap (the trampoline page used to initialize EL2), and our normal + * runtime VA space, at the same time. + * + * Given that the kernel uses VA_BITS for its entire address space, + * and that half of that space (VA_BITS - 1) is used for the linear + * mapping, we can also limit the EL2 space to (VA_BITS - 1). + * + * The main question is "Within the VA_BITS space, does EL2 use the + * top or the bottom half of that space to shadow the kernel's linear + * mapping?". As we need to idmap the trampoline page, this is + * determined by the range in which this page lives. + * + * If the page is in the bottom half, we have to use the top half. If + * the page is in the top half, we have to use the bottom half: + * + * T = __virt_to_phys(__hyp_idmap_text_start) + * if (T & BIT(VA_BITS - 1)) + * HYP_VA_MIN = 0 //idmap in upper half + * else + * HYP_VA_MIN = 1 << (VA_BITS - 1) + * HYP_VA_MAX = HYP_VA_MIN + (1 << (VA_BITS - 1)) - 1 + * + * This of course assumes that the trampoline page exists within the + * VA_BITS range. If it doesn't, then it means we're in the odd case + * where the kernel idmap (as well as HYP) uses more levels than the + * kernel runtime page tables (as seen when the kernel is configured + * for 4k pages, 39bits VA, and yet memory lives just above that + * limit, forcing the idmap to use 4 levels of page tables while the + * kernel itself only uses 3). In this particular case, it doesn't + * matter which side of VA_BITS we use, as we're guaranteed not to + * conflict with anything. + * + * When using VHE, there are no separate hyp mappings and all KVM + * functionality is already mapped as part of the main kernel + * mappings, and none of this applies in that case. */ -#define HYP_PAGE_OFFSET_SHIFT VA_BITS -#define HYP_PAGE_OFFSET_MASK ((UL(1) << HYP_PAGE_OFFSET_SHIFT) - 1) -#define HYP_PAGE_OFFSET (PAGE_OFFSET & HYP_PAGE_OFFSET_MASK) -/* - * Our virtual mapping for the idmap-ed MMU-enable code. Must be - * shared across all the page-tables. Conveniently, we use the last - * possible page, where no kernel mapping will ever exist. - */ -#define TRAMPOLINE_VA (HYP_PAGE_OFFSET_MASK & PAGE_MASK) +#define HYP_PAGE_OFFSET_HIGH_MASK ((UL(1) << VA_BITS) - 1) +#define HYP_PAGE_OFFSET_LOW_MASK ((UL(1) << (VA_BITS - 1)) - 1) #ifdef __ASSEMBLY__ @@ -53,13 +80,33 @@ /* * Convert a kernel VA into a HYP VA. * reg: VA to be converted. + * + * This generates the following sequences: + * - High mask: + * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK + * nop + * - Low mask: + * and x0, x0, #HYP_PAGE_OFFSET_HIGH_MASK + * and x0, x0, #HYP_PAGE_OFFSET_LOW_MASK + * - VHE: + * nop + * nop + * + * The "low mask" version works because the mask is a strict subset of + * the "high mask", hence performing the first mask for nothing. + * Should be completely invisible on any viable CPU. */ .macro kern_hyp_va reg -alternative_if_not ARM64_HAS_VIRT_HOST_EXTN - and \reg, \reg, #HYP_PAGE_OFFSET_MASK +alternative_if_not ARM64_HAS_VIRT_HOST_EXTN + and \reg, \reg, #HYP_PAGE_OFFSET_HIGH_MASK alternative_else nop alternative_endif +alternative_if_not ARM64_HYP_OFFSET_LOW + nop +alternative_else + and \reg, \reg, #HYP_PAGE_OFFSET_LOW_MASK +alternative_endif .endm #else @@ -70,7 +117,22 @@ alternative_endif #include <asm/mmu_context.h> #include <asm/pgtable.h> -#define KERN_TO_HYP(kva) ((unsigned long)kva - PAGE_OFFSET + HYP_PAGE_OFFSET) +static inline unsigned long __kern_hyp_va(unsigned long v) +{ + asm volatile(ALTERNATIVE("and %0, %0, %1", + "nop", + ARM64_HAS_VIRT_HOST_EXTN) + : "+r" (v) + : "i" (HYP_PAGE_OFFSET_HIGH_MASK)); + asm volatile(ALTERNATIVE("nop", + "and %0, %0, %1", + ARM64_HYP_OFFSET_LOW) + : "+r" (v) + : "i" (HYP_PAGE_OFFSET_LOW_MASK)); + return v; +} + +#define kern_hyp_va(v) (typeof(v))(__kern_hyp_va((unsigned long)(v))) /* * We currently only support a 40bit IPA. @@ -81,9 +143,8 @@ alternative_endif #include <asm/stage2_pgtable.h> -int create_hyp_mappings(void *from, void *to); +int create_hyp_mappings(void *from, void *to, pgprot_t prot); int create_hyp_io_mappings(void *from, void *to, phys_addr_t); -void free_boot_hyp_pgd(void); void free_hyp_pgds(void); void stage2_unmap_vm(struct kvm *kvm); @@ -97,7 +158,6 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run); void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu); phys_addr_t kvm_mmu_get_httbr(void); -phys_addr_t kvm_mmu_get_boot_httbr(void); phys_addr_t kvm_get_idmap_vector(void); phys_addr_t kvm_get_idmap_start(void); int kvm_mmu_init(void); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 2813748..c3ae239 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -164,6 +164,7 @@ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ #define PTE_UXN (_AT(pteval_t, 1) << 54) /* User XN */ +#define PTE_HYP_XN (_AT(pteval_t, 1) << 54) /* HYP XN */ /* * AttrIndx[2:0] encoding (mapping attributes defined in the MAIR* registers). diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 29fcb33..39f5252 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -55,7 +55,9 @@ #define PAGE_KERNEL_EXEC __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE) #define PAGE_KERNEL_EXEC_CONT __pgprot(_PAGE_DEFAULT | PTE_UXN | PTE_DIRTY | PTE_WRITE | PTE_CONT) -#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP) +#define PAGE_HYP __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_HYP_XN) +#define PAGE_HYP_EXEC __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY) +#define PAGE_HYP_RO __pgprot(_PAGE_DEFAULT | PTE_HYP | PTE_RDONLY | PTE_HYP_XN) #define PAGE_HYP_DEVICE __pgprot(PROT_DEVICE_nGnRE | PTE_HYP) #define PAGE_S2 __pgprot(PROT_DEFAULT | PTE_S2_MEMATTR(MT_S2_NORMAL) | PTE_S2_RDONLY) diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index dcbcf8d..88aa8ec 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -82,6 +82,10 @@ extern void verify_cpu_run_el(void); static inline void verify_cpu_run_el(void) {} #endif +/* The section containing the hypervisor idmap text */ +extern char __hyp_idmap_text_start[]; +extern char __hyp_idmap_text_end[]; + /* The section containing the hypervisor text */ extern char __hyp_text_start[]; extern char __hyp_text_end[]; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index f209ea1..3051f86 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -87,9 +87,11 @@ struct kvm_regs { /* Supported VGICv3 address types */ #define KVM_VGIC_V3_ADDR_TYPE_DIST 2 #define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 +#define KVM_VGIC_ITS_ADDR_TYPE 4 #define KVM_VGIC_V3_DIST_SIZE SZ_64K #define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) +#define KVM_VGIC_V3_ITS_SIZE (2 * SZ_64K) #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_EL1_32BIT 1 /* CPU running a 32bit VM */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 811773d..ffb3e14 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -726,6 +726,19 @@ static bool runs_at_el2(const struct arm64_cpu_capabilities *entry, int __unused return is_kernel_in_hyp_mode(); } +static bool hyp_offset_low(const struct arm64_cpu_capabilities *entry, + int __unused) +{ + phys_addr_t idmap_addr = virt_to_phys(__hyp_idmap_text_start); + + /* + * Activate the lower HYP offset only if: + * - the idmap doesn't clash with it, + * - the kernel is not running at EL2. + */ + return idmap_addr > GENMASK(VA_BITS - 2, 0) && !is_kernel_in_hyp_mode(); +} + static const struct arm64_cpu_capabilities arm64_features[] = { { .desc = "GIC system register CPU interface", @@ -803,6 +816,12 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, + { + .desc = "Reduced HYP mapping offset", + .capability = ARM64_HYP_OFFSET_LOW, + .def_scope = SCOPE_SYSTEM, + .matches = hyp_offset_low, + }, {}, }; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index c4f26ef..9d2eff0 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -36,6 +36,7 @@ config KVM select HAVE_KVM_IRQFD select KVM_ARM_VGIC_V3 select KVM_ARM_PMU if HW_PERF_EVENTS + select HAVE_KVM_MSI ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple @@ -54,13 +55,6 @@ config KVM_ARM_PMU Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. -config KVM_NEW_VGIC - bool "New VGIC implementation" - depends on KVM - default y - ---help--- - uses the new VGIC implementation - source drivers/vhost/Kconfig endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a7a958c..a5b9664 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -20,7 +20,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -ifeq ($(CONFIG_KVM_NEW_VGIC),y) kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o @@ -30,12 +29,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o -else -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v2-emul.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic-v3-emul.o -endif +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/arch/arm64/kvm/hyp-init.S b/arch/arm64/kvm/hyp-init.S index a873a6d..6b29d3d 100644 --- a/arch/arm64/kvm/hyp-init.S +++ b/arch/arm64/kvm/hyp-init.S @@ -53,10 +53,9 @@ __invalid: b . /* - * x0: HYP boot pgd - * x1: HYP pgd - * x2: HYP stack - * x3: HYP vectors + * x0: HYP pgd + * x1: HYP stack + * x2: HYP vectors */ __do_hyp_init: @@ -110,71 +109,27 @@ __do_hyp_init: msr sctlr_el2, x4 isb - /* Skip the trampoline dance if we merged the boot and runtime PGDs */ - cmp x0, x1 - b.eq merged - - /* MMU is now enabled. Get ready for the trampoline dance */ - ldr x4, =TRAMPOLINE_VA - adr x5, target - bfi x4, x5, #0, #PAGE_SHIFT - br x4 - -target: /* We're now in the trampoline code, switch page tables */ - msr ttbr0_el2, x1 - isb - - /* Invalidate the old TLBs */ - tlbi alle2 - dsb sy - -merged: /* Set the stack and new vectors */ + kern_hyp_va x1 + mov sp, x1 kern_hyp_va x2 - mov sp, x2 - kern_hyp_va x3 - msr vbar_el2, x3 + msr vbar_el2, x2 /* Hello, World! */ eret ENDPROC(__kvm_hyp_init) /* - * Reset kvm back to the hyp stub. This is the trampoline dance in - * reverse. If kvm used an extended idmap, __extended_idmap_trampoline - * calls this code directly in the idmap. In this case switching to the - * boot tables is a no-op. - * - * x0: HYP boot pgd - * x1: HYP phys_idmap_start + * Reset kvm back to the hyp stub. */ ENTRY(__kvm_hyp_reset) - /* We're in trampoline code in VA, switch back to boot page tables */ - msr ttbr0_el2, x0 - isb - - /* Ensure the PA branch doesn't find a stale tlb entry or stale code. */ - ic iallu - tlbi alle2 - dsb sy - isb - - /* Branch into PA space */ - adr x0, 1f - bfi x1, x0, #0, #PAGE_SHIFT - br x1 - /* We're now in idmap, disable MMU */ -1: mrs x0, sctlr_el2 + mrs x0, sctlr_el2 ldr x1, =SCTLR_ELx_FLAGS bic x0, x0, x1 // Clear SCTL_M and etc msr sctlr_el2, x0 isb - /* Invalidate the old TLBs */ - tlbi alle2 - dsb sy - /* Install stub vectors */ adr_l x0, __hyp_stub_vectors msr vbar_el2, x0 diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 70254a6..ce9e5e5 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -164,22 +164,3 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) - -/* - * When using the extended idmap, we don't have a trampoline page we can use - * while we switch pages tables during __kvm_hyp_reset. Accessing the idmap - * directly would be ideal, but if we're using the extended idmap then the - * idmap is located above HYP_PAGE_OFFSET, and the address will be masked by - * kvm_call_hyp using kern_hyp_va. - * - * x0: HYP boot pgd - * x1: HYP phys_idmap_start - */ -ENTRY(__extended_idmap_trampoline) - mov x4, x1 - adr_l x3, __kvm_hyp_reset - - /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ - bfi x4, x3, #0, #PAGE_SHIFT - br x4 -ENDPROC(__extended_idmap_trampoline) diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 2d87f36..f6d9694 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -62,6 +62,21 @@ ENTRY(__vhe_hyp_call) isb ret ENDPROC(__vhe_hyp_call) + +/* + * Compute the idmap address of __kvm_hyp_reset based on the idmap + * start passed as a parameter, and jump there. + * + * x0: HYP phys_idmap_start + */ +ENTRY(__kvm_hyp_teardown) + mov x4, x0 + adr_l x3, __kvm_hyp_reset + + /* insert __kvm_hyp_reset()s offset into phys_idmap_start */ + bfi x4, x3, #0, #PAGE_SHIFT + br x4 +ENDPROC(__kvm_hyp_teardown) el1_sync: // Guest trapped into EL2 save_x0_to_x3 diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 437cfad..81f21a2 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -299,9 +299,16 @@ static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:% static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) { - unsigned long str_va = (unsigned long)__hyp_panic_string; + unsigned long str_va; - __hyp_do_panic(hyp_kern_va(str_va), + /* + * Force the panic string to be loaded from the literal pool, + * making sure it is a kernel address and not a PC-relative + * reference. + */ + asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va)); + + __hyp_do_panic(str_va, spsr, elr, read_sysreg(esr_el2), read_sysreg_el2(far), read_sysreg(hpfar_el2), par, diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 7be24f2..5bc4608 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -65,7 +65,7 @@ static bool cpu_has_32bit_el1(void) * We currently assume that the number of HW registers is uniform * across all CPUs (see cpuinfo_sanity_check). */ -int kvm_arch_dev_ioctl_check_extension(long ext) +int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) { int r; @@ -86,6 +86,12 @@ int kvm_arch_dev_ioctl_check_extension(long ext) case KVM_CAP_VCPU_ATTRIBUTES: r = 1; break; + case KVM_CAP_MSI_DEVID: + if (!kvm) + r = -EINVAL; + else + r = kvm->arch.vgic.msis_require_devid; + break; default: r = 0; } @@ -132,31 +138,3 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset timer */ return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq); } - -extern char __hyp_idmap_text_start[]; - -unsigned long kvm_hyp_reset_entry(void) -{ - if (!__kvm_cpu_uses_extended_idmap()) { - unsigned long offset; - - /* - * Find the address of __kvm_hyp_reset() in the trampoline page. - * This is present in the running page tables, and the boot page - * tables, so we call the code here to start the trampoline - * dance in reverse. - */ - offset = (unsigned long)__kvm_hyp_reset - - ((unsigned long)__hyp_idmap_text_start & PAGE_MASK); - - return TRAMPOLINE_VA + offset; - } else { - /* - * KVM is running with merged page tables, which don't have the - * trampoline page mapped. We know the idmap is still mapped, - * but can't be called into directly. Use - * __extended_idmap_trampoline to do the call. - */ - return (unsigned long)kvm_ksym_ref(__extended_idmap_trampoline); - } -} diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index a57d650..b0b225c 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1546,7 +1546,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, struct sys_reg_params *params) { u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); - int cp; + int cp = -1; switch(hsr_ec) { case ESR_ELx_EC_CP15_32: @@ -1558,7 +1558,7 @@ static void unhandled_cp_access(struct kvm_vcpu *vcpu, cp = 14; break; default: - WARN_ON((cp = -1)); + WARN_ON(1); } kvm_err("Unsupported guest CP%d access at: %08lx\n", diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index da0a5248..540da51 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -1,6 +1,5 @@ /* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> + * Copyright (C) 2015, 2016 ARM Ltd. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -12,16 +11,10 @@ * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program. If not, see <http://www.gnu.org/licenses/>. */ - -#ifndef __ASM_ARM_KVM_VGIC_H -#define __ASM_ARM_KVM_VGIC_H - -#ifdef CONFIG_KVM_NEW_VGIC -#include <kvm/vgic/vgic.h> -#else +#ifndef __KVM_ARM_VGIC_H +#define __KVM_ARM_VGIC_H #include <linux/kernel.h> #include <linux/kvm.h> @@ -29,248 +22,187 @@ #include <linux/spinlock.h> #include <linux/types.h> #include <kvm/iodev.h> -#include <linux/irqchip/arm-gic-common.h> +#include <linux/list.h> -#define VGIC_NR_IRQS_LEGACY 256 +#define VGIC_V3_MAX_CPUS 255 +#define VGIC_V2_MAX_CPUS 8 +#define VGIC_NR_IRQS_LEGACY 256 #define VGIC_NR_SGIS 16 #define VGIC_NR_PPIS 16 #define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) +#define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1) +#define VGIC_MAX_SPI 1019 +#define VGIC_MAX_RESERVED 1023 +#define VGIC_MIN_LPI 8192 -#define VGIC_V2_MAX_LRS (1 << 6) -#define VGIC_V3_MAX_LRS 16 -#define VGIC_MAX_IRQS 1024 -#define VGIC_V2_MAX_CPUS 8 -#define VGIC_V3_MAX_CPUS 255 +enum vgic_type { + VGIC_V2, /* Good ol' GICv2 */ + VGIC_V3, /* New fancy GICv3 */ +}; -#if (VGIC_NR_IRQS_LEGACY & 31) -#error "VGIC_NR_IRQS must be a multiple of 32" -#endif +/* same for all guests, as depending only on the _host's_ GIC model */ +struct vgic_global { + /* type of the host GIC */ + enum vgic_type type; -#if (VGIC_NR_IRQS_LEGACY > VGIC_MAX_IRQS) -#error "VGIC_NR_IRQS must be <= 1024" -#endif + /* Physical address of vgic virtual cpu interface */ + phys_addr_t vcpu_base; -/* - * The GIC distributor registers describing interrupts have two parts: - * - 32 per-CPU interrupts (SGI + PPI) - * - a bunch of shared interrupts (SPI) - */ -struct vgic_bitmap { - /* - * - One UL per VCPU for private interrupts (assumes UL is at - * least 32 bits) - * - As many UL as necessary for shared interrupts. - * - * The private interrupts are accessed via the "private" - * field, one UL per vcpu (the state for vcpu n is in - * private[n]). The shared interrupts are accessed via the - * "shared" pointer (IRQn state is at bit n-32 in the bitmap). - */ - unsigned long *private; - unsigned long *shared; -}; + /* virtual control interface mapping */ + void __iomem *vctrl_base; -struct vgic_bytemap { - /* - * - 8 u32 per VCPU for private interrupts - * - As many u32 as necessary for shared interrupts. - * - * The private interrupts are accessed via the "private" - * field, (the state for vcpu n is in private[n*8] to - * private[n*8 + 7]). The shared interrupts are accessed via - * the "shared" pointer (IRQn state is at byte (n-32)%4 of the - * shared[(n-32)/4] word). - */ - u32 *private; - u32 *shared; -}; + /* Number of implemented list registers */ + int nr_lr; -struct kvm_vcpu; + /* Maintenance IRQ number */ + unsigned int maint_irq; -enum vgic_type { - VGIC_V2, /* Good ol' GICv2 */ - VGIC_V3, /* New fancy GICv3 */ + /* maximum number of VCPUs allowed (GICv2 limits us to 8) */ + int max_gic_vcpus; + + /* Only needed for the legacy KVM_CREATE_IRQCHIP */ + bool can_emulate_gicv2; }; -#define LR_STATE_PENDING (1 << 0) -#define LR_STATE_ACTIVE (1 << 1) -#define LR_STATE_MASK (3 << 0) -#define LR_EOI_INT (1 << 2) -#define LR_HW (1 << 3) +extern struct vgic_global kvm_vgic_global_state; -struct vgic_lr { - unsigned irq:10; - union { - unsigned hwirq:10; - unsigned source:3; - }; - unsigned state:4; -}; +#define VGIC_V2_MAX_LRS (1 << 6) +#define VGIC_V3_MAX_LRS 16 +#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) -struct vgic_vmcr { - u32 ctlr; - u32 abpr; - u32 bpr; - u32 pmr; +enum vgic_irq_config { + VGIC_CONFIG_EDGE = 0, + VGIC_CONFIG_LEVEL }; -struct vgic_ops { - struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); - void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); - u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); - u64 (*get_eisr)(const struct kvm_vcpu *vcpu); - void (*clear_eisr)(struct kvm_vcpu *vcpu); - u32 (*get_interrupt_status)(const struct kvm_vcpu *vcpu); - void (*enable_underflow)(struct kvm_vcpu *vcpu); - void (*disable_underflow)(struct kvm_vcpu *vcpu); - void (*get_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); - void (*set_vmcr)(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); - void (*enable)(struct kvm_vcpu *vcpu); +struct vgic_irq { + spinlock_t irq_lock; /* Protects the content of the struct */ + struct list_head lpi_list; /* Used to link all LPIs together */ + struct list_head ap_list; + + struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU + * SPIs and LPIs: The VCPU whose ap_list + * this is queued on. + */ + + struct kvm_vcpu *target_vcpu; /* The VCPU that this interrupt should + * be sent to, as a result of the + * targets reg (v2) or the + * affinity reg (v3). + */ + + u32 intid; /* Guest visible INTID */ + bool pending; + bool line_level; /* Level only */ + bool soft_pending; /* Level only */ + bool active; /* not used for LPIs */ + bool enabled; + bool hw; /* Tied to HW IRQ */ + struct kref refcount; /* Used for LPIs */ + u32 hwintid; /* HW INTID number */ + union { + u8 targets; /* GICv2 target VCPUs mask */ + u32 mpidr; /* GICv3 target VCPU */ + }; + u8 source; /* GICv2 SGIs only */ + u8 priority; + enum vgic_irq_config config; /* Level or edge */ }; -struct vgic_params { - /* vgic type */ - enum vgic_type type; - /* Physical address of vgic virtual cpu interface */ - phys_addr_t vcpu_base; - /* Number of list registers */ - u32 nr_lr; - /* Interrupt number */ - unsigned int maint_irq; - /* Virtual control interface base address */ - void __iomem *vctrl_base; - int max_gic_vcpus; - /* Only needed for the legacy KVM_CREATE_IRQCHIP */ - bool can_emulate_gicv2; -}; +struct vgic_register_region; +struct vgic_its; -struct vgic_vm_ops { - bool (*queue_sgi)(struct kvm_vcpu *, int irq); - void (*add_sgi_source)(struct kvm_vcpu *, int irq, int source); - int (*init_model)(struct kvm *); - int (*map_resources)(struct kvm *, const struct vgic_params *); +enum iodev_type { + IODEV_CPUIF, + IODEV_DIST, + IODEV_REDIST, + IODEV_ITS }; struct vgic_io_device { - gpa_t addr; - int len; - const struct vgic_io_range *reg_ranges; - struct kvm_vcpu *redist_vcpu; + gpa_t base_addr; + union { + struct kvm_vcpu *redist_vcpu; + struct vgic_its *its; + }; + const struct vgic_register_region *regions; + enum iodev_type iodev_type; + int nr_regions; struct kvm_io_device dev; }; -struct irq_phys_map { - u32 virt_irq; - u32 phys_irq; -}; - -struct irq_phys_map_entry { - struct list_head entry; - struct rcu_head rcu; - struct irq_phys_map map; +struct vgic_its { + /* The base address of the ITS control register frame */ + gpa_t vgic_its_base; + + bool enabled; + bool initialized; + struct vgic_io_device iodev; + struct kvm_device *dev; + + /* These registers correspond to GITS_BASER{0,1} */ + u64 baser_device_table; + u64 baser_coll_table; + + /* Protects the command queue */ + struct mutex cmd_lock; + u64 cbaser; + u32 creadr; + u32 cwriter; + + /* Protects the device and collection lists */ + struct mutex its_lock; + struct list_head device_list; + struct list_head collection_list; }; struct vgic_dist { - spinlock_t lock; bool in_kernel; bool ready; + bool initialized; /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ u32 vgic_model; - int nr_cpus; - int nr_irqs; + /* Do injected MSIs require an additional device ID? */ + bool msis_require_devid; + + int nr_spis; + /* TODO: Consider moving to global state */ /* Virtual control interface mapping */ void __iomem *vctrl_base; - /* Distributor and vcpu interface mapping in the guest */ - phys_addr_t vgic_dist_base; - /* GICv2 and GICv3 use different mapped register blocks */ + /* base addresses in guest physical address space: */ + gpa_t vgic_dist_base; /* distributor */ union { - phys_addr_t vgic_cpu_base; - phys_addr_t vgic_redist_base; + /* either a GICv2 CPU interface */ + gpa_t vgic_cpu_base; + /* or a number of GICv3 redistributor regions */ + gpa_t vgic_redist_base; }; - /* Distributor enabled */ - u32 enabled; - - /* Interrupt enabled (one bit per IRQ) */ - struct vgic_bitmap irq_enabled; - - /* Level-triggered interrupt external input is asserted */ - struct vgic_bitmap irq_level; - - /* - * Interrupt state is pending on the distributor - */ - struct vgic_bitmap irq_pending; - - /* - * Tracks writes to GICD_ISPENDRn and GICD_ICPENDRn for level-triggered - * interrupts. Essentially holds the state of the flip-flop in - * Figure 4-10 on page 4-101 in ARM IHI 0048B.b. - * Once set, it is only cleared for level-triggered interrupts on - * guest ACKs (when we queue it) or writes to GICD_ICPENDRn. - */ - struct vgic_bitmap irq_soft_pend; - - /* Level-triggered interrupt queued on VCPU interface */ - struct vgic_bitmap irq_queued; - - /* Interrupt was active when unqueue from VCPU interface */ - struct vgic_bitmap irq_active; - - /* Interrupt priority. Not used yet. */ - struct vgic_bytemap irq_priority; + /* distributor enabled */ + bool enabled; - /* Level/edge triggered */ - struct vgic_bitmap irq_cfg; + struct vgic_irq *spis; - /* - * Source CPU per SGI and target CPU: - * - * Each byte represent a SGI observable on a VCPU, each bit of - * this byte indicating if the corresponding VCPU has - * generated this interrupt. This is a GICv2 feature only. - * - * For VCPUn (n < 8), irq_sgi_sources[n*16] to [n*16 + 15] are - * the SGIs observable on VCPUn. - */ - u8 *irq_sgi_sources; + struct vgic_io_device dist_iodev; - /* - * Target CPU for each SPI: - * - * Array of available SPI, each byte indicating the target - * VCPU for SPI. IRQn (n >=32) is at irq_spi_cpu[n-32]. - */ - u8 *irq_spi_cpu; + bool has_its; /* - * Reverse lookup of irq_spi_cpu for faster compute pending: - * - * Array of bitmaps, one per VCPU, describing if IRQn is - * routed to a particular VCPU. + * Contains the attributes and gpa of the LPI configuration table. + * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share + * one address across all redistributors. + * GICv3 spec: 6.1.2 "LPI Configuration tables" */ - struct vgic_bitmap *irq_spi_target; - - /* Target MPIDR for each IRQ (needed for GICv3 IROUTERn) only */ - u32 *irq_spi_mpidr; + u64 propbaser; - /* Bitmap indicating which CPU has something pending */ - unsigned long *irq_pending_on_cpu; - - /* Bitmap indicating which CPU has active IRQs */ - unsigned long *irq_active_on_cpu; - - struct vgic_vm_ops vm_ops; - struct vgic_io_device dist_iodev; - struct vgic_io_device *redist_iodevs; - - /* Virtual irq to hwirq mapping */ - spinlock_t irq_phys_map_lock; - struct list_head irq_phys_map_list; + /* Protects the lpi_list and the count value below. */ + spinlock_t lpi_list_lock; + struct list_head lpi_list_head; + int lpi_list_count; }; struct vgic_v2_cpu_if { @@ -298,78 +230,88 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { - /* Pending/active/both interrupts on this VCPU */ - DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); - DECLARE_BITMAP(pend_act_percpu, VGIC_NR_PRIVATE_IRQS); - - /* Pending/active/both shared interrupts, dynamically sized */ - unsigned long *pending_shared; - unsigned long *active_shared; - unsigned long *pend_act_shared; - /* CPU vif control registers for world switch */ union { struct vgic_v2_cpu_if vgic_v2; struct vgic_v3_cpu_if vgic_v3; }; - /* Protected by the distributor's irq_phys_map_lock */ - struct list_head irq_phys_map_list; + unsigned int used_lrs; + struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; - u64 live_lrs; -}; + spinlock_t ap_list_lock; /* Protects the ap_list */ + + /* + * List of IRQs that this VCPU should consider because they are either + * Active or Pending (hence the name; AP list), or because they recently + * were one of the two and need to be migrated off this list to another + * VCPU. + */ + struct list_head ap_list_head; -#define LR_EMPTY 0xff + u64 live_lrs; -#define INT_STATUS_EOI (1 << 0) -#define INT_STATUS_UNDERFLOW (1 << 1) + /* + * Members below are used with GICv3 emulation only and represent + * parts of the redistributor. + */ + struct vgic_io_device rd_iodev; + struct vgic_io_device sgi_iodev; -struct kvm; -struct kvm_vcpu; + /* Contains the attributes and gpa of the LPI pending tables. */ + u64 pendbaser; + + bool lpis_enabled; +}; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); -int kvm_vgic_hyp_init(void); -int kvm_vgic_map_resources(struct kvm *kvm); -int kvm_vgic_get_max_vcpus(void); void kvm_vgic_early_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm, u32 type); void kvm_vgic_destroy(struct kvm *kvm); void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, +int kvm_vgic_map_resources(struct kvm *kvm); +int kvm_vgic_hyp_init(void); + +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, bool level); -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, - unsigned int virt_irq, bool level); -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq); +int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level); +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); + #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) -#define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) +#define vgic_initialized(k) ((k)->arch.vgic.initialized) #define vgic_ready(k) ((k)->arch.vgic.ready) #define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ - ((i) < (k)->arch.vgic.nr_irqs)) + ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) + +bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); -int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params); #ifdef CONFIG_KVM_ARM_VGIC_V3 -int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params); +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); #else -static inline int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params) +static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) { - return -ENODEV; } #endif -#endif /* old VGIC include */ -#endif +/** + * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW + * + * The host's GIC naturally limits the maximum amount of VCPUs a guest + * can use. + */ +static inline int kvm_vgic_get_max_vcpus(void) +{ + return kvm_vgic_global_state.max_gic_vcpus; +} + +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); + +#endif /* __KVM_ARM_VGIC_H */ diff --git a/include/kvm/vgic/vgic.h b/include/kvm/vgic/vgic.h deleted file mode 100644 index 3fbd175..0000000 --- a/include/kvm/vgic/vgic.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright (C) 2015, 2016 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ -#ifndef __ASM_ARM_KVM_VGIC_VGIC_H -#define __ASM_ARM_KVM_VGIC_VGIC_H - -#include <linux/kernel.h> -#include <linux/kvm.h> -#include <linux/irqreturn.h> -#include <linux/spinlock.h> -#include <linux/types.h> -#include <kvm/iodev.h> - -#define VGIC_V3_MAX_CPUS 255 -#define VGIC_V2_MAX_CPUS 8 -#define VGIC_NR_IRQS_LEGACY 256 -#define VGIC_NR_SGIS 16 -#define VGIC_NR_PPIS 16 -#define VGIC_NR_PRIVATE_IRQS (VGIC_NR_SGIS + VGIC_NR_PPIS) -#define VGIC_MAX_PRIVATE (VGIC_NR_PRIVATE_IRQS - 1) -#define VGIC_MAX_SPI 1019 -#define VGIC_MAX_RESERVED 1023 -#define VGIC_MIN_LPI 8192 - -enum vgic_type { - VGIC_V2, /* Good ol' GICv2 */ - VGIC_V3, /* New fancy GICv3 */ -}; - -/* same for all guests, as depending only on the _host's_ GIC model */ -struct vgic_global { - /* type of the host GIC */ - enum vgic_type type; - - /* Physical address of vgic virtual cpu interface */ - phys_addr_t vcpu_base; - - /* virtual control interface mapping */ - void __iomem *vctrl_base; - - /* Number of implemented list registers */ - int nr_lr; - - /* Maintenance IRQ number */ - unsigned int maint_irq; - - /* maximum number of VCPUs allowed (GICv2 limits us to 8) */ - int max_gic_vcpus; - - /* Only needed for the legacy KVM_CREATE_IRQCHIP */ - bool can_emulate_gicv2; -}; - -extern struct vgic_global kvm_vgic_global_state; - -#define VGIC_V2_MAX_LRS (1 << 6) -#define VGIC_V3_MAX_LRS 16 -#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr) - -enum vgic_irq_config { - VGIC_CONFIG_EDGE = 0, - VGIC_CONFIG_LEVEL -}; - -struct vgic_irq { - spinlock_t irq_lock; /* Protects the content of the struct */ - struct list_head ap_list; - - struct kvm_vcpu *vcpu; /* SGIs and PPIs: The VCPU - * SPIs and LPIs: The VCPU whose ap_list - * this is queued on. - */ - - struct kvm_vcpu *target_vcpu; /* The VCPU that this interrupt should - * be sent to, as a result of the - * targets reg (v2) or the - * affinity reg (v3). - */ - - u32 intid; /* Guest visible INTID */ - bool pending; - bool line_level; /* Level only */ - bool soft_pending; /* Level only */ - bool active; /* not used for LPIs */ - bool enabled; - bool hw; /* Tied to HW IRQ */ - u32 hwintid; /* HW INTID number */ - union { - u8 targets; /* GICv2 target VCPUs mask */ - u32 mpidr; /* GICv3 target VCPU */ - }; - u8 source; /* GICv2 SGIs only */ - u8 priority; - enum vgic_irq_config config; /* Level or edge */ -}; - -struct vgic_register_region; - -struct vgic_io_device { - gpa_t base_addr; - struct kvm_vcpu *redist_vcpu; - const struct vgic_register_region *regions; - int nr_regions; - struct kvm_io_device dev; -}; - -struct vgic_dist { - bool in_kernel; - bool ready; - bool initialized; - - /* vGIC model the kernel emulates for the guest (GICv2 or GICv3) */ - u32 vgic_model; - - int nr_spis; - - /* TODO: Consider moving to global state */ - /* Virtual control interface mapping */ - void __iomem *vctrl_base; - - /* base addresses in guest physical address space: */ - gpa_t vgic_dist_base; /* distributor */ - union { - /* either a GICv2 CPU interface */ - gpa_t vgic_cpu_base; - /* or a number of GICv3 redistributor regions */ - gpa_t vgic_redist_base; - }; - - /* distributor enabled */ - bool enabled; - - struct vgic_irq *spis; - - struct vgic_io_device dist_iodev; - struct vgic_io_device *redist_iodevs; -}; - -struct vgic_v2_cpu_if { - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_misr; /* Saved only */ - u64 vgic_eisr; /* Saved only */ - u64 vgic_elrsr; /* Saved only */ - u32 vgic_apr; - u32 vgic_lr[VGIC_V2_MAX_LRS]; -}; - -struct vgic_v3_cpu_if { -#ifdef CONFIG_KVM_ARM_VGIC_V3 - u32 vgic_hcr; - u32 vgic_vmcr; - u32 vgic_sre; /* Restored only, change ignored */ - u32 vgic_misr; /* Saved only */ - u32 vgic_eisr; /* Saved only */ - u32 vgic_elrsr; /* Saved only */ - u32 vgic_ap0r[4]; - u32 vgic_ap1r[4]; - u64 vgic_lr[VGIC_V3_MAX_LRS]; -#endif -}; - -struct vgic_cpu { - /* CPU vif control registers for world switch */ - union { - struct vgic_v2_cpu_if vgic_v2; - struct vgic_v3_cpu_if vgic_v3; - }; - - unsigned int used_lrs; - struct vgic_irq private_irqs[VGIC_NR_PRIVATE_IRQS]; - - spinlock_t ap_list_lock; /* Protects the ap_list */ - - /* - * List of IRQs that this VCPU should consider because they are either - * Active or Pending (hence the name; AP list), or because they recently - * were one of the two and need to be migrated off this list to another - * VCPU. - */ - struct list_head ap_list_head; - - u64 live_lrs; -}; - -int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); -void kvm_vgic_early_init(struct kvm *kvm); -int kvm_vgic_create(struct kvm *kvm, u32 type); -void kvm_vgic_destroy(struct kvm *kvm); -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu); -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_vgic_map_resources(struct kvm *kvm); -int kvm_vgic_hyp_init(void); - -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu); - -#define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) -#define vgic_initialized(k) ((k)->arch.vgic.initialized) -#define vgic_ready(k) ((k)->arch.vgic.ready) -#define vgic_valid_spi(k, i) (((i) >= VGIC_NR_PRIVATE_IRQS) && \ - ((i) < (k)->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) - -bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); - -#ifdef CONFIG_KVM_ARM_VGIC_V3 -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); -#else -static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) -{ -} -#endif - -/** - * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW - * - * The host's GIC naturally limits the maximum amount of VCPUs a guest - * can use. - */ -static inline int kvm_vgic_get_max_vcpus(void) -{ - return kvm_vgic_global_state.max_gic_vcpus; -} - -#endif /* __ASM_ARM_KVM_VGIC_VGIC_H */ diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index bfbd707..700b421 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -112,34 +112,76 @@ #define GICR_WAKER_ProcessorSleep (1U << 1) #define GICR_WAKER_ChildrenAsleep (1U << 2) -#define GICR_PROPBASER_NonShareable (0U << 10) -#define GICR_PROPBASER_InnerShareable (1U << 10) -#define GICR_PROPBASER_OuterShareable (2U << 10) -#define GICR_PROPBASER_SHAREABILITY_MASK (3UL << 10) -#define GICR_PROPBASER_nCnB (0U << 7) -#define GICR_PROPBASER_nC (1U << 7) -#define GICR_PROPBASER_RaWt (2U << 7) -#define GICR_PROPBASER_RaWb (3U << 7) -#define GICR_PROPBASER_WaWt (4U << 7) -#define GICR_PROPBASER_WaWb (5U << 7) -#define GICR_PROPBASER_RaWaWt (6U << 7) -#define GICR_PROPBASER_RaWaWb (7U << 7) -#define GICR_PROPBASER_CACHEABILITY_MASK (7U << 7) -#define GICR_PROPBASER_IDBITS_MASK (0x1f) - -#define GICR_PENDBASER_NonShareable (0U << 10) -#define GICR_PENDBASER_InnerShareable (1U << 10) -#define GICR_PENDBASER_OuterShareable (2U << 10) -#define GICR_PENDBASER_SHAREABILITY_MASK (3UL << 10) -#define GICR_PENDBASER_nCnB (0U << 7) -#define GICR_PENDBASER_nC (1U << 7) -#define GICR_PENDBASER_RaWt (2U << 7) -#define GICR_PENDBASER_RaWb (3U << 7) -#define GICR_PENDBASER_WaWt (4U << 7) -#define GICR_PENDBASER_WaWb (5U << 7) -#define GICR_PENDBASER_RaWaWt (6U << 7) -#define GICR_PENDBASER_RaWaWb (7U << 7) -#define GICR_PENDBASER_CACHEABILITY_MASK (7U << 7) +#define GIC_BASER_CACHE_nCnB 0ULL +#define GIC_BASER_CACHE_SameAsInner 0ULL +#define GIC_BASER_CACHE_nC 1ULL +#define GIC_BASER_CACHE_RaWt 2ULL +#define GIC_BASER_CACHE_RaWb 3ULL +#define GIC_BASER_CACHE_WaWt 4ULL +#define GIC_BASER_CACHE_WaWb 5ULL +#define GIC_BASER_CACHE_RaWaWt 6ULL +#define GIC_BASER_CACHE_RaWaWb 7ULL +#define GIC_BASER_CACHE_MASK 7ULL +#define GIC_BASER_NonShareable 0ULL +#define GIC_BASER_InnerShareable 1ULL +#define GIC_BASER_OuterShareable 2ULL +#define GIC_BASER_SHAREABILITY_MASK 3ULL + +#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \ + (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT) + +#define GIC_BASER_SHAREABILITY(reg, type) \ + (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) + +#define GICR_PROPBASER_SHAREABILITY_SHIFT (10) +#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PROPBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK) +#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK) +#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK) +#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK + +#define GICR_PROPBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable) + +#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB) +#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC) +#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt) +#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt) +#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb) +#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt) +#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) + +#define GICR_PROPBASER_IDBITS_MASK (0x1f) + +#define GICR_PENDBASER_SHAREABILITY_SHIFT (10) +#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) +#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56) +#define GICR_PENDBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK) +#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK) +#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK) +#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK + +#define GICR_PENDBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable) + +#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB) +#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC) +#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt) +#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt) +#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb) +#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt) +#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb) + +#define GICR_PENDBASER_PTZ BIT_ULL(62) /* * Re-Distributor registers, offsets from SGI_base @@ -175,59 +217,90 @@ #define GITS_CWRITER 0x0088 #define GITS_CREADR 0x0090 #define GITS_BASER 0x0100 +#define GITS_IDREGS_BASE 0xffd0 +#define GITS_PIDR0 0xffe0 +#define GITS_PIDR1 0xffe4 #define GITS_PIDR2 GICR_PIDR2 +#define GITS_PIDR4 0xffd0 +#define GITS_CIDR0 0xfff0 +#define GITS_CIDR1 0xfff4 +#define GITS_CIDR2 0xfff8 +#define GITS_CIDR3 0xfffc #define GITS_TRANSLATER 0x10040 #define GITS_CTLR_ENABLE (1U << 0) #define GITS_CTLR_QUIESCENT (1U << 31) +#define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) #define GITS_TYPER_PTA (1UL << 19) - -#define GITS_CBASER_VALID (1UL << 63) -#define GITS_CBASER_nCnB (0UL << 59) -#define GITS_CBASER_nC (1UL << 59) -#define GITS_CBASER_RaWt (2UL << 59) -#define GITS_CBASER_RaWb (3UL << 59) -#define GITS_CBASER_WaWt (4UL << 59) -#define GITS_CBASER_WaWb (5UL << 59) -#define GITS_CBASER_RaWaWt (6UL << 59) -#define GITS_CBASER_RaWaWb (7UL << 59) -#define GITS_CBASER_CACHEABILITY_MASK (7UL << 59) -#define GITS_CBASER_NonShareable (0UL << 10) -#define GITS_CBASER_InnerShareable (1UL << 10) -#define GITS_CBASER_OuterShareable (2UL << 10) -#define GITS_CBASER_SHAREABILITY_MASK (3UL << 10) +#define GITS_TYPER_HWCOLLCNT_SHIFT 24 + +#define GITS_CBASER_VALID (1UL << 63) +#define GITS_CBASER_SHAREABILITY_SHIFT (10) +#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_CBASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK) +#define GITS_CBASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK) +#define GITS_CBASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK) +#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK + +#define GITS_CBASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable) + +#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB) +#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC) +#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt) +#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt) +#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb) +#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt) +#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb) #define GITS_BASER_NR_REGS 8 -#define GITS_BASER_VALID (1UL << 63) -#define GITS_BASER_nCnB (0UL << 59) -#define GITS_BASER_nC (1UL << 59) -#define GITS_BASER_RaWt (2UL << 59) -#define GITS_BASER_RaWb (3UL << 59) -#define GITS_BASER_WaWt (4UL << 59) -#define GITS_BASER_WaWb (5UL << 59) -#define GITS_BASER_RaWaWt (6UL << 59) -#define GITS_BASER_RaWaWb (7UL << 59) -#define GITS_BASER_CACHEABILITY_MASK (7UL << 59) -#define GITS_BASER_TYPE_SHIFT (56) +#define GITS_BASER_VALID (1UL << 63) +#define GITS_BASER_INDIRECT (1ULL << 62) + +#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59) +#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53) +#define GITS_BASER_INNER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK) +#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK +#define GITS_BASER_OUTER_CACHEABILITY_MASK \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK) +#define GITS_BASER_SHAREABILITY_MASK \ + GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK) + +#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB) +#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC) +#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt) +#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt) +#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb) +#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt) +#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb) + +#define GITS_BASER_TYPE_SHIFT (56) #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) -#define GITS_BASER_ENTRY_SIZE_SHIFT (48) +#define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0xff) + 1) -#define GITS_BASER_NonShareable (0UL << 10) -#define GITS_BASER_InnerShareable (1UL << 10) -#define GITS_BASER_OuterShareable (2UL << 10) #define GITS_BASER_SHAREABILITY_SHIFT (10) -#define GITS_BASER_SHAREABILITY_MASK (3UL << GITS_BASER_SHAREABILITY_SHIFT) +#define GITS_BASER_InnerShareable \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) #define GITS_BASER_PAGE_SIZE_SHIFT (8) #define GITS_BASER_PAGE_SIZE_4K (0UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_16K (1UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_64K (2UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_MASK (3UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGES_MAX 256 +#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1) #define GITS_BASER_TYPE_NONE 0 #define GITS_BASER_TYPE_DEVICE 1 @@ -243,7 +316,10 @@ */ #define GITS_CMD_MAPD 0x08 #define GITS_CMD_MAPC 0x09 -#define GITS_CMD_MAPVI 0x0a +#define GITS_CMD_MAPTI 0x0a +/* older GIC documentation used MAPVI for this command */ +#define GITS_CMD_MAPVI GITS_CMD_MAPTI +#define GITS_CMD_MAPI 0x0b #define GITS_CMD_MOVI 0x01 #define GITS_CMD_DISCARD 0x0f #define GITS_CMD_INV 0x0c @@ -254,6 +330,22 @@ #define GITS_CMD_SYNC 0x05 /* + * ITS error numbers + */ +#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107 +#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109 +#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 +#define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPC_PROCNUM_OOR 0x010902 +#define E_ITS_MAPC_COLLECTION_OOR 0x010903 +#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 +#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 +#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 +#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01 +#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07 + +/* * CPU interface registers */ #define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 60d339fa..aafd702 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -164,6 +164,8 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); +struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr); #ifdef CONFIG_KVM_ASYNC_PF struct kvm_async_pf { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 70941f4..8f2756c 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -868,6 +868,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_MAX_VCPU_ID 128 #define KVM_CAP_X2APIC_API 129 #define KVM_CAP_S390_USER_INSTR0 130 +#define KVM_CAP_MSI_DEVID 131 #ifdef KVM_CAP_IRQ_ROUTING @@ -1026,12 +1027,14 @@ struct kvm_one_reg { __u64 addr; }; +#define KVM_MSI_VALID_DEVID (1U << 0) struct kvm_msi { __u32 address_lo; __u32 address_hi; __u32 data; __u32 flags; - __u8 pad[16]; + __u32 devid; + __u8 pad[12]; }; struct kvm_arm_device_addr { @@ -1076,6 +1079,8 @@ enum kvm_device_type { #define KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_FLIC KVM_DEV_TYPE_ARM_VGIC_V3, #define KVM_DEV_TYPE_ARM_VGIC_V3 KVM_DEV_TYPE_ARM_VGIC_V3 + KVM_DEV_TYPE_ARM_VGIC_ITS, +#define KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_ARM_VGIC_ITS KVM_DEV_TYPE_MAX, }; diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index 3a3a699..7cffd93 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -21,18 +21,11 @@ #include <asm/kvm_hyp.h> -#ifdef CONFIG_KVM_NEW_VGIC -extern struct vgic_global kvm_vgic_global_state; -#define vgic_v2_params kvm_vgic_global_state -#else -extern struct vgic_params vgic_v2_params; -#endif - static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; u32 eisr0, eisr1; int i; bool expect_mi; @@ -74,7 +67,7 @@ static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; u32 elrsr0, elrsr1; elrsr0 = readl_relaxed(base + GICH_ELRSR0); @@ -93,7 +86,7 @@ static void __hyp_text save_elrsr(struct kvm_vcpu *vcpu, void __iomem *base) static void __hyp_text save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) { struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; for (i = 0; i < nr_lr; i++) { @@ -147,7 +140,7 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; struct vgic_dist *vgic = &kvm->arch.vgic; void __iomem *base = kern_hyp_va(vgic->vctrl_base); - int nr_lr = (kern_hyp_va(&vgic_v2_params))->nr_lr; + int nr_lr = (kern_hyp_va(&kvm_vgic_global_state))->nr_lr; int i; u64 live_lrs = 0; diff --git a/virt/kvm/arm/vgic-v2-emul.c b/virt/kvm/arm/vgic-v2-emul.c deleted file mode 100644 index 1b0bee0..0000000 --- a/virt/kvm/arm/vgic-v2-emul.c +++ /dev/null @@ -1,856 +0,0 @@ -/* - * Contains GICv2 specific emulation code, was in vgic.c before. - * - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/uaccess.h> - -#include <linux/irqchip/arm-gic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -#include "vgic.h" - -#define GICC_ARCH_VERSION_V2 0x2 - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg); -static u8 *vgic_get_sgi_sources(struct vgic_dist *dist, int vcpu_id, int sgi) -{ - return dist->irq_sgi_sources + vcpu_id * VGIC_NR_SGIS + sgi; -} - -static bool handle_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - u32 word_offset = offset & 3; - - switch (offset & ~3) { - case 0: /* GICD_CTLR */ - reg = vcpu->kvm->arch.vgic.enabled; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = reg & 1; - vgic_update_state(vcpu->kvm); - return true; - } - break; - - case 4: /* GICD_TYPER */ - reg = (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; - reg |= (vcpu->kvm->arch.vgic.nr_irqs >> 5) - 1; - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - - case 8: /* GICD_IIDR */ - reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - vgic_reg_access(mmio, ®, word_offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - break; - } - - return false; -} - -static bool handle_mmio_set_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, ACCESS_WRITE_SETBIT); -} - -static bool handle_mmio_clear_enable_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, ACCESS_WRITE_CLEARBIT); -} - -static bool handle_mmio_set_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_clear_pending_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_set_active_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_clear_active_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); -} - -static bool handle_mmio_priority_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -#define GICD_ITARGETSR_SIZE 32 -#define GICD_CPUTARGETS_BITS 8 -#define GICD_IRQS_PER_ITARGETSR (GICD_ITARGETSR_SIZE / GICD_CPUTARGETS_BITS) -static u32 vgic_get_target_reg(struct kvm *kvm, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int i; - u32 val = 0; - - irq -= VGIC_NR_PRIVATE_IRQS; - - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) - val |= 1 << (dist->irq_spi_cpu[irq + i] + i * 8); - - return val; -} - -static void vgic_set_target_reg(struct kvm *kvm, u32 val, int irq) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i, c; - unsigned long *bmap; - u32 target; - - irq -= VGIC_NR_PRIVATE_IRQS; - - /* - * Pick the LSB in each byte. This ensures we target exactly - * one vcpu per IRQ. If the byte is null, assume we target - * CPU0. - */ - for (i = 0; i < GICD_IRQS_PER_ITARGETSR; i++) { - int shift = i * GICD_CPUTARGETS_BITS; - - target = ffs((val >> shift) & 0xffU); - target = target ? (target - 1) : 0; - dist->irq_spi_cpu[irq + i] = target; - kvm_for_each_vcpu(c, vcpu, kvm) { - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[c]); - if (c == target) - set_bit(irq + i, bmap); - else - clear_bit(irq + i, bmap); - } - } -} - -static bool handle_mmio_target_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - /* We treat the banked interrupts targets as read-only */ - if (offset < 32) { - u32 roreg; - - roreg = 1 << vcpu->vcpu_id; - roreg |= roreg << 8; - roreg |= roreg << 16; - - vgic_reg_access(mmio, &roreg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_get_target_reg(vcpu->kvm, offset & ~3U); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_set_target_reg(vcpu->kvm, reg, offset & ~3U); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -static bool handle_mmio_cfg_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 *reg; - - reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -static bool handle_mmio_sgi_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vgic_dispatch_sgi(vcpu, reg); - vgic_update_state(vcpu->kvm); - return true; - } - - return false; -} - -/* Handle reads of GICD_CPENDSGIRn and GICD_SPENDSGIRn */ -static bool read_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int sgi; - int min_sgi = (offset & ~0x3); - int max_sgi = min_sgi + 3; - int vcpu_id = vcpu->vcpu_id; - u32 reg = 0; - - /* Copy source SGIs from distributor side */ - for (sgi = min_sgi; sgi <= max_sgi; sgi++) { - u8 sources = *vgic_get_sgi_sources(dist, vcpu_id, sgi); - - reg |= ((u32)sources) << (8 * (sgi - min_sgi)); - } - - mmio_data_write(mmio, ~0, reg); - return false; -} - -static bool write_set_clear_sgi_pend_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, bool set) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int sgi; - int min_sgi = (offset & ~0x3); - int max_sgi = min_sgi + 3; - int vcpu_id = vcpu->vcpu_id; - u32 reg; - bool updated = false; - - reg = mmio_data_read(mmio, ~0); - - /* Clear pending SGIs on the distributor */ - for (sgi = min_sgi; sgi <= max_sgi; sgi++) { - u8 mask = reg >> (8 * (sgi - min_sgi)); - u8 *src = vgic_get_sgi_sources(dist, vcpu_id, sgi); - - if (set) { - if ((*src & mask) != mask) - updated = true; - *src |= mask; - } else { - if (*src & mask) - updated = true; - *src &= ~mask; - } - } - - if (updated) - vgic_update_state(vcpu->kvm); - - return updated; -} - -static bool handle_mmio_sgi_set(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (!mmio->is_write) - return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); - else - return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, true); -} - -static bool handle_mmio_sgi_clear(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (!mmio->is_write) - return read_set_clear_sgi_pend_reg(vcpu, mmio, offset); - else - return write_set_clear_sgi_pend_reg(vcpu, mmio, offset, false); -} - -static const struct vgic_io_range vgic_dist_ranges[] = { - { - .base = GIC_DIST_SOFTINT, - .len = 4, - .handle_mmio = handle_mmio_sgi_reg, - }, - { - .base = GIC_DIST_CTRL, - .len = 12, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_misc, - }, - { - .base = GIC_DIST_IGROUP, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_DIST_ENABLE_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg, - }, - { - .base = GIC_DIST_ENABLE_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg, - }, - { - .base = GIC_DIST_PENDING_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg, - }, - { - .base = GIC_DIST_PENDING_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg, - }, - { - .base = GIC_DIST_ACTIVE_SET, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_active_reg, - }, - { - .base = GIC_DIST_ACTIVE_CLEAR, - .len = VGIC_MAX_IRQS / 8, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_active_reg, - }, - { - .base = GIC_DIST_PRI, - .len = VGIC_MAX_IRQS, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg, - }, - { - .base = GIC_DIST_TARGET, - .len = VGIC_MAX_IRQS, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_target_reg, - }, - { - .base = GIC_DIST_CONFIG, - .len = VGIC_MAX_IRQS / 4, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg, - }, - { - .base = GIC_DIST_SGI_PENDING_CLEAR, - .len = VGIC_NR_SGIS, - .handle_mmio = handle_mmio_sgi_clear, - }, - { - .base = GIC_DIST_SGI_PENDING_SET, - .len = VGIC_NR_SGIS, - .handle_mmio = handle_mmio_sgi_set, - }, - {} -}; - -static void vgic_dispatch_sgi(struct kvm_vcpu *vcpu, u32 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int nrcpus = atomic_read(&kvm->online_vcpus); - u8 target_cpus; - int sgi, mode, c, vcpu_id; - - vcpu_id = vcpu->vcpu_id; - - sgi = reg & 0xf; - target_cpus = (reg >> 16) & 0xff; - mode = (reg >> 24) & 3; - - switch (mode) { - case 0: - if (!target_cpus) - return; - break; - - case 1: - target_cpus = ((1 << nrcpus) - 1) & ~(1 << vcpu_id) & 0xff; - break; - - case 2: - target_cpus = 1 << vcpu_id; - break; - } - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (target_cpus & 1) { - /* Flag the SGI as pending */ - vgic_dist_irq_set_pending(vcpu, sgi); - *vgic_get_sgi_sources(dist, c, sgi) |= 1 << vcpu_id; - kvm_debug("SGI%d from CPU%d to CPU%d\n", - sgi, vcpu_id, c); - } - - target_cpus >>= 1; - } -} - -static bool vgic_v2_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long sources; - int vcpu_id = vcpu->vcpu_id; - int c; - - sources = *vgic_get_sgi_sources(dist, vcpu_id, irq); - - for_each_set_bit(c, &sources, dist->nr_cpus) { - if (vgic_queue_irq(vcpu, c, irq)) - clear_bit(c, &sources); - } - - *vgic_get_sgi_sources(dist, vcpu_id, irq) = sources; - - /* - * If the sources bitmap has been cleared it means that we - * could queue all the SGIs onto link registers (see the - * clear_bit above), and therefore we are done with them in - * our emulated gic and can get rid of them. - */ - if (!sources) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -/** - * kvm_vgic_map_resources - Configure global VGIC state before running any VCPUs - * @kvm: pointer to the kvm struct - * - * Map the virtual CPU interface into the VM before running any VCPUs. We - * can't do this at creation time, because user space must first set the - * virtual CPU interface address in the guest physical address space. - */ -static int vgic_v2_map_resources(struct kvm *kvm, - const struct vgic_params *params) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - int ret = 0; - - if (!irqchip_in_kernel(kvm)) - return 0; - - mutex_lock(&kvm->lock); - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { - kvm_err("Need to set vgic cpu and dist addresses first\n"); - ret = -ENXIO; - goto out; - } - - vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, - KVM_VGIC_V2_DIST_SIZE, - vgic_dist_ranges, -1, &dist->dist_iodev); - - /* - * Initialize the vgic if this hasn't already been done on demand by - * accessing the vgic state from userspace. - */ - ret = vgic_init(kvm); - if (ret) { - kvm_err("Unable to allocate maps\n"); - goto out_unregister; - } - - ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, - params->vcpu_base, KVM_VGIC_V2_CPU_SIZE, - true); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out_unregister; - } - - dist->ready = true; - goto out; - -out_unregister: - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); - -out: - if (ret) - kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); - return ret; -} - -static void vgic_v2_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - *vgic_get_sgi_sources(dist, vcpu->vcpu_id, irq) |= 1 << source; -} - -static int vgic_v2_init_model(struct kvm *kvm) -{ - int i; - - for (i = VGIC_NR_PRIVATE_IRQS; i < kvm->arch.vgic.nr_irqs; i += 4) - vgic_set_target_reg(kvm, 0, i); - - return 0; -} - -void vgic_v2_init_emulation(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - dist->vm_ops.queue_sgi = vgic_v2_queue_sgi; - dist->vm_ops.add_sgi_source = vgic_v2_add_sgi_source; - dist->vm_ops.init_model = vgic_v2_init_model; - dist->vm_ops.map_resources = vgic_v2_map_resources; - - kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; -} - -static bool handle_cpu_mmio_misc(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - bool updated = false; - struct vgic_vmcr vmcr; - u32 *vmcr_field; - u32 reg; - - vgic_get_vmcr(vcpu, &vmcr); - - switch (offset & ~0x3) { - case GIC_CPU_CTRL: - vmcr_field = &vmcr.ctlr; - break; - case GIC_CPU_PRIMASK: - vmcr_field = &vmcr.pmr; - break; - case GIC_CPU_BINPOINT: - vmcr_field = &vmcr.bpr; - break; - case GIC_CPU_ALIAS_BINPOINT: - vmcr_field = &vmcr.abpr; - break; - default: - BUG(); - } - - if (!mmio->is_write) { - reg = *vmcr_field; - mmio_data_write(mmio, ~0, reg); - } else { - reg = mmio_data_read(mmio, ~0); - if (reg != *vmcr_field) { - *vmcr_field = reg; - vgic_set_vmcr(vcpu, &vmcr); - updated = true; - } - } - return updated; -} - -static bool handle_mmio_abpr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - return handle_cpu_mmio_misc(vcpu, mmio, GIC_CPU_ALIAS_BINPOINT); -} - -static bool handle_cpu_mmio_ident(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - - if (mmio->is_write) - return false; - - /* GICC_IIDR */ - reg = (PRODUCT_ID_KVM << 20) | - (GICC_ARCH_VERSION_V2 << 16) | - (IMPLEMENTER_ARM << 0); - mmio_data_write(mmio, ~0, reg); - return false; -} - -/* - * CPU Interface Register accesses - these are not accessed by the VM, but by - * user space for saving and restoring VGIC state. - */ -static const struct vgic_io_range vgic_cpu_ranges[] = { - { - .base = GIC_CPU_CTRL, - .len = 12, - .handle_mmio = handle_cpu_mmio_misc, - }, - { - .base = GIC_CPU_ALIAS_BINPOINT, - .len = 4, - .handle_mmio = handle_mmio_abpr, - }, - { - .base = GIC_CPU_ACTIVEPRIO, - .len = 16, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GIC_CPU_IDENT, - .len = 4, - .handle_mmio = handle_cpu_mmio_ident, - }, -}; - -static int vgic_attr_regs_access(struct kvm_device *dev, - struct kvm_device_attr *attr, - u32 *reg, bool is_write) -{ - const struct vgic_io_range *r = NULL, *ranges; - phys_addr_t offset; - int ret, cpuid, c; - struct kvm_vcpu *vcpu, *tmp_vcpu; - struct vgic_dist *vgic; - struct kvm_exit_mmio mmio; - u32 data; - - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> - KVM_DEV_ARM_VGIC_CPUID_SHIFT; - - mutex_lock(&dev->kvm->lock); - - ret = vgic_init(dev->kvm); - if (ret) - goto out; - - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { - ret = -EINVAL; - goto out; - } - - vcpu = kvm_get_vcpu(dev->kvm, cpuid); - vgic = &dev->kvm->arch.vgic; - - mmio.len = 4; - mmio.is_write = is_write; - mmio.data = &data; - if (is_write) - mmio_data_write(&mmio, ~0, *reg); - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - mmio.phys_addr = vgic->vgic_dist_base + offset; - ranges = vgic_dist_ranges; - break; - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - mmio.phys_addr = vgic->vgic_cpu_base + offset; - ranges = vgic_cpu_ranges; - break; - default: - BUG(); - } - r = vgic_find_range(ranges, 4, offset); - - if (unlikely(!r || !r->handle_mmio)) { - ret = -ENXIO; - goto out; - } - - - spin_lock(&vgic->lock); - - /* - * Ensure that no other VCPU is running by checking the vcpu->cpu - * field. If no other VPCUs are running we can safely access the VGIC - * state, because even if another VPU is run after this point, that - * VCPU will not touch the vgic state, because it will block on - * getting the vgic->lock in kvm_vgic_sync_hwstate(). - */ - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { - if (unlikely(tmp_vcpu->cpu != -1)) { - ret = -EBUSY; - goto out_vgic_unlock; - } - } - - /* - * Move all pending IRQs from the LRs on all VCPUs so the pending - * state can be properly represented in the register state accessible - * through this API. - */ - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) - vgic_unqueue_irqs(tmp_vcpu); - - offset -= r->base; - r->handle_mmio(vcpu, &mmio, offset); - - if (!is_write) - *reg = mmio_data_read(&mmio, ~0); - - ret = 0; -out_vgic_unlock: - spin_unlock(&vgic->lock); -out: - mutex_unlock(&dev->kvm->lock); - return ret; -} - -static int vgic_v2_create(struct kvm_device *dev, u32 type) -{ - return kvm_vgic_create(dev->kvm, type); -} - -static void vgic_v2_destroy(struct kvm_device *dev) -{ - kfree(dev); -} - -static int vgic_v2_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg; - - if (get_user(reg, uaddr)) - return -EFAULT; - - return vgic_attr_regs_access(dev, attr, ®, true); - } - - } - - return -ENXIO; -} - -static int vgic_v2_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 reg = 0; - - ret = vgic_attr_regs_access(dev, attr, ®, false); - if (ret) - return ret; - return put_user(reg, uaddr); - } - - } - - return -ENXIO; -} - -static int vgic_v2_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - phys_addr_t offset; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - case KVM_VGIC_V2_ADDR_TYPE_CPU: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - return vgic_has_attr_regs(vgic_dist_ranges, offset); - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - return vgic_has_attr_regs(vgic_cpu_ranges, offset); - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return -ENXIO; -} - -struct kvm_device_ops kvm_arm_vgic_v2_ops = { - .name = "kvm-arm-vgic-v2", - .create = vgic_v2_create, - .destroy = vgic_v2_destroy, - .set_attr = vgic_v2_set_attr, - .get_attr = vgic_v2_get_attr, - .has_attr = vgic_v2_has_attr, -}; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c deleted file mode 100644 index 334cd7a..0000000 --- a/virt/kvm/arm/vgic-v2.c +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (C) 2012,2013 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> - -#include <linux/irqchip/arm-gic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -static struct vgic_lr vgic_v2_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u32 val = vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr]; - - lr_desc.irq = val & GICH_LR_VIRTUALID; - if (lr_desc.irq <= 15) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - else - lr_desc.source = 0; - lr_desc.state = 0; - - if (val & GICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & GICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & GICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - if (val & GICH_LR_HW) { - lr_desc.state |= LR_HW; - lr_desc.hwirq = (val & GICH_LR_PHYSID_CPUID) >> GICH_LR_PHYSID_CPUID_SHIFT; - } - - return lr_desc; -} - -static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u32 lr_val; - - lr_val = lr_desc.irq; - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= GICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= GICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= GICH_LR_EOI; - - if (lr_desc.state & LR_HW) { - lr_val |= GICH_LR_HW; - lr_val |= (u32)lr_desc.hwirq << GICH_LR_PHYSID_CPUID_SHIFT; - } - - if (lr_desc.irq < VGIC_NR_SGIS) - lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); - - vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; - - if (!(lr_desc.state & LR_STATE_MASK)) - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); - else - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr &= ~(1ULL << lr); -} - -static u64 vgic_v2_get_elrsr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr; -} - -static u64 vgic_v2_get_eisr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr; -} - -static void vgic_v2_clear_eisr(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_eisr = 0; -} - -static u32 vgic_v2_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v2.vgic_misr; - u32 ret = 0; - - if (misr & GICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & GICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v2_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr |= GICH_HCR_UIE; -} - -static void vgic_v2_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr &= ~GICH_HCR_UIE; -} - -static void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr; - - vmcrp->ctlr = (vmcr & GICH_VMCR_CTRL_MASK) >> GICH_VMCR_CTRL_SHIFT; - vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> GICH_VMCR_ALIAS_BINPOINT_SHIFT; - vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> GICH_VMCR_BINPOINT_SHIFT; - vmcrp->pmr = (vmcr & GICH_VMCR_PRIMASK_MASK) >> GICH_VMCR_PRIMASK_SHIFT; -} - -static void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << GICH_VMCR_CTRL_SHIFT) & GICH_VMCR_CTRL_MASK; - vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & GICH_VMCR_ALIAS_BINPOINT_MASK; - vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & GICH_VMCR_BINPOINT_MASK; - vmcr |= (vmcrp->pmr << GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; - - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = vmcr; -} - -static void vgic_v2_enable(struct kvm_vcpu *vcpu) -{ - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; - vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; - - /* Get the show on the road... */ - vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; -} - -static const struct vgic_ops vgic_v2_ops = { - .get_lr = vgic_v2_get_lr, - .set_lr = vgic_v2_set_lr, - .get_elrsr = vgic_v2_get_elrsr, - .get_eisr = vgic_v2_get_eisr, - .clear_eisr = vgic_v2_clear_eisr, - .get_interrupt_status = vgic_v2_get_interrupt_status, - .enable_underflow = vgic_v2_enable_underflow, - .disable_underflow = vgic_v2_disable_underflow, - .get_vmcr = vgic_v2_get_vmcr, - .set_vmcr = vgic_v2_set_vmcr, - .enable = vgic_v2_enable, -}; - -struct vgic_params __section(.hyp.text) vgic_v2_params; - -static void vgic_cpu_init_lrs(void *params) -{ - struct vgic_params *vgic = params; - int i; - - for (i = 0; i < vgic->nr_lr; i++) - writel_relaxed(0, vgic->vctrl_base + GICH_LR0 + (i * 4)); -} - -/** - * vgic_v2_probe - probe for a GICv2 compatible interrupt controller - * @gic_kvm_info: pointer to the GIC description - * @ops: address of a pointer to the GICv2 operations - * @params: address of a pointer to HW-specific parameters - * - * Returns 0 if a GICv2 has been found, with the low level operations - * in *ops and the HW parameters in *params. Returns an error code - * otherwise. - */ -int vgic_v2_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params) -{ - int ret; - struct vgic_params *vgic = &vgic_v2_params; - const struct resource *vctrl_res = &gic_kvm_info->vctrl; - const struct resource *vcpu_res = &gic_kvm_info->vcpu; - - memset(vgic, 0, sizeof(*vgic)); - - if (!gic_kvm_info->maint_irq) { - kvm_err("error getting vgic maintenance irq\n"); - ret = -ENXIO; - goto out; - } - vgic->maint_irq = gic_kvm_info->maint_irq; - - if (!gic_kvm_info->vctrl.start) { - kvm_err("GICH not present in the firmware table\n"); - ret = -ENXIO; - goto out; - } - - vgic->vctrl_base = ioremap(gic_kvm_info->vctrl.start, - resource_size(&gic_kvm_info->vctrl)); - if (!vgic->vctrl_base) { - kvm_err("Cannot ioremap GICH\n"); - ret = -ENOMEM; - goto out; - } - - vgic->nr_lr = readl_relaxed(vgic->vctrl_base + GICH_VTR); - vgic->nr_lr = (vgic->nr_lr & 0x3f) + 1; - - ret = create_hyp_io_mappings(vgic->vctrl_base, - vgic->vctrl_base + resource_size(vctrl_res), - vctrl_res->start); - if (ret) { - kvm_err("Cannot map VCTRL into hyp\n"); - goto out_unmap; - } - - if (!PAGE_ALIGNED(vcpu_res->start)) { - kvm_err("GICV physical address 0x%llx not page aligned\n", - (unsigned long long)vcpu_res->start); - ret = -ENXIO; - goto out_unmap; - } - - if (!PAGE_ALIGNED(resource_size(vcpu_res))) { - kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(vcpu_res), - PAGE_SIZE); - ret = -ENXIO; - goto out_unmap; - } - - vgic->can_emulate_gicv2 = true; - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); - - vgic->vcpu_base = vcpu_res->start; - - kvm_info("GICH base=0x%llx, GICV base=0x%llx, IRQ=%d\n", - gic_kvm_info->vctrl.start, vgic->vcpu_base, vgic->maint_irq); - - vgic->type = VGIC_V2; - vgic->max_gic_vcpus = VGIC_V2_MAX_CPUS; - - on_each_cpu(vgic_cpu_init_lrs, vgic, 1); - - *ops = &vgic_v2_ops; - *params = vgic; - goto out; - -out_unmap: - iounmap(vgic->vctrl_base); -out: - return ret; -} diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c deleted file mode 100644 index e661e7f..0000000 --- a/virt/kvm/arm/vgic-v3-emul.c +++ /dev/null @@ -1,1074 +0,0 @@ -/* - * GICv3 distributor and redistributor emulation - * - * GICv3 emulation is currently only supported on a GICv3 host (because - * we rely on the hardware's CPU interface virtualization support), but - * supports both hardware with or without the optional GICv2 backwards - * compatibility features. - * - * Limitations of the emulation: - * (RAZ/WI: read as zero, write ignore, RAO/WI: read as one, write ignore) - * - We do not support LPIs (yet). TYPER.LPIS is reported as 0 and is RAZ/WI. - * - We do not support the message based interrupts (MBIs) triggered by - * writes to the GICD_{SET,CLR}SPI_* registers. TYPER.MBIS is reported as 0. - * - We do not support the (optional) backwards compatibility feature. - * GICD_CTLR.ARE resets to 1 and is RAO/WI. If the _host_ GIC supports - * the compatiblity feature, you can use a GICv2 in the guest, though. - * - We only support a single security state. GICD_CTLR.DS is 1 and is RAO/WI. - * - Priorities are not emulated (same as the GICv2 emulation). Linux - * as a guest is fine with this, because it does not use priorities. - * - We only support Group1 interrupts. Again Linux uses only those. - * - * Copyright (C) 2014 ARM Ltd. - * Author: Andre Przywara <andre.przywara@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> - -#include <linux/irqchip/arm-gic-v3.h> -#include <kvm/arm_vgic.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> - -#include "vgic.h" - -static bool handle_mmio_rao_wi(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg = 0xffffffff; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static bool handle_mmio_ctlr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg = 0; - - /* - * Force ARE and DS to 1, the guest cannot change this. - * For the time being we only support Group1 interrupts. - */ - if (vcpu->kvm->arch.vgic.enabled) - reg = GICD_CTLR_ENABLE_SS_G1; - reg |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - vcpu->kvm->arch.vgic.enabled = !!(reg & GICD_CTLR_ENABLE_SS_G1); - vgic_update_state(vcpu->kvm); - return true; - } - return false; -} - -/* - * As this implementation does not provide compatibility - * with GICv2 (ARE==1), we report zero CPUs in bits [5..7]. - * Also LPIs and MBIs are not supported, so we set the respective bits to 0. - * Also we report at most 2**10=1024 interrupt IDs (to match 1024 SPIs). - */ -#define INTERRUPT_ID_BITS 10 -static bool handle_mmio_typer(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - - reg = (min(vcpu->kvm->arch.vgic.nr_irqs, 1024) >> 5) - 1; - - reg |= (INTERRUPT_ID_BITS - 1) << 19; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static bool handle_mmio_iidr(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, phys_addr_t offset) -{ - u32 reg; - - reg = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static bool handle_mmio_set_enable_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, - ACCESS_WRITE_SETBIT); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_clear_enable_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id, - ACCESS_WRITE_CLEARBIT); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_pending_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_clear_pending_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_active_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_clear_active_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - if (likely(offset >= VGIC_NR_PRIVATE_IRQS / 8)) - return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, - vcpu->vcpu_id); - - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_priority_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg; - - if (unlikely(offset < VGIC_NR_PRIVATE_IRQS)) { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -static bool handle_mmio_cfg_reg_dist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 *reg; - - if (unlikely(offset < VGIC_NR_PRIVATE_IRQS / 4)) { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; - } - - reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -/* - * We use a compressed version of the MPIDR (all 32 bits in one 32-bit word) - * when we store the target MPIDR written by the guest. - */ -static u32 compress_mpidr(unsigned long mpidr) -{ - u32 ret; - - ret = MPIDR_AFFINITY_LEVEL(mpidr, 0); - ret |= MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8; - ret |= MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16; - ret |= MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24; - - return ret; -} - -static unsigned long uncompress_mpidr(u32 value) -{ - unsigned long mpidr; - - mpidr = ((value >> 0) & 0xFF) << MPIDR_LEVEL_SHIFT(0); - mpidr |= ((value >> 8) & 0xFF) << MPIDR_LEVEL_SHIFT(1); - mpidr |= ((value >> 16) & 0xFF) << MPIDR_LEVEL_SHIFT(2); - mpidr |= (u64)((value >> 24) & 0xFF) << MPIDR_LEVEL_SHIFT(3); - - return mpidr; -} - -/* - * Lookup the given MPIDR value to get the vcpu_id (if there is one) - * and store that in the irq_spi_cpu[] array. - * This limits the number of VCPUs to 255 for now, extending the data - * type (or storing kvm_vcpu pointers) should lift the limit. - * Store the original MPIDR value in an extra array to support read-as-written. - * Unallocated MPIDRs are translated to a special value and caught - * before any array accesses. - */ -static bool handle_mmio_route_reg(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm *kvm = vcpu->kvm; - struct vgic_dist *dist = &kvm->arch.vgic; - int spi; - u32 reg; - int vcpu_id; - unsigned long *bmap, mpidr; - - /* - * The upper 32 bits of each 64 bit register are zero, - * as we don't support Aff3. - */ - if ((offset & 4)) { - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; - } - - /* This region only covers SPIs, so no handling of private IRQs here. */ - spi = offset / 8; - - /* get the stored MPIDR for this IRQ */ - mpidr = uncompress_mpidr(dist->irq_spi_mpidr[spi]); - reg = mpidr; - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - - if (!mmio->is_write) - return false; - - /* - * Now clear the currently assigned vCPU from the map, making room - * for the new one to be written below - */ - vcpu = kvm_mpidr_to_vcpu(kvm, mpidr); - if (likely(vcpu)) { - vcpu_id = vcpu->vcpu_id; - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); - __clear_bit(spi, bmap); - } - - dist->irq_spi_mpidr[spi] = compress_mpidr(reg); - vcpu = kvm_mpidr_to_vcpu(kvm, reg & MPIDR_HWID_BITMASK); - - /* - * The spec says that non-existent MPIDR values should not be - * forwarded to any existent (v)CPU, but should be able to become - * pending anyway. We simply keep the irq_spi_target[] array empty, so - * the interrupt will never be injected. - * irq_spi_cpu[irq] gets a magic value in this case. - */ - if (likely(vcpu)) { - vcpu_id = vcpu->vcpu_id; - dist->irq_spi_cpu[spi] = vcpu_id; - bmap = vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]); - __set_bit(spi, bmap); - } else { - dist->irq_spi_cpu[spi] = VCPU_NOT_ALLOCATED; - } - - vgic_update_state(kvm); - - return true; -} - -/* - * We should be careful about promising too much when a guest reads - * this register. Don't claim to be like any hardware implementation, - * but just report the GIC as version 3 - which is what a Linux guest - * would check. - */ -static bool handle_mmio_idregs(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg = 0; - - switch (offset + GICD_IDREGS) { - case GICD_PIDR2: - reg = 0x3b; - break; - } - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - - return false; -} - -static const struct vgic_io_range vgic_v3_dist_ranges[] = { - { - .base = GICD_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr, - }, - { - .base = GICD_TYPER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer, - }, - { - .base = GICD_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - /* this register is optional, it is RAZ/WI if not implemented */ - .base = GICD_STATUSR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this write only register is WI when TYPER.MBIS=0 */ - .base = GICD_SETSPI_NSR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this write only register is WI when TYPER.MBIS=0 */ - .base = GICD_CLRSPI_NSR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_SETSPI_SR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_CLRSPI_SR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICD_IGROUPR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_rao_wi, - }, - { - .base = GICD_ISENABLER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg_dist, - }, - { - .base = GICD_ICENABLER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg_dist, - }, - { - .base = GICD_ISPENDR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg_dist, - }, - { - .base = GICD_ICPENDR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg_dist, - }, - { - .base = GICD_ISACTIVER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_active_reg_dist, - }, - { - .base = GICD_ICACTIVER, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_active_reg_dist, - }, - { - .base = GICD_IPRIORITYR, - .len = 0x400, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg_dist, - }, - { - /* TARGETSRn is RES0 when ARE=1 */ - .base = GICD_ITARGETSR, - .len = 0x400, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICD_ICFGR, - .len = 0x100, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg_dist, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_IGRPMODR, - .len = 0x80, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when DS=1 */ - .base = GICD_NSACR, - .len = 0x100, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when ARE=1 */ - .base = GICD_SGIR, - .len = 0x04, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when ARE=1 */ - .base = GICD_CPENDSGIR, - .len = 0x10, - .handle_mmio = handle_mmio_raz_wi, - }, - { - /* this is RAZ/WI when ARE=1 */ - .base = GICD_SPENDSGIR, - .len = 0x10, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICD_IROUTER + 0x100, - .len = 0x1ee0, - .bits_per_irq = 64, - .handle_mmio = handle_mmio_route_reg, - }, - { - .base = GICD_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - {}, -}; - -static bool handle_mmio_ctlr_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - /* since we don't support LPIs, this register is zero for now */ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_typer_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 reg; - u64 mpidr; - struct kvm_vcpu *redist_vcpu = mmio->private; - int target_vcpu_id = redist_vcpu->vcpu_id; - - /* the upper 32 bits contain the affinity value */ - if ((offset & ~3) == 4) { - mpidr = kvm_vcpu_get_mpidr_aff(redist_vcpu); - reg = compress_mpidr(mpidr); - - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; - } - - reg = redist_vcpu->vcpu_id << 8; - if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) - reg |= GICR_TYPER_LAST; - vgic_reg_access(mmio, ®, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_IGNORED); - return false; -} - -static bool handle_mmio_set_enable_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id, - ACCESS_WRITE_SETBIT); -} - -static bool handle_mmio_clear_enable_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_enable_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id, - ACCESS_WRITE_CLEARBIT); -} - -static bool handle_mmio_set_active_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_set_active_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_clear_active_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_clear_active_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_set_pending_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_set_pending_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_clear_pending_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - return vgic_handle_clear_pending_reg(vcpu->kvm, mmio, offset, - redist_vcpu->vcpu_id); -} - -static bool handle_mmio_priority_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - u32 *reg; - - reg = vgic_bytemap_get_reg(&vcpu->kvm->arch.vgic.irq_priority, - redist_vcpu->vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - return false; -} - -static bool handle_mmio_cfg_reg_redist(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - struct kvm_vcpu *redist_vcpu = mmio->private; - - u32 *reg = vgic_bitmap_get_reg(&vcpu->kvm->arch.vgic.irq_cfg, - redist_vcpu->vcpu_id, offset >> 1); - - return vgic_handle_cfg_reg(reg, mmio, offset); -} - -#define SGI_base(x) ((x) + SZ_64K) - -static const struct vgic_io_range vgic_redist_ranges[] = { - { - .base = GICR_CTLR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_ctlr_redist, - }, - { - .base = GICR_TYPER, - .len = 0x08, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_typer_redist, - }, - { - .base = GICR_IIDR, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_iidr, - }, - { - .base = GICR_WAKER, - .len = 0x04, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = GICR_IDREGS, - .len = 0x30, - .bits_per_irq = 0, - .handle_mmio = handle_mmio_idregs, - }, - { - .base = SGI_base(GICR_IGROUPR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_rao_wi, - }, - { - .base = SGI_base(GICR_ISENABLER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_enable_reg_redist, - }, - { - .base = SGI_base(GICR_ICENABLER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_enable_reg_redist, - }, - { - .base = SGI_base(GICR_ISPENDR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_pending_reg_redist, - }, - { - .base = SGI_base(GICR_ICPENDR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_pending_reg_redist, - }, - { - .base = SGI_base(GICR_ISACTIVER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_set_active_reg_redist, - }, - { - .base = SGI_base(GICR_ICACTIVER0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_clear_active_reg_redist, - }, - { - .base = SGI_base(GICR_IPRIORITYR0), - .len = 0x20, - .bits_per_irq = 8, - .handle_mmio = handle_mmio_priority_reg_redist, - }, - { - .base = SGI_base(GICR_ICFGR0), - .len = 0x08, - .bits_per_irq = 2, - .handle_mmio = handle_mmio_cfg_reg_redist, - }, - { - .base = SGI_base(GICR_IGRPMODR0), - .len = 0x04, - .bits_per_irq = 1, - .handle_mmio = handle_mmio_raz_wi, - }, - { - .base = SGI_base(GICR_NSACR), - .len = 0x04, - .handle_mmio = handle_mmio_raz_wi, - }, - {}, -}; - -static bool vgic_v3_queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - if (vgic_queue_irq(vcpu, 0, irq)) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - return true; - } - - return false; -} - -static int vgic_v3_map_resources(struct kvm *kvm, - const struct vgic_params *params) -{ - int ret = 0; - struct vgic_dist *dist = &kvm->arch.vgic; - gpa_t rdbase = dist->vgic_redist_base; - struct vgic_io_device *iodevs = NULL; - int i; - - if (!irqchip_in_kernel(kvm)) - return 0; - - mutex_lock(&kvm->lock); - - if (vgic_ready(kvm)) - goto out; - - if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || - IS_VGIC_ADDR_UNDEF(dist->vgic_redist_base)) { - kvm_err("Need to set vgic distributor addresses first\n"); - ret = -ENXIO; - goto out; - } - - /* - * For a VGICv3 we require the userland to explicitly initialize - * the VGIC before we need to use it. - */ - if (!vgic_initialized(kvm)) { - ret = -EBUSY; - goto out; - } - - ret = vgic_register_kvm_io_dev(kvm, dist->vgic_dist_base, - GIC_V3_DIST_SIZE, vgic_v3_dist_ranges, - -1, &dist->dist_iodev); - if (ret) - goto out; - - iodevs = kcalloc(dist->nr_cpus, sizeof(iodevs[0]), GFP_KERNEL); - if (!iodevs) { - ret = -ENOMEM; - goto out_unregister; - } - - for (i = 0; i < dist->nr_cpus; i++) { - ret = vgic_register_kvm_io_dev(kvm, rdbase, - SZ_128K, vgic_redist_ranges, - i, &iodevs[i]); - if (ret) - goto out_unregister; - rdbase += GIC_V3_REDIST_SIZE; - } - - dist->redist_iodevs = iodevs; - dist->ready = true; - goto out; - -out_unregister: - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dist->dist_iodev.dev); - if (iodevs) { - for (i = 0; i < dist->nr_cpus; i++) { - if (iodevs[i].dev.ops) - kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &iodevs[i].dev); - } - } - -out: - if (ret) - kvm_vgic_destroy(kvm); - mutex_unlock(&kvm->lock); - return ret; -} - -static int vgic_v3_init_model(struct kvm *kvm) -{ - int i; - u32 mpidr; - struct vgic_dist *dist = &kvm->arch.vgic; - int nr_spis = dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; - - dist->irq_spi_mpidr = kcalloc(nr_spis, sizeof(dist->irq_spi_mpidr[0]), - GFP_KERNEL); - - if (!dist->irq_spi_mpidr) - return -ENOMEM; - - /* Initialize the target VCPUs for each IRQ to VCPU 0 */ - mpidr = compress_mpidr(kvm_vcpu_get_mpidr_aff(kvm_get_vcpu(kvm, 0))); - for (i = VGIC_NR_PRIVATE_IRQS; i < dist->nr_irqs; i++) { - dist->irq_spi_cpu[i - VGIC_NR_PRIVATE_IRQS] = 0; - dist->irq_spi_mpidr[i - VGIC_NR_PRIVATE_IRQS] = mpidr; - vgic_bitmap_set_irq_val(dist->irq_spi_target, 0, i, 1); - } - - return 0; -} - -/* GICv3 does not keep track of SGI sources anymore. */ -static void vgic_v3_add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ -} - -void vgic_v3_init_emulation(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - - dist->vm_ops.queue_sgi = vgic_v3_queue_sgi; - dist->vm_ops.add_sgi_source = vgic_v3_add_sgi_source; - dist->vm_ops.init_model = vgic_v3_init_model; - dist->vm_ops.map_resources = vgic_v3_map_resources; - - kvm->arch.max_vcpus = KVM_MAX_VCPUS; -} - -/* - * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI - * generation register ICC_SGI1R_EL1) with a given VCPU. - * If the VCPU's MPIDR matches, return the level0 affinity, otherwise - * return -1. - */ -static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) -{ - unsigned long affinity; - int level0; - - /* - * Split the current VCPU's MPIDR into affinity level 0 and the - * rest as this is what we have to compare against. - */ - affinity = kvm_vcpu_get_mpidr_aff(vcpu); - level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); - affinity &= ~MPIDR_LEVEL_MASK; - - /* bail out if the upper three levels don't match */ - if (sgi_aff != affinity) - return -1; - - /* Is this VCPU's bit set in the mask ? */ - if (!(sgi_cpu_mask & BIT(level0))) - return -1; - - return level0; -} - -#define SGI_AFFINITY_LEVEL(reg, level) \ - ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ - >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) - -/** - * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs - * @vcpu: The VCPU requesting a SGI - * @reg: The value written into the ICC_SGI1R_EL1 register by that VCPU - * - * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. - * This will trap in sys_regs.c and call this function. - * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the - * target processors as well as a bitmask of 16 Aff0 CPUs. - * If the interrupt routing mode bit is not set, we iterate over all VCPUs to - * check for matching ones. If this bit is set, we signal all, but not the - * calling VCPU. - */ -void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) -{ - struct kvm *kvm = vcpu->kvm; - struct kvm_vcpu *c_vcpu; - struct vgic_dist *dist = &kvm->arch.vgic; - u16 target_cpus; - u64 mpidr; - int sgi, c; - int vcpu_id = vcpu->vcpu_id; - bool broadcast; - int updated = 0; - - sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); - target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; - mpidr = SGI_AFFINITY_LEVEL(reg, 3); - mpidr |= SGI_AFFINITY_LEVEL(reg, 2); - mpidr |= SGI_AFFINITY_LEVEL(reg, 1); - - /* - * We take the dist lock here, because we come from the sysregs - * code path and not from the MMIO one (which already takes the lock). - */ - spin_lock(&dist->lock); - - /* - * We iterate over all VCPUs to find the MPIDRs matching the request. - * If we have handled one CPU, we clear it's bit to detect early - * if we are already finished. This avoids iterating through all - * VCPUs when most of the times we just signal a single VCPU. - */ - kvm_for_each_vcpu(c, c_vcpu, kvm) { - - /* Exit early if we have dealt with all requested CPUs */ - if (!broadcast && target_cpus == 0) - break; - - /* Don't signal the calling VCPU */ - if (broadcast && c == vcpu_id) - continue; - - if (!broadcast) { - int level0; - - level0 = match_mpidr(mpidr, target_cpus, c_vcpu); - if (level0 == -1) - continue; - - /* remove this matching VCPU from the mask */ - target_cpus &= ~BIT(level0); - } - - /* Flag the SGI as pending */ - vgic_dist_irq_set_pending(c_vcpu, sgi); - updated = 1; - kvm_debug("SGI%d from CPU%d to CPU%d\n", sgi, vcpu_id, c); - } - if (updated) - vgic_update_state(vcpu->kvm); - spin_unlock(&dist->lock); - if (updated) - vgic_kick_vcpus(vcpu->kvm); -} - -static int vgic_v3_create(struct kvm_device *dev, u32 type) -{ - return kvm_vgic_create(dev->kvm, type); -} - -static void vgic_v3_destroy(struct kvm_device *dev) -{ - kfree(dev); -} - -static int vgic_v3_set_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_set_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return -ENXIO; - } - - return -ENXIO; -} - -static int vgic_v3_get_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - int ret; - - ret = vgic_get_common_attr(dev, attr); - if (ret != -ENXIO) - return ret; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return -ENXIO; - } - - return -ENXIO; -} - -static int vgic_v3_has_attr(struct kvm_device *dev, - struct kvm_device_attr *attr) -{ - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: - switch (attr->attr) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - case KVM_VGIC_V2_ADDR_TYPE_CPU: - return -ENXIO; - case KVM_VGIC_V3_ADDR_TYPE_DIST: - case KVM_VGIC_V3_ADDR_TYPE_REDIST: - return 0; - } - break; - case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: - case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: - return -ENXIO; - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: - return 0; - case KVM_DEV_ARM_VGIC_GRP_CTRL: - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - return 0; - } - } - return -ENXIO; -} - -struct kvm_device_ops kvm_arm_vgic_v3_ops = { - .name = "kvm-arm-vgic-v3", - .create = vgic_v3_create, - .destroy = vgic_v3_destroy, - .set_attr = vgic_v3_set_attr, - .get_attr = vgic_v3_get_attr, - .has_attr = vgic_v3_has_attr, -}; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c deleted file mode 100644 index 75b02fa..0000000 --- a/virt/kvm/arm/vgic-v3.c +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (C) 2013 ARM Limited, All Rights Reserved. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> - -#include <linux/irqchip/arm-gic-v3.h> -#include <linux/irqchip/arm-gic-common.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_asm.h> -#include <asm/kvm_mmu.h> - -static u32 ich_vtr_el2; - -static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - struct vgic_lr lr_desc; - u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr]; - - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - lr_desc.irq = val & ICH_LR_VIRTUAL_ID_MASK; - else - lr_desc.irq = val & GICH_LR_VIRTUALID; - - lr_desc.source = 0; - if (lr_desc.irq <= 15 && - vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) - lr_desc.source = (val >> GICH_LR_PHYSID_CPUID_SHIFT) & 0x7; - - lr_desc.state = 0; - - if (val & ICH_LR_PENDING_BIT) - lr_desc.state |= LR_STATE_PENDING; - if (val & ICH_LR_ACTIVE_BIT) - lr_desc.state |= LR_STATE_ACTIVE; - if (val & ICH_LR_EOI) - lr_desc.state |= LR_EOI_INT; - if (val & ICH_LR_HW) { - lr_desc.state |= LR_HW; - lr_desc.hwirq = (val >> ICH_LR_PHYS_ID_SHIFT) & GENMASK(9, 0); - } - - return lr_desc; -} - -static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ - u64 lr_val; - - lr_val = lr_desc.irq; - - /* - * Currently all guest IRQs are Group1, as Group0 would result - * in a FIQ in the guest, which it wouldn't expect. - * Eventually we want to make this configurable, so we may revisit - * this in the future. - */ - switch (vcpu->kvm->arch.vgic.vgic_model) { - case KVM_DEV_TYPE_ARM_VGIC_V3: - lr_val |= ICH_LR_GROUP; - break; - case KVM_DEV_TYPE_ARM_VGIC_V2: - if (lr_desc.irq < VGIC_NR_SGIS) - lr_val |= (u32)lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT; - break; - default: - BUG(); - } - - if (lr_desc.state & LR_STATE_PENDING) - lr_val |= ICH_LR_PENDING_BIT; - if (lr_desc.state & LR_STATE_ACTIVE) - lr_val |= ICH_LR_ACTIVE_BIT; - if (lr_desc.state & LR_EOI_INT) - lr_val |= ICH_LR_EOI; - if (lr_desc.state & LR_HW) { - lr_val |= ICH_LR_HW; - lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT; - } - - vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = lr_val; - - if (!(lr_desc.state & LR_STATE_MASK)) - vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); - else - vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr &= ~(1U << lr); -} - -static u64 vgic_v3_get_elrsr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr; -} - -static u64 vgic_v3_get_eisr(const struct kvm_vcpu *vcpu) -{ - return vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr; -} - -static void vgic_v3_clear_eisr(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_eisr = 0; -} - -static u32 vgic_v3_get_interrupt_status(const struct kvm_vcpu *vcpu) -{ - u32 misr = vcpu->arch.vgic_cpu.vgic_v3.vgic_misr; - u32 ret = 0; - - if (misr & ICH_MISR_EOI) - ret |= INT_STATUS_EOI; - if (misr & ICH_MISR_U) - ret |= INT_STATUS_UNDERFLOW; - - return ret; -} - -static void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr = vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr; - - vmcrp->ctlr = (vmcr & ICH_VMCR_CTLR_MASK) >> ICH_VMCR_CTLR_SHIFT; - vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; - vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; - vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; -} - -static void vgic_v3_enable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr |= ICH_HCR_UIE; -} - -static void vgic_v3_disable_underflow(struct kvm_vcpu *vcpu) -{ - vcpu->arch.vgic_cpu.vgic_v3.vgic_hcr &= ~ICH_HCR_UIE; -} - -static void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) -{ - u32 vmcr; - - vmcr = (vmcrp->ctlr << ICH_VMCR_CTLR_SHIFT) & ICH_VMCR_CTLR_MASK; - vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; - vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; - vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; - - vcpu->arch.vgic_cpu.vgic_v3.vgic_vmcr = vmcr; -} - -static void vgic_v3_enable(struct kvm_vcpu *vcpu) -{ - struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; - - /* - * By forcing VMCR to zero, the GIC will restore the binary - * points to their reset values. Anything else resets to zero - * anyway. - */ - vgic_v3->vgic_vmcr = 0; - vgic_v3->vgic_elrsr = ~0; - - /* - * If we are emulating a GICv3, we do it in an non-GICv2-compatible - * way, so we force SRE to 1 to demonstrate this to the guest. - * This goes with the spec allowing the value to be RAO/WI. - */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) - vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; - else - vgic_v3->vgic_sre = 0; - - /* Get the show on the road... */ - vgic_v3->vgic_hcr = ICH_HCR_EN; -} - -static const struct vgic_ops vgic_v3_ops = { - .get_lr = vgic_v3_get_lr, - .set_lr = vgic_v3_set_lr, - .get_elrsr = vgic_v3_get_elrsr, - .get_eisr = vgic_v3_get_eisr, - .clear_eisr = vgic_v3_clear_eisr, - .get_interrupt_status = vgic_v3_get_interrupt_status, - .enable_underflow = vgic_v3_enable_underflow, - .disable_underflow = vgic_v3_disable_underflow, - .get_vmcr = vgic_v3_get_vmcr, - .set_vmcr = vgic_v3_set_vmcr, - .enable = vgic_v3_enable, -}; - -static struct vgic_params vgic_v3_params; - -static void vgic_cpu_init_lrs(void *params) -{ - kvm_call_hyp(__vgic_v3_init_lrs); -} - -/** - * vgic_v3_probe - probe for a GICv3 compatible interrupt controller - * @gic_kvm_info: pointer to the GIC description - * @ops: address of a pointer to the GICv3 operations - * @params: address of a pointer to HW-specific parameters - * - * Returns 0 if a GICv3 has been found, with the low level operations - * in *ops and the HW parameters in *params. Returns an error code - * otherwise. - */ -int vgic_v3_probe(const struct gic_kvm_info *gic_kvm_info, - const struct vgic_ops **ops, - const struct vgic_params **params) -{ - int ret = 0; - struct vgic_params *vgic = &vgic_v3_params; - const struct resource *vcpu_res = &gic_kvm_info->vcpu; - - vgic->maint_irq = gic_kvm_info->maint_irq; - - ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); - - /* - * The ListRegs field is 5 bits, but there is a architectural - * maximum of 16 list registers. Just ignore bit 4... - */ - vgic->nr_lr = (ich_vtr_el2 & 0xf) + 1; - vgic->can_emulate_gicv2 = false; - - if (!vcpu_res->start) { - kvm_info("GICv3: no GICV resource entry\n"); - vgic->vcpu_base = 0; - } else if (!PAGE_ALIGNED(vcpu_res->start)) { - pr_warn("GICV physical address 0x%llx not page aligned\n", - (unsigned long long)vcpu_res->start); - vgic->vcpu_base = 0; - } else if (!PAGE_ALIGNED(resource_size(vcpu_res))) { - pr_warn("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(vcpu_res), - PAGE_SIZE); - } else { - vgic->vcpu_base = vcpu_res->start; - vgic->can_emulate_gicv2 = true; - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, - KVM_DEV_TYPE_ARM_VGIC_V2); - } - if (vgic->vcpu_base == 0) - kvm_info("disabling GICv2 emulation\n"); - kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3); - - vgic->vctrl_base = NULL; - vgic->type = VGIC_V3; - vgic->max_gic_vcpus = VGIC_V3_MAX_CPUS; - - kvm_info("GICV base=0x%llx, IRQ=%d\n", - vgic->vcpu_base, vgic->maint_irq); - - on_each_cpu(vgic_cpu_init_lrs, vgic, 1); - - *ops = &vgic_v3_ops; - *params = vgic; - - return ret; -} diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c deleted file mode 100644 index c3bfbb9..0000000 --- a/virt/kvm/arm/vgic.c +++ /dev/null @@ -1,2440 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/cpu.h> -#include <linux/kvm.h> -#include <linux/kvm_host.h> -#include <linux/interrupt.h> -#include <linux/io.h> -#include <linux/irq.h> -#include <linux/rculist.h> -#include <linux/uaccess.h> - -#include <asm/kvm_emulate.h> -#include <asm/kvm_arm.h> -#include <asm/kvm_mmu.h> -#include <trace/events/kvm.h> -#include <asm/kvm.h> -#include <kvm/iodev.h> -#include <linux/irqchip/arm-gic-common.h> - -#define CREATE_TRACE_POINTS -#include "trace.h" - -/* - * How the whole thing works (courtesy of Christoffer Dall): - * - * - At any time, the dist->irq_pending_on_cpu is the oracle that knows if - * something is pending on the CPU interface. - * - Interrupts that are pending on the distributor are stored on the - * vgic.irq_pending vgic bitmap (this bitmap is updated by both user land - * ioctls and guest mmio ops, and other in-kernel peripherals such as the - * arch. timers). - * - Every time the bitmap changes, the irq_pending_on_cpu oracle is - * recalculated - * - To calculate the oracle, we need info for each cpu from - * compute_pending_for_cpu, which considers: - * - PPI: dist->irq_pending & dist->irq_enable - * - SPI: dist->irq_pending & dist->irq_enable & dist->irq_spi_target - * - irq_spi_target is a 'formatted' version of the GICD_ITARGETSRn - * registers, stored on each vcpu. We only keep one bit of - * information per interrupt, making sure that only one vcpu can - * accept the interrupt. - * - If any of the above state changes, we must recalculate the oracle. - * - The same is true when injecting an interrupt, except that we only - * consider a single interrupt at a time. The irq_spi_cpu array - * contains the target CPU for each SPI. - * - * The handling of level interrupts adds some extra complexity. We - * need to track when the interrupt has been EOIed, so we can sample - * the 'line' again. This is achieved as such: - * - * - When a level interrupt is moved onto a vcpu, the corresponding - * bit in irq_queued is set. As long as this bit is set, the line - * will be ignored for further interrupts. The interrupt is injected - * into the vcpu with the GICH_LR_EOI bit set (generate a - * maintenance interrupt on EOI). - * - When the interrupt is EOIed, the maintenance interrupt fires, - * and clears the corresponding bit in irq_queued. This allows the - * interrupt line to be sampled again. - * - Note that level-triggered interrupts can also be set to pending from - * writes to GICD_ISPENDRn and lowering the external input line does not - * cause the interrupt to become inactive in such a situation. - * Conversely, writes to GICD_ICPENDRn do not cause the interrupt to become - * inactive as long as the external input line is held high. - * - * - * Initialization rules: there are multiple stages to the vgic - * initialization, both for the distributor and the CPU interfaces. - * - * Distributor: - * - * - kvm_vgic_early_init(): initialization of static data that doesn't - * depend on any sizing information or emulation type. No allocation - * is allowed there. - * - * - vgic_init(): allocation and initialization of the generic data - * structures that depend on sizing information (number of CPUs, - * number of interrupts). Also initializes the vcpu specific data - * structures. Can be executed lazily for GICv2. - * [to be renamed to kvm_vgic_init??] - * - * CPU Interface: - * - * - kvm_vgic_cpu_early_init(): initialization of static data that - * doesn't depend on any sizing information or emulation type. No - * allocation is allowed there. - */ - -#include "vgic.h" - -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); -static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); -static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); -static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); -static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, - int virt_irq); -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); - -static const struct vgic_ops *vgic_ops; -static const struct vgic_params *vgic; - -static void add_sgi_source(struct kvm_vcpu *vcpu, int irq, int source) -{ - vcpu->kvm->arch.vgic.vm_ops.add_sgi_source(vcpu, irq, source); -} - -static bool queue_sgi(struct kvm_vcpu *vcpu, int irq) -{ - return vcpu->kvm->arch.vgic.vm_ops.queue_sgi(vcpu, irq); -} - -int kvm_vgic_map_resources(struct kvm *kvm) -{ - return kvm->arch.vgic.vm_ops.map_resources(kvm, vgic); -} - -/* - * struct vgic_bitmap contains a bitmap made of unsigned longs, but - * extracts u32s out of them. - * - * This does not work on 64-bit BE systems, because the bitmap access - * will store two consecutive 32-bit words with the higher-addressed - * register's bits at the lower index and the lower-addressed register's - * bits at the higher index. - * - * Therefore, swizzle the register index when accessing the 32-bit word - * registers to access the right register's value. - */ -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 64 -#define REG_OFFSET_SWIZZLE 1 -#else -#define REG_OFFSET_SWIZZLE 0 -#endif - -static int vgic_init_bitmap(struct vgic_bitmap *b, int nr_cpus, int nr_irqs) -{ - int nr_longs; - - nr_longs = nr_cpus + BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); - - b->private = kzalloc(sizeof(unsigned long) * nr_longs, GFP_KERNEL); - if (!b->private) - return -ENOMEM; - - b->shared = b->private + nr_cpus; - - return 0; -} - -static void vgic_free_bitmap(struct vgic_bitmap *b) -{ - kfree(b->private); - b->private = NULL; - b->shared = NULL; -} - -/* - * Call this function to convert a u64 value to an unsigned long * bitmask - * in a way that works on both 32-bit and 64-bit LE and BE platforms. - * - * Warning: Calling this function may modify *val. - */ -static unsigned long *u64_to_bitmask(u64 *val) -{ -#if defined(CONFIG_CPU_BIG_ENDIAN) && BITS_PER_LONG == 32 - *val = (*val >> 32) | (*val << 32); -#endif - return (unsigned long *)val; -} - -u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset) -{ - offset >>= 2; - if (!offset) - return (u32 *)(x->private + cpuid) + REG_OFFSET_SWIZZLE; - else - return (u32 *)(x->shared) + ((offset - 1) ^ REG_OFFSET_SWIZZLE); -} - -static int vgic_bitmap_get_irq_val(struct vgic_bitmap *x, - int cpuid, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - return test_bit(irq, x->private + cpuid); - - return test_bit(irq - VGIC_NR_PRIVATE_IRQS, x->shared); -} - -void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val) -{ - unsigned long *reg; - - if (irq < VGIC_NR_PRIVATE_IRQS) { - reg = x->private + cpuid; - } else { - reg = x->shared; - irq -= VGIC_NR_PRIVATE_IRQS; - } - - if (val) - set_bit(irq, reg); - else - clear_bit(irq, reg); -} - -static unsigned long *vgic_bitmap_get_cpu_map(struct vgic_bitmap *x, int cpuid) -{ - return x->private + cpuid; -} - -unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x) -{ - return x->shared; -} - -static int vgic_init_bytemap(struct vgic_bytemap *x, int nr_cpus, int nr_irqs) -{ - int size; - - size = nr_cpus * VGIC_NR_PRIVATE_IRQS; - size += nr_irqs - VGIC_NR_PRIVATE_IRQS; - - x->private = kzalloc(size, GFP_KERNEL); - if (!x->private) - return -ENOMEM; - - x->shared = x->private + nr_cpus * VGIC_NR_PRIVATE_IRQS / sizeof(u32); - return 0; -} - -static void vgic_free_bytemap(struct vgic_bytemap *b) -{ - kfree(b->private); - b->private = NULL; - b->shared = NULL; -} - -u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset) -{ - u32 *reg; - - if (offset < VGIC_NR_PRIVATE_IRQS) { - reg = x->private; - offset += cpuid * VGIC_NR_PRIVATE_IRQS; - } else { - reg = x->shared; - offset -= VGIC_NR_PRIVATE_IRQS; - } - - return reg + (offset / sizeof(u32)); -} - -#define VGIC_CFG_LEVEL 0 -#define VGIC_CFG_EDGE 1 - -static bool vgic_irq_is_edge(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - int irq_val; - - irq_val = vgic_bitmap_get_irq_val(&dist->irq_cfg, vcpu->vcpu_id, irq); - return irq_val == VGIC_CFG_EDGE; -} - -static int vgic_irq_is_enabled(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_enabled, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_queued(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq); -} - -static int vgic_irq_is_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_active, vcpu->vcpu_id, irq); -} - -static void vgic_irq_set_queued(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_queued(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_queued, vcpu->vcpu_id, irq, 0); -} - -static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1); -} - -static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_get_level(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_level, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_set_level(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 1); -} - -static void vgic_dist_irq_clear_level(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_level, vcpu->vcpu_id, irq, 0); -} - -static int vgic_dist_irq_soft_pend(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq); -} - -static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); - if (!vgic_dist_irq_get_level(vcpu, irq)) { - vgic_dist_irq_clear_pending(vcpu, irq); - if (!compute_pending_for_cpu(vcpu)) - clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); - } -} - -static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return vgic_bitmap_get_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq); -} - -void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 1); -} - -void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - vgic_bitmap_set_irq_val(&dist->irq_pending, vcpu->vcpu_id, irq, 0); -} - -static void vgic_cpu_irq_set(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - set_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - set_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq) -{ - if (irq < VGIC_NR_PRIVATE_IRQS) - clear_bit(irq, vcpu->arch.vgic_cpu.pending_percpu); - else - clear_bit(irq - VGIC_NR_PRIVATE_IRQS, - vcpu->arch.vgic_cpu.pending_shared); -} - -static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq) -{ - return !vgic_irq_is_queued(vcpu, irq); -} - -/** - * vgic_reg_access - access vgic register - * @mmio: pointer to the data describing the mmio access - * @reg: pointer to the virtual backing of vgic distributor data - * @offset: least significant 2 bits used for word offset - * @mode: ACCESS_ mode (see defines above) - * - * Helper to make vgic register access easier using one of the access - * modes defined for vgic register access - * (read,raz,write-ignored,setbit,clearbit,write) - */ -void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode) -{ - int word_offset = (offset & 3) * 8; - u32 mask = (1UL << (mmio->len * 8)) - 1; - u32 regval; - - /* - * Any alignment fault should have been delivered to the guest - * directly (ARM ARM B3.12.7 "Prioritization of aborts"). - */ - - if (reg) { - regval = *reg; - } else { - BUG_ON(mode != (ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED)); - regval = 0; - } - - if (mmio->is_write) { - u32 data = mmio_data_read(mmio, mask) << word_offset; - switch (ACCESS_WRITE_MASK(mode)) { - case ACCESS_WRITE_IGNORED: - return; - - case ACCESS_WRITE_SETBIT: - regval |= data; - break; - - case ACCESS_WRITE_CLEARBIT: - regval &= ~data; - break; - - case ACCESS_WRITE_VALUE: - regval = (regval & ~(mask << word_offset)) | data; - break; - } - *reg = regval; - } else { - switch (ACCESS_READ_MASK(mode)) { - case ACCESS_READ_RAZ: - regval = 0; - /* fall through */ - - case ACCESS_READ_VALUE: - mmio_data_write(mmio, mask, regval >> word_offset); - } - } -} - -bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - vgic_reg_access(mmio, NULL, offset, - ACCESS_READ_RAZ | ACCESS_WRITE_IGNORED); - return false; -} - -bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id, int access) -{ - u32 *reg; - int mode = ACCESS_READ_VALUE | access; - struct kvm_vcpu *target_vcpu = kvm_get_vcpu(kvm, vcpu_id); - - reg = vgic_bitmap_get_reg(&kvm->arch.vgic.irq_enabled, vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, mode); - if (mmio->is_write) { - if (access & ACCESS_WRITE_CLEARBIT) { - if (offset < 4) /* Force SGI enabled */ - *reg |= 0xffff; - vgic_retire_disabled_irqs(target_vcpu); - } - vgic_update_state(kvm); - return true; - } - - return false; -} - -bool vgic_handle_set_pending_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *reg, orig; - u32 level_mask; - int mode = ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_cfg, vcpu_id, offset); - level_mask = (~(*reg)); - - /* Mark both level and edge triggered irqs as pending */ - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); - orig = *reg; - vgic_reg_access(mmio, reg, offset, mode); - - if (mmio->is_write) { - /* Set the soft-pending flag only for level-triggered irqs */ - reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, - vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, mode); - *reg &= level_mask; - - /* Ignore writes to SGIs */ - if (offset < 2) { - *reg &= ~0xffff; - *reg |= orig & 0xffff; - } - - vgic_update_state(kvm); - return true; - } - - return false; -} - -bool vgic_handle_clear_pending_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *level_active; - u32 *reg, orig; - int mode = ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); - orig = *reg; - vgic_reg_access(mmio, reg, offset, mode); - if (mmio->is_write) { - /* Re-set level triggered level-active interrupts */ - level_active = vgic_bitmap_get_reg(&dist->irq_level, - vcpu_id, offset); - reg = vgic_bitmap_get_reg(&dist->irq_pending, vcpu_id, offset); - *reg |= *level_active; - - /* Ignore writes to SGIs */ - if (offset < 2) { - *reg &= ~0xffff; - *reg |= orig & 0xffff; - } - - /* Clear soft-pending flags */ - reg = vgic_bitmap_get_reg(&dist->irq_soft_pend, - vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, mode); - - vgic_update_state(kvm); - return true; - } - return false; -} - -bool vgic_handle_set_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *reg; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_SETBIT); - - if (mmio->is_write) { - vgic_update_state(kvm); - return true; - } - - return false; -} - -bool vgic_handle_clear_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id) -{ - u32 *reg; - struct vgic_dist *dist = &kvm->arch.vgic; - - reg = vgic_bitmap_get_reg(&dist->irq_active, vcpu_id, offset); - vgic_reg_access(mmio, reg, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); - - if (mmio->is_write) { - vgic_update_state(kvm); - return true; - } - - return false; -} - -static u32 vgic_cfg_expand(u16 val) -{ - u32 res = 0; - int i; - - /* - * Turn a 16bit value like abcd...mnop into a 32bit word - * a0b0c0d0...m0n0o0p0, which is what the HW cfg register is. - */ - for (i = 0; i < 16; i++) - res |= ((val >> i) & VGIC_CFG_EDGE) << (2 * i + 1); - - return res; -} - -static u16 vgic_cfg_compress(u32 val) -{ - u16 res = 0; - int i; - - /* - * Turn a 32bit word a0b0c0d0...m0n0o0p0 into 16bit value like - * abcd...mnop which is what we really care about. - */ - for (i = 0; i < 16; i++) - res |= ((val >> (i * 2 + 1)) & VGIC_CFG_EDGE) << i; - - return res; -} - -/* - * The distributor uses 2 bits per IRQ for the CFG register, but the - * LSB is always 0. As such, we only keep the upper bit, and use the - * two above functions to compress/expand the bits - */ -bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, - phys_addr_t offset) -{ - u32 val; - - if (offset & 4) - val = *reg >> 16; - else - val = *reg & 0xffff; - - val = vgic_cfg_expand(val); - vgic_reg_access(mmio, &val, offset, - ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); - if (mmio->is_write) { - /* Ignore writes to read-only SGI and PPI bits */ - if (offset < 8) - return false; - - val = vgic_cfg_compress(val); - if (offset & 4) { - *reg &= 0xffff; - *reg |= val << 16; - } else { - *reg &= 0xffff << 16; - *reg |= val; - } - } - - return false; -} - -/** - * vgic_unqueue_irqs - move pending/active IRQs from LRs to the distributor - * @vgic_cpu: Pointer to the vgic_cpu struct holding the LRs - * - * Move any IRQs that have already been assigned to LRs back to the - * emulated distributor state so that the complete emulated state can be read - * from the main emulation structures without investigating the LRs. - */ -void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) -{ - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); - int i; - - for_each_clear_bit(i, elrsr_ptr, vgic->nr_lr) { - struct vgic_lr lr = vgic_get_lr(vcpu, i); - - /* - * There are three options for the state bits: - * - * 01: pending - * 10: active - * 11: pending and active - */ - BUG_ON(!(lr.state & LR_STATE_MASK)); - - /* Reestablish SGI source for pending and active IRQs */ - if (lr.irq < VGIC_NR_SGIS) - add_sgi_source(vcpu, lr.irq, lr.source); - - /* - * If the LR holds an active (10) or a pending and active (11) - * interrupt then move the active state to the - * distributor tracking bit. - */ - if (lr.state & LR_STATE_ACTIVE) - vgic_irq_set_active(vcpu, lr.irq); - - /* - * Reestablish the pending state on the distributor and the - * CPU interface and mark the LR as free for other use. - */ - vgic_retire_lr(i, vcpu); - - /* Finally update the VGIC state. */ - vgic_update_state(vcpu->kvm); - } -} - -const -struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - int len, gpa_t offset) -{ - while (ranges->len) { - if (offset >= ranges->base && - (offset + len) <= (ranges->base + ranges->len)) - return ranges; - ranges++; - } - - return NULL; -} - -static bool vgic_validate_access(const struct vgic_dist *dist, - const struct vgic_io_range *range, - unsigned long offset) -{ - int irq; - - if (!range->bits_per_irq) - return true; /* Not an irq-based access */ - - irq = offset * 8 / range->bits_per_irq; - if (irq >= dist->nr_irqs) - return false; - - return true; -} - -/* - * Call the respective handler function for the given range. - * We split up any 64 bit accesses into two consecutive 32 bit - * handler calls and merge the result afterwards. - * We do this in a little endian fashion regardless of the host's - * or guest's endianness, because the GIC is always LE and the rest of - * the code (vgic_reg_access) also puts it in a LE fashion already. - * At this point we have already identified the handle function, so - * range points to that one entry and offset is relative to this. - */ -static bool call_range_handler(struct kvm_vcpu *vcpu, - struct kvm_exit_mmio *mmio, - unsigned long offset, - const struct vgic_io_range *range) -{ - struct kvm_exit_mmio mmio32; - bool ret; - - if (likely(mmio->len <= 4)) - return range->handle_mmio(vcpu, mmio, offset); - - /* - * Any access bigger than 4 bytes (that we currently handle in KVM) - * is actually 8 bytes long, caused by a 64-bit access - */ - - mmio32.len = 4; - mmio32.is_write = mmio->is_write; - mmio32.private = mmio->private; - - mmio32.phys_addr = mmio->phys_addr + 4; - mmio32.data = &((u32 *)mmio->data)[1]; - ret = range->handle_mmio(vcpu, &mmio32, offset + 4); - - mmio32.phys_addr = mmio->phys_addr; - mmio32.data = &((u32 *)mmio->data)[0]; - ret |= range->handle_mmio(vcpu, &mmio32, offset); - - return ret; -} - -/** - * vgic_handle_mmio_access - handle an in-kernel MMIO access - * This is called by the read/write KVM IO device wrappers below. - * @vcpu: pointer to the vcpu performing the access - * @this: pointer to the KVM IO device in charge - * @addr: guest physical address of the access - * @len: size of the access - * @val: pointer to the data region - * @is_write: read or write access - * - * returns true if the MMIO access could be performed - */ -static int vgic_handle_mmio_access(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, gpa_t addr, - int len, void *val, bool is_write) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct vgic_io_device *iodev = container_of(this, - struct vgic_io_device, dev); - const struct vgic_io_range *range; - struct kvm_exit_mmio mmio; - bool updated_state; - gpa_t offset; - - offset = addr - iodev->addr; - range = vgic_find_range(iodev->reg_ranges, len, offset); - if (unlikely(!range || !range->handle_mmio)) { - pr_warn("Unhandled access %d %08llx %d\n", is_write, addr, len); - return -ENXIO; - } - - mmio.phys_addr = addr; - mmio.len = len; - mmio.is_write = is_write; - mmio.data = val; - mmio.private = iodev->redist_vcpu; - - spin_lock(&dist->lock); - offset -= range->base; - if (vgic_validate_access(dist, range, offset)) { - updated_state = call_range_handler(vcpu, &mmio, offset, range); - } else { - if (!is_write) - memset(val, 0, len); - updated_state = false; - } - spin_unlock(&dist->lock); - - if (updated_state) - vgic_kick_vcpus(vcpu->kvm); - - return 0; -} - -static int vgic_handle_mmio_read(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, - gpa_t addr, int len, void *val) -{ - return vgic_handle_mmio_access(vcpu, this, addr, len, val, false); -} - -static int vgic_handle_mmio_write(struct kvm_vcpu *vcpu, - struct kvm_io_device *this, - gpa_t addr, int len, const void *val) -{ - return vgic_handle_mmio_access(vcpu, this, addr, len, (void *)val, - true); -} - -static struct kvm_io_device_ops vgic_io_ops = { - .read = vgic_handle_mmio_read, - .write = vgic_handle_mmio_write, -}; - -/** - * vgic_register_kvm_io_dev - register VGIC register frame on the KVM I/O bus - * @kvm: The VM structure pointer - * @base: The (guest) base address for the register frame - * @len: Length of the register frame window - * @ranges: Describing the handler functions for each register - * @redist_vcpu_id: The VCPU ID to pass on to the handlers on call - * @iodev: Points to memory to be passed on to the handler - * - * @iodev stores the parameters of this function to be usable by the handler - * respectively the dispatcher function (since the KVM I/O bus framework lacks - * an opaque parameter). Initialization is done in this function, but the - * reference should be valid and unique for the whole VGIC lifetime. - * If the register frame is not mapped for a specific VCPU, pass -1 to - * @redist_vcpu_id. - */ -int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, - const struct vgic_io_range *ranges, - int redist_vcpu_id, - struct vgic_io_device *iodev) -{ - struct kvm_vcpu *vcpu = NULL; - int ret; - - if (redist_vcpu_id >= 0) - vcpu = kvm_get_vcpu(kvm, redist_vcpu_id); - - iodev->addr = base; - iodev->len = len; - iodev->reg_ranges = ranges; - iodev->redist_vcpu = vcpu; - - kvm_iodevice_init(&iodev->dev, &vgic_io_ops); - - mutex_lock(&kvm->slots_lock); - - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, base, len, - &iodev->dev); - mutex_unlock(&kvm->slots_lock); - - /* Mark the iodev as invalid if registration fails. */ - if (ret) - iodev->dev.ops = NULL; - - return ret; -} - -static int vgic_nr_shared_irqs(struct vgic_dist *dist) -{ - return dist->nr_irqs - VGIC_NR_PRIVATE_IRQS; -} - -static int compute_active_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *active, *enabled, *act_percpu, *act_shared; - unsigned long active_private, active_shared; - int nr_shared = vgic_nr_shared_irqs(dist); - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - act_percpu = vcpu->arch.vgic_cpu.active_percpu; - act_shared = vcpu->arch.vgic_cpu.active_shared; - - active = vgic_bitmap_get_cpu_map(&dist->irq_active, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(act_percpu, active, enabled, VGIC_NR_PRIVATE_IRQS); - - active = vgic_bitmap_get_shared_map(&dist->irq_active); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(act_shared, active, enabled, nr_shared); - bitmap_and(act_shared, act_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - nr_shared); - - active_private = find_first_bit(act_percpu, VGIC_NR_PRIVATE_IRQS); - active_shared = find_first_bit(act_shared, nr_shared); - - return (active_private < VGIC_NR_PRIVATE_IRQS || - active_shared < nr_shared); -} - -static int compute_pending_for_cpu(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pending, *enabled, *pend_percpu, *pend_shared; - unsigned long pending_private, pending_shared; - int nr_shared = vgic_nr_shared_irqs(dist); - int vcpu_id; - - vcpu_id = vcpu->vcpu_id; - pend_percpu = vcpu->arch.vgic_cpu.pending_percpu; - pend_shared = vcpu->arch.vgic_cpu.pending_shared; - - if (!dist->enabled) { - bitmap_zero(pend_percpu, VGIC_NR_PRIVATE_IRQS); - bitmap_zero(pend_shared, nr_shared); - return 0; - } - - pending = vgic_bitmap_get_cpu_map(&dist->irq_pending, vcpu_id); - enabled = vgic_bitmap_get_cpu_map(&dist->irq_enabled, vcpu_id); - bitmap_and(pend_percpu, pending, enabled, VGIC_NR_PRIVATE_IRQS); - - pending = vgic_bitmap_get_shared_map(&dist->irq_pending); - enabled = vgic_bitmap_get_shared_map(&dist->irq_enabled); - bitmap_and(pend_shared, pending, enabled, nr_shared); - bitmap_and(pend_shared, pend_shared, - vgic_bitmap_get_shared_map(&dist->irq_spi_target[vcpu_id]), - nr_shared); - - pending_private = find_first_bit(pend_percpu, VGIC_NR_PRIVATE_IRQS); - pending_shared = find_first_bit(pend_shared, nr_shared); - return (pending_private < VGIC_NR_PRIVATE_IRQS || - pending_shared < vgic_nr_shared_irqs(dist)); -} - -/* - * Update the interrupt state and determine which CPUs have pending - * or active interrupts. Must be called with distributor lock held. - */ -void vgic_update_state(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int c; - - kvm_for_each_vcpu(c, vcpu, kvm) { - if (compute_pending_for_cpu(vcpu)) - set_bit(c, dist->irq_pending_on_cpu); - - if (compute_active_for_cpu(vcpu)) - set_bit(c, dist->irq_active_on_cpu); - else - clear_bit(c, dist->irq_active_on_cpu); - } -} - -static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr) -{ - return vgic_ops->get_lr(vcpu, lr); -} - -static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr vlr) -{ - vgic_ops->set_lr(vcpu, lr, vlr); -} - -static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) -{ - return vgic_ops->get_elrsr(vcpu); -} - -static inline u64 vgic_get_eisr(struct kvm_vcpu *vcpu) -{ - return vgic_ops->get_eisr(vcpu); -} - -static inline void vgic_clear_eisr(struct kvm_vcpu *vcpu) -{ - vgic_ops->clear_eisr(vcpu); -} - -static inline u32 vgic_get_interrupt_status(struct kvm_vcpu *vcpu) -{ - return vgic_ops->get_interrupt_status(vcpu); -} - -static inline void vgic_enable_underflow(struct kvm_vcpu *vcpu) -{ - vgic_ops->enable_underflow(vcpu); -} - -static inline void vgic_disable_underflow(struct kvm_vcpu *vcpu) -{ - vgic_ops->disable_underflow(vcpu); -} - -void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - vgic_ops->get_vmcr(vcpu, vmcr); -} - -void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ - vgic_ops->set_vmcr(vcpu, vmcr); -} - -static inline void vgic_enable(struct kvm_vcpu *vcpu) -{ - vgic_ops->enable(vcpu); -} - -static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) -{ - struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); - - vgic_irq_clear_queued(vcpu, vlr.irq); - - /* - * We must transfer the pending state back to the distributor before - * retiring the LR, otherwise we may loose edge-triggered interrupts. - */ - if (vlr.state & LR_STATE_PENDING) { - vgic_dist_irq_set_pending(vcpu, vlr.irq); - vlr.hwirq = 0; - } - - vlr.state = 0; - vgic_set_lr(vcpu, lr_nr, vlr); -} - -static bool dist_active_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - return test_bit(vcpu->vcpu_id, dist->irq_active_on_cpu); -} - -bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) -{ - int i; - - for (i = 0; i < vgic->nr_lr; i++) { - struct vgic_lr vlr = vgic_get_lr(vcpu, i); - - if (vlr.irq == virt_irq && vlr.state & LR_STATE_ACTIVE) - return true; - } - - return vgic_irq_is_active(vcpu, virt_irq); -} - -/* - * An interrupt may have been disabled after being made pending on the - * CPU interface (the classic case is a timer running while we're - * rebooting the guest - the interrupt would kick as soon as the CPU - * interface gets enabled, with deadly consequences). - * - * The solution is to examine already active LRs, and check the - * interrupt is still enabled. If not, just retire it. - */ -static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) -{ - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); - int lr; - - for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { - struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - - if (!vgic_irq_is_enabled(vcpu, vlr.irq)) - vgic_retire_lr(lr, vcpu); - } -} - -static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, - int lr_nr, struct vgic_lr vlr) -{ - if (vgic_irq_is_active(vcpu, irq)) { - vlr.state |= LR_STATE_ACTIVE; - kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state); - vgic_irq_clear_active(vcpu, irq); - vgic_update_state(vcpu->kvm); - } else { - WARN_ON(!vgic_dist_irq_is_pending(vcpu, irq)); - vlr.state |= LR_STATE_PENDING; - kvm_debug("Set pending: 0x%x\n", vlr.state); - } - - if (!vgic_irq_is_edge(vcpu, irq)) - vlr.state |= LR_EOI_INT; - - if (vlr.irq >= VGIC_NR_SGIS) { - struct irq_phys_map *map; - map = vgic_irq_map_search(vcpu, irq); - - if (map) { - vlr.hwirq = map->phys_irq; - vlr.state |= LR_HW; - vlr.state &= ~LR_EOI_INT; - - /* - * Make sure we're not going to sample this - * again, as a HW-backed interrupt cannot be - * in the PENDING_ACTIVE stage. - */ - vgic_irq_set_queued(vcpu, irq); - } - } - - vgic_set_lr(vcpu, lr_nr, vlr); -} - -/* - * Queue an interrupt to a CPU virtual interface. Return true on success, - * or false if it wasn't possible to queue it. - * sgi_source must be zero for any non-SGI interrupts. - */ -bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - u64 elrsr = vgic_get_elrsr(vcpu); - unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); - struct vgic_lr vlr; - int lr; - - /* Sanitize the input... */ - BUG_ON(sgi_source_id & ~7); - BUG_ON(sgi_source_id && irq >= VGIC_NR_SGIS); - BUG_ON(irq >= dist->nr_irqs); - - kvm_debug("Queue IRQ%d\n", irq); - - /* Do we have an active interrupt for the same CPUID? */ - for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { - vlr = vgic_get_lr(vcpu, lr); - if (vlr.irq == irq && vlr.source == sgi_source_id) { - kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); - vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); - return true; - } - } - - /* Try to use another LR for this interrupt */ - lr = find_first_bit(elrsr_ptr, vgic->nr_lr); - if (lr >= vgic->nr_lr) - return false; - - kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - - vlr.irq = irq; - vlr.source = sgi_source_id; - vlr.state = 0; - vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); - - return true; -} - -static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq) -{ - if (!vgic_can_sample_irq(vcpu, irq)) - return true; /* level interrupt, already queued */ - - if (vgic_queue_irq(vcpu, 0, irq)) { - if (vgic_irq_is_edge(vcpu, irq)) { - vgic_dist_irq_clear_pending(vcpu, irq); - vgic_cpu_irq_clear(vcpu, irq); - } else { - vgic_irq_set_queued(vcpu, irq); - } - - return true; - } - - return false; -} - -/* - * Fill the list registers with pending interrupts before running the - * guest. - */ -static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - unsigned long *pa_percpu, *pa_shared; - int i, vcpu_id; - int overflow = 0; - int nr_shared = vgic_nr_shared_irqs(dist); - - vcpu_id = vcpu->vcpu_id; - - pa_percpu = vcpu->arch.vgic_cpu.pend_act_percpu; - pa_shared = vcpu->arch.vgic_cpu.pend_act_shared; - - bitmap_or(pa_percpu, vgic_cpu->pending_percpu, vgic_cpu->active_percpu, - VGIC_NR_PRIVATE_IRQS); - bitmap_or(pa_shared, vgic_cpu->pending_shared, vgic_cpu->active_shared, - nr_shared); - /* - * We may not have any pending interrupt, or the interrupts - * may have been serviced from another vcpu. In all cases, - * move along. - */ - if (!kvm_vgic_vcpu_pending_irq(vcpu) && !dist_active_irq(vcpu)) - goto epilog; - - /* SGIs */ - for_each_set_bit(i, pa_percpu, VGIC_NR_SGIS) { - if (!queue_sgi(vcpu, i)) - overflow = 1; - } - - /* PPIs */ - for_each_set_bit_from(i, pa_percpu, VGIC_NR_PRIVATE_IRQS) { - if (!vgic_queue_hwirq(vcpu, i)) - overflow = 1; - } - - /* SPIs */ - for_each_set_bit(i, pa_shared, nr_shared) { - if (!vgic_queue_hwirq(vcpu, i + VGIC_NR_PRIVATE_IRQS)) - overflow = 1; - } - - - - -epilog: - if (overflow) { - vgic_enable_underflow(vcpu); - } else { - vgic_disable_underflow(vcpu); - /* - * We're about to run this VCPU, and we've consumed - * everything the distributor had in store for - * us. Claim we don't have anything pending. We'll - * adjust that if needed while exiting. - */ - clear_bit(vcpu_id, dist->irq_pending_on_cpu); - } -} - -static int process_queued_irq(struct kvm_vcpu *vcpu, - int lr, struct vgic_lr vlr) -{ - int pending = 0; - - /* - * If the IRQ was EOIed (called from vgic_process_maintenance) or it - * went from active to non-active (called from vgic_sync_hwirq) it was - * also ACKed and we we therefore assume we can clear the soft pending - * state (should it had been set) for this interrupt. - * - * Note: if the IRQ soft pending state was set after the IRQ was - * acked, it actually shouldn't be cleared, but we have no way of - * knowing that unless we start trapping ACKs when the soft-pending - * state is set. - */ - vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); - - /* - * Tell the gic to start sampling this interrupt again. - */ - vgic_irq_clear_queued(vcpu, vlr.irq); - - /* Any additional pending interrupt? */ - if (vgic_irq_is_edge(vcpu, vlr.irq)) { - BUG_ON(!(vlr.state & LR_HW)); - pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); - } else { - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - pending = 1; - } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); - } - } - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); - - return pending; -} - -static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) -{ - u32 status = vgic_get_interrupt_status(vcpu); - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct kvm *kvm = vcpu->kvm; - int level_pending = 0; - - kvm_debug("STATUS = %08x\n", status); - - if (status & INT_STATUS_EOI) { - /* - * Some level interrupts have been EOIed. Clear their - * active bit. - */ - u64 eisr = vgic_get_eisr(vcpu); - unsigned long *eisr_ptr = u64_to_bitmask(&eisr); - int lr; - - for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { - struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); - WARN_ON(vlr.state & LR_STATE_MASK); - - - /* - * kvm_notify_acked_irq calls kvm_set_irq() - * to reset the IRQ level, which grabs the dist->lock - * so we call this before taking the dist->lock. - */ - kvm_notify_acked_irq(kvm, 0, - vlr.irq - VGIC_NR_PRIVATE_IRQS); - - spin_lock(&dist->lock); - level_pending |= process_queued_irq(vcpu, lr, vlr); - spin_unlock(&dist->lock); - } - } - - if (status & INT_STATUS_UNDERFLOW) - vgic_disable_underflow(vcpu); - - /* - * In the next iterations of the vcpu loop, if we sync the vgic state - * after flushing it, but before entering the guest (this happens for - * pending signals and vmid rollovers), then make sure we don't pick - * up any old maintenance interrupts here. - */ - vgic_clear_eisr(vcpu); - - return level_pending; -} - -/* - * Save the physical active state, and reset it to inactive. - * - * Return true if there's a pending forwarded interrupt to queue. - */ -static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool level_pending; - - if (!(vlr.state & LR_HW)) - return false; - - if (vlr.state & LR_STATE_ACTIVE) - return false; - - spin_lock(&dist->lock); - level_pending = process_queued_irq(vcpu, lr, vlr); - spin_unlock(&dist->lock); - return level_pending; -} - -/* Sync back the VGIC state after a guest run */ -static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - u64 elrsr; - unsigned long *elrsr_ptr; - int lr, pending; - bool level_pending; - - level_pending = vgic_process_maintenance(vcpu); - - /* Deal with HW interrupts, and clear mappings for empty LRs */ - for (lr = 0; lr < vgic->nr_lr; lr++) { - struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - - level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); - BUG_ON(vlr.irq >= dist->nr_irqs); - } - - /* Check if we still have something up our sleeve... */ - elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = u64_to_bitmask(&elrsr); - pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); - if (level_pending || pending < vgic->nr_lr) - set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -} - -void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - spin_lock(&dist->lock); - __kvm_vgic_flush_hwstate(vcpu); - spin_unlock(&dist->lock); -} - -void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) -{ - if (!irqchip_in_kernel(vcpu->kvm)) - return; - - __kvm_vgic_sync_hwstate(vcpu); -} - -int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - - if (!irqchip_in_kernel(vcpu->kvm)) - return 0; - - return test_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -} - -void vgic_kick_vcpus(struct kvm *kvm) -{ - struct kvm_vcpu *vcpu; - int c; - - /* - * We've injected an interrupt, time to find out who deserves - * a good kick... - */ - kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) - kvm_vcpu_kick(vcpu); - } -} - -static int vgic_validate_injection(struct kvm_vcpu *vcpu, int irq, int level) -{ - int edge_triggered = vgic_irq_is_edge(vcpu, irq); - - /* - * Only inject an interrupt if: - * - edge triggered and we have a rising edge - * - level triggered and we change level - */ - if (edge_triggered) { - int state = vgic_dist_irq_is_pending(vcpu, irq); - return level > state; - } else { - int state = vgic_dist_irq_get_level(vcpu, irq); - return level != state; - } -} - -static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, - unsigned int irq_num, bool level) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int edge_triggered, level_triggered; - int enabled; - bool ret = true, can_inject = true; - - trace_vgic_update_irq_pending(cpuid, irq_num, level); - - if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) - return -EINVAL; - - spin_lock(&dist->lock); - - vcpu = kvm_get_vcpu(kvm, cpuid); - edge_triggered = vgic_irq_is_edge(vcpu, irq_num); - level_triggered = !edge_triggered; - - if (!vgic_validate_injection(vcpu, irq_num, level)) { - ret = false; - goto out; - } - - if (irq_num >= VGIC_NR_PRIVATE_IRQS) { - cpuid = dist->irq_spi_cpu[irq_num - VGIC_NR_PRIVATE_IRQS]; - if (cpuid == VCPU_NOT_ALLOCATED) { - /* Pretend we use CPU0, and prevent injection */ - cpuid = 0; - can_inject = false; - } - vcpu = kvm_get_vcpu(kvm, cpuid); - } - - kvm_debug("Inject IRQ%d level %d CPU%d\n", irq_num, level, cpuid); - - if (level) { - if (level_triggered) - vgic_dist_irq_set_level(vcpu, irq_num); - vgic_dist_irq_set_pending(vcpu, irq_num); - } else { - if (level_triggered) { - vgic_dist_irq_clear_level(vcpu, irq_num); - if (!vgic_dist_irq_soft_pend(vcpu, irq_num)) { - vgic_dist_irq_clear_pending(vcpu, irq_num); - vgic_cpu_irq_clear(vcpu, irq_num); - if (!compute_pending_for_cpu(vcpu)) - clear_bit(cpuid, dist->irq_pending_on_cpu); - } - } - - ret = false; - goto out; - } - - enabled = vgic_irq_is_enabled(vcpu, irq_num); - - if (!enabled || !can_inject) { - ret = false; - goto out; - } - - if (!vgic_can_sample_irq(vcpu, irq_num)) { - /* - * Level interrupt in progress, will be picked up - * when EOId. - */ - ret = false; - goto out; - } - - if (level) { - vgic_cpu_irq_set(vcpu, irq_num); - set_bit(cpuid, dist->irq_pending_on_cpu); - } - -out: - spin_unlock(&dist->lock); - - if (ret) { - /* kick the specified vcpu */ - kvm_vcpu_kick(kvm_get_vcpu(kvm, cpuid)); - } - - return 0; -} - -static int vgic_lazy_init(struct kvm *kvm) -{ - int ret = 0; - - if (unlikely(!vgic_initialized(kvm))) { - /* - * We only provide the automatic initialization of the VGIC - * for the legacy case of a GICv2. Any other type must - * be explicitly initialized once setup with the respective - * KVM device call. - */ - if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) - return -EBUSY; - - mutex_lock(&kvm->lock); - ret = vgic_init(kvm); - mutex_unlock(&kvm->lock); - } - - return ret; -} - -/** - * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @irq_num: The IRQ number that is assigned to the device. This IRQ - * must not be mapped to a HW interrupt. - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: raise the input signal - * false: lower the input signal - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num, - bool level) -{ - struct irq_phys_map *map; - int ret; - - ret = vgic_lazy_init(kvm); - if (ret) - return ret; - - map = vgic_irq_map_search(kvm_get_vcpu(kvm, cpuid), irq_num); - if (map) - return -EINVAL; - - return vgic_update_irq_pending(kvm, cpuid, irq_num, level); -} - -/** - * kvm_vgic_inject_mapped_irq - Inject a physically mapped IRQ to the vgic - * @kvm: The VM structure pointer - * @cpuid: The CPU for PPIs - * @virt_irq: The virtual IRQ to be injected - * @level: Edge-triggered: true: to trigger the interrupt - * false: to ignore the call - * Level-sensitive true: raise the input signal - * false: lower the input signal - * - * The GIC is not concerned with devices being active-LOW or active-HIGH for - * level-sensitive interrupts. You can think of the level parameter as 1 - * being HIGH and 0 being LOW and all devices being active-HIGH. - */ -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, - unsigned int virt_irq, bool level) -{ - int ret; - - ret = vgic_lazy_init(kvm); - if (ret) - return ret; - - return vgic_update_irq_pending(kvm, cpuid, virt_irq, level); -} - -static irqreturn_t vgic_maintenance_handler(int irq, void *data) -{ - /* - * We cannot rely on the vgic maintenance interrupt to be - * delivered synchronously. This means we can only use it to - * exit the VM, and we perform the handling of EOIed - * interrupts on the exit path (see vgic_process_maintenance). - */ - return IRQ_HANDLED; -} - -static struct list_head *vgic_get_irq_phys_map_list(struct kvm_vcpu *vcpu, - int virt_irq) -{ - if (virt_irq < VGIC_NR_PRIVATE_IRQS) - return &vcpu->arch.vgic_cpu.irq_phys_map_list; - else - return &vcpu->kvm->arch.vgic.irq_phys_map_list; -} - -/** - * kvm_vgic_map_phys_irq - map a virtual IRQ to a physical IRQ - * @vcpu: The VCPU pointer - * @virt_irq: The virtual IRQ number for the guest - * @phys_irq: The hardware IRQ number of the host - * - * Establish a mapping between a guest visible irq (@virt_irq) and a - * hardware irq (@phys_irq). On injection, @virt_irq will be associated with - * the physical interrupt represented by @phys_irq. This mapping can be - * established multiple times as long as the parameters are the same. - * - * Returns 0 on success or an error value otherwise. - */ -int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int phys_irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); - struct irq_phys_map *map; - struct irq_phys_map_entry *entry; - int ret = 0; - - /* Create a new mapping */ - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - spin_lock(&dist->irq_phys_map_lock); - - /* Try to match an existing mapping */ - map = vgic_irq_map_search(vcpu, virt_irq); - if (map) { - /* Make sure this mapping matches */ - if (map->phys_irq != phys_irq) - ret = -EINVAL; - - /* Found an existing, valid mapping */ - goto out; - } - - map = &entry->map; - map->virt_irq = virt_irq; - map->phys_irq = phys_irq; - - list_add_tail_rcu(&entry->entry, root); - -out: - spin_unlock(&dist->irq_phys_map_lock); - /* If we've found a hit in the existing list, free the useless - * entry */ - if (ret || map != &entry->map) - kfree(entry); - return ret; -} - -static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, - int virt_irq) -{ - struct list_head *root = vgic_get_irq_phys_map_list(vcpu, virt_irq); - struct irq_phys_map_entry *entry; - struct irq_phys_map *map; - - rcu_read_lock(); - - list_for_each_entry_rcu(entry, root, entry) { - map = &entry->map; - if (map->virt_irq == virt_irq) { - rcu_read_unlock(); - return map; - } - } - - rcu_read_unlock(); - - return NULL; -} - -static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) -{ - struct irq_phys_map_entry *entry; - - entry = container_of(rcu, struct irq_phys_map_entry, rcu); - kfree(entry); -} - -/** - * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping - * @vcpu: The VCPU pointer - * @virt_irq: The virtual IRQ number to be unmapped - * - * Remove an existing mapping between virtual and physical interrupts. - */ -int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) -{ - struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - struct irq_phys_map_entry *entry; - struct list_head *root; - - root = vgic_get_irq_phys_map_list(vcpu, virt_irq); - - spin_lock(&dist->irq_phys_map_lock); - - list_for_each_entry(entry, root, entry) { - if (entry->map.virt_irq == virt_irq) { - list_del_rcu(&entry->entry); - call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); - break; - } - } - - spin_unlock(&dist->irq_phys_map_lock); - - return 0; -} - -static void vgic_destroy_irq_phys_map(struct kvm *kvm, struct list_head *root) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct irq_phys_map_entry *entry; - - spin_lock(&dist->irq_phys_map_lock); - - list_for_each_entry(entry, root, entry) { - list_del_rcu(&entry->entry); - call_rcu(&entry->rcu, vgic_free_phys_irq_map_rcu); - } - - spin_unlock(&dist->irq_phys_map_lock); -} - -void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - - kfree(vgic_cpu->pending_shared); - kfree(vgic_cpu->active_shared); - kfree(vgic_cpu->pend_act_shared); - vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); - vgic_cpu->pending_shared = NULL; - vgic_cpu->active_shared = NULL; - vgic_cpu->pend_act_shared = NULL; -} - -static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - int nr_longs = BITS_TO_LONGS(nr_irqs - VGIC_NR_PRIVATE_IRQS); - int sz = nr_longs * sizeof(unsigned long); - vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); - - if (!vgic_cpu->pending_shared - || !vgic_cpu->active_shared - || !vgic_cpu->pend_act_shared) { - kvm_vgic_vcpu_destroy(vcpu); - return -ENOMEM; - } - - return 0; -} - -/** - * kvm_vgic_vcpu_early_init - Earliest possible per-vcpu vgic init stage - * - * No memory allocation should be performed here, only static init. - */ -void kvm_vgic_vcpu_early_init(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - INIT_LIST_HEAD(&vgic_cpu->irq_phys_map_list); -} - -/** - * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW - * - * The host's GIC naturally limits the maximum amount of VCPUs a guest - * can use. - */ -int kvm_vgic_get_max_vcpus(void) -{ - return vgic->max_gic_vcpus; -} - -void kvm_vgic_destroy(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int i; - - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_vgic_vcpu_destroy(vcpu); - - vgic_free_bitmap(&dist->irq_enabled); - vgic_free_bitmap(&dist->irq_level); - vgic_free_bitmap(&dist->irq_pending); - vgic_free_bitmap(&dist->irq_soft_pend); - vgic_free_bitmap(&dist->irq_queued); - vgic_free_bitmap(&dist->irq_cfg); - vgic_free_bytemap(&dist->irq_priority); - if (dist->irq_spi_target) { - for (i = 0; i < dist->nr_cpus; i++) - vgic_free_bitmap(&dist->irq_spi_target[i]); - } - kfree(dist->irq_sgi_sources); - kfree(dist->irq_spi_cpu); - kfree(dist->irq_spi_mpidr); - kfree(dist->irq_spi_target); - kfree(dist->irq_pending_on_cpu); - kfree(dist->irq_active_on_cpu); - vgic_destroy_irq_phys_map(kvm, &dist->irq_phys_map_list); - dist->irq_sgi_sources = NULL; - dist->irq_spi_cpu = NULL; - dist->irq_spi_target = NULL; - dist->irq_pending_on_cpu = NULL; - dist->irq_active_on_cpu = NULL; - dist->nr_cpus = 0; -} - -/* - * Allocate and initialize the various data structures. Must be called - * with kvm->lock held! - */ -int vgic_init(struct kvm *kvm) -{ - struct vgic_dist *dist = &kvm->arch.vgic; - struct kvm_vcpu *vcpu; - int nr_cpus, nr_irqs; - int ret, i, vcpu_id; - - if (vgic_initialized(kvm)) - return 0; - - nr_cpus = dist->nr_cpus = atomic_read(&kvm->online_vcpus); - if (!nr_cpus) /* No vcpus? Can't be good... */ - return -ENODEV; - - /* - * If nobody configured the number of interrupts, use the - * legacy one. - */ - if (!dist->nr_irqs) - dist->nr_irqs = VGIC_NR_IRQS_LEGACY; - - nr_irqs = dist->nr_irqs; - - ret = vgic_init_bitmap(&dist->irq_enabled, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_level, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_pending, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_soft_pend, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_queued, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_active, nr_cpus, nr_irqs); - ret |= vgic_init_bitmap(&dist->irq_cfg, nr_cpus, nr_irqs); - ret |= vgic_init_bytemap(&dist->irq_priority, nr_cpus, nr_irqs); - - if (ret) - goto out; - - dist->irq_sgi_sources = kzalloc(nr_cpus * VGIC_NR_SGIS, GFP_KERNEL); - dist->irq_spi_cpu = kzalloc(nr_irqs - VGIC_NR_PRIVATE_IRQS, GFP_KERNEL); - dist->irq_spi_target = kzalloc(sizeof(*dist->irq_spi_target) * nr_cpus, - GFP_KERNEL); - dist->irq_pending_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), - GFP_KERNEL); - dist->irq_active_on_cpu = kzalloc(BITS_TO_LONGS(nr_cpus) * sizeof(long), - GFP_KERNEL); - if (!dist->irq_sgi_sources || - !dist->irq_spi_cpu || - !dist->irq_spi_target || - !dist->irq_pending_on_cpu || - !dist->irq_active_on_cpu) { - ret = -ENOMEM; - goto out; - } - - for (i = 0; i < nr_cpus; i++) - ret |= vgic_init_bitmap(&dist->irq_spi_target[i], - nr_cpus, nr_irqs); - - if (ret) - goto out; - - ret = kvm->arch.vgic.vm_ops.init_model(kvm); - if (ret) - goto out; - - kvm_for_each_vcpu(vcpu_id, vcpu, kvm) { - ret = vgic_vcpu_init_maps(vcpu, nr_irqs); - if (ret) { - kvm_err("VGIC: Failed to allocate vcpu memory\n"); - break; - } - - /* - * Enable and configure all SGIs to be edge-triggere and - * configure all PPIs as level-triggered. - */ - for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - if (i < VGIC_NR_SGIS) { - /* SGIs */ - vgic_bitmap_set_irq_val(&dist->irq_enabled, - vcpu->vcpu_id, i, 1); - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, - VGIC_CFG_EDGE); - } else if (i < VGIC_NR_PRIVATE_IRQS) { - /* PPIs */ - vgic_bitmap_set_irq_val(&dist->irq_cfg, - vcpu->vcpu_id, i, - VGIC_CFG_LEVEL); - } - } - - vgic_enable(vcpu); - } - -out: - if (ret) - kvm_vgic_destroy(kvm); - - return ret; -} - -static int init_vgic_model(struct kvm *kvm, int type) -{ - switch (type) { - case KVM_DEV_TYPE_ARM_VGIC_V2: - vgic_v2_init_emulation(kvm); - break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 - case KVM_DEV_TYPE_ARM_VGIC_V3: - vgic_v3_init_emulation(kvm); - break; -#endif - default: - return -ENODEV; - } - - if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) - return -E2BIG; - - return 0; -} - -/** - * kvm_vgic_early_init - Earliest possible vgic initialization stage - * - * No memory allocation should be performed here, only static init. - */ -void kvm_vgic_early_init(struct kvm *kvm) -{ - spin_lock_init(&kvm->arch.vgic.lock); - spin_lock_init(&kvm->arch.vgic.irq_phys_map_lock); - INIT_LIST_HEAD(&kvm->arch.vgic.irq_phys_map_list); -} - -int kvm_vgic_create(struct kvm *kvm, u32 type) -{ - int i, vcpu_lock_idx = -1, ret; - struct kvm_vcpu *vcpu; - - mutex_lock(&kvm->lock); - - if (irqchip_in_kernel(kvm)) { - ret = -EEXIST; - goto out; - } - - /* - * This function is also called by the KVM_CREATE_IRQCHIP handler, - * which had no chance yet to check the availability of the GICv2 - * emulation. So check this here again. KVM_CREATE_DEVICE does - * the proper checks already. - */ - if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && !vgic->can_emulate_gicv2) { - ret = -ENODEV; - goto out; - } - - /* - * Any time a vcpu is run, vcpu_load is called which tries to grab the - * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure - * that no other VCPUs are run while we create the vgic. - */ - ret = -EBUSY; - kvm_for_each_vcpu(i, vcpu, kvm) { - if (!mutex_trylock(&vcpu->mutex)) - goto out_unlock; - vcpu_lock_idx = i; - } - - kvm_for_each_vcpu(i, vcpu, kvm) { - if (vcpu->arch.has_run_once) - goto out_unlock; - } - ret = 0; - - ret = init_vgic_model(kvm, type); - if (ret) - goto out_unlock; - - kvm->arch.vgic.in_kernel = true; - kvm->arch.vgic.vgic_model = type; - kvm->arch.vgic.vctrl_base = vgic->vctrl_base; - kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; - kvm->arch.vgic.vgic_redist_base = VGIC_ADDR_UNDEF; - -out_unlock: - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); - mutex_unlock(&vcpu->mutex); - } - -out: - mutex_unlock(&kvm->lock); - return ret; -} - -static int vgic_ioaddr_overlap(struct kvm *kvm) -{ - phys_addr_t dist = kvm->arch.vgic.vgic_dist_base; - phys_addr_t cpu = kvm->arch.vgic.vgic_cpu_base; - - if (IS_VGIC_ADDR_UNDEF(dist) || IS_VGIC_ADDR_UNDEF(cpu)) - return 0; - if ((dist <= cpu && dist + KVM_VGIC_V2_DIST_SIZE > cpu) || - (cpu <= dist && cpu + KVM_VGIC_V2_CPU_SIZE > dist)) - return -EBUSY; - return 0; -} - -static int vgic_ioaddr_assign(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t size) -{ - int ret; - - if (addr & ~KVM_PHYS_MASK) - return -E2BIG; - - if (addr & (SZ_4K - 1)) - return -EINVAL; - - if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) - return -EEXIST; - if (addr + size < addr) - return -EINVAL; - - *ioaddr = addr; - ret = vgic_ioaddr_overlap(kvm); - if (ret) - *ioaddr = VGIC_ADDR_UNDEF; - - return ret; -} - -/** - * kvm_vgic_addr - set or get vgic VM base addresses - * @kvm: pointer to the vm struct - * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX - * @addr: pointer to address value - * @write: if true set the address in the VM address space, if false read the - * address - * - * Set or get the vgic base addresses for the distributor and the virtual CPU - * interface in the VM physical address space. These addresses are properties - * of the emulated core/SoC and therefore user space initially knows this - * information. - */ -int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) -{ - int r = 0; - struct vgic_dist *vgic = &kvm->arch.vgic; - int type_needed; - phys_addr_t *addr_ptr, block_size; - phys_addr_t alignment; - - mutex_lock(&kvm->lock); - switch (type) { - case KVM_VGIC_V2_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; - addr_ptr = &vgic->vgic_dist_base; - block_size = KVM_VGIC_V2_DIST_SIZE; - alignment = SZ_4K; - break; - case KVM_VGIC_V2_ADDR_TYPE_CPU: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V2; - addr_ptr = &vgic->vgic_cpu_base; - block_size = KVM_VGIC_V2_CPU_SIZE; - alignment = SZ_4K; - break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 - case KVM_VGIC_V3_ADDR_TYPE_DIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; - addr_ptr = &vgic->vgic_dist_base; - block_size = KVM_VGIC_V3_DIST_SIZE; - alignment = SZ_64K; - break; - case KVM_VGIC_V3_ADDR_TYPE_REDIST: - type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; - addr_ptr = &vgic->vgic_redist_base; - block_size = KVM_VGIC_V3_REDIST_SIZE; - alignment = SZ_64K; - break; -#endif - default: - r = -ENODEV; - goto out; - } - - if (vgic->vgic_model != type_needed) { - r = -ENODEV; - goto out; - } - - if (write) { - if (!IS_ALIGNED(*addr, alignment)) - r = -EINVAL; - else - r = vgic_ioaddr_assign(kvm, addr_ptr, *addr, - block_size); - } else { - *addr = *addr_ptr; - } - -out: - mutex_unlock(&kvm->lock); - return r; -} - -int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int r; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - if (copy_from_user(&addr, uaddr, sizeof(addr))) - return -EFAULT; - - r = kvm_vgic_addr(dev->kvm, type, &addr, true); - return (r == -ENODEV) ? -ENXIO : r; - } - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - u32 val; - int ret = 0; - - if (get_user(val, uaddr)) - return -EFAULT; - - /* - * We require: - * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs - * - at most 1024 interrupts - * - a multiple of 32 interrupts - */ - if (val < (VGIC_NR_PRIVATE_IRQS + 32) || - val > VGIC_MAX_IRQS || - (val & 31)) - return -EINVAL; - - mutex_lock(&dev->kvm->lock); - - if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_irqs) - ret = -EBUSY; - else - dev->kvm->arch.vgic.nr_irqs = val; - - mutex_unlock(&dev->kvm->lock); - - return ret; - } - case KVM_DEV_ARM_VGIC_GRP_CTRL: { - switch (attr->attr) { - case KVM_DEV_ARM_VGIC_CTRL_INIT: - r = vgic_init(dev->kvm); - return r; - } - break; - } - } - - return -ENXIO; -} - -int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr) -{ - int r = -ENXIO; - - switch (attr->group) { - case KVM_DEV_ARM_VGIC_GRP_ADDR: { - u64 __user *uaddr = (u64 __user *)(long)attr->addr; - u64 addr; - unsigned long type = (unsigned long)attr->attr; - - r = kvm_vgic_addr(dev->kvm, type, &addr, false); - if (r) - return (r == -ENODEV) ? -ENXIO : r; - - if (copy_to_user(uaddr, &addr, sizeof(addr))) - return -EFAULT; - break; - } - case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { - u32 __user *uaddr = (u32 __user *)(long)attr->addr; - - r = put_user(dev->kvm->arch.vgic.nr_irqs, uaddr); - break; - } - - } - - return r; -} - -int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset) -{ - if (vgic_find_range(ranges, 4, offset)) - return 0; - else - return -ENXIO; -} - -static void vgic_init_maintenance_interrupt(void *info) -{ - enable_percpu_irq(vgic->maint_irq, 0); -} - -static int vgic_cpu_notify(struct notifier_block *self, - unsigned long action, void *cpu) -{ - switch (action) { - case CPU_STARTING: - case CPU_STARTING_FROZEN: - vgic_init_maintenance_interrupt(NULL); - break; - case CPU_DYING: - case CPU_DYING_FROZEN: - disable_percpu_irq(vgic->maint_irq); - break; - } - - return NOTIFY_OK; -} - -static struct notifier_block vgic_cpu_nb = { - .notifier_call = vgic_cpu_notify, -}; - -static int kvm_vgic_probe(void) -{ - const struct gic_kvm_info *gic_kvm_info; - int ret; - - gic_kvm_info = gic_get_kvm_info(); - if (!gic_kvm_info) - return -ENODEV; - - switch (gic_kvm_info->type) { - case GIC_V2: - ret = vgic_v2_probe(gic_kvm_info, &vgic_ops, &vgic); - break; - case GIC_V3: - ret = vgic_v3_probe(gic_kvm_info, &vgic_ops, &vgic); - break; - default: - ret = -ENODEV; - } - - return ret; -} - -int kvm_vgic_hyp_init(void) -{ - int ret; - - ret = kvm_vgic_probe(); - if (ret) { - kvm_err("error: KVM vGIC probing failed\n"); - return ret; - } - - ret = request_percpu_irq(vgic->maint_irq, vgic_maintenance_handler, - "vgic", kvm_get_running_vcpus()); - if (ret) { - kvm_err("Cannot register interrupt %d\n", vgic->maint_irq); - return ret; - } - - ret = __register_cpu_notifier(&vgic_cpu_nb); - if (ret) { - kvm_err("Cannot register vgic CPU notifier\n"); - goto out_free_irq; - } - - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - - return 0; - -out_free_irq: - free_percpu_irq(vgic->maint_irq, kvm_get_running_vcpus()); - return ret; -} - -int kvm_irq_map_gsi(struct kvm *kvm, - struct kvm_kernel_irq_routing_entry *entries, - int gsi) -{ - return 0; -} - -int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - return pin; -} - -int kvm_set_irq(struct kvm *kvm, int irq_source_id, - u32 irq, int level, bool line_status) -{ - unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; - - trace_kvm_set_irq(irq, level, irq_source_id); - - BUG_ON(!vgic_initialized(kvm)); - - return kvm_vgic_inject_irq(kvm, 0, spi, level); -} - -/* MSI not implemented yet */ -int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm, int irq_source_id, - int level, bool line_status) -{ - return 0; -} diff --git a/virt/kvm/arm/vgic.h b/virt/kvm/arm/vgic.h deleted file mode 100644 index 0df74cb..0000000 --- a/virt/kvm/arm/vgic.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (C) 2012-2014 ARM Ltd. - * Author: Marc Zyngier <marc.zyngier@arm.com> - * - * Derived from virt/kvm/arm/vgic.c - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - */ - -#ifndef __KVM_VGIC_H__ -#define __KVM_VGIC_H__ - -#include <kvm/iodev.h> - -#define VGIC_ADDR_UNDEF (-1) -#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) - -#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ -#define IMPLEMENTER_ARM 0x43b - -#define ACCESS_READ_VALUE (1 << 0) -#define ACCESS_READ_RAZ (0 << 0) -#define ACCESS_READ_MASK(x) ((x) & (1 << 0)) -#define ACCESS_WRITE_IGNORED (0 << 1) -#define ACCESS_WRITE_SETBIT (1 << 1) -#define ACCESS_WRITE_CLEARBIT (2 << 1) -#define ACCESS_WRITE_VALUE (3 << 1) -#define ACCESS_WRITE_MASK(x) ((x) & (3 << 1)) - -#define VCPU_NOT_ALLOCATED ((u8)-1) - -unsigned long *vgic_bitmap_get_shared_map(struct vgic_bitmap *x); - -void vgic_update_state(struct kvm *kvm); -int vgic_init_common_maps(struct kvm *kvm); - -u32 *vgic_bitmap_get_reg(struct vgic_bitmap *x, int cpuid, u32 offset); -u32 *vgic_bytemap_get_reg(struct vgic_bytemap *x, int cpuid, u32 offset); - -void vgic_dist_irq_set_pending(struct kvm_vcpu *vcpu, int irq); -void vgic_dist_irq_clear_pending(struct kvm_vcpu *vcpu, int irq); -void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, int irq); -void vgic_bitmap_set_irq_val(struct vgic_bitmap *x, int cpuid, - int irq, int val); - -void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); -void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); - -bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq); -void vgic_unqueue_irqs(struct kvm_vcpu *vcpu); - -struct kvm_exit_mmio { - phys_addr_t phys_addr; - void *data; - u32 len; - bool is_write; - void *private; -}; - -void vgic_reg_access(struct kvm_exit_mmio *mmio, u32 *reg, - phys_addr_t offset, int mode); -bool handle_mmio_raz_wi(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -static inline -u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask) -{ - return le32_to_cpu(*((u32 *)mmio->data)) & mask; -} - -static inline -void mmio_data_write(struct kvm_exit_mmio *mmio, u32 mask, u32 value) -{ - *((u32 *)mmio->data) = cpu_to_le32(value) & mask; -} - -struct vgic_io_range { - phys_addr_t base; - unsigned long len; - int bits_per_irq; - bool (*handle_mmio)(struct kvm_vcpu *vcpu, struct kvm_exit_mmio *mmio, - phys_addr_t offset); -}; - -int vgic_register_kvm_io_dev(struct kvm *kvm, gpa_t base, int len, - const struct vgic_io_range *ranges, - int redist_id, - struct vgic_io_device *iodev); - -static inline bool is_in_range(phys_addr_t addr, unsigned long len, - phys_addr_t baseaddr, unsigned long size) -{ - return (addr >= baseaddr) && (addr + len <= baseaddr + size); -} - -const -struct vgic_io_range *vgic_find_range(const struct vgic_io_range *ranges, - int len, gpa_t offset); - -bool vgic_handle_enable_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id, int access); - -bool vgic_handle_set_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_set_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_clear_active_reg(struct kvm *kvm, - struct kvm_exit_mmio *mmio, - phys_addr_t offset, int vcpu_id); - -bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, - phys_addr_t offset); - -void vgic_kick_vcpus(struct kvm *kvm); - -int vgic_has_attr_regs(const struct vgic_io_range *ranges, phys_addr_t offset); -int vgic_set_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); -int vgic_get_common_attr(struct kvm_device *dev, struct kvm_device_attr *attr); - -int vgic_init(struct kvm *kvm); -void vgic_v2_init_emulation(struct kvm *kvm); -void vgic_v3_init_emulation(struct kvm *kvm); - -#endif diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index a1442f7..01a60dc 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -157,6 +157,9 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); int i; + INIT_LIST_HEAD(&dist->lpi_list_head); + spin_lock_init(&dist->lpi_list_lock); + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); if (!dist->spis) return -ENOMEM; @@ -177,6 +180,7 @@ static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) spin_lock_init(&irq->irq_lock); irq->vcpu = NULL; irq->target_vcpu = vcpu0; + kref_init(&irq->refcount); if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) irq->targets = 0; else @@ -211,6 +215,7 @@ static void kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) irq->vcpu = NULL; irq->target_vcpu = vcpu; irq->targets = 1U << vcpu->vcpu_id; + kref_init(&irq->refcount); if (vgic_irq_is_sgi(i)) { /* SGIs */ irq->enabled = 1; @@ -253,6 +258,9 @@ int vgic_init(struct kvm *kvm) if (ret) goto out; + if (vgic_has_its(kvm)) + dist->msis_require_devid = true; + kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_init(vcpu); @@ -271,7 +279,6 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm) dist->initialized = false; kfree(dist->spis); - kfree(dist->redist_iodevs); dist->nr_spis = 0; mutex_unlock(&kvm->lock); diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c new file mode 100644 index 0000000..07411cf --- /dev/null +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -0,0 +1,1500 @@ +/* + * GICv3 ITS emulation + * + * Copyright (C) 2015,2016 ARM Ltd. + * Author: Andre Przywara <andre.przywara@arm.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/uaccess.h> + +#include <linux/irqchip/arm-gic-v3.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* + * Creates a new (reference to a) struct vgic_irq for a given LPI. + * If this LPI is already mapped on another ITS, we increase its refcount + * and return a pointer to the existing structure. + * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. + * This function returns a pointer to the _unlocked_ structure. + */ +static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + + /* In this case there is no put, since we keep the reference. */ + if (irq) + return irq; + + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + if (!irq) + return NULL; + + INIT_LIST_HEAD(&irq->lpi_list); + INIT_LIST_HEAD(&irq->ap_list); + spin_lock_init(&irq->irq_lock); + + irq->config = VGIC_CONFIG_EDGE; + kref_init(&irq->refcount); + irq->intid = intid; + + spin_lock(&dist->lpi_list_lock); + + /* + * There could be a race with another vgic_add_lpi(), so we need to + * check that we don't add a second list entry with the same LPI. + */ + list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { + if (oldirq->intid != intid) + continue; + + /* Someone was faster with adding this LPI, lets use that. */ + kfree(irq); + irq = oldirq; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() on the returned pointer once it's + * finished with the IRQ. + */ + vgic_get_irq_kref(irq); + + goto out_unlock; + } + + list_add_tail(&irq->lpi_list, &dist->lpi_list_head); + dist->lpi_list_count++; + +out_unlock: + spin_unlock(&dist->lpi_list_lock); + + return irq; +} + +struct its_device { + struct list_head dev_list; + + /* the head for the list of ITTEs */ + struct list_head itt_head; + u32 device_id; +}; + +#define COLLECTION_NOT_MAPPED ((u32)~0) + +struct its_collection { + struct list_head coll_list; + + u32 collection_id; + u32 target_addr; +}; + +#define its_is_collection_mapped(coll) ((coll) && \ + ((coll)->target_addr != COLLECTION_NOT_MAPPED)) + +struct its_itte { + struct list_head itte_list; + + struct vgic_irq *irq; + struct its_collection *collection; + u32 lpi; + u32 event_id; +}; + +/* + * Find and returns a device in the device table for an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) +{ + struct its_device *device; + + list_for_each_entry(device, &its->device_list, dev_list) + if (device_id == device->device_id) + return device; + + return NULL; +} + +/* + * Find and returns an interrupt translation table entry (ITTE) for a given + * Device ID/Event ID pair on an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_itte *find_itte(struct vgic_its *its, u32 device_id, + u32 event_id) +{ + struct its_device *device; + struct its_itte *itte; + + device = find_its_device(its, device_id); + if (device == NULL) + return NULL; + + list_for_each_entry(itte, &device->itt_head, itte_list) + if (itte->event_id == event_id) + return itte; + + return NULL; +} + +/* To be used as an iterator this macro misses the enclosing parentheses */ +#define for_each_lpi_its(dev, itte, its) \ + list_for_each_entry(dev, &(its)->device_list, dev_list) \ + list_for_each_entry(itte, &(dev)->itt_head, itte_list) + +/* + * We only implement 48 bits of PA at the moment, although the ITS + * supports more. Let's be restrictive here. + */ +#define BASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) +#define CBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) +#define PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 16)) +#define PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(47, 12)) + +#define GIC_LPI_OFFSET 8192 + +/* + * Finds and returns a collection in the ITS collection table. + * Must be called with the its_lock mutex held. + */ +static struct its_collection *find_collection(struct vgic_its *its, int coll_id) +{ + struct its_collection *collection; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + if (coll_id == collection->collection_id) + return collection; + } + + return NULL; +} + +#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED) +#define LPI_PROP_PRIORITY(p) ((p) & 0xfc) + +/* + * Reads the configuration data for a given LPI from guest memory and + * updates the fields in struct vgic_irq. + * If filter_vcpu is not NULL, applies only if the IRQ is targeting this + * VCPU. Unconditionally applies if filter_vcpu is NULL. + */ +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu) +{ + u64 propbase = PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); + u8 prop; + int ret; + + ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); + + if (ret) + return ret; + + spin_lock(&irq->irq_lock); + + if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { + irq->priority = LPI_PROP_PRIORITY(prop); + irq->enabled = LPI_PROP_ENABLE_BIT(prop); + + vgic_queue_irq_unlock(kvm, irq); + } else { + spin_unlock(&irq->irq_lock); + } + + return 0; +} + +/* + * Create a snapshot of the current LPI list, so that we can enumerate all + * LPIs without holding any lock. + * Returns the array length and puts the kmalloc'ed array into intid_ptr. + */ +static int vgic_copy_lpi_list(struct kvm *kvm, u32 **intid_ptr) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + u32 *intids; + int irq_count = dist->lpi_list_count, i = 0; + + /* + * We use the current value of the list length, which may change + * after the kmalloc. We don't care, because the guest shouldn't + * change anything while the command handling is still running, + * and in the worst case we would miss a new IRQ, which one wouldn't + * expect to be covered by this command anyway. + */ + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + if (!intids) + return -ENOMEM; + + spin_lock(&dist->lpi_list_lock); + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + /* We don't need to "get" the IRQ, as we hold the list lock. */ + intids[i] = irq->intid; + if (++i == irq_count) + break; + } + spin_unlock(&dist->lpi_list_lock); + + *intid_ptr = intids; + return irq_count; +} + +/* + * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI + * is targeting) to the VGIC's view, which deals with target VCPUs. + * Needs to be called whenever either the collection for a LPIs has + * changed or the collection itself got retargeted. + */ +static void update_affinity_itte(struct kvm *kvm, struct its_itte *itte) +{ + struct kvm_vcpu *vcpu; + + if (!its_is_collection_mapped(itte->collection)) + return; + + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + + spin_lock(&itte->irq->irq_lock); + itte->irq->target_vcpu = vcpu; + spin_unlock(&itte->irq->irq_lock); +} + +/* + * Updates the target VCPU for every LPI targeting this collection. + * Must be called with the its_lock mutex held. + */ +static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, + struct its_collection *coll) +{ + struct its_device *device; + struct its_itte *itte; + + for_each_lpi_its(device, itte, its) { + if (!itte->collection || coll != itte->collection) + continue; + + update_affinity_itte(kvm, itte); + } +} + +static u32 max_lpis_propbaser(u64 propbaser) +{ + int nr_idbits = (propbaser & 0x1f) + 1; + + return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS); +} + +/* + * Scan the whole LPI pending table and sync the pending bit in there + * with our own data structures. This relies on the LPI being + * mapped before. + */ +static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) +{ + gpa_t pendbase = PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + struct vgic_irq *irq; + int last_byte_offset = -1; + int ret = 0; + u32 *intids; + int nr_irqs, i; + + nr_irqs = vgic_copy_lpi_list(vcpu->kvm, &intids); + if (nr_irqs < 0) + return nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + int byte_offset, bit_nr; + u8 pendmask; + + byte_offset = intids[i] / BITS_PER_BYTE; + bit_nr = intids[i] % BITS_PER_BYTE; + + /* + * For contiguously allocated LPIs chances are we just read + * this very same byte in the last iteration. Reuse that. + */ + if (byte_offset != last_byte_offset) { + ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset, + &pendmask, 1); + if (ret) { + kfree(intids); + return ret; + } + last_byte_offset = byte_offset; + } + + irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + spin_lock(&irq->irq_lock); + irq->pending = pendmask & (1U << bit_nr); + vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); + } + + kfree(intids); + + return ret; +} + +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + its->enabled = !!(val & GITS_CTLR_ENABLE); +} + +static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg = GITS_TYPER_PLPIS; + + /* + * We use linear CPU numbers for redistributor addressing, + * so GITS_TYPER.PTA is 0. + * Also we force all PROPBASER registers to be the same, so + * CommonLPIAff is 0 as well. + * To avoid memory waste in the guest, we keep the number of IDBits and + * DevBits low - as least for the time being. + */ + reg |= 0x0f << GITS_TYPER_DEVBITS_SHIFT; + reg |= 0x0f << GITS_TYPER_IDBITS_SHIFT; + + return extract_bytes(reg, addr & 7, len); +} + +static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); +} + +static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GITS_PIDR0: + return 0x92; /* part number, bits[7:0] */ + case GITS_PIDR1: + return 0xb4; /* part number, bits[11:8] */ + case GITS_PIDR2: + return GIC_PIDR2_ARCH_GICv3 | 0x0b; + case GITS_PIDR4: + return 0x40; /* This is a 64K software visible page */ + /* The following are the ID registers for (any) GIC. */ + case GITS_CIDR0: + return 0x0d; + case GITS_CIDR1: + return 0xf0; + case GITS_CIDR2: + return 0x05; + case GITS_CIDR3: + return 0xb1; + } + + return 0; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + */ +static void vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct its_itte *itte; + + if (!its->enabled) + return; + + itte = find_itte(its, devid, eventid); + /* Triggering an unmapped IRQ gets silently dropped. */ + if (itte && its_is_collection_mapped(itte->collection)) { + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_vcpu(kvm, itte->collection->target_addr); + if (vcpu && vcpu->arch.vgic_cpu.lpis_enabled) { + spin_lock(&itte->irq->irq_lock); + itte->irq->pending = true; + vgic_queue_irq_unlock(kvm, itte->irq); + } + } +} + +/* + * Queries the KVM IO bus framework to get the ITS pointer from the given + * doorbell address. + * We then call vgic_its_trigger_msi() with the decoded data. + */ +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + u64 address; + struct kvm_io_device *kvm_io_dev; + struct vgic_io_device *iodev; + + if (!vgic_has_its(kvm)) + return -ENODEV; + + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return -EINVAL; + + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return -ENODEV; + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + + mutex_lock(&iodev->its->its_lock); + vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data); + mutex_unlock(&iodev->its->its_lock); + + return 0; +} + +/* Requires the its_lock to be held. */ +static void its_free_itte(struct kvm *kvm, struct its_itte *itte) +{ + list_del(&itte->itte_list); + + /* This put matches the get in vgic_add_lpi. */ + vgic_put_irq(kvm, itte->irq); + + kfree(itte); +} + +static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) +{ + return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1); +} + +#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) +#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) +#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) +#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) +#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) +#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) +#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) + +/* + * The DISCARD command frees an Interrupt Translation Table Entry (ITTE). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_itte *itte; + + + itte = find_itte(its, device_id, event_id); + if (itte && itte->collection) { + /* + * Though the spec talks about removing the pending state, we + * don't bother here since we clear the ITTE anyway and the + * pending state is a property of the ITTE struct. + */ + its_free_itte(kvm, itte); + return 0; + } + + return E_ITS_DISCARD_UNMAPPED_INTERRUPT; +} + +/* + * The MOVI command moves an ITTE to a different collection. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct kvm_vcpu *vcpu; + struct its_itte *itte; + struct its_collection *collection; + + itte = find_itte(its, device_id, event_id); + if (!itte) + return E_ITS_MOVI_UNMAPPED_INTERRUPT; + + if (!its_is_collection_mapped(itte->collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + itte->collection = collection; + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + spin_lock(&itte->irq->irq_lock); + itte->irq->target_vcpu = vcpu; + spin_unlock(&itte->irq->irq_lock); + + return 0; +} + +/* + * Check whether an ID can be stored into the corresponding guest table. + * For a direct table this is pretty easy, but gets a bit nasty for + * indirect tables. We check whether the resulting guest physical address + * is actually valid (covered by a memslot and guest accessbible). + * For this we have to read the respective first level entry. + */ +static bool vgic_its_check_id(struct vgic_its *its, u64 baser, int id) +{ + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + int index; + u64 indirect_ptr; + gfn_t gfn; + + if (!(baser & GITS_BASER_INDIRECT)) { + phys_addr_t addr; + + if (id >= (l1_tbl_size / GITS_BASER_ENTRY_SIZE(baser))) + return false; + + addr = BASER_ADDRESS(baser) + id * GITS_BASER_ENTRY_SIZE(baser); + gfn = addr >> PAGE_SHIFT; + + return kvm_is_visible_gfn(its->dev->kvm, gfn); + } + + /* calculate and check the index into the 1st level */ + index = id / (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); + if (index >= (l1_tbl_size / sizeof(u64))) + return false; + + /* Each 1st level entry is represented by a 64-bit value. */ + if (kvm_read_guest(its->dev->kvm, + BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), + &indirect_ptr, sizeof(indirect_ptr))) + return false; + + indirect_ptr = le64_to_cpu(indirect_ptr); + + /* check the valid bit of the first level entry */ + if (!(indirect_ptr & BIT_ULL(63))) + return false; + + /* + * Mask the guest physical address and calculate the frame number. + * Any address beyond our supported 48 bits of PA will be caught + * by the actual check in the final step. + */ + indirect_ptr &= GENMASK_ULL(51, 16); + + /* Find the address of the actual entry */ + index = id % (SZ_64K / GITS_BASER_ENTRY_SIZE(baser)); + indirect_ptr += index * GITS_BASER_ENTRY_SIZE(baser); + gfn = indirect_ptr >> PAGE_SHIFT; + + return kvm_is_visible_gfn(its->dev->kvm, gfn); +} + +static int vgic_its_alloc_collection(struct vgic_its *its, + struct its_collection **colp, + u32 coll_id) +{ + struct its_collection *collection; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id)) + return E_ITS_MAPC_COLLECTION_OOR; + + collection = kzalloc(sizeof(*collection), GFP_KERNEL); + + collection->collection_id = coll_id; + collection->target_addr = COLLECTION_NOT_MAPPED; + + list_add_tail(&collection->coll_list, &its->collection_list); + *colp = collection; + + return 0; +} + +static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) +{ + struct its_collection *collection; + struct its_device *device; + struct its_itte *itte; + + /* + * Clearing the mapping for that collection ID removes the + * entry from the list. If there wasn't any before, we can + * go home early. + */ + collection = find_collection(its, coll_id); + if (!collection) + return; + + for_each_lpi_its(device, itte, its) + if (itte->collection && + itte->collection->collection_id == coll_id) + itte->collection = NULL; + + list_del(&collection->coll_list); + kfree(collection); +} + +/* + * The MAPTI and MAPI commands map LPIs to ITTEs. + * Must be called with its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_itte *itte; + struct its_device *device; + struct its_collection *collection, *new_coll = NULL; + int lpi_nr; + + device = find_its_device(its, device_id); + if (!device) + return E_ITS_MAPTI_UNMAPPED_DEVICE; + + if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) + lpi_nr = its_cmd_get_physical_id(its_cmd); + else + lpi_nr = event_id; + if (lpi_nr < GIC_LPI_OFFSET || + lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) + return E_ITS_MAPTI_PHYSICALID_OOR; + + collection = find_collection(its, coll_id); + if (!collection) { + int ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + new_coll = collection; + } + + itte = find_itte(its, device_id, event_id); + if (!itte) { + itte = kzalloc(sizeof(struct its_itte), GFP_KERNEL); + if (!itte) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + return -ENOMEM; + } + + itte->event_id = event_id; + list_add_tail(&itte->itte_list, &device->itt_head); + } + + itte->collection = collection; + itte->lpi = lpi_nr; + itte->irq = vgic_add_lpi(kvm, lpi_nr); + update_affinity_itte(kvm, itte); + + /* + * We "cache" the configuration table entries in out struct vgic_irq's. + * However we only have those structs for mapped IRQs, so we read in + * the respective config data from memory here upon mapping the LPI. + */ + update_lpi_config(kvm, itte->irq, NULL); + + return 0; +} + +/* Requires the its_lock to be held. */ +static void vgic_its_unmap_device(struct kvm *kvm, struct its_device *device) +{ + struct its_itte *itte, *temp; + + /* + * The spec says that unmapping a device with still valid + * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, + * since we cannot leave the memory unreferenced. + */ + list_for_each_entry_safe(itte, temp, &device->itt_head, itte_list) + its_free_itte(kvm, itte); + + list_del(&device->dev_list); + kfree(device); +} + +/* + * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + bool valid = its_cmd_get_validbit(its_cmd); + struct its_device *device; + + if (!vgic_its_check_id(its, its->baser_device_table, device_id)) + return E_ITS_MAPD_DEVICE_OOR; + + device = find_its_device(its, device_id); + + /* + * The spec says that calling MAPD on an already mapped device + * invalidates all cached data for this device. We implement this + * by removing the mapping and re-establishing it. + */ + if (device) + vgic_its_unmap_device(kvm, device); + + /* + * The spec does not say whether unmapping a not-mapped device + * is an error, so we are done in any case. + */ + if (!valid) + return 0; + + device = kzalloc(sizeof(struct its_device), GFP_KERNEL); + if (!device) + return -ENOMEM; + + device->device_id = device_id; + INIT_LIST_HEAD(&device->itt_head); + + list_add_tail(&device->dev_list, &its->device_list); + + return 0; +} + +/* + * The MAPC command maps collection IDs to redistributors. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u16 coll_id; + u32 target_addr; + struct its_collection *collection; + bool valid; + + valid = its_cmd_get_validbit(its_cmd); + coll_id = its_cmd_get_collection(its_cmd); + target_addr = its_cmd_get_target_addr(its_cmd); + + if (target_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MAPC_PROCNUM_OOR; + + if (!valid) { + vgic_its_free_collection(its, coll_id); + } else { + collection = find_collection(its, coll_id); + + if (!collection) { + int ret; + + ret = vgic_its_alloc_collection(its, &collection, + coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + } else { + collection->target_addr = target_addr; + update_affinity_collection(kvm, its, collection); + } + } + + return 0; +} + +/* + * The CLEAR command removes the pending state for a particular LPI. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_itte *itte; + + + itte = find_itte(its, device_id, event_id); + if (!itte) + return E_ITS_CLEAR_UNMAPPED_INTERRUPT; + + itte->irq->pending = false; + + return 0; +} + +/* + * The INV command syncs the configuration bits from the memory table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_itte *itte; + + + itte = find_itte(its, device_id, event_id); + if (!itte) + return E_ITS_INV_UNMAPPED_INTERRUPT; + + return update_lpi_config(kvm, itte->irq, NULL); +} + +/* + * The INVALL command requests flushing of all IRQ data in this collection. + * Find the VCPU mapped to that collection, then iterate over the VM's list + * of mapped LPIs and update the configuration for each IRQ which targets + * the specified vcpu. The configuration will be read from the in-memory + * configuration table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_collection *collection; + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_INVALL_UNMAPPED_COLLECTION; + + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq_count = vgic_copy_lpi_list(kvm, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; + update_lpi_config(kvm, irq, vcpu); + vgic_put_irq(kvm, irq); + } + + kfree(intids); + + return 0; +} + +/* + * The MOVALL command moves the pending state of all IRQs targeting one + * redistributor to another. We don't hold the pending state in the VCPUs, + * but in the IRQs instead, so there is really not much to do for us here. + * However the spec says that no IRQ must target the old redistributor + * afterwards, so we make sure that no LPI is using the associated target_vcpu. + * This command affects all LPIs in the system that target that redistributor. + */ +static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + u32 target1_addr = its_cmd_get_target_addr(its_cmd); + u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); + struct kvm_vcpu *vcpu1, *vcpu2; + struct vgic_irq *irq; + + if (target1_addr >= atomic_read(&kvm->online_vcpus) || + target2_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MOVALL_PROCNUM_OOR; + + if (target1_addr == target2_addr) + return 0; + + vcpu1 = kvm_get_vcpu(kvm, target1_addr); + vcpu2 = kvm_get_vcpu(kvm, target2_addr); + + spin_lock(&dist->lpi_list_lock); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + spin_lock(&irq->irq_lock); + + if (irq->target_vcpu == vcpu1) + irq->target_vcpu = vcpu2; + + spin_unlock(&irq->irq_lock); + } + + spin_unlock(&dist->lpi_list_lock); + + return 0; +} + +/* + * The INT command injects the LPI associated with that DevID/EvID pair. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 msi_data = its_cmd_get_id(its_cmd); + u64 msi_devid = its_cmd_get_deviceid(its_cmd); + + vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); + + return 0; +} + +/* + * This function is called with the its_cmd lock held, but the ITS data + * structure lock dropped. + */ +static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + int ret = -ENODEV; + + mutex_lock(&its->its_lock); + switch (its_cmd_get_command(its_cmd)) { + case GITS_CMD_MAPD: + ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd); + break; + case GITS_CMD_MAPC: + ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd); + break; + case GITS_CMD_MAPI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MAPTI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MOVI: + ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd); + break; + case GITS_CMD_DISCARD: + ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd); + break; + case GITS_CMD_CLEAR: + ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd); + break; + case GITS_CMD_MOVALL: + ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd); + break; + case GITS_CMD_INT: + ret = vgic_its_cmd_handle_int(kvm, its, its_cmd); + break; + case GITS_CMD_INV: + ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd); + break; + case GITS_CMD_INVALL: + ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd); + break; + case GITS_CMD_SYNC: + /* we ignore this command: we are in sync all of the time */ + ret = 0; + break; + } + mutex_unlock(&its->its_lock); + + return ret; +} + +static u64 vgic_sanitise_its_baser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK, + GITS_BASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK, + GITS_BASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK, + GITS_BASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* Bits 15:12 contain bits 51:48 of the PA, which we don't support. */ + reg &= ~GENMASK_ULL(15, 12); + + /* We support only one (ITS) page size: 64K */ + reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; + + return reg; +} + +static u64 vgic_sanitise_its_cbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK, + GITS_CBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK, + GITS_CBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK, + GITS_CBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* + * Sanitise the physical address to be 64k aligned. + * Also limit the physical addresses to 48 bits. + */ + reg &= ~(GENMASK_ULL(51, 48) | GENMASK_ULL(15, 12)); + + return reg; +} + +static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cbaser, addr & 7, len); +} + +static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* When GITS_CTLR.Enable is 1, this register is RO. */ + if (its->enabled) + return; + + mutex_lock(&its->cmd_lock); + its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val); + its->cbaser = vgic_sanitise_its_cbaser(its->cbaser); + its->creadr = 0; + /* + * CWRITER is architecturally UNKNOWN on reset, but we need to reset + * it to CREADR to make sure we start with an empty command buffer. + */ + its->cwriter = its->creadr; + mutex_unlock(&its->cmd_lock); +} + +#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12) +#define ITS_CMD_SIZE 32 +#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + gpa_t cbaser; + u64 cmd_buf[4]; + u32 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + + its->cwriter = reg; + cbaser = CBASER_ADDRESS(its->cbaser); + + while (its->cwriter != its->creadr) { + int ret = kvm_read_guest(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); + /* + * If kvm_read_guest() fails, this could be due to the guest + * programming a bogus value in CBASER or something else going + * wrong from which we cannot easily recover. + * According to section 6.3.2 in the GICv3 spec we can just + * ignore that command then. + */ + if (!ret) + vgic_its_handle_command(kvm, its, cmd_buf); + + its->creadr += ITS_CMD_SIZE; + if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) + its->creadr = 0; + } + + mutex_unlock(&its->cmd_lock); +} + +static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cwriter, addr & 0x7, len); +} + +static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->creadr, addr & 0x7, len); +} + +#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) +static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg; + + switch (BASER_INDEX(addr)) { + case 0: + reg = its->baser_device_table; + break; + case 1: + reg = its->baser_coll_table; + break; + default: + reg = 0; + break; + } + + return extract_bytes(reg, addr & 7, len); +} + +#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56)) +static void vgic_mmio_write_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 entry_size, device_type; + u64 reg, *regptr, clearbits = 0; + + /* When GITS_CTLR.Enable is 1, we ignore write accesses. */ + if (its->enabled) + return; + + switch (BASER_INDEX(addr)) { + case 0: + regptr = &its->baser_device_table; + entry_size = 8; + device_type = GITS_BASER_TYPE_DEVICE; + break; + case 1: + regptr = &its->baser_coll_table; + entry_size = 8; + device_type = GITS_BASER_TYPE_COLLECTION; + clearbits = GITS_BASER_INDIRECT; + break; + default: + return; + } + + reg = update_64bit_reg(*regptr, addr & 7, len, val); + reg &= ~GITS_BASER_RO_MASK; + reg &= ~clearbits; + + reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; + reg |= device_type << GITS_BASER_TYPE_SHIFT; + reg = vgic_sanitise_its_baser(reg); + + *regptr = reg; +} + +#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ +} + +static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +static struct vgic_register_region its_registers[] = { + REGISTER_ITS_DESC(GITS_CTLR, + vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IIDR, + vgic_mmio_read_its_iidr, its_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_TYPER, + vgic_mmio_read_its_typer, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CBASER, + vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CWRITER, + vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CREADR, + vgic_mmio_read_its_creadr, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_BASER, + vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IDREGS_BASE, + vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30, + VGIC_ACCESS_32bit), +}; + +/* This is called on setting the LPI enable bit in the redistributor. */ +void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ)) + its_sync_lpi_pending_table(vcpu); +} + +static int vgic_its_init_its(struct kvm *kvm, struct vgic_its *its) +{ + struct vgic_io_device *iodev = &its->iodev; + int ret; + + if (its->initialized) + return 0; + + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) + return -ENXIO; + + iodev->regions = its_registers; + iodev->nr_regions = ARRAY_SIZE(its_registers); + kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); + + iodev->base_addr = its->vgic_its_base; + iodev->iodev_type = IODEV_ITS; + iodev->its = its; + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, + KVM_VGIC_V3_ITS_SIZE, &iodev->dev); + mutex_unlock(&kvm->slots_lock); + + if (!ret) + its->initialized = true; + + return ret; +} + +#define INITIAL_BASER_VALUE \ + (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ + ((8ULL - 1) << GITS_BASER_ENTRY_SIZE_SHIFT) | \ + GITS_BASER_PAGE_SIZE_64K) + +#define INITIAL_PROPBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)) + +static int vgic_its_create(struct kvm_device *dev, u32 type) +{ + struct vgic_its *its; + + if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) + return -ENODEV; + + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + if (!its) + return -ENOMEM; + + mutex_init(&its->its_lock); + mutex_init(&its->cmd_lock); + + its->vgic_its_base = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&its->device_list); + INIT_LIST_HEAD(&its->collection_list); + + dev->kvm->arch.vgic.has_its = true; + its->initialized = false; + its->enabled = false; + its->dev = dev; + + its->baser_device_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT); + its->baser_coll_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT); + dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE; + + dev->private = its; + + return 0; +} + +static void vgic_its_destroy(struct kvm_device *kvm_dev) +{ + struct kvm *kvm = kvm_dev->kvm; + struct vgic_its *its = kvm_dev->private; + struct its_device *dev; + struct its_itte *itte; + struct list_head *dev_cur, *dev_temp; + struct list_head *cur, *temp; + + /* + * We may end up here without the lists ever having been initialized. + * Check this and bail out early to avoid dereferencing a NULL pointer. + */ + if (!its->device_list.next) + return; + + mutex_lock(&its->its_lock); + list_for_each_safe(dev_cur, dev_temp, &its->device_list) { + dev = container_of(dev_cur, struct its_device, dev_list); + list_for_each_safe(cur, temp, &dev->itt_head) { + itte = (container_of(cur, struct its_itte, itte_list)); + its_free_itte(kvm, itte); + } + list_del(dev_cur); + kfree(dev); + } + + list_for_each_safe(cur, temp, &its->collection_list) { + list_del(cur); + kfree(container_of(cur, struct its_collection, coll_list)); + } + mutex_unlock(&its->its_lock); + + kfree(its); +} + +static int vgic_its_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_ITS_ADDR_TYPE: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + break; + } + return -ENXIO; +} + +static int vgic_its_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct vgic_its *its = dev->private; + int ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + u64 addr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (its->initialized) + return -EBUSY; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, + addr, SZ_64K); + if (ret) + return ret; + + its->vgic_its_base = addr; + + return 0; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return vgic_its_init_its(dev->kvm, its); + } + break; + } + return -ENXIO; +} + +static int vgic_its_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + struct vgic_its *its = dev->private; + u64 addr = its->vgic_its_base; + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + default: + return -ENXIO; + } + } + + return 0; +} + +static struct kvm_device_ops kvm_arm_vgic_its_ops = { + .name = "kvm-arm-vgic-its", + .create = vgic_its_create, + .destroy = vgic_its_destroy, + .set_attr = vgic_its_set_attr, + .get_attr = vgic_its_get_attr, + .has_attr = vgic_its_has_attr, +}; + +int kvm_vgic_register_its_device(void) +{ + return kvm_register_device_ops(&kvm_arm_vgic_its_ops, + KVM_DEV_TYPE_ARM_VGIC_ITS); +} diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 0130c4b..1813f93 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -21,8 +21,8 @@ /* common helpers */ -static int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, - phys_addr_t addr, phys_addr_t alignment) +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment) { if (addr & ~KVM_PHYS_MASK) return -E2BIG; @@ -210,20 +210,27 @@ static void vgic_destroy(struct kvm_device *dev) kfree(dev); } -void kvm_register_vgic_device(unsigned long type) +int kvm_register_vgic_device(unsigned long type) { + int ret = -ENODEV; + switch (type) { case KVM_DEV_TYPE_ARM_VGIC_V2: - kvm_register_device_ops(&kvm_arm_vgic_v2_ops, - KVM_DEV_TYPE_ARM_VGIC_V2); + ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); break; #ifdef CONFIG_KVM_ARM_VGIC_V3 case KVM_DEV_TYPE_ARM_VGIC_V3: - kvm_register_device_ops(&kvm_arm_vgic_v3_ops, - KVM_DEV_TYPE_ARM_VGIC_V3); + ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, + KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) + break; + ret = kvm_vgic_register_its_device(); break; #endif } + + return ret; } /** vgic_attr_regs_access: allows user space to read/write VGIC registers @@ -428,4 +435,3 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = { }; #endif /* CONFIG_KVM_ARM_VGIC_V3 */ - diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index a213936..b44b359 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -102,6 +102,7 @@ static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, irq->source |= 1U << source_vcpu->vcpu_id; vgic_queue_irq_unlock(source_vcpu->kvm, irq); + vgic_put_irq(source_vcpu->kvm, irq); } } @@ -116,6 +117,8 @@ static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); val |= (u64)irq->targets << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); } return val; @@ -143,6 +146,7 @@ static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -157,6 +161,8 @@ static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); val |= (u64)irq->source << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); } return val; } @@ -178,6 +184,7 @@ static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, irq->pending = false; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -201,6 +208,7 @@ static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, } else { spin_unlock(&irq->irq_lock); } + vgic_put_irq(vcpu->kvm, irq); } } @@ -429,6 +437,7 @@ int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, struct vgic_io_device dev = { .regions = vgic_v2_cpu_registers, .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), + .iodev_type = IODEV_CPUIF, }; return vgic_uaccess(vcpu, &dev, is_write, offset, val); @@ -440,6 +449,7 @@ int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, struct vgic_io_device dev = { .regions = vgic_v2_dist_registers, .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), + .iodev_type = IODEV_DIST, }; return vgic_uaccess(vcpu, &dev, is_write, offset, val); diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index a0c515a..ff668e0 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -23,12 +23,35 @@ #include "vgic-mmio.h" /* extract @num bytes at @offset bytes offset in data */ -static unsigned long extract_bytes(unsigned long data, unsigned int offset, - unsigned int num) +unsigned long extract_bytes(unsigned long data, unsigned int offset, + unsigned int num) { return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); } +/* allows updates of any half of a 64-bit register (or the whole thing) */ +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val) +{ + int lower = (offset & 4) * 8; + int upper = lower + 8 * len - 1; + + reg &= ~GENMASK_ULL(upper, lower); + val &= GENMASK_ULL(len * 8 - 1, 0); + + return reg | ((u64)val << lower); +} + +bool vgic_has_its(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + return dist->has_its; +} + static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { @@ -43,7 +66,12 @@ static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, case GICD_TYPER: value = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; value = (value >> 5) - 1; - value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + if (vgic_has_its(vcpu->kvm)) { + value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; + value |= GICD_TYPER_LPIS; + } else { + value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + } break; case GICD_IIDR: value = (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); @@ -80,15 +108,17 @@ static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, { int intid = VGIC_ADDR_TO_INTID(addr, 64); struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + unsigned long ret = 0; if (!irq) return 0; /* The upper word is RAZ for us. */ - if (addr & 4) - return 0; + if (!(addr & 4)) + ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); - return extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); + vgic_put_irq(vcpu->kvm, irq); + return ret; } static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, @@ -96,15 +126,17 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, unsigned long val) { int intid = VGIC_ADDR_TO_INTID(addr, 64); - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); - - if (!irq) - return; + struct vgic_irq *irq; /* The upper word is WI for us since we don't implement Aff3. */ if (addr & 4) return; + irq = vgic_get_irq(vcpu->kvm, NULL, intid); + + if (!irq) + return; + spin_lock(&irq->irq_lock); /* We only care about and preserve Aff0, Aff1 and Aff2. */ @@ -112,6 +144,32 @@ static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); +} + +static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0; +} + + +static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + bool was_enabled = vgic_cpu->lpis_enabled; + + if (!vgic_has_its(vcpu->kvm)) + return; + + vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; + + if (!was_enabled && vgic_cpu->lpis_enabled) + vgic_enable_lpis(vcpu); } static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, @@ -125,6 +183,8 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, value |= ((target_vcpu_id & 0xffff) << 8); if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) value |= GICR_TYPER_LAST; + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; return extract_bytes(value, addr & 7, len); } @@ -147,6 +207,142 @@ static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, return 0; } +/* We want to avoid outer shareable. */ +u64 vgic_sanitise_shareability(u64 field) +{ + switch (field) { + case GIC_BASER_OuterShareable: + return GIC_BASER_InnerShareable; + default: + return field; + } +} + +/* Avoid any inner non-cacheable mapping. */ +u64 vgic_sanitise_inner_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_nCnB: + case GIC_BASER_CACHE_nC: + return GIC_BASER_CACHE_RaWb; + default: + return field; + } +} + +/* Non-cacheable or same-as-inner are OK. */ +u64 vgic_sanitise_outer_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_SameAsInner: + case GIC_BASER_CACHE_nC: + return field; + default: + return GIC_BASER_CACHE_nC; + } +} + +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)) +{ + u64 field = (reg & field_mask) >> field_shift; + + field = sanitise_fn(field) << field_shift; + return (reg & ~field_mask) | field; +} + +#define PROPBASER_RES0_MASK \ + (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5)) +#define PENDBASER_RES0_MASK \ + (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \ + GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0)) + +static u64 vgic_sanitise_pendbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK, + GICR_PENDBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK, + GICR_PENDBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK, + GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PENDBASER_RES0_MASK; + reg &= ~GENMASK_ULL(51, 48); + + return reg; +} + +static u64 vgic_sanitise_propbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK, + GICR_PROPBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK, + GICR_PROPBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK, + GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PROPBASER_RES0_MASK; + reg &= ~GENMASK_ULL(51, 48); + return reg; +} + +static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return extract_bytes(dist->propbaser, addr & 7, len); +} + +static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 propbaser = dist->propbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + + dist->propbaser = propbaser; +} + +static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return extract_bytes(vgic_cpu->pendbaser, addr & 7, len); +} + +static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 pendbaser = vgic_cpu->pendbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + + vgic_cpu->pendbaser = pendbaser; +} + /* * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the * redistributors, while SPIs are covered by registers in the distributor @@ -218,7 +414,7 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { static const struct vgic_register_region vgic_v3_rdbase_registers[] = { REGISTER_DESC_WITH_LENGTH(GICR_CTLR, - vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IIDR, vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, @@ -227,10 +423,10 @@ static const struct vgic_register_region vgic_v3_rdbase_registers[] = { vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, - vgic_mmio_read_raz, vgic_mmio_write_wi, 8, + vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, - vgic_mmio_read_raz, vgic_mmio_write_wi, 8, + vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, @@ -285,24 +481,18 @@ unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) { - int nr_vcpus = atomic_read(&kvm->online_vcpus); struct kvm_vcpu *vcpu; - struct vgic_io_device *devices; int c, ret = 0; - devices = kmalloc(sizeof(struct vgic_io_device) * nr_vcpus * 2, - GFP_KERNEL); - if (!devices) - return -ENOMEM; - kvm_for_each_vcpu(c, vcpu, kvm) { gpa_t rd_base = redist_base_address + c * SZ_64K * 2; gpa_t sgi_base = rd_base + SZ_64K; - struct vgic_io_device *rd_dev = &devices[c * 2]; - struct vgic_io_device *sgi_dev = &devices[c * 2 + 1]; + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_io_device *sgi_dev = &vcpu->arch.vgic_cpu.sgi_iodev; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); rd_dev->base_addr = rd_base; + rd_dev->iodev_type = IODEV_REDIST; rd_dev->regions = vgic_v3_rdbase_registers; rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rdbase_registers); rd_dev->redist_vcpu = vcpu; @@ -317,6 +507,7 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) kvm_iodevice_init(&sgi_dev->dev, &kvm_io_gic_ops); sgi_dev->base_addr = sgi_base; + sgi_dev->iodev_type = IODEV_REDIST; sgi_dev->regions = vgic_v3_sgibase_registers; sgi_dev->nr_regions = ARRAY_SIZE(vgic_v3_sgibase_registers); sgi_dev->redist_vcpu = vcpu; @@ -335,14 +526,15 @@ int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t redist_base_address) if (ret) { /* The current c failed, so we start with the previous one. */ for (c--; c >= 0; c--) { + struct vgic_cpu *vgic_cpu; + + vcpu = kvm_get_vcpu(kvm, c); + vgic_cpu = &vcpu->arch.vgic_cpu; kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &devices[c * 2].dev); + &vgic_cpu->rd_iodev.dev); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, - &devices[c * 2 + 1].dev); + &vgic_cpu->sgi_iodev.dev); } - kfree(devices); - } else { - kvm->arch.vgic.redist_iodevs = devices; } return ret; @@ -451,5 +643,6 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) irq->pending = true; vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); } } diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 9f6fab7..3bad3c5 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -56,6 +56,8 @@ unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, if (irq->enabled) value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -74,6 +76,8 @@ void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, spin_lock(&irq->irq_lock); irq->enabled = true; vgic_queue_irq_unlock(vcpu->kvm, irq); + + vgic_put_irq(vcpu->kvm, irq); } } @@ -92,6 +96,7 @@ void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, irq->enabled = false; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -108,6 +113,8 @@ unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, if (irq->pending) value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -129,6 +136,7 @@ void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, irq->soft_pending = true; vgic_queue_irq_unlock(vcpu->kvm, irq); + vgic_put_irq(vcpu->kvm, irq); } } @@ -152,6 +160,7 @@ void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, } spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -168,6 +177,8 @@ unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, if (irq->active) value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -242,6 +253,7 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); + vgic_put_irq(vcpu->kvm, irq); } vgic_change_active_finish(vcpu, intid); } @@ -257,6 +269,7 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); + vgic_put_irq(vcpu->kvm, irq); } vgic_change_active_finish(vcpu, intid); } @@ -272,6 +285,8 @@ unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); val |= (u64)irq->priority << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); } return val; @@ -298,6 +313,8 @@ void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, /* Narrow the priority range to what we actually support */ irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); spin_unlock(&irq->irq_lock); + + vgic_put_irq(vcpu->kvm, irq); } } @@ -313,6 +330,8 @@ unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, if (irq->config == VGIC_CONFIG_EDGE) value |= (2U << (i * 2)); + + vgic_put_irq(vcpu->kvm, irq); } return value; @@ -326,7 +345,7 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, int i; for (i = 0; i < len * 4; i++) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + struct vgic_irq *irq; /* * The configuration cannot be changed for SGIs in general, @@ -337,14 +356,18 @@ void vgic_mmio_write_config(struct kvm_vcpu *vcpu, if (intid + i < VGIC_NR_PRIVATE_IRQS) continue; + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); spin_lock(&irq->irq_lock); + if (test_bit(i * 2 + 1, &val)) { irq->config = VGIC_CONFIG_EDGE; } else { irq->config = VGIC_CONFIG_LEVEL; irq->pending = irq->line_level | irq->soft_pending; } + spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -450,8 +473,7 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, { struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); const struct vgic_register_region *region; - struct kvm_vcpu *r_vcpu; - unsigned long data; + unsigned long data = 0; region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, addr - iodev->base_addr); @@ -460,8 +482,21 @@ static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, return 0; } - r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; - data = region->read(r_vcpu, addr, len); + switch (iodev->iodev_type) { + case IODEV_CPUIF: + data = region->read(vcpu, addr, len); + break; + case IODEV_DIST: + data = region->read(vcpu, addr, len); + break; + case IODEV_REDIST: + data = region->read(iodev->redist_vcpu, addr, len); + break; + case IODEV_ITS: + data = region->its_read(vcpu->kvm, iodev->its, addr, len); + break; + } + vgic_data_host_to_mmio_bus(val, len, data); return 0; } @@ -471,7 +506,6 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, { struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); const struct vgic_register_region *region; - struct kvm_vcpu *r_vcpu; unsigned long data = vgic_data_mmio_bus_to_host(val, len); region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, @@ -482,8 +516,21 @@ static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, if (!check_region(region, addr, len)) return 0; - r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; - region->write(r_vcpu, addr, len, data); + switch (iodev->iodev_type) { + case IODEV_CPUIF: + region->write(vcpu, addr, len, data); + break; + case IODEV_DIST: + region->write(vcpu, addr, len, data); + break; + case IODEV_REDIST: + region->write(iodev->redist_vcpu, addr, len, data); + break; + case IODEV_ITS: + region->its_write(vcpu->kvm, iodev->its, addr, len, data); + break; + } + return 0; } @@ -513,6 +560,7 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, } io_device->base_addr = dist_base_address; + io_device->iodev_type = IODEV_DIST; io_device->redist_vcpu = NULL; mutex_lock(&kvm->slots_lock); diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 8509014..0b3ecf9 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -21,10 +21,19 @@ struct vgic_register_region { unsigned int len; unsigned int bits_per_irq; unsigned int access_flags; - unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, - unsigned int len); - void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, - unsigned long val); + union { + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len); + }; + union { + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + void (*its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; }; extern struct kvm_io_device_ops kvm_io_gic_ops; @@ -87,6 +96,12 @@ unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, unsigned long data); +unsigned long extract_bytes(unsigned long data, unsigned int offset, + unsigned int num); + +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); @@ -147,4 +162,12 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); +#ifdef CONFIG_KVM_ARM_VGIC_V3 +u64 vgic_sanitise_outer_cacheability(u64 reg); +u64 vgic_sanitise_inner_cacheability(u64 reg); +u64 vgic_sanitise_shareability(u64 reg); +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)); +#endif + #endif diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index e31405e..0bf6709 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -124,6 +124,7 @@ void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) } spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -332,20 +333,25 @@ int vgic_v2_probe(const struct gic_kvm_info *info) vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device\n"); + iounmap(kvm_vgic_global_state.vctrl_base); + return ret; + } + ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, kvm_vgic_global_state.vctrl_base + resource_size(&info->vctrl), info->vctrl.start); - if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); iounmap(kvm_vgic_global_state.vctrl_base); return ret; } kvm_vgic_global_state.can_emulate_gicv2 = true; - kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); - kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.type = VGIC_V2; kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 346b4ad..0506543 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -81,6 +81,8 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) else intid = val & GICH_LR_VIRTUALID; irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + if (!irq) /* An LPI could have been unmapped. */ + continue; spin_lock(&irq->irq_lock); @@ -113,6 +115,7 @@ void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) } spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); } } @@ -190,6 +193,11 @@ void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; } +#define INITIAL_PENDBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) + void vgic_v3_enable(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; @@ -207,10 +215,12 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) * way, so we force SRE to 1 to demonstrate this to the guest. * This goes with the spec allowing the value to be RAO/WI. */ - if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { vgic_v3->vgic_sre = ICC_SRE_EL1_SRE; - else + vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; + } else { vgic_v3->vgic_sre = 0; + } /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; @@ -296,6 +306,7 @@ out: int vgic_v3_probe(const struct gic_kvm_info *info) { u32 ich_vtr_el2 = kvm_call_hyp(__vgic_v3_get_ich_vtr_el2); + int ret; /* * The ListRegs field is 5 bits, but there is a architectural @@ -319,12 +330,22 @@ int vgic_v3_probe(const struct gic_kvm_info *info) } else { kvm_vgic_global_state.vcpu_base = info->vcpu.start; kvm_vgic_global_state.can_emulate_gicv2 = true; - kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); + return ret; + } kvm_info("vgic-v2@%llx\n", info->vcpu.start); } + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3 KVM device.\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); + return ret; + } + if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); - kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 69b61ab..39f3358 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -33,10 +33,17 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state; /* * Locking order is always: - * vgic_cpu->ap_list_lock - * vgic_irq->irq_lock + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock + * kvm->lpi_list_lock + * vgic_irq->irq_lock * - * (that is, always take the ap_list_lock before the struct vgic_irq lock). + * If you need to take multiple locks, always take the upper lock first, + * then the lower ones, e.g. first take the its_lock, then the irq_lock. + * If you are already holding a lock and need to take a higher one, you + * have to drop the lower ranking lock first and re-aquire it after having + * taken the upper one. * * When taking more than one ap_list_lock at the same time, always take the * lowest numbered VCPU's ap_list_lock first, so: @@ -45,6 +52,41 @@ struct vgic_global __section(.hyp.text) kvm_vgic_global_state; * spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); */ +/* + * Iterate over the VM's list of mapped LPIs to find the one with a + * matching interrupt ID and return a reference to the IRQ structure. + */ +static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = NULL; + + spin_lock(&dist->lpi_list_lock); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (irq->intid != intid) + continue; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() later once it's finished with the IRQ. + */ + vgic_get_irq_kref(irq); + goto out_unlock; + } + irq = NULL; + +out_unlock: + spin_unlock(&dist->lpi_list_lock); + + return irq; +} + +/* + * This looks up the virtual interrupt ID to get the corresponding + * struct vgic_irq. It also increases the refcount, so any caller is expected + * to call vgic_put_irq() once it's finished with this IRQ. + */ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid) { @@ -56,14 +98,43 @@ struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, if (intid <= VGIC_MAX_SPI) return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; - /* LPIs are not yet covered */ + /* LPIs */ if (intid >= VGIC_MIN_LPI) - return NULL; + return vgic_get_lpi(kvm, intid); WARN(1, "Looking up struct vgic_irq for reserved INTID"); return NULL; } +/* + * We can't do anything in here, because we lack the kvm pointer to + * lock and remove the item from the lpi_list. So we keep this function + * empty and use the return value of kref_put() to trigger the freeing. + */ +static void vgic_irq_release(struct kref *ref) +{ +} + +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist; + + if (irq->intid < VGIC_MIN_LPI) + return; + + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + dist = &kvm->arch.vgic; + + spin_lock(&dist->lpi_list_lock); + list_del(&irq->lpi_list); + dist->lpi_list_count--; + spin_unlock(&dist->lpi_list_lock); + + kfree(irq); +} + /** * kvm_vgic_target_oracle - compute the target vcpu for an irq * @@ -236,6 +307,11 @@ retry: goto retry; } + /* + * Grab a reference to the irq to reflect the fact that it is + * now in the ap_list. + */ + vgic_get_irq_kref(irq); list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); irq->vcpu = vcpu; @@ -269,14 +345,17 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, if (!irq) return -EINVAL; - if (irq->hw != mapped_irq) + if (irq->hw != mapped_irq) { + vgic_put_irq(kvm, irq); return -EINVAL; + } spin_lock(&irq->irq_lock); if (!vgic_validate_injection(irq, level)) { /* Nothing to see here, move along... */ spin_unlock(&irq->irq_lock); + vgic_put_irq(kvm, irq); return 0; } @@ -288,6 +367,7 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, } vgic_queue_irq_unlock(kvm, irq); + vgic_put_irq(kvm, irq); return 0; } @@ -330,25 +410,28 @@ int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq) irq->hwintid = phys_irq; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); return 0; } int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) { - struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); - - BUG_ON(!irq); + struct vgic_irq *irq; if (!vgic_initialized(vcpu->kvm)) return -EAGAIN; + irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq); + BUG_ON(!irq); + spin_lock(&irq->irq_lock); irq->hw = false; irq->hwintid = 0; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); return 0; } @@ -386,6 +469,15 @@ retry: list_del(&irq->ap_list); irq->vcpu = NULL; spin_unlock(&irq->irq_lock); + + /* + * This vgic_put_irq call matches the + * vgic_get_irq_kref in vgic_queue_irq_unlock, + * where we added the LPI to the ap_list. As + * we remove the irq from the list, we drop + * also drop the refcount. + */ + vgic_put_irq(vcpu->kvm, irq); continue; } @@ -614,6 +706,15 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) spin_lock(&irq->irq_lock); map_is_active = irq->hw && irq->active; spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); return map_is_active; } + +int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + if (vgic_has_its(kvm)) + return vgic_its_inject_msi(kvm, msi); + else + return -ENODEV; +} diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 7b300ca..1d8e21d 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -25,6 +25,7 @@ #define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) #define INTERRUPT_ID_BITS_SPIS 10 +#define INTERRUPT_ID_BITS_ITS 16 #define VGIC_PRI_BITS 5 #define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) @@ -38,9 +39,13 @@ struct vgic_vmcr { struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 intid); +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq); void vgic_kick_vcpus(struct kvm *kvm); +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment); + void vgic_v2_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); @@ -59,6 +64,14 @@ int vgic_v2_map_resources(struct kvm *kvm); int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, enum vgic_type); +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return; + + kref_get(&irq->refcount); +} + #ifdef CONFIG_KVM_ARM_VGIC_V3 void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); @@ -71,6 +84,10 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); +bool vgic_has_its(struct kvm *kvm); +int kvm_vgic_register_its_device(void); +void vgic_enable_lpis(struct kvm_vcpu *vcpu); +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); #else static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) { @@ -122,9 +139,28 @@ static inline int vgic_register_redist_iodevs(struct kvm *kvm, { return -ENODEV; } + +static inline bool vgic_has_its(struct kvm *kvm) +{ + return false; +} + +static inline int kvm_vgic_register_its_device(void) +{ + return -ENODEV; +} + +static inline void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ +} + +static inline int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + return -ENODEV; +} #endif -void kvm_register_vgic_device(unsigned long type); +int kvm_register_vgic_device(unsigned long type); int vgic_lazy_init(struct kvm *kvm); int vgic_init(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 154b9ab..61b31a5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3545,6 +3545,30 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, return r; } +struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, + gpa_t addr) +{ + struct kvm_io_bus *bus; + int dev_idx, srcu_idx; + struct kvm_io_device *iodev = NULL; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); + + dev_idx = kvm_io_bus_get_first_dev(bus, addr, 1); + if (dev_idx < 0) + goto out_unlock; + + iodev = bus->range[dev_idx].dev; + +out_unlock: + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return iodev; +} +EXPORT_SYMBOL_GPL(kvm_io_bus_get_dev); + static struct notifier_block kvm_cpu_notifier = { .notifier_call = kvm_cpu_hotplug, }; |