From 6f367769e3979ac399078f3aea020f1bbe9a2f79 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 20 Feb 2015 10:58:59 +0100 Subject: s390/jump label: add missing jump_label_apply_nops() call When modules are loaded we want to transform the compile time generated nops into runtime generated nops. Otherwise the jump label sanity check will detect invalid code when trying to patch code. Fixes this crash: Jump label code mismatch at __rds_conn_create+0x3c/0x720 Found: c0 04 00 00 00 01 Expected: c0 04 00 00 00 00 Kernel panic - not syncing: Corrupted kernel text CPU: 0 PID: 10 Comm: migration/0 Not tainted 3.19.0-01935-g006610f #14 Call Trace: <0000000000113260> show_trace+0xf8/0x158) <000000000011332a> show_stack+0x6a/0xe8 <000000000069fd64> dump_stack+0x7c/0xd8 <0000000000698d54> panic+0xe4/0x288 <00000000006984c6> jump_label_bug.isra.2+0xbe/0xc001 <000000000011200c> __jump_label_transform+0x94/0xc8 Reported-by: Sebastian Ott Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/module.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c index 36154a2..2ca9586 100644 --- a/arch/s390/kernel/module.c +++ b/arch/s390/kernel/module.c @@ -436,6 +436,7 @@ int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { + jump_label_apply_nops(me); vfree(me->arch.syminfo); me->arch.syminfo = NULL; return 0; -- cgit v1.1 From 72dace969da8bf953915fd1776d6c15e7a41a675 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 20 Feb 2015 08:33:31 +0100 Subject: s390/jump label: improve and fix sanity check Fix the output of the jump label sanity check and also print the code pattern that is supposed to be written to the jump label. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/jump_label.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/jump_label.c b/arch/s390/kernel/jump_label.c index cb2d51e..830066f 100644 --- a/arch/s390/kernel/jump_label.c +++ b/arch/s390/kernel/jump_label.c @@ -36,16 +36,20 @@ static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn) insn->offset = (entry->target - entry->code) >> 1; } -static void jump_label_bug(struct jump_entry *entry, struct insn *insn) +static void jump_label_bug(struct jump_entry *entry, struct insn *expected, + struct insn *new) { unsigned char *ipc = (unsigned char *)entry->code; - unsigned char *ipe = (unsigned char *)insn; + unsigned char *ipe = (unsigned char *)expected; + unsigned char *ipn = (unsigned char *)new; pr_emerg("Jump label code mismatch at %pS [%p]\n", ipc, ipc); pr_emerg("Found: %02x %02x %02x %02x %02x %02x\n", ipc[0], ipc[1], ipc[2], ipc[3], ipc[4], ipc[5]); pr_emerg("Expected: %02x %02x %02x %02x %02x %02x\n", ipe[0], ipe[1], ipe[2], ipe[3], ipe[4], ipe[5]); + pr_emerg("New: %02x %02x %02x %02x %02x %02x\n", + ipn[0], ipn[1], ipn[2], ipn[3], ipn[4], ipn[5]); panic("Corrupted kernel text"); } @@ -69,10 +73,10 @@ static void __jump_label_transform(struct jump_entry *entry, } if (init) { if (memcmp((void *)entry->code, &orignop, sizeof(orignop))) - jump_label_bug(entry, &old); + jump_label_bug(entry, &orignop, &new); } else { if (memcmp((void *)entry->code, &old, sizeof(old))) - jump_label_bug(entry, &old); + jump_label_bug(entry, &old, &new); } probe_kernel_write((void *)entry->code, &new, sizeof(new)); } -- cgit v1.1 From f0483044c1c96089256cda4cf182eea1ead77fe4 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 25 Feb 2015 13:17:48 +0100 Subject: s390/pci: fix possible information leak in mmio syscall Make sure that even in error situations we do not use copy_to_user on uninitialized kernel memory. Cc: stable@vger.kernel.org # 3.19+ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci_mmio.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c index 8aa271b..b1bb2b7 100644 --- a/arch/s390/pci/pci_mmio.c +++ b/arch/s390/pci/pci_mmio.c @@ -64,8 +64,7 @@ SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, if (copy_from_user(buf, user_buffer, length)) goto out; - memcpy_toio(io_addr, buf, length); - ret = 0; + ret = zpci_memcpy_toio(io_addr, buf, length); out: if (buf != local_buf) kfree(buf); @@ -98,16 +97,16 @@ SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, goto out; io_addr = (void __iomem *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); - ret = -EFAULT; - if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) + if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) { + ret = -EFAULT; goto out; - - memcpy_fromio(buf, io_addr, length); - - if (copy_to_user(user_buffer, buf, length)) + } + ret = zpci_memcpy_fromio(buf, io_addr, length); + if (ret) goto out; + if (copy_to_user(user_buffer, buf, length)) + ret = -EFAULT; - ret = 0; out: if (buf != local_buf) kfree(buf); -- cgit v1.1 From fb3d1c085c05e0e4b112d915dbd06b20b259e6c5 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 30 Jan 2015 10:31:13 +0100 Subject: s390: let the compiler do page clearing The hardware folks told me that for page clearing "when you exactly know what to do, hand written xc+pfd is usally faster then mvcl for page clearing, as it saves millicode overhead and parameter parsing and checking" as long as you dont need the cache bypassing. Turns out that gcc already does a proper xc,pfd loop. A small test on z196 that does buff = mmap(NULL, bufsize,PROT_EXEC|PROT_WRITE|PROT_READ,AP_PRIVATE| MAP_ANONYMOUS,0,0); for ( i = 0; i < bufsize; i+= 256) buff[i] = 0x5; gets 20% faster (touches every cache line of a page) and buff = mmap(NULL, bufsize,PROT_EXEC|PROT_WRITE|PROT_READ,AP_PRIVATE| MAP_ANONYMOUS,0,0); for ( i = 0; i < bufsize; i+= 4096) buff[i] = 0x5; is within noise ratio (touches one cache line of a page). As the clear_page is usually called for first memory accesses we can assume that at least one cache line is used afterwards, so this change should be always better. Another benchmark, a make -j 40 of my testsuite in tmpfs with hot caches on a 32cpu system: -- unpatched -- -- patched -- real 0m1.017s real 0m0.994s (~2% faster, but in noise) user 0m5.339s user 0m5.016s (~6% faster) sys 0m0.691s sys 0m0.632s (~8% faster) Let use the same define to memset as the asm-generic variant Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/page.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 7b2ac6e..53eacbd 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -37,16 +37,7 @@ static inline void storage_key_init_range(unsigned long start, unsigned long end #endif } -static inline void clear_page(void *page) -{ - register unsigned long reg1 asm ("1") = 0; - register void *reg2 asm ("2") = page; - register unsigned long reg3 asm ("3") = 4096; - asm volatile( - " mvcl 2,0" - : "+d" (reg2), "+d" (reg3) : "d" (reg1) - : "memory", "cc"); -} +#define clear_page(page) memset((page), 0, PAGE_SIZE) /* * copy_page uses the mvcl instruction with 0xb0 padding byte in order to -- cgit v1.1 From 1803ba2d7a55af525c46d8ce9161521dd2ae4400 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Feb 2015 16:43:21 +0100 Subject: s390/pci: fix [un]map_resources sequence Commit 8cfc99b58366 ("s390: add pci_iomap_range") introduced counters to keep track of the number of mappings created. This revealed that we don't have our internal mappings in order when using hotunplug or resume from hibernate. This patch addresses both issues. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 753a567..6500d26 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -483,9 +483,8 @@ void arch_teardown_msi_irqs(struct pci_dev *pdev) airq_iv_free_bit(zpci_aisb_iv, zdev->aisb); } -static void zpci_map_resources(struct zpci_dev *zdev) +static void zpci_map_resources(struct pci_dev *pdev) { - struct pci_dev *pdev = zdev->pdev; resource_size_t len; int i; @@ -499,9 +498,8 @@ static void zpci_map_resources(struct zpci_dev *zdev) } } -static void zpci_unmap_resources(struct zpci_dev *zdev) +static void zpci_unmap_resources(struct pci_dev *pdev) { - struct pci_dev *pdev = zdev->pdev; resource_size_t len; int i; @@ -651,7 +649,7 @@ int pcibios_add_device(struct pci_dev *pdev) zdev->pdev = pdev; pdev->dev.groups = zpci_attr_groups; - zpci_map_resources(zdev); + zpci_map_resources(pdev); for (i = 0; i < PCI_BAR_COUNT; i++) { res = &pdev->resource[i]; @@ -663,6 +661,11 @@ int pcibios_add_device(struct pci_dev *pdev) return 0; } +void pcibios_release_device(struct pci_dev *pdev) +{ + zpci_unmap_resources(pdev); +} + int pcibios_enable_device(struct pci_dev *pdev, int mask) { struct zpci_dev *zdev = get_zdev(pdev); @@ -670,7 +673,6 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) zdev->pdev = pdev; zpci_debug_init_device(zdev); zpci_fmb_enable_device(zdev); - zpci_map_resources(zdev); return pci_enable_resources(pdev, mask); } @@ -679,7 +681,6 @@ void pcibios_disable_device(struct pci_dev *pdev) { struct zpci_dev *zdev = get_zdev(pdev); - zpci_unmap_resources(zdev); zpci_fmb_disable_device(zdev); zpci_debug_exit_device(zdev); zdev->pdev = NULL; @@ -688,7 +689,8 @@ void pcibios_disable_device(struct pci_dev *pdev) #ifdef CONFIG_HIBERNATE_CALLBACKS static int zpci_restore(struct device *dev) { - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); int ret = 0; if (zdev->state != ZPCI_FN_STATE_ONLINE) @@ -698,7 +700,7 @@ static int zpci_restore(struct device *dev) if (ret) goto out; - zpci_map_resources(zdev); + zpci_map_resources(pdev); zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET, zdev->start_dma + zdev->iommu_size - 1, (u64) zdev->dma_table); @@ -709,12 +711,14 @@ out: static int zpci_freeze(struct device *dev) { - struct zpci_dev *zdev = get_zdev(to_pci_dev(dev)); + struct pci_dev *pdev = to_pci_dev(dev); + struct zpci_dev *zdev = get_zdev(pdev); if (zdev->state != ZPCI_FN_STATE_ONLINE) return 0; zpci_unregister_ioat(zdev, 0); + zpci_unmap_resources(pdev); return clp_disable_fh(zdev); } -- cgit v1.1 From d94260832d0d78aba398de361c0416a93cabc046 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 27 Feb 2015 16:43:55 +0100 Subject: s390/pci: unify pci_iomap symbol exports Since commit 8cfc99b58366 ("s390: add pci_iomap_range") we use EXPORT_SYMBOL for pci_iomap but EXPORT_SYMBOL_GPL for pci_iounmap. Change the related functions to use EXPORT_SYMBOL like the asm-generic variants do. Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/pci/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 6500d26..f0b8544 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -287,7 +287,7 @@ void __iomem *pci_iomap_range(struct pci_dev *pdev, addr = ZPCI_IOMAP_ADDR_BASE | ((u64) idx << 48); return (void __iomem *) addr + offset; } -EXPORT_SYMBOL_GPL(pci_iomap_range); +EXPORT_SYMBOL(pci_iomap_range); void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) { @@ -309,7 +309,7 @@ void pci_iounmap(struct pci_dev *pdev, void __iomem *addr) } spin_unlock(&zpci_iomap_lock); } -EXPORT_SYMBOL_GPL(pci_iounmap); +EXPORT_SYMBOL(pci_iounmap); static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) -- cgit v1.1 From a9ca8eb7afb4f1c90d8e43092e94c4e86785efbc Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 28 Feb 2015 11:35:26 +0100 Subject: s390/ftrace: fix crashes when switching tracers / add notrace to cpu_relax() With git commit 4d92f50249eb ("s390: reintroduce diag 44 calls for cpu_relax()") I reintroduced a non-trivial cpu_relax() variant on s390. The difference to the previous variant however is that the new version is an out-of-line function, which will be traced if function tracing is enabled. Switching to different tracers includes instruction patching. Therefore this is done within stop_machine() "context" to prevent that any function tracing is going on while instructions are being patched. With the new out-of-line variant of cpu_relax() this is not true anymore, since cpu_relax() gets called in a busy loop by all waiting cpus within stop_machine() until function patching is finished. Therefore cpu_relax() must be marked notrace. This fixes kernel crashes when frequently switching between "function" and "function_graph" tracers. Moving cpu_relax() to a header file again, doesn't work because of header include order dependencies. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 2610823..dc488e1 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -18,7 +18,7 @@ static DEFINE_PER_CPU(struct cpuid, cpu_id); -void cpu_relax(void) +void notrace cpu_relax(void) { if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) asm volatile("diag 0,0,0x44"); -- cgit v1.1 From 691d5264158e58004904f285417fefaf8650ffe6 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Sun, 1 Mar 2015 06:56:45 -0800 Subject: s390/mm: fix incorrect ASCE after crst_table_downgrade The switch_mm function does nothing in case the prev and next mm are the same. It can happen that a crst_table_downgrade has changed the top-level pgd in the meantime on a different CPU. Always store the new ASCE to be picked up in entry.S. [heiko.carstens@de.ibm.com]: Bug was introduced with git commit 53e857f30867 ("s390/mm,tlb: race of lazy TLB flush vs. recreation of TLB entries") and causes random crashes due to broken page tables being used. Reported-by: Dominik Vogt Signed-off-by: Martin Schwidefsky Signed-off-by: Heiko Carstens --- arch/s390/include/asm/mmu_context.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index f49b719..8fb3802 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -62,6 +62,7 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, { int cpu = smp_processor_id(); + S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); if (prev == next) return; if (MACHINE_HAS_TLB_LC) @@ -73,7 +74,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, atomic_dec(&prev->context.attach_count); if (MACHINE_HAS_TLB_LC) cpumask_clear_cpu(cpu, &prev->context.cpu_attach_mask); - S390_lowcore.user_asce = next->context.asce_bits | __pa(next->pgd); } #define finish_arch_post_lock_switch finish_arch_post_lock_switch -- cgit v1.1 From ed6f76b464ab53e59adc7ec6cc8428d3d6ade1a5 Mon Sep 17 00:00:00 2001 From: Tony Krowiak Date: Tue, 24 Feb 2015 14:06:57 -0500 Subject: KVM: s390/cpacf: Enable key wrapping by default z/VM and LPAR enable key wrapping by default, lets do the same on KVM. Signed-off-by: Tony Krowiak Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0c36239..b4d2030 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -839,9 +839,13 @@ static int kvm_s390_crypto_init(struct kvm *kvm) kvm_s390_set_crycb_format(kvm); - /* Disable AES/DEA protected key functions by default */ - kvm->arch.crypto.aes_kw = 0; - kvm->arch.crypto.dea_kw = 0; + /* Enable AES/DEA protected key functions by default */ + kvm->arch.crypto.aes_kw = 1; + kvm->arch.crypto.dea_kw = 1; + get_random_bytes(kvm->arch.crypto.crycb->aes_wrapping_key_mask, + sizeof(kvm->arch.crypto.crycb->aes_wrapping_key_mask)); + get_random_bytes(kvm->arch.crypto.crycb->dea_wrapping_key_mask, + sizeof(kvm->arch.crypto.crycb->dea_wrapping_key_mask)); return 0; } -- cgit v1.1 From 86044c8c14b618b11558d3cba96aa0548c81274d Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 26 Feb 2015 13:53:47 +0100 Subject: KVM: s390/cpacf: Fix kernel bug under z/VM Under z/VM PQAP might trigger an operation exception if no crypto cards are defined via APVIRTUAL or APDEDICATED. [ 386.098666] Kernel BUG at 0000000000135c56 [verbose debug info unavailable] [ 386.098693] illegal operation: 0001 ilc:2 [#1] SMP [...] [ 386.098751] Krnl PSW : 0704c00180000000 0000000000135c56 (kvm_s390_apxa_installed+0x46/0x98) [...] [ 386.098804] [<000000000013627c>] kvm_arch_init_vm+0x29c/0x358 [ 386.098806] [<000000000012d008>] kvm_dev_ioctl+0xc0/0x460 [ 386.098809] [<00000000002c639a>] do_vfs_ioctl+0x332/0x508 [ 386.098811] [<00000000002c660e>] SyS_ioctl+0x9e/0xb0 [ 386.098814] [<000000000070476a>] system_call+0xd6/0x258 [ 386.098815] [<000003fffc7400a2>] 0x3fffc7400a2 Lets add an extable entry and provide a zeroed config in that case. Reported-by: Stefan Zimmermann Signed-off-by: Christian Borntraeger Reviewed-by: Thomas Huth Tested-by: Stefan Zimmermann --- arch/s390/kvm/kvm-s390.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b4d2030..18965f9 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -778,15 +778,18 @@ long kvm_arch_vm_ioctl(struct file *filp, static int kvm_s390_query_ap_config(u8 *config) { u32 fcn_code = 0x04000000UL; - u32 cc; + u32 cc = 0; + memset(config, 0, 128); asm volatile( "lgr 0,%1\n" "lgr 2,%2\n" ".long 0xb2af0000\n" /* PQAP(QCI) */ - "ipm %0\n" + "0: ipm %0\n" "srl %0,28\n" - : "=r" (cc) + "1:\n" + EX_TABLE(0b, 1b) + : "+r" (cc) : "r" (fcn_code), "r" (config) : "cc", "0", "2", "memory" ); -- cgit v1.1 From 94422ee880afc4af050bac172ea39af8e2130034 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Thu, 26 Feb 2015 12:12:40 +0100 Subject: KVM: s390: fix in memory copy of facility lists The facility lists were not fully copied. Signed-off-by: Michael Mueller Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 18965f9..76894c8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -579,7 +579,7 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&mach->fac_mask, kvm_s390_fac_list_mask, kvm_s390_fac_list_mask_size() * sizeof(u64)); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_U64); + S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) ret = -EFAULT; kfree(mach); @@ -903,7 +903,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) goto out_nofac; memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list, - S390_ARCH_FAC_LIST_SIZE_U64); + S390_ARCH_FAC_LIST_SIZE_BYTE); /* * If this KVM host runs *not* in a LPAR, relax the facility bits -- cgit v1.1 From 981467c930bdfa4be59acbbc9f3a80eb9e3167a8 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Tue, 24 Feb 2015 13:51:04 +0100 Subject: KVM: s390: include guest facilities in kvm facility test Most facility related decisions in KVM have to take into account: - the facilities offered by the underlying run container (LPAR/VM) - the facilities supported by the KVM code itself - the facilities requested by a guest VM This patch adds the KVM driver requested facilities to the test routine. It additionally renames struct s390_model_fac to kvm_s390_fac and its field names to be more meaningful. The semantics of the facilities stored in the KVM architecture structure is changed. The address arch.model.fac->list now points to the guest facility list and arch.model.fac->mask points to the KVM facility mask. This patch fixes the behaviour of KVM for some facilities for guests that ignore the guest visible facility bits, e.g. guests could use transactional memory intructions on hosts supporting them even if the chosen cpu model would not offer them. The userspace interface is not affected by this change. Signed-off-by: Michael Mueller Signed-off-by: Christian Borntraeger --- arch/s390/include/asm/kvm_host.h | 12 ++++++------ arch/s390/kvm/kvm-s390.c | 30 ++++++++++++++++-------------- arch/s390/kvm/kvm-s390.h | 3 ++- arch/s390/kvm/priv.c | 2 +- 4 files changed, 25 insertions(+), 22 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d84559e..f407bbf 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -515,15 +515,15 @@ struct s390_io_adapter { #define S390_ARCH_FAC_MASK_SIZE_U64 \ (S390_ARCH_FAC_MASK_SIZE_BYTE / sizeof(u64)) -struct s390_model_fac { - /* facilities used in SIE context */ - __u64 sie[S390_ARCH_FAC_LIST_SIZE_U64]; - /* subset enabled by kvm */ - __u64 kvm[S390_ARCH_FAC_LIST_SIZE_U64]; +struct kvm_s390_fac { + /* facility list requested by guest */ + __u64 list[S390_ARCH_FAC_LIST_SIZE_U64]; + /* facility mask supported by kvm & hosting machine */ + __u64 mask[S390_ARCH_FAC_LIST_SIZE_U64]; }; struct kvm_s390_cpu_model { - struct s390_model_fac *fac; + struct kvm_s390_fac *fac; struct cpuid cpu_id; unsigned short ibc; }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 76894c8..5a02be4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -522,7 +522,7 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) memcpy(&kvm->arch.model.cpu_id, &proc->cpuid, sizeof(struct cpuid)); kvm->arch.model.ibc = proc->ibc; - memcpy(kvm->arch.model.fac->kvm, proc->fac_list, + memcpy(kvm->arch.model.fac->list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); } else ret = -EFAULT; @@ -556,7 +556,7 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) } memcpy(&proc->cpuid, &kvm->arch.model.cpu_id, sizeof(struct cpuid)); proc->ibc = kvm->arch.model.ibc; - memcpy(&proc->fac_list, kvm->arch.model.fac->kvm, S390_ARCH_FAC_LIST_SIZE_BYTE); + memcpy(&proc->fac_list, kvm->arch.model.fac->list, S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -576,8 +576,8 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) } get_cpu_id((struct cpuid *) &mach->cpuid); mach->ibc = sclp_get_ibc(); - memcpy(&mach->fac_mask, kvm_s390_fac_list_mask, - kvm_s390_fac_list_mask_size() * sizeof(u64)); + memcpy(&mach->fac_mask, kvm->arch.model.fac->mask, + S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) @@ -893,16 +893,16 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) /* * The architectural maximum amount of facilities is 16 kbit. To store * this amount, 2 kbyte of memory is required. Thus we need a full - * page to hold the active copy (arch.model.fac->sie) and the current - * facilities set (arch.model.fac->kvm). Its address size has to be + * page to hold the guest facility list (arch.model.fac->list) and the + * facility mask (arch.model.fac->mask). Its address size has to be * 31 bits and word aligned. */ kvm->arch.model.fac = - (struct s390_model_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); + (struct kvm_s390_fac *) get_zeroed_page(GFP_KERNEL | GFP_DMA); if (!kvm->arch.model.fac) goto out_nofac; - memcpy(kvm->arch.model.fac->kvm, S390_lowcore.stfle_fac_list, + memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); /* @@ -914,7 +914,7 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) */ if (!MACHINE_IS_LPAR) for (i = 0; i < kvm_s390_fac_list_mask_size(); i++) - kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->kvm[i]; + kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->mask[i]; /* * Apply the kvm facility mask to limit the kvm supported/tolerated @@ -922,11 +922,15 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) */ for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) - kvm->arch.model.fac->kvm[i] &= kvm_s390_fac_list_mask[i]; + kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; else - kvm->arch.model.fac->kvm[i] = 0UL; + kvm->arch.model.fac->mask[i] = 0UL; } + /* Populate the facility list initially. */ + memcpy(kvm->arch.model.fac->list, kvm->arch.model.fac->mask, + S390_ARCH_FAC_LIST_SIZE_BYTE); + kvm_s390_get_cpu_id(&kvm->arch.model.cpu_id); kvm->arch.model.ibc = sclp_get_ibc() & 0x0fff; @@ -1172,8 +1176,6 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) mutex_lock(&vcpu->kvm->lock); vcpu->arch.cpu_id = vcpu->kvm->arch.model.cpu_id; - memcpy(vcpu->kvm->arch.model.fac->sie, vcpu->kvm->arch.model.fac->kvm, - S390_ARCH_FAC_LIST_SIZE_BYTE); vcpu->arch.sie_block->ibc = vcpu->kvm->arch.model.ibc; mutex_unlock(&vcpu->kvm->lock); @@ -1219,7 +1221,7 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, vcpu->arch.sie_block->scaol = (__u32)(__u64)kvm->arch.sca; set_bit(63 - id, (unsigned long *) &kvm->arch.sca->mcn); } - vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->sie; + vcpu->arch.sie_block->fac = (int) (long) kvm->arch.model.fac->list; spin_lock_init(&vcpu->arch.local_int.lock); vcpu->arch.local_int.float_int = &kvm->arch.float_int; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 985c211..c34109a 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -128,7 +128,8 @@ static inline void kvm_s390_set_psw_cc(struct kvm_vcpu *vcpu, unsigned long cc) /* test availability of facility in a kvm intance */ static inline int test_kvm_facility(struct kvm *kvm, unsigned long nr) { - return __test_facility(nr, kvm->arch.model.fac->kvm); + return __test_facility(nr, kvm->arch.model.fac->mask) && + __test_facility(nr, kvm->arch.model.fac->list); } /* are cpu states controlled by user space */ diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index bdd9b5b..3511169 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -348,7 +348,7 @@ static int handle_stfl(struct kvm_vcpu *vcpu) * We need to shift the lower 32 facility bits (bit 0-31) from a u64 * into a u32 memory representation. They will remain bits 0-31. */ - fac = *vcpu->kvm->arch.model.fac->sie >> 32; + fac = *vcpu->kvm->arch.model.fac->list >> 32; rc = write_guest_lc(vcpu, offsetof(struct _lowcore, stfl_fac_list), &fac, sizeof(fac)); if (rc) -- cgit v1.1 From fb5bf93f84c277546473be35543ed7890f6e6742 Mon Sep 17 00:00:00 2001 From: Michael Mueller Date: Fri, 27 Feb 2015 14:25:10 +0100 Subject: KVM: s390: non-LPAR case obsolete during facilities mask init With patch "include guest facilities in kvm facility test" it is no longer necessary to have special handling for the non-LPAR case. Signed-off-by: Michael Mueller Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 5a02be4..f6579cf 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -902,24 +902,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) if (!kvm->arch.model.fac) goto out_nofac; + /* Populate the facility mask initially. */ memcpy(kvm->arch.model.fac->mask, S390_lowcore.stfle_fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); - - /* - * If this KVM host runs *not* in a LPAR, relax the facility bits - * of the kvm facility mask by all missing facilities. This will allow - * to determine the right CPU model by means of the remaining facilities. - * Live guest migration must prohibit the migration of KVMs running in - * a LPAR to non LPAR hosts. - */ - if (!MACHINE_IS_LPAR) - for (i = 0; i < kvm_s390_fac_list_mask_size(); i++) - kvm_s390_fac_list_mask[i] &= kvm->arch.model.fac->mask[i]; - - /* - * Apply the kvm facility mask to limit the kvm supported/tolerated - * facility list. - */ for (i = 0; i < S390_ARCH_FAC_LIST_SIZE_U64; i++) { if (i < kvm_s390_fac_list_mask_size()) kvm->arch.model.fac->mask[i] &= kvm_s390_fac_list_mask[i]; -- cgit v1.1 From dc9be0fac70a2ad86e31a81372bb0bdfb6945353 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Mar 2015 11:54:46 +0100 Subject: kvm: move advertising of KVM_CAP_IRQFD to common code POWER supports irqfds but forgot to advertise them. Some userspace does not check for the capability, but others check it---thus they work on x86 and s390 but not POWER. To avoid that other architectures in the future make the same mistake, let common code handle KVM_CAP_IRQFD the same way as KVM_CAP_IRQFD_RESAMPLE. Reported-and-tested-by: Greg Kurz Cc: stable@vger.kernel.org Fixes: 297e21053a52f060944e9f0de4c64fad9bcd72fc Signed-off-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti --- arch/s390/kvm/kvm-s390.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f6579cf..19e17bd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -165,7 +165,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_ENABLE_CAP: case KVM_CAP_S390_CSS_SUPPORT: - case KVM_CAP_IRQFD: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: case KVM_CAP_ENABLE_CAP_VM: -- cgit v1.1 From 0a64815091bd0ad6c6cdfaac2fae55b0f3ecf974 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 9 Mar 2015 17:34:18 +0100 Subject: s390/cpum_sf: add diagnostic sampling event only if it is authorized The SF_CYCLES_BASIC_DIAG is always registered even if it is turned of in the current hardware configuration. Because diagnostic-sampling is typically not turned on in the hardware configuration, do not register this perf event by default. Enable it only if the diagnostic-sampling function is authorized. Signed-off-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_sf.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index c3f8d15..e6a1578 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1415,7 +1415,7 @@ CPUMF_EVENT_ATTR(SF, SF_CYCLES_BASIC_DIAG, PERF_EVENT_CPUM_SF_DIAG); static struct attribute *cpumsf_pmu_events_attr[] = { CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC), - CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG), + NULL, NULL, }; @@ -1606,8 +1606,11 @@ static int __init init_cpum_sampling_pmu(void) return -EINVAL; } - if (si.ad) + if (si.ad) { sfb_set_limits(CPUM_SF_MIN_SDB, CPUM_SF_MAX_SDB); + cpumsf_pmu_events_attr[1] = + CPUMF_EVENT_PTR(SF, SF_CYCLES_BASIC_DIAG); + } sfdbg = debug_register(KMSG_COMPONENT, 2, 1, 80); if (!sfdbg) -- cgit v1.1 From 20e76ee184a7cea155268377c4e414eea1dab6fd Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 13 Mar 2015 10:31:42 +0100 Subject: s390/ftrace: fix compile error if CONFIG_KPROBES is disabled Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/ftrace.c | 61 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 16 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 82c1989..6c79f1b 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -57,6 +57,44 @@ unsigned long ftrace_plt; +static inline void ftrace_generate_orig_insn(struct ftrace_insn *insn) +{ +#ifdef CC_USING_HOTPATCH + /* brcl 0,0 */ + insn->opc = 0xc004; + insn->disp = 0; +#else + /* stg r14,8(r15) */ + insn->opc = 0xe3e0; + insn->disp = 0xf0080024; +#endif +} + +static inline int is_kprobe_on_ftrace(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + if (insn->opc == BREAKPOINT_INSTRUCTION) + return 1; +#endif + return 0; +} + +static inline void ftrace_generate_kprobe_nop_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_NOP; +#endif +} + +static inline void ftrace_generate_kprobe_call_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_KPROBES + insn->opc = BREAKPOINT_INSTRUCTION; + insn->disp = KPROBE_ON_FTRACE_CALL; +#endif +} + int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -72,16 +110,9 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EFAULT; if (addr == MCOUNT_ADDR) { /* Initial code replacement */ -#ifdef CC_USING_HOTPATCH - /* We expect to see brcl 0,0 */ - ftrace_generate_nop_insn(&orig); -#else - /* We expect to see stg r14,8(r15) */ - orig.opc = 0xe3e0; - orig.disp = 0xf0080024; -#endif + ftrace_generate_orig_insn(&orig); ftrace_generate_nop_insn(&new); - } else if (old.opc == BREAKPOINT_INSTRUCTION) { + } else if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -89,9 +120,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * bytes of the original instruction so that the kprobes * handler can execute a nop, if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_CALL; - new.disp = KPROBE_ON_FTRACE_NOP; + ftrace_generate_kprobe_call_insn(&orig); + ftrace_generate_kprobe_nop_insn(&new); } else { /* Replace ftrace call with a nop. */ ftrace_generate_call_insn(&orig, rec->ip); @@ -111,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (probe_kernel_read(&old, (void *) rec->ip, sizeof(old))) return -EFAULT; - if (old.opc == BREAKPOINT_INSTRUCTION) { + if (is_kprobe_on_ftrace(&old)) { /* * If we find a breakpoint instruction, a kprobe has been * placed at the beginning of the function. We write the @@ -119,9 +149,8 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) * bytes of the original instruction so that the kprobes * handler can execute a brasl if it reaches this breakpoint. */ - new.opc = orig.opc = BREAKPOINT_INSTRUCTION; - orig.disp = KPROBE_ON_FTRACE_NOP; - new.disp = KPROBE_ON_FTRACE_CALL; + ftrace_generate_kprobe_nop_insn(&orig); + ftrace_generate_kprobe_call_insn(&new); } else { /* Replace nop with an ftrace call. */ ftrace_generate_nop_insn(&orig); -- cgit v1.1 From e143fa93c28669a7f0851e6850b1da5b1945fd53 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 13 Mar 2015 11:19:09 +0100 Subject: s390/mm: limit STACK_RND_MASK for compat tasks For compat tasks the mmap randomization does not use the maximum randomization value from mmap_rnd_mask but the fixed value of 0x7ff. This needs to be respected in the definition of STACK_RND_MASK as well. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index c9df40b..c9c875d 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -211,7 +211,7 @@ do { \ extern unsigned long mmap_rnd_mask; -#define STACK_RND_MASK (mmap_rnd_mask) +#define STACK_RND_MASK (test_thread_flag(TIF_31BIT) ? 0x7ff : mmap_rnd_mask) #define ARCH_DLINFO \ do { \ -- cgit v1.1 From 1833c9f647e9bda1cd24653ff8f9c207b5f5b911 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sat, 21 Mar 2015 12:43:08 +0100 Subject: s390/smp: reenable smt after resume After a suspend/resume cycle we missed to enable smt again, which leads to all sorts of bugs, since the kernel assumes smt is enabled, while the hardware thinks it is not. Reported-and-tested-by: Sebastian Ott Reported-by: Stefan Haberland Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/swsusp_asm64.S | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kernel/swsusp_asm64.S b/arch/s390/kernel/swsusp_asm64.S index 6b09fdf..ca62946 100644 --- a/arch/s390/kernel/swsusp_asm64.S +++ b/arch/s390/kernel/swsusp_asm64.S @@ -177,6 +177,17 @@ restart_entry: lhi %r1,1 sigp %r1,%r0,SIGP_SET_ARCHITECTURE sam64 +#ifdef CONFIG_SMP + larl %r1,smp_cpu_mt_shift + icm %r1,15,0(%r1) + jz smt_done + llgfr %r1,%r1 +smt_loop: + sigp %r1,%r0,SIGP_SET_MULTI_THREADING + brc 8,smt_done /* accepted */ + brc 2,smt_loop /* busy, try again */ +smt_done: +#endif larl %r1,.Lnew_pgm_check_psw lpswe 0(%r1) pgm_check_entry: -- cgit v1.1