From 9a99649f2a89fdfc9dde5d5401675561567bf99a Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 29 Jan 2016 15:13:30 +0100 Subject: s390/pci: remove pdev pointer from arch data For each PCI function we need to maintain arch specific data in struct zpci_dev which also contains a pointer to struct pci_dev. When a function is registered or deregistered (which is triggered by PCI common code) we need to adjust that pointer which could interfere with the machine check handler (triggered by FW) using zpci_dev->pdev. Since multiple instances of the same pdev could exist at a time this can't be solved with locking. Fix that by ditching the pdev pointer and use a bus walk to reach struct pci_dev (only one instance of a pdev can be registered at the bus at a time). Signed-off-by: Sebastian Ott Reviewed-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 3 +-- arch/s390/pci/pci.c | 6 +----- arch/s390/pci/pci_debug.c | 5 ++--- arch/s390/pci/pci_dma.c | 21 ++++++++++++--------- arch/s390/pci/pci_event.c | 13 +++++++++++-- drivers/pci/hotplug/s390_pci_hpc.c | 8 ++++++-- 6 files changed, 33 insertions(+), 23 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c873e68..dc763ea 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -66,7 +66,6 @@ struct s390_domain; /* Private data per function */ struct zpci_dev { - struct pci_dev *pdev; struct pci_bus *bus; struct list_head entry; /* list of all zpci_devices, needed for hotplug, etc. */ @@ -192,7 +191,7 @@ int zpci_fmb_disable_device(struct zpci_dev *); /* Debug */ int zpci_debug_init(void); void zpci_debug_exit(void); -void zpci_debug_init_device(struct zpci_dev *); +void zpci_debug_init_device(struct zpci_dev *, const char *); void zpci_debug_exit_device(struct zpci_dev *); void zpci_debug_info(struct zpci_dev *, struct seq_file *); diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 8f19c8f..f76d01e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -637,11 +637,9 @@ static void zpci_cleanup_bus_resources(struct zpci_dev *zdev) int pcibios_add_device(struct pci_dev *pdev) { - struct zpci_dev *zdev = to_zpci(pdev); struct resource *res; int i; - zdev->pdev = pdev; pdev->dev.groups = zpci_attr_groups; zpci_map_resources(pdev); @@ -664,8 +662,7 @@ int pcibios_enable_device(struct pci_dev *pdev, int mask) { struct zpci_dev *zdev = to_zpci(pdev); - zdev->pdev = pdev; - zpci_debug_init_device(zdev); + zpci_debug_init_device(zdev, dev_name(&pdev->dev)); zpci_fmb_enable_device(zdev); return pci_enable_resources(pdev, mask); @@ -677,7 +674,6 @@ void pcibios_disable_device(struct pci_dev *pdev) zpci_fmb_disable_device(zdev); zpci_debug_exit_device(zdev); - zdev->pdev = NULL; } #ifdef CONFIG_HIBERNATE_CALLBACKS diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index 4129b0a..c555de3 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -128,10 +128,9 @@ static const struct file_operations debugfs_pci_perf_fops = { .release = single_release, }; -void zpci_debug_init_device(struct zpci_dev *zdev) +void zpci_debug_init_device(struct zpci_dev *zdev, const char *name) { - zdev->debugfs_dev = debugfs_create_dir(dev_name(&zdev->pdev->dev), - debugfs_root); + zdev->debugfs_dev = debugfs_create_dir(name, debugfs_root); if (IS_ERR(zdev->debugfs_dev)) zdev->debugfs_dev = NULL; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 4638b93..a06ce80 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -217,27 +217,29 @@ void dma_cleanup_tables(unsigned long *table) dma_free_cpu_table(table); } -static unsigned long __dma_alloc_iommu(struct zpci_dev *zdev, +static unsigned long __dma_alloc_iommu(struct device *dev, unsigned long start, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long boundary_size; - boundary_size = ALIGN(dma_get_seg_boundary(&zdev->pdev->dev) + 1, + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, PAGE_SIZE) >> PAGE_SHIFT; return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, start, size, 0, boundary_size, 0); } -static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) +static unsigned long dma_alloc_iommu(struct device *dev, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long offset, flags; int wrap = 0; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(zdev, zdev->next_bit, size); + offset = __dma_alloc_iommu(dev, zdev->next_bit, size); if (offset == -1) { /* wrap-around */ - offset = __dma_alloc_iommu(zdev, 0, size); + offset = __dma_alloc_iommu(dev, 0, size); wrap = 1; } @@ -251,8 +253,9 @@ static unsigned long dma_alloc_iommu(struct zpci_dev *zdev, int size) return offset; } -static void dma_free_iommu(struct zpci_dev *zdev, unsigned long offset, int size) +static void dma_free_iommu(struct device *dev, unsigned long offset, int size) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); unsigned long flags; spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); @@ -293,7 +296,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, /* This rounds up number of pages based on size and offset */ nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - iommu_page_index = dma_alloc_iommu(zdev, nr_pages); + iommu_page_index = dma_alloc_iommu(dev, nr_pages); if (iommu_page_index == -1) { ret = -ENOSPC; goto out_err; @@ -319,7 +322,7 @@ static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, return dma_addr + (offset & ~PAGE_MASK); out_free: - dma_free_iommu(zdev, iommu_page_index, nr_pages); + dma_free_iommu(dev, iommu_page_index, nr_pages); out_err: zpci_err("map error:\n"); zpci_err_dma(ret, pa); @@ -346,7 +349,7 @@ static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, atomic64_add(npages, &zdev->unmapped_pages); iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - dma_free_iommu(zdev, iommu_page_index, npages); + dma_free_iommu(dev, iommu_page_index, npages); } static void *s390_dma_alloc(struct device *dev, size_t size, diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b0e0475..fb2a9a5 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -46,11 +46,14 @@ struct zpci_ccdf_avail { static void __zpci_event_error(struct zpci_ccdf_err *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; zpci_err("error CCDF:\n"); zpci_err_hex(ccdf, sizeof(*ccdf)); + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); @@ -58,6 +61,7 @@ static void __zpci_event_error(struct zpci_ccdf_err *ccdf) return; pdev->error_state = pci_channel_io_perm_failure; + pci_dev_put(pdev); } void zpci_event_error(void *data) @@ -69,9 +73,12 @@ void zpci_event_error(void *data) static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) { struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid); - struct pci_dev *pdev = zdev ? zdev->pdev : NULL; + struct pci_dev *pdev = NULL; int ret; + if (zdev) + pdev = pci_get_slot(zdev->bus, ZPCI_DEVFN); + pr_info("%s: Event 0x%x reconfigured PCI function 0x%x\n", pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid); zpci_err("avail CCDF:\n"); @@ -138,6 +145,8 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf) default: break; } + if (pdev) + pci_dev_put(pdev); } void zpci_event_availability(void *data) diff --git a/drivers/pci/hotplug/s390_pci_hpc.c b/drivers/pci/hotplug/s390_pci_hpc.c index eb5efae..50b8b7d 100644 --- a/drivers/pci/hotplug/s390_pci_hpc.c +++ b/drivers/pci/hotplug/s390_pci_hpc.c @@ -93,13 +93,17 @@ out_deconfigure: static int disable_slot(struct hotplug_slot *hotplug_slot) { struct slot *slot = hotplug_slot->private; + struct pci_dev *pdev; int rc; if (!zpci_fn_configured(slot->zdev->state)) return -EIO; - if (slot->zdev->pdev) - pci_stop_and_remove_bus_device_locked(slot->zdev->pdev); + pdev = pci_get_slot(slot->zdev->bus, ZPCI_DEVFN); + if (pdev) { + pci_stop_and_remove_bus_device_locked(pdev); + pci_dev_put(pdev); + } rc = zpci_disable_device(slot->zdev); if (rc) -- cgit v1.1 From 2cfc5f9ce7f5e17553e84d36ea9563e677e369d1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 2 Feb 2016 14:40:40 +0100 Subject: s390/xor: optimized xor routing using the XC instruction Acked-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/xor.h | 21 ++++++- arch/s390/lib/Makefile | 2 +- arch/s390/lib/xor.c | 134 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 arch/s390/lib/xor.c diff --git a/arch/s390/include/asm/xor.h b/arch/s390/include/asm/xor.h index c82eb12..c988df7 100644 --- a/arch/s390/include/asm/xor.h +++ b/arch/s390/include/asm/xor.h @@ -1 +1,20 @@ -#include +/* + * Optimited xor routines + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ +#ifndef _ASM_S390_XOR_H +#define _ASM_S390_XOR_H + +extern struct xor_block_template xor_block_xc; + +#undef XOR_TRY_TEMPLATES +#define XOR_TRY_TEMPLATES \ +do { \ + xor_speed(&xor_block_xc); \ +} while (0) + +#define XOR_SELECT_TEMPLATE(FASTEST) (&xor_block_xc) + +#endif /* _ASM_S390_XOR_H */ diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 0e8fefe..1d1af31 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -3,7 +3,7 @@ # lib-y += delay.o string.o uaccess.o find.o -obj-y += mem.o +obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c new file mode 100644 index 0000000..7d94e3e --- /dev/null +++ b/arch/s390/lib/xor.c @@ -0,0 +1,134 @@ +/* + * Optimized xor_block operation for RAID4/5 + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include + +static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + "3:\n" + : : "d" (bytes), "a" (p1), "a" (p2) + : "0", "1", "cc", "memory"); +} + +static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4) +{ + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4) + : : "0", "1", "cc", "memory"); +} + +static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, + unsigned long *p3, unsigned long *p4, unsigned long *p5) +{ + /* Get around a gcc oddity */ + register unsigned long *reg7 asm ("7") = p5; + + asm volatile( + " larl 1,2f\n" + " aghi %0,-1\n" + " jm 3f\n" + " srlg 0,%0,8\n" + " ltgr 0,0\n" + " jz 1f\n" + "0: xc 0(256,%1),0(%2)\n" + " xc 0(256,%1),0(%3)\n" + " xc 0(256,%1),0(%4)\n" + " xc 0(256,%1),0(%5)\n" + " la %1,256(%1)\n" + " la %2,256(%2)\n" + " la %3,256(%3)\n" + " la %4,256(%4)\n" + " la %5,256(%5)\n" + " brctg 0,0b\n" + "1: ex %0,0(1)\n" + " ex %0,6(1)\n" + " ex %0,12(1)\n" + " ex %0,18(1)\n" + " j 3f\n" + "2: xc 0(1,%1),0(%2)\n" + " xc 0(1,%1),0(%3)\n" + " xc 0(1,%1),0(%4)\n" + " xc 0(1,%1),0(%5)\n" + "3:\n" + : "+d" (bytes), "+a" (p1), "+a" (p2), "+a" (p3), "+a" (p4), + "+a" (reg7) + : : "0", "1", "cc", "memory"); +} + +struct xor_block_template xor_block_xc = { + .name = "xc", + .do_2 = xor_xc_2, + .do_3 = xor_xc_3, + .do_4 = xor_xc_4, + .do_5 = xor_xc_5, +}; +EXPORT_SYMBOL(xor_block_xc); -- cgit v1.1 From 007ccec53da35528bd06fa0063da55b1311054c1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 4 Feb 2016 12:24:46 +0100 Subject: s390/pageattr: do a single TLB flush for change_page_attr The change of the access rights for an address range in the kernel address space is currently done with a loop of IPTE + a store of the modified PTE. Between the IPTE and the store the PTE will be invalid, this intermediate state can cause problems with concurrent accesses. Consider a change of a kernel area from read-write to read-only, a concurrent reader of that area should be fine but with the invalid PTE it might get an unexpected exception. Remove the IPTEs for each PTE and do a global flush after all PTEs have been modified. Reviewed-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pageattr.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index 749c984..f2a5c29 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -65,19 +65,17 @@ static pte_t *walk_page_table(unsigned long addr) static void change_page_attr(unsigned long addr, int numpages, pte_t (*set) (pte_t)) { - pte_t *ptep, pte; + pte_t *ptep; int i; for (i = 0; i < numpages; i++) { ptep = walk_page_table(addr); if (WARN_ON_ONCE(!ptep)) break; - pte = *ptep; - pte = set(pte); - __ptep_ipte(addr, ptep); - *ptep = pte; + *ptep = set(*ptep); addr += PAGE_SIZE; } + __tlb_flush_kernel(); } int set_memory_ro(unsigned long addr, int numpages) -- cgit v1.1 From 77ec8a545136272ca3056ca3759836df0cfee5e5 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 1 Feb 2016 14:12:08 +0100 Subject: s390/stacktrace: use nosched instead of savesched parameter Use the inversed "nosched" logic like all other architectures. Signed-off-by: Heiko Carstens Tested-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/stacktrace.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 8f64ebd..3872d14 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -14,7 +14,7 @@ static unsigned long save_context_stack(struct stack_trace *trace, unsigned long sp, unsigned long low, unsigned long high, - int savesched) + int nosched) { struct stack_frame *sf; struct pt_regs *regs; @@ -46,7 +46,7 @@ static unsigned long save_context_stack(struct stack_trace *trace, return sp; regs = (struct pt_regs *)sp; addr = regs->psw.addr; - if (savesched || !in_sched_functions(addr)) { + if (!nosched || !in_sched_functions(addr)) { if (!trace->skip) trace->entries[trace->nr_entries++] = addr; else @@ -66,13 +66,13 @@ static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); new_sp = save_context_stack(trace, sp, S390_lowcore.panic_stack + frame_size - PAGE_SIZE, - S390_lowcore.panic_stack + frame_size, 1); + S390_lowcore.panic_stack + frame_size, 0); new_sp = save_context_stack(trace, new_sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size, 1); + S390_lowcore.async_stack + frame_size, 0); save_context_stack(trace, new_sp, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE, 1); + S390_lowcore.thread_info + THREAD_SIZE, 0); } void save_stack_trace(struct stack_trace *trace) @@ -98,7 +98,7 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) } low = (unsigned long) task_stack_page(tsk); high = (unsigned long) task_pt_regs(tsk); - save_context_stack(trace, sp, low, high, 0); + save_context_stack(trace, sp, low, high, 1); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } -- cgit v1.1 From 76737ce17ab4c88f14e4452076610474108246d7 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 31 Jan 2016 17:06:16 +0100 Subject: s390: add current_stack_pointer() helper function Implement current_stack_pointer() helper function and use it everywhere, instead of having several different inline assembly variants. Signed-off-by: Heiko Carstens Tested-by: Peter Oberparleiter Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 8 ++++++++ arch/s390/kernel/dumpstack.c | 16 ++++++++-------- arch/s390/kernel/irq.c | 3 +-- arch/s390/kernel/stacktrace.c | 9 +++------ 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 1c4fe12..073e18b 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -203,6 +203,14 @@ unsigned long get_wchan(struct task_struct *p); /* Has task runtime instrumentation enabled ? */ #define is_ri_task(tsk) (!!(tsk)->thread.ri_cb) +static inline unsigned long current_stack_pointer(void) +{ + unsigned long sp; + + asm volatile("la %0,0(15)" : "=a" (sp)); + return sp; +} + static inline unsigned short stap(void) { unsigned short cpu_address; diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 02bd02f..feda48f 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -67,12 +67,11 @@ static void show_trace(struct task_struct *task, unsigned long *stack) { const unsigned long frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - register unsigned long __r15 asm ("15"); unsigned long sp; sp = (unsigned long) stack; if (!sp) - sp = task ? task->thread.ksp : __r15; + sp = task ? task->thread.ksp : current_stack_pointer(); printk("Call Trace:\n"); #ifdef CONFIG_CHECK_STACK sp = __show_trace(sp, @@ -95,15 +94,16 @@ static void show_trace(struct task_struct *task, unsigned long *stack) void show_stack(struct task_struct *task, unsigned long *sp) { - register unsigned long *__r15 asm ("15"); unsigned long *stack; int i; - if (!sp) - stack = task ? (unsigned long *) task->thread.ksp : __r15; - else - stack = sp; - + stack = sp; + if (!stack) { + if (!task) + stack = (unsigned long *)current_stack_pointer(); + else + stack = (unsigned long *)task->thread.ksp; + } for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index f41d520..c373a1d 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -164,8 +164,7 @@ void do_softirq_own_stack(void) { unsigned long old, new; - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (old)); + old = current_stack_pointer(); /* Check against async. stack address range. */ new = S390_lowcore.async_stack; if (((new - old) >> (PAGE_SHIFT + THREAD_ORDER)) != 0) { diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 3872d14..7dd8360 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -77,10 +77,9 @@ static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) void save_stack_trace(struct stack_trace *trace) { - register unsigned long r15 asm ("15"); unsigned long sp; - sp = r15; + sp = current_stack_pointer(); __save_stack_trace(trace, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; @@ -92,10 +91,8 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) unsigned long sp, low, high; sp = tsk->thread.ksp; - if (tsk == current) { - /* Get current stack pointer. */ - asm volatile("la %0,0(15)" : "=a" (sp)); - } + if (tsk == current) + sp = current_stack_pointer(); low = (unsigned long) task_stack_page(tsk); high = (unsigned long) task_pt_regs(tsk); save_context_stack(trace, sp, low, high, 1); -- cgit v1.1 From ca0fd75a54cd4b1b1957bfa5611644f5b8b8465e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 31 Jan 2016 17:15:56 +0100 Subject: s390/dumpstack: add missing ri bit to show_registers() output Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index feda48f..9a4852b 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -142,7 +142,8 @@ void show_registers(struct pt_regs *regs) mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); - printk(" EA:%x", mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); + printk(" RI:%x EA:%x", mask_bits(regs, PSW_MASK_RI), + mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", -- cgit v1.1 From 1f021ea0da3bc499387d87e31aeb1e36d2db052b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Sun, 31 Jan 2016 17:36:59 +0100 Subject: s390/dumpstack: use bit fields to decode psw mask in show_registers() Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 9a4852b..e1c786d 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -121,13 +121,9 @@ static void show_last_breaking_event(struct pt_regs *regs) printk(" [<%016lx>] %pSR\n", regs->args[0], (void *)regs->args[0]); } -static inline int mask_bits(struct pt_regs *regs, unsigned long bits) -{ - return (regs->psw.mask & bits) / ((~bits + 1) & bits); -} - void show_registers(struct pt_regs *regs) { + struct psw_bits *psw = &psw_bits(regs->psw); char *mode; mode = user_mode(regs) ? "User" : "Krnl"; @@ -136,14 +132,9 @@ void show_registers(struct pt_regs *regs) printk(" (%pSR)", (void *)regs->psw.addr); printk("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " - "P:%x AS:%x CC:%x PM:%x", mask_bits(regs, PSW_MASK_PER), - mask_bits(regs, PSW_MASK_DAT), mask_bits(regs, PSW_MASK_IO), - mask_bits(regs, PSW_MASK_EXT), mask_bits(regs, PSW_MASK_KEY), - mask_bits(regs, PSW_MASK_MCHECK), mask_bits(regs, PSW_MASK_WAIT), - mask_bits(regs, PSW_MASK_PSTATE), mask_bits(regs, PSW_MASK_ASC), - mask_bits(regs, PSW_MASK_CC), mask_bits(regs, PSW_MASK_PM)); - printk(" RI:%x EA:%x", mask_bits(regs, PSW_MASK_RI), - mask_bits(regs, PSW_MASK_EA | PSW_MASK_BA)); + "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, + psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); + printk(" RI:%x EA:%x", psw->ri, psw->eaba); printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", -- cgit v1.1 From 9bfefde718c1352d9499125bce50b2a0e8a3db4c Mon Sep 17 00:00:00 2001 From: Stefan Haberland Date: Tue, 15 Dec 2015 11:00:51 +0100 Subject: s390/dasd: fix incorrect locking order for LCU device add/remove The correct lock order for LCU lock and cdev lock is to take the cdev lock first and afterwards the LCU lock. This is caused by the fact that LCU functions are called in an interrupt context with the cdev lock implicitly hold by CIO. To assure the right locking order but also be able to iterate over devices in a LCU introduce a trylock block that can be called with the device lock for one device hold and then takes the LCU lock and try to lock all devices accounted to this LCU. Afterwards all devices and the LCU itself are locked. Signed-off-by: Stefan Haberland Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_alias.c | 235 ++++++++++++++++++++++++++-------------- 1 file changed, 152 insertions(+), 83 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index 286782c..cbbdd3e 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -319,22 +319,14 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, struct dasd_eckd_private *private; struct alias_pav_group *group; struct dasd_uid uid; - unsigned long flags; private = (struct dasd_eckd_private *) device->private; - /* only lock if not already locked */ - if (device != pos) - spin_lock_irqsave_nested(get_ccwdev_lock(device->cdev), flags, - CDEV_NESTED_SECOND); private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type; private->uid.base_unit_addr = lcu->uac->unit[private->uid.real_unit_addr].base_ua; uid = private->uid; - if (device != pos) - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - /* if we have no PAV anyway, we don't need to bother with PAV groups */ if (lcu->pav == NO_PAV) { list_move(&device->alias_list, &lcu->active_devices); @@ -411,6 +403,130 @@ suborder_not_supported(struct dasd_ccw_req *cqr) return 0; } +/* + * This function tries to lock all devices on an lcu via trylock + * return NULL on success otherwise return first failed device + */ +static struct dasd_device *_trylock_all_devices_on_lcu(struct alias_lcu *lcu, + struct dasd_device *pos) + +{ + struct alias_pav_group *pavgroup; + struct dasd_device *device; + + list_for_each_entry(device, &lcu->active_devices, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + list_for_each_entry(device, &lcu->inactive_devices, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + list_for_each_entry(pavgroup, &lcu->grouplist, group) { + list_for_each_entry(device, &pavgroup->baselist, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { + if (device == pos) + continue; + if (!spin_trylock(get_ccwdev_lock(device->cdev))) + return device; + } + } + return NULL; +} + +/* + * unlock all devices except the one that is specified as pos + * stop if enddev is specified and reached + */ +static void _unlock_all_devices_on_lcu(struct alias_lcu *lcu, + struct dasd_device *pos, + struct dasd_device *enddev) + +{ + struct alias_pav_group *pavgroup; + struct dasd_device *device; + + list_for_each_entry(device, &lcu->active_devices, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &lcu->inactive_devices, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(pavgroup, &lcu->grouplist, group) { + list_for_each_entry(device, &pavgroup->baselist, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { + if (device == pos) + continue; + if (device == enddev) + return; + spin_unlock(get_ccwdev_lock(device->cdev)); + } + } +} + +/* + * this function is needed because the locking order + * device lock -> lcu lock + * needs to be assured when iterating over devices in an LCU + * + * if a device is specified in pos then the device lock is already hold + */ +static void _trylock_and_lock_lcu_irqsave(struct alias_lcu *lcu, + struct dasd_device *pos, + unsigned long *flags) +{ + struct dasd_device *failed; + + do { + spin_lock_irqsave(&lcu->lock, *flags); + failed = _trylock_all_devices_on_lcu(lcu, pos); + if (failed) { + _unlock_all_devices_on_lcu(lcu, pos, failed); + spin_unlock_irqrestore(&lcu->lock, *flags); + cpu_relax(); + } + } while (failed); +} + +static void _trylock_and_lock_lcu(struct alias_lcu *lcu, + struct dasd_device *pos) +{ + struct dasd_device *failed; + + do { + spin_lock(&lcu->lock); + failed = _trylock_all_devices_on_lcu(lcu, pos); + if (failed) { + _unlock_all_devices_on_lcu(lcu, pos, failed); + spin_unlock(&lcu->lock); + cpu_relax(); + } + } while (failed); +} + static int read_unit_address_configuration(struct dasd_device *device, struct alias_lcu *lcu) { @@ -505,10 +621,7 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) if (rc) return rc; - /* need to take cdev lock before lcu lock */ - spin_lock_irqsave_nested(get_ccwdev_lock(refdev->cdev), flags, - CDEV_NESTED_FIRST); - spin_lock(&lcu->lock); + _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags); lcu->pav = NO_PAV; for (i = 0; i < MAX_DEVICES_PER_LCU; ++i) { switch (lcu->uac->unit[i].ua_type) { @@ -527,8 +640,8 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) alias_list) { _add_device_to_lcu(lcu, device, refdev); } - spin_unlock(&lcu->lock); - spin_unlock_irqrestore(get_ccwdev_lock(refdev->cdev), flags); + _unlock_all_devices_on_lcu(lcu, NULL, NULL); + spin_unlock_irqrestore(&lcu->lock, flags); return 0; } @@ -616,8 +729,6 @@ int dasd_alias_add_device(struct dasd_device *device) private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; rc = 0; - - /* need to take cdev lock before lcu lock */ spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); spin_lock(&lcu->lock); if (!(lcu->flags & UPDATE_PENDING)) { @@ -754,30 +865,19 @@ static void _restart_all_base_devices_on_lcu(struct alias_lcu *lcu) struct alias_pav_group *pavgroup; struct dasd_device *device; struct dasd_eckd_private *private; - unsigned long flags; /* active and inactive list can contain alias as well as base devices */ list_for_each_entry(device, &lcu->active_devices, alias_list) { private = (struct dasd_eckd_private *) device->private; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - if (private->uid.type != UA_BASE_DEVICE) { - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); + if (private->uid.type != UA_BASE_DEVICE) continue; - } - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } list_for_each_entry(device, &lcu->inactive_devices, alias_list) { private = (struct dasd_eckd_private *) device->private; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); - if (private->uid.type != UA_BASE_DEVICE) { - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); + if (private->uid.type != UA_BASE_DEVICE) continue; - } - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } @@ -841,38 +941,20 @@ static void flush_all_alias_devices_on_lcu(struct alias_lcu *lcu) spin_unlock_irqrestore(&lcu->lock, flags); } -static void __stop_device_on_lcu(struct dasd_device *device, - struct dasd_device *pos) -{ - /* If pos == device then device is already locked! */ - if (pos == device) { - dasd_device_set_stop_bits(pos, DASD_STOPPED_SU); - return; - } - spin_lock(get_ccwdev_lock(pos->cdev)); - dasd_device_set_stop_bits(pos, DASD_STOPPED_SU); - spin_unlock(get_ccwdev_lock(pos->cdev)); -} - -/* - * This function is called in interrupt context, so the - * cdev lock for device is already locked! - */ -static void _stop_all_devices_on_lcu(struct alias_lcu *lcu, - struct dasd_device *device) +static void _stop_all_devices_on_lcu(struct alias_lcu *lcu) { struct alias_pav_group *pavgroup; - struct dasd_device *pos; + struct dasd_device *device; - list_for_each_entry(pos, &lcu->active_devices, alias_list) - __stop_device_on_lcu(device, pos); - list_for_each_entry(pos, &lcu->inactive_devices, alias_list) - __stop_device_on_lcu(device, pos); + list_for_each_entry(device, &lcu->active_devices, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); + list_for_each_entry(device, &lcu->inactive_devices, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(pos, &pavgroup->baselist, alias_list) - __stop_device_on_lcu(device, pos); - list_for_each_entry(pos, &pavgroup->aliaslist, alias_list) - __stop_device_on_lcu(device, pos); + list_for_each_entry(device, &pavgroup->baselist, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) + dasd_device_set_stop_bits(device, DASD_STOPPED_SU); } } @@ -880,33 +962,16 @@ static void _unstop_all_devices_on_lcu(struct alias_lcu *lcu) { struct alias_pav_group *pavgroup; struct dasd_device *device; - unsigned long flags; - list_for_each_entry(device, &lcu->active_devices, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &lcu->active_devices, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - } - - list_for_each_entry(device, &lcu->inactive_devices, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &lcu->inactive_devices, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); - } - list_for_each_entry(pavgroup, &lcu->grouplist, group) { - list_for_each_entry(device, &pavgroup->baselist, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &pavgroup->baselist, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); - } - list_for_each_entry(device, &pavgroup->aliaslist, alias_list) { - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); + list_for_each_entry(device, &pavgroup->aliaslist, alias_list) dasd_device_remove_stop_bits(device, DASD_STOPPED_SU); - spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), - flags); - } } } @@ -932,13 +997,14 @@ static void summary_unit_check_handling_work(struct work_struct *work) spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); reset_summary_unit_check(lcu, device, suc_data->reason); - spin_lock_irqsave(&lcu->lock, flags); + _trylock_and_lock_lcu_irqsave(lcu, NULL, &flags); _unstop_all_devices_on_lcu(lcu); _restart_all_base_devices_on_lcu(lcu); /* 3. read new alias configuration */ _schedule_lcu_update(lcu, device); lcu->suc_data.device = NULL; dasd_put_device(device); + _unlock_all_devices_on_lcu(lcu, NULL, NULL); spin_unlock_irqrestore(&lcu->lock, flags); } @@ -974,10 +1040,7 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, " unit check (no lcu structure)"); return; } - spin_lock(&lcu->lock); - _stop_all_devices_on_lcu(lcu, device); - /* prepare for lcu_update */ - private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING; + _trylock_and_lock_lcu(lcu, device); /* If this device is about to be removed just return and wait for * the next interrupt on a different device */ @@ -985,6 +1048,7 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, DBF_DEV_EVENT(DBF_WARNING, device, "%s", "device is in offline processing," " don't do summary unit check handling"); + _unlock_all_devices_on_lcu(lcu, device, NULL); spin_unlock(&lcu->lock); return; } @@ -993,12 +1057,17 @@ void dasd_alias_handle_summary_unit_check(struct dasd_device *device, DBF_DEV_EVENT(DBF_WARNING, device, "%s", "previous instance of summary unit check worker" " still pending"); + _unlock_all_devices_on_lcu(lcu, device, NULL); spin_unlock(&lcu->lock); return ; } + _stop_all_devices_on_lcu(lcu); + /* prepare for lcu_update */ + private->lcu->flags |= NEED_UAC_UPDATE | UPDATE_PENDING; lcu->suc_data.reason = reason; lcu->suc_data.device = device; dasd_get_device(device); + _unlock_all_devices_on_lcu(lcu, device, NULL); spin_unlock(&lcu->lock); if (!schedule_work(&lcu->suc_data.worker)) dasd_put_device(device); -- cgit v1.1 From 3c2c126a675bd8a95d5ed186fe9cd788f438bff1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 5 Feb 2016 15:37:22 +0100 Subject: s390/mm: remove unnecessary indirection with pgste_update_all The first parameter of pgste_update_all is a pointer to a pte. Simplify the code by passing the pte value. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 64ead80..21e95cd 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -680,15 +680,15 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste) #endif } -static inline pgste_t pgste_update_all(pte_t *ptep, pgste_t pgste, +static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, struct mm_struct *mm) { #ifdef CONFIG_PGSTE unsigned long address, bits, skey; - if (!mm_use_skey(mm) || pte_val(*ptep) & _PAGE_INVALID) + if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) return pgste; - address = pte_val(*ptep) & PAGE_MASK; + address = pte_val(pte) & PAGE_MASK; skey = (unsigned long) page_get_storage_key(address); bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); /* Transfer page changed & referenced bit to guest bits in pgste */ @@ -1067,7 +1067,7 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { pgste_t pgste; - pte_t pte, oldpte; + pte_t pte; int young; if (mm_has_pgste(vma->vm_mm)) { @@ -1075,17 +1075,16 @@ static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); } - oldpte = pte = *ptep; + pte = *ptep; ptep_flush_direct(vma->vm_mm, addr, ptep); young = pte_young(pte); - pte = pte_mkold(pte); if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_update_all(&oldpte, pgste, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, pte); + pgste = pgste_update_all(pte, pgste, vma->vm_mm); + pgste = pgste_set_pte(ptep, pgste, pte_mkold(pte)); pgste_set_unlock(ptep, pgste); } else - *ptep = pte; + *ptep = pte_mkold(pte); return young; } @@ -1127,7 +1126,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, pte_val(*ptep) = _PAGE_INVALID; if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); + pgste = pgste_update_all(pte, pgste, mm); pgste_set_unlock(ptep, pgste); } return pte; @@ -1150,7 +1149,7 @@ static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, ptep_flush_lazy(mm, address, ptep); if (mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); + pgste = pgste_update_all(pte, pgste, mm); pgste_set(ptep, pgste); } return pte; @@ -1191,7 +1190,7 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) pte_val(pte) |= _PAGE_UNUSED; - pgste = pgste_update_all(&pte, pgste, vma->vm_mm); + pgste = pgste_update_all(pte, pgste, vma->vm_mm); pgste_set_unlock(ptep, pgste); } return pte; @@ -1223,7 +1222,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, pte_val(*ptep) = _PAGE_INVALID; if (!full && mm_has_pgste(mm)) { - pgste = pgste_update_all(&pte, pgste, mm); + pgste = pgste_update_all(pte, pgste, mm); pgste_set_unlock(ptep, pgste); } return pte; -- cgit v1.1 From 758d39ebd3d5666edb3b1c339f7f138c349ff8bf Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 9 Feb 2016 12:58:54 +0100 Subject: s390/dumpstack: merge all four stack tracers We have four different stack tracers of which three had bugs. So it's time to merge them to a single stack tracer which allows to specify a call back function which will be called for each step. This patch changes behavior a bit: - the "nosched" and "in_sched_functions" check within save_stack_trace_tsk did work only for the last stack frame within a context. Now it considers the check for each stack frame like it should. - both the oprofile variant and the perf_events variant did save a return address twice if a zero back chain was detected, which indicates an interrupt frame. The new dump_trace function will call the oprofile and perf_events backends with the psw address that is contained within the corresponding pt_regs structure instead. - the original show_trace and save_context_stack functions did already use the psw address of the pt_regs structure if a zero back chain was detected. However now we ignore the psw address if it is a user space address. After all we trace the kernel stack and not the user space stack. This way we also get rid of the garbage user space address in case of warnings and / or panic call traces. So this should make life easier since now there is only one stack tracer left which we can break. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 4 ++ arch/s390/kernel/dumpstack.c | 63 +++++++++++++++++------------ arch/s390/kernel/perf_event.c | 56 +++----------------------- arch/s390/kernel/stacktrace.c | 84 ++++++++++----------------------------- arch/s390/oprofile/Makefile | 2 +- arch/s390/oprofile/backtrace.c | 78 ------------------------------------ arch/s390/oprofile/init.c | 20 +++++++++- 7 files changed, 89 insertions(+), 218 deletions(-) delete mode 100644 arch/s390/oprofile/backtrace.c diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 073e18b..d6fd22e 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -184,6 +184,10 @@ struct task_struct; struct mm_struct; struct seq_file; +typedef int (*dump_trace_func_t)(void *data, unsigned long address); +void dump_trace(dump_trace_func_t func, void *data, + struct task_struct *task, unsigned long sp); + void show_cacheinfo(struct seq_file *m); /* Free all resources held by a thread. */ diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index e1c786d..2150b01 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -19,28 +19,28 @@ #include /* - * For show_trace we have tree different stack to consider: + * For dump_trace we have tree different stack to consider: * - the panic stack which is used if the kernel stack has overflown * - the asynchronous interrupt stack (cpu related) * - the synchronous kernel stack (process related) - * The stack trace can start at any of the three stack and can potentially + * The stack trace can start at any of the three stacks and can potentially * touch all of them. The order is: panic stack, async stack, sync stack. */ static unsigned long -__show_trace(unsigned long sp, unsigned long low, unsigned long high) +__dump_trace(dump_trace_func_t func, void *data, unsigned long sp, + unsigned long low, unsigned long high) { struct stack_frame *sf; struct pt_regs *regs; - unsigned long addr; while (1) { if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk("([<%016lx>] %pSR)\n", addr, (void *)addr); /* Follow the backchain. */ while (1) { + if (func(data, sf->gprs[8])) + return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -48,45 +48,58 @@ __show_trace(unsigned long sp, unsigned long low, unsigned long high) if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; - addr = sf->gprs[8]; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); if (sp <= low || sp > high - sizeof(*regs)) return sp; regs = (struct pt_regs *) sp; - addr = regs->psw.addr; - printk(" [<%016lx>] %pSR\n", addr, (void *)addr); + if (!user_mode(regs)) { + if (func(data, regs->psw.addr)) + return sp; + } low = sp; sp = regs->gprs[15]; } } -static void show_trace(struct task_struct *task, unsigned long *stack) +void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, + unsigned long sp) { - const unsigned long frame_size = - STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - unsigned long sp; + unsigned long frame_size; - sp = (unsigned long) stack; - if (!sp) - sp = task ? task->thread.ksp : current_stack_pointer(); - printk("Call Trace:\n"); + frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); #ifdef CONFIG_CHECK_STACK - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.panic_stack + frame_size - 4096, S390_lowcore.panic_stack + frame_size); #endif - sp = __show_trace(sp, + sp = __dump_trace(func, data, sp, S390_lowcore.async_stack + frame_size - ASYNC_SIZE, S390_lowcore.async_stack + frame_size); if (task) - __show_trace(sp, (unsigned long) task_stack_page(task), - (unsigned long) task_stack_page(task) + THREAD_SIZE); + __dump_trace(func, data, sp, + (unsigned long)task_stack_page(task), + (unsigned long)task_stack_page(task) + THREAD_SIZE); else - __show_trace(sp, S390_lowcore.thread_info, + __dump_trace(func, data, sp, + S390_lowcore.thread_info, S390_lowcore.thread_info + THREAD_SIZE); +} +EXPORT_SYMBOL_GPL(dump_trace); + +static int show_address(void *data, unsigned long address) +{ + printk("([<%016lx>] %pSR)\n", address, (void *)address); + return 0; +} + +static void show_trace(struct task_struct *task, unsigned long sp) +{ + if (!sp) + sp = task ? task->thread.ksp : current_stack_pointer(); + printk("Call Trace:\n"); + dump_trace(show_address, NULL, task, sp); if (!task) task = current; debug_show_held_locks(task); @@ -112,7 +125,7 @@ void show_stack(struct task_struct *task, unsigned long *sp) printk("%016lx ", *stack++); } printk("\n"); - show_trace(task, sp); + show_trace(task, (unsigned long)sp); } static void show_last_breaking_event(struct pt_regs *regs) @@ -152,7 +165,7 @@ void show_regs(struct pt_regs *regs) show_registers(regs); /* Show stack backtrace if pt_regs is from kernel mode */ if (!user_mode(regs)) - show_trace(NULL, (unsigned long *) regs->gprs[15]); + show_trace(NULL, regs->gprs[15]); show_last_breaking_event(regs); } diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 0943b11..a1b7086 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,64 +222,20 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -/* See also arch/s390/kernel/traps.c */ -static unsigned long __store_trace(struct perf_callchain_entry *entry, - unsigned long sp, - unsigned long low, unsigned long high) +static int __perf_callchain_kernel(void *data, unsigned long address) { - struct stack_frame *sf; - struct pt_regs *regs; - - while (1) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - /* Follow the backchain. */ - while (1) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - perf_callchain_store(entry, sf->gprs[8]); - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - perf_callchain_store(entry, sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } + struct perf_callchain_entry *entry = data; + + perf_callchain_store(entry, address); + return 0; } void perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { - unsigned long head, frame_size; - struct stack_frame *head_sf; - if (user_mode(regs)) return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame *) head; - - if (!head_sf || !head_sf->back_chain) - return; - - head = head_sf->back_chain; - head = __store_trace(entry, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __store_trace(entry, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); + dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]); } /* Perf defintions for PMU event attributes in sysfs */ diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 7dd8360..44f84b2 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -10,69 +10,31 @@ #include #include -static unsigned long save_context_stack(struct stack_trace *trace, - unsigned long sp, - unsigned long low, - unsigned long high, - int nosched) +static int __save_address(void *data, unsigned long address, int nosched) { - struct stack_frame *sf; - struct pt_regs *regs; - unsigned long addr; + struct stack_trace *trace = data; - while(1) { - if (sp < low || sp > high) - return sp; - sf = (struct stack_frame *)sp; - while(1) { - addr = sf->gprs[8]; - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *)sp; - } - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long)(sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *)sp; - addr = regs->psw.addr; - if (!nosched || !in_sched_functions(addr)) { - if (!trace->skip) - trace->entries[trace->nr_entries++] = addr; - else - trace->skip--; - } - if (trace->nr_entries >= trace->max_entries) - return sp; - low = sp; - sp = regs->gprs[15]; + if (nosched && in_sched_functions(address)) + return 0; + if (trace->skip > 0) { + trace->skip--; + return 0; } + if (trace->nr_entries < trace->max_entries) { + trace->entries[trace->nr_entries++] = address; + return 0; + } + return 1; } -static void __save_stack_trace(struct stack_trace *trace, unsigned long sp) +static int save_address(void *data, unsigned long address) { - unsigned long new_sp, frame_size; + return __save_address(data, address, 0); +} - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - new_sp = save_context_stack(trace, sp, - S390_lowcore.panic_stack + frame_size - PAGE_SIZE, - S390_lowcore.panic_stack + frame_size, 0); - new_sp = save_context_stack(trace, new_sp, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size, 0); - save_context_stack(trace, new_sp, - S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE, 0); +static int save_address_nosched(void *data, unsigned long address) +{ + return __save_address(data, address, 1); } void save_stack_trace(struct stack_trace *trace) @@ -80,7 +42,7 @@ void save_stack_trace(struct stack_trace *trace) unsigned long sp; sp = current_stack_pointer(); - __save_stack_trace(trace, sp); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -88,14 +50,12 @@ EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { - unsigned long sp, low, high; + unsigned long sp; sp = tsk->thread.ksp; if (tsk == current) sp = current_stack_pointer(); - low = (unsigned long) task_stack_page(tsk); - high = (unsigned long) task_pt_regs(tsk); - save_context_stack(trace, sp, low, high, 1); + dump_trace(save_address_nosched, trace, tsk, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } @@ -106,7 +66,7 @@ void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) unsigned long sp; sp = kernel_stack_pointer(regs); - __save_stack_trace(trace, sp); + dump_trace(save_address, trace, NULL, sp); if (trace->nr_entries < trace->max_entries) trace->entries[trace->nr_entries++] = ULONG_MAX; } diff --git a/arch/s390/oprofile/Makefile b/arch/s390/oprofile/Makefile index 1bd2301..496e4a7 100644 --- a/arch/s390/oprofile/Makefile +++ b/arch/s390/oprofile/Makefile @@ -6,5 +6,5 @@ DRIVER_OBJS = $(addprefix ../../../drivers/oprofile/, \ oprofilefs.o oprofile_stats.o \ timer_int.o ) -oprofile-y := $(DRIVER_OBJS) init.o backtrace.o +oprofile-y := $(DRIVER_OBJS) init.o oprofile-y += hwsampler.o diff --git a/arch/s390/oprofile/backtrace.c b/arch/s390/oprofile/backtrace.c deleted file mode 100644 index 1884e17..0000000 --- a/arch/s390/oprofile/backtrace.c +++ /dev/null @@ -1,78 +0,0 @@ -/* - * S390 Version - * Copyright IBM Corp. 2005 - * Author(s): Andreas Krebbel - */ - -#include - -#include /* for struct stack_frame */ - -static unsigned long -__show_trace(unsigned int *depth, unsigned long sp, - unsigned long low, unsigned long high) -{ - struct stack_frame *sf; - struct pt_regs *regs; - - while (*depth) { - if (sp < low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - /* Follow the backchain. */ - while (*depth) { - low = sp; - sp = sf->back_chain; - if (!sp) - break; - if (sp <= low || sp > high - sizeof(*sf)) - return sp; - sf = (struct stack_frame *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - - } - - if (*depth == 0) - break; - - /* Zero backchain detected, check for interrupt frame. */ - sp = (unsigned long) (sf + 1); - if (sp <= low || sp > high - sizeof(*regs)) - return sp; - regs = (struct pt_regs *) sp; - (*depth)--; - oprofile_add_trace(sf->gprs[8]); - low = sp; - sp = regs->gprs[15]; - } - return sp; -} - -void s390_backtrace(struct pt_regs * const regs, unsigned int depth) -{ - unsigned long head, frame_size; - struct stack_frame* head_sf; - - if (user_mode(regs)) - return; - - frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs); - head = regs->gprs[15]; - head_sf = (struct stack_frame*)head; - - if (!head_sf->back_chain) - return; - - head = head_sf->back_chain; - - head = __show_trace(&depth, head, - S390_lowcore.async_stack + frame_size - ASYNC_SIZE, - S390_lowcore.async_stack + frame_size); - - __show_trace(&depth, head, S390_lowcore.thread_info, - S390_lowcore.thread_info + THREAD_SIZE); -} diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 9cfa2ff..d85d1e6 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -20,8 +20,6 @@ #include "../../../drivers/oprofile/oprof.h" -extern void s390_backtrace(struct pt_regs * const regs, unsigned int depth); - #include "hwsampler.h" #include "op_counter.h" @@ -494,6 +492,24 @@ static void oprofile_hwsampler_exit(void) hwsampler_shutdown(); } +static int __s390_backtrace(void *data, unsigned long address) +{ + unsigned int *depth = data; + + if (*depth == 0) + return 1; + (*depth)--; + oprofile_add_trace(address); + return 0; +} + +static void s390_backtrace(struct pt_regs *regs, unsigned int depth) +{ + if (user_mode(regs)) + return; + dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]); +} + int __init oprofile_arch_init(struct oprofile_operations *ops) { ops->backtrace = s390_backtrace; -- cgit v1.1 From 13c6a790d77c3fe81e1a25524d1f9115b0da9404 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 10 Feb 2016 16:47:14 +0100 Subject: s390/mm: correct comment about segment table entries The comment describing the bit encoding for segment table entries is incorrect in regard to the read and write bits. The segment read bit is 0x0002 and write is 0x0001, not the other way around. Reported-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 21e95cd..7be9ae8 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -298,15 +298,15 @@ static inline int is_module_addr(void *addr) /* * Segment table entry encoding (R = read-only, I = invalid, y = young bit): - * dy..R...I...wr + * dy..R...I...rw * prot-none, clean, old 00..1...1...00 * prot-none, clean, young 01..1...1...00 * prot-none, dirty, old 10..1...1...00 * prot-none, dirty, young 11..1...1...00 - * read-only, clean, old 00..1...1...01 - * read-only, clean, young 01..1...0...01 - * read-only, dirty, old 10..1...1...01 - * read-only, dirty, young 11..1...0...01 + * read-only, clean, old 00..1...1...10 + * read-only, clean, young 01..1...0...10 + * read-only, dirty, old 10..1...1...10 + * read-only, dirty, young 11..1...0...10 * read-write, clean, old 00..1...1...11 * read-write, clean, young 01..1...0...11 * read-write, dirty, old 10..0...1...11 -- cgit v1.1 From bb3aa614014e11aea2c1aab528752e12b54df62d Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 19 Feb 2016 10:46:08 +0100 Subject: s390: add z13s model number to z13 elf platform Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/Kconfig | 8 ++++---- arch/s390/kernel/setup.c | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 3be9c83..683af90 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -254,12 +254,12 @@ config MARCH_ZEC12 older machines. config MARCH_Z13 - bool "IBM z13" + bool "IBM z13s and z13" select HAVE_MARCH_Z13_FEATURES help - Select this to enable optimizations for IBM z13 (2964 series). - The kernel will be slightly faster but will not work on older - machines. + Select this to enable optimizations for IBM z13s and z13 (2965 and + 2964 series). The kernel will be slightly faster but will not work on + older machines. endchoice diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 9220db5..b1fbcc0 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -779,6 +779,7 @@ static int __init setup_hwcaps(void) strcpy(elf_platform, "zEC12"); break; case 0x2964: + case 0x2965: strcpy(elf_platform, "z13"); break; } -- cgit v1.1 From 993e0681084c8e84dd870bffedec9410778dfa87 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 19 Feb 2016 10:47:48 +0100 Subject: s390/oprofile: add z13/z13s model numbers Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/oprofile/init.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index d85d1e6..791935a 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -454,6 +454,7 @@ static int oprofile_hwsampler_init(struct oprofile_operations *ops) case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break; case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break; case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break; + case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break; default: return -ENODEV; } } -- cgit v1.1 From 443a813304ec36d4e81264b6a452a412a6b3ad9b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 24 Feb 2016 10:18:50 +0100 Subject: s390/kvm: simplify set_guest_storage_key Git commit ab3f285f227fec62868037e9b1b1fd18294a83b8 "KVM: s390/mm: try a cow on read only pages for key ops" added a fixup_user_fault to set_guest_storage_key force a copy on write if the page is mapped read-only. This is supposed to fix the problem of differing storage keys for shared mappings, e.g. the empty_zero_page. But if the storage key is set before the pte is mapped the storage key update is done on the pgste. A later fault will happily map the shared page with the key from the pgste. Eventually git commit 2faee8ff9dc6f4bfe46f6d2d110add858140fb20 "s390/mm: prevent and break zero page mappings in case of storage keys" fixed this problem for the empty_zero_page. The commit makes sure that guests enabled for storage keys will not use the empty_zero_page at all. As the call to fixup_user_fault in set_guest_storage_key depends on the order of the storage key operation vs. the fault that maps the pte it does not really fix anything. Just remove it. Reviewed-by: Dominik Dingel Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgtable.c | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 5109827..6acd717 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -809,30 +809,13 @@ int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, spinlock_t *ptl; pgste_t old, new; pte_t *ptep; - bool unlocked; down_read(&mm->mmap_sem); -retry: - unlocked = false; ptep = get_locked_pte(mm, addr, &ptl); if (unlikely(!ptep)) { up_read(&mm->mmap_sem); return -EFAULT; } - if (!(pte_val(*ptep) & _PAGE_INVALID) && - (pte_val(*ptep) & _PAGE_PROTECT)) { - pte_unmap_unlock(ptep, ptl); - /* - * We do not really care about unlocked. We will retry either - * way. But this allows fixup_user_fault to enable userfaultfd. - */ - if (fixup_user_fault(current, mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - goto retry; - } new = old = pgste_get_lock(ptep); pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | -- cgit v1.1 From ee8479bb97dc790904e9afad503ae833e28ed8d3 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 25 Feb 2016 10:28:49 +0100 Subject: s390/dis: use correct escape sequence for '%' character The double escape character sequence introduced with commit 272fa59ccb4f ("s390/dis: Fix handling of format specifiers") is not necessary anymore since commit 561e10300269 ("s390/dis: Fix printing of the register numbers"). Instead this now generates an extra '%' character: lg %%r1,160(%%r11) So fix this and basically revert 272fa59ccb4f. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 62973ef..8cb9bfd 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -1920,23 +1920,16 @@ static int print_insn(char *buffer, unsigned char *code, unsigned long addr) } if (separator) ptr += sprintf(ptr, "%c", separator); - /* - * Use four '%' characters below because of the - * following two conversions: - * - * 1) sprintf: %%%%r -> %%r - * 2) printk : %%r -> %r - */ if (operand->flags & OPERAND_GPR) - ptr += sprintf(ptr, "%%%%r%i", value); + ptr += sprintf(ptr, "%%r%i", value); else if (operand->flags & OPERAND_FPR) - ptr += sprintf(ptr, "%%%%f%i", value); + ptr += sprintf(ptr, "%%f%i", value); else if (operand->flags & OPERAND_AR) - ptr += sprintf(ptr, "%%%%a%i", value); + ptr += sprintf(ptr, "%%a%i", value); else if (operand->flags & OPERAND_CR) - ptr += sprintf(ptr, "%%%%c%i", value); + ptr += sprintf(ptr, "%%c%i", value); else if (operand->flags & OPERAND_VR) - ptr += sprintf(ptr, "%%%%v%i", value); + ptr += sprintf(ptr, "%%v%i", value); else if (operand->flags & OPERAND_PCREL) ptr += sprintf(ptr, "%lx", (signed int) value + addr); -- cgit v1.1 From 5d7eccecf8621e3cf5adcec9cf80aa444b4610d4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 24 Feb 2016 14:27:46 +0100 Subject: s390/fault: merge report_user_fault implementations We have two close to identical report_user_fault functions. Add a parameter to one and get rid of the other one in order to reduce code duplication. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/setup.h | 2 ++ arch/s390/kernel/traps.c | 21 ++------------------- arch/s390/mm/fault.c | 15 ++++++++------- 3 files changed, 12 insertions(+), 26 deletions(-) diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index 6983722..c0f0efb 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -101,6 +101,8 @@ extern void pfault_fini(void); #define pfault_fini() do { } while (0) #endif /* CONFIG_PFAULT */ +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault); + extern void cmma_init(void); extern void (*_machine_restart)(char *command); diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 017eb03d..dd97a3e 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -22,8 +22,6 @@ #include #include "entry.h" -int show_unhandled_signals = 1; - static inline void __user *get_trap_ip(struct pt_regs *regs) { unsigned long address; @@ -35,21 +33,6 @@ static inline void __user *get_trap_ip(struct pt_regs *regs) return (void __user *) (address - (regs->int_code >> 16)); } -static inline void report_user_fault(struct pt_regs *regs, int signr) -{ - if ((task_pid_nr(current) > 1) && !show_unhandled_signals) - return; - if (!unhandled_signal(current, signr)) - return; - if (!printk_ratelimit()) - return; - printk("User process fault: interruption code %04x ilc:%d ", - regs->int_code & 0xffff, regs->int_code >> 17); - print_vma_addr("in ", regs->psw.addr); - printk("\n"); - show_regs(regs); -} - int is_valid_bugaddr(unsigned long addr) { return 1; @@ -65,7 +48,7 @@ void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) info.si_code = si_code; info.si_addr = get_trap_ip(regs); force_sig_info(si_signo, &info, current); - report_user_fault(regs, si_signo); + report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->psw.addr); @@ -111,7 +94,7 @@ NOKPROBE_SYMBOL(do_per_trap); void default_trap_handler(struct pt_regs *regs) { if (user_mode(regs)) { - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 0); do_exit(SIGSEGV); } else die(regs, "Unknown program exception"); diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 791a414..64b3ad1 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -183,6 +183,8 @@ static void dump_fault_info(struct pt_regs *regs) { unsigned long asce; + pr_alert("Failing address: %016lx TEID: %016lx\n", + regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); pr_alert("Fault in "); switch (regs->int_parm_long & 3) { case 3: @@ -218,7 +220,9 @@ static void dump_fault_info(struct pt_regs *regs) dump_pagetable(asce, regs->int_parm_long & __FAIL_ADDR_MASK); } -static inline void report_user_fault(struct pt_regs *regs, long signr) +int show_unhandled_signals = 1; + +void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) { if ((task_pid_nr(current) > 1) && !show_unhandled_signals) return; @@ -230,9 +234,8 @@ static inline void report_user_fault(struct pt_regs *regs, long signr) regs->int_code & 0xffff, regs->int_code >> 17); print_vma_addr(KERN_CONT "in ", regs->psw.addr); printk(KERN_CONT "\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); - dump_fault_info(regs); + if (is_mm_fault) + dump_fault_info(regs); show_regs(regs); } @@ -244,7 +247,7 @@ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { struct siginfo si; - report_user_fault(regs, SIGSEGV); + report_user_fault(regs, SIGSEGV, 1); si.si_signo = SIGSEGV; si.si_code = si_code; si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); @@ -272,8 +275,6 @@ static noinline void do_no_context(struct pt_regs *regs) else printk(KERN_ALERT "Unable to handle kernel paging request" " in virtual user address space\n"); - printk(KERN_ALERT "failing address: %016lx TEID: %016lx\n", - regs->int_parm_long & __FAIL_ADDR_MASK, regs->int_parm_long); dump_fault_info(regs); die(regs, "Oops"); do_exit(SIGKILL); -- cgit v1.1 From b1685ab9bd3ae14830acac8ffdc7aafc0fb416e3 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 29 Feb 2016 16:05:35 +0100 Subject: s390/cpumf: Improve guest detection heuristics commit e22cf8ca6f75 ("s390/cpumf: rework program parameter setting to detect guest samples") requires guest changes to get proper guest/host. We can do better: We can use the primary asn value, which is set on all Linux variants to compare this with the host pp value. We now have the following cases: 1. Guest using PP host sample: gpp == 0, asn == hpp --> host guest sample: gpp != 0 --> guest 2. Guest not using PP host sample: gpp == 0, asn == hpp --> host guest sample: gpp == 0, asn != hpp --> guest As soon as the host no longer sets CR4, we must back out this heuristics - let's add a comment in switch_to. Signed-off-by: Christian Borntraeger Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 1 + arch/s390/kernel/perf_cpum_sf.c | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index cd5a191..4ba688c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -186,6 +186,7 @@ ENTRY(__switch_to) stg %r5,__LC_THREAD_INFO # store thread info of next stg %r15,__LC_KERNEL_STACK # store end of kernel stack lg %r15,__THREAD_ksp(%r1) # load kernel stack of next + /* c4 is used in guest detection: arch/s390/kernel/perf_cpum_sf.c */ lctl %c4,%c4,__TASK_pid(%r3) # load pid to control reg. 4 mvc __LC_CURRENT_PID(4,%r0),__TASK_pid(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 3d8da1e..1a43474 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -1022,10 +1022,13 @@ static int perf_push_sample(struct perf_event *event, struct sf_raw_sample *sfr) /* * A non-zero guest program parameter indicates a guest * sample. - * Note that some early samples might be misaccounted to - * the host. + * Note that some early samples or samples from guests without + * lpp usage would be misaccounted to the host. We use the asn + * value as a heuristic to detect most of these guest samples. + * If the value differs from the host hpp value, we assume + * it to be a KVM guest. */ - if (sfr->basic.gpp) + if (sfr->basic.gpp || sfr->basic.prim_asn != (u16) sfr->basic.hpp) sde_regs->in_guest = 1; overflow = 0; -- cgit v1.1 From f369b98ee30df4b4eaf57e2aab081529c2fad6d9 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 2 Mar 2016 10:14:08 +0100 Subject: s390/percpu: remove this_cpu_cmpxchg_double_4 git commit 26f15caaf993 ("s390/cmpxchg: simplify cmpxchg_double") removed support for cmpxchg_double for two consecutive four byte values, for which it would generate a cds instruction. However I forgot to remove the corresponding define in our percpu header file, which means that this_cpu_cmpxchg_double would now incorrectly generate a cdsg instruction if being used on a double four byte location. Therefore remove the percpu define as well. There is currently no user and therefore no bug fixed with this. Obviously any such user could and should simply use cmpxchg. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/percpu.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/s390/include/asm/percpu.h b/arch/s390/include/asm/percpu.h index 6d6556c..90240df 100644 --- a/arch/s390/include/asm/percpu.h +++ b/arch/s390/include/asm/percpu.h @@ -178,7 +178,6 @@ ret__; \ }) -#define this_cpu_cmpxchg_double_4 arch_this_cpu_cmpxchg_double #define this_cpu_cmpxchg_double_8 arch_this_cpu_cmpxchg_double #include -- cgit v1.1 From 46d1c03c82a717735dddc7a47f321abaccbcec78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Wed, 14 Oct 2015 17:01:04 +0200 Subject: s390/dasd: Improve dasd format code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Make sure a calling function can rely on data in fdata by resetting to its initial values - Move special treatment for track 0 and 1 to dasd_eckd_build_format - Replace dangerous backward goto with a loop logic - Add define for number that specifies the maximum amount of CCWs per request and is used for format_step calculation - Remove unused variable Signed-off-by: Jan Höppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 110 +++++++++++++++++++---------------------- drivers/s390/block/dasd_int.h | 7 +++ 2 files changed, 59 insertions(+), 58 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 9083247..8181d67 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2349,14 +2349,14 @@ dasd_eckd_build_format(struct dasd_device *base, * when formatting CDL */ if ((intensity & 0x08) && - fdata->start_unit == 0) { + address.cyl == 0 && address.head == 0) { if (i < 3) { ect->kl = 4; ect->dl = sizes_trk0[i] - 4; } } if ((intensity & 0x08) && - fdata->start_unit == 1) { + address.cyl == 0 && address.head == 1) { ect->kl = 44; ect->dl = LABEL_SIZE - 44; } @@ -2392,14 +2392,13 @@ dasd_eckd_format_device(struct dasd_device *base, int enable_pav) { struct dasd_ccw_req *cqr, *n; - struct dasd_block *block; struct dasd_eckd_private *private; struct list_head format_queue; struct dasd_device *device; - int old_stop, format_step; - int step, rc = 0, sleep_rc; + int old_start, old_stop, format_step; + int step, retry; + int rc = 0; - block = base->block; private = (struct dasd_eckd_private *) base->private; /* Sanity checks. */ @@ -2432,68 +2431,63 @@ dasd_eckd_format_device(struct dasd_device *base, INIT_LIST_HEAD(&format_queue); + old_start = fdata->start_unit; old_stop = fdata->stop_unit; - while (fdata->start_unit <= 1) { - fdata->stop_unit = fdata->start_unit; - cqr = dasd_eckd_build_format(base, fdata, enable_pav); - list_add(&cqr->blocklist, &format_queue); - - fdata->stop_unit = old_stop; - fdata->start_unit++; - if (fdata->start_unit > fdata->stop_unit) - goto sleep; - } + format_step = DASD_CQR_MAX_CCW / recs_per_track(&private->rdc_data, 0, + fdata->blksize); + do { + retry = 0; + while (fdata->start_unit <= old_stop) { + step = fdata->stop_unit - fdata->start_unit + 1; + if (step > format_step) { + fdata->stop_unit = + fdata->start_unit + format_step - 1; + } -retry: - format_step = 255 / recs_per_track(&private->rdc_data, 0, - fdata->blksize); - while (fdata->start_unit <= old_stop) { - step = fdata->stop_unit - fdata->start_unit + 1; - if (step > format_step) - fdata->stop_unit = fdata->start_unit + format_step - 1; + cqr = dasd_eckd_build_format(base, fdata, enable_pav); + if (IS_ERR(cqr)) { + rc = PTR_ERR(cqr); + if (rc == -ENOMEM) { + if (list_empty(&format_queue)) + goto out; + /* + * not enough memory available, start + * requests retry after first requests + * were finished + */ + retry = 1; + break; + } + goto out_err; + } + list_add_tail(&cqr->blocklist, &format_queue); - cqr = dasd_eckd_build_format(base, fdata, enable_pav); - if (IS_ERR(cqr)) { - if (PTR_ERR(cqr) == -ENOMEM) { - /* - * not enough memory available - * go to out and start requests - * retry after first requests were finished - */ - fdata->stop_unit = old_stop; - goto sleep; - } else - return PTR_ERR(cqr); + fdata->start_unit = fdata->stop_unit + 1; + fdata->stop_unit = old_stop; } - list_add(&cqr->blocklist, &format_queue); - fdata->start_unit = fdata->stop_unit + 1; - fdata->stop_unit = old_stop; - } + rc = dasd_sleep_on_queue(&format_queue); -sleep: - sleep_rc = dasd_sleep_on_queue(&format_queue); +out_err: + list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { + device = cqr->startdev; + private = (struct dasd_eckd_private *) device->private; + if (cqr->status == DASD_CQR_FAILED) + rc = -EIO; + list_del_init(&cqr->blocklist); + dasd_sfree_request(cqr, device); + private->count--; + } - list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { - device = cqr->startdev; - private = (struct dasd_eckd_private *) device->private; - if (cqr->status == DASD_CQR_FAILED) - rc = -EIO; - list_del_init(&cqr->blocklist); - dasd_sfree_request(cqr, device); - private->count--; - } + if (rc) + goto out; - if (sleep_rc) - return sleep_rc; + } while (retry); - /* - * in case of ENOMEM we need to retry after - * first requests are finished - */ - if (fdata->start_unit <= fdata->stop_unit) - goto retry; +out: + fdata->start_unit = old_start; + fdata->stop_unit = old_stop; return rc; } diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index 4aed5ed..ffd7723 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -241,6 +241,13 @@ struct dasd_ccw_req { typedef struct dasd_ccw_req *(*dasd_erp_fn_t) (struct dasd_ccw_req *); /* + * A single CQR can only contain a maximum of 255 CCWs. It is limited by + * the locate record and locate record extended count value which can only hold + * 1 Byte max. + */ +#define DASD_CQR_MAX_CCW 255 + +/* * Unique identifier for dasd device. */ #define UA_NOT_CONFIGURED 0x00 -- cgit v1.1 From 8542885bf5338bfccf779868a5d143620e794ff9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Mon, 21 Sep 2015 17:32:11 +0200 Subject: s390/dasd: Simplify code in format logic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, dasd_format is calling the format logic of a DASD discipline with PAV enabled. If that fails with an error code of -EAGAIN the value of retries is decremented and the discipline function is called with PAV turned off. The loop is supposed to try this up to 255 times until success. However, -EAGAIN can only occur once here and therefore the loop will never reach the 255 retries. So, replace the unnecessarily complicated loop logic and simply try again without PAV enabled in case of an -EAGAIN error. Signed-off-by: Jan Höppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_ioctl.c | 33 ++++++--------------------------- 1 file changed, 6 insertions(+), 27 deletions(-) diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 02837d0..16bb5ec 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -203,9 +203,7 @@ static int dasd_format(struct dasd_block *block, struct format_data_t *fdata) { struct dasd_device *base; - int enable_pav = 1; - int rc, retries; - int start, stop; + int rc; base = block->base; if (base->discipline->format_device == NULL) @@ -233,30 +231,11 @@ dasd_format(struct dasd_block *block, struct format_data_t *fdata) bdput(bdev); } - retries = 255; - /* backup start- and endtrack for retries */ - start = fdata->start_unit; - stop = fdata->stop_unit; - do { - rc = base->discipline->format_device(base, fdata, enable_pav); - if (rc) { - if (rc == -EAGAIN) { - retries--; - /* disable PAV in case of errors */ - enable_pav = 0; - fdata->start_unit = start; - fdata->stop_unit = stop; - } else - return rc; - } else - /* success */ - break; - } while (retries); - - if (!retries) - return -EIO; - else - return 0; + rc = base->discipline->format_device(base, fdata, 1); + if (rc == -EAGAIN) + rc = base->discipline->format_device(base, fdata, 0); + + return rc; } /* -- cgit v1.1 From 570d237c19f8d0f1dfd214767f2d995b8291138a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=C3=B6ppner?= Date: Wed, 19 Aug 2015 11:16:35 +0200 Subject: s390/dasd: Refactor dasd format functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Prepare for new format checking function by renaming functions and moving reusable code to separate functions: - Move sanity checks into a new function and make it reusable. - Move common format code to a new function called dasd_eckd_format_process_data. - Create the generic function dasd_eckd_format_build_ccw_req, which itself will then decide what ccw request is being built according to the input data. (with upcoming functionality). Signed-off-by: Jan Höppner Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_eckd.c | 58 +++++++++++++++++++++++++++++++++--------- 1 file changed, 46 insertions(+), 12 deletions(-) diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index 8181d67..e679209 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -2386,22 +2386,26 @@ dasd_eckd_build_format(struct dasd_device *base, return fcp; } -static int -dasd_eckd_format_device(struct dasd_device *base, - struct format_data_t *fdata, - int enable_pav) +/* + * Wrapper function to build a CCW request depending on input data + */ +static struct dasd_ccw_req * +dasd_eckd_format_build_ccw_req(struct dasd_device *base, + struct format_data_t *fdata, int enable_pav) +{ + return dasd_eckd_build_format(base, fdata, enable_pav); +} + +/* + * Sanity checks on format_data + */ +static int dasd_eckd_format_sanity_checks(struct dasd_device *base, + struct format_data_t *fdata) { - struct dasd_ccw_req *cqr, *n; struct dasd_eckd_private *private; - struct list_head format_queue; - struct dasd_device *device; - int old_start, old_stop, format_step; - int step, retry; - int rc = 0; private = (struct dasd_eckd_private *) base->private; - /* Sanity checks. */ if (fdata->start_unit >= (private->real_cyl * private->rdc_data.trk_per_cyl)) { dev_warn(&base->cdev->dev, @@ -2428,6 +2432,29 @@ dasd_eckd_format_device(struct dasd_device *base, fdata->blksize); return -EINVAL; } + return 0; +} + +/* + * This function will process format_data originally coming from an IOCTL + */ +static int dasd_eckd_format_process_data(struct dasd_device *base, + struct format_data_t *fdata, + int enable_pav) +{ + struct dasd_ccw_req *cqr, *n; + struct dasd_eckd_private *private; + struct list_head format_queue; + struct dasd_device *device; + int old_start, old_stop, format_step; + int step, retry; + int rc; + + private = (struct dasd_eckd_private *) base->private; + + rc = dasd_eckd_format_sanity_checks(base, fdata); + if (rc) + return rc; INIT_LIST_HEAD(&format_queue); @@ -2445,7 +2472,8 @@ dasd_eckd_format_device(struct dasd_device *base, fdata->start_unit + format_step - 1; } - cqr = dasd_eckd_build_format(base, fdata, enable_pav); + cqr = dasd_eckd_format_build_ccw_req(base, fdata, + enable_pav); if (IS_ERR(cqr)) { rc = PTR_ERR(cqr); if (rc == -ENOMEM) { @@ -2492,6 +2520,12 @@ out: return rc; } +static int dasd_eckd_format_device(struct dasd_device *base, + struct format_data_t *fdata, int enable_pav) +{ + return dasd_eckd_format_process_data(base, fdata, enable_pav); +} + static void dasd_eckd_handle_terminated_request(struct dasd_ccw_req *cqr) { if (cqr->retries < 0) { -- cgit v1.1 From 543691a4e1e040300ce6598a6ce6527d3144e5db Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 4 Mar 2016 10:34:05 +0100 Subject: s390/dasd: remove casts to dasd_*_private Convert dasd_device.private to be a void pointer to get rid of a lot of explicit casts. Signed-off-by: Sebastian Ott Reviewed-by: Stefan Haberland Acked-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- drivers/s390/block/dasd_alias.c | 55 +++++--------- drivers/s390/block/dasd_diag.c | 41 +++++----- drivers/s390/block/dasd_eckd.c | 161 ++++++++++++++-------------------------- drivers/s390/block/dasd_fba.c | 28 +++---- drivers/s390/block/dasd_int.h | 2 +- 5 files changed, 106 insertions(+), 181 deletions(-) diff --git a/drivers/s390/block/dasd_alias.c b/drivers/s390/block/dasd_alias.c index cbbdd3e..17ad574 100644 --- a/drivers/s390/block/dasd_alias.c +++ b/drivers/s390/block/dasd_alias.c @@ -185,14 +185,12 @@ static void _free_lcu(struct alias_lcu *lcu) */ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; struct alias_server *server, *newserver; struct alias_lcu *lcu, *newlcu; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; - device->discipline->get_uid(device, &uid); spin_lock_irqsave(&aliastree.lock, flags); server = _find_server(&uid); @@ -244,14 +242,13 @@ int dasd_alias_make_device_known_to_lcu(struct dasd_device *device) */ void dasd_alias_disconnect_device_from_lcu(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; struct alias_lcu *lcu; struct alias_server *server; int was_pending; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; /* nothing to do if already disconnected */ if (!lcu) @@ -316,12 +313,10 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, struct dasd_device *pos) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct alias_pav_group *group; struct dasd_uid uid; - private = (struct dasd_eckd_private *) device->private; - private->uid.type = lcu->uac->unit[private->uid.real_unit_addr].ua_type; private->uid.base_unit_addr = lcu->uac->unit[private->uid.real_unit_addr].base_ua; @@ -362,10 +357,9 @@ static int _add_device_to_lcu(struct alias_lcu *lcu, static void _remove_device_from_lcu(struct alias_lcu *lcu, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct alias_pav_group *group; - private = (struct dasd_eckd_private *) device->private; list_move(&device->alias_list, &lcu->inactive_devices); group = private->pavgroup; if (!group) @@ -603,13 +597,13 @@ static int _lcu_update(struct dasd_device *refdev, struct alias_lcu *lcu) list_for_each_entry_safe(device, tempdev, &pavgroup->baselist, alias_list) { list_move(&device->alias_list, &lcu->active_devices); - private = (struct dasd_eckd_private *) device->private; + private = device->private; private->pavgroup = NULL; } list_for_each_entry_safe(device, tempdev, &pavgroup->aliaslist, alias_list) { list_move(&device->alias_list, &lcu->active_devices); - private = (struct dasd_eckd_private *) device->private; + private = device->private; private->pavgroup = NULL; } list_del(&pavgroup->group); @@ -721,12 +715,11 @@ static int _schedule_lcu_update(struct alias_lcu *lcu, int dasd_alias_add_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct alias_lcu *lcu; unsigned long flags; int rc; - private = (struct dasd_eckd_private *) device->private; lcu = private->lcu; rc = 0; spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); @@ -747,20 +740,18 @@ int dasd_alias_add_device(struct dasd_device *device) int dasd_alias_update_add_device(struct dasd_device *device) { - struct dasd_eckd_private *private; - private = (struct dasd_eckd_private *) device->private; + struct dasd_eckd_private *private = device->private; + private->lcu->flags |= UPDATE_PENDING; return dasd_alias_add_device(device); } int dasd_alias_remove_device(struct dasd_device *device) { - struct dasd_eckd_private *private; - struct alias_lcu *lcu; + struct dasd_eckd_private *private = device->private; + struct alias_lcu *lcu = private->lcu; unsigned long flags; - private = (struct dasd_eckd_private *) device->private; - lcu = private->lcu; /* nothing to do if already removed */ if (!lcu) return 0; @@ -772,16 +763,12 @@ int dasd_alias_remove_device(struct dasd_device *device) struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) { - + struct dasd_eckd_private *alias_priv, *private = base_device->private; + struct alias_pav_group *group = private->pavgroup; + struct alias_lcu *lcu = private->lcu; struct dasd_device *alias_device; - struct alias_pav_group *group; - struct alias_lcu *lcu; - struct dasd_eckd_private *private, *alias_priv; unsigned long flags; - private = (struct dasd_eckd_private *) base_device->private; - group = private->pavgroup; - lcu = private->lcu; if (!group || !lcu) return NULL; if (lcu->pav == NO_PAV || @@ -817,7 +804,7 @@ struct dasd_device *dasd_alias_get_start_dev(struct dasd_device *base_device) group->next = list_first_entry(&alias_device->alias_list, struct dasd_device, alias_list); spin_unlock_irqrestore(&lcu->lock, flags); - alias_priv = (struct dasd_eckd_private *) alias_device->private; + alias_priv = alias_device->private; if ((alias_priv->count < private->count) && !alias_device->stopped && !test_bit(DASD_FLAG_OFFLINE, &alias_device->flags)) return alias_device; @@ -868,14 +855,14 @@ static void _restart_all_base_devices_on_lcu(struct alias_lcu *lcu) /* active and inactive list can contain alias as well as base devices */ list_for_each_entry(device, &lcu->active_devices, alias_list) { - private = (struct dasd_eckd_private *) device->private; + private = device->private; if (private->uid.type != UA_BASE_DEVICE) continue; dasd_schedule_block_bh(device->block); dasd_schedule_device_bh(device); } list_for_each_entry(device, &lcu->inactive_devices, alias_list) { - private = (struct dasd_eckd_private *) device->private; + private = device->private; if (private->uid.type != UA_BASE_DEVICE) continue; dasd_schedule_block_bh(device->block); @@ -912,7 +899,7 @@ static void flush_all_alias_devices_on_lcu(struct alias_lcu *lcu) spin_lock_irqsave(&lcu->lock, flags); list_for_each_entry_safe(device, temp, &lcu->active_devices, alias_list) { - private = (struct dasd_eckd_private *) device->private; + private = device->private; if (private->uid.type == UA_BASE_DEVICE) continue; list_move(&device->alias_list, &active); @@ -934,7 +921,7 @@ static void flush_all_alias_devices_on_lcu(struct alias_lcu *lcu) if (device == list_first_entry(&active, struct dasd_device, alias_list)) { list_move(&device->alias_list, &lcu->active_devices); - private = (struct dasd_eckd_private *) device->private; + private = device->private; private->pavgroup = NULL; } } @@ -1014,13 +1001,11 @@ static void summary_unit_check_handling_work(struct work_struct *work) void dasd_alias_handle_summary_unit_check(struct dasd_device *device, struct irb *irb) { + struct dasd_eckd_private *private = device->private; struct alias_lcu *lcu; char reason; - struct dasd_eckd_private *private; char *sense; - private = (struct dasd_eckd_private *) device->private; - sense = dasd_get_sense(irb); if (sense) { reason = sense[8]; diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index cb61f30..38c5c6f 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -99,12 +99,10 @@ static inline int mdsk_init_io(struct dasd_device *device, unsigned int blocksize, blocknum_t offset, blocknum_t *end_block) { - struct dasd_diag_private *private; - struct dasd_diag_init_io *iib; + struct dasd_diag_private *private = device->private; + struct dasd_diag_init_io *iib = &private->iib; int rc; - private = (struct dasd_diag_private *) device->private; - iib = &private->iib; memset(iib, 0, sizeof (struct dasd_diag_init_io)); iib->dev_nr = private->dev_id.devno; @@ -125,12 +123,10 @@ mdsk_init_io(struct dasd_device *device, unsigned int blocksize, static inline int mdsk_term_io(struct dasd_device * device) { - struct dasd_diag_private *private; - struct dasd_diag_init_io *iib; + struct dasd_diag_private *private = device->private; + struct dasd_diag_init_io *iib = &private->iib; int rc; - private = (struct dasd_diag_private *) device->private; - iib = &private->iib; memset(iib, 0, sizeof (struct dasd_diag_init_io)); iib->dev_nr = private->dev_id.devno; rc = dia250(iib, TERM_BIO); @@ -175,8 +171,8 @@ dasd_start_diag(struct dasd_ccw_req * cqr) cqr->status = DASD_CQR_ERROR; return -EIO; } - private = (struct dasd_diag_private *) device->private; - dreq = (struct dasd_diag_req *) cqr->data; + private = device->private; + dreq = cqr->data; private->iob.dev_nr = private->dev_id.devno; private->iob.key = 0; @@ -315,18 +311,17 @@ static void dasd_ext_handler(struct ext_code ext_code, static int dasd_diag_check_device(struct dasd_device *device) { - struct dasd_block *block; - struct dasd_diag_private *private; + struct dasd_diag_private *private = device->private; struct dasd_diag_characteristics *rdc_data; - struct dasd_diag_bio bio; struct vtoc_cms_label *label; - blocknum_t end_block; + struct dasd_block *block; + struct dasd_diag_bio bio; unsigned int sb, bsize; + blocknum_t end_block; int rc; - private = (struct dasd_diag_private *) device->private; if (private == NULL) { - private = kzalloc(sizeof(struct dasd_diag_private),GFP_KERNEL); + private = kzalloc(sizeof(*private), GFP_KERNEL); if (private == NULL) { DBF_DEV_EVENT(DBF_WARNING, device, "%s", "Allocating memory for private DASD data " @@ -334,7 +329,7 @@ dasd_diag_check_device(struct dasd_device *device) return -ENOMEM; } ccw_device_get_id(device->cdev, &private->dev_id); - device->private = (void *) private; + device->private = private; } block = dasd_alloc_block(); if (IS_ERR(block)) { @@ -348,7 +343,7 @@ dasd_diag_check_device(struct dasd_device *device) block->base = device; /* Read Device Characteristics */ - rdc_data = (void *) &(private->rdc_data); + rdc_data = &private->rdc_data; rdc_data->dev_nr = private->dev_id.devno; rdc_data->rdc_len = sizeof (struct dasd_diag_characteristics); @@ -596,16 +591,14 @@ static int dasd_diag_fill_info(struct dasd_device * device, struct dasd_information2_t * info) { - struct dasd_diag_private *private; + struct dasd_diag_private *private = device->private; - private = (struct dasd_diag_private *) device->private; info->label_block = (unsigned int) private->pt_block; info->FBA_layout = 1; info->format = DASD_FORMAT_LDL; - info->characteristics_size = sizeof (struct dasd_diag_characteristics); - memcpy(info->characteristics, - &((struct dasd_diag_private *) device->private)->rdc_data, - sizeof (struct dasd_diag_characteristics)); + info->characteristics_size = sizeof(private->rdc_data); + memcpy(info->characteristics, &private->rdc_data, + sizeof(private->rdc_data)); info->confdata_size = 0; return 0; } diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c index e679209..75c032d 100644 --- a/drivers/s390/block/dasd_eckd.c +++ b/drivers/s390/block/dasd_eckd.c @@ -212,10 +212,9 @@ check_XRC (struct ccw1 *de_ccw, struct DE_eckd_data *data, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int rc; - private = (struct dasd_eckd_private *) device->private; if (!private->rdc_data.facilities.XRC_supported) return 0; @@ -237,13 +236,11 @@ static int define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk, unsigned int totrk, int cmd, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; u32 begcyl, endcyl; u16 heads, beghead, endhead; int rc = 0; - private = (struct dasd_eckd_private *) device->private; - ccw->cmd_code = DASD_ECKD_CCW_DEFINE_EXTENT; ccw->flags = 0; ccw->count = 16; @@ -322,10 +319,9 @@ define_extent(struct ccw1 *ccw, struct DE_eckd_data *data, unsigned int trk, static int check_XRC_on_prefix(struct PFX_eckd_data *pfxdata, struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int rc; - private = (struct dasd_eckd_private *) device->private; if (!private->rdc_data.facilities.XRC_supported) return 0; @@ -346,12 +342,10 @@ static void fill_LRE_data(struct LRE_eckd_data *data, unsigned int trk, struct dasd_device *device, unsigned int reclen, unsigned int tlf) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int sector; int dn, d; - private = (struct dasd_eckd_private *) device->private; - memset(data, 0, sizeof(*data)); sector = 0; if (rec_on_trk) { @@ -488,8 +482,8 @@ static int prefix_LRE(struct ccw1 *ccw, struct PFX_eckd_data *pfxdata, u16 heads, beghead, endhead; int rc = 0; - basepriv = (struct dasd_eckd_private *) basedev->private; - startpriv = (struct dasd_eckd_private *) startdev->private; + basepriv = basedev->private; + startpriv = startdev->private; dedata = &pfxdata->define_extent; lredata = &pfxdata->locate_record; @@ -631,12 +625,10 @@ locate_record(struct ccw1 *ccw, struct LO_eckd_data *data, unsigned int trk, unsigned int rec_on_trk, int no_rec, int cmd, struct dasd_device * device, int reclen) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int sector; int dn, d; - private = (struct dasd_eckd_private *) device->private; - DBF_DEV_EVENT(DBF_INFO, device, "Locate: trk %d, rec %d, no_rec %d, cmd %d, reclen %d", trk, rec_on_trk, no_rec, cmd, reclen); @@ -800,10 +792,9 @@ static void create_uid(struct dasd_eckd_private *private) */ static int dasd_eckd_generate_uid(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; - private = (struct dasd_eckd_private *) device->private; if (!private) return -ENODEV; if (!private->ned || !private->gneq) @@ -816,11 +807,10 @@ static int dasd_eckd_generate_uid(struct dasd_device *device) static int dasd_eckd_get_uid(struct dasd_device *device, struct dasd_uid *uid) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; unsigned long flags; - if (device->private) { - private = (struct dasd_eckd_private *)device->private; + if (private) { spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); *uid = private->uid; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); @@ -1034,10 +1024,9 @@ static unsigned char dasd_eckd_path_access(void *conf_data, int conf_len) static void dasd_eckd_clear_conf_data(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int i; - private = (struct dasd_eckd_private *) device->private; private->conf_data = NULL; private->conf_len = 0; for (i = 0; i < 8; i++) { @@ -1058,7 +1047,7 @@ static int dasd_eckd_read_conf(struct dasd_device *device) struct dasd_uid *uid; char print_path_uid[60], print_device_uid[60]; - private = (struct dasd_eckd_private *) device->private; + private = device->private; path_data = &device->path_data; opm = ccw_device_get_path_mask(device->cdev); conf_data_saved = 0; @@ -1191,11 +1180,10 @@ static int dasd_eckd_read_conf(struct dasd_device *device) static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int mdc; u32 fcx_max_data; - private = (struct dasd_eckd_private *) device->private; if (private->fcx_max_data) { mdc = ccw_device_get_mdc(device->cdev, lpm); if ((mdc < 0)) { @@ -1221,15 +1209,10 @@ static int verify_fcx_max_data(struct dasd_device *device, __u8 lpm) static int rebuild_device_uid(struct dasd_device *device, struct path_verification_work_data *data) { - struct dasd_eckd_private *private; - struct dasd_path *path_data; - __u8 lpm, opm; - int rc; - - rc = -ENODEV; - private = (struct dasd_eckd_private *) device->private; - path_data = &device->path_data; - opm = device->path_data.opm; + struct dasd_eckd_private *private = device->private; + struct dasd_path *path_data = &device->path_data; + __u8 lpm, opm = path_data->opm; + int rc = -ENODEV; for (lpm = 0x80; lpm; lpm >>= 1) { if (!(lpm & opm)) @@ -1463,14 +1446,13 @@ static int dasd_eckd_verify_path(struct dasd_device *device, __u8 lpm) static int dasd_eckd_read_features(struct dasd_device *device) { + struct dasd_eckd_private *private = device->private; struct dasd_psf_prssd_data *prssdp; struct dasd_rssd_features *features; struct dasd_ccw_req *cqr; struct ccw1 *ccw; int rc; - struct dasd_eckd_private *private; - private = (struct dasd_eckd_private *) device->private; memset(&private->features, 0, sizeof(struct dasd_rssd_features)); cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + @@ -1605,11 +1587,9 @@ dasd_eckd_psf_ssc(struct dasd_device *device, int enable_pav, static int dasd_eckd_validate_server(struct dasd_device *device, unsigned long flags) { - int rc; - struct dasd_eckd_private *private; - int enable_pav; + struct dasd_eckd_private *private = device->private; + int enable_pav, rc; - private = (struct dasd_eckd_private *) device->private; if (private->uid.type == UA_BASE_PAV_ALIAS || private->uid.type == UA_HYPER_PAV_ALIAS) return 0; @@ -1662,14 +1642,13 @@ static void dasd_eckd_kick_validate_server(struct dasd_device *device) static u32 get_fcx_max_data(struct dasd_device *device) { - int tpm, mdc; + struct dasd_eckd_private *private = device->private; int fcx_in_css, fcx_in_gneq, fcx_in_features; - struct dasd_eckd_private *private; + int tpm, mdc; if (dasd_nofcx) return 0; /* is transport mode supported? */ - private = (struct dasd_eckd_private *) device->private; fcx_in_css = css_general_characteristics.fcx; fcx_in_gneq = private->gneq->reserved2[7] & 0x04; fcx_in_features = private->features.feature[40] & 0x80; @@ -1694,7 +1673,7 @@ static u32 get_fcx_max_data(struct dasd_device *device) static int dasd_eckd_check_characteristics(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct dasd_block *block; struct dasd_uid temp_uid; int rc, i; @@ -1713,7 +1692,6 @@ dasd_eckd_check_characteristics(struct dasd_device *device) dev_info(&device->cdev->dev, "The DASD is not operating in multipath mode\n"); } - private = (struct dasd_eckd_private *) device->private; if (!private) { private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); if (!private) { @@ -1722,7 +1700,7 @@ dasd_eckd_check_characteristics(struct dasd_device *device) "failed\n"); return -ENOMEM; } - device->private = (void *) private; + device->private = private; } else { memset(private, 0, sizeof(*private)); } @@ -1837,10 +1815,9 @@ out_err1: static void dasd_eckd_uncheck_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int i; - private = (struct dasd_eckd_private *) device->private; dasd_alias_disconnect_device_from_lcu(device); private->ned = NULL; private->sneq = NULL; @@ -1863,7 +1840,7 @@ static void dasd_eckd_uncheck_device(struct dasd_device *device) static struct dasd_ccw_req * dasd_eckd_analysis_ccw(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct eckd_count *count_data; struct LO_eckd_data *LO_data; struct dasd_ccw_req *cqr; @@ -1871,8 +1848,6 @@ dasd_eckd_analysis_ccw(struct dasd_device *device) int cplength, datasize; int i; - private = (struct dasd_eckd_private *) device->private; - cplength = 8; datasize = sizeof(struct DE_eckd_data) + 2*sizeof(struct LO_eckd_data); cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, cplength, datasize, device); @@ -1946,11 +1921,9 @@ static int dasd_eckd_analysis_evaluation(struct dasd_ccw_req *init_cqr) static void dasd_eckd_analysis_callback(struct dasd_ccw_req *init_cqr, void *data) { - struct dasd_eckd_private *private; - struct dasd_device *device; + struct dasd_device *device = init_cqr->startdev; + struct dasd_eckd_private *private = device->private; - device = init_cqr->startdev; - private = (struct dasd_eckd_private *) device->private; private->init_cqr_status = dasd_eckd_analysis_evaluation(init_cqr); dasd_sfree_request(init_cqr, device); dasd_kick_device(device); @@ -1977,15 +1950,13 @@ static int dasd_eckd_start_analysis(struct dasd_block *block) static int dasd_eckd_end_analysis(struct dasd_block *block) { - struct dasd_device *device; - struct dasd_eckd_private *private; + struct dasd_device *device = block->base; + struct dasd_eckd_private *private = device->private; struct eckd_count *count_area; unsigned int sb, blk_per_trk; int status, i; struct dasd_ccw_req *init_cqr; - device = block->base; - private = (struct dasd_eckd_private *) device->private; status = private->init_cqr_status; private->init_cqr_status = -1; if (status == INIT_CQR_ERROR) { @@ -2083,9 +2054,8 @@ raw: static int dasd_eckd_do_analysis(struct dasd_block *block) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = block->base->private; - private = (struct dasd_eckd_private *) block->base->private; if (private->init_cqr_status < 0) return dasd_eckd_start_analysis(block); else @@ -2112,9 +2082,8 @@ static int dasd_eckd_basic_to_known(struct dasd_device *device) static int dasd_eckd_fill_geometry(struct dasd_block *block, struct hd_geometry *geo) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = block->base->private; - private = (struct dasd_eckd_private *) block->base->private; if (dasd_check_blocksize(block->bp_block) == 0) { geo->sectors = recs_per_track(&private->rdc_data, 0, block->bp_block); @@ -2151,8 +2120,8 @@ dasd_eckd_build_format(struct dasd_device *base, if (!startdev) startdev = base; - start_priv = (struct dasd_eckd_private *) startdev->private; - base_priv = (struct dasd_eckd_private *) base->private; + start_priv = startdev->private; + base_priv = base->private; rpt = recs_per_track(&base_priv->rdc_data, 0, fdata->blksize); @@ -2402,9 +2371,7 @@ dasd_eckd_format_build_ccw_req(struct dasd_device *base, static int dasd_eckd_format_sanity_checks(struct dasd_device *base, struct format_data_t *fdata) { - struct dasd_eckd_private *private; - - private = (struct dasd_eckd_private *) base->private; + struct dasd_eckd_private *private = base->private; if (fdata->start_unit >= (private->real_cyl * private->rdc_data.trk_per_cyl)) { @@ -2442,16 +2409,14 @@ static int dasd_eckd_format_process_data(struct dasd_device *base, struct format_data_t *fdata, int enable_pav) { + struct dasd_eckd_private *private = base->private; struct dasd_ccw_req *cqr, *n; - struct dasd_eckd_private *private; struct list_head format_queue; struct dasd_device *device; int old_start, old_stop, format_step; int step, retry; int rc; - private = (struct dasd_eckd_private *) base->private; - rc = dasd_eckd_format_sanity_checks(base, fdata); if (rc) return rc; @@ -2500,7 +2465,7 @@ static int dasd_eckd_format_process_data(struct dasd_device *base, out_err: list_for_each_entry_safe(cqr, n, &format_queue, blocklist) { device = cqr->startdev; - private = (struct dasd_eckd_private *) device->private; + private = device->private; if (cqr->status == DASD_CQR_FAILED) rc = -EIO; list_del_init(&cqr->blocklist); @@ -2571,9 +2536,8 @@ static void dasd_eckd_check_for_device_change(struct dasd_device *device, { char mask; char *sense = NULL; - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; - private = (struct dasd_eckd_private *) device->private; /* first of all check for state change pending interrupt */ mask = DEV_STAT_ATTENTION | DEV_STAT_DEV_END | DEV_STAT_UNIT_EXCEP; if ((scsw_dstat(&irb->scsw) & mask) == mask) { @@ -2662,7 +2626,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp_cmd_single( struct dasd_device *basedev; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; + private = basedev->private; if (rq_data_dir(req) == READ) cmd = DASD_ECKD_CCW_READ_MT; else if (rq_data_dir(req) == WRITE) @@ -3018,8 +2982,8 @@ static int prepare_itcw(struct itcw *itcw, /* setup prefix data */ - basepriv = (struct dasd_eckd_private *) basedev->private; - startpriv = (struct dasd_eckd_private *) startdev->private; + basepriv = basedev->private; + startpriv = startdev->private; dedata = &pfxdata.define_extent; lredata = &pfxdata.locate_record; @@ -3306,7 +3270,7 @@ static struct dasd_ccw_req *dasd_eckd_build_cp(struct dasd_device *startdev, struct dasd_ccw_req *cqr; basedev = block->base; - private = (struct dasd_eckd_private *) basedev->private; + private = basedev->private; /* Calculate number of blocks/records per track. */ blksize = block->bp_block; @@ -3531,7 +3495,7 @@ dasd_eckd_free_cp(struct dasd_ccw_req *cqr, struct request *req) if (!dasd_page_cache) goto out; - private = (struct dasd_eckd_private *) cqr->block->base->private; + private = cqr->block->base->private; blksize = cqr->block->bp_block; blk_per_trk = recs_per_track(&private->rdc_data, 0, blksize); recid = blk_rq_pos(req) >> cqr->block->s2b_shift; @@ -3615,7 +3579,7 @@ static struct dasd_ccw_req *dasd_eckd_build_alias_cp(struct dasd_device *base, startdev = dasd_alias_get_start_dev(base); if (!startdev) startdev = base; - private = (struct dasd_eckd_private *) startdev->private; + private = startdev->private; if (private->count >= DASD_ECKD_CHANQ_MAX_SIZE) return ERR_PTR(-EBUSY); @@ -3638,7 +3602,7 @@ static int dasd_eckd_free_alias_cp(struct dasd_ccw_req *cqr, unsigned long flags; spin_lock_irqsave(get_ccwdev_lock(cqr->memdev->cdev), flags); - private = (struct dasd_eckd_private *) cqr->memdev->private; + private = cqr->memdev->private; private->count--; spin_unlock_irqrestore(get_ccwdev_lock(cqr->memdev->cdev), flags); return dasd_eckd_free_cp(cqr, req); @@ -3648,15 +3612,14 @@ static int dasd_eckd_fill_info(struct dasd_device * device, struct dasd_information2_t * info) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; - private = (struct dasd_eckd_private *) device->private; info->label_block = 2; info->FBA_layout = private->uses_cdl ? 0 : 1; info->format = private->uses_cdl ? DASD_FORMAT_CDL : DASD_FORMAT_LDL; - info->characteristics_size = sizeof(struct dasd_eckd_characteristics); + info->characteristics_size = sizeof(private->rdc_data); memcpy(info->characteristics, &private->rdc_data, - sizeof(struct dasd_eckd_characteristics)); + sizeof(private->rdc_data)); info->confdata_size = min((unsigned long)private->conf_len, sizeof(info->configuration_data)); memcpy(info->configuration_data, private->conf_data, @@ -3969,8 +3932,7 @@ dasd_eckd_performance(struct dasd_device *device, void __user *argp) static int dasd_eckd_get_attrib(struct dasd_device *device, void __user *argp) { - struct dasd_eckd_private *private = - (struct dasd_eckd_private *)device->private; + struct dasd_eckd_private *private = device->private; struct attrib_data_t attrib = private->attrib; int rc; @@ -3994,8 +3956,7 @@ dasd_eckd_get_attrib(struct dasd_device *device, void __user *argp) static int dasd_eckd_set_attrib(struct dasd_device *device, void __user *argp) { - struct dasd_eckd_private *private = - (struct dasd_eckd_private *)device->private; + struct dasd_eckd_private *private = device->private; struct attrib_data_t attrib; if (!capable(CAP_SYS_ADMIN)) @@ -4458,15 +4419,13 @@ static int dasd_eckd_pm_freeze(struct dasd_device *device) static int dasd_eckd_restore_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct dasd_eckd_characteristics temp_rdc_data; int rc; struct dasd_uid temp_uid; unsigned long flags; unsigned long cqr_flags = 0; - private = (struct dasd_eckd_private *) device->private; - /* Read Configuration Data */ rc = dasd_eckd_read_conf(device); if (rc) { @@ -4530,14 +4489,12 @@ out_err: static int dasd_eckd_reload_device(struct dasd_device *device) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; int rc, old_base; char print_uid[60]; struct dasd_uid uid; unsigned long flags; - private = (struct dasd_eckd_private *) device->private; - spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags); old_base = private->uid.base_unit_addr; spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags); @@ -4584,12 +4541,10 @@ static int dasd_eckd_read_message_buffer(struct dasd_device *device, { struct dasd_rssd_messages *message_buf; struct dasd_psf_prssd_data *prssdp; - struct dasd_eckd_private *private; struct dasd_ccw_req *cqr; struct ccw1 *ccw; int rc; - private = (struct dasd_eckd_private *) device->private; cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* PSF */ + 1 /* RSSD */, (sizeof(struct dasd_psf_prssd_data) + sizeof(struct dasd_rssd_messages)), @@ -4714,11 +4669,10 @@ static struct dasd_conf_data *dasd_eckd_get_ref_conf(struct dasd_device *device, __u8 lpum, struct dasd_cuir_message *cuir) { - struct dasd_eckd_private *private; + struct dasd_eckd_private *private = device->private; struct dasd_conf_data *conf_data; int path, pos; - private = (struct dasd_eckd_private *) device->private; if (cuir->record_selector == 0) goto out; for (path = 0x80, pos = 0; path; path >>= 1, pos++) { @@ -4743,9 +4697,9 @@ out: static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum, struct dasd_cuir_message *cuir) { + struct dasd_eckd_private *private = device->private; struct dasd_conf_data *ref_conf_data; unsigned long bitmask = 0, mask = 0; - struct dasd_eckd_private *private; struct dasd_conf_data *conf_data; unsigned int pos, path; char *ref_gneq, *gneq; @@ -4758,7 +4712,6 @@ static int dasd_eckd_cuir_scope(struct dasd_device *device, __u8 lpum, !(cuir->neq_map[0] | cuir->neq_map[1] | cuir->neq_map[2])) return lpum; - private = (struct dasd_eckd_private *) device->private; /* get reference conf data */ ref_conf_data = dasd_eckd_get_ref_conf(device, lpum, cuir); /* reference ned is determined by ned_map field */ @@ -4857,14 +4810,13 @@ static int dasd_eckd_cuir_quiesce(struct dasd_device *device, __u8 lpum, struct subchannel_id sch_id, struct dasd_cuir_message *cuir) { + struct dasd_eckd_private *private = device->private; struct alias_pav_group *pavgroup, *tempgroup; - struct dasd_eckd_private *private; struct dasd_device *dev, *n; unsigned long paths = 0; unsigned long flags; int tbcpm; - private = (struct dasd_eckd_private *) device->private; /* active devices */ list_for_each_entry_safe(dev, n, &private->lcu->active_devices, alias_list) { @@ -4920,13 +4872,12 @@ static int dasd_eckd_cuir_resume(struct dasd_device *device, __u8 lpum, struct subchannel_id sch_id, struct dasd_cuir_message *cuir) { + struct dasd_eckd_private *private = device->private; struct alias_pav_group *pavgroup, *tempgroup; - struct dasd_eckd_private *private; struct dasd_device *dev, *n; unsigned long paths = 0; int tbcpm; - private = (struct dasd_eckd_private *) device->private; /* * the path may have been added through a generic path event before * only trigger path verification if the path is not already in use diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c index c9262e7..d7b5b55 100644 --- a/drivers/s390/block/dasd_fba.c +++ b/drivers/s390/block/dasd_fba.c @@ -125,13 +125,11 @@ locate_record(struct ccw1 * ccw, struct LO_fba_data *data, int rw, static int dasd_fba_check_characteristics(struct dasd_device *device) { - struct dasd_block *block; - struct dasd_fba_private *private; + struct dasd_fba_private *private = device->private; struct ccw_device *cdev = device->cdev; - int rc; - int readonly; + struct dasd_block *block; + int readonly, rc; - private = (struct dasd_fba_private *) device->private; if (!private) { private = kzalloc(sizeof(*private), GFP_KERNEL | GFP_DMA); if (!private) { @@ -140,7 +138,7 @@ dasd_fba_check_characteristics(struct dasd_device *device) "data failed\n"); return -ENOMEM; } - device->private = (void *) private; + device->private = private; } else { memset(private, 0, sizeof(*private)); } @@ -192,10 +190,9 @@ dasd_fba_check_characteristics(struct dasd_device *device) static int dasd_fba_do_analysis(struct dasd_block *block) { - struct dasd_fba_private *private; + struct dasd_fba_private *private = block->base->private; int sb, rc; - private = (struct dasd_fba_private *) block->base->private; rc = dasd_check_blocksize(private->rdc_data.blk_size); if (rc) { DBF_DEV_EVENT(DBF_WARNING, block->base, "unknown blocksize %d", @@ -254,7 +251,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, struct dasd_block *block, struct request *req) { - struct dasd_fba_private *private; + struct dasd_fba_private *private = block->base->private; unsigned long *idaws; struct LO_fba_data *LO_data; struct dasd_ccw_req *cqr; @@ -267,7 +264,6 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, unsigned int blksize, off; unsigned char cmd; - private = (struct dasd_fba_private *) block->base->private; if (rq_data_dir(req) == READ) { cmd = DASD_FBA_CCW_READ; } else if (rq_data_dir(req) == WRITE) { @@ -379,7 +375,7 @@ static struct dasd_ccw_req *dasd_fba_build_cp(struct dasd_device * memdev, static int dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) { - struct dasd_fba_private *private; + struct dasd_fba_private *private = cqr->block->base->private; struct ccw1 *ccw; struct req_iterator iter; struct bio_vec bv; @@ -389,7 +385,6 @@ dasd_fba_free_cp(struct dasd_ccw_req *cqr, struct request *req) if (!dasd_page_cache) goto out; - private = (struct dasd_fba_private *) cqr->block->base->private; blksize = cqr->block->bp_block; ccw = cqr->cpaddr; /* Skip over define extent & locate record. */ @@ -436,13 +431,14 @@ static int dasd_fba_fill_info(struct dasd_device * device, struct dasd_information2_t * info) { + struct dasd_fba_private *private = device->private; + info->label_block = 1; info->FBA_layout = 1; info->format = DASD_FORMAT_LDL; - info->characteristics_size = sizeof(struct dasd_fba_characteristics); - memcpy(info->characteristics, - &((struct dasd_fba_private *) device->private)->rdc_data, - sizeof (struct dasd_fba_characteristics)); + info->characteristics_size = sizeof(private->rdc_data); + memcpy(info->characteristics, &private->rdc_data, + sizeof(private->rdc_data)); info->confdata_size = 0; return 0; } diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h index ffd7723..8de29be 100644 --- a/drivers/s390/block/dasd_int.h +++ b/drivers/s390/block/dasd_int.h @@ -445,7 +445,7 @@ struct dasd_device { /* Device discipline stuff. */ struct dasd_discipline *discipline; struct dasd_discipline *base_discipline; - char *private; + void *private; struct dasd_path path_data; /* Device state and target state. */ -- cgit v1.1 From baebc70a4db86515d55ff1f226088a8e7f5821a0 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 3 Mar 2016 20:49:57 -0800 Subject: s390: Use pr_warn instead of pr_warning Convert the uses of pr_warning to pr_warn so there are fewer uses of the old pr_warning. Miscellanea: o Align arguments o Coalesce formats Signed-off-by: Joe Perches Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/cpcmd.c | 3 +-- arch/s390/kernel/debug.c | 6 ++---- arch/s390/kernel/time.c | 6 ++---- arch/s390/mm/extmem.c | 16 +++++++--------- drivers/s390/block/dasd_devmap.c | 10 ++++------ drivers/s390/block/dasd_diag.c | 28 +++++++++++++--------------- drivers/s390/block/dasd_genhd.c | 4 ++-- drivers/s390/block/dasd_ioctl.c | 5 ++--- drivers/s390/block/dasd_proc.c | 5 ++--- drivers/s390/block/dcssblk.c | 13 ++++++------- drivers/s390/char/monreader.c | 10 +++++----- drivers/s390/char/sclp_cmd.c | 27 ++++++++++++--------------- drivers/s390/char/sclp_cpi_sys.c | 6 ++---- drivers/s390/char/tape_core.c | 4 ++-- drivers/s390/char/vmlogrdr.c | 6 ++---- drivers/s390/cio/blacklist.c | 9 ++++----- drivers/s390/cio/ccwreq.c | 13 ++++++------- drivers/s390/cio/cio.c | 2 +- drivers/s390/cio/device.c | 23 ++++++++++------------- drivers/s390/net/lcs.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 2 +- 21 files changed, 88 insertions(+), 114 deletions(-) diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c index 7f76891..7f48e56 100644 --- a/arch/s390/kernel/cpcmd.c +++ b/arch/s390/kernel/cpcmd.c @@ -96,8 +96,7 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code) (((unsigned long)response + rlen) >> 31)) { lowbuf = kmalloc(rlen, GFP_KERNEL | GFP_DMA); if (!lowbuf) { - pr_warning("The cpcmd kernel function failed to " - "allocate a response buffer\n"); + pr_warn("The cpcmd kernel function failed to allocate a response buffer\n"); return -ENOMEM; } spin_lock_irqsave(&cpcmd_lock, flags); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index c890a55..aa12de7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -699,8 +699,7 @@ debug_info_t *debug_register_mode(const char *name, int pages_per_area, /* Since debugfs currently does not support uid/gid other than root, */ /* we do not allow gid/uid != 0 until we get support for that. */ if ((uid != 0) || (gid != 0)) - pr_warning("Root becomes the owner of all s390dbf files " - "in sysfs\n"); + pr_warn("Root becomes the owner of all s390dbf files in sysfs\n"); BUG_ON(!initialized); mutex_lock(&debug_mutex); @@ -1307,8 +1306,7 @@ debug_input_level_fn(debug_info_t * id, struct debug_view *view, new_level = debug_get_uint(str); } if(new_level < 0) { - pr_warning("%s is not a valid level for a debug " - "feature\n", str); + pr_warn("%s is not a valid level for a debug feature\n", str); rc = -EINVAL; } else { debug_set_level(id, new_level); diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index 99f84ac31..c4e5f18 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -499,8 +499,7 @@ static void etr_reset(void) if (etr_port0_online && etr_port1_online) set_bit(CLOCK_SYNC_ETR, &clock_sync_flags); } else if (etr_port0_online || etr_port1_online) { - pr_warning("The real or virtual hardware system does " - "not provide an ETR interface\n"); + pr_warn("The real or virtual hardware system does not provide an ETR interface\n"); etr_port0_online = etr_port1_online = 0; } } @@ -1464,8 +1463,7 @@ static void __init stp_reset(void) if (rc == 0) set_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags); else if (stp_online) { - pr_warning("The real or virtual hardware system does " - "not provide an STP interface\n"); + pr_warn("The real or virtual hardware system does not provide an STP interface\n"); free_page((unsigned long) stp_page); stp_page = NULL; stp_online = 0; diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index a1bf4ad..02042b6 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -265,7 +265,7 @@ query_segment_type (struct dcss_segment *seg) goto out_free; } if (diag_cc > 1) { - pr_warning("Querying a DCSS type failed with rc=%ld\n", vmrc); + pr_warn("Querying a DCSS type failed with rc=%ld\n", vmrc); rc = dcss_diag_translate_rc (vmrc); goto out_free; } @@ -457,8 +457,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long goto out_resource; } if (diag_cc > 1) { - pr_warning("Loading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Loading DCSS %s failed with rc=%ld\n", name, end_addr); rc = dcss_diag_translate_rc(end_addr); dcss_diag(&purgeseg_scode, seg->dcss_name, &dummy, &dummy); @@ -574,8 +573,7 @@ segment_modify_shared (char *name, int do_nonshared) goto out_unlock; } if (atomic_read (&seg->ref_count) != 1) { - pr_warning("DCSS %s is in use and cannot be reloaded\n", - name); + pr_warn("DCSS %s is in use and cannot be reloaded\n", name); rc = -EAGAIN; goto out_unlock; } @@ -588,8 +586,8 @@ segment_modify_shared (char *name, int do_nonshared) seg->res->flags |= IORESOURCE_READONLY; if (request_resource(&iomem_resource, seg->res)) { - pr_warning("DCSS %s overlaps with used memory resources " - "and cannot be reloaded\n", name); + pr_warn("DCSS %s overlaps with used memory resources and cannot be reloaded\n", + name); rc = -EBUSY; kfree(seg->res); goto out_del_mem; @@ -607,8 +605,8 @@ segment_modify_shared (char *name, int do_nonshared) goto out_del_res; } if (diag_cc > 1) { - pr_warning("Reloading DCSS %s failed with rc=%ld\n", name, - end_addr); + pr_warn("Reloading DCSS %s failed with rc=%ld\n", + name, end_addr); rc = dcss_diag_translate_rc(end_addr); goto out_del_res; } diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c index 8286f74..2f18f61 100644 --- a/drivers/s390/block/dasd_devmap.c +++ b/drivers/s390/block/dasd_devmap.c @@ -214,8 +214,8 @@ dasd_feature_list(char *str, char **endp) else if (len == 8 && !strncmp(str, "failfast", 8)) features |= DASD_FEATURE_FAILFAST; else { - pr_warning("%*s is not a supported device option\n", - len, str); + pr_warn("%*s is not a supported device option\n", + len, str); rc = -EINVAL; } str += len; @@ -224,8 +224,7 @@ dasd_feature_list(char *str, char **endp) str++; } if (*str != ')') { - pr_warning("A closing parenthesis ')' is missing in the " - "dasd= parameter\n"); + pr_warn("A closing parenthesis ')' is missing in the dasd= parameter\n"); rc = -EINVAL; } else str++; @@ -348,8 +347,7 @@ dasd_parse_range( char *parsestring ) { return str + 1; if (*str == '\0') return str; - pr_warning("The dasd= parameter value %s has an invalid ending\n", - str); + pr_warn("The dasd= parameter value %s has an invalid ending\n", str); return ERR_PTR(-EINVAL); } diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c index 38c5c6f..bb2d26a 100644 --- a/drivers/s390/block/dasd_diag.c +++ b/drivers/s390/block/dasd_diag.c @@ -144,14 +144,13 @@ dasd_diag_erp(struct dasd_device *device) rc = mdsk_init_io(device, device->block->bp_block, 0, NULL); if (rc == 4) { if (!(test_and_set_bit(DASD_FLAG_DEVICE_RO, &device->flags))) - pr_warning("%s: The access mode of a DIAG device " - "changed to read-only\n", - dev_name(&device->cdev->dev)); + pr_warn("%s: The access mode of a DIAG device changed to read-only\n", + dev_name(&device->cdev->dev)); rc = 0; } if (rc) - pr_warning("%s: DIAG ERP failed with " - "rc=%d\n", dev_name(&device->cdev->dev), rc); + pr_warn("%s: DIAG ERP failed with rc=%d\n", + dev_name(&device->cdev->dev), rc); } /* Start a given request at the device. Return zero on success, non-zero @@ -367,9 +366,9 @@ dasd_diag_check_device(struct dasd_device *device) private->pt_block = 2; break; default: - pr_warning("%s: Device type %d is not supported " - "in DIAG mode\n", dev_name(&device->cdev->dev), - private->rdc_data.vdev_class); + pr_warn("%s: Device type %d is not supported in DIAG mode\n", + dev_name(&device->cdev->dev), + private->rdc_data.vdev_class); rc = -EOPNOTSUPP; goto out; } @@ -410,8 +409,8 @@ dasd_diag_check_device(struct dasd_device *device) private->iob.flaga = DASD_DIAG_FLAGA_DEFAULT; rc = dia250(&private->iob, RW_BIO); if (rc == 3) { - pr_warning("%s: A 64-bit DIAG call failed\n", - dev_name(&device->cdev->dev)); + pr_warn("%s: A 64-bit DIAG call failed\n", + dev_name(&device->cdev->dev)); rc = -EOPNOTSUPP; goto out_label; } @@ -420,9 +419,8 @@ dasd_diag_check_device(struct dasd_device *device) break; } if (bsize > PAGE_SIZE) { - pr_warning("%s: Accessing the DASD failed because of an " - "incorrect format (rc=%d)\n", - dev_name(&device->cdev->dev), rc); + pr_warn("%s: Accessing the DASD failed because of an incorrect format (rc=%d)\n", + dev_name(&device->cdev->dev), rc); rc = -EIO; goto out_label; } @@ -440,8 +438,8 @@ dasd_diag_check_device(struct dasd_device *device) block->s2b_shift++; rc = mdsk_init_io(device, block->bp_block, 0, NULL); if (rc && (rc != 4)) { - pr_warning("%s: DIAG initialization failed with rc=%d\n", - dev_name(&device->cdev->dev), rc); + pr_warn("%s: DIAG initialization failed with rc=%d\n", + dev_name(&device->cdev->dev), rc); rc = -EIO; } else { if (rc == 4) diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c index ef1d9fb..31d544a 100644 --- a/drivers/s390/block/dasd_genhd.c +++ b/drivers/s390/block/dasd_genhd.c @@ -178,8 +178,8 @@ int dasd_gendisk_init(void) /* Register to static dasd major 94 */ rc = register_blkdev(DASD_MAJOR, "dasd"); if (rc != 0) { - pr_warning("Registering the device driver with major number " - "%d failed\n", DASD_MAJOR); + pr_warn("Registering the device driver with major number %d failed\n", + DASD_MAJOR); return rc; } return 0; diff --git a/drivers/s390/block/dasd_ioctl.c b/drivers/s390/block/dasd_ioctl.c index 16bb5ec..90f30cc 100644 --- a/drivers/s390/block/dasd_ioctl.c +++ b/drivers/s390/block/dasd_ioctl.c @@ -265,9 +265,8 @@ dasd_ioctl_format(struct block_device *bdev, void __user *argp) return -EFAULT; } if (bdev != bdev->bd_contains) { - pr_warning("%s: The specified DASD is a partition and cannot " - "be formatted\n", - dev_name(&base->cdev->dev)); + pr_warn("%s: The specified DASD is a partition and cannot be formatted\n", + dev_name(&base->cdev->dev)); dasd_put_device(base); return -EINVAL; } diff --git a/drivers/s390/block/dasd_proc.c b/drivers/s390/block/dasd_proc.c index aa7bb2d..bad7a19 100644 --- a/drivers/s390/block/dasd_proc.c +++ b/drivers/s390/block/dasd_proc.c @@ -322,13 +322,12 @@ static ssize_t dasd_stats_proc_write(struct file *file, return user_len; out_parse_error: rc = -EINVAL; - pr_warning("%s is not a supported value for /proc/dasd/statistics\n", - str); + pr_warn("%s is not a supported value for /proc/dasd/statistics\n", str); out_error: vfree(buffer); return rc; #else - pr_warning("/proc/dasd/statistics: is not activated in this kernel\n"); + pr_warn("/proc/dasd/statistics: is not activated in this kernel\n"); return user_len; #endif /* CONFIG_DASD_PROFILE */ } diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index ce7b701..1bce9cf 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -738,15 +738,15 @@ dcssblk_remove_store(struct device *dev, struct device_attribute *attr, const ch dev_info = dcssblk_get_device_by_name(local_buf); if (dev_info == NULL) { up_write(&dcssblk_devices_sem); - pr_warning("Device %s cannot be removed because it is not a " - "known device\n", local_buf); + pr_warn("Device %s cannot be removed because it is not a known device\n", + local_buf); rc = -ENODEV; goto out_buf; } if (atomic_read(&dev_info->use_count) != 0) { up_write(&dcssblk_devices_sem); - pr_warning("Device %s cannot be removed while it is in " - "use\n", local_buf); + pr_warn("Device %s cannot be removed while it is in use\n", + local_buf); rc = -EBUSY; goto out_buf; } @@ -850,9 +850,8 @@ dcssblk_make_request(struct request_queue *q, struct bio *bio) case SEG_TYPE_SC: /* cannot write to these segments */ if (bio_data_dir(bio) == WRITE) { - pr_warning("Writing to %s failed because it " - "is a read-only device\n", - dev_name(&dev_info->dev)); + pr_warn("Writing to %s failed because it is a read-only device\n", + dev_name(&dev_info->dev)); goto fail; } } diff --git a/drivers/s390/char/monreader.c b/drivers/s390/char/monreader.c index fc94bfd..ebdeaa5 100644 --- a/drivers/s390/char/monreader.c +++ b/drivers/s390/char/monreader.c @@ -257,7 +257,7 @@ static void mon_iucv_message_pending(struct iucv_path *path, memcpy(&monpriv->msg_array[monpriv->write_index]->msg, msg, sizeof(*msg)); if (atomic_inc_return(&monpriv->msglim_count) == MON_MSGLIM) { - pr_warning("The read queue for monitor data is full\n"); + pr_warn("The read queue for monitor data is full\n"); monpriv->msg_array[monpriv->write_index]->msglim_reached = 1; } monpriv->write_index = (monpriv->write_index + 1) % MON_MSGLIM; @@ -342,8 +342,8 @@ static int mon_close(struct inode *inode, struct file *filp) if (monpriv->path) { rc = iucv_path_sever(monpriv->path, user_data_sever); if (rc) - pr_warning("Disconnecting the z/VM *MONITOR system " - "service failed with rc=%i\n", rc); + pr_warn("Disconnecting the z/VM *MONITOR system service failed with rc=%i\n", + rc); iucv_path_free(monpriv->path); } @@ -469,8 +469,8 @@ static int monreader_freeze(struct device *dev) if (monpriv->path) { rc = iucv_path_sever(monpriv->path, user_data_sever); if (rc) - pr_warning("Disconnecting the z/VM *MONITOR system " - "service failed with rc=%i\n", rc); + pr_warn("Disconnecting the z/VM *MONITOR system service failed with rc=%i\n", + rc); iucv_path_free(monpriv->path); } atomic_set(&monpriv->iucv_severed, 0); diff --git a/drivers/s390/char/sclp_cmd.c b/drivers/s390/char/sclp_cmd.c index 806239c..d3947ea 100644 --- a/drivers/s390/char/sclp_cmd.c +++ b/drivers/s390/char/sclp_cmd.c @@ -67,8 +67,8 @@ int sclp_sync_request_timeout(sclp_cmdw_t cmd, void *sccb, int timeout) /* Check response. */ if (request->status != SCLP_REQ_DONE) { - pr_warning("sync request failed (cmd=0x%08x, " - "status=0x%02x)\n", cmd, request->status); + pr_warn("sync request failed (cmd=0x%08x, status=0x%02x)\n", + cmd, request->status); rc = -EIO; } out: @@ -122,8 +122,8 @@ int sclp_get_core_info(struct sclp_core_info *info) if (rc) goto out; if (sccb->header.response_code != 0x0010) { - pr_warning("readcpuinfo failed (response=0x%04x)\n", - sccb->header.response_code); + pr_warn("readcpuinfo failed (response=0x%04x)\n", + sccb->header.response_code); rc = -EIO; goto out; } @@ -160,9 +160,8 @@ static int do_core_configure(sclp_cmdw_t cmd) case 0x0120: break; default: - pr_warning("configure cpu failed (cmd=0x%08x, " - "response=0x%04x)\n", cmd, - sccb->header.response_code); + pr_warn("configure cpu failed (cmd=0x%08x, response=0x%04x)\n", + cmd, sccb->header.response_code); rc = -EIO; break; } @@ -230,9 +229,8 @@ static int do_assign_storage(sclp_cmdw_t cmd, u16 rn) case 0x0120: break; default: - pr_warning("assign storage failed (cmd=0x%08x, " - "response=0x%04x, rn=0x%04x)\n", cmd, - sccb->header.response_code, rn); + pr_warn("assign storage failed (cmd=0x%08x, response=0x%04x, rn=0x%04x)\n", + cmd, sccb->header.response_code, rn); rc = -EIO; break; } @@ -675,9 +673,8 @@ static int do_chp_configure(sclp_cmdw_t cmd) case 0x0450: break; default: - pr_warning("configure channel-path failed " - "(cmd=0x%08x, response=0x%04x)\n", cmd, - sccb->header.response_code); + pr_warn("configure channel-path failed (cmd=0x%08x, response=0x%04x)\n", + cmd, sccb->header.response_code); rc = -EIO; break; } @@ -744,8 +741,8 @@ int sclp_chp_read_info(struct sclp_chp_info *info) if (rc) goto out; if (sccb->header.response_code != 0x0010) { - pr_warning("read channel-path info failed " - "(response=0x%04x)\n", sccb->header.response_code); + pr_warn("read channel-path info failed (response=0x%04x)\n", + sccb->header.response_code); rc = -EIO; goto out; } diff --git a/drivers/s390/char/sclp_cpi_sys.c b/drivers/s390/char/sclp_cpi_sys.c index 2acea80..f344e5b 100644 --- a/drivers/s390/char/sclp_cpi_sys.c +++ b/drivers/s390/char/sclp_cpi_sys.c @@ -154,16 +154,14 @@ static int cpi_req(void) wait_for_completion(&completion); if (req->status != SCLP_REQ_DONE) { - pr_warning("request failed (status=0x%02x)\n", - req->status); + pr_warn("request failed (status=0x%02x)\n", req->status); rc = -EIO; goto out_free_req; } response = ((struct cpi_sccb *) req->sccb)->header.response_code; if (response != 0x0020) { - pr_warning("request failed with response code 0x%x\n", - response); + pr_warn("request failed with response code 0x%x\n", response); rc = -EIO; } diff --git a/drivers/s390/char/tape_core.c b/drivers/s390/char/tape_core.c index f3b5123..3c379da 100644 --- a/drivers/s390/char/tape_core.c +++ b/drivers/s390/char/tape_core.c @@ -699,8 +699,8 @@ tape_generic_remove(struct ccw_device *cdev) */ DBF_EVENT(3, "(%08x): Drive in use vanished!\n", device->cdev_id); - pr_warning("%s: A tape unit was detached while in " - "use\n", dev_name(&device->cdev->dev)); + pr_warn("%s: A tape unit was detached while in use\n", + dev_name(&device->cdev->dev)); tape_state_set(device, TS_NOT_OPER); __tape_discard_requests(device); spin_unlock_irq(get_ccwdev_lock(device->cdev)); diff --git a/drivers/s390/char/vmlogrdr.c b/drivers/s390/char/vmlogrdr.c index 799c152..e883063 100644 --- a/drivers/s390/char/vmlogrdr.c +++ b/drivers/s390/char/vmlogrdr.c @@ -343,8 +343,7 @@ static int vmlogrdr_open (struct inode *inode, struct file *filp) if (logptr->autorecording) { ret = vmlogrdr_recording(logptr,1,logptr->autopurge); if (ret) - pr_warning("vmlogrdr: failed to start " - "recording automatically\n"); + pr_warn("vmlogrdr: failed to start recording automatically\n"); } /* create connection to the system service */ @@ -396,8 +395,7 @@ static int vmlogrdr_release (struct inode *inode, struct file *filp) if (logptr->autorecording) { ret = vmlogrdr_recording(logptr,0,logptr->autopurge); if (ret) - pr_warning("vmlogrdr: failed to stop " - "recording automatically\n"); + pr_warn("vmlogrdr: failed to stop recording automatically\n"); } logptr->dev_in_use = 0; diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c index 20314aa..9082476 100644 --- a/drivers/s390/cio/blacklist.c +++ b/drivers/s390/cio/blacklist.c @@ -51,9 +51,8 @@ static int blacklist_range(range_action action, unsigned int from_ssid, { if ((from_ssid > to_ssid) || ((from_ssid == to_ssid) && (from > to))) { if (msgtrigger) - pr_warning("0.%x.%04x to 0.%x.%04x is not a valid " - "range for cio_ignore\n", from_ssid, from, - to_ssid, to); + pr_warn("0.%x.%04x to 0.%x.%04x is not a valid range for cio_ignore\n", + from_ssid, from, to_ssid, to); return 1; } @@ -140,8 +139,8 @@ static int parse_busid(char *str, unsigned int *cssid, unsigned int *ssid, rc = 0; out: if (rc && msgtrigger) - pr_warning("%s is not a valid device for the cio_ignore " - "kernel parameter\n", str); + pr_warn("%s is not a valid device for the cio_ignore kernel parameter\n", + str); return rc; } diff --git a/drivers/s390/cio/ccwreq.c b/drivers/s390/cio/ccwreq.c index 79f5991..2782100 100644 --- a/drivers/s390/cio/ccwreq.c +++ b/drivers/s390/cio/ccwreq.c @@ -333,13 +333,12 @@ void ccw_request_timeout(struct ccw_device *cdev) for (chp = 0; chp < 8; chp++) { if ((0x80 >> chp) & sch->schib.pmcw.lpum) - pr_warning("%s: No interrupt was received within %lus " - "(CS=%02x, DS=%02x, CHPID=%x.%02x)\n", - dev_name(&cdev->dev), req->timeout / HZ, - scsw_cstat(&sch->schib.scsw), - scsw_dstat(&sch->schib.scsw), - sch->schid.cssid, - sch->schib.pmcw.chpid[chp]); + pr_warn("%s: No interrupt was received within %lus (CS=%02x, DS=%02x, CHPID=%x.%02x)\n", + dev_name(&cdev->dev), req->timeout / HZ, + scsw_cstat(&sch->schib.scsw), + scsw_dstat(&sch->schib.scsw), + sch->schid.cssid, + sch->schib.pmcw.chpid[chp]); } if (!ccwreq_next_path(cdev)) { diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index 39a8ae5..de6fccc 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -656,7 +656,7 @@ struct subchannel *cio_probe_console(void) sch_no = cio_get_console_sch_no(); if (sch_no == -1) { - pr_warning("No CCW console was found\n"); + pr_warn("No CCW console was found\n"); return ERR_PTR(-ENODEV); } init_subchannel_id(&schid); diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 6aae684..7ada078 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -364,11 +364,11 @@ int ccw_device_set_offline(struct ccw_device *cdev) cdev->private->state == DEV_STATE_DISCONNECTED)); /* Inform the user if set offline failed. */ if (cdev->private->state == DEV_STATE_BOXED) { - pr_warning("%s: The device entered boxed state while " - "being set offline\n", dev_name(&cdev->dev)); + pr_warn("%s: The device entered boxed state while being set offline\n", + dev_name(&cdev->dev)); } else if (cdev->private->state == DEV_STATE_NOT_OPER) { - pr_warning("%s: The device stopped operating while " - "being set offline\n", dev_name(&cdev->dev)); + pr_warn("%s: The device stopped operating while being set offline\n", + dev_name(&cdev->dev)); } /* Give up reference from ccw_device_set_online(). */ put_device(&cdev->dev); @@ -429,13 +429,11 @@ int ccw_device_set_online(struct ccw_device *cdev) spin_unlock_irq(cdev->ccwlock); /* Inform the user that set online failed. */ if (cdev->private->state == DEV_STATE_BOXED) { - pr_warning("%s: Setting the device online failed " - "because it is boxed\n", - dev_name(&cdev->dev)); + pr_warn("%s: Setting the device online failed because it is boxed\n", + dev_name(&cdev->dev)); } else if (cdev->private->state == DEV_STATE_NOT_OPER) { - pr_warning("%s: Setting the device online failed " - "because it is not operational\n", - dev_name(&cdev->dev)); + pr_warn("%s: Setting the device online failed because it is not operational\n", + dev_name(&cdev->dev)); } /* Give up online reference since onlining failed. */ put_device(&cdev->dev); @@ -619,9 +617,8 @@ initiate_logging(struct device *dev, struct device_attribute *attr, rc = chsc_siosl(sch->schid); if (rc < 0) { - pr_warning("Logging for subchannel 0.%x.%04x failed with " - "errno=%d\n", - sch->schid.ssid, sch->schid.sch_no, rc); + pr_warn("Logging for subchannel 0.%x.%04x failed with errno=%d\n", + sch->schid.ssid, sch->schid.sch_no, rc); return rc; } pr_notice("Logging for subchannel 0.%x.%04x was triggered\n", diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c index 2f5b518..251db0a 100644 --- a/drivers/s390/net/lcs.c +++ b/drivers/s390/net/lcs.c @@ -1761,8 +1761,8 @@ lcs_get_control(struct lcs_card *card, struct lcs_cmd *cmd) lcs_schedule_recovery(card); break; case LCS_CMD_STOPLAN: - pr_warning("Stoplan for %s initiated by LGW.\n", - card->dev->name); + pr_warn("Stoplan for %s initiated by LGW\n", + card->dev->name); if (card->dev) netif_carrier_off(card->dev); break; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 7c8c68c..ac54433 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -3624,7 +3624,7 @@ static int qeth_l3_register_notifiers(void) return rc; } #else - pr_warning("There is no IPv6 support for the layer 3 discipline\n"); + pr_warn("There is no IPv6 support for the layer 3 discipline\n"); #endif return 0; } -- cgit v1.1 From 988b86e69ded17f0f1209fd3ef1c4c7f1567dcc1 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 13 Jan 2016 12:54:28 +0100 Subject: s390/pci: add ioctl interface for CLP Provide a user space interface to issue call logical-processor instructions. Only selected CLP commands are allowed, enough to get the full overview of the installed PCI functions. Reviewed-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/clp.h | 27 +++++ arch/s390/include/asm/pci_clp.h | 30 +---- arch/s390/include/uapi/asm/clp.h | 28 +++++ arch/s390/pci/pci_clp.c | 247 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 293 insertions(+), 39 deletions(-) create mode 100644 arch/s390/include/uapi/asm/clp.h diff --git a/arch/s390/include/asm/clp.h b/arch/s390/include/asm/clp.h index a0e71a5..5687d62 100644 --- a/arch/s390/include/asm/clp.h +++ b/arch/s390/include/asm/clp.h @@ -4,14 +4,23 @@ /* CLP common request & response block size */ #define CLP_BLK_SIZE PAGE_SIZE +#define CLP_LPS_BASE 0 +#define CLP_LPS_PCI 2 + struct clp_req_hdr { u16 len; u16 cmd; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; struct clp_rsp_hdr { u16 len; u16 rsp; + u32 fmt : 4; + u32 reserved1 : 28; + u64 reserved2; } __packed; /* CLP Response Codes */ @@ -25,4 +34,22 @@ struct clp_rsp_hdr { #define CLP_RC_NODATA 0x0080 /* No data available */ #define CLP_RC_FC_UNKNOWN 0x0100 /* Function code not recognized */ +/* Store logical-processor characteristics request */ +struct clp_req_slpc { + struct clp_req_hdr hdr; +} __packed; + +struct clp_rsp_slpc { + struct clp_rsp_hdr hdr; + u32 reserved2[4]; + u32 lpif[8]; + u32 reserved3[8]; + u32 lpic[8]; +} __packed; + +struct clp_req_rsp_slpc { + struct clp_req_slpc request; + struct clp_rsp_slpc response; +} __packed; + #endif diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index dd78f92..e75c64cb 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -49,9 +49,6 @@ struct clp_fh_list_entry { /* List PCI functions request */ struct clp_req_list_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u64 reserved2; } __packed; @@ -59,9 +56,6 @@ struct clp_req_list_pci { /* List PCI functions response */ struct clp_rsp_list_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u64 resume_token; u32 reserved2; u16 max_fn; @@ -73,9 +67,6 @@ struct clp_rsp_list_pci { /* Query PCI function request */ struct clp_req_query_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved2; u64 reserved3; @@ -84,9 +75,6 @@ struct clp_req_query_pci { /* Query PCI function response */ struct clp_rsp_query_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 : 64; u16 vfn; /* virtual fn number */ u16 : 7; u16 util_str_avail : 1; /* utility string available? */ @@ -108,21 +96,15 @@ struct clp_rsp_query_pci { /* Query PCI function group request */ struct clp_req_query_pci_grp { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; - u32 : 24; + u32 reserved2 : 24; u32 pfgid : 8; /* function group id */ - u32 reserved2; - u64 reserved3; + u32 reserved3; + u64 reserved4; } __packed; /* Query PCI function group response */ struct clp_rsp_query_pci_grp { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u16 : 4; u16 noi : 12; /* number of interrupts */ u8 version; @@ -141,9 +123,6 @@ struct clp_rsp_query_pci_grp { /* Set PCI function request */ struct clp_req_set_pci { struct clp_req_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u16 reserved2; u8 oc; /* operation controls */ @@ -154,9 +133,6 @@ struct clp_req_set_pci { /* Set PCI function response */ struct clp_rsp_set_pci { struct clp_rsp_hdr hdr; - u32 fmt : 4; /* cmd request block format */ - u32 : 28; - u64 reserved1; u32 fh; /* function handle */ u32 reserved3; u64 reserved4; diff --git a/arch/s390/include/uapi/asm/clp.h b/arch/s390/include/uapi/asm/clp.h new file mode 100644 index 0000000..ab72d9d --- /dev/null +++ b/arch/s390/include/uapi/asm/clp.h @@ -0,0 +1,28 @@ +/* + * ioctl interface for /dev/clp + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ + +#ifndef _ASM_CLP_H +#define _ASM_CLP_H + +#include +#include + +struct clp_req { + unsigned int c : 1; + unsigned int r : 1; + unsigned int lps : 6; + unsigned int cmd : 8; + unsigned int : 16; + unsigned int reserved; + __u64 data_p; +}; + +#define CLP_IOCTL_MAGIC 'c' + +#define CLP_SYNC _IOWR(CLP_IOCTL_MAGIC, 0xC1, struct clp_req) + +#endif diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index d6e411e..21591dd 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -8,13 +8,19 @@ #define KMSG_COMPONENT "zpci" #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt +#include #include +#include #include #include #include #include +#include #include #include +#include +#include +#include static inline void zpci_err_clp(unsigned int rsp, int rc) { @@ -27,21 +33,43 @@ static inline void zpci_err_clp(unsigned int rsp, int rc) } /* - * Call Logical Processor - * Retry logic is handled by the caller. + * Call Logical Processor with c=1, lps=0 and command 1 + * to get the bit mask of installed logical processors */ -static inline u8 clp_instr(void *data) +static inline int clp_get_ilp(unsigned long *ilp) +{ + unsigned long mask; + int cc = 3; + + asm volatile ( + " .insn rrf,0xb9a00000,%[mask],%[cmd],8,0\n" + "0: ipm %[cc]\n" + " srl %[cc],28\n" + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [mask] "=d" (mask) : [cmd] "a" (1) + : "cc"); + *ilp = mask; + return cc; +} + +/* + * Call Logical Processor with c=0, the give constant lps and an lpcb request. + */ +static inline int clp_req(void *data, unsigned int lps) { struct { u8 _[CLP_BLK_SIZE]; } *req = data; u64 ignored; - u8 cc; + int cc = 3; asm volatile ( - " .insn rrf,0xb9a00000,%[ign],%[req],0x0,0x2\n" - " ipm %[cc]\n" + " .insn rrf,0xb9a00000,%[ign],%[req],0,%[lps]\n" + "0: ipm %[cc]\n" " srl %[cc],28\n" - : [cc] "=d" (cc), [ign] "=d" (ignored), "+m" (*req) - : [req] "a" (req) + "1:\n" + EX_TABLE(0b, 1b) + : [cc] "+d" (cc), [ign] "=d" (ignored), "+m" (*req) + : [req] "a" (req), [lps] "i" (lps) : "cc"); return cc; } @@ -90,7 +118,7 @@ static int clp_query_pci_fngrp(struct zpci_dev *zdev, u8 pfgid) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.pfgid = pfgid; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) clp_store_query_pci_fngrp(zdev, &rrb->response); else { @@ -143,7 +171,7 @@ static int clp_query_pci_fn(struct zpci_dev *zdev, u32 fh) rrb->response.hdr.len = sizeof(rrb->response); rrb->request.fh = fh; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (!rc && rrb->response.hdr.rsp == CLP_RC_OK) { rc = clp_store_query_pci_fn(zdev, &rrb->response); if (rc) @@ -214,7 +242,7 @@ static int clp_set_pci_fn(u32 *fh, u8 nr_dma_as, u8 command) rrb->request.oc = command; rrb->request.ndas = nr_dma_as; - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rrb->response.hdr.rsp == CLP_RC_SETPCIFN_BUSY) { retries--; if (retries < 0) @@ -280,7 +308,7 @@ static int clp_list_pci(struct clp_req_rsp_list_pci *rrb, rrb->request.resume_token = resume_token; /* Get PCI function handle list */ - rc = clp_instr(rrb); + rc = clp_req(rrb, CLP_LPS_PCI); if (rc || rrb->response.hdr.rsp != CLP_RC_OK) { zpci_err("List PCI FN:\n"); zpci_err_clp(rrb->response.hdr.rsp, rc); @@ -391,3 +419,198 @@ int clp_rescan_pci_devices_simple(void) clp_free_block(rrb); return rc; } + +static int clp_base_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_BASE) ? -EOPNOTSUPP : 0; +} + +static int clp_base_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_base_slpc(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_pci_slpc(struct clp_req *req, struct clp_req_rsp_slpc *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_list(struct clp_req *req, struct clp_req_rsp_list_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query(struct clp_req *req, + struct clp_req_rsp_query_pci *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_query_grp(struct clp_req *req, + struct clp_req_rsp_query_pci_grp *lpcb) +{ + unsigned long limit = PAGE_SIZE - sizeof(lpcb->request); + + if (lpcb->request.hdr.len != sizeof(lpcb->request) || + lpcb->response.hdr.len > limit) + return -EINVAL; + if (lpcb->request.reserved2 != 0 || lpcb->request.reserved3 != 0 || + lpcb->request.reserved4 != 0) + return -EINVAL; + return clp_req(lpcb, CLP_LPS_PCI) ? -EOPNOTSUPP : 0; +} + +static int clp_pci_command(struct clp_req *req, struct clp_req_hdr *lpcb) +{ + switch (lpcb->cmd) { + case 0x0001: /* store logical-processor characteristics */ + return clp_pci_slpc(req, (void *) lpcb); + case 0x0002: /* list PCI functions */ + return clp_pci_list(req, (void *) lpcb); + case 0x0003: /* query PCI function */ + return clp_pci_query(req, (void *) lpcb); + case 0x0004: /* query PCI function group */ + return clp_pci_query_grp(req, (void *) lpcb); + default: + return -EINVAL; + } +} + +static int clp_normal_command(struct clp_req *req) +{ + struct clp_req_hdr *lpcb; + void __user *uptr; + int rc; + + rc = -EINVAL; + if (req->lps != 0 && req->lps != 2) + goto out; + + rc = -ENOMEM; + lpcb = clp_alloc_block(GFP_KERNEL); + if (!lpcb) + goto out; + + rc = -EFAULT; + uptr = (void __force __user *)(unsigned long) req->data_p; + if (copy_from_user(lpcb, uptr, PAGE_SIZE) != 0) + goto out_free; + + rc = -EINVAL; + if (lpcb->fmt != 0 || lpcb->reserved1 != 0 || lpcb->reserved2 != 0) + goto out_free; + + switch (req->lps) { + case 0: + rc = clp_base_command(req, lpcb); + break; + case 2: + rc = clp_pci_command(req, lpcb); + break; + } + if (rc) + goto out_free; + + rc = -EFAULT; + if (copy_to_user(uptr, lpcb, PAGE_SIZE) != 0) + goto out_free; + + rc = 0; + +out_free: + clp_free_block(lpcb); +out: + return rc; +} + +static int clp_immediate_command(struct clp_req *req) +{ + void __user *uptr; + unsigned long ilp; + int exists; + + if (req->cmd > 1 || clp_get_ilp(&ilp) != 0) + return -EINVAL; + + uptr = (void __force __user *)(unsigned long) req->data_p; + if (req->cmd == 0) { + /* Command code 0: test for a specific processor */ + exists = test_bit_inv(req->lps, &ilp); + return put_user(exists, (int __user *) uptr); + } + /* Command code 1: return bit mask of installed processors */ + return put_user(ilp, (unsigned long __user *) uptr); +} + +static long clp_misc_ioctl(struct file *filp, unsigned int cmd, + unsigned long arg) +{ + struct clp_req req; + void __user *argp; + + if (cmd != CLP_SYNC) + return -EINVAL; + + argp = is_compat_task() ? compat_ptr(arg) : (void __user *) arg; + if (copy_from_user(&req, argp, sizeof(req))) + return -EFAULT; + if (req.r != 0) + return -EINVAL; + return req.c ? clp_immediate_command(&req) : clp_normal_command(&req); +} + +static int clp_misc_release(struct inode *inode, struct file *filp) +{ + return 0; +} + +static const struct file_operations clp_misc_fops = { + .owner = THIS_MODULE, + .open = nonseekable_open, + .release = clp_misc_release, + .unlocked_ioctl = clp_misc_ioctl, + .compat_ioctl = clp_misc_ioctl, + .llseek = no_llseek, +}; + +static struct miscdevice clp_misc_device = { + .minor = MISC_DYNAMIC_MINOR, + .name = "clp", + .fops = &clp_misc_fops, +}; + +static int __init clp_misc_init(void) +{ + return misc_register(&clp_misc_device); +} + +device_initcall(clp_misc_init); -- cgit v1.1 From ebde765c0e85f48534f98779b22349bf00761b61 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Mar 2016 11:08:09 +0100 Subject: s390/mm: uninline ptep_xxx functions from pgtable.h The code in the various ptep_xxx functions has grown quite large, consolidate them to four out-of-line functions: ptep_xchg_direct to exchange a pte with another with immediate flushing ptep_xchg_lazy to exchange a pte with another in a batched update ptep_modify_prot_start to begin a protection flags update ptep_modify_prot_commit to commit a protection flags update Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 374 ++++++---------------------------------- arch/s390/kvm/kvm-s390.c | 2 +- arch/s390/mm/pgtable.c | 295 +++++++++++++++++++++++++++---- 3 files changed, 318 insertions(+), 353 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7be9ae8..d102c4e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -680,69 +680,8 @@ static inline void pgste_set(pte_t *ptep, pgste_t pgste) #endif } -static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address, bits, skey; - - if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) - return pgste; - address = pte_val(pte) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); - bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - /* Transfer page changed & referenced bit to guest bits in pgste */ - pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ - /* Copy page access key and fetch protection bit to pgste */ - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; -#endif - return pgste; - -} - -static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, - struct mm_struct *mm) -{ -#ifdef CONFIG_PGSTE - unsigned long address; - unsigned long nkey; - - if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) - return; - VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); - address = pte_val(entry) & PAGE_MASK; - /* - * Set page access key and fetch protection bit from pgste. - * The guest C/R information is still in the PGSTE, set real - * key C/R to 0. - */ - nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; - nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; - page_set_storage_key(address, nkey, 0); -#endif -} - -static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) -{ - if ((pte_val(entry) & _PAGE_PRESENT) && - (pte_val(entry) & _PAGE_WRITE) && - !(pte_val(entry) & _PAGE_INVALID)) { - if (!MACHINE_HAS_ESOP) { - /* - * Without enhanced suppression-on-protection force - * the dirty bit on for all writable ptes. - */ - pte_val(entry) |= _PAGE_DIRTY; - pte_val(entry) &= ~_PAGE_PROTECT; - } - if (!(pte_val(entry) & _PAGE_PROTECT)) - /* This pte allows write access, set user-dirty */ - pgste_val(pgste) |= PGSTE_UC_BIT; - } - *ptep = entry; - return pgste; -} +bool pgste_test_and_clear_dirty(struct mm_struct *, unsigned long address); +void ptep_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); /** * struct gmap_struct - guest address space @@ -791,47 +730,11 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); void gmap_discard(struct gmap *, unsigned long from, unsigned long to); void __gmap_zap(struct gmap *, unsigned long gaddr); -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *); void gmap_register_ipte_notifier(struct gmap_notifier *); void gmap_unregister_ipte_notifier(struct gmap_notifier *); int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); -void gmap_do_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); - -static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - if (pgste_val(pgste) & PGSTE_IN_BIT) { - pgste_val(pgste) &= ~PGSTE_IN_BIT; - gmap_do_ipte_notify(mm, addr, ptep); - } -#endif - return pgste; -} - -/* - * Certain architectures need to do special things when PTEs - * within a page table are directly modified. Thus, the following - * hook is made available. - */ -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; - pgste_set_key(ptep, pgste, entry, mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else { - *ptep = entry; - } -} /* * query functions pte_write/pte_dirty/pte_young only work if @@ -998,95 +901,30 @@ static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep) } while (nr != 255); } -static inline void ptep_flush_direct(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __ptep_ipte_local(address, ptep); - else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - -static inline void ptep_flush_lazy(struct mm_struct *mm, - unsigned long address, pte_t *ptep) -{ - int active, count; - - if (pte_val(*ptep) & _PAGE_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pte_val(*ptep) |= _PAGE_INVALID; - mm->context.flush_mm = 1; - } else - __ptep_ipte(address, ptep); - atomic_sub(0x10000, &mm->context.attach_count); -} - /* - * Get (and clear) the user dirty bit for a pte. + * This is hard to understand. ptep_get_and_clear and ptep_clear_flush + * both clear the TLB for the unmapped pte. The reason is that + * ptep_get_and_clear is used in common code (e.g. change_pte_range) + * to modify an active pte. The sequence is + * 1) ptep_get_and_clear + * 2) set_pte_at + * 3) flush_tlb_range + * On s390 the tlb needs to get flushed with the modification of the pte + * if the pte is active. The only way how this can be implemented is to + * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range + * is a nop. */ -static inline int ptep_test_and_clear_user_dirty(struct mm_struct *mm, - unsigned long addr, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - int dirty; - - if (!mm_has_pgste(mm)) - return 0; - pgste = pgste_get_lock(ptep); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; - pte = *ptep; - if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; - } - pgste_set_unlock(ptep, pgste); - return dirty; -} +pte_t ptep_xchg_direct(struct mm_struct *, unsigned long, pte_t *, pte_t); +pte_t ptep_xchg_lazy(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - int young; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, addr, ptep, pgste); - } - - pte = *ptep; - ptep_flush_direct(vma->vm_mm, addr, ptep); - young = pte_young(pte); - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_update_all(pte, pgste, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, pte_mkold(pte)); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte_mkold(pte); + pte_t pte = *ptep; - return young; + pte = ptep_xchg_direct(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return pte_young(pte); } #define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH @@ -1096,104 +934,22 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, return ptep_test_and_clear_young(vma, address, ptep); } -/* - * This is hard to understand. ptep_get_and_clear and ptep_clear_flush - * both clear the TLB for the unmapped pte. The reason is that - * ptep_get_and_clear is used in common code (e.g. change_pte_range) - * to modify an active pte. The sequence is - * 1) ptep_get_and_clear - * 2) set_pte_at - * 3) flush_tlb_range - * On s390 the tlb needs to get flushed with the modification of the pte - * if the pte is active. The only way how this can be implemented is to - * have ptep_get_and_clear do the tlb flush. In exchange flush_tlb_range - * is a nop. - */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR static inline pte_t ptep_get_and_clear(struct mm_struct *mm, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(pte, pgste, mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION -static inline pte_t ptep_modify_prot_start(struct mm_struct *mm, - unsigned long address, - pte_t *ptep) -{ - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_lazy(mm, address, ptep); - - if (mm_has_pgste(mm)) { - pgste = pgste_update_all(pte, pgste, mm); - pgste_set(ptep, pgste); - } - return pte; -} - -static inline void ptep_modify_prot_commit(struct mm_struct *mm, - unsigned long address, - pte_t *ptep, pte_t pte) -{ - pgste_t pgste; - - if (mm_has_pgste(mm)) { - pgste = pgste_get(ptep); - pgste_set_key(ptep, pgste, pte, mm); - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; -} +pte_t ptep_modify_prot_start(struct mm_struct *, unsigned long, pte_t *); +void ptep_modify_prot_commit(struct mm_struct *, unsigned long, pte_t *, pte_t); #define __HAVE_ARCH_PTEP_CLEAR_FLUSH static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep) + unsigned long addr, pte_t *ptep) { - pgste_t pgste; - pte_t pte; - - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } - - pte = *ptep; - ptep_flush_direct(vma->vm_mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (mm_has_pgste(vma->vm_mm)) { - if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == - _PGSTE_GPS_USAGE_UNUSED) - pte_val(pte) |= _PAGE_UNUSED; - pgste = pgste_update_all(pte, pgste, vma->vm_mm); - pgste_set_unlock(ptep, pgste); - } - return pte; + return ptep_xchg_direct(vma->vm_mm, addr, ptep, __pte(_PAGE_INVALID)); } /* @@ -1205,80 +961,52 @@ static inline pte_t ptep_clear_flush(struct vm_area_struct *vma, */ #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pte_t *ptep, int full) { - pgste_t pgste; - pte_t pte; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - pte = *ptep; - if (!full) - ptep_flush_lazy(mm, address, ptep); - pte_val(*ptep) = _PAGE_INVALID; - - if (!full && mm_has_pgste(mm)) { - pgste = pgste_update_all(pte, pgste, mm); - pgste_set_unlock(ptep, pgste); + if (full) { + pte_t pte = *ptep; + *ptep = __pte(_PAGE_INVALID); + return pte; } - return pte; + return ptep_xchg_lazy(mm, addr, ptep, __pte(_PAGE_INVALID)); } #define __HAVE_ARCH_PTEP_SET_WRPROTECT -static inline pte_t ptep_set_wrprotect(struct mm_struct *mm, - unsigned long address, pte_t *ptep) +static inline void ptep_set_wrprotect(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) { - pgste_t pgste; pte_t pte = *ptep; - if (pte_write(pte)) { - if (mm_has_pgste(mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(mm, address, ptep, pgste); - } - - ptep_flush_lazy(mm, address, ptep); - pte = pte_wrprotect(pte); - - if (mm_has_pgste(mm)) { - pgste = pgste_set_pte(ptep, pgste, pte); - pgste_set_unlock(ptep, pgste); - } else - *ptep = pte; - } - return pte; + if (pte_write(pte)) + ptep_xchg_lazy(mm, addr, ptep, pte_wrprotect(pte)); } #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS static inline int ptep_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pte_t *ptep, + unsigned long addr, pte_t *ptep, pte_t entry, int dirty) { - pgste_t pgste; - pte_t oldpte; - - oldpte = *ptep; - if (pte_same(oldpte, entry)) + if (pte_same(*ptep, entry)) return 0; - if (mm_has_pgste(vma->vm_mm)) { - pgste = pgste_get_lock(ptep); - pgste = pgste_ipte_notify(vma->vm_mm, address, ptep, pgste); - } + ptep_xchg_direct(vma->vm_mm, addr, ptep, entry); + return 1; +} - ptep_flush_direct(vma->vm_mm, address, ptep); +void set_pte_pgste_at(struct mm_struct *, unsigned long, pte_t *, pte_t); - if (mm_has_pgste(vma->vm_mm)) { - if (pte_val(oldpte) & _PAGE_INVALID) - pgste_set_key(ptep, pgste, entry, vma->vm_mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); - } else +/* + * Certain architectures need to do special things when PTEs + * within a page table are directly modified. Thus, the following + * hook is made available. + */ +static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + if (mm_has_pgste(mm)) + set_pte_pgste_at(mm, addr, ptep, entry); + else *ptep = entry; - return 1; } /* diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4af21c7..616e0a1 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -280,7 +280,7 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { address = gfn_to_hva_memslot(memslot, cur_gfn); - if (gmap_test_and_clear_dirty(address, gmap)) + if (pgste_test_and_clear_dirty(gmap->mm, address)) mark_page_dirty(kvm, cur_gfn); } up_read(&gmap->mm->mmap_sem); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 6acd717..30033aa 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -772,7 +772,7 @@ int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) EXPORT_SYMBOL_GPL(gmap_ipte_notify); /** - * gmap_do_ipte_notify - call all invalidation callbacks for a specific pte. + * ptep_ipte_notify - call all invalidation callbacks for a specific pte. * @mm: pointer to the process mm_struct * @addr: virtual address in the process address space * @pte: pointer to the page table entry @@ -780,7 +780,7 @@ EXPORT_SYMBOL_GPL(gmap_ipte_notify); * This function is assumed to be called with the page table lock held * for the pte to notify. */ -void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) +void ptep_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) { unsigned long offset, gaddr; unsigned long *table; @@ -801,7 +801,7 @@ void gmap_do_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) } spin_unlock(&gmap_notifier_lock); } -EXPORT_SYMBOL_GPL(gmap_do_ipte_notify); +EXPORT_SYMBOL_GPL(ptep_ipte_notify); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq) @@ -1158,6 +1158,266 @@ static inline void thp_split_mm(struct mm_struct *mm) } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +static inline pte_t ptep_flush_direct(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __ptep_ipte_local(addr, ptep); + else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +static inline pte_t ptep_flush_lazy(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + int active, count; + pte_t old; + + old = *ptep; + if (unlikely(pte_val(old) & _PAGE_INVALID)) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pte_val(*ptep) |= _PAGE_INVALID; + mm->context.flush_mm = 1; + } else + __ptep_ipte(addr, ptep); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, + struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + unsigned long address, bits, skey; + + if (!mm_use_skey(mm) || pte_val(pte) & _PAGE_INVALID) + return pgste; + address = pte_val(pte) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + /* Transfer page changed & referenced bit to guest bits in pgste */ + pgste_val(pgste) |= bits << 48; /* GR bit & GC bit */ + /* Copy page access key and fetch protection bit to pgste */ + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT); + pgste_val(pgste) |= (skey & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; +#endif + return pgste; + +} + +static inline void pgste_set_key(pte_t *ptep, pgste_t pgste, pte_t entry, + struct mm_struct *mm) +{ +#ifdef CONFIG_PGSTE + unsigned long address; + unsigned long nkey; + + if (!mm_use_skey(mm) || pte_val(entry) & _PAGE_INVALID) + return; + VM_BUG_ON(!(pte_val(*ptep) & _PAGE_INVALID)); + address = pte_val(entry) & PAGE_MASK; + /* + * Set page access key and fetch protection bit from pgste. + * The guest C/R information is still in the PGSTE, set real + * key C/R to 0. + */ + nkey = (pgste_val(pgste) & (PGSTE_ACC_BITS | PGSTE_FP_BIT)) >> 56; + nkey |= (pgste_val(pgste) & (PGSTE_GR_BIT | PGSTE_GC_BIT)) >> 48; + page_set_storage_key(address, nkey, 0); +#endif +} + +static inline pgste_t pgste_set_pte(pte_t *ptep, pgste_t pgste, pte_t entry) +{ +#ifdef CONFIG_PGSTE + if ((pte_val(entry) & _PAGE_PRESENT) && + (pte_val(entry) & _PAGE_WRITE) && + !(pte_val(entry) & _PAGE_INVALID)) { + if (!MACHINE_HAS_ESOP) { + /* + * Without enhanced suppression-on-protection force + * the dirty bit on for all writable ptes. + */ + pte_val(entry) |= _PAGE_DIRTY; + pte_val(entry) &= ~_PAGE_PROTECT; + } + if (!(pte_val(entry) & _PAGE_PROTECT)) + /* This pte allows write access, set user-dirty */ + pgste_val(pgste) |= PGSTE_UC_BIT; + } +#endif + *ptep = entry; + return pgste; +} + +static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, + unsigned long addr, + pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + if (pgste_val(pgste) & PGSTE_IN_BIT) { + pgste_val(pgste) &= ~PGSTE_IN_BIT; + ptep_ipte_notify(mm, addr, ptep); + } +#endif + return pgste; +} + +#ifdef CONFIG_PGSTE +/* + * Test and reset if a guest page is dirty + */ +bool pgste_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + pte_t pte; + bool dirty; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return false; + + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; + } + pgste_set_unlock(ptep, pgste); + + spin_unlock(ptl); + return dirty; +} +EXPORT_SYMBOL_GPL(pgste_test_and_clear_dirty); + +void set_pte_pgste_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + /* the mm_has_pgste() check is done in set_pte_at() */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); +} +EXPORT_SYMBOL(set_pte_pgste_at); +#endif + +static inline pgste_t ptep_xchg_start(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pgste_t pgste = __pgste(0); + + if (mm_has_pgste(mm)) { + pgste = pgste_get_lock(ptep); + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + } + return pgste; +} + +static inline void ptep_xchg_commit(struct mm_struct *mm, + unsigned long addr, pte_t *ptep, + pgste_t pgste, pte_t old, pte_t new) +{ + if (mm_has_pgste(mm)) { + if (pte_val(old) & _PAGE_INVALID) + pgste_set_key(ptep, pgste, new, mm); + if (pte_val(new) & _PAGE_INVALID) { + pgste = pgste_update_all(old, pgste, mm); + if ((pgste_val(pgste) & _PGSTE_GPS_USAGE_MASK) == + _PGSTE_GPS_USAGE_UNUSED) + pte_val(old) |= _PAGE_UNUSED; + } + pgste = pgste_set_pte(ptep, pgste, new); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = new; + } +} + +pte_t ptep_xchg_direct(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) +{ + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_direct(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; +} +EXPORT_SYMBOL(ptep_xchg_direct); + +pte_t ptep_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t new) +{ + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + ptep_xchg_commit(mm, addr, ptep, pgste, old, new); + return old; +} +EXPORT_SYMBOL(ptep_xchg_lazy); + +pte_t ptep_modify_prot_start(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + pgste_t pgste; + pte_t old; + + pgste = ptep_xchg_start(mm, addr, ptep); + old = ptep_flush_lazy(mm, addr, ptep); + if (mm_has_pgste(mm)) { + pgste = pgste_update_all(old, pgste, mm); + pgste_set(ptep, pgste); + } + return old; +} +EXPORT_SYMBOL(ptep_modify_prot_start); + +void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + pgste_t pgste; + + if (mm_has_pgste(mm)) { + pgste = pgste_get(ptep); + pgste_set_key(ptep, pgste, pte, mm); + pgste = pgste_set_pte(ptep, pgste, pte); + pgste_set_unlock(ptep, pgste); + } else { + *ptep = pte; + } +} +EXPORT_SYMBOL(ptep_modify_prot_commit); + /* * switch on pgstes for its userspace process (for kvm) */ @@ -1190,17 +1450,15 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, unsigned long ptev; pgste_t pgste; - pgste = pgste_get_lock(pte); /* * Remove all zero page mappings, * after establishing a policy to forbid zero page mappings * following faults for that page will get fresh anonymous pages */ - if (is_zero_pfn(pte_pfn(*pte))) { - ptep_flush_direct(walk->mm, addr, pte); - pte_val(*pte) = _PAGE_INVALID; - } + if (is_zero_pfn(pte_pfn(*pte))) + ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); /* Clear storage key */ + pgste = pgste_get_lock(pte); pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); ptev = pte_val(*pte); @@ -1266,27 +1524,6 @@ void s390_reset_cmma(struct mm_struct *mm) } EXPORT_SYMBOL_GPL(s390_reset_cmma); -/* - * Test and reset if a guest page is dirty - */ -bool gmap_test_and_clear_dirty(unsigned long address, struct gmap *gmap) -{ - pte_t *pte; - spinlock_t *ptl; - bool dirty = false; - - pte = get_locked_pte(gmap->mm, address, &ptl); - if (unlikely(!pte)) - return false; - - if (ptep_test_and_clear_user_dirty(gmap->mm, address, pte)) - dirty = true; - - spin_unlock(ptl); - return dirty; -} -EXPORT_SYMBOL_GPL(gmap_test_and_clear_dirty); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) -- cgit v1.1 From 227be799c39a28bf5d68187a4ea1b43190d96515 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Mar 2016 11:09:25 +0100 Subject: s390/mm: uninline pmdp_xxx functions from pgtable.h The pmdp_xxx function are smaller than their ptep_xxx counterparts but to keep things symmetrical unline them as well. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 136 ++++++++++++++++------------------------ arch/s390/mm/hugetlbpage.c | 7 +-- arch/s390/mm/pgtable.c | 93 +++++++++++++++++++-------- 3 files changed, 124 insertions(+), 112 deletions(-) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index d102c4e..572001c 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -520,15 +520,6 @@ static inline int pmd_bad(pmd_t pmd) return (pmd_val(pmd) & ~_SEGMENT_ENTRY_BITS) != 0; } -#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS -extern int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty); - -#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH -extern int pmdp_clear_flush_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp); - #define __HAVE_ARCH_PMD_WRITE static inline int pmd_write(pmd_t pmd) { @@ -1203,54 +1194,51 @@ static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp) : "cc" ); } -static inline void pmdp_flush_direct(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) -{ - int active, count; +pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); +pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - if (!MACHINE_HAS_IDTE) { - __pmdp_csp(pmdp); - return; - } - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && - cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - __pmdp_idte_local(address, pmdp); - else - __pmdp_idte(address, pmdp); - atomic_sub(0x10000, &mm->context.attach_count); -} +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + +#define __HAVE_ARCH_PGTABLE_DEPOSIT +void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); + +#define __HAVE_ARCH_PGTABLE_WITHDRAW +pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); -static inline void pmdp_flush_lazy(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) +#define __HAVE_ARCH_PMDP_SET_ACCESS_FLAGS +static inline int pmdp_set_access_flags(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp, + pmd_t entry, int dirty) { - int active, count; + VM_BUG_ON(addr & ~HPAGE_MASK); - if (pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID) - return; - active = (mm == current->active_mm) ? 1 : 0; - count = atomic_add_return(0x10000, &mm->context.attach_count); - if ((count & 0xffff) <= active) { - pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; - mm->context.flush_mm = 1; - } else if (MACHINE_HAS_IDTE) - __pmdp_idte(address, pmdp); - else - __pmdp_csp(pmdp); - atomic_sub(0x10000, &mm->context.attach_count); + entry = pmd_mkyoung(entry); + if (dirty) + entry = pmd_mkdirty(entry); + if (pmd_val(*pmdp) == pmd_val(entry)) + return 0; + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, entry); + return 1; } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG +static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + pmd_t pmd = *pmdp; -#define __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, - pgtable_t pgtable); + pmd = pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd_mkold(pmd)); + return pmd_young(pmd); +} -#define __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); +#define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH +static inline int pmdp_clear_flush_young(struct vm_area_struct *vma, + unsigned long addr, pmd_t *pmdp) +{ + VM_BUG_ON(addr & ~HPAGE_MASK); + return pmdp_test_and_clear_young(vma, addr, pmdp); +} static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t entry) @@ -1266,66 +1254,48 @@ static inline pmd_t pmd_mkhuge(pmd_t pmd) return pmd; } -#define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG -static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) -{ - pmd_t pmd; - - pmd = *pmdp; - pmdp_flush_direct(vma->vm_mm, address, pmdp); - *pmdp = pmd_mkold(pmd); - return pmd_young(pmd); -} - #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmd_t pmd = *pmdp; - - pmdp_flush_direct(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + return pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, - unsigned long address, + unsigned long addr, pmd_t *pmdp, int full) { - pmd_t pmd = *pmdp; - - if (!full) - pmdp_flush_lazy(mm, address, pmdp); - pmd_clear(pmdp); - return pmd; + if (full) { + pmd_t pmd = *pmdp; + *pmdp = __pmd(_SEGMENT_ENTRY_INVALID); + return pmd; + } + return pmdp_xchg_lazy(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_HUGE_CLEAR_FLUSH static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - return pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); + return pmdp_huge_get_and_clear(vma->vm_mm, addr, pmdp); } #define __HAVE_ARCH_PMDP_INVALIDATE static inline void pmdp_invalidate(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { - pmdp_flush_direct(vma->vm_mm, address, pmdp); + pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID)); } #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, - unsigned long address, pmd_t *pmdp) + unsigned long addr, pmd_t *pmdp) { pmd_t pmd = *pmdp; - if (pmd_write(pmd)) { - pmdp_flush_direct(mm, address, pmdp); - set_pmd_at(mm, address, pmdp, pmd_wrprotect(pmd)); - } + if (pmd_write(pmd)) + pmd = pmdp_xchg_lazy(mm, addr, pmdp, pmd_wrprotect(pmd)); } static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index f81096b..1b5e898 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -105,11 +105,10 @@ pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { pmd_t *pmdp = (pmd_t *) ptep; - pte_t pte = huge_ptep_get(ptep); + pmd_t old; - pmdp_flush_direct(mm, addr, pmdp); - pmd_val(*pmdp) = _SEGMENT_ENTRY_EMPTY; - return pte; + old = pmdp_xchg_direct(mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_EMPTY)); + return __pmd_to_pte(old); } pte_t *huge_pte_alloc(struct mm_struct *mm, diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 30033aa..e241262 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1418,6 +1418,74 @@ void ptep_modify_prot_commit(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(ptep_modify_prot_commit); +static inline pmd_t pmdp_flush_direct(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + if (!MACHINE_HAS_IDTE) { + __pmdp_csp(pmdp); + return old; + } + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if (MACHINE_HAS_TLB_LC && (count & 0xffff) <= active && + cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) + __pmdp_idte_local(addr, pmdp); + else + __pmdp_idte(addr, pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, + unsigned long addr, pmd_t *pmdp) +{ + int active, count; + pmd_t old; + + old = *pmdp; + if (pmd_val(old) & _SEGMENT_ENTRY_INVALID) + return old; + active = (mm == current->active_mm) ? 1 : 0; + count = atomic_add_return(0x10000, &mm->context.attach_count); + if ((count & 0xffff) <= active) { + pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID; + mm->context.flush_mm = 1; + } else if (MACHINE_HAS_IDTE) + __pmdp_idte(addr, pmdp); + else + __pmdp_csp(pmdp); + atomic_sub(0x10000, &mm->context.attach_count); + return old; +} + +pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) +{ + pmd_t old; + + old = pmdp_flush_direct(mm, addr, pmdp); + *pmdp = new; + return old; +} +EXPORT_SYMBOL(pmdp_xchg_direct); + +pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, + pmd_t *pmdp, pmd_t new) +{ + pmd_t old; + + old = pmdp_flush_lazy(mm, addr, pmdp); + *pmdp = new; + return old; +} +EXPORT_SYMBOL(pmdp_xchg_lazy); + /* * switch on pgstes for its userspace process (for kvm) */ @@ -1525,31 +1593,6 @@ void s390_reset_cmma(struct mm_struct *mm) EXPORT_SYMBOL_GPL(s390_reset_cmma); #ifdef CONFIG_TRANSPARENT_HUGEPAGE -int pmdp_clear_flush_young(struct vm_area_struct *vma, unsigned long address, - pmd_t *pmdp) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - /* No need to flush TLB - * On s390 reference bits are in storage key and never in TLB */ - return pmdp_test_and_clear_young(vma, address, pmdp); -} - -int pmdp_set_access_flags(struct vm_area_struct *vma, - unsigned long address, pmd_t *pmdp, - pmd_t entry, int dirty) -{ - VM_BUG_ON(address & ~HPAGE_PMD_MASK); - - entry = pmd_mkyoung(entry); - if (dirty) - entry = pmd_mkdirty(entry); - if (pmd_same(*pmdp, entry)) - return 0; - pmdp_invalidate(vma, address, pmdp); - set_pmd_at(vma->vm_mm, address, pmdp, entry); - return 1; -} - void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) { -- cgit v1.1 From 1e133ab296f3ff8d9e58a5e758291ed39ba72ad7 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 8 Mar 2016 11:49:57 +0100 Subject: s390/mm: split arch/s390/mm/pgtable.c The pgtable.c file is quite big, before it grows any larger split it into pgtable.c, pgalloc.c and gmap.c. In addition move the gmap related header definitions into the new gmap.h header and all of the pgste helpers from pgtable.h to pgtable.c. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/gmap.h | 64 ++ arch/s390/include/asm/pgalloc.h | 4 - arch/s390/include/asm/pgtable.h | 123 +--- arch/s390/kernel/asm-offsets.c | 1 + arch/s390/kvm/diag.c | 1 + arch/s390/kvm/interrupt.c | 1 + arch/s390/kvm/kvm-s390.c | 3 +- arch/s390/kvm/priv.c | 1 + arch/s390/mm/Makefile | 4 +- arch/s390/mm/fault.c | 1 + arch/s390/mm/gmap.c | 774 ++++++++++++++++++++ arch/s390/mm/pgalloc.c | 360 +++++++++ arch/s390/mm/pgtable.c | 1532 ++++++--------------------------------- 13 files changed, 1464 insertions(+), 1405 deletions(-) create mode 100644 arch/s390/include/asm/gmap.h create mode 100644 arch/s390/mm/gmap.c create mode 100644 arch/s390/mm/pgalloc.c diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h new file mode 100644 index 0000000..d054c1b --- /dev/null +++ b/arch/s390/include/asm/gmap.h @@ -0,0 +1,64 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky + */ + +#ifndef _ASM_S390_GMAP_H +#define _ASM_S390_GMAP_H + +/** + * struct gmap_struct - guest address space + * @crst_list: list of all crst tables used in the guest address space + * @mm: pointer to the parent mm_struct + * @guest_to_host: radix tree with guest to host address translation + * @host_to_guest: radix tree with pointer to segment table entries + * @guest_table_lock: spinlock to protect all entries in the guest page table + * @table: pointer to the page directory + * @asce: address space control element for gmap page table + * @pfault_enabled: defines if pfaults are applicable for the guest + */ +struct gmap { + struct list_head list; + struct list_head crst_list; + struct mm_struct *mm; + struct radix_tree_root guest_to_host; + struct radix_tree_root host_to_guest; + spinlock_t guest_table_lock; + unsigned long *table; + unsigned long asce; + unsigned long asce_end; + void *private; + bool pfault_enabled; +}; + +/** + * struct gmap_notifier - notify function block for page invalidation + * @notifier_call: address of callback function + */ +struct gmap_notifier { + struct list_head list; + void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); +}; + +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); +void gmap_free(struct gmap *gmap); +void gmap_enable(struct gmap *gmap); +void gmap_disable(struct gmap *gmap); +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len); +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); +unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); +unsigned long gmap_translate(struct gmap *, unsigned long gaddr); +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); +int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); +void gmap_discard(struct gmap *, unsigned long from, unsigned long to); +void __gmap_zap(struct gmap *, unsigned long gaddr); +void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr); + +void gmap_register_ipte_notifier(struct gmap_notifier *); +void gmap_unregister_ipte_notifier(struct gmap_notifier *); +int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); + +#endif /* _ASM_S390_GMAP_H */ diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index 7b7858f..9248719 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -23,10 +23,6 @@ void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); extern int page_table_allocate_pgste; -int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq); -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr); - static inline void clear_table(unsigned long *s, unsigned long val, size_t n) { typedef struct { char _[n]; } addrtype; diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 572001c..2f66645 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -622,111 +622,6 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return pmd; } -static inline pgste_t pgste_get_lock(pte_t *ptep) -{ - unsigned long new = 0; -#ifdef CONFIG_PGSTE - unsigned long old; - - preempt_disable(); - asm( - " lg %0,%2\n" - "0: lgr %1,%0\n" - " nihh %0,0xff7f\n" /* clear PCL bit in old */ - " oihh %1,0x0080\n" /* set PCL bit in new */ - " csg %0,%1,%2\n" - " jl 0b\n" - : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) - : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); -#endif - return __pgste(new); -} - -static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - asm( - " nihh %1,0xff7f\n" /* clear PCL bit */ - " stg %1,%0\n" - : "=Q" (ptep[PTRS_PER_PTE]) - : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) - : "cc", "memory"); - preempt_enable(); -#endif -} - -static inline pgste_t pgste_get(pte_t *ptep) -{ - unsigned long pgste = 0; -#ifdef CONFIG_PGSTE - pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); -#endif - return __pgste(pgste); -} - -static inline void pgste_set(pte_t *ptep, pgste_t pgste) -{ -#ifdef CONFIG_PGSTE - *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; -#endif -} - -bool pgste_test_and_clear_dirty(struct mm_struct *, unsigned long address); -void ptep_ipte_notify(struct mm_struct *, unsigned long addr, pte_t *); - -/** - * struct gmap_struct - guest address space - * @crst_list: list of all crst tables used in the guest address space - * @mm: pointer to the parent mm_struct - * @guest_to_host: radix tree with guest to host address translation - * @host_to_guest: radix tree with pointer to segment table entries - * @guest_table_lock: spinlock to protect all entries in the guest page table - * @table: pointer to the page directory - * @asce: address space control element for gmap page table - * @pfault_enabled: defines if pfaults are applicable for the guest - */ -struct gmap { - struct list_head list; - struct list_head crst_list; - struct mm_struct *mm; - struct radix_tree_root guest_to_host; - struct radix_tree_root host_to_guest; - spinlock_t guest_table_lock; - unsigned long *table; - unsigned long asce; - unsigned long asce_end; - void *private; - bool pfault_enabled; -}; - -/** - * struct gmap_notifier - notify function block for page invalidation - * @notifier_call: address of callback function - */ -struct gmap_notifier { - struct list_head list; - void (*notifier_call)(struct gmap *gmap, unsigned long gaddr); -}; - -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit); -void gmap_free(struct gmap *gmap); -void gmap_enable(struct gmap *gmap); -void gmap_disable(struct gmap *gmap); -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len); -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len); -unsigned long __gmap_translate(struct gmap *, unsigned long gaddr); -unsigned long gmap_translate(struct gmap *, unsigned long gaddr); -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr); -int gmap_fault(struct gmap *, unsigned long gaddr, unsigned int fault_flags); -void gmap_discard(struct gmap *, unsigned long from, unsigned long to); -void __gmap_zap(struct gmap *, unsigned long gaddr); - - -void gmap_register_ipte_notifier(struct gmap_notifier *); -void gmap_unregister_ipte_notifier(struct gmap_notifier *); -int gmap_ipte_notify(struct gmap *, unsigned long start, unsigned long len); - /* * query functions pte_write/pte_dirty/pte_young only work if * pte_present() is true. Undefined behaviour if not.. @@ -984,7 +879,21 @@ static inline int ptep_set_access_flags(struct vm_area_struct *vma, return 1; } -void set_pte_pgste_at(struct mm_struct *, unsigned long, pte_t *, pte_t); +/* + * Additional functions to handle KVM guest page tables + */ +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry); +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep , int reset); +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep); + +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long address); +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, bool nq); +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr); /* * Certain architectures need to do special things when PTEs @@ -995,7 +904,7 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t entry) { if (mm_has_pgste(mm)) - set_pte_pgste_at(mm, addr, ptep, entry); + ptep_set_pte_at(mm, addr, ptep, entry); else *ptep = entry; } diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 53bbc9e..1f95cc1 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include #include #include +#include /* * Make sure that the compiler is new enough. We want a compiler that diff --git a/arch/s390/kvm/diag.c b/arch/s390/kvm/diag.c index 05f7de9..1ea4095 100644 --- a/arch/s390/kvm/diag.c +++ b/arch/s390/kvm/diag.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "kvm-s390.h" #include "trace.h" diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index f88ca72..e5e8739 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "kvm-s390.h" #include "gaccess.h" #include "trace-s390.h" diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 616e0a1..be1f028 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -280,7 +281,7 @@ static void kvm_s390_sync_dirty_log(struct kvm *kvm, for (cur_gfn = memslot->base_gfn; cur_gfn <= last_gfn; cur_gfn++) { address = gfn_to_hva_memslot(memslot, cur_gfn); - if (pgste_test_and_clear_dirty(gmap->mm, address)) + if (test_and_clear_guest_dirty(gmap->mm, address)) mark_page_dirty(kvm, cur_gfn); } up_read(&gmap->mm->mmap_sem); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ed74e86..b632d8d 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index 839592c..2ae54ca 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -2,9 +2,11 @@ # Makefile for the linux s390-specific parts of the memory manager. # -obj-y := init.o fault.o extmem.o mmap.o vmem.o pgtable.o maccess.o +obj-y := init.o fault.o extmem.o mmap.o vmem.o maccess.o obj-y += page-states.o gup.o extable.o pageattr.o mem_detect.o +obj-y += pgtable.o pgalloc.o obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_S390_PTDUMP) += dump_pagetables.o +obj-$(CONFIG_PGSTE) += gmap.o diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 64b3ad1..cce577f 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c new file mode 100644 index 0000000..69247b4 --- /dev/null +++ b/arch/s390/mm/gmap.c @@ -0,0 +1,774 @@ +/* + * KVM guest address space mapping code + * + * Copyright IBM Corp. 2007, 2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * gmap_alloc - allocate a guest address space + * @mm: pointer to the parent mm_struct + * @limit: maximum size of the gmap address space + * + * Returns a guest address space structure. + */ +struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) +{ + struct gmap *gmap; + struct page *page; + unsigned long *table; + unsigned long etype, atype; + + if (limit < (1UL << 31)) { + limit = (1UL << 31) - 1; + atype = _ASCE_TYPE_SEGMENT; + etype = _SEGMENT_ENTRY_EMPTY; + } else if (limit < (1UL << 42)) { + limit = (1UL << 42) - 1; + atype = _ASCE_TYPE_REGION3; + etype = _REGION3_ENTRY_EMPTY; + } else if (limit < (1UL << 53)) { + limit = (1UL << 53) - 1; + atype = _ASCE_TYPE_REGION2; + etype = _REGION2_ENTRY_EMPTY; + } else { + limit = -1UL; + atype = _ASCE_TYPE_REGION1; + etype = _REGION1_ENTRY_EMPTY; + } + gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); + if (!gmap) + goto out; + INIT_LIST_HEAD(&gmap->crst_list); + INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); + INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); + spin_lock_init(&gmap->guest_table_lock); + gmap->mm = mm; + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + goto out_free; + page->index = 0; + list_add(&page->lru, &gmap->crst_list); + table = (unsigned long *) page_to_phys(page); + crst_table_init(table, etype); + gmap->table = table; + gmap->asce = atype | _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | __pa(table); + gmap->asce_end = limit; + down_write(&mm->mmap_sem); + list_add(&gmap->list, &mm->context.gmap_list); + up_write(&mm->mmap_sem); + return gmap; + +out_free: + kfree(gmap); +out: + return NULL; +} +EXPORT_SYMBOL_GPL(gmap_alloc); + +static void gmap_flush_tlb(struct gmap *gmap) +{ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); +} + +static void gmap_radix_tree_free(struct radix_tree_root *root) +{ + struct radix_tree_iter iter; + unsigned long indices[16]; + unsigned long index; + void **slot; + int i, nr; + + /* A radix tree is freed by deleting all of its entries */ + index = 0; + do { + nr = 0; + radix_tree_for_each_slot(slot, root, &iter, index) { + indices[nr] = iter.index; + if (++nr == 16) + break; + } + for (i = 0; i < nr; i++) { + index = indices[i]; + radix_tree_delete(root, index); + } + } while (nr > 0); +} + +/** + * gmap_free - free a guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_free(struct gmap *gmap) +{ + struct page *page, *next; + + /* Flush tlb. */ + if (MACHINE_HAS_IDTE) + __tlb_flush_asce(gmap->mm, gmap->asce); + else + __tlb_flush_global(); + + /* Free all segment & region tables. */ + list_for_each_entry_safe(page, next, &gmap->crst_list, lru) + __free_pages(page, 2); + gmap_radix_tree_free(&gmap->guest_to_host); + gmap_radix_tree_free(&gmap->host_to_guest); + down_write(&gmap->mm->mmap_sem); + list_del(&gmap->list); + up_write(&gmap->mm->mmap_sem); + kfree(gmap); +} +EXPORT_SYMBOL_GPL(gmap_free); + +/** + * gmap_enable - switch primary space to the guest address space + * @gmap: pointer to the guest address space structure + */ +void gmap_enable(struct gmap *gmap) +{ + S390_lowcore.gmap = (unsigned long) gmap; +} +EXPORT_SYMBOL_GPL(gmap_enable); + +/** + * gmap_disable - switch back to the standard primary address space + * @gmap: pointer to the guest address space structure + */ +void gmap_disable(struct gmap *gmap) +{ + S390_lowcore.gmap = 0UL; +} +EXPORT_SYMBOL_GPL(gmap_disable); + +/* + * gmap_alloc_table is assumed to be called with mmap_sem held + */ +static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, + unsigned long init, unsigned long gaddr) +{ + struct page *page; + unsigned long *new; + + /* since we dont free the gmap table until gmap_free we can unlock */ + page = alloc_pages(GFP_KERNEL, 2); + if (!page) + return -ENOMEM; + new = (unsigned long *) page_to_phys(page); + crst_table_init(new, init); + spin_lock(&gmap->mm->page_table_lock); + if (*table & _REGION_ENTRY_INVALID) { + list_add(&page->lru, &gmap->crst_list); + *table = (unsigned long) new | _REGION_ENTRY_LENGTH | + (*table & _REGION_ENTRY_TYPE_MASK); + page->index = gaddr; + page = NULL; + } + spin_unlock(&gmap->mm->page_table_lock); + if (page) + __free_pages(page, 2); + return 0; +} + +/** + * __gmap_segment_gaddr - find virtual address from segment pointer + * @entry: pointer to a segment table entry in the guest address space + * + * Returns the virtual address in the guest address space for the segment + */ +static unsigned long __gmap_segment_gaddr(unsigned long *entry) +{ + struct page *page; + unsigned long offset, mask; + + offset = (unsigned long) entry / sizeof(unsigned long); + offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; + mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); + page = virt_to_page((void *)((unsigned long) entry & mask)); + return page->index + offset; +} + +/** + * __gmap_unlink_by_vmaddr - unlink a single segment via a host address + * @gmap: pointer to the guest address space structure + * @vmaddr: address in the host process address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) +{ + unsigned long *entry; + int flush = 0; + + spin_lock(&gmap->guest_table_lock); + entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); + if (entry) { + flush = (*entry != _SEGMENT_ENTRY_INVALID); + *entry = _SEGMENT_ENTRY_INVALID; + } + spin_unlock(&gmap->guest_table_lock); + return flush; +} + +/** + * __gmap_unmap_by_gaddr - unmap a single segment via a guest address + * @gmap: pointer to the guest address space structure + * @gaddr: address in the guest address space + * + * Returns 1 if a TLB flush is required + */ +static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; +} + +/** + * gmap_unmap_segment - unmap segment from the guest address space + * @gmap: pointer to the guest address space structure + * @to: address in the guest address space + * @len: length of the memory area to unmap + * + * Returns 0 if the unmap succeeded, -EINVAL if not. + */ +int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || to + len < to) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + return 0; +} +EXPORT_SYMBOL_GPL(gmap_unmap_segment); + +/** + * gmap_map_segment - map a segment to the guest address space + * @gmap: pointer to the guest address space structure + * @from: source address in the parent address space + * @to: target address in the guest address space + * @len: length of the memory area to map + * + * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. + */ +int gmap_map_segment(struct gmap *gmap, unsigned long from, + unsigned long to, unsigned long len) +{ + unsigned long off; + int flush; + + if ((from | to | len) & (PMD_SIZE - 1)) + return -EINVAL; + if (len == 0 || from + len < from || to + len < to || + from + len > TASK_MAX_SIZE || to + len > gmap->asce_end) + return -EINVAL; + + flush = 0; + down_write(&gmap->mm->mmap_sem); + for (off = 0; off < len; off += PMD_SIZE) { + /* Remove old translation */ + flush |= __gmap_unmap_by_gaddr(gmap, to + off); + /* Store new translation */ + if (radix_tree_insert(&gmap->guest_to_host, + (to + off) >> PMD_SHIFT, + (void *) from + off)) + break; + } + up_write(&gmap->mm->mmap_sem); + if (flush) + gmap_flush_tlb(gmap); + if (off >= len) + return 0; + gmap_unmap_segment(gmap, to, len); + return -ENOMEM; +} +EXPORT_SYMBOL_GPL(gmap_map_segment); + +/** + * __gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); + return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; +} +EXPORT_SYMBOL_GPL(__gmap_translate); + +/** + * gmap_translate - translate a guest address to a user space address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * + * Returns user space address which corresponds to the guest address or + * -EFAULT if no such mapping exists. + * This function does not establish potentially missing page table entries. + */ +unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long rc; + + down_read(&gmap->mm->mmap_sem); + rc = __gmap_translate(gmap, gaddr); + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_translate); + +/** + * gmap_unlink - disconnect a page table from the gmap shadow tables + * @gmap: pointer to guest mapping meta data structure + * @table: pointer to the host page table + * @vmaddr: vm address associated with the host page table + */ +void gmap_unlink(struct mm_struct *mm, unsigned long *table, + unsigned long vmaddr) +{ + struct gmap *gmap; + int flush; + + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); + if (flush) + gmap_flush_tlb(gmap); + } +} + +/** + * gmap_link - set up shadow page tables to connect a host to a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @vmaddr: vm address + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + * The mmap_sem of the mm that belongs to the address space must be held + * when this function gets called. + */ +int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) +{ + struct mm_struct *mm; + unsigned long *table; + spinlock_t *ptl; + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + int rc; + + /* Create higher level tables in the gmap page table */ + table = gmap->table; + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { + table += (gaddr >> 53) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, + gaddr & 0xffe0000000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { + table += (gaddr >> 42) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, + gaddr & 0xfffffc0000000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { + table += (gaddr >> 31) & 0x7ff; + if ((*table & _REGION_ENTRY_INVALID) && + gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, + gaddr & 0xffffffff80000000UL)) + return -ENOMEM; + table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); + } + table += (gaddr >> 20) & 0x7ff; + /* Walk the parent mm page table */ + mm = gmap->mm; + pgd = pgd_offset(mm, vmaddr); + VM_BUG_ON(pgd_none(*pgd)); + pud = pud_offset(pgd, vmaddr); + VM_BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, vmaddr); + VM_BUG_ON(pmd_none(*pmd)); + /* large pmds cannot yet be handled */ + if (pmd_large(*pmd)) + return -EFAULT; + /* Link gmap segment table entry location to page table. */ + rc = radix_tree_preload(GFP_KERNEL); + if (rc) + return rc; + ptl = pmd_lock(mm, pmd); + spin_lock(&gmap->guest_table_lock); + if (*table == _SEGMENT_ENTRY_INVALID) { + rc = radix_tree_insert(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT, table); + if (!rc) + *table = pmd_val(*pmd); + } else + rc = 0; + spin_unlock(&gmap->guest_table_lock); + spin_unlock(ptl); + radix_tree_preload_end(); + return rc; +} + +/** + * gmap_fault - resolve a fault on a guest address + * @gmap: pointer to guest mapping meta data structure + * @gaddr: guest address + * @fault_flags: flags to pass down to handle_mm_fault() + * + * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT + * if the vm address is already mapped to a different guest segment. + */ +int gmap_fault(struct gmap *gmap, unsigned long gaddr, + unsigned int fault_flags) +{ + unsigned long vmaddr; + int rc; + bool unlocked; + + down_read(&gmap->mm->mmap_sem); + +retry: + unlocked = false; + vmaddr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(vmaddr)) { + rc = vmaddr; + goto out_up; + } + if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, + &unlocked)) { + rc = -EFAULT; + goto out_up; + } + /* + * In the case that fixup_user_fault unlocked the mmap_sem during + * faultin redo __gmap_translate to not race with a map/unmap_segment. + */ + if (unlocked) + goto retry; + + rc = __gmap_link(gmap, gaddr, vmaddr); +out_up: + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_fault); + +/* + * this function is assumed to be called with mmap_sem held + */ +void __gmap_zap(struct gmap *gmap, unsigned long gaddr) +{ + unsigned long vmaddr; + spinlock_t *ptl; + pte_t *ptep; + + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (vmaddr) { + vmaddr |= gaddr & ~PMD_MASK; + /* Get pointer to the page table entry */ + ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); + if (likely(ptep)) + ptep_zap_unused(gmap->mm, vmaddr, ptep, 0); + pte_unmap_unlock(ptep, ptl); + } +} +EXPORT_SYMBOL_GPL(__gmap_zap); + +void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) +{ + unsigned long gaddr, vmaddr, size; + struct vm_area_struct *vma; + + down_read(&gmap->mm->mmap_sem); + for (gaddr = from; gaddr < to; + gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { + /* Find the vm address for the guest address */ + vmaddr = (unsigned long) + radix_tree_lookup(&gmap->guest_to_host, + gaddr >> PMD_SHIFT); + if (!vmaddr) + continue; + vmaddr |= gaddr & ~PMD_MASK; + /* Find vma in the parent mm */ + vma = find_vma(gmap->mm, vmaddr); + size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); + zap_page_range(vma, vmaddr, size, NULL); + } + up_read(&gmap->mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(gmap_discard); + +static LIST_HEAD(gmap_notifier_list); +static DEFINE_SPINLOCK(gmap_notifier_lock); + +/** + * gmap_register_ipte_notifier - register a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_register_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_add(&nb->list, &gmap_notifier_list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); + +/** + * gmap_unregister_ipte_notifier - remove a pte invalidation callback + * @nb: pointer to the gmap notifier block + */ +void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) +{ + spin_lock(&gmap_notifier_lock); + list_del_init(&nb->list); + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); + +/** + * gmap_ipte_notify - mark a range of ptes for invalidation notification + * @gmap: pointer to guest mapping meta data structure + * @gaddr: virtual address in the guest address space + * @len: size of area + * + * Returns 0 if for each page in the given range a gmap mapping exists and + * the invalidation notification could be set. If the gmap mapping is missing + * for one or more pages -EFAULT is returned. If no memory could be allocated + * -ENOMEM is returned. This function establishes missing page table entries. + */ +int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) +{ + unsigned long addr; + spinlock_t *ptl; + pte_t *ptep; + bool unlocked; + int rc = 0; + + if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) + return -EINVAL; + down_read(&gmap->mm->mmap_sem); + while (len) { + unlocked = false; + /* Convert gmap address and connect the page tables */ + addr = __gmap_translate(gmap, gaddr); + if (IS_ERR_VALUE(addr)) { + rc = addr; + break; + } + /* Get the page mapped */ + if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, + &unlocked)) { + rc = -EFAULT; + break; + } + /* While trying to map mmap_sem got unlocked. Let us retry */ + if (unlocked) + continue; + rc = __gmap_link(gmap, gaddr, addr); + if (rc) + break; + /* Walk the process page table, lock and get pte pointer */ + ptep = get_locked_pte(gmap->mm, addr, &ptl); + VM_BUG_ON(!ptep); + /* Set notification bit in the pgste of the pte */ + if ((pte_val(*ptep) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { + ptep_set_notify(gmap->mm, addr, ptep); + gaddr += PAGE_SIZE; + len -= PAGE_SIZE; + } + pte_unmap_unlock(ptep, ptl); + } + up_read(&gmap->mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(gmap_ipte_notify); + +/** + * ptep_notify - call all invalidation callbacks for a specific pte. + * @mm: pointer to the process mm_struct + * @addr: virtual address in the process address space + * @pte: pointer to the page table entry + * + * This function is assumed to be called with the page table lock held + * for the pte to notify. + */ +void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) +{ + unsigned long offset, gaddr; + unsigned long *table; + struct gmap_notifier *nb; + struct gmap *gmap; + + offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); + offset = offset * (4096 / sizeof(pte_t)); + spin_lock(&gmap_notifier_lock); + list_for_each_entry(gmap, &mm->context.gmap_list, list) { + table = radix_tree_lookup(&gmap->host_to_guest, + vmaddr >> PMD_SHIFT); + if (!table) + continue; + gaddr = __gmap_segment_gaddr(table) + offset; + list_for_each_entry(nb, &gmap_notifier_list, list) + nb->notifier_call(gmap, gaddr); + } + spin_unlock(&gmap_notifier_lock); +} +EXPORT_SYMBOL_GPL(ptep_notify); + +static inline void thp_split_mm(struct mm_struct *mm) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + struct vm_area_struct *vma; + unsigned long addr; + + for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { + for (addr = vma->vm_start; + addr < vma->vm_end; + addr += PAGE_SIZE) + follow_page(vma, addr, FOLL_SPLIT); + vma->vm_flags &= ~VM_HUGEPAGE; + vma->vm_flags |= VM_NOHUGEPAGE; + } + mm->def_flags |= VM_NOHUGEPAGE; +#endif +} + +/* + * switch on pgstes for its userspace process (for kvm) + */ +int s390_enable_sie(void) +{ + struct mm_struct *mm = current->mm; + + /* Do we have pgstes? if yes, we are done */ + if (mm_has_pgste(mm)) + return 0; + /* Fail if the page tables are 2K */ + if (!mm_alloc_pgste(mm)) + return -EINVAL; + down_write(&mm->mmap_sem); + mm->context.has_pgste = 1; + /* split thp mappings and disable thp for future mappings */ + thp_split_mm(mm); + up_write(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL_GPL(s390_enable_sie); + +/* + * Enable storage key handling from now on and initialize the storage + * keys with the default key. + */ +static int __s390_enable_skey(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) + ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); + /* Clear storage key */ + ptep_zap_key(walk->mm, addr, pte); + return 0; +} + +int s390_enable_skey(void) +{ + struct mm_walk walk = { .pte_entry = __s390_enable_skey }; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; + + down_write(&mm->mmap_sem); + if (mm_use_skey(mm)) + goto out_up; + + mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; + + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + +out_up: + up_write(&mm->mmap_sem); + return rc; +} +EXPORT_SYMBOL_GPL(s390_enable_skey); + +/* + * Reset CMMA state, make all pages stable again. + */ +static int __s390_reset_cmma(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + ptep_zap_unused(walk->mm, addr, pte, 1); + return 0; +} + +void s390_reset_cmma(struct mm_struct *mm) +{ + struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; + + down_write(&mm->mmap_sem); + walk.mm = mm; + walk_page_range(0, TASK_SIZE, &walk); + up_write(&mm->mmap_sem); +} +EXPORT_SYMBOL_GPL(s390_reset_cmma); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c new file mode 100644 index 0000000..f6c3de2 --- /dev/null +++ b/arch/s390/mm/pgalloc.c @@ -0,0 +1,360 @@ +/* + * Page table allocation functions + * + * Copyright IBM Corp. 2016 + * Author(s): Martin Schwidefsky + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_PGSTE + +static int page_table_allocate_pgste_min = 0; +static int page_table_allocate_pgste_max = 1; +int page_table_allocate_pgste = 0; +EXPORT_SYMBOL(page_table_allocate_pgste); + +static struct ctl_table page_table_sysctl[] = { + { + .procname = "allocate_pgste", + .data = &page_table_allocate_pgste, + .maxlen = sizeof(int), + .mode = S_IRUGO | S_IWUSR, + .proc_handler = proc_dointvec, + .extra1 = &page_table_allocate_pgste_min, + .extra2 = &page_table_allocate_pgste_max, + }, + { } +}; + +static struct ctl_table page_table_sysctl_dir[] = { + { + .procname = "vm", + .maxlen = 0, + .mode = 0555, + .child = page_table_sysctl, + }, + { } +}; + +static int __init page_table_register_sysctl(void) +{ + return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; +} +__initcall(page_table_register_sysctl); + +#endif /* CONFIG_PGSTE */ + +unsigned long *crst_table_alloc(struct mm_struct *mm) +{ + struct page *page = alloc_pages(GFP_KERNEL, 2); + + if (!page) + return NULL; + return (unsigned long *) page_to_phys(page); +} + +void crst_table_free(struct mm_struct *mm, unsigned long *table) +{ + free_pages((unsigned long) table, 2); +} + +static void __crst_table_upgrade(void *arg) +{ + struct mm_struct *mm = arg; + + if (current->active_mm == mm) { + clear_user_asce(); + set_user_asce(mm); + } + __tlb_flush_local(); +} + +int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) +{ + unsigned long *table, *pgd; + unsigned long entry; + int flush; + + BUG_ON(limit > TASK_MAX_SIZE); + flush = 0; +repeat: + table = crst_table_alloc(mm); + if (!table) + return -ENOMEM; + spin_lock_bh(&mm->page_table_lock); + if (mm->context.asce_limit < limit) { + pgd = (unsigned long *) mm->pgd; + if (mm->context.asce_limit <= (1UL << 31)) { + entry = _REGION3_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + } else { + entry = _REGION2_ENTRY_EMPTY; + mm->context.asce_limit = 1UL << 53; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION2; + } + crst_table_init(table, entry); + pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); + mm->pgd = (pgd_t *) table; + mm->task_size = mm->context.asce_limit; + table = NULL; + flush = 1; + } + spin_unlock_bh(&mm->page_table_lock); + if (table) + crst_table_free(mm, table); + if (mm->context.asce_limit < limit) + goto repeat; + if (flush) + on_each_cpu(__crst_table_upgrade, mm, 0); + return 0; +} + +void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) +{ + pgd_t *pgd; + + if (current->active_mm == mm) { + clear_user_asce(); + __tlb_flush_mm(mm); + } + while (mm->context.asce_limit > limit) { + pgd = mm->pgd; + switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { + case _REGION_ENTRY_TYPE_R2: + mm->context.asce_limit = 1UL << 42; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_REGION3; + break; + case _REGION_ENTRY_TYPE_R3: + mm->context.asce_limit = 1UL << 31; + mm->context.asce_bits = _ASCE_TABLE_LENGTH | + _ASCE_USER_BITS | + _ASCE_TYPE_SEGMENT; + break; + default: + BUG(); + } + mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); + mm->task_size = mm->context.asce_limit; + crst_table_free(mm, (unsigned long *) pgd); + } + if (current->active_mm == mm) + set_user_asce(mm); +} + +static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) +{ + unsigned int old, new; + + do { + old = atomic_read(v); + new = old ^ bits; + } while (atomic_cmpxchg(v, old, new) != old); + return new; +} + +/* + * page table entry allocation/free routines. + */ +unsigned long *page_table_alloc(struct mm_struct *mm) +{ + unsigned long *table; + struct page *page; + unsigned int mask, bit; + + /* Try to get a fragment of a 4K page as a 2K page table */ + if (!mm_alloc_pgste(mm)) { + table = NULL; + spin_lock_bh(&mm->context.list_lock); + if (!list_empty(&mm->context.pgtable_list)) { + page = list_first_entry(&mm->context.pgtable_list, + struct page, lru); + mask = atomic_read(&page->_mapcount); + mask = (mask | (mask >> 4)) & 3; + if (mask != 3) { + table = (unsigned long *) page_to_phys(page); + bit = mask & 1; /* =1 -> second 2K */ + if (bit) + table += PTRS_PER_PTE; + atomic_xor_bits(&page->_mapcount, 1U << bit); + list_del(&page->lru); + } + } + spin_unlock_bh(&mm->context.list_lock); + if (table) + return table; + } + /* Allocate a fresh page */ + page = alloc_page(GFP_KERNEL|__GFP_REPEAT); + if (!page) + return NULL; + if (!pgtable_page_ctor(page)) { + __free_page(page); + return NULL; + } + /* Initialize page table */ + table = (unsigned long *) page_to_phys(page); + if (mm_alloc_pgste(mm)) { + /* Return 4K page table with PGSTEs */ + atomic_set(&page->_mapcount, 3); + clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); + clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); + } else { + /* Return the first 2K fragment of the page */ + atomic_set(&page->_mapcount, 1); + clear_table(table, _PAGE_INVALID, PAGE_SIZE); + spin_lock_bh(&mm->context.list_lock); + list_add(&page->lru, &mm->context.pgtable_list); + spin_unlock_bh(&mm->context.list_lock); + } + return table; +} + +void page_table_free(struct mm_struct *mm, unsigned long *table) +{ + struct page *page; + unsigned int bit, mask; + + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (!mm_alloc_pgste(mm)) { + /* Free 2K page table fragment of a 4K page */ + bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 1U << bit); + if (mask & 3) + list_add(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + if (mask != 0) + return; + } + + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); +} + +void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, + unsigned long vmaddr) +{ + struct mm_struct *mm; + struct page *page; + unsigned int bit, mask; + + mm = tlb->mm; + page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + if (mm_alloc_pgste(mm)) { + gmap_unlink(mm, table, vmaddr); + table = (unsigned long *) (__pa(table) | 3); + tlb_remove_table(tlb, table); + return; + } + bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); + spin_lock_bh(&mm->context.list_lock); + mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); + if (mask & 3) + list_add_tail(&page->lru, &mm->context.pgtable_list); + else + list_del(&page->lru); + spin_unlock_bh(&mm->context.list_lock); + table = (unsigned long *) (__pa(table) | (1U << bit)); + tlb_remove_table(tlb, table); +} + +static void __tlb_remove_table(void *_table) +{ + unsigned int mask = (unsigned long) _table & 3; + void *table = (void *)((unsigned long) _table ^ mask); + struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); + + switch (mask) { + case 0: /* pmd or pud */ + free_pages((unsigned long) table, 2); + break; + case 1: /* lower 2K of a 4K page table */ + case 2: /* higher 2K of a 4K page table */ + if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) + break; + /* fallthrough */ + case 3: /* 4K page table with pgstes */ + pgtable_page_dtor(page); + atomic_set(&page->_mapcount, -1); + __free_page(page); + break; + } +} + +static void tlb_remove_table_smp_sync(void *arg) +{ + /* Simply deliver the interrupt */ +} + +static void tlb_remove_table_one(void *table) +{ + /* + * This isn't an RCU grace period and hence the page-tables cannot be + * assumed to be actually RCU-freed. + * + * It is however sufficient for software page-table walkers that rely + * on IRQ disabling. See the comment near struct mmu_table_batch. + */ + smp_call_function(tlb_remove_table_smp_sync, NULL, 1); + __tlb_remove_table(table); +} + +static void tlb_remove_table_rcu(struct rcu_head *head) +{ + struct mmu_table_batch *batch; + int i; + + batch = container_of(head, struct mmu_table_batch, rcu); + + for (i = 0; i < batch->nr; i++) + __tlb_remove_table(batch->tables[i]); + + free_page((unsigned long)batch); +} + +void tlb_table_flush(struct mmu_gather *tlb) +{ + struct mmu_table_batch **batch = &tlb->batch; + + if (*batch) { + call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + *batch = NULL; + } +} + +void tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + struct mmu_table_batch **batch = &tlb->batch; + + tlb->mm->context.flush_mm = 1; + if (*batch == NULL) { + *batch = (struct mmu_table_batch *) + __get_free_page(GFP_NOWAIT | __GFP_NOWARN); + if (*batch == NULL) { + __tlb_flush_mm_lazy(tlb->mm); + tlb_remove_table_one(table); + return; + } + (*batch)->nr = 0; + } + (*batch)->tables[(*batch)->nr++] = table; + if ((*batch)->nr == MAX_TABLE_BATCH) + tlb_flush_mmu(tlb); +} diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index e241262..4324b87 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -24,1140 +24,6 @@ #include #include -unsigned long *crst_table_alloc(struct mm_struct *mm) -{ - struct page *page = alloc_pages(GFP_KERNEL, 2); - - if (!page) - return NULL; - return (unsigned long *) page_to_phys(page); -} - -void crst_table_free(struct mm_struct *mm, unsigned long *table) -{ - free_pages((unsigned long) table, 2); -} - -static void __crst_table_upgrade(void *arg) -{ - struct mm_struct *mm = arg; - - if (current->active_mm == mm) { - clear_user_asce(); - set_user_asce(mm); - } - __tlb_flush_local(); -} - -int crst_table_upgrade(struct mm_struct *mm, unsigned long limit) -{ - unsigned long *table, *pgd; - unsigned long entry; - int flush; - - BUG_ON(limit > TASK_MAX_SIZE); - flush = 0; -repeat: - table = crst_table_alloc(mm); - if (!table) - return -ENOMEM; - spin_lock_bh(&mm->page_table_lock); - if (mm->context.asce_limit < limit) { - pgd = (unsigned long *) mm->pgd; - if (mm->context.asce_limit <= (1UL << 31)) { - entry = _REGION3_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - } else { - entry = _REGION2_ENTRY_EMPTY; - mm->context.asce_limit = 1UL << 53; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION2; - } - crst_table_init(table, entry); - pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd); - mm->pgd = (pgd_t *) table; - mm->task_size = mm->context.asce_limit; - table = NULL; - flush = 1; - } - spin_unlock_bh(&mm->page_table_lock); - if (table) - crst_table_free(mm, table); - if (mm->context.asce_limit < limit) - goto repeat; - if (flush) - on_each_cpu(__crst_table_upgrade, mm, 0); - return 0; -} - -void crst_table_downgrade(struct mm_struct *mm, unsigned long limit) -{ - pgd_t *pgd; - - if (current->active_mm == mm) { - clear_user_asce(); - __tlb_flush_mm(mm); - } - while (mm->context.asce_limit > limit) { - pgd = mm->pgd; - switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) { - case _REGION_ENTRY_TYPE_R2: - mm->context.asce_limit = 1UL << 42; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_REGION3; - break; - case _REGION_ENTRY_TYPE_R3: - mm->context.asce_limit = 1UL << 31; - mm->context.asce_bits = _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | - _ASCE_TYPE_SEGMENT; - break; - default: - BUG(); - } - mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); - mm->task_size = mm->context.asce_limit; - crst_table_free(mm, (unsigned long *) pgd); - } - if (current->active_mm == mm) - set_user_asce(mm); -} - -#ifdef CONFIG_PGSTE - -/** - * gmap_alloc - allocate a guest address space - * @mm: pointer to the parent mm_struct - * @limit: maximum address of the gmap address space - * - * Returns a guest address space structure. - */ -struct gmap *gmap_alloc(struct mm_struct *mm, unsigned long limit) -{ - struct gmap *gmap; - struct page *page; - unsigned long *table; - unsigned long etype, atype; - - if (limit < (1UL << 31)) { - limit = (1UL << 31) - 1; - atype = _ASCE_TYPE_SEGMENT; - etype = _SEGMENT_ENTRY_EMPTY; - } else if (limit < (1UL << 42)) { - limit = (1UL << 42) - 1; - atype = _ASCE_TYPE_REGION3; - etype = _REGION3_ENTRY_EMPTY; - } else if (limit < (1UL << 53)) { - limit = (1UL << 53) - 1; - atype = _ASCE_TYPE_REGION2; - etype = _REGION2_ENTRY_EMPTY; - } else { - limit = -1UL; - atype = _ASCE_TYPE_REGION1; - etype = _REGION1_ENTRY_EMPTY; - } - gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL); - if (!gmap) - goto out; - INIT_LIST_HEAD(&gmap->crst_list); - INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL); - INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC); - spin_lock_init(&gmap->guest_table_lock); - gmap->mm = mm; - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - goto out_free; - page->index = 0; - list_add(&page->lru, &gmap->crst_list); - table = (unsigned long *) page_to_phys(page); - crst_table_init(table, etype); - gmap->table = table; - gmap->asce = atype | _ASCE_TABLE_LENGTH | - _ASCE_USER_BITS | __pa(table); - gmap->asce_end = limit; - down_write(&mm->mmap_sem); - list_add(&gmap->list, &mm->context.gmap_list); - up_write(&mm->mmap_sem); - return gmap; - -out_free: - kfree(gmap); -out: - return NULL; -} -EXPORT_SYMBOL_GPL(gmap_alloc); - -static void gmap_flush_tlb(struct gmap *gmap) -{ - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); -} - -static void gmap_radix_tree_free(struct radix_tree_root *root) -{ - struct radix_tree_iter iter; - unsigned long indices[16]; - unsigned long index; - void **slot; - int i, nr; - - /* A radix tree is freed by deleting all of its entries */ - index = 0; - do { - nr = 0; - radix_tree_for_each_slot(slot, root, &iter, index) { - indices[nr] = iter.index; - if (++nr == 16) - break; - } - for (i = 0; i < nr; i++) { - index = indices[i]; - radix_tree_delete(root, index); - } - } while (nr > 0); -} - -/** - * gmap_free - free a guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_free(struct gmap *gmap) -{ - struct page *page, *next; - - /* Flush tlb. */ - if (MACHINE_HAS_IDTE) - __tlb_flush_asce(gmap->mm, gmap->asce); - else - __tlb_flush_global(); - - /* Free all segment & region tables. */ - list_for_each_entry_safe(page, next, &gmap->crst_list, lru) - __free_pages(page, 2); - gmap_radix_tree_free(&gmap->guest_to_host); - gmap_radix_tree_free(&gmap->host_to_guest); - down_write(&gmap->mm->mmap_sem); - list_del(&gmap->list); - up_write(&gmap->mm->mmap_sem); - kfree(gmap); -} -EXPORT_SYMBOL_GPL(gmap_free); - -/** - * gmap_enable - switch primary space to the guest address space - * @gmap: pointer to the guest address space structure - */ -void gmap_enable(struct gmap *gmap) -{ - S390_lowcore.gmap = (unsigned long) gmap; -} -EXPORT_SYMBOL_GPL(gmap_enable); - -/** - * gmap_disable - switch back to the standard primary address space - * @gmap: pointer to the guest address space structure - */ -void gmap_disable(struct gmap *gmap) -{ - S390_lowcore.gmap = 0UL; -} -EXPORT_SYMBOL_GPL(gmap_disable); - -/* - * gmap_alloc_table is assumed to be called with mmap_sem held - */ -static int gmap_alloc_table(struct gmap *gmap, unsigned long *table, - unsigned long init, unsigned long gaddr) -{ - struct page *page; - unsigned long *new; - - /* since we dont free the gmap table until gmap_free we can unlock */ - page = alloc_pages(GFP_KERNEL, 2); - if (!page) - return -ENOMEM; - new = (unsigned long *) page_to_phys(page); - crst_table_init(new, init); - spin_lock(&gmap->mm->page_table_lock); - if (*table & _REGION_ENTRY_INVALID) { - list_add(&page->lru, &gmap->crst_list); - *table = (unsigned long) new | _REGION_ENTRY_LENGTH | - (*table & _REGION_ENTRY_TYPE_MASK); - page->index = gaddr; - page = NULL; - } - spin_unlock(&gmap->mm->page_table_lock); - if (page) - __free_pages(page, 2); - return 0; -} - -/** - * __gmap_segment_gaddr - find virtual address from segment pointer - * @entry: pointer to a segment table entry in the guest address space - * - * Returns the virtual address in the guest address space for the segment - */ -static unsigned long __gmap_segment_gaddr(unsigned long *entry) -{ - struct page *page; - unsigned long offset, mask; - - offset = (unsigned long) entry / sizeof(unsigned long); - offset = (offset & (PTRS_PER_PMD - 1)) * PMD_SIZE; - mask = ~(PTRS_PER_PMD * sizeof(pmd_t) - 1); - page = virt_to_page((void *)((unsigned long) entry & mask)); - return page->index + offset; -} - -/** - * __gmap_unlink_by_vmaddr - unlink a single segment via a host address - * @gmap: pointer to the guest address space structure - * @vmaddr: address in the host process address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unlink_by_vmaddr(struct gmap *gmap, unsigned long vmaddr) -{ - unsigned long *entry; - int flush = 0; - - spin_lock(&gmap->guest_table_lock); - entry = radix_tree_delete(&gmap->host_to_guest, vmaddr >> PMD_SHIFT); - if (entry) { - flush = (*entry != _SEGMENT_ENTRY_INVALID); - *entry = _SEGMENT_ENTRY_INVALID; - } - spin_unlock(&gmap->guest_table_lock); - return flush; -} - -/** - * __gmap_unmap_by_gaddr - unmap a single segment via a guest address - * @gmap: pointer to the guest address space structure - * @gaddr: address in the guest address space - * - * Returns 1 if a TLB flush is required - */ -static int __gmap_unmap_by_gaddr(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long vmaddr; - - vmaddr = (unsigned long) radix_tree_delete(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - return vmaddr ? __gmap_unlink_by_vmaddr(gmap, vmaddr) : 0; -} - -/** - * gmap_unmap_segment - unmap segment from the guest address space - * @gmap: pointer to the guest address space structure - * @to: address in the guest address space - * @len: length of the memory area to unmap - * - * Returns 0 if the unmap succeeded, -EINVAL if not. - */ -int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len) -{ - unsigned long off; - int flush; - - if ((to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || to + len < to) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - return 0; -} -EXPORT_SYMBOL_GPL(gmap_unmap_segment); - -/** - * gmap_mmap_segment - map a segment to the guest address space - * @gmap: pointer to the guest address space structure - * @from: source address in the parent address space - * @to: target address in the guest address space - * @len: length of the memory area to map - * - * Returns 0 if the mmap succeeded, -EINVAL or -ENOMEM if not. - */ -int gmap_map_segment(struct gmap *gmap, unsigned long from, - unsigned long to, unsigned long len) -{ - unsigned long off; - int flush; - - if ((from | to | len) & (PMD_SIZE - 1)) - return -EINVAL; - if (len == 0 || from + len < from || to + len < to || - from + len - 1 > TASK_MAX_SIZE || to + len - 1 > gmap->asce_end) - return -EINVAL; - - flush = 0; - down_write(&gmap->mm->mmap_sem); - for (off = 0; off < len; off += PMD_SIZE) { - /* Remove old translation */ - flush |= __gmap_unmap_by_gaddr(gmap, to + off); - /* Store new translation */ - if (radix_tree_insert(&gmap->guest_to_host, - (to + off) >> PMD_SHIFT, - (void *) from + off)) - break; - } - up_write(&gmap->mm->mmap_sem); - if (flush) - gmap_flush_tlb(gmap); - if (off >= len) - return 0; - gmap_unmap_segment(gmap, to, len); - return -ENOMEM; -} -EXPORT_SYMBOL_GPL(gmap_map_segment); - -/** - * __gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long vmaddr; - - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, gaddr >> PMD_SHIFT); - return vmaddr ? (vmaddr | (gaddr & ~PMD_MASK)) : -EFAULT; -} -EXPORT_SYMBOL_GPL(__gmap_translate); - -/** - * gmap_translate - translate a guest address to a user space address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * - * Returns user space address which corresponds to the guest address or - * -EFAULT if no such mapping exists. - * This function does not establish potentially missing page table entries. - */ -unsigned long gmap_translate(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long rc; - - down_read(&gmap->mm->mmap_sem); - rc = __gmap_translate(gmap, gaddr); - up_read(&gmap->mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_translate); - -/** - * gmap_unlink - disconnect a page table from the gmap shadow tables - * @gmap: pointer to guest mapping meta data structure - * @table: pointer to the host page table - * @vmaddr: vm address associated with the host page table - */ -static void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) -{ - struct gmap *gmap; - int flush; - - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - flush = __gmap_unlink_by_vmaddr(gmap, vmaddr); - if (flush) - gmap_flush_tlb(gmap); - } -} - -/** - * gmap_link - set up shadow page tables to connect a host to a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @vmaddr: vm address - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - * The mmap_sem of the mm that belongs to the address space must be held - * when this function gets called. - */ -int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr) -{ - struct mm_struct *mm; - unsigned long *table; - spinlock_t *ptl; - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - int rc; - - /* Create higher level tables in the gmap page table */ - table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION1) { - table += (gaddr >> 53) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION2_ENTRY_EMPTY, - gaddr & 0xffe0000000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION2) { - table += (gaddr >> 42) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _REGION3_ENTRY_EMPTY, - gaddr & 0xfffffc0000000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - if ((gmap->asce & _ASCE_TYPE_MASK) >= _ASCE_TYPE_REGION3) { - table += (gaddr >> 31) & 0x7ff; - if ((*table & _REGION_ENTRY_INVALID) && - gmap_alloc_table(gmap, table, _SEGMENT_ENTRY_EMPTY, - gaddr & 0xffffffff80000000UL)) - return -ENOMEM; - table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN); - } - table += (gaddr >> 20) & 0x7ff; - /* Walk the parent mm page table */ - mm = gmap->mm; - pgd = pgd_offset(mm, vmaddr); - VM_BUG_ON(pgd_none(*pgd)); - pud = pud_offset(pgd, vmaddr); - VM_BUG_ON(pud_none(*pud)); - pmd = pmd_offset(pud, vmaddr); - VM_BUG_ON(pmd_none(*pmd)); - /* large pmds cannot yet be handled */ - if (pmd_large(*pmd)) - return -EFAULT; - /* Link gmap segment table entry location to page table. */ - rc = radix_tree_preload(GFP_KERNEL); - if (rc) - return rc; - ptl = pmd_lock(mm, pmd); - spin_lock(&gmap->guest_table_lock); - if (*table == _SEGMENT_ENTRY_INVALID) { - rc = radix_tree_insert(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT, table); - if (!rc) - *table = pmd_val(*pmd); - } else - rc = 0; - spin_unlock(&gmap->guest_table_lock); - spin_unlock(ptl); - radix_tree_preload_end(); - return rc; -} - -/** - * gmap_fault - resolve a fault on a guest address - * @gmap: pointer to guest mapping meta data structure - * @gaddr: guest address - * @fault_flags: flags to pass down to handle_mm_fault() - * - * Returns 0 on success, -ENOMEM for out of memory conditions, and -EFAULT - * if the vm address is already mapped to a different guest segment. - */ -int gmap_fault(struct gmap *gmap, unsigned long gaddr, - unsigned int fault_flags) -{ - unsigned long vmaddr; - int rc; - bool unlocked; - - down_read(&gmap->mm->mmap_sem); - -retry: - unlocked = false; - vmaddr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(vmaddr)) { - rc = vmaddr; - goto out_up; - } - if (fixup_user_fault(current, gmap->mm, vmaddr, fault_flags, - &unlocked)) { - rc = -EFAULT; - goto out_up; - } - /* - * In the case that fixup_user_fault unlocked the mmap_sem during - * faultin redo __gmap_translate to not race with a map/unmap_segment. - */ - if (unlocked) - goto retry; - - rc = __gmap_link(gmap, gaddr, vmaddr); -out_up: - up_read(&gmap->mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_fault); - -static void gmap_zap_swap_entry(swp_entry_t entry, struct mm_struct *mm) -{ - if (!non_swap_entry(entry)) - dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) { - struct page *page = migration_entry_to_page(entry); - - dec_mm_counter(mm, mm_counter(page)); - } - free_swap_and_cache(entry); -} - -/* - * this function is assumed to be called with mmap_sem held - */ -void __gmap_zap(struct gmap *gmap, unsigned long gaddr) -{ - unsigned long vmaddr, ptev, pgstev; - pte_t *ptep, pte; - spinlock_t *ptl; - pgste_t pgste; - - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - return; - vmaddr |= gaddr & ~PMD_MASK; - /* Get pointer to the page table entry */ - ptep = get_locked_pte(gmap->mm, vmaddr, &ptl); - if (unlikely(!ptep)) - return; - pte = *ptep; - if (!pte_swap(pte)) - goto out_pte; - /* Zap unused and logically-zero pages */ - pgste = pgste_get_lock(ptep); - pgstev = pgste_val(pgste); - ptev = pte_val(pte); - if (((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED) || - ((pgstev & _PGSTE_GPS_ZERO) && (ptev & _PAGE_INVALID))) { - gmap_zap_swap_entry(pte_to_swp_entry(pte), gmap->mm); - pte_clear(gmap->mm, vmaddr, ptep); - } - pgste_set_unlock(ptep, pgste); -out_pte: - pte_unmap_unlock(ptep, ptl); -} -EXPORT_SYMBOL_GPL(__gmap_zap); - -void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) -{ - unsigned long gaddr, vmaddr, size; - struct vm_area_struct *vma; - - down_read(&gmap->mm->mmap_sem); - for (gaddr = from; gaddr < to; - gaddr = (gaddr + PMD_SIZE) & PMD_MASK) { - /* Find the vm address for the guest address */ - vmaddr = (unsigned long) - radix_tree_lookup(&gmap->guest_to_host, - gaddr >> PMD_SHIFT); - if (!vmaddr) - continue; - vmaddr |= gaddr & ~PMD_MASK; - /* Find vma in the parent mm */ - vma = find_vma(gmap->mm, vmaddr); - size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); - } - up_read(&gmap->mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(gmap_discard); - -static LIST_HEAD(gmap_notifier_list); -static DEFINE_SPINLOCK(gmap_notifier_lock); - -/** - * gmap_register_ipte_notifier - register a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_register_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_add(&nb->list, &gmap_notifier_list); - spin_unlock(&gmap_notifier_lock); -} -EXPORT_SYMBOL_GPL(gmap_register_ipte_notifier); - -/** - * gmap_unregister_ipte_notifier - remove a pte invalidation callback - * @nb: pointer to the gmap notifier block - */ -void gmap_unregister_ipte_notifier(struct gmap_notifier *nb) -{ - spin_lock(&gmap_notifier_lock); - list_del_init(&nb->list); - spin_unlock(&gmap_notifier_lock); -} -EXPORT_SYMBOL_GPL(gmap_unregister_ipte_notifier); - -/** - * gmap_ipte_notify - mark a range of ptes for invalidation notification - * @gmap: pointer to guest mapping meta data structure - * @gaddr: virtual address in the guest address space - * @len: size of area - * - * Returns 0 if for each page in the given range a gmap mapping exists and - * the invalidation notification could be set. If the gmap mapping is missing - * for one or more pages -EFAULT is returned. If no memory could be allocated - * -ENOMEM is returned. This function establishes missing page table entries. - */ -int gmap_ipte_notify(struct gmap *gmap, unsigned long gaddr, unsigned long len) -{ - unsigned long addr; - spinlock_t *ptl; - pte_t *ptep, entry; - pgste_t pgste; - bool unlocked; - int rc = 0; - - if ((gaddr & ~PAGE_MASK) || (len & ~PAGE_MASK)) - return -EINVAL; - down_read(&gmap->mm->mmap_sem); - while (len) { - unlocked = false; - /* Convert gmap address and connect the page tables */ - addr = __gmap_translate(gmap, gaddr); - if (IS_ERR_VALUE(addr)) { - rc = addr; - break; - } - /* Get the page mapped */ - if (fixup_user_fault(current, gmap->mm, addr, FAULT_FLAG_WRITE, - &unlocked)) { - rc = -EFAULT; - break; - } - /* While trying to map mmap_sem got unlocked. Let us retry */ - if (unlocked) - continue; - rc = __gmap_link(gmap, gaddr, addr); - if (rc) - break; - /* Walk the process page table, lock and get pte pointer */ - ptep = get_locked_pte(gmap->mm, addr, &ptl); - VM_BUG_ON(!ptep); - /* Set notification bit in the pgste of the pte */ - entry = *ptep; - if ((pte_val(entry) & (_PAGE_INVALID | _PAGE_PROTECT)) == 0) { - pgste = pgste_get_lock(ptep); - pgste_val(pgste) |= PGSTE_IN_BIT; - pgste_set_unlock(ptep, pgste); - gaddr += PAGE_SIZE; - len -= PAGE_SIZE; - } - pte_unmap_unlock(ptep, ptl); - } - up_read(&gmap->mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(gmap_ipte_notify); - -/** - * ptep_ipte_notify - call all invalidation callbacks for a specific pte. - * @mm: pointer to the process mm_struct - * @addr: virtual address in the process address space - * @pte: pointer to the page table entry - * - * This function is assumed to be called with the page table lock held - * for the pte to notify. - */ -void ptep_ipte_notify(struct mm_struct *mm, unsigned long vmaddr, pte_t *pte) -{ - unsigned long offset, gaddr; - unsigned long *table; - struct gmap_notifier *nb; - struct gmap *gmap; - - offset = ((unsigned long) pte) & (255 * sizeof(pte_t)); - offset = offset * (4096 / sizeof(pte_t)); - spin_lock(&gmap_notifier_lock); - list_for_each_entry(gmap, &mm->context.gmap_list, list) { - table = radix_tree_lookup(&gmap->host_to_guest, - vmaddr >> PMD_SHIFT); - if (!table) - continue; - gaddr = __gmap_segment_gaddr(table) + offset; - list_for_each_entry(nb, &gmap_notifier_list, list) - nb->notifier_call(gmap, gaddr); - } - spin_unlock(&gmap_notifier_lock); -} -EXPORT_SYMBOL_GPL(ptep_ipte_notify); - -int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, - unsigned long key, bool nq) -{ - spinlock_t *ptl; - pgste_t old, new; - pte_t *ptep; - - down_read(&mm->mmap_sem); - ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - - new = old = pgste_get_lock(ptep); - pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | - PGSTE_ACC_BITS | PGSTE_FP_BIT); - pgste_val(new) |= (key & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; - pgste_val(new) |= (key & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; - if (!(pte_val(*ptep) & _PAGE_INVALID)) { - unsigned long address, bits, skey; - - address = pte_val(*ptep) & PAGE_MASK; - skey = (unsigned long) page_get_storage_key(address); - bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); - skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); - /* Set storage key ACC and FP */ - page_set_storage_key(address, skey, !nq); - /* Merge host changed & referenced into pgste */ - pgste_val(new) |= bits << 52; - } - /* changing the guest storage key is considered a change of the page */ - if ((pgste_val(new) ^ pgste_val(old)) & - (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) - pgste_val(new) |= PGSTE_UC_BIT; - - pgste_set_unlock(ptep, new); - pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); - return 0; -} -EXPORT_SYMBOL(set_guest_storage_key); - -unsigned long get_guest_storage_key(struct mm_struct *mm, unsigned long addr) -{ - spinlock_t *ptl; - pgste_t pgste; - pte_t *ptep; - uint64_t physaddr; - unsigned long key = 0; - - down_read(&mm->mmap_sem); - ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) { - up_read(&mm->mmap_sem); - return -EFAULT; - } - pgste = pgste_get_lock(ptep); - - if (pte_val(*ptep) & _PAGE_INVALID) { - key |= (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; - key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; - key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; - key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; - } else { - physaddr = pte_val(*ptep) & PAGE_MASK; - key = page_get_storage_key(physaddr); - - /* Reflect guest's logical view, not physical */ - if (pgste_val(pgste) & PGSTE_GR_BIT) - key |= _PAGE_REFERENCED; - if (pgste_val(pgste) & PGSTE_GC_BIT) - key |= _PAGE_CHANGED; - } - - pgste_set_unlock(ptep, pgste); - pte_unmap_unlock(ptep, ptl); - up_read(&mm->mmap_sem); - return key; -} -EXPORT_SYMBOL(get_guest_storage_key); - -static int page_table_allocate_pgste_min = 0; -static int page_table_allocate_pgste_max = 1; -int page_table_allocate_pgste = 0; -EXPORT_SYMBOL(page_table_allocate_pgste); - -static struct ctl_table page_table_sysctl[] = { - { - .procname = "allocate_pgste", - .data = &page_table_allocate_pgste, - .maxlen = sizeof(int), - .mode = S_IRUGO | S_IWUSR, - .proc_handler = proc_dointvec, - .extra1 = &page_table_allocate_pgste_min, - .extra2 = &page_table_allocate_pgste_max, - }, - { } -}; - -static struct ctl_table page_table_sysctl_dir[] = { - { - .procname = "vm", - .maxlen = 0, - .mode = 0555, - .child = page_table_sysctl, - }, - { } -}; - -static int __init page_table_register_sysctl(void) -{ - return register_sysctl_table(page_table_sysctl_dir) ? 0 : -ENOMEM; -} -__initcall(page_table_register_sysctl); - -#else /* CONFIG_PGSTE */ - -static inline void gmap_unlink(struct mm_struct *mm, unsigned long *table, - unsigned long vmaddr) -{ -} - -#endif /* CONFIG_PGSTE */ - -static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits) -{ - unsigned int old, new; - - do { - old = atomic_read(v); - new = old ^ bits; - } while (atomic_cmpxchg(v, old, new) != old); - return new; -} - -/* - * page table entry allocation/free routines. - */ -unsigned long *page_table_alloc(struct mm_struct *mm) -{ - unsigned long *table; - struct page *page; - unsigned int mask, bit; - - /* Try to get a fragment of a 4K page as a 2K page table */ - if (!mm_alloc_pgste(mm)) { - table = NULL; - spin_lock_bh(&mm->context.list_lock); - if (!list_empty(&mm->context.pgtable_list)) { - page = list_first_entry(&mm->context.pgtable_list, - struct page, lru); - mask = atomic_read(&page->_mapcount); - mask = (mask | (mask >> 4)) & 3; - if (mask != 3) { - table = (unsigned long *) page_to_phys(page); - bit = mask & 1; /* =1 -> second 2K */ - if (bit) - table += PTRS_PER_PTE; - atomic_xor_bits(&page->_mapcount, 1U << bit); - list_del(&page->lru); - } - } - spin_unlock_bh(&mm->context.list_lock); - if (table) - return table; - } - /* Allocate a fresh page */ - page = alloc_page(GFP_KERNEL|__GFP_REPEAT); - if (!page) - return NULL; - if (!pgtable_page_ctor(page)) { - __free_page(page); - return NULL; - } - /* Initialize page table */ - table = (unsigned long *) page_to_phys(page); - if (mm_alloc_pgste(mm)) { - /* Return 4K page table with PGSTEs */ - atomic_set(&page->_mapcount, 3); - clear_table(table, _PAGE_INVALID, PAGE_SIZE/2); - clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2); - } else { - /* Return the first 2K fragment of the page */ - atomic_set(&page->_mapcount, 1); - clear_table(table, _PAGE_INVALID, PAGE_SIZE); - spin_lock_bh(&mm->context.list_lock); - list_add(&page->lru, &mm->context.pgtable_list); - spin_unlock_bh(&mm->context.list_lock); - } - return table; -} - -void page_table_free(struct mm_struct *mm, unsigned long *table) -{ - struct page *page; - unsigned int bit, mask; - - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (!mm_alloc_pgste(mm)) { - /* Free 2K page table fragment of a 4K page */ - bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 1U << bit); - if (mask & 3) - list_add(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - if (mask != 0) - return; - } - - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); -} - -void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table, - unsigned long vmaddr) -{ - struct mm_struct *mm; - struct page *page; - unsigned int bit, mask; - - mm = tlb->mm; - page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - if (mm_alloc_pgste(mm)) { - gmap_unlink(mm, table, vmaddr); - table = (unsigned long *) (__pa(table) | 3); - tlb_remove_table(tlb, table); - return; - } - bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)); - spin_lock_bh(&mm->context.list_lock); - mask = atomic_xor_bits(&page->_mapcount, 0x11U << bit); - if (mask & 3) - list_add_tail(&page->lru, &mm->context.pgtable_list); - else - list_del(&page->lru); - spin_unlock_bh(&mm->context.list_lock); - table = (unsigned long *) (__pa(table) | (1U << bit)); - tlb_remove_table(tlb, table); -} - -static void __tlb_remove_table(void *_table) -{ - unsigned int mask = (unsigned long) _table & 3; - void *table = (void *)((unsigned long) _table ^ mask); - struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT); - - switch (mask) { - case 0: /* pmd or pud */ - free_pages((unsigned long) table, 2); - break; - case 1: /* lower 2K of a 4K page table */ - case 2: /* higher 2K of a 4K page table */ - if (atomic_xor_bits(&page->_mapcount, mask << 4) != 0) - break; - /* fallthrough */ - case 3: /* 4K page table with pgstes */ - pgtable_page_dtor(page); - atomic_set(&page->_mapcount, -1); - __free_page(page); - break; - } -} - -static void tlb_remove_table_smp_sync(void *arg) -{ - /* Simply deliver the interrupt */ -} - -static void tlb_remove_table_one(void *table) -{ - /* - * This isn't an RCU grace period and hence the page-tables cannot be - * assumed to be actually RCU-freed. - * - * It is however sufficient for software page-table walkers that rely - * on IRQ disabling. See the comment near struct mmu_table_batch. - */ - smp_call_function(tlb_remove_table_smp_sync, NULL, 1); - __tlb_remove_table(table); -} - -static void tlb_remove_table_rcu(struct rcu_head *head) -{ - struct mmu_table_batch *batch; - int i; - - batch = container_of(head, struct mmu_table_batch, rcu); - - for (i = 0; i < batch->nr; i++) - __tlb_remove_table(batch->tables[i]); - - free_page((unsigned long)batch); -} - -void tlb_table_flush(struct mmu_gather *tlb) -{ - struct mmu_table_batch **batch = &tlb->batch; - - if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); - *batch = NULL; - } -} - -void tlb_remove_table(struct mmu_gather *tlb, void *table) -{ - struct mmu_table_batch **batch = &tlb->batch; - - tlb->mm->context.flush_mm = 1; - if (*batch == NULL) { - *batch = (struct mmu_table_batch *) - __get_free_page(GFP_NOWAIT | __GFP_NOWARN); - if (*batch == NULL) { - __tlb_flush_mm_lazy(tlb->mm); - tlb_remove_table_one(table); - return; - } - (*batch)->nr = 0; - } - (*batch)->tables[(*batch)->nr++] = table; - if ((*batch)->nr == MAX_TABLE_BATCH) - tlb_flush_mmu(tlb); -} - -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void thp_split_vma(struct vm_area_struct *vma) -{ - unsigned long addr; - - for (addr = vma->vm_start; addr < vma->vm_end; addr += PAGE_SIZE) - follow_page(vma, addr, FOLL_SPLIT); -} - -static inline void thp_split_mm(struct mm_struct *mm) -{ - struct vm_area_struct *vma; - - for (vma = mm->mmap; vma != NULL; vma = vma->vm_next) { - thp_split_vma(vma); - vma->vm_flags &= ~VM_HUGEPAGE; - vma->vm_flags |= VM_NOHUGEPAGE; - } - mm->def_flags |= VM_NOHUGEPAGE; -} -#else -static inline void thp_split_mm(struct mm_struct *mm) -{ -} -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ - static inline pte_t ptep_flush_direct(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -1198,6 +64,55 @@ static inline pte_t ptep_flush_lazy(struct mm_struct *mm, return old; } +static inline pgste_t pgste_get_lock(pte_t *ptep) +{ + unsigned long new = 0; +#ifdef CONFIG_PGSTE + unsigned long old; + + preempt_disable(); + asm( + " lg %0,%2\n" + "0: lgr %1,%0\n" + " nihh %0,0xff7f\n" /* clear PCL bit in old */ + " oihh %1,0x0080\n" /* set PCL bit in new */ + " csg %0,%1,%2\n" + " jl 0b\n" + : "=&d" (old), "=&d" (new), "=Q" (ptep[PTRS_PER_PTE]) + : "Q" (ptep[PTRS_PER_PTE]) : "cc", "memory"); +#endif + return __pgste(new); +} + +static inline void pgste_set_unlock(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + asm( + " nihh %1,0xff7f\n" /* clear PCL bit */ + " stg %1,%0\n" + : "=Q" (ptep[PTRS_PER_PTE]) + : "d" (pgste_val(pgste)), "Q" (ptep[PTRS_PER_PTE]) + : "cc", "memory"); + preempt_enable(); +#endif +} + +static inline pgste_t pgste_get(pte_t *ptep) +{ + unsigned long pgste = 0; +#ifdef CONFIG_PGSTE + pgste = *(unsigned long *)(ptep + PTRS_PER_PTE); +#endif + return __pgste(pgste); +} + +static inline void pgste_set(pte_t *ptep, pgste_t pgste) +{ +#ifdef CONFIG_PGSTE + *(pgste_t *)(ptep + PTRS_PER_PTE) = pgste; +#endif +} + static inline pgste_t pgste_update_all(pte_t pte, pgste_t pgste, struct mm_struct *mm) { @@ -1271,63 +186,12 @@ static inline pgste_t pgste_ipte_notify(struct mm_struct *mm, #ifdef CONFIG_PGSTE if (pgste_val(pgste) & PGSTE_IN_BIT) { pgste_val(pgste) &= ~PGSTE_IN_BIT; - ptep_ipte_notify(mm, addr, ptep); + ptep_notify(mm, addr, ptep); } #endif return pgste; } -#ifdef CONFIG_PGSTE -/* - * Test and reset if a guest page is dirty - */ -bool pgste_test_and_clear_dirty(struct mm_struct *mm, unsigned long addr) -{ - spinlock_t *ptl; - pgste_t pgste; - pte_t *ptep; - pte_t pte; - bool dirty; - - ptep = get_locked_pte(mm, addr, &ptl); - if (unlikely(!ptep)) - return false; - - pgste = pgste_get_lock(ptep); - dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); - pgste_val(pgste) &= ~PGSTE_UC_BIT; - pte = *ptep; - if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { - pgste = pgste_ipte_notify(mm, addr, ptep, pgste); - __ptep_ipte(addr, ptep); - if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) - pte_val(pte) |= _PAGE_PROTECT; - else - pte_val(pte) |= _PAGE_INVALID; - *ptep = pte; - } - pgste_set_unlock(ptep, pgste); - - spin_unlock(ptl); - return dirty; -} -EXPORT_SYMBOL_GPL(pgste_test_and_clear_dirty); - -void set_pte_pgste_at(struct mm_struct *mm, unsigned long addr, - pte_t *ptep, pte_t entry) -{ - pgste_t pgste; - - /* the mm_has_pgste() check is done in set_pte_at() */ - pgste = pgste_get_lock(ptep); - pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; - pgste_set_key(ptep, pgste, entry, mm); - pgste = pgste_set_pte(ptep, pgste, entry); - pgste_set_unlock(ptep, pgste); -} -EXPORT_SYMBOL(set_pte_pgste_at); -#endif - static inline pgste_t ptep_xchg_start(struct mm_struct *mm, unsigned long addr, pte_t *ptep) { @@ -1486,112 +350,6 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, } EXPORT_SYMBOL(pmdp_xchg_lazy); -/* - * switch on pgstes for its userspace process (for kvm) - */ -int s390_enable_sie(void) -{ - struct mm_struct *mm = current->mm; - - /* Do we have pgstes? if yes, we are done */ - if (mm_has_pgste(mm)) - return 0; - /* Fail if the page tables are 2K */ - if (!mm_alloc_pgste(mm)) - return -EINVAL; - down_write(&mm->mmap_sem); - mm->context.has_pgste = 1; - /* split thp mappings and disable thp for future mappings */ - thp_split_mm(mm); - up_write(&mm->mmap_sem); - return 0; -} -EXPORT_SYMBOL_GPL(s390_enable_sie); - -/* - * Enable storage key handling from now on and initialize the storage - * keys with the default key. - */ -static int __s390_enable_skey(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - unsigned long ptev; - pgste_t pgste; - - /* - * Remove all zero page mappings, - * after establishing a policy to forbid zero page mappings - * following faults for that page will get fresh anonymous pages - */ - if (is_zero_pfn(pte_pfn(*pte))) - ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID)); - /* Clear storage key */ - pgste = pgste_get_lock(pte); - pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | - PGSTE_GR_BIT | PGSTE_GC_BIT); - ptev = pte_val(*pte); - if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) - page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); - pgste_set_unlock(pte, pgste); - return 0; -} - -int s390_enable_skey(void) -{ - struct mm_walk walk = { .pte_entry = __s390_enable_skey }; - struct mm_struct *mm = current->mm; - struct vm_area_struct *vma; - int rc = 0; - - down_write(&mm->mmap_sem); - if (mm_use_skey(mm)) - goto out_up; - - mm->context.use_skey = 1; - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (ksm_madvise(vma, vma->vm_start, vma->vm_end, - MADV_UNMERGEABLE, &vma->vm_flags)) { - mm->context.use_skey = 0; - rc = -ENOMEM; - goto out_up; - } - } - mm->def_flags &= ~VM_MERGEABLE; - - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - -out_up: - up_write(&mm->mmap_sem); - return rc; -} -EXPORT_SYMBOL_GPL(s390_enable_skey); - -/* - * Reset CMMA state, make all pages stable again. - */ -static int __s390_reset_cmma(pte_t *pte, unsigned long addr, - unsigned long next, struct mm_walk *walk) -{ - pgste_t pgste; - - pgste = pgste_get_lock(pte); - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; - pgste_set_unlock(pte, pgste); - return 0; -} - -void s390_reset_cmma(struct mm_struct *mm) -{ - struct mm_walk walk = { .pte_entry = __s390_reset_cmma }; - - down_write(&mm->mmap_sem); - walk.mm = mm; - walk_page_range(0, TASK_SIZE, &walk); - up_write(&mm->mmap_sem); -} -EXPORT_SYMBOL_GPL(s390_reset_cmma); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, pgtable_t pgtable) @@ -1632,3 +390,193 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) return pgtable; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#ifdef CONFIG_PGSTE +void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t entry) +{ + pgste_t pgste; + + /* the mm_has_pgste() check is done in set_pte_at() */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~_PGSTE_GPS_ZERO; + pgste_set_key(ptep, pgste, entry, mm); + pgste = pgste_set_pte(ptep, pgste, entry); + pgste_set_unlock(ptep, pgste); +} + +void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + pgste_t pgste; + + pgste = pgste_get_lock(ptep); + pgste_val(pgste) |= PGSTE_IN_BIT; + pgste_set_unlock(ptep, pgste); +} + +static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +{ + if (!non_swap_entry(entry)) + dec_mm_counter(mm, MM_SWAPENTS); + else if (is_migration_entry(entry)) { + struct page *page = migration_entry_to_page(entry); + + dec_mm_counter(mm, mm_counter(page)); + } + free_swap_and_cache(entry); +} + +void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, int reset) +{ + unsigned long pgstev; + pgste_t pgste; + pte_t pte; + + /* Zap unused and logically-zero pages */ + pgste = pgste_get_lock(ptep); + pgstev = pgste_val(pgste); + pte = *ptep; + if (pte_swap(pte) && + ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || + (pgstev & _PGSTE_GPS_ZERO))) { + ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + pte_clear(mm, addr, ptep); + } + if (reset) + pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_set_unlock(ptep, pgste); +} + +void ptep_zap_key(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ + unsigned long ptev; + pgste_t pgste; + + /* Clear storage key */ + pgste = pgste_get_lock(ptep); + pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | + PGSTE_GR_BIT | PGSTE_GC_BIT); + ptev = pte_val(*ptep); + if (!(ptev & _PAGE_INVALID) && (ptev & _PAGE_WRITE)) + page_set_storage_key(ptev & PAGE_MASK, PAGE_DEFAULT_KEY, 1); + pgste_set_unlock(ptep, pgste); +} + +/* + * Test and reset if a guest page is dirty + */ +bool test_and_clear_guest_dirty(struct mm_struct *mm, unsigned long addr) +{ + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + pte_t pte; + bool dirty; + + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) + return false; + + pgste = pgste_get_lock(ptep); + dirty = !!(pgste_val(pgste) & PGSTE_UC_BIT); + pgste_val(pgste) &= ~PGSTE_UC_BIT; + pte = *ptep; + if (dirty && (pte_val(pte) & _PAGE_PRESENT)) { + pgste = pgste_ipte_notify(mm, addr, ptep, pgste); + __ptep_ipte(addr, ptep); + if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE)) + pte_val(pte) |= _PAGE_PROTECT; + else + pte_val(pte) |= _PAGE_INVALID; + *ptep = pte; + } + pgste_set_unlock(ptep, pgste); + + spin_unlock(ptl); + return dirty; +} +EXPORT_SYMBOL_GPL(test_and_clear_guest_dirty); + +int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, + unsigned char key, bool nq) +{ + unsigned long keyul; + spinlock_t *ptl; + pgste_t old, new; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + + new = old = pgste_get_lock(ptep); + pgste_val(new) &= ~(PGSTE_GR_BIT | PGSTE_GC_BIT | + PGSTE_ACC_BITS | PGSTE_FP_BIT); + keyul = (unsigned long) key; + pgste_val(new) |= (keyul & (_PAGE_CHANGED | _PAGE_REFERENCED)) << 48; + pgste_val(new) |= (keyul & (_PAGE_ACC_BITS | _PAGE_FP_BIT)) << 56; + if (!(pte_val(*ptep) & _PAGE_INVALID)) { + unsigned long address, bits, skey; + + address = pte_val(*ptep) & PAGE_MASK; + skey = (unsigned long) page_get_storage_key(address); + bits = skey & (_PAGE_CHANGED | _PAGE_REFERENCED); + skey = key & (_PAGE_ACC_BITS | _PAGE_FP_BIT); + /* Set storage key ACC and FP */ + page_set_storage_key(address, skey, !nq); + /* Merge host changed & referenced into pgste */ + pgste_val(new) |= bits << 52; + } + /* changing the guest storage key is considered a change of the page */ + if ((pgste_val(new) ^ pgste_val(old)) & + (PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT)) + pgste_val(new) |= PGSTE_UC_BIT; + + pgste_set_unlock(ptep, new); + pte_unmap_unlock(ptep, ptl); + up_read(&mm->mmap_sem); + return 0; +} +EXPORT_SYMBOL(set_guest_storage_key); + +unsigned char get_guest_storage_key(struct mm_struct *mm, unsigned long addr) +{ + unsigned char key; + spinlock_t *ptl; + pgste_t pgste; + pte_t *ptep; + + down_read(&mm->mmap_sem); + ptep = get_locked_pte(mm, addr, &ptl); + if (unlikely(!ptep)) { + up_read(&mm->mmap_sem); + return -EFAULT; + } + pgste = pgste_get_lock(ptep); + + if (pte_val(*ptep) & _PAGE_INVALID) { + key = (pgste_val(pgste) & PGSTE_ACC_BITS) >> 56; + key |= (pgste_val(pgste) & PGSTE_FP_BIT) >> 56; + key |= (pgste_val(pgste) & PGSTE_GR_BIT) >> 48; + key |= (pgste_val(pgste) & PGSTE_GC_BIT) >> 48; + } else { + key = page_get_storage_key(pte_val(*ptep) & PAGE_MASK); + + /* Reflect guest's logical view, not physical */ + if (pgste_val(pgste) & PGSTE_GR_BIT) + key |= _PAGE_REFERENCED; + if (pgste_val(pgste) & PGSTE_GC_BIT) + key |= _PAGE_CHANGED; + } + + pgste_set_unlock(ptep, pgste); + pte_unmap_unlock(ptep, ptl); + up_read(&mm->mmap_sem); + return key; +} +EXPORT_SYMBOL(get_guest_storage_key); +#endif -- cgit v1.1 From 7eb792bf7c253cb63202aae72885f5f7abdd8668 Mon Sep 17 00:00:00 2001 From: Adam Buchbinder Date: Fri, 4 Mar 2016 11:20:04 -0800 Subject: s390: Fix misspellings in comments Signed-off-by: Adam Buchbinder Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/perf_event.h | 2 +- arch/s390/include/asm/rwsem.h | 2 +- arch/s390/kernel/perf_cpum_cf.c | 2 +- arch/s390/kernel/perf_event.c | 2 +- arch/s390/kvm/guestdbg.c | 4 ++-- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/s390/include/asm/perf_event.h b/arch/s390/include/asm/perf_event.h index f897ec7..1f7ff85 100644 --- a/arch/s390/include/asm/perf_event.h +++ b/arch/s390/include/asm/perf_event.h @@ -21,7 +21,7 @@ #define PMU_F_ERR_LSDA 0x0200 #define PMU_F_ERR_MASK (PMU_F_ERR_IBE|PMU_F_ERR_LSDA) -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ extern __init const struct attribute_group **cpumf_cf_event_group(void); extern ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, diff --git a/arch/s390/include/asm/rwsem.h b/arch/s390/include/asm/rwsem.h index 4b43ee7..fead491 100644 --- a/arch/s390/include/asm/rwsem.h +++ b/arch/s390/include/asm/rwsem.h @@ -31,7 +31,7 @@ * This should be totally fair - if anything is waiting, a process that wants a * lock will go to the back of the queue. When the currently active lock is * released, if there's a writer at the front of the queue, then that and only - * that will be woken up; if there's a bunch of consequtive readers at the + * that will be woken up; if there's a bunch of consecutive readers at the * front, then they'll all be woken up, but no other readers will be. */ diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 929c147..58bf457 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -383,7 +383,7 @@ static int __hw_perf_event_init(struct perf_event *event) /* Validate the counter that is assigned to this event. * Because the counter facility can use numerous counters at the - * same time without constraints, it is not necessary to explicity + * same time without constraints, it is not necessary to explicitly * validate event groups (event->group_leader != event). */ err = validate_event(hwc); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index a1b7086..c3e4099 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -238,7 +238,7 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, dump_trace(__perf_callchain_kernel, entry, NULL, regs->gprs[15]); } -/* Perf defintions for PMU event attributes in sysfs */ +/* Perf definitions for PMU event attributes in sysfs */ ssize_t cpumf_events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d697312..e8c6843 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -17,7 +17,7 @@ /* * Extends the address range given by *start and *stop to include the address * range starting with estart and the length len. Takes care of overflowing - * intervals and tries to minimize the overall intervall size. + * intervals and tries to minimize the overall interval size. */ static void extend_address_range(u64 *start, u64 *stop, u64 estart, int len) { @@ -72,7 +72,7 @@ static void enable_all_hw_bp(struct kvm_vcpu *vcpu) return; /* - * If the guest is not interrested in branching events, we can savely + * If the guest is not interested in branching events, we can safely * limit them to the PER address range. */ if (!(*cr9 & PER_EVENT_BRANCH)) -- cgit v1.1 From 8f100bb1ff27873dd71f636da670e503b9ade3c6 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Thu, 10 Mar 2016 10:32:21 +0100 Subject: s390/cpumf: add missing lpp magic initialization Add the missing lpp magic initialization for cpu 0. Without this all samples on cpu 0 do not have the most significant bit set in the program parameter field, which we use to distinguish between guest and host samples if the pid is also 0. We did initialize the lpp magic in the absolute zero lowcore but forgot that when switching to the allocated lowcore on cpu 0 only. Reported-by: Shu Juan Zhang Acked-by: Christian Borntraeger Cc: stable@vger.kernel.org # v4.4+ Fixes: e22cf8ca6f75 ("s390/cpumf: rework program parameter setting to detect guest samples") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index b1fbcc0..cc46767 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -327,6 +327,7 @@ static void __init setup_lowcore(void) + PAGE_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs); lc->current_task = (unsigned long) init_thread_union.thread_info.task; lc->thread_info = (unsigned long) &init_thread_union; + lc->lpp = LPP_MAGIC; lc->machine_flags = S390_lowcore.machine_flags; lc->stfl_fac_list = S390_lowcore.stfl_fac_list; memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list, -- cgit v1.1 From e370e4769463a65dcf8806fa26d2874e0542ac41 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 10 Mar 2016 09:52:55 +0100 Subject: s390: fix floating pointer register corruption (again) There is a tricky interaction between the machine check handler and the critical sections of load_fpu_regs and save_fpu_regs functions. If the machine check interrupts one of the two functions the critical section cleanup will complete the function before the machine check handler s390_do_machine_check is called. Trouble is that the machine check handler needs to validate the floating point registers *before* and not *after* the completion of load_fpu_regs/save_fpu_regs. The simplest solution is to rewind the PSW to the start of the load_fpu_regs/save_fpu_regs and retry the function after the return from the machine check handler. Tested-by: Christian Borntraeger Cc: # 4.3+ Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/entry.S | 106 +---------------------------------------------- 1 file changed, 2 insertions(+), 104 deletions(-) diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 4ba688c..2d47f9c 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -1200,114 +1200,12 @@ cleanup_critical: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bor %r14 - clg %r9,BASED(.Lcleanup_save_fpu_regs_done) - jhe 5f - clg %r9,BASED(.Lcleanup_save_fpu_regs_fp) - jhe 4f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_save_fpu_regs_vx_low) - jhe 2f - clg %r9,BASED(.Lcleanup_save_fpu_fpc_end) - jhe 1f - lg %r2,__LC_CURRENT - aghi %r2,__TASK_thread -0: # Store floating-point controls - stfpc __THREAD_FPU_fpc(%r2) -1: # Load register save area and check if VX is active - lg %r3,__THREAD_FPU_regs(%r2) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - jz 4f # no VX -> store FP regs -2: # Store vector registers (V0-V15) - VSTM %v0,%v15,0,%r3 # vstm 0,15,0(3) -3: # Store vector registers (V16-V31) - VSTM %v16,%v31,256,%r3 # vstm 16,31,256(3) - j 5f # -> done, set CIF_FPU flag -4: # Store floating-point registers - std 0,0(%r3) - std 1,8(%r3) - std 2,16(%r3) - std 3,24(%r3) - std 4,32(%r3) - std 5,40(%r3) - std 6,48(%r3) - std 7,56(%r3) - std 8,64(%r3) - std 9,72(%r3) - std 10,80(%r3) - std 11,88(%r3) - std 12,96(%r3) - std 13,104(%r3) - std 14,112(%r3) - std 15,120(%r3) -5: # Set CIF_FPU flag - oi __LC_CPU_FLAGS+7,_CIF_FPU - lg %r9,48(%r11) # return from save_fpu_regs + larl %r9,save_fpu_regs br %r14 -.Lcleanup_save_fpu_fpc_end: - .quad .Lsave_fpu_regs_fpc_end -.Lcleanup_save_fpu_regs_vx_low: - .quad .Lsave_fpu_regs_vx_low -.Lcleanup_save_fpu_regs_vx_high: - .quad .Lsave_fpu_regs_vx_high -.Lcleanup_save_fpu_regs_fp: - .quad .Lsave_fpu_regs_fp -.Lcleanup_save_fpu_regs_done: - .quad .Lsave_fpu_regs_done .Lcleanup_load_fpu_regs: - TSTMSK __LC_CPU_FLAGS,_CIF_FPU - bnor %r14 - clg %r9,BASED(.Lcleanup_load_fpu_regs_done) - jhe 1f - clg %r9,BASED(.Lcleanup_load_fpu_regs_fp) - jhe 2f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx_high) - jhe 3f - clg %r9,BASED(.Lcleanup_load_fpu_regs_vx) - jhe 4f - lg %r4,__LC_CURRENT - aghi %r4,__TASK_thread - lfpc __THREAD_FPU_fpc(%r4) - TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_VX - lg %r4,__THREAD_FPU_regs(%r4) # %r4 <- reg save area - jz 2f # -> no VX, load FP regs -4: # Load V0 ..V15 registers - VLM %v0,%v15,0,%r4 -3: # Load V16..V31 registers - VLM %v16,%v31,256,%r4 - j 1f -2: # Load floating-point registers - ld 0,0(%r4) - ld 1,8(%r4) - ld 2,16(%r4) - ld 3,24(%r4) - ld 4,32(%r4) - ld 5,40(%r4) - ld 6,48(%r4) - ld 7,56(%r4) - ld 8,64(%r4) - ld 9,72(%r4) - ld 10,80(%r4) - ld 11,88(%r4) - ld 12,96(%r4) - ld 13,104(%r4) - ld 14,112(%r4) - ld 15,120(%r4) -1: # Clear CIF_FPU bit - ni __LC_CPU_FLAGS+7,255-_CIF_FPU - lg %r9,48(%r11) # return from load_fpu_regs + larl %r9,load_fpu_regs br %r14 -.Lcleanup_load_fpu_regs_vx: - .quad .Lload_fpu_regs_vx -.Lcleanup_load_fpu_regs_vx_high: - .quad .Lload_fpu_regs_vx_high -.Lcleanup_load_fpu_regs_fp: - .quad .Lload_fpu_regs_fp -.Lcleanup_load_fpu_regs_done: - .quad .Lload_fpu_regs_done /* * Integer constants -- cgit v1.1 From 80c544ded25ac14d7cc3e555abb8ed2c2da99b84 Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Mon, 14 Mar 2016 15:47:23 +0100 Subject: s390/pci: enforce fmb page boundary rule The function measurement block must not cross a page boundary. Ensure that by raising the alignment requirement to the smallest power of 2 larger than the size of the fmb. Fixes: d0b088531 ("s390/pci: performance statistics and debug infrastructure") Cc: stable@vger.kernel.org # v3.8+ Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 2 +- arch/s390/pci/pci.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index dc763ea..f833082d 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -45,7 +45,7 @@ struct zpci_fmb { u64 rpcit_ops; u64 dma_rbytes; u64 dma_wbytes; -} __packed __aligned(16); +} __packed __aligned(64); enum zpci_state { ZPCI_FN_STATE_RESERVED, diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index f76d01e..9fd59a7 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -860,8 +860,11 @@ static inline int barsize(u8 size) static int zpci_mem_init(void) { + BUILD_BUG_ON(!is_power_of_2(__alignof__(struct zpci_fmb)) || + __alignof__(struct zpci_fmb) < sizeof(struct zpci_fmb)); + zdev_fmb_cache = kmem_cache_create("PCI_FMB_cache", sizeof(struct zpci_fmb), - 16, 0, NULL); + __alignof__(struct zpci_fmb), 0, NULL); if (!zdev_fmb_cache) goto error_fmb; -- cgit v1.1